diff --git a/app/services/protected_branches/cache_service.rb b/app/services/protected_branches/cache_service.rb index 8c521f4ebcba326b0b1ab04107c0743fe460473d..a664ff0542f967edfc47b0632d7b44eaa9545d0a 100644 --- a/app/services/protected_branches/cache_service.rb +++ b/app/services/protected_branches/cache_service.rb @@ -7,20 +7,26 @@ class CacheService < ProtectedBranches::BaseService CACHE_EXPIRE_IN = 1.day CACHE_LIMIT = 1000 - def fetch(ref_name, dry_run: false) + def fetch(ref_name, dry_run: false, &block) record = OpenSSL::Digest::SHA256.hexdigest(ref_name) Gitlab::Redis::Cache.with do |redis| cached_result = redis.hget(redis_key, record) - decoded_result = Gitlab::Redis::Boolean.decode(cached_result) unless cached_result.nil? + if cached_result.nil? + metrics.increment_cache_miss + else + metrics.increment_cache_hit + + decoded_result = Gitlab::Redis::Boolean.decode(cached_result) + end # If we're dry-running, don't break because we need to check against # the real value to ensure the cache is working properly. # If the result is nil we'll need to run the block, so don't break yet. break decoded_result unless dry_run || decoded_result.nil? - calculated_value = yield + calculated_value = metrics.observe_cache_generation(&block) check_and_log_discrepancy(decoded_result, calculated_value, ref_name) if dry_run @@ -64,5 +70,14 @@ def check_and_log_discrepancy(cached_value, real_value, ref_name) def redis_key @redis_key ||= [CACHE_ROOT_KEY, @project.id].join(':') end + + def metrics + @metrics ||= Gitlab::Cache::Metrics.new( + caller_id: Gitlab::ApplicationContext.current_context_attribute(:caller_id), + cache_identifier: "#{self.class}#fetch", + feature_category: :source_code_management, + backing_resource: :cpu + ) + end end end diff --git a/doc/administration/monitoring/prometheus/gitlab_metrics.md b/doc/administration/monitoring/prometheus/gitlab_metrics.md index ba33bcc47f5f9fa2e602722086fc50f795ee552c..bbbfa90e0d403fb6bcc1aaa6263efdab52e40823 100644 --- a/doc/administration/monitoring/prometheus/gitlab_metrics.md +++ b/doc/administration/monitoring/prometheus/gitlab_metrics.md @@ -148,6 +148,8 @@ The following metrics are available: | `gitlab_ci_build_trace_errors_total` | Counter | 14.4 | Total amount of different error types on a build trace | `error_reason` | | `gitlab_presentable_object_cacheless_render_real_duration_seconds` | Histogram | 15.3 | Duration of real time spent caching and representing specific web request objects | `controller`, `action` | | `cached_object_operations_total` | Counter | 15.3 | Total number of objects cached for specific web requests | `controller`, `action` | +| `redis_hit_miss_operations_total` | Counter | 15.6 | Total number of Redis cache hits and misses | `cache_hit`, `caller_id`, `cache_identifier`, `feature_category`, `backing_resource` | +| `redis_cache_generation_duration_seconds` | Histogram | 15.6 | Time to generate Redis cache | `cache_hit`, `caller_id`, `cache_identifier`, `feature_category`, `backing_resource` | ## Metrics controlled by a feature flag diff --git a/lib/gitlab/cache/metrics.rb b/lib/gitlab/cache/metrics.rb new file mode 100644 index 0000000000000000000000000000000000000000..0143052beb1a63310303666b717b223db2417aec --- /dev/null +++ b/lib/gitlab/cache/metrics.rb @@ -0,0 +1,87 @@ +# frozen_string_literal: true + +# Instrumentation for cache efficiency metrics +module Gitlab + module Cache + class Metrics + DEFAULT_BUCKETS = [0, 1, 5].freeze + VALID_BACKING_RESOURCES = [:cpu, :database, :gitaly, :memory, :unknown].freeze + DEFAULT_BACKING_RESOURCE = :unknown + + def initialize( + caller_id:, + cache_identifier:, + feature_category: ::Gitlab::FeatureCategories::FEATURE_CATEGORY_DEFAULT, + backing_resource: DEFAULT_BACKING_RESOURCE + ) + @caller_id = caller_id + @cache_identifier = cache_identifier + @feature_category = Gitlab::FeatureCategories.default.get!(feature_category) + @backing_resource = fetch_backing_resource!(backing_resource) + end + + # Increase cache hit counter + # + def increment_cache_hit + counter.increment(labels.merge(cache_hit: true)) + end + + # Increase cache miss counter + # + def increment_cache_miss + counter.increment(labels.merge(cache_hit: false)) + end + + # Measure the duration of cacheable action + # + # @example + # observe_cache_generation do + # cacheable_action + # end + # + def observe_cache_generation(&block) + real_start = Gitlab::Metrics::System.monotonic_time + + value = yield + + histogram.observe({}, Gitlab::Metrics::System.monotonic_time - real_start) + + value + end + + private + + attr_reader :caller_id, :cache_identifier, :feature_category, :backing_resource + + def counter + @counter ||= Gitlab::Metrics.counter(:redis_hit_miss_operations_total, "Hit/miss Redis cache counter") + end + + def histogram + @histogram ||= Gitlab::Metrics.histogram( + :redis_cache_generation_duration_seconds, + 'Duration of Redis cache generation', + labels, + DEFAULT_BUCKETS + ) + end + + def labels + @labels ||= { + caller_id: caller_id, + cache_identifier: cache_identifier, + feature_category: feature_category, + backing_resource: backing_resource + } + end + + def fetch_backing_resource!(resource) + return resource if VALID_BACKING_RESOURCES.include?(resource) + + raise "Unknown backing resource: #{resource}" if Gitlab.dev_or_test_env? + + DEFAULT_BACKING_RESOURCE + end + end + end +end diff --git a/lib/gitlab/feature_categories.rb b/lib/gitlab/feature_categories.rb index d06f3b14fed59cbe59c12ae50667ffd94f86db4c..17586a94d7eb8f91360c7c019a7b2d02f7dde85d 100644 --- a/lib/gitlab/feature_categories.rb +++ b/lib/gitlab/feature_categories.rb @@ -31,6 +31,14 @@ def from_request(request) category end + def get!(feature_category) + return feature_category if valid?(feature_category) + + raise "Unknown feature category: #{feature_category}" if Gitlab.dev_or_test_env? + + FEATURE_CATEGORY_DEFAULT + end + def valid?(category) categories.include?(category.to_s) end diff --git a/spec/lib/gitlab/cache/metrics_spec.rb b/spec/lib/gitlab/cache/metrics_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..d8103837708ab84ee3cc2ce090bce631a596c34e --- /dev/null +++ b/spec/lib/gitlab/cache/metrics_spec.rb @@ -0,0 +1,118 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Cache::Metrics do + subject(:metrics) do + described_class.new( + caller_id: caller_id, + cache_identifier: cache_identifier, + feature_category: feature_category, + backing_resource: backing_resource + ) + end + + let(:caller_id) { 'caller-id' } + let(:cache_identifier) { 'ApplicationController#show' } + let(:feature_category) { :source_code_management } + let(:backing_resource) { :unknown } + + let(:counter_mock) { instance_double(Prometheus::Client::Counter) } + + before do + allow(Gitlab::Metrics).to receive(:counter) + .with( + :redis_hit_miss_operations_total, + 'Hit/miss Redis cache counter' + ).and_return(counter_mock) + end + + describe '#initialize' do + context 'when backing resource is not supported' do + let(:backing_resource) { 'foo' } + + it { expect { metrics }.to raise_error(RuntimeError) } + + context 'when on production' do + before do + allow(Gitlab).to receive(:dev_or_test_env?).and_return(false) + end + + it 'does not raise an exception' do + expect { metrics }.not_to raise_error + end + end + end + end + + describe '#increment_cache_hit' do + subject { metrics.increment_cache_hit } + + it 'increments number of hits' do + expect(counter_mock) + .to receive(:increment) + .with( + { + caller_id: caller_id, + cache_identifier: cache_identifier, + feature_category: feature_category, + backing_resource: backing_resource, + cache_hit: true + } + ).once + + subject + end + end + + describe '#increment_cache_miss' do + subject { metrics.increment_cache_miss } + + it 'increments number of misses' do + expect(counter_mock) + .to receive(:increment) + .with( + { + caller_id: caller_id, + cache_identifier: cache_identifier, + feature_category: feature_category, + backing_resource: backing_resource, + cache_hit: false + } + ).once + + subject + end + end + + describe '#observe_cache_generation' do + subject do + metrics.observe_cache_generation { action } + end + + let(:action) { 'action' } + let(:histogram_mock) { instance_double(Prometheus::Client::Histogram) } + + before do + allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(100.0, 500.0) + end + + it 'updates histogram metric' do + expect(Gitlab::Metrics).to receive(:histogram).with( + :redis_cache_generation_duration_seconds, + 'Duration of Redis cache generation', + { + caller_id: caller_id, + cache_identifier: cache_identifier, + feature_category: feature_category, + backing_resource: backing_resource + }, + [0, 1, 5] + ).and_return(histogram_mock) + + expect(histogram_mock).to receive(:observe).with({}, 400.0) + + is_expected.to eq(action) + end + end +end diff --git a/spec/lib/gitlab/feature_categories_spec.rb b/spec/lib/gitlab/feature_categories_spec.rb index 477da900d0a33008a9d2621a1261b89518d81f0b..a35166a4499deb4194b6d9d7fd7231a1ee11b5fe 100644 --- a/spec/lib/gitlab/feature_categories_spec.rb +++ b/spec/lib/gitlab/feature_categories_spec.rb @@ -5,7 +5,7 @@ RSpec.describe Gitlab::FeatureCategories do let(:fake_categories) { %w(foo bar) } - subject { described_class.new(fake_categories) } + subject(:feature_categories) { described_class.new(fake_categories) } describe "#valid?" do it "returns true if category is known", :aggregate_failures do @@ -14,6 +14,28 @@ end end + describe '#get!' do + subject { feature_categories.get!(category) } + + let(:category) { 'foo' } + + it { is_expected.to eq('foo') } + + context 'when category does not exist' do + let(:category) { 'zzz' } + + it { expect { subject }.to raise_error(RuntimeError) } + + context 'when on production' do + before do + allow(Gitlab).to receive(:dev_or_test_env?).and_return(false) + end + + it { is_expected.to eq('unknown') } + end + end + end + describe "#from_request" do let(:request_env) { {} } let(:verified) { true } diff --git a/spec/services/protected_branches/cache_service_spec.rb b/spec/services/protected_branches/cache_service_spec.rb index 00d1e8b5457ba4562f1bb74d701b1588e57465e7..d7a3258160bc1ec65f7cd65a73503a596e945fb9 100644 --- a/spec/services/protected_branches/cache_service_spec.rb +++ b/spec/services/protected_branches/cache_service_spec.rb @@ -111,5 +111,16 @@ expect(service.fetch('not-found') { true }).to eq(true) end end + + describe 'metrics' do + it 'records hit ratio metrics' do + expect_next_instance_of(Gitlab::Cache::Metrics) do |metrics| + expect(metrics).to receive(:increment_cache_miss).once + expect(metrics).to receive(:increment_cache_hit).exactly(4).times + end + + 5.times { service.fetch('main') { true } } + end + end end # rubocop:enable Style/RedundantFetchBlock