From 02316988301ab7f01265de9a570690f23a47632c Mon Sep 17 00:00:00 2001 From: hustewart Date: Wed, 6 Aug 2025 11:22:22 -0400 Subject: [PATCH] Cache git access on download commands We want to roll out caching access checks by actor, so that we can roll it out gradually. See https://docs.gitlab.com/development/feature_flags/#feature-actors Actors in this file can be a different entities so we are not guaranteed a consistent concept like "user" or "project" that we can use as a feature actor, because those things might not exist. The simplest way we can ensure a consistent rollout is to make a specific actor's existence a pre-condition to checking the Feature Flag's status. For example, we can check if a project exists and if the FeatureFlag is enabled for that project. This means when we have the feature flag rolled out 100%, it will still only apply when projects are present. To fully roll out the caching mechanism incrementally, we'll need to add a feature flag for each type of actor. We will need a different strategy for rolling out to ci. --- .../beta/cache_git_project_access_check.yml | 10 + lib/gitlab/git_access.rb | 115 +++++++-- spec/lib/gitlab/git_access_spec.rb | 218 ++++++++++++++++++ 3 files changed, 322 insertions(+), 21 deletions(-) create mode 100644 config/feature_flags/beta/cache_git_project_access_check.yml diff --git a/config/feature_flags/beta/cache_git_project_access_check.yml b/config/feature_flags/beta/cache_git_project_access_check.yml new file mode 100644 index 00000000000000..36ad342ee20e8a --- /dev/null +++ b/config/feature_flags/beta/cache_git_project_access_check.yml @@ -0,0 +1,10 @@ +--- +name: cache_git_project_access_check +description: +feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/553981 +introduced_by_url: +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/560137 +milestone: '18.3' +group: group::source code +type: beta +default_enabled: false diff --git a/lib/gitlab/git_access.rb b/lib/gitlab/git_access.rb index 977deb56ccffc4..5e0d6de4ade75d 100644 --- a/lib/gitlab/git_access.rb +++ b/lib/gitlab/git_access.rb @@ -41,6 +41,7 @@ class GitAccess Timing information for debugging purposes: MESSAGE + CACHEABLE_COMMANDS = %w[git-upload-pack].freeze DOWNLOAD_COMMANDS = %w[git-upload-pack git-upload-archive].freeze PUSH_COMMANDS = %w[git-receive-pack].freeze ALL_COMMANDS = DOWNLOAD_COMMANDS + PUSH_COMMANDS @@ -76,28 +77,14 @@ def check(cmd, changes) @changes = changes @cmd = cmd - check_protocol! - check_valid_actor! - check_active_user! - check_authentication_abilities! - check_command_disabled! - check_command_existence! - - check_custom_ssh_action! - - check_db_accessibility! - check_container! - check_repository_existence! - - case cmd - when *DOWNLOAD_COMMANDS - check_download_access! - when *PUSH_COMMANDS - check_push_access! + # Use cache for read operations + if should_cache_access_check? + Rails.cache.fetch(access_check_cache_key, expires_in: 10.seconds) do + perform_full_access_check + end + else + perform_full_access_check end - check_additional_conditions! - - success_result end def logger @@ -142,6 +129,92 @@ def protocol_allowed? private + # We want to roll out caching access checks by actor, so that we can roll it out gradually. See + # https://docs.gitlab.com/development/feature_flags/#feature-actors + # + # Actors in this file can be a different entities so we are not guaranteed a consistent concept like "user" or + # "project" that we can use as a feature actor, because those things might not exist. + # + # The simplest way we can ensure a consistent rollout is to make a specific actor's existence a pre-condition to checking the + # Feature Flag's status. For example, we can check if a project exists and if the FeatureFlag is enabled for that + # project. + # + # This means when we have the feature flag rolled out 100%, it will still only apply when projects are present. + # + # To fully roll out the caching mechanism incrementally, we'll need to add a feature flag for each type of actor. + # + # We will need a different strategy for rolling out to ci. + def should_cache_access_check? + project? && + CACHEABLE_COMMANDS.include?(@cmd) && + Feature.enabled?(:cache_git_project_access_check, project) + end + + def perform_full_access_check + check_protocol! + check_valid_actor! + check_active_user! + check_authentication_abilities! + check_command_disabled! + check_command_existence! + check_custom_ssh_action! + check_db_accessibility! + check_container! + check_repository_existence! + + case @cmd + when *DOWNLOAD_COMMANDS + check_download_access! + when *PUSH_COMMANDS + check_push_access! + end + + check_additional_conditions! + success_result + end + + def access_check_cache_key + key_components = [ + 'git_access', + cmd, + actor_cache_key, + container_cache_key, + auth_abilities_hash, + protocol + ].compact + + key = key_components.join(':') + # Ensure key doesn't exceed Redis limits + key.length > 250 ? OpenSSL::Digest::SHA256.hexdigest(key) : key + end + + def actor_cache_key + case actor + when User + "u:#{actor.id}:#{actor.updated_at.to_i}" + when Key + "k:#{actor.id}:#{actor.updated_at.to_i}" + when DeployKey + "dk:#{actor.id}:#{actor.updated_at.to_i}" + when :ci + 'ci' + else + 'anon' + end + end + + def container_cache_key + return 'nil' unless container + + # Use shorter cache key for container + "#{container.class.name}:#{container.id}:#{container.updated_at.to_i}" + end + + def auth_abilities_hash + # Convert abilities array to short hash to reduce key size + OpenSSL::Digest::SHA256.hexdigest(authentication_abilities.sort.join(','))[0..7] + end + # when accessing via the CI_JOB_TOKEN def build_can_download_code? authentication_abilities.include?(:build_download_code) && user_access.can_do_action?(:build_download_code) diff --git a/spec/lib/gitlab/git_access_spec.rb b/spec/lib/gitlab/git_access_spec.rb index 1327473bd64eed..07dacf54f26fbc 100644 --- a/spec/lib/gitlab/git_access_spec.rb +++ b/spec/lib/gitlab/git_access_spec.rb @@ -33,6 +33,224 @@ def download_ability end end + describe 'access check caching', :use_clean_rails_memory_store_caching, :use_clean_rails_redis_caching do + before do + project.add_maintainer(user) + end + + context 'when cache_git_project_access_check feature flag is disabled' do + before do + stub_feature_flags(cache_git_project_access_check: false) + end + + it 'does not use cache for download operations' do + expect(Rails.cache).not_to receive(:fetch) + + access.check('git-upload-pack', changes) + end + + it 'does not use cache for push operations' do + expect(Rails.cache).not_to receive(:fetch) + + access.check('git-receive-pack', changes) + end + end + + context 'when cache_git_project_access_check feature flag is enabled' do + before do + stub_feature_flags(cache_git_project_access_check: true) + end + + context 'for download operations (git-upload-pack)' do + let(:cmd) { 'git-upload-pack' } + + it 'caches successful access check results' do + expect(Rails.cache).to receive(:fetch).with( + anything, + expires_in: 10.seconds + ).and_call_original + + result = access.check(cmd, changes) + expect(result).to be_a(Gitlab::GitAccessResult::Success) + end + + it 'uses cached result on subsequent calls' do + # First call - should populate cache + result1 = access.check(cmd, changes) + expect(result1).to be_a(Gitlab::GitAccessResult::Success) + + # Mock the cache to verify it's being used + expect(Rails.cache).to receive(:fetch).and_call_original + + # Second call - should use cached result + result2 = access.check(cmd, changes) + expect(result2).to be_a(Gitlab::GitAccessResult::Success) + end + + it 'generates consistent cache keys for same parameters' do + access1 = access_class.new(actor, project, protocol, + authentication_abilities: authentication_abilities, + repository_path: repository_path) + + access2 = access_class.new(actor, project, protocol, + authentication_abilities: authentication_abilities, + repository_path: repository_path) + + # Set the command for both instances + access1.instance_variable_set(:@cmd, cmd) + access2.instance_variable_set(:@cmd, cmd) + + key1 = access1.send(:access_check_cache_key) + key2 = access2.send(:access_check_cache_key) + + expect(key1).to eq(key2) + end + + it 'generates different cache keys for different users' do + other_user = create(:user) + project.add_maintainer(other_user) + + access1 = access_class.new(user, project, protocol, + authentication_abilities: authentication_abilities, + repository_path: repository_path) + access1.instance_variable_set(:@cmd, cmd) + + access2 = access_class.new(other_user, project, protocol, + authentication_abilities: authentication_abilities, + repository_path: repository_path) + access2.instance_variable_set(:@cmd, cmd) + + key1 = access1.send(:access_check_cache_key) + key2 = access2.send(:access_check_cache_key) + + expect(key1).not_to eq(key2) + end + + it 'generates different cache keys for different projects' do + other_project = create(:project, :repository) + other_project.add_maintainer(user) + + access1 = access_class.new(user, project, protocol, + authentication_abilities: authentication_abilities, + repository_path: repository_path) + access1.instance_variable_set(:@cmd, cmd) + + access2 = access_class.new(user, other_project, protocol, + authentication_abilities: authentication_abilities, + repository_path: "#{other_project.full_path}.git") + access2.instance_variable_set(:@cmd, cmd) + + key1 = access1.send(:access_check_cache_key) + key2 = access2.send(:access_check_cache_key) + + expect(key1).not_to eq(key2) + end + + it 'does not cache failed access checks' do + # Use a completely different user that has no relationship to the project + other_user = create(:user) + + # Create access object with the unauthorized user + unauthorized_access = access_class.new(other_user, project, protocol, + authentication_abilities: authentication_abilities, + repository_path: repository_path) + + # Set the command + unauthorized_access.instance_variable_set(:@cmd, cmd) + + # Make sure project is private and user has no access + project.update!(visibility_level: Gitlab::VisibilityLevel::PRIVATE) + + # Verify the user has no project members record + expect(project.project_members.where(user: other_user)).to be_empty + + # Failed checks should raise an error consistently + expect { unauthorized_access.check(cmd, changes) }.to raise_error(Gitlab::GitAccess::NotFoundError) + expect { unauthorized_access.check(cmd, changes) }.to raise_error(Gitlab::GitAccess::NotFoundError) + end + end + + context 'for push operations (git-receive-pack)' do + let(:cmd) { 'git-receive-pack' } + + it 'does not cache push operations' do + expect(Rails.cache).not_to receive(:fetch) + + access.check(cmd, changes) + end + end + + context 'cache key optimization' do + let(:cmd) { 'git-upload-pack' } + + it 'keeps cache keys under 250 characters' do + # Create a project with a very long path + long_path = 'a' * 200 + long_project = create(:project, :repository, path: long_path) + long_project.add_maintainer(user) + + long_access = access_class.new(user, long_project, protocol, + authentication_abilities: authentication_abilities, + repository_path: "#{long_project.full_path}.git") + long_access.instance_variable_set(:@cmd, cmd) + + cache_key = long_access.send(:access_check_cache_key) + expect(cache_key.length).to be <= 250 + end + + it 'uses short actor cache keys' do + access.instance_variable_set(:@cmd, cmd) + cache_key = access.send(:actor_cache_key) + + expect(cache_key).to match(/^u:\d+:\d+$/) # Format: u:user_id:updated_at + expect(cache_key.length).to be < 50 + end + + it 'uses short container cache keys' do + access.instance_variable_set(:@cmd, cmd) + cache_key = access.send(:container_cache_key) + + expect(cache_key).to match(/^Project:\d+:\d+$/) # Format: Project:id:updated_at + expect(cache_key.length).to be < 50 + end + + it 'hashes authentication abilities to keep keys short' do + access.instance_variable_set(:@cmd, cmd) + abilities_hash = access.send(:auth_abilities_hash) + + expect(abilities_hash).to be_a(String) + expect(abilities_hash.length).to eq(8) # SHA1 first 8 chars + end + end + + context 'integration with git HTTP controller' do + let(:protocol) { 'http' } + let(:authentication_abilities) { [:download_code] } + + # Doing this because in this test file access is a method that always returns a new instance + let(:first_git_http_access) { access } + let(:second_git_http_access) { access } + + it 'caches access checks between info_refs and git_upload_pack calls' do + # First call - populates cache + expect(first_git_http_access).to receive(:perform_full_access_check).once.and_call_original + result1 = first_git_http_access.check('git-upload-pack', changes) + expect(result1).to be_a(Gitlab::GitAccessResult::Success) + + # Verify cache key exists + cache_key = first_git_http_access.send(:access_check_cache_key) + + expect(Rails.cache.exist?(cache_key)).to be true + + # Second call - uses cached result + expect(second_git_http_access).not_to receive(:perform_full_access_check).and_call_original + result2 = second_git_http_access.check('git-upload-pack', changes) + expect(result2).to be_a(Gitlab::GitAccessResult::Success) + end + end + end + end + describe '#check with single protocols allowed' do def disable_protocol(protocol) allow(Gitlab::ProtocolAccess).to receive(:allowed?).with(protocol, project: project).and_return(false) -- GitLab