class SecretsCheck < ::Gitlab::Checks::BaseBulkChecker
  # Blobs larger than this are skipped entirely. Limit is 1MiB to start with.
  BLOB_BYTES_LIMIT = 1.megabyte

  LOG_MESSAGE = "Detecting secrets..."

  # Collects the new blobs introduced by the push so they can be scanned
  # for secrets, and filters out blobs that are too large, binary, or
  # already present in the repository.
  #
  # Returns early (performing no work) when any of these guards fail:
  #   1. the `pre_receive_secret_detection_enabled` application setting is off;
  #   2. the instance is not GitLab Dedicated AND the
  #      `pre_receive_secret_detection_push_check` feature flag is disabled
  #      for the project;
  #   3. there is no push rule for the project;
  #   4. the `pre_receive_secret_detection` licensed feature (Ultimate) is
  #      not available.
  def validate!
    return unless ::Gitlab::CurrentSettings.pre_receive_secret_detection_enabled

    return unless ::Gitlab::CurrentSettings.gitlab_dedicated_instance ||
      ::Feature.enabled?(:pre_receive_secret_detection_push_check, project)

    return unless push_rule && project.licensed_feature_available?(:pre_receive_secret_detection)

    logger.log_timed(LOG_MESSAGE) do
      # List all blobs via `ListAllBlobs()` based on the existence of a
      # quarantine directory. If no directory exists, we use `ListBlobs()` instead.
      blobs =
        if ignore_alternate_directories?
          all_blobs = project.repository.list_all_blobs(
            bytes_limit: BLOB_BYTES_LIMIT + 1,
            dynamic_timeout: logger.time_left,
            ignore_alternate_object_directories: true
          ).to_a

          # A quarantine directory would generally only contain objects which
          # are actually new, but this is unfortunately not guaranteed by Git,
          # so a push may contain objects which already exist in the
          # repository. To fix this, we filter out the blobs that already exist.
          #
          # This is not a silver bullet though. A known limitation: a secret
          # could go into a commit on a branch (`refs/heads/secret`) that gets
          # deleted later on. The commit becomes unreachable yet remains in the
          # repository, so pushing the same secret again (same file or a new
          # one) is ignored because the blob still "exists".
          #
          # See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/136896#note_1680680116 for more details.
          filter_existing(all_blobs)
        else
          revisions = changes_access
            .changes
            .pluck(:newrev) # rubocop:disable CodeReuse/ActiveRecord -- Array#pluck
            .reject { |revision| ::Gitlab::Git.blank_ref?(revision) }
            .compact

          # We add `--not --all --not revisions` to ensure we only get new blobs.
          project.repository.list_blobs(
            ['--not', '--all', '--not'] + revisions,
            bytes_limit: BLOB_BYTES_LIMIT + 1,
            dynamic_timeout: logger.time_left
          ).to_a
        end

      # Filter out blobs larger than BLOB_BYTES_LIMIT and binary blobs.
      #
      # Use the non-destructive `reject` rather than `reject!`: `Array#reject!`
      # returns nil when nothing was rejected, which would make the block's
      # value (the list of blobs to scan) nil for pushes containing no
      # oversized/binary blobs.
      blobs.reject { |blob| blob.size > BLOB_BYTES_LIMIT || blob.binary }
    end
  end

  private

  # True when the push is served out of a Git object quarantine directory
  # (signalled by GIT_OBJECT_DIRECTORY_RELATIVE in the hook environment).
  def ignore_alternate_directories?
    git_env = ::Gitlab::Git::HookEnv.all(project.repository.gl_repository)
    git_env['GIT_OBJECT_DIRECTORY_RELATIVE'].present?
  end

  # Removes from +blobs+ those that already exist in the main repository.
  def filter_existing(blobs)
    # We check for object existence in the main repository, but the object
    # directory points to the object quarantine. This can be fixed by
    # unsetting it, which will cause us to use the normal repository as
    # indicated by its relative path again.
    gitaly_repo = project.repository.gitaly_repository.dup.tap { |repo| repo.git_object_directory = "" }

    map_blob_id_to_existence = project.repository.gitaly_commit_client.object_existence_map(
      blobs.map(&:id),
      gitaly_repo: gitaly_repo
    )

    # Remove blobs that already exist.
    blobs.reject { |blob| map_blob_id_to_existence[blob.id] }
  end
end
+ blobs.reject { |blob| map_blob_id_to_existence[blob.id] } end end end diff --git a/ee/spec/lib/ee/gitlab/checks/push_rules/secrets_check_spec.rb b/ee/spec/lib/ee/gitlab/checks/push_rules/secrets_check_spec.rb index 585cb7c0d553adf05cde3002e10ed2e20a51987f..6701451560a0f608babc6f4393b964240074e026 100644 --- a/ee/spec/lib/ee/gitlab/checks/push_rules/secrets_check_spec.rb +++ b/ee/spec/lib/ee/gitlab/checks/push_rules/secrets_check_spec.rb @@ -3,10 +3,67 @@ require 'spec_helper' RSpec.describe EE::Gitlab::Checks::PushRules::SecretsCheck, feature_category: :secret_detection do - include_context 'push rules checks context' - + let_it_be(:user) { create(:user) } let_it_be(:push_rule) { create(:push_rule) } + # Project is created with a custom repository, so + # we create a README to have a blob committed already. + let_it_be(:project) do + create( + :project, + :custom_repo, + files: { 'README' => 'Documentation goes here.' }, + push_rule: push_rule + ) + end + + let_it_be(:repository) { project.repository } + + # Set revisions as follows: + # 1. oldrev to commit created with the project above. + # 2. newrev to commit created in before_all below. + let(:oldrev) { '5e08a9ef8e7e257cbc727d6786bbc33ad7662625' } + let(:newrev) { 'fe29d93da4843da433e62711ace82db601eb4f8f' } + let(:changes) do + [ + { + oldrev: oldrev, + newrev: newrev, + ref: 'refs/heads/master' + } + ] + end + + let(:protocol) { 'ssh' } + let(:timeout) { Gitlab::GitAccess::INTERNAL_TIMEOUT } + let(:logger) { Gitlab::Checks::TimedLogger.new(timeout: timeout) } + let(:user_access) { Gitlab::UserAccess.new(user, container: project) } + let(:changes_access) do + Gitlab::Checks::ChangesAccess.new( + changes, + project: project, + user_access: user_access, + protocol: protocol, + logger: logger + ) + end + + subject { described_class.new(changes_access) } + + before_all do + project.add_developer(user) + + # Create a new commit to be used as the new revision in changes passed to secrets check. 
+ repository.commit_files( + user, + branch_name: 'add-dotenv-file', + message: 'Add .env file', + actions: [ + { action: :create, file_path: '.env', content: "SECRET=glpat-JUST20LETTERSANDNUMB" } # gitleaks:allow + ] + ) + end + describe '#validate!' do it_behaves_like 'check ignored when push rule unlicensed' @@ -43,10 +100,7 @@ stub_licensed_features(pre_receive_secret_detection: true) end - it 'returns without raising errors' do - # Since the check does nothing at the moment, it just execute without raising errors - expect { subject.validate! }.not_to raise_error - end + it_behaves_like 'list and filter blobs' end end @@ -66,10 +120,7 @@ stub_licensed_features(pre_receive_secret_detection: true) end - it 'returns without raising errors' do - # Since the check does nothing at the moment, it just execute without raising errors - expect { subject.validate! }.not_to raise_error - end + it_behaves_like 'list and filter blobs' end context 'when feature flag is disabled' do diff --git a/ee/spec/support/shared_examples/lib/gitlab/secrets_check_shared_examples.rb b/ee/spec/support/shared_examples/lib/gitlab/secrets_check_shared_examples.rb new file mode 100644 index 0000000000000000000000000000000000000000..d0fa66211d6f874ed9b57833f81b958fd5cce59b --- /dev/null +++ b/ee/spec/support/shared_examples/lib/gitlab/secrets_check_shared_examples.rb @@ -0,0 +1,88 @@ +# frozen_string_literal: true + +RSpec.shared_examples 'list and filter blobs' do + # We cannot really get the same Gitlab::Git::Blob objects even if we call `list_all_blobs` + # or `list_blobs` directly in the spec (which is perhaps also not a good idea) as the object + # ids will always be different, so we expect the blobs to be an array with two objects of that kind. 
# frozen_string_literal: true

RSpec.shared_examples 'list and filter blobs' do
  # We cannot obtain the very same Gitlab::Git::Blob objects the check builds
  # (calling `list_all_blobs`/`list_blobs` here would yield different object
  # ids each run), so we match the two expected blobs on their attributes.
  let(:old_blob) do
    have_attributes(
      class: Gitlab::Git::Blob,
      id: oldrev,
      size: 24
    )
  end

  let(:new_blob) do
    have_attributes(
      class: Gitlab::Git::Blob,
      id: newrev,
      size: 33
    )
  end

  context 'when quarantine directory exists' do
    let(:git_env) { { 'GIT_OBJECT_DIRECTORY_RELATIVE' => 'objects' } }
    let(:gitaly_commit_client) { instance_double(Gitlab::GitalyClient::CommitService) }

    before do
      allow(Gitlab::Git::HookEnv).to receive(:all).with(repository.gl_repository).and_return(git_env)

      # Both blobs are committed to the repository, so the Gitaly commit
      # client is stubbed such that only the first one is considered to
      # already exist in the repository.
      allow(repository).to receive(:gitaly_commit_client).and_return(gitaly_commit_client)
      allow(gitaly_commit_client).to receive(:object_existence_map).and_return(
        {
          oldrev.to_s => true,
          newrev.to_s => false
        }
      )
    end

    it 'lists all blobs of a repository' do
      # NOTE(review): chaining `and_return(...)` with `and_call_original` mixes
      # two rspec-mocks response configurations — confirm the intended response
      # wins with the rspec-mocks version in use.
      expect(repository).to receive(:list_all_blobs)
        .with(
          bytes_limit: EE::Gitlab::Checks::PushRules::SecretsCheck::BLOB_BYTES_LIMIT + 1,
          dynamic_timeout: kind_of(Float),
          ignore_alternate_object_directories: true
        )
        .once
        .and_return([old_blob, new_blob])
        .and_call_original

      expect(subject.validate!).to be_truthy
    end

    it 'filters existing blobs out' do
      expect_next_instance_of(described_class) do |instance|
        # The old blob is expected to be filtered out.
        expect(instance).to receive(:filter_existing)
          .with([old_blob, new_blob])
          .once
          .and_return(new_blob)
          .and_call_original
      end

      expect(subject.validate!).to be_truthy
    end
  end

  context 'when quarantine directory does not exist' do
    it 'list new blobs' do
      expect(repository).to receive(:list_blobs)
        .with(
          ['--not', '--all', '--not'] + changes.pluck(:newrev),
          bytes_limit: EE::Gitlab::Checks::PushRules::SecretsCheck::BLOB_BYTES_LIMIT + 1,
          dynamic_timeout: kind_of(Float)
        )
        .once
        .and_return(new_blob)
        .and_call_original

      expect(subject.validate!).to be_truthy
    end
  end
end