From 7243604cbb1a1a2d4e594bd4db5e8122f449aa11 Mon Sep 17 00:00:00 2001 From: Ahmed Hemdan Date: Tue, 14 Nov 2023 20:05:06 +0100 Subject: [PATCH 1/5] List and filter blobs in secrets check This commit updates the secrets push check to list and filter blobs: * To be below a certain size threshold. * If they have already been committed to the repository. * If they are binary. --- .../gitlab/checks/push_rules/secrets_check.rb | 65 +++++++++++++- .../checks/push_rules/secrets_check_spec.rb | 71 ++++++++++++--- .../gitlab/secrets_check_shared_examples.rb | 88 +++++++++++++++++++ lib/gitlab/git/repository.rb | 2 +- 4 files changed, 213 insertions(+), 13 deletions(-) create mode 100644 ee/spec/support/shared_examples/lib/gitlab/secrets_check_shared_examples.rb diff --git a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb index c0fa9d868dbe09..ba359eabbe6ae9 100644 --- a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb +++ b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb @@ -5,6 +5,10 @@ module Gitlab module Checks module PushRules class SecretsCheck < ::Gitlab::Checks::BaseBulkChecker + BLOB_BYTES_LIMIT = 1024 # Limit is 1MB to start with. + + LOG_MESSAGE = "Detecting secrets..." + def validate! # Return early and not perform the check if: # 1. unless application setting is enabled (regardless of whether it's a gitlab dedicated instance or not) @@ -14,9 +18,66 @@ def validate! return unless ::Gitlab::CurrentSettings.pre_receive_secret_detection_enabled return if ::Gitlab::CurrentSettings.gitlab_dedicated_instance != true && - ::Feature.disabled?(:pre_receive_secret_detection_push_check, push_rule.project) + ::Feature.disabled?(:pre_receive_secret_detection_push_check, project) + + return unless push_rule && project.licensed_feature_available?(:pre_receive_secret_detection) + + logger.log_timed(LOG_MESSAGE) do + # List all blobs via `ListAllBlobs()` based on the existence of a + # quarantine directory. If no directory exist, we use `ListBlobs()` instead. + blobs = + if ignore_alternate_directories? + all_blobs = project.repository.list_all_blobs( + bytes_limit: BLOB_BYTES_LIMIT, + dynamic_timeout: logger.time_left, + ignore_alternate_object_directories: true + ).to_a + + # A quarantine directory would generally only contain objects which are actually new but + # this is unfortunately not guaranteed by Git, so it might be that a push has objects which + # already exist in the repository. To fix this, we have to filter out the blobs that don't exist. + filter_existing(all_blobs) + else + revisions = changes_access + .changes + .pluck(:newrev) # rubocop:disable CodeReuse/ActiveRecord -- Array#pluck + .reject { |revision| revision.blank? || revision == ::Gitlab::Git::BLANK_SHA } + .compact + + # We add `--not --all --not revisions` to ensure we only get new blobs. + project.repository.list_blobs( + ['--not', '--all', '--not'] + revisions, + bytes_limit: BLOB_BYTES_LIMIT, + dynamic_timeout: logger.time_left + ).to_a + end + + # Filter out binary blobs. + blobs.reject(&:binary) + end + end + + private + + def ignore_alternate_directories? + git_env = ::Gitlab::Git::HookEnv.all(project.repository.gl_repository) + git_env['GIT_OBJECT_DIRECTORY_RELATIVE'].present? + end + + def filter_existing(blobs) + # We check for object existence in the main repository, but the + # object directory points to the object quarantine. This can be fixed + # by unsetting it, which will cause us to use the normal repository as + # indicated by its relative path again. + gitaly_repo = project.repository.gitaly_repository.dup.tap { |repo| repo.git_object_directory = "" } + + map_blob_id_to_existence = project.repository.gitaly_commit_client.object_existence_map( + blobs.map(&:id), + gitaly_repo: gitaly_repo + ) - return unless push_rule && push_rule.project.licensed_feature_available?(:pre_receive_secret_detection) + # Remove blobs that already exist. + blobs.reject { |blob| map_blob_id_to_existence[blob.id] == true } end end end diff --git a/ee/spec/lib/ee/gitlab/checks/push_rules/secrets_check_spec.rb b/ee/spec/lib/ee/gitlab/checks/push_rules/secrets_check_spec.rb index 585cb7c0d553ad..6701451560a0f6 100644 --- a/ee/spec/lib/ee/gitlab/checks/push_rules/secrets_check_spec.rb +++ b/ee/spec/lib/ee/gitlab/checks/push_rules/secrets_check_spec.rb @@ -3,10 +3,67 @@ require 'spec_helper' RSpec.describe EE::Gitlab::Checks::PushRules::SecretsCheck, feature_category: :secret_detection do - include_context 'push rules checks context' - + let_it_be(:user) { create(:user) } let_it_be(:push_rule) { create(:push_rule) } + # Project is created with a custom repository, so + # we create a README to have a blob committed already. + let_it_be(:project) do + create( + :project, + :custom_repo, + files: { 'README' => 'Documentation goes here.' }, + push_rule: push_rule + ) + end + + let_it_be(:repository) { project.repository } + + # Set revisions as follows: + # 1. oldrev to commit created with the project above. + # 2. newrev to commit created in before_all below. + let(:oldrev) { '5e08a9ef8e7e257cbc727d6786bbc33ad7662625' } + let(:newrev) { 'fe29d93da4843da433e62711ace82db601eb4f8f' } + let(:changes) do + [ + { + oldrev: oldrev, + newrev: newrev, + ref: 'refs/heads/master' + } + ] + end + + let(:protocol) { 'ssh' } + let(:timeout) { Gitlab::GitAccess::INTERNAL_TIMEOUT } + let(:logger) { Gitlab::Checks::TimedLogger.new(timeout: timeout) } + let(:user_access) { Gitlab::UserAccess.new(user, container: project) } + let(:changes_access) do + Gitlab::Checks::ChangesAccess.new( + changes, + project: project, + user_access: user_access, + protocol: protocol, + logger: logger + ) + end + + subject { described_class.new(changes_access) } + + before_all do + project.add_developer(user) + + # Create a new commit to be used as the new revision in changes passed to secrets check. + repository.commit_files( + user, + branch_name: 'add-dotenv-file', + message: 'Add .env file', + actions: [ + { action: :create, file_path: '.env', content: "SECRET=glpat-JUST20LETTERSANDNUMB" } # gitleaks:allow + ] + ) + end + describe '#validate!' do it_behaves_like 'check ignored when push rule unlicensed' @@ -43,10 +100,7 @@ stub_licensed_features(pre_receive_secret_detection: true) end - it 'returns without raising errors' do - # Since the check does nothing at the moment, it just execute without raising errors - expect { subject.validate! }.not_to raise_error - end + it_behaves_like 'list and filter blobs' end end @@ -66,10 +120,7 @@ stub_licensed_features(pre_receive_secret_detection: true) end - it 'returns without raising errors' do - # Since the check does nothing at the moment, it just execute without raising errors - expect { subject.validate! }.not_to raise_error - end + it_behaves_like 'list and filter blobs' end context 'when feature flag is disabled' do diff --git a/ee/spec/support/shared_examples/lib/gitlab/secrets_check_shared_examples.rb b/ee/spec/support/shared_examples/lib/gitlab/secrets_check_shared_examples.rb new file mode 100644 index 00000000000000..95f674223b24b1 --- /dev/null +++ b/ee/spec/support/shared_examples/lib/gitlab/secrets_check_shared_examples.rb @@ -0,0 +1,88 @@ +# frozen_string_literal: true + +RSpec.shared_examples 'list and filter blobs' do + # We cannot really get the same Gitlab::Git::Blob objects even if we call `list_all_blobs` + # or `list_blobs` directly in the spec (which is perhaps also not a good idea) as the object + # ids will always be different, so we expect the blobs to be an array with two objects of that kind. + let(:old_blob) do + have_attributes( + class: Gitlab::Git::Blob, + id: oldrev, + size: 24 + ) + end + + let(:new_blob) do + have_attributes( + class: Gitlab::Git::Blob, + id: newrev, + size: 33 + ) + end + + context 'when quarantine directory exists' do + let(:git_env) { { 'GIT_OBJECT_DIRECTORY_RELATIVE' => 'objects' } } + let(:gitaly_commit_client) { instance_double(Gitlab::GitalyClient::CommitService) } + + before do + allow(Gitlab::Git::HookEnv).to receive(:all).with(repository.gl_repository).and_return(git_env) + + # Since both blobs are committed to the repository, we mock the gitaly commit + # client in such way that only the first is considered to exist in the repository. + allow(repository).to receive(:gitaly_commit_client).and_return(gitaly_commit_client) + allow(gitaly_commit_client).to receive(:object_existence_map).and_return( + { + oldrev.to_s => true, + newrev.to_s => false + } + ) + end + + it 'lists all blobs of a repository' do + expect(repository).to receive(:list_all_blobs) + .with( + bytes_limit: EE::Gitlab::Checks::PushRules::SecretsCheck::BLOB_BYTES_LIMIT, + dynamic_timeout: kind_of(Float), + ignore_alternate_object_directories: true + ) + .once + .and_return( + [old_blob, new_blob] + ) + .and_call_original + + expect(subject.validate!).to be_truthy + end + + it 'filters existing blobs out' do + expect_next_instance_of(described_class) do |instance| + # old blob is expected to be filtered out + expect(instance).to receive(:filter_existing) + .with( + [old_blob, new_blob] + ) + .once + .and_return(new_blob) + .and_call_original + end + + expect(subject.validate!).to be_truthy + end + end + + context 'when quarantine directory does not exist' do + it 'list new blobs' do + expect(repository).to receive(:list_blobs) + .with( + ['--not', '--all', '--not'] + changes.pluck(:newrev), + bytes_limit: EE::Gitlab::Checks::PushRules::SecretsCheck::BLOB_BYTES_LIMIT, + dynamic_timeout: kind_of(Float) + ) + .once + .and_return(new_blob) + .and_call_original + + expect(subject.validate!).to be_truthy + end + end +end diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index 848705028ca9fc..312e05b5f545cb 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -46,7 +46,7 @@ def initialize(error_code) attr_reader :storage, :gl_repository, :gl_project_path, :container - delegate :list_all_blobs, to: :gitaly_blob_client + delegate :list_all_blobs, :list_blobs, to: :gitaly_blob_client # This remote name has to be stable for all types of repositories that # can join an object pool. If it's structure ever changes, a migration -- GitLab From d4157d7ce7499cc6d837e05305967d182b93c014 Mon Sep 17 00:00:00 2001 From: Ahmed Hemdan Date: Mon, 4 Dec 2023 13:13:35 +0100 Subject: [PATCH 2/5] Apply reviewer feedback --- ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb index ba359eabbe6ae9..369f4d4e86e11f 100644 --- a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb +++ b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb @@ -5,7 +5,7 @@ module Gitlab module Checks module PushRules class SecretsCheck < ::Gitlab::Checks::BaseBulkChecker - BLOB_BYTES_LIMIT = 1024 # Limit is 1MB to start with. + BLOB_BYTES_LIMIT = 1024 # Limit is 1MiB to start with. LOG_MESSAGE = "Detecting secrets..." @@ -24,7 +24,7 @@ def validate! logger.log_timed(LOG_MESSAGE) do # List all blobs via `ListAllBlobs()` based on the existence of a - # quarantine directory. If no directory exist, we use `ListBlobs()` instead. + # quarantine directory. If no directory exists, we use `ListBlobs()` instead. blobs = if ignore_alternate_directories? all_blobs = project.repository.list_all_blobs( -- GitLab From 3718dfa1919d1c88d032b26237e714df5d373fc0 Mon Sep 17 00:00:00 2001 From: Ahmed Hemdan Date: Wed, 6 Dec 2023 23:14:20 +0100 Subject: [PATCH 3/5] Apply more reviewer feedback --- ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb | 9 ++++----- .../lib/gitlab/secrets_check_shared_examples.rb | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb index 369f4d4e86e11f..94c8d51373a8fd 100644 --- a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb +++ b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb @@ -28,7 +28,7 @@ def validate! blobs = if ignore_alternate_directories? all_blobs = project.repository.list_all_blobs( - bytes_limit: BLOB_BYTES_LIMIT, + bytes_limit: BLOB_BYTES_LIMIT + 1, dynamic_timeout: logger.time_left, ignore_alternate_object_directories: true ).to_a @@ -41,19 +41,18 @@ def validate! revisions = changes_access .changes .pluck(:newrev) # rubocop:disable CodeReuse/ActiveRecord -- Array#pluck - .reject { |revision| revision.blank? || revision == ::Gitlab::Git::BLANK_SHA } .compact # We add `--not --all --not revisions` to ensure we only get new blobs. project.repository.list_blobs( ['--not', '--all', '--not'] + revisions, - bytes_limit: BLOB_BYTES_LIMIT, + bytes_limit: BLOB_BYTES_LIMIT + 1, dynamic_timeout: logger.time_left ).to_a end - # Filter out binary blobs. - blobs.reject(&:binary) + # Filter out larger than BLOB_BYTES_LIMIT blobs and binary blobs. + blobs.reject! { |blob| blob.size > BLOB_BYTES_LIMIT || blob.binary } end end diff --git a/ee/spec/support/shared_examples/lib/gitlab/secrets_check_shared_examples.rb b/ee/spec/support/shared_examples/lib/gitlab/secrets_check_shared_examples.rb index 95f674223b24b1..d0fa66211d6f87 100644 --- a/ee/spec/support/shared_examples/lib/gitlab/secrets_check_shared_examples.rb +++ b/ee/spec/support/shared_examples/lib/gitlab/secrets_check_shared_examples.rb @@ -41,7 +41,7 @@ it 'lists all blobs of a repository' do expect(repository).to receive(:list_all_blobs) .with( - bytes_limit: EE::Gitlab::Checks::PushRules::SecretsCheck::BLOB_BYTES_LIMIT, + bytes_limit: EE::Gitlab::Checks::PushRules::SecretsCheck::BLOB_BYTES_LIMIT + 1, dynamic_timeout: kind_of(Float), ignore_alternate_object_directories: true ) @@ -75,7 +75,7 @@ expect(repository).to receive(:list_blobs) .with( ['--not', '--all', '--not'] + changes.pluck(:newrev), - bytes_limit: EE::Gitlab::Checks::PushRules::SecretsCheck::BLOB_BYTES_LIMIT, + bytes_limit: EE::Gitlab::Checks::PushRules::SecretsCheck::BLOB_BYTES_LIMIT + 1, dynamic_timeout: kind_of(Float) ) .once -- GitLab From 7f88c355c66501120a298f740d574af8ca04fd1e Mon Sep 17 00:00:00 2001 From: Ahmed Hemdan Date: Fri, 8 Dec 2023 14:19:19 +0100 Subject: [PATCH 4/5] Apply more feedback --- ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb index 94c8d51373a8fd..6b17e91706e16e 100644 --- a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb +++ b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb @@ -35,12 +35,20 @@ def validate! # A quarantine directory would generally only contain objects which are actually new but # this is unfortunately not guaranteed by Git, so it might be that a push has objects which - # already exist in the repository. To fix this, we have to filter out the blobs that don't exist. + # already exist in the repository. To fix this, we have to filter the blobs that already exist. + # + # This is not a silver bullet though, a limitation of this is: a secret could possibly go into + # a commit in a new branch (`refs/heads/secret`) that gets deleted later on, so the commit becomes + # unreachable but it is still present in the repository, if the same secret is pushed in the same file + # or even in a new file, it would be ignored because we filter the blob out because it still "exists". + # + # See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/136896#note_1680680116 for more details. filter_existing(all_blobs) else revisions = changes_access .changes .pluck(:newrev) # rubocop:disable CodeReuse/ActiveRecord -- Array#pluck + .reject { |revision| ::Gitlab::Git.blank_ref?(revision) } .compact # We add `--not --all --not revisions` to ensure we only get new blobs. -- GitLab From a17fe41483de5f4f90c3610ff29cafc48c8e875d Mon Sep 17 00:00:00 2001 From: Ahmed Hemdan Date: Mon, 11 Dec 2023 18:19:53 +0100 Subject: [PATCH 5/5] Apply more feedback from reviews --- ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb index 6b17e91706e16e..1b3df04d4a2c90 100644 --- a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb +++ b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb @@ -5,7 +5,7 @@ module Gitlab module Checks module PushRules class SecretsCheck < ::Gitlab::Checks::BaseBulkChecker - BLOB_BYTES_LIMIT = 1024 # Limit is 1MiB to start with. + BLOB_BYTES_LIMIT = 1.megabyte # Limit is 1MiB to start with. LOG_MESSAGE = "Detecting secrets..." @@ -17,8 +17,8 @@ def validate! # 4. license is not ultimate return unless ::Gitlab::CurrentSettings.pre_receive_secret_detection_enabled - return if ::Gitlab::CurrentSettings.gitlab_dedicated_instance != true && - ::Feature.disabled?(:pre_receive_secret_detection_push_check, project) + return unless ::Gitlab::CurrentSettings.gitlab_dedicated_instance || + ::Feature.enabled?(:pre_receive_secret_detection_push_check, project) return unless push_rule && project.licensed_feature_available?(:pre_receive_secret_detection) @@ -84,7 +84,7 @@ def filter_existing(blobs) ) # Remove blobs that already exist. - blobs.reject { |blob| map_blob_id_to_existence[blob.id] == true } + blobs.reject { |blob| map_blob_id_to_existence[blob.id] } end end end -- GitLab