diff --git a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb index ec7b50696c6d7cb671348e8f8f7b20f1c160b196..5cc78e970b2aad0536df82ee3c1763ccbc7cefd8 100644 --- a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb +++ b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb @@ -11,7 +11,8 @@ class SecretsCheck < ::Gitlab::Checks::BaseBulkChecker scan_timeout_error: 'Secret detection scan timed out.', scan_initialization_error: 'Secret detection scan failed to initialize.', invalid_input_error: 'Secret detection scan failed due to invalid input.', - invalid_scan_status_code_error: 'Invalid secret detection scan status, check passed.' + invalid_scan_status_code_error: 'Invalid secret detection scan status, check passed.', + too_many_tree_entries_error: 'Too many tree entries exist for commit(sha: %{sha}).' }.freeze LOG_MESSAGES = { @@ -21,12 +22,10 @@ class SecretsCheck < ::Gitlab::Checks::BaseBulkChecker found_secrets_post_message: "\n\nPlease remove the identified secrets in your commits and try again.", found_secrets_with_errors: 'Secret detection scan completed with one or more findings ' \ 'but some errors occured during the scan.', - finding_details_message: <<~MESSAGE - \n\nBlob id: %{blob_id} - -- Line: %{line_number} - -- Type: %{type} - -- Description: %{description}\n - MESSAGE + finding_message_occurrence: "\n\nSecret leaked in commit: %{sha}" \ + "\n -- %{path}:%{line_number} | %{description}", + finding_message: "\n\nSecret leaked in blob: %{blob_id}" \ + "\n -- line:%{line_number} | %{description}" }.freeze BLOB_BYTES_LIMIT = 1.megabyte # Limit is 1MiB to start with. @@ -67,13 +66,8 @@ def validate! # See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/136896#note_1680680116 for more details. filter_existing(all_blobs) else - revisions = changes_access - .changes - .pluck(:newrev) # rubocop:disable CodeReuse/ActiveRecord -- Array#pluck - .reject { |revision| ::Gitlab::Git.blank_ref?(revision) } - .compact - - # We add `--not --all --not revisions` to ensure we only get new blobs. + # We use `--not --all --not revisions` to ensure we only get new blobs. + # TODO: explore using `with_paths: true` as an argument to load blob paths. project.repository.list_blobs( ['--not', '--all', '--not'] + revisions, bytes_limit: BLOB_BYTES_LIMIT + 1, @@ -127,19 +121,22 @@ def filter_existing(blobs) end def format_response(response) + # Try to retrieve file path and commit sha for the blobs found. + if [ + ::Gitlab::SecretDetection::Status::FOUND, + ::Gitlab::SecretDetection::Status::FOUND_WITH_ERRORS + ].include?(response.status) + # TODO: filter out revisions not related to found secrets + collect_findings_occurrences(response) + end + case response.status when ::Gitlab::SecretDetection::Status::NOT_FOUND # No secrets found, we log and skip the check. secret_detection_logger.info(message: LOG_MESSAGES[:secrets_not_found]) when ::Gitlab::SecretDetection::Status::FOUND # One or more secrets found, generate message with findings and fail check. - message = LOG_MESSAGES[:found_secrets] - - response.results.each do |finding| - message += format(LOG_MESSAGES[:finding_details_message], finding.to_h) - end - - message += LOG_MESSAGES[:found_secrets_post_message] + message = build_secrets_found_message(response) secret_detection_logger.info(message: LOG_MESSAGES[:found_secrets]) @@ -147,21 +144,7 @@ def format_response(response) when ::Gitlab::SecretDetection::Status::FOUND_WITH_ERRORS # One or more secrets found, but with scan errors, so we # generate a message with findings and errors, and fail the check. - - message = LOG_MESSAGES[:found_secrets_with_errors] - - response.results.each do |finding| - case finding.status - when ::Gitlab::SecretDetection::Status::FOUND - message += format(LOG_MESSAGES[:finding_details_message], finding.to_h) - when ::Gitlab::SecretDetection::Status::SCAN_ERROR - message += format(ERROR_MESSAGES[:failed_to_scan_regex_error], finding.to_h) - when ::Gitlab::SecretDetection::Status::BLOB_TIMEOUT - message += format(ERROR_MESSAGES[:blob_timed_out_error], finding.to_h) - end - end - - message += LOG_MESSAGES[:found_secrets_post_message] + message = build_secrets_found_with_errors_message(response) secret_detection_logger.info(message: LOG_MESSAGES[:found_secrets_with_errors]) @@ -169,9 +152,6 @@ def format_response(response) when ::Gitlab::SecretDetection::Status::SCAN_TIMEOUT # Entire scan timed out, we log and skip the check for now. secret_detection_logger.error(message: ERROR_MESSAGES[:scan_timeout_error]) - - # TODO: Decide if we should also fail the check here. - # raise ::Gitlab::GitAccess::ForbiddenError, ERROR_MESSAGES[:scan_timeout_error] when ::Gitlab::SecretDetection::Status::INPUT_ERROR # Scan failed to invalid input. We skip the check because an input error # could be due to not having `blobs` being empty (i.e. no new blobs to scan). @@ -182,6 +162,97 @@ def format_response(response) secret_detection_logger.error(message: ERROR_MESSAGES[:invalid_scan_status_code_error]) end end + + def revisions + @revisions ||= changes_access + .changes + .pluck(:newrev) # rubocop:disable CodeReuse/ActiveRecord -- Array#pluck + .reject { |revision| ::Gitlab::Git.blank_ref?(revision) } + .compact + end + + def build_secrets_found_message(response) + message = LOG_MESSAGES[:found_secrets] + + response.results.each { |finding| message += build_finding_message(finding) } + + message += LOG_MESSAGES[:found_secrets_post_message] + message + end + + def build_secrets_found_with_errors_message(response) + message = LOG_MESSAGES[:found_secrets_with_errors] + + response.results.each do |finding| + case finding.status + when ::Gitlab::SecretDetection::Status::FOUND + message += build_finding_message(finding) + when ::Gitlab::SecretDetection::Status::SCAN_ERROR + message += format(ERROR_MESSAGES[:failed_to_scan_regex_error], finding.to_h) + when ::Gitlab::SecretDetection::Status::BLOB_TIMEOUT + message += format(ERROR_MESSAGES[:blob_timed_out_error], finding.to_h) + end + end + + message += LOG_MESSAGES[:found_secrets_post_message] + message + end + + def build_finding_message(finding) + # If no occurrences are found, we display a more generic message (using blob id). + return format(LOG_MESSAGES[:finding_message], finding.to_h) unless finding.occurrences.present? + + # If we have found the tree entries for those findings, let's display them. + finding.occurrences.reduce('') do |message, occurrence| + message + format( + LOG_MESSAGES[:finding_message_occurrence], + { + sha: occurrence[:sha], + path: occurrence[:path], + line_number: finding.line_number, + description: finding.description + } + ) + end + end + + def collect_findings_occurrences(response) + # Let's put aside the findings with secrets. + findings_with_secrets = response + .results + .select { |finding| finding.status == ::Gitlab::SecretDetection::Status::FOUND } + + # Scanning had found secrets, let's try to look up their file path and commit id. This can be done + # by using `GetTreeEntries()` RPC, and cross examining blobs with ones where secrets where found. + revisions.each do |revision| + # We could try to handle pagination, but it is likely to timeout way earlier given the + # huge default limit (100000) of entries, so we log an error if we get too many results. + entries, cursor = ::Gitlab::Git::Tree.tree_entries(project.repository, revision, nil, true, true, nil) + + # TODO: Handle pagination in the upcoming iterations + # We don't raise because we could still provide a hint to the user + # about the detected secrets even without a commit sha/file path information. + unless cursor.next_cursor.empty? + secret_detection_logger.error( + message: format(ERROR_MESSAGES[:too_many_tree_entries_error], { sha: revision }) + ) + end + + # Let's grab the `commit_id` and the `path` for that entry, we use the blob id as key. + entries.each do |entry| + # Skip any entry that isn't a blob. + next if entry.type != :blob + + # Update response with occurrences found. + current_entry_finding = findings_with_secrets.find { |finding| finding.blob_id == entry.id } + + if current_entry_finding + current_entry_finding.occurrences ||= [] + current_entry_finding.occurrences << { sha: entry.commit_id, path: entry.path } + end + end + end + end end end end diff --git a/ee/spec/support/shared_contexts/secrets_check_shared_contexts.rb b/ee/spec/support/shared_contexts/secrets_check_shared_contexts.rb index 84637cde1376e4c21415778a7ae82d33b59a6809..f93cfc63c8d22e21762ed8e398817c6f955fc658 100644 --- a/ee/spec/support/shared_contexts/secrets_check_shared_contexts.rb +++ b/ee/spec/support/shared_contexts/secrets_check_shared_contexts.rb @@ -60,6 +60,22 @@ let(:old_blob) { have_attributes(class: Gitlab::Git::Blob, id: old_blob_reference, size: 23) } let(:new_blob) { have_attributes(class: Gitlab::Git::Blob, id: new_blob_reference, size: 33) } + # Used for mocking calls to `tree_entries` methods. + let(:gitaly_pagination_cursor) { Gitaly::PaginationCursor.new(next_cursor: "") } + let(:tree_entries) do + [ + Gitlab::Git::Tree.new( + id: new_blob_reference, + type: :blob, + mode: '100644', + name: '.env', + path: '.env', + flat_path: '.env', + commit_id: new_commit + ) + ] + end + # Used for mocking calls to logger. let(:secret_detection_logger) { instance_double(::Gitlab::SecretDetectionLogger) } @@ -95,7 +111,8 @@ scan_timeout_error: 'Secret detection scan timed out.', scan_initialization_error: 'Secret detection scan failed to initialize.', invalid_input_error: 'Secret detection scan failed due to invalid input.', - invalid_scan_status_code_error: 'Invalid secret detection scan status, check passed.' + invalid_scan_status_code_error: 'Invalid secret detection scan status, check passed.', + too_many_tree_entries_error: format('Too many tree entries exist for commit(sha: %{sha}).', { sha: new_commit }) } end @@ -107,25 +124,35 @@ 'Secret detection scan completed with one or more findings but some errors occured during the scan.' end + let(:found_secret_path) { '.env' } let(:found_secret_line_number) { '1' } - let(:found_secret_type) { 'gitlab_personal_access_token' } let(:found_secret_description) { 'GitLab Personal Access Token' } - let(:found_secrets_message) do - message = <<~MESSAGE - \nBlob id: %{found_secret_blob_id} - -- Line: %{found_secret_line_number} - -- Type: %{found_secret_type} - -- Description: %{found_secret_description}\n - MESSAGE + let(:found_message_occurrence) do + message = "\n\nSecret leaked in commit: %{sha}" \ + "\n -- %{path}:%{line_number} | %{description}" format( message, { - found_secret_blob_id: new_blob_reference, - found_secret_line_number: found_secret_line_number, - found_secret_type: found_secret_type, - found_secret_description: found_secret_description + sha: new_commit, + path: found_secret_path, + line_number: found_secret_line_number, + description: found_secret_description + } + ) + end + + let(:found_message) do + message = "\n\nSecret leaked in blob: %{blob_id}" \ + "\n -- line:%{line_number} | %{description}" + + format( + message, + { + blob_id: new_blob_reference, + line_number: found_secret_line_number, + description: found_secret_description } ) end @@ -149,6 +176,9 @@ # client and `object_existence_map` in such way only some of them are considered new. allow(repository).to receive(:gitaly_commit_client).and_return(gitaly_commit_client) allow(gitaly_commit_client).to receive(:object_existence_map).and_return(object_existence_map) + + # We also want to have the client return the tree entries. + allow(gitaly_commit_client).to receive(:tree_entries).and_return([tree_entries, gitaly_pagination_cursor]) end end diff --git a/ee/spec/support/shared_examples/lib/gitlab/secrets_check_shared_examples.rb b/ee/spec/support/shared_examples/lib/gitlab/secrets_check_shared_examples.rb index 7b371fd8fd6d030a4e2b00807c3736eccbaab009..3c6e78c03bcd8b187467ca228003882d7ff3399d 100644 --- a/ee/spec/support/shared_examples/lib/gitlab/secrets_check_shared_examples.rb +++ b/ee/spec/support/shared_examples/lib/gitlab/secrets_check_shared_examples.rb @@ -82,9 +82,9 @@ expect_next_instance_of(described_class) do |instance| expect(instance).to receive(:format_response) - .with(passed_scan_response) - .once - .and_call_original + .with(passed_scan_response) + .once + .and_call_original end expect(secret_detection_logger).to receive(:info) @@ -189,12 +189,24 @@ .and_call_original end - expect_next_instance_of(described_class) do |instance| - expect(instance).to receive(:format_response) - .with(successful_scan_response) + expect(secret_detection_logger).to receive(:info) .once - .and_call_original + .with(message: found_secrets) + + expect { subject.validate! }.to raise_error do |error| + expect(error).to be_a(::Gitlab::GitAccess::ForbiddenError) + expect(error.message).to include(found_secrets) + expect(error.message).to include(found_message_occurrence) + expect(error.message).to include(found_secrets_post_message) end + end + + it 'loads tree entries of the new commit' do + expect(::Gitlab::Git::Tree).to receive(:tree_entries) + .once + .with(repository, new_commit, nil, true, true, nil) + .and_return([tree_entries, gitaly_pagination_cursor]) + .and_call_original expect(secret_detection_logger).to receive(:info) .once @@ -203,10 +215,89 @@ expect { subject.validate! }.to raise_error do |error| expect(error).to be_a(::Gitlab::GitAccess::ForbiddenError) expect(error.message).to include(found_secrets) - expect(error.message).to include(found_secrets_message) + expect(error.message).to include(found_message_occurrence) expect(error.message).to include(found_secrets_post_message) end end + + context 'when no tree entries exist or cannot be loaded' do + it 'gracefully raises an error with existing information' do + expect(::Gitlab::Git::Tree).to receive(:tree_entries) + .once + .with(repository, new_commit, nil, true, true, nil) + .and_return([{}, gitaly_pagination_cursor]) + + expect(secret_detection_logger).to receive(:info) + .once + .with(message: found_secrets) + + expect { subject.validate! }.to raise_error do |error| + expect(error).to be_a(::Gitlab::GitAccess::ForbiddenError) + expect(error.message).to include(found_secrets) + expect(error.message).to include(found_message) + expect(error.message).to include(found_secrets_post_message) + end + end + end + + context 'when tree has too many entries' do + let(:gitaly_pagination_cursor) { Gitaly::PaginationCursor.new(next_cursor: "abcdef") } + + it 'logs an error and continue to raise and present findings' do + expect(::Gitlab::Git::Tree).to receive(:tree_entries) + .once + .with(repository, new_commit, nil, true, true, nil) + .and_return([tree_entries, gitaly_pagination_cursor]) + + expect(secret_detection_logger).to receive(:info) + .once + .with(message: found_secrets) + + expect(secret_detection_logger).to receive(:error) + .once + .with(message: error_messages[:too_many_tree_entries_error]) + + expect { subject.validate! }.to raise_error(::Gitlab::GitAccess::ForbiddenError) + end + end + + context 'when new commit has file in subdirectory' do + let_it_be(:new_commit) { create_commit('config/.env' => 'SECRET=glpat-JUST20LETTERSANDNUMB') } # gitleaks:allow + + let(:found_secret_path) { 'config/.env' } + let(:tree_entries) do + [ + Gitlab::Git::Tree.new( + id: new_blob_reference, + type: :blob, + mode: '100644', + name: '.env', + path: 'config/.env', + flat_path: 'config/.env', + commit_id: new_commit + ) + ] + end + + it 'loads tree entries of the new commit in subdirectories' do + expect(::Gitlab::Git::Tree).to receive(:tree_entries) + .once + .with(repository, new_commit, nil, true, true, nil) + .and_return([tree_entries, gitaly_pagination_cursor]) + .and_call_original + + expect(secret_detection_logger).to receive(:info) + .once + .with(message: found_secrets) + + expect { subject.validate! }.to raise_error do |error| + expect(error).to be_a(::Gitlab::GitAccess::ForbiddenError) + expect(error.message).to include(found_secrets) + expect(error.message).to include(found_message_occurrence) + expect(error.message).to include(found_secrets_post_message) + end + end + end end RSpec.shared_examples 'scan detected secrets but some errors occured' do @@ -382,7 +473,53 @@ expect { subject.validate! }.to raise_error do |error| expect(error).to be_a(::Gitlab::GitAccess::ForbiddenError) expect(error.message).to include(found_secrets_with_errors) - expect(error.message).to include(found_secrets_message) + expect(error.message).to include(found_message_occurrence) + expect(error.message).to include(blob_timed_out_error) + expect(error.message).to include(failed_to_scan_regex_error) + expect(error.message).to include(found_secrets_post_message) + end + end + + it 'loads tree entries of the new commit' do + expect_next_instance_of(::Gitlab::SecretDetection::Scan) do |instance| + expect(instance).to receive(:secrets_scan) + .with( + array_including(new_blob, timed_out_blob, failed_to_scan_blob), + timeout: kind_of(Float) + ) + .once + .and_return(successful_scan_with_errors_response) + end + + expect(::Gitlab::Git::Tree).to receive(:tree_entries) + .with(repository, new_commit, nil, true, true, nil) + .once + .ordered + .and_return([tree_entries, gitaly_pagination_cursor]) + .and_call_original + + expect(::Gitlab::Git::Tree).to receive(:tree_entries) + .with(repository, timed_out_commit, nil, true, true, nil) + .once + .ordered + .and_return([[], nil]) + .and_call_original + + expect(::Gitlab::Git::Tree).to receive(:tree_entries) + .with(repository, failed_to_scan_commit, nil, true, true, nil) + .once + .ordered + .and_return([[], nil]) + .and_call_original + + expect(secret_detection_logger).to receive(:info) + .once + .with(message: found_secrets_with_errors) + + expect { subject.validate! }.to raise_error do |error| + expect(error).to be_a(::Gitlab::GitAccess::ForbiddenError) + expect(error.message).to include(found_secrets_with_errors) + expect(error.message).to include(found_message_occurrence) expect(error.message).to include(blob_timed_out_error) expect(error.message).to include(failed_to_scan_regex_error) expect(error.message).to include(found_secrets_post_message) diff --git a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/finding.rb b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/finding.rb index e8d83178b09dc99720aabb757a4e19b964bd65ff..728d45d62283f38d5845a57ed907cef9bea49cdb 100644 --- a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/finding.rb +++ b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/finding.rb @@ -5,13 +5,15 @@ module SecretDetection # Finding is a data object representing a secret finding identified within a blob class Finding attr_reader :blob_id, :status, :line_number, :type, :description + attr_accessor :occurrences - def initialize(blob_id, status, line_number = nil, type = nil, description = nil) + def initialize(blob_id, status, line_number = nil, type = nil, description = nil, occurrences = nil) # rubocop:disable Metrics/ParameterLists -- all params are needed @blob_id = blob_id @status = status @line_number = line_number @type = type @description = description + @occurrences = occurrences end def ==(other) @@ -24,7 +26,8 @@ def to_h status: status, line_number: line_number, type: type, - description: description + description: description, + occurrences: occurrences } end