diff --git a/ee/lib/ee/gitlab/checks/push_rule_check.rb b/ee/lib/ee/gitlab/checks/push_rule_check.rb index 67a7c9b9dff23679f50f2a3754b7966ee0bf9200..8dc61203963cfe892b649cbd5701bf0a62b4d91a 100644 --- a/ee/lib/ee/gitlab/checks/push_rule_check.rb +++ b/ee/lib/ee/gitlab/checks/push_rule_check.rb @@ -5,7 +5,7 @@ module Gitlab module Checks class PushRuleCheck < ::Gitlab::Checks::BaseBulkChecker def validate! - return unless push_rule + # return unless push_rule if ::Feature.enabled?(:parallel_push_checks, project, type: :ops) run_checks_in_parallel! diff --git a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb index c0fa9d868dbe099242171b5f8bfdbab280731de8..07a4a4e15b92483b363a092b725ca4ff89707d43 100644 --- a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb +++ b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb @@ -5,18 +5,101 @@ module Gitlab module Checks module PushRules class SecretsCheck < ::Gitlab::Checks::BaseBulkChecker + ERROR_MESSAGE = "Secrets check failed:" + LOG_MESSAGE = "Checking if any files contain secrets..." + BLOB_BYTES_LIMIT = 1024 # Limit is 1MB to start with. + + SKIP_PATTERN = /\[(secret[ _-]detection[ _-]skip|skip[ _-]secret[ _-]detection)\]/i + def validate! # Return early and not perform the check if: # 1. unless application setting is enabled (regardless of whether it's a gitlab dedicated instance or not) # 2. feature flag is disabled for this project (when instance type is not gitlab dedicated) # 3. no push rule exist # 4. license is not ultimate - return unless ::Gitlab::CurrentSettings.pre_receive_secret_detection_enabled + # return unless ::Gitlab::CurrentSettings.pre_receive_secret_detection_enabled + + # return if ::Gitlab::CurrentSettings.gitlab_dedicated_instance != true && + # ::Feature.disabled?(:pre_receive_secret_detection_push_check, project) + + # return unless push_rule && project.licensed_feature_available?(:pre_receive_secret_detection) + + return if skip_secret_detection? + + # Log that we're running secrets check + logger.log_timed(LOG_MESSAGE) do + # Maybe move all code below into its own check class? (similar to FileSizeCheck::AnyOversizedBlobs) + all_blobs = get_all_blobs + + found_secrets = scan_for_secrets(all_blobs) + + show_message(found_secrets) if found_secrets.any? + # Rails.logger.debug("let's stop this here") if found_secrets.present? + end + end + + def get_all_blobs + blobs = + if ignore_alternate_directories? + # filter_existing( + project.repository.list_all_blobs( + bytes_limit: BLOB_BYTES_LIMIT, + dynamic_timeout: logger.time_left, + ignore_alternate_object_directories: true + ) + # ) + else + revisions = changes_access.newrev + + project.repository.list_blobs( + revisions, + bytes_limit: BLOB_BYTES_LIMIT, + with_paths: true + ) + end + + # filter out binary blobs + blobs.reject(&:binary) + end - return if ::Gitlab::CurrentSettings.gitlab_dedicated_instance != true && - ::Feature.disabled?(:pre_receive_secret_detection_push_check, push_rule.project) + def scan_for_secrets(all_blobs) + ::Gitlab::SecretDetection::Scan.new.secrets_scan(all_blobs) + end + + def show_message(found_secrets) + secrets_error_message = "" + + found_secrets[:result].each_key do |blob_id| + secrets_error_message += " Blob ID: #{blob_id}\n" + + found_secrets[:result][blob_id].each do |secret| + secrets_error_message += " Found #{secret['secret_type']} on line #{secret['line_number']}\n" + end + end + raise ::Gitlab::GitAccess::ForbiddenError, "#{ERROR_MESSAGE}\n#{secrets_error_message}" + end + + private + + def ignore_alternate_directories? + git_env = ::Gitlab::Git::HookEnv.all(project.repository.gl_repository) + git_env['GIT_OBJECT_DIRECTORY_RELATIVE'].present? + end + + def filter_existing(blobs) + # Filter out already existing blobs from blobs returned by `ListAllBlobs()`. + gitaly_repo = project.repository.gitaly_repository.dup.tap { |repo| repo.git_object_directory = "" } + + map_blob_id_to_existence = project.repository.gitaly_commit_client.object_existence_map( + blobs.map(&:id), + gitaly_repo: gitaly_repo + ) + + blobs.reject { |blob| map_blob_id_to_existence[blob.id].present? } + end - return unless push_rule && push_rule.project.licensed_feature_available?(:pre_receive_secret_detection) + def skip_secret_detection? + changes_access.commits.any? { |commit| commit.safe_message =~ SKIP_PATTERN } end end end diff --git a/gems/gitlab-secret_detection/Gemfile.lock b/gems/gitlab-secret_detection/Gemfile.lock index 2f615b24d86a91df1b591b74c37be792aa434ab4..659ff5319fc1550983b6ce181cb6bd4f17400317 100644 --- a/gems/gitlab-secret_detection/Gemfile.lock +++ b/gems/gitlab-secret_detection/Gemfile.lock @@ -40,6 +40,7 @@ GEM i18n (1.14.1) concurrent-ruby (~> 1.0) json (2.6.3) + mini_portile2 (2.8.5) minitest (5.20.0) mutex_m (0.2.0) parallel (1.23.0) @@ -53,6 +54,8 @@ GEM racc (1.7.1) rack (3.0.8) rainbow (3.1.1) + re2 (2.3.0) + mini_portile2 (~> 2.8.5) regexp_parser (2.8.2) rexml (3.2.6) rspec (3.12.0) @@ -115,6 +118,7 @@ GEM rubocop-factory_bot (~> 2.22) ruby-progressbar (1.13.0) ruby2_keywords (0.0.5) + tomlrb (2.0.3) tzinfo (2.0.6) concurrent-ruby (~> 1.0) unicode-display_width (2.5.0) @@ -128,12 +132,14 @@ PLATFORMS DEPENDENCIES gitlab-secret_detection! gitlab-styles (~> 10.1.0) + re2 rspec (~> 3.0) rspec-benchmark (~> 0.6.0) rspec-parameterized (~> 1.0) rubocop (~> 1.50) rubocop-rails (<= 2.20) rubocop-rspec (~> 2.22) + tomlrb BUNDLED WITH 2.4.14 diff --git a/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec b/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec index ff5121846f4683c2074942949d747c1a3375ffab..36aa81d66bba6bcdd8ed3f610743f21c997c3213 100644 --- a/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec +++ b/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec @@ -25,10 +25,12 @@ Gem::Specification.new do |spec| spec.require_paths = ["lib"] spec.add_development_dependency "gitlab-styles", "~> 10.1.0" + spec.add_development_dependency "re2" spec.add_development_dependency "rspec", "~> 3.0" spec.add_development_dependency "rspec-benchmark", "~> 0.6.0" spec.add_development_dependency "rspec-parameterized", "~> 1.0" spec.add_development_dependency "rubocop", "~> 1.50" spec.add_development_dependency "rubocop-rails", "<= 2.20" # https://github.com/rubocop/rubocop-rails/issues/1173 spec.add_development_dependency "rubocop-rspec", "~> 2.22" + spec.add_development_dependency "tomlrb" end diff --git a/gems/gitlab-secret_detection/lib/gitlab/secret_detection.rb b/gems/gitlab-secret_detection/lib/gitlab/secret_detection.rb index 54e0eb794a37f23178fd5029f5765896bd8f8630..25f175e0721c71abf678b50086f885a0fc0d63d7 100644 --- a/gems/gitlab-secret_detection/lib/gitlab/secret_detection.rb +++ b/gems/gitlab-secret_detection/lib/gitlab/secret_detection.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require_relative "secret_detection/version" +require_relative "secret_detection/scan" module Gitlab module SecretDetection diff --git a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb new file mode 100644 index 0000000000000000000000000000000000000000..263f2d4dd178eae1237cdd813ca6d7d376e046c9 --- /dev/null +++ b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb @@ -0,0 +1,99 @@ +# frozen_string_literal: true + +require 'tomlrb' +require 're2' +require 'logger' +require 'set' + +module Gitlab + module SecretDetection + class Scan + DEFAULT_SCAN_TIMEOUT = 50.seconds.freeze + BLOB_SCAN_TIMEOUT = 10.seconds.freeze + + # status codes + # success: { status: 0, results: { blob_id: [ { line_number:, secret_type:, status: } ] } } + # failure: { status: 1 } + SUCCESS = '0' + SCAN_TIMEOUT = '1' # whole scan times out + BLOB_TIMEOUT = '2' # individual blob scan times out + INITIALIZATION_ERROR = '3' # parse_file or create_patterns_and_keywords error + SCAN_ERROR = '4' # error while scanning + + def initialize(logger: ::Logger) + @secrets_config_data = parse_file + @secrets_pattern_set, @secrets_keywords = create_patterns_and_keywords + @logger = logger + end + + def secrets_scan(blobs, timeout: DEFAULT_SCAN_TIMEOUT) + Timeout.timeout(timeout) do + matched_blobs = keyword_match(blobs) + found_secrets = regex_match(matched_blobs) + + break { status: '0', result: found_secrets } if found_secrets.any? + + { status: '1' } + end + end + + def parse_file + file_path = File.expand_path('../../gitleaks.toml', __dir__) + Tomlrb.load_file(file_path) + rescue StandardError + INITIALIZATION_ERROR + end + + def create_patterns_and_keywords + @secrets_pattern_set = RE2::Set.new + @secrets_keywords = [] + + @secrets_config_data["rules"].each do |rule| + @secrets_pattern_set.add(rule["regex"]) + @secrets_keywords << rule["keywords"] + end + + @secrets_pattern_set.compile + + [@secrets_pattern_set, @secrets_keywords.compact.flatten] + rescue StandardError + INITIALIZATION_ERROR + end + + def keyword_match(blobs) + secrets_set = Set.new(@secrets_keywords) + matched_blobs = [] + blobs.each do |blob| + matched_blobs << blob if secrets_set.any? { |keyword| blob.data.freeze.include?(keyword) } + end + + matched_blobs + end + + def regex_match(blobs) + found_secrets = Hash.new { |blob, secret| blob[secret] = [] } + + blobs.each do |blob| + Timeout.timeout(BLOB_SCAN_TIMEOUT) do + gitaly_blob_data = blob.data + + gitaly_blob_data.each_line.with_index do |string, line| + patterns = @secrets_pattern_set.match(string) + next unless patterns.any? + + patterns.each do |pattern| + secret_info = { + "line_number" => line + 1, + "secret_type" => [@secrets_config_data["rules"][pattern]["description"]], + "status" => 0 + } + found_secrets[blob.id] << secret_info + end + end + end + end + found_secrets + end + end + end +end diff --git a/gems/gitlab-secret_detection/lib/gitleaks.toml b/gems/gitlab-secret_detection/lib/gitleaks.toml new file mode 100644 index 0000000000000000000000000000000000000000..f00688674bf7ebe327dfcf94de4b26a65cb11825 --- /dev/null +++ b/gems/gitlab-secret_detection/lib/gitleaks.toml @@ -0,0 +1,54 @@ +title = "gitleaks config" + +[[rules]] +id = "gitlab_personal_access_token" +description = "GitLab Personal Access Token" +regex = '''glpat-[0-9a-zA-Z_\-]{20}''' +tags = ["gitlab", "revocation_type"] +keywords = [ + "glpat", +] + +[[rules]] +id = "gitlab_pipeline_trigger_token" +description = "GitLab Pipeline Trigger Token" +regex = '''glptt-[0-9a-zA-Z_\-]{20}''' +tags = ["gitlab"] +keywords = [ + "glptt", +] + +[[rules]] +id = "gitlab_runner_registration_token" +description = "GitLab Runner Registration Token" +regex = '''GR1348941[0-9a-zA-Z_\-]{20}''' +tags = ["gitlab"] +keywords = [ + "GR1348941", +] + +[[rules]] +id = "gitlab_runner_auth_token" +description = "GitLab Runner Authentication Token" +regex = '''glrt-[0-9a-zA-Z_\-]{20}''' +tags = ["gitlab"] +keywords = [ + "glrt", +] + +[[rules]] +id = "gitlab_feed_token" +description = "GitLab Feed Token" +regex = '''glft=[0-9a-zA-Z_\-]{20}''' +tags = ["gitlab"] +keywords = [ + "glft", +] + +[allowlist] +description = "global allow lists" +paths = [ + '''gitleaks.toml''', + '''(.*?)(jpg|gif|doc|pdf|bin|svg|socket)$''', + '''(go.mod|go.sum)$''' +] \ No newline at end of file diff --git a/gems/gitlab-secret_detection/spec/gitlab/secret_detection/scan_spec.rb b/gems/gitlab-secret_detection/spec/gitlab/secret_detection/scan_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..cd141000125d2ed9810db338bce85c02e7451130 --- /dev/null +++ b/gems/gitlab-secret_detection/spec/gitlab/secret_detection/scan_spec.rb @@ -0,0 +1,73 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::SecretDetection::Scan do + let(:scan) { described_class.new } + + file_data = { + "title" => "gitleaks config", + "rules" => [ + { "id" => "gitlab_personal_access_token", + "description" => "GitLab Personal Access Token", + "regex" => "glpat-[0-9a-zA-Z_\\-]{20}", + "tags" => %w[gitlab revocation_type], + "keywords" => ["glpat"] }, + { "id" => "gitlab_pipeline_trigger_token", + "description" => "GitLab Pipeline Trigger Token", + "regex" => "glptt-[0-9a-zA-Z_\\-]{20}", + "tags" => ["gitlab"], + "keywords" => ["glptt"] } + ] + } + + it "has a version number" do + expect(Gitlab::SecretDetection::VERSION).not_to be_nil + end + + it "parses the toml file" do + expect(scan.parse_file).not_to be_nil + end + + context "when it creates RE2 patterns from file data" do + before do + allow(scan).to receive(:parse_file).and_return(file_data) + end + + it "successfully creates RE2 patterns" do + expect(scan.create_patterns).not_to be_nil + end + end + + context "when matching patterns" do + before do + allow(scan).to receive(:parse_file).and_return(file_data) + end + + context 'when the blob does not contain a secret' do + blob = Struct.new(:data).new("no secrets") + + it "does not match" do + expect(scan.secrets_scan(blob)).to be_nil + end + + it "attempts to keyword match" do + expect(scan).to receive(:keyword_match) + scan.secrets_scan(blob) + end + + it "does not attempt to regex match" do + expect(scan).not_to receive(:regex_match) + scan.secrets_scan(blob) + end + end + + context "when the blob contains a secret" do + blob = Struct.new(:data).new("glpat-" + "12312312312312312312") # rubocop:disable Style/StringConcatenation -- Creates a gitleaks false positive + + it "matches glpat" do + expect(scan.secrets_scan(blob)).to eq({ 1 => ["gitlab_personal_access_token"] }) + end + end + end +end diff --git a/gems/gitlab-secret_detection/spec/gitlab/secret_detection_spec.rb b/gems/gitlab-secret_detection/spec/gitlab/secret_detection_spec.rb deleted file mode 100644 index 112ab8c7468c16cee311d28e64ef7f29c5d18f82..0000000000000000000000000000000000000000 --- a/gems/gitlab-secret_detection/spec/gitlab/secret_detection_spec.rb +++ /dev/null @@ -1,7 +0,0 @@ -# frozen_string_literal: true - -RSpec.describe Gitlab::SecretDetection do - it "has a version number" do - expect(Gitlab::SecretDetection::VERSION).not_to be_nil - end -end diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index db6e6b4d00b2b38d672f168bf0c0469504cd6106..e701e14a3c09e48c7c199c704b0c137c737984ce 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -46,7 +46,7 @@ def initialize(error_code) attr_reader :storage, :gl_repository, :gl_project_path, :container - delegate :list_all_blobs, to: :gitaly_blob_client + delegate :list_all_blobs, :list_blobs, to: :gitaly_blob_client # This remote name has to be stable for all types of repositories that # can join an object pool. If it's structure ever changes, a migration