From 104a99e8b8b7c1e27bf63dcd46b2f34e57b58b0c Mon Sep 17 00:00:00 2001 From: Serena Fang Date: Mon, 30 Oct 2023 14:42:23 -0500 Subject: [PATCH 01/18] Initial commit of gitlab-secret_detection gem Changelog: added --- gems/gitlab-secret_detection/lib/gitlab/secret_detection.rb | 1 + .../spec/gitlab/secret_detection_spec.rb | 2 ++ 2 files changed, 3 insertions(+) diff --git a/gems/gitlab-secret_detection/lib/gitlab/secret_detection.rb b/gems/gitlab-secret_detection/lib/gitlab/secret_detection.rb index 54e0eb794a37f2..25f175e0721c71 100644 --- a/gems/gitlab-secret_detection/lib/gitlab/secret_detection.rb +++ b/gems/gitlab-secret_detection/lib/gitlab/secret_detection.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require_relative "secret_detection/version" +require_relative "secret_detection/scan" module Gitlab module SecretDetection diff --git a/gems/gitlab-secret_detection/spec/gitlab/secret_detection_spec.rb b/gems/gitlab-secret_detection/spec/gitlab/secret_detection_spec.rb index 112ab8c7468c16..a44db22aea7a2a 100644 --- a/gems/gitlab-secret_detection/spec/gitlab/secret_detection_spec.rb +++ b/gems/gitlab-secret_detection/spec/gitlab/secret_detection_spec.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require 'spec_helper' + RSpec.describe Gitlab::SecretDetection do it "has a version number" do expect(Gitlab::SecretDetection::VERSION).not_to be_nil -- GitLab From 317d397c58063a5b192b40e3777c0a498fc8b47c Mon Sep 17 00:00:00 2001 From: Serena Fang Date: Mon, 6 Nov 2023 13:51:02 -0600 Subject: [PATCH 02/18] Apply reviewer suggestions --- .../spec/gitlab/secret_detection_spec.rb | 2 -- 1 file changed, 2 deletions(-) diff --git a/gems/gitlab-secret_detection/spec/gitlab/secret_detection_spec.rb b/gems/gitlab-secret_detection/spec/gitlab/secret_detection_spec.rb index a44db22aea7a2a..112ab8c7468c16 100644 --- a/gems/gitlab-secret_detection/spec/gitlab/secret_detection_spec.rb +++ b/gems/gitlab-secret_detection/spec/gitlab/secret_detection_spec.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require 'spec_helper' - RSpec.describe Gitlab::SecretDetection do it "has a version number" do expect(Gitlab::SecretDetection::VERSION).not_to be_nil -- GitLab From 32790e6a1516f834bb1a69fc80a43563f420f674 Mon Sep 17 00:00:00 2001 From: Serena Fang Date: Thu, 9 Nov 2023 13:19:07 -0600 Subject: [PATCH 03/18] Add gitlab styles dependency --- gems/gitlab-secret_detection/Gemfile.lock | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gems/gitlab-secret_detection/Gemfile.lock b/gems/gitlab-secret_detection/Gemfile.lock index 2f615b24d86a91..e8bcfc23de3143 100644 --- a/gems/gitlab-secret_detection/Gemfile.lock +++ b/gems/gitlab-secret_detection/Gemfile.lock @@ -6,7 +6,11 @@ PATH GEM remote: https://rubygems.org/ specs: +<<<<<<< HEAD activesupport (7.1.2) +======= + activesupport (7.1.1) +>>>>>>> 6e381c3897a (Add gitlab styles dependency) base64 bigdecimal concurrent-ruby (~> 1.0, >= 1.0.2) -- GitLab From b100f1c844ebb758bb7dd89f694eff83ae79ca83 Mon Sep 17 00:00:00 2001 From: Serena Fang Date: Wed, 8 Nov 2023 16:49:40 -0600 Subject: [PATCH 04/18] Secret detection gem scan logic Changelog: added --- gems/gitlab-secret_detection/Gemfile.lock | 9 + .../gitlab-secret_detection.gemspec | 3 + .../lib/gitlab/secret_detection/scan.rb | 79 ++ .../gitlab-secret_detection/lib/gitleaks.toml | 989 ++++++++++++++++++ .../spec/gitlab/secret_detection/scan_spec.rb | 60 ++ .../spec/gitlab/secret_detection_spec.rb | 7 - 6 files changed, 1140 insertions(+), 7 deletions(-) create mode 100644 gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb create mode 100644 gems/gitlab-secret_detection/lib/gitleaks.toml create mode 100644 gems/gitlab-secret_detection/spec/gitlab/secret_detection/scan_spec.rb delete mode 100644 gems/gitlab-secret_detection/spec/gitlab/secret_detection_spec.rb diff --git a/gems/gitlab-secret_detection/Gemfile.lock b/gems/gitlab-secret_detection/Gemfile.lock index e8bcfc23de3143..36a71e5fde4022 100644 --- a/gems/gitlab-secret_detection/Gemfile.lock +++ b/gems/gitlab-secret_detection/Gemfile.lock @@ -46,6 +46,7 @@ GEM json (2.6.3) minitest (5.20.0) mutex_m (0.2.0) + method_source (1.0.0) parallel (1.23.0) parser (3.2.2.4) ast (~> 2.4.1) @@ -54,9 +55,13 @@ GEM coderay parser unparser + pry (0.14.2) + coderay (~> 1.1) + method_source (~> 1.0) racc (1.7.1) rack (3.0.8) rainbow (3.1.1) + re2 (2.3.0-arm64-darwin) regexp_parser (2.8.2) rexml (3.2.6) rspec (3.12.0) @@ -121,6 +126,7 @@ GEM ruby2_keywords (0.0.5) tzinfo (2.0.6) concurrent-ruby (~> 1.0) + tomlrb (2.0.3) unicode-display_width (2.5.0) unparser (0.6.9) diff-lcs (~> 1.3) @@ -132,12 +138,15 @@ PLATFORMS DEPENDENCIES gitlab-secret_detection! gitlab-styles (~> 10.1.0) + pry + re2 rspec (~> 3.0) rspec-benchmark (~> 0.6.0) rspec-parameterized (~> 1.0) rubocop (~> 1.50) rubocop-rails (<= 2.20) rubocop-rspec (~> 2.22) + tomlrb BUNDLED WITH 2.4.14 diff --git a/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec b/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec index ff5121846f4683..16f162ece825aa 100644 --- a/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec +++ b/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec @@ -25,10 +25,13 @@ Gem::Specification.new do |spec| spec.require_paths = ["lib"] spec.add_development_dependency "gitlab-styles", "~> 10.1.0" + spec.add_development_dependency "pry" + spec.add_development_dependency "re2" spec.add_development_dependency "rspec", "~> 3.0" spec.add_development_dependency "rspec-benchmark", "~> 0.6.0" spec.add_development_dependency "rspec-parameterized", "~> 1.0" spec.add_development_dependency "rubocop", "~> 1.50" spec.add_development_dependency "rubocop-rails", "<= 2.20" # https://github.com/rubocop/rubocop-rails/issues/1173 spec.add_development_dependency "rubocop-rspec", "~> 2.22" + spec.add_development_dependency "tomlrb" end diff --git a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb new file mode 100644 index 00000000000000..64c6cd6687d004 --- /dev/null +++ b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb @@ -0,0 +1,79 @@ +# frozen_string_literal: true + +require 'tomlrb' +require 're2' +require 'benchmark' +require 'set' + +module Gitlab + module SecretDetection + class Scan + def initialize + @@secrets_config_data ||= parse_file + end + + def create_patterns + secrets_pattern_set = RE2::Set.new + + @@secrets_config_data["rules"].each do |rule| + pattern = rule["regex"] + secrets_pattern_set.add(pattern) + end + + secrets_pattern_set.compile + secrets_pattern_set + end + + def secrets_scan(gitaly_blob, secrets_pattern_set) + return unless keyword_match(gitaly_blob) + + regex_match(gitaly_blob, secrets_pattern_set) + end + + # private + + def parse_file + file_path = File.expand_path('../../gitleaks.toml', __dir__) + Tomlrb.load_file(file_path) + end + + def keyword_match(gitaly_blob) + secrets_keywords = [] + + @@secrets_config_data["rules"].each do |rule| + pattern = rule["keywords"] + secrets_keywords << pattern + end + + secrets_keywords.compact.flatten.any? do |keyword| + break true if gitaly_blob.data.include?(keyword) + end + end + + def regex_match(gitaly_blob, secrets_pattern_set) + # binding.pry_shell + gitaly_blob_data = gitaly_blob.data + split_strings = gitaly_blob_data.split("\n") + + found_secrets = {} + + split_strings.each_with_index do |s, i| + patterns = secrets_pattern_set.match(s) + next unless patterns.any? + + matched_patterns = [] + patterns.each do |p| + matched_patterns << @@secrets_config_data["rules"][p]["id"] + end + found_secrets[i + 1] = matched_patterns + end + found_secrets + end + + def error(message) + # we don't call this anywhere yet but it could be useful later + ServiceResponse.error(message: message) + end + end + end +end diff --git a/gems/gitlab-secret_detection/lib/gitleaks.toml b/gems/gitlab-secret_detection/lib/gitleaks.toml new file mode 100644 index 00000000000000..53c65a0468e7c6 --- /dev/null +++ b/gems/gitlab-secret_detection/lib/gitleaks.toml @@ -0,0 +1,989 @@ +title = "gitleaks config" + +[[rules]] +id = "gitlab_personal_access_token" +description = "GitLab Personal Access Token" +regex = '''glpat-[0-9a-zA-Z_\-]{20}''' +tags = ["gitlab", "revocation_type"] +keywords = [ + "glpat", +] + +[[rules]] +id = "gitlab_pipeline_trigger_token" +description = "GitLab Pipeline Trigger Token" +regex = '''glptt-[0-9a-zA-Z_\-]{20}''' +tags = ["gitlab"] +keywords = [ + "glptt", +] + +[[rules]] +id = "gitlab_runner_registration_token" +description = "GitLab Runner Registration Token" +regex = '''GR1348941[0-9a-zA-Z_\-]{20}''' +tags = ["gitlab"] +keywords = [ + "GR1348941", +] + +[[rules]] +id = "gitlab_runner_auth_token" +description = "GitLab Runner Authentication Token" +regex = '''glrt-[0-9a-zA-Z_\-]{20}''' +tags = ["gitlab"] +keywords = [ + "glrt", +] + +[[rules]] +id = "gitlab_feed_token" +description = "GitLab Feed Token" +regex = '''feed_token=[0-9a-zA-Z_\-]{20}''' +tags = ["gitlab"] +keywords = [ + "feed_token", +] + +[[rules]] +id = "AWS" +description = "AWS Access Token" +regex = '''AKIA[0-9A-Z]{16}''' +tags = ["aws", "revocation_type"] +keywords = [ + "AKIA", +] + +# Cryptographic keys +[[rules]] +id = "PKCS8 private key" +description = "PKCS8 private key" +regex = '''-----BEGIN PRIVATE KEY-----''' +keywords = [ + "-----BEGIN PRIVATE KEY-----", +] + +[[rules]] +id = "RSA private key" +description = "RSA private key" +regex = '''-----BEGIN RSA PRIVATE KEY-----''' +keywords = [ + "-----BEGIN RSA PRIVATE KEY-----", +] + +[[rules]] +id = "SSH private key" +description = "SSH private key" +regex = '''-----BEGIN OPENSSH PRIVATE KEY-----''' +keywords = [ + "-----BEGIN OPENSSH PRIVATE KEY-----", +] + +[[rules]] +id = "PGP private key" +description = "PGP private key" +regex = '''-----BEGIN PGP PRIVATE KEY BLOCK-----''' +keywords = [ + "-----BEGIN PGP PRIVATE KEY BLOCK-----", +] + +[[rules]] +description = "systemd machine-id" +id = "systemd-machine-id" +path = '''^machine-id$''' +regex = '''^[0-9a-f]{32}\n$''' +entropy = 3.5 + +[[rules]] +id = "Github Personal Access Token" +description = "Github Personal Access Token" +regex = '''ghp_[0-9a-zA-Z]{36}''' +keywords = [ + "ghp_", +] + +[[rules]] +id = "Github OAuth Access Token" +description = "Github OAuth Access Token" +regex = '''gho_[0-9a-zA-Z]{36}''' +keywords = [ + "gho_", +] + +[[rules]] +id = "SSH (DSA) private key" +description = "SSH (DSA) private key" +regex = '''-----BEGIN DSA PRIVATE KEY-----''' +keywords = [ + "-----BEGIN DSA PRIVATE KEY-----", +] + +[[rules]] +id = "SSH (EC) private key" +description = "SSH (EC) private key" +regex = '''-----BEGIN EC PRIVATE KEY-----''' +keywords = [ + "-----BEGIN EC PRIVATE KEY-----", +] + + +[[rules]] +id = "Github App Token" +description = "Github App Token" +regex = '''(ghu|ghs)_[0-9a-zA-Z]{36}''' +keywords = [ + "ghu_", + "ghs_" +] + +[[rules]] +id = "Github Refresh Token" +description = "Github Refresh Token" +regex = '''ghr_[0-9a-zA-Z]{76}''' +keywords = [ + "ghr_" +] + +[[rules]] +id = "Shopify shared secret" +description = "Shopify shared secret" +regex = '''shpss_[a-fA-F0-9]{32}''' +keywords = [ + "shpss_" +] + +[[rules]] +id = "Shopify access token" +description = "Shopify access token" +regex = '''shpat_[a-fA-F0-9]{32}''' +keywords = [ + "shpat_" +] + +[[rules]] +id = "Shopify custom app access token" +description = "Shopify custom app access token" +regex = '''shpca_[a-fA-F0-9]{32}''' +keywords = [ + "shpca_" +] + +[[rules]] +id = "Shopify private app access token" +description = "Shopify private app access token" +regex = '''shppa_[a-fA-F0-9]{32}''' +keywords = [ + "shppa_" +] + +[[rules]] +id = "Slack token" +description = "Slack token" +regex = '''xox[baprs]-([0-9a-zA-Z]{10,48})?''' +keywords = [ + "xoxb","xoxa","xoxp","xoxr","xoxs", +] + +[[rules]] +id = "Stripe" +description = "Stripe" +regex = '''(?i)(sk|pk)_(test|live)_[0-9a-z]{10,32}''' +keywords = [ + "sk_test","pk_test","sk_live","pk_live", +] + +[[rules]] +id = "PyPI upload token" +description = "PyPI upload token" +regex = '''pypi-AgEIcHlwaS5vcmc[A-Za-z0-9-_]{50,1000}''' +tags = ["pypi", "revocation_type"] +keywords = [ + "pypi-AgEIcHlwaS5vcmc", +] + +[[rules]] +id = "Google (GCP) Service-account" +description = "Google (GCP) Service-account" +tags = ["gitlab_partner_token", "revocation_type"] +regex = '''\"private_key\":\s*\"-{5}BEGIN PRIVATE KEY-{5}[\s\S]*?",''' +keywords = [ + "service_account", +] + +[[rules]] +id = "GCP API key" +description = "GCP API keys can be misused to gain API quota from billed projects" +tags = ["gitlab_partner_token", "revocation_type"] +regex = '''(?i)\b(AIza[0-9A-Za-z-_]{35})(?:['|\"|\n|\r|\s|\x60|;]|$)''' +secretGroup = 1 +keywords = [ + "AIza", +] + +[[rules]] +id = "GCP OAuth client secret" +description = "GCP OAuth client secrets can be misused to spoof your application" +tags = ["gitlab_partner_token", "revocation_type"] +regex = '''GOCSPX-[a-zA-Z0-9_-]{28}''' +keywords = [ + "GOCSPX-", +] + +[[rules]] +# demo of this regex not matching passwords in urls that contain env vars: +# https://regex101.com/r/rT9Lv9/6 +id = "Password in URL" +description = "Password in URL" +regex = '''[a-zA-Z]{3,10}:\/\/[^$][^:@\/\n]{3,20}:[^$][^:@\n\/]{3,40}@.{1,100}''' + + +[[rules]] +id = "Heroku API Key" +description = "Heroku API Key" +regex = '''(?i)(?:heroku)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:=|\|\|:|<=|=>|:)(?:'|\"|\s|=|\x60){0,5}([0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12})(?:['|\"|\n|\r|\s|\x60]|$)''' +secretGroup = 1 +keywords = [ + "heroku", +] + +[[rules]] +id = "Slack Webhook" +description = "Slack Webhook" +regex = '''https://hooks.slack.com/services/T[a-zA-Z0-9_]{8}/B[a-zA-Z0-9_]{8,12}/[a-zA-Z0-9_]{24}''' +keywords = [ + "https://hooks.slack.com/services", +] + +[[rules]] +id = "Twilio API Key" +description = "Twilio API Key" +regex = '''SK[0-9a-fA-F]{32}''' +keywords = [ + "SK", + "twilio" +] + +[[rules]] +id = "Age secret key" +description = "Age secret key" +regex = '''AGE-SECRET-KEY-1[QPZRY9X8GF2TVDW0S3JN54KHCE6MUA7L]{58}''' +keywords = [ + "AGE-SECRET-KEY-1", +] + +[[rules]] +id = "Facebook token" +description = "Facebook token" +regex = '''(?i)(facebook[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-f0-9]{32})['\"]''' +secretGroup = 3 +keywords = [ + "facebook", +] + +[[rules]] +id = "Twitter token" +description = "Twitter token" +regex = '''(?i)(twitter[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-f0-9]{35,44})['\"]''' +secretGroup = 3 +keywords = [ + "twitter", +] + +[[rules]] +id = "Adobe Client ID (Oauth Web)" +description = "Adobe Client ID (Oauth Web)" +regex = '''(?i)(adobe[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-f0-9]{32})['\"]''' +secretGroup = 3 +keywords = [ + "adobe", +] + +[[rules]] +id = "Adobe Client Secret" +description = "Adobe Client Secret" +regex = '''(p8e-)(?i)[a-z0-9]{32}''' +keywords = [ + "adobe", + "p8e-," +] + +[[rules]] +id = "Alibaba AccessKey ID" +description = "Alibaba AccessKey ID" +regex = '''(LTAI)(?i)[a-z0-9]{20}''' +keywords = [ + "LTAI", +] + +[[rules]] +id = "Alibaba Secret Key" +description = "Alibaba Secret Key" +regex = '''(?i)(alibaba[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{30})['\"]''' +secretGroup = 3 +keywords = [ + "alibaba", +] + +[[rules]] +id = "Asana Client ID" +description = "Asana Client ID" +regex = '''(?i)(asana[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9]{16})['\"]''' +secretGroup = 3 +keywords = [ + "asana", +] + +[[rules]] +id = "Asana Client Secret" +description = "Asana Client Secret" +regex = '''(?i)(asana[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{32})['\"]''' +secretGroup = 3 +keywords = [ + "asana", +] + +[[rules]] +id = "Atlassian API token" +description = "Atlassian API token" +regex = '''(?i)(atlassian[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{24})['\"]''' +secretGroup = 3 +keywords = [ + "atlassian", +] + +[[rules]] +id = "Bitbucket client ID" +description = "Bitbucket client ID" +regex = '''(?i)(bitbucket[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{32})['\"]''' +secretGroup = 3 +keywords = [ + "bitbucket", +] + +[[rules]] +id = "Bitbucket client secret" +description = "Bitbucket client secret" +regex = '''(?i)(bitbucket[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9_\-]{64})['\"]''' +secretGroup = 3 +keywords = [ + "bitbucket", +] + +[[rules]] +id = "Beamer API token" +description = "Beamer API token" +regex = '''(?i)(beamer[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"](b_[a-z0-9=_\-]{44})['\"]''' +secretGroup = 3 +keywords = [ + "beamer", +] + +[[rules]] +id = "Clojars API token" +description = "Clojars API token" +regex = '''(CLOJARS_)(?i)[a-z0-9]{60}''' +keywords = [ + "CLOJARS_", +] + +[[rules]] +id = "Contentful delivery API token" +description = "Contentful delivery API token" +regex = '''(?i)(contentful[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9\-=_]{43})['\"]''' +secretGroup = 3 +keywords = [ + "contentful", +] + +[[rules]] +id = "Contentful preview API token" +description = "Contentful preview API token" +regex = '''(?i)(contentful[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9\-=_]{43})['\"]''' +secretGroup = 3 +keywords = [ + "contentful", +] + +[[rules]] +id = "Databricks API token" +description = "Databricks API token" +regex = '''dapi[a-h0-9]{32}''' +keywords = [ + "dapi", + "databricks" +] + +[[rules]] +description = "DigitalOcean OAuth Access Token" +id = "digitalocean-access-token" +regex = '''(?i)\b(doo_v1_[a-f0-9]{64})(?:['|\"|\n|\r|\s|\x60|;]|$)''' +secretGroup = 1 +keywords = [ + "doo_v1_", +] + +[[rules]] +description = "DigitalOcean Personal Access Token" +id = "digitalocean-pat" +regex = '''(?i)\b(dop_v1_[a-f0-9]{64})(?:['|\"|\n|\r|\s|\x60|;]|$)''' +secretGroup = 1 +keywords = [ + "dop_v1_", +] + +[[rules]] +description = "DigitalOcean OAuth Refresh Token" +id = "digitalocean-refresh-token" +regex = '''(?i)\b(dor_v1_[a-f0-9]{64})(?:['|\"|\n|\r|\s|\x60|;]|$)''' +secretGroup = 1 +keywords = [ + "dor_v1_", +] + +[[rules]] +id = "Discord API key" +description = "Discord API key" +regex = '''(?i)(discord[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{64})['\"]''' +secretGroup = 3 +keywords = [ + "discord", +] + +[[rules]] +id = "Discord client ID" +description = "Discord client ID" +regex = '''(?i)(discord[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9]{18})['\"]''' +secretGroup = 3 +keywords = [ + "discord", +] + +[[rules]] +id = "Discord client secret" +description = "Discord client secret" +regex = '''(?i)(discord[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9=_\-]{32})['\"]''' +secretGroup = 3 +keywords = [ + "discord", +] + +[[rules]] +id = "Doppler API token" +description = "Doppler API token" +regex = '''['\"](dp\.pt\.)(?i)[a-z0-9]{43}['\"]''' +keywords = [ + "doppler", +] + +[[rules]] +id = "Dropbox API secret/key" +description = "Dropbox API secret/key" +regex = '''(?i)(dropbox[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{15})['\"]''' +keywords = [ + "dropbox", +] + +[[rules]] +id = "Dropbox short lived API token" +description = "Dropbox short lived API token" +regex = '''(?i)(dropbox[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"](sl\.[a-z0-9\-=_]{135})['\"]''' +keywords = [ + "dropbox", +] + +[[rules]] +id = "Dropbox long lived API token" +description = "Dropbox long lived API token" +regex = '''(?i)(dropbox[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"][a-z0-9]{11}(AAAAAAAAAA)[a-z0-9\-_=]{43}['\"]''' +keywords = [ + "dropbox", +] + +[[rules]] +id = "Duffel API token" +description = "Duffel API token" +regex = '''['\"]duffel_(test|live)_(?i)[a-z0-9_-]{43}['\"]''' +keywords = [ + "duffel", +] + +[[rules]] +id = "Dynatrace API token" +description = "Dynatrace API token" +regex = '''['\"]dt0c01\.(?i)[a-z0-9]{24}\.[a-z0-9]{64}['\"]''' +keywords = [ + "dt0c01", +] + +[[rules]] +id = "EasyPost API token" +description = "EasyPost API token" +regex = '''['\"]EZAK(?i)[a-z0-9]{54}['\"]''' +keywords = [ + "EZAK", +] + + +[[rules]] +id = "EasyPost test API token" +description = "EasyPost test API token" +regex = '''['\"]EZTK(?i)[a-z0-9]{54}['\"]''' +keywords = [ + "EZTK", +] + +[[rules]] +id = "Fastly API token" +description = "Fastly API token" +regex = '''(?i)(fastly[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9\-=_]{32})['\"]''' +secretGroup = 3 +keywords = [ + "fastly", +] + +[[rules]] +id = "Finicity client secret" +description = "Finicity client secret" +regex = '''(?i)(finicity[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{20})['\"]''' +secretGroup = 3 +keywords = [ + "finicity", +] + +[[rules]] +id = "Finicity API token" +description = "Finicity API token" +regex = '''(?i)(finicity[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-f0-9]{32})['\"]''' +secretGroup = 3 +keywords = [ + "finicity", +] + +[[rules]] +id = "Flutterwave public key" +description = "Flutterwave public key" +regex = '''FLWPUBK_TEST-(?i)[a-h0-9]{32}-X''' +keywords = [ + "FLWPUBK_TEST", +] + +[[rules]] +id = "Flutterwave secret key" +description = "Flutterwave secret key" +regex = '''FLWSECK_TEST-(?i)[a-h0-9]{32}-X''' +keywords = [ + "FLWSECK_TEST", +] + +[[rules]] +id = "Flutterwave encrypted key" +description = "Flutterwave encrypted key" +regex = '''FLWSECK_TEST[a-h0-9]{12}''' +keywords = [ + "FLWSECK_TEST", +] + +[[rules]] +id = "Frame.io API token" +description = "Frame.io API token" +regex = '''fio-u-(?i)[a-z0-9-_=]{64}''' +keywords = [ + "fio-u-", +] + +[[rules]] +id = "GoCardless API token" +description = "GoCardless API token" +regex = '''['\"]live_(?i)[a-z0-9-_=]{40}['\"]''' +keywords = [ + "gocardless", +] + +[[rules]] +id = "Grafana API token" +description = "Grafana API token" +regex = '''['\"]eyJrIjoi(?i)[a-z0-9-_=]{72,92}['\"]''' +keywords = [ + "grafana", +] + +[[rules]] +id = "Hashicorp Terraform user/org API token" +description = "Hashicorp Terraform user/org API token" +regex = '''['\"](?i)[a-z0-9]{14}\.atlasv1\.[a-z0-9-_=]{60,70}['\"]''' +keywords = [ + "atlasv1", + "hashicorp", + "terraform" +] + +[[rules]] +id = "Hashicorp Vault batch token" +description = "Hashicorp Vault batch token" +regex = '''b\.AAAAAQ[0-9a-zA-Z_-]{156}''' +keywords = [ + "hashicorp", + "AAAAAQ", + "vault" +] + +[[rules]] +id = "Hubspot API token" +description = "Hubspot API token" +regex = '''(?i)(hubspot[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{8}-[a-h0-9]{4}-[a-h0-9]{4}-[a-h0-9]{4}-[a-h0-9]{12})['\"]''' +secretGroup = 3 +keywords = [ + "hubspot", +] + +[[rules]] +id = "Intercom API token" +description = "Intercom API token" +regex = '''(?i)(intercom[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9=_]{60})['\"]''' +secretGroup = 3 +keywords = [ + "intercom", +] + +[[rules]] +id = "Intercom client secret/ID" +description = "Intercom client secret/ID" +regex = '''(?i)(intercom[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{8}-[a-h0-9]{4}-[a-h0-9]{4}-[a-h0-9]{4}-[a-h0-9]{12})['\"]''' +secretGroup = 3 +keywords = [ + "intercom", +] + +[[rules]] +id = "Ionic API token" +description = "Ionic API token" +regex = '''ion_(?i)[a-z0-9]{42}''' +keywords = [ + "ion_", +] + +[[rules]] +id = "Linear API token" +description = "Linear API token" +regex = '''lin_api_(?i)[a-z0-9]{40}''' +keywords = [ + "lin_api_", +] + +[[rules]] +id = "Linear client secret/ID" +description = "Linear client secret/ID" +regex = '''(?i)(linear[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-f0-9]{32})['\"]''' +secretGroup = 3 +keywords = [ + "linear", +] + +[[rules]] +id = "Lob API Key" +description = "Lob API Key" +regex = '''(?i)(lob[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]((live|test)_[a-f0-9]{35})['\"]''' +secretGroup = 3 +keywords = [ + "lob", +] + +[[rules]] +id = "Lob Publishable API Key" +description = "Lob Publishable API Key" +regex = '''(?i)(lob[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]((test|live)_pub_[a-f0-9]{31})['\"]''' +secretGroup = 3 +keywords = [ + "lob", +] + +[[rules]] +id = "Mailchimp API key" +description = "Mailchimp API key" +regex = '''(?i)(mailchimp[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-f0-9]{32}-us20)['\"]''' +secretGroup = 3 +keywords = [ + "mailchimp", +] + +[[rules]] +id = "Mailgun private API token" +description = "Mailgun private API token" +regex = '''(?i)(mailgun[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"](key-[a-f0-9]{32})['\"]''' +secretGroup = 3 +keywords = [ + "mailgun", +] + +[[rules]] +id = "Mailgun public validation key" +description = "Mailgun public validation key" +regex = '''(?i)(mailgun[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"](pubkey-[a-f0-9]{32})['\"]''' +secretGroup = 3 +keywords = [ + "mailgun", +] + +[[rules]] +id = "Mailgun webhook signing key" +description = "Mailgun webhook signing key" +regex = '''(?i)(mailgun[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{32}-[a-h0-9]{8}-[a-h0-9]{8})['\"]''' +secretGroup = 3 +keywords = [ + "mailgun", +] + +[[rules]] +id = "Mapbox API token" +description = "Mapbox API token" +regex = '''(?i)(pk\.[a-z0-9]{60}\.[a-z0-9]{22})''' +keywords = [ + "mapbox", +] + +[[rules]] +id = "messagebird-api-token" +description = "MessageBird API token" +regex = '''(?i)(messagebird[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{25})['\"]''' +secretGroup = 3 +keywords = [ + "messagebird", +] + +[[rules]] +id = "MessageBird API client ID" +description = "MessageBird API client ID" +regex = '''(?i)(messagebird[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{8}-[a-h0-9]{4}-[a-h0-9]{4}-[a-h0-9]{4}-[a-h0-9]{12})['\"]''' +secretGroup = 3 +keywords = [ + "messagebird", +] + +[[rules]] +id = "New Relic user API Key" +description = "New Relic user API Key" +regex = '''['\"](NRAK-[A-Z0-9]{27})['\"]''' +keywords = [ + "NRAK", +] + +[[rules]] +id = "New Relic user API ID" +description = "New Relic user API ID" +regex = '''(?i)(newrelic[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([A-Z0-9]{64})['\"]''' +secretGroup = 3 +keywords = [ + "newrelic", +] + +[[rules]] +id = "New Relic ingest browser API token" +description = "New Relic ingest browser API token" +regex = '''['\"](NRJS-[a-f0-9]{19})['\"]''' +keywords = [ + "NRJS", +] + +[[rules]] +id = "npm access token" +description = "npm access token" +regex = '''['\"](npm_(?i)[a-z0-9]{36})['\"]''' +keywords = [ + "npm_", +] + +[[rules]] +id = "Planetscale password" +description = "Planetscale password" +regex = '''pscale_pw_(?i)[a-z0-9\-_\.]{43}''' +keywords = [ + "pscale_pw_", +] + +[[rules]] +id = "Planetscale API token" +description = "Planetscale API token" +regex = '''pscale_tkn_(?i)[a-z0-9\-_\.]{43}''' +keywords = [ + "pscale_tkn_", +] + +[[rules]] +id = "Postman API token" +description = "Postman API token" +regex = '''PMAK-(?i)[a-f0-9]{24}\-[a-f0-9]{34}''' +keywords = [ + "PMAK-", +] + +[[rules]] +id = "Pulumi API token" +description = "Pulumi API token" +regex = '''pul-[a-f0-9]{40}''' +keywords = [ + "pul-", +] + +[[rules]] +id = "Rubygem API token" +description = "Rubygem API token" +regex = '''rubygems_[a-f0-9]{48}''' +keywords = [ + "rubygems_", +] + +[[rules]] +id = "Segment Public API token" +description = "Segment Public API token" +regex = '''sgp_[a-zA-Z0-9]{64}''' +keywords = [ + "sgp_", +] + +[[rules]] +id = "Sendgrid API token" +description = "Sendgrid API token" +regex = '''SG\.(?i)[a-z0-9_\-\.]{66}''' +keywords = [ + "sendgrid", +] + +[[rules]] +id = "Sendinblue API token" +description = "Sendinblue API token" +regex = '''xkeysib-[a-f0-9]{64}\-(?i)[a-z0-9]{16}''' +keywords = [ + "xkeysib-", +] + +[[rules]] +id = "Sendinblue SMTP token" +description = "Sendinblue SMTP token" +regex = '''xsmtpsib-[a-f0-9]{64}\-(?i)[a-z0-9]{16}''' +keywords = [ + "xsmtpsib-", +] + +[[rules]] +id = "Shippo API token" +description = "Shippo API token" +regex = '''shippo_(live|test)_[a-f0-9]{40}''' +keywords = [ + "shippo_", +] + +[[rules]] +id = "Linkedin Client secret" +description = "Linkedin Client secret" +regex = '''(?i)(linkedin[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z]{16})['\"]''' +secretGroup = 3 +keywords = [ + "linkedin", +] + +[[rules]] +id = "Linkedin Client ID" +description = "Linkedin Client ID" +regex = '''(?i)(linkedin[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{14})['\"]''' +secretGroup = 3 +keywords = [ + "linkedin", +] + +[[rules]] +id = "Twitch API token" +description = "Twitch API token" +regex = '''(?i)(twitch[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{30})['\"]''' +secretGroup = 3 +keywords = [ + "twitch", +] + +[[rules]] +id = "Typeform API token" +description = "Typeform API token" +regex = '''(?i)(typeform[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}(tfp_[a-z0-9\-_\.=]{59})''' +secretGroup = 3 +keywords = [ + "typeform", +] + +[[rules]] +id = "Yandex.Cloud IAM Cookie v1 - 1" +description = "Yandex.Cloud IAM Cookie v1" +regex = '''\bc1\.[A-Z0-9a-z_-]+[=]{0,2}\.[A-Z0-9a-z_-]{86}[=]{0,2}['|\"|\n|\r|\s|\x60]''' +keywords = [ + "yandex", +] + +[[rules]] +id = "Yandex.Cloud IAM Cookie v1 - 2" +description = "Yandex.Cloud IAM Token v1" +regex = '''\bt1\.[A-Z0-9a-z_-]+[=]{0,2}\.[A-Z0-9a-z_-]{86}[=]{0,2}['|\"|\n|\r|\s|\x60]''' +keywords = [ + "yandex", +] + +[[rules]] +id = "Yandex.Cloud IAM Cookie v1 - 3" +description = "Yandex.Cloud IAM API key v1" +regex = '''\bAQVN[A-Za-z0-9_\-]{35,38}['|\"|\n|\r|\s|\x60]''' +keywords = [ + "yandex", +] + +[[rules]] +id = "Yandex.Cloud AWS API compatible Access Secret" +description = "Yandex.Cloud AWS API compatible Access Secret" +regex = '''\bYC[a-zA-Z0-9_\-]{38}['|\"|\n|\r|\s|\x60]''' +keywords = [ + "yandex", +] + +[[rules]] +id = "Meta access token" +description = "Meta access token" +regex = '''\bEA[a-zA-Z0-9]{90,400}['|\"|\n|\r|\s|\x60]''' +keywords = [ + "EA", +] + +[[rules]] +id = "Oculus access token" +description = "Oculus access token" +regex = '''\bOC[a-zA-Z0-9]{90,400}['|\"|\n|\r|\s|\x60]''' +keywords = [ + "OC", +] + +[[rules]] +id = "Instagram access token" +description = "Instagram access token" +regex = '''\bIG[a-zA-Z0-9]{90,400}['|\"|\n|\r|\s|\x60]''' +keywords = [ + "IG", +] + +[[rules]] +id = "CircleCI access tokens" +description = "CircleCI access tokens" +regex = '''\bCCI(?:PAT|PRJ)_[a-zA-Z0-9]{22}_[a-f0-9]{40}''' +keywords = [ + "CircleCI" +] + +[[rules]] +description = "Open AI API key" +id = "open ai token" +regex = '''\bsk-[a-zA-Z0-9]{48}\b''' +keywords = [ + "sk-", +] + +[allowlist] +description = "global allow lists" +paths = [ + '''gitleaks.toml''', + '''(.*?)(jpg|gif|doc|pdf|bin|svg|socket)$''', + '''(go.mod|go.sum)$''' +] \ No newline at end of file diff --git a/gems/gitlab-secret_detection/spec/gitlab/secret_detection/scan_spec.rb b/gems/gitlab-secret_detection/spec/gitlab/secret_detection/scan_spec.rb new file mode 100644 index 00000000000000..37477bfe0facb4 --- /dev/null +++ b/gems/gitlab-secret_detection/spec/gitlab/secret_detection/scan_spec.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'pry' + +RSpec.describe Gitlab::SecretDetection::Scan do + file_data = { + "title" => "gitleaks config", + "rules" => [ + { "id" => "gitlab_personal_access_token", + "description" => "GitLab Personal Access Token", + "regex" => "glpat-[0-9a-zA-Z_\\-]{20}", + "tags" => %w[gitlab revocation_type], + "keywords" => ["glpat"] }, + { "id" => "gitlab_pipeline_trigger_token", + "description" => "GitLab Pipeline Trigger Token", + "regex" => "glptt-[0-9a-zA-Z_\\-]{20}", + "tags" => ["gitlab"], + "keywords" => ["glptt"] } + ] + } + + let(:pattern_set) { described_class.new.create_patterns } + + it "has a version number" do + expect(Gitlab::SecretDetection::VERSION).not_to be_nil + end + + context "when it creates RE2 patterns from file data" do + before do + allow(described_class).to receive(:parse_file).and_return(file_data) + end + + it "successfully creates RE2 patterns" do + expect(pattern_set).not_to be_nil + end + end + + context "when matching patterns" do + before do + allow(described_class).to receive(:parse_file).and_return(file_data) + end + + context 'when the blob does not contain a secret' do + blob = Struct.new(:data).new("no secret") + + it "does not match" do + expect(described_class.new.secrets_scan(blob, pattern_set)).to be_nil + end + end + + context "when the blob contains a secret" do + blob = Struct.new(:data).new("glpat" + "12312312312312312312") # rubocop:disable Style/StringConcatenation + + it "matches glpat" do + expect(described_class.new.secrets_scan(blob, pattern_set)).to eq({ 1 => ["gitlab_personal_access_token"] }) + end + end + end +end diff --git a/gems/gitlab-secret_detection/spec/gitlab/secret_detection_spec.rb b/gems/gitlab-secret_detection/spec/gitlab/secret_detection_spec.rb deleted file mode 100644 index 112ab8c7468c16..00000000000000 --- a/gems/gitlab-secret_detection/spec/gitlab/secret_detection_spec.rb +++ /dev/null @@ -1,7 +0,0 @@ -# frozen_string_literal: true - -RSpec.describe Gitlab::SecretDetection do - it "has a version number" do - expect(Gitlab::SecretDetection::VERSION).not_to be_nil - end -end -- GitLab From 0074181d70ffe71f2533662ed0af18b8e72670b1 Mon Sep 17 00:00:00 2001 From: Serena Fang Date: Wed, 8 Nov 2023 18:02:23 -0600 Subject: [PATCH 05/18] Change scan init --- .../lib/gitlab/secret_detection/scan.rb | 33 ++++++++----------- .../spec/gitlab/secret_detection/scan_spec.rb | 13 +++++--- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb index 64c6cd6687d004..432bd7e99ddaeb 100644 --- a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb +++ b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb @@ -2,20 +2,20 @@ require 'tomlrb' require 're2' -require 'benchmark' -require 'set' module Gitlab module SecretDetection class Scan - def initialize - @@secrets_config_data ||= parse_file + def parse_file + file_path = File.expand_path('../../gitleaks.toml', __dir__) + Tomlrb.load_file(file_path) end def create_patterns + secrets_config_data = parse_file secrets_pattern_set = RE2::Set.new - @@secrets_config_data["rules"].each do |rule| + secrets_config_data["rules"].each do |rule| pattern = rule["regex"] secrets_pattern_set.add(pattern) end @@ -24,23 +24,18 @@ def create_patterns secrets_pattern_set end - def secrets_scan(gitaly_blob, secrets_pattern_set) - return unless keyword_match(gitaly_blob) + def secrets_scan(gitaly_blob:, secrets_config_data:, secrets_pattern_set:) + return unless keyword_match(gitaly_blob, secrets_config_data) - regex_match(gitaly_blob, secrets_pattern_set) + regex_match(gitaly_blob, secrets_config_data, secrets_pattern_set) end - # private + private - def parse_file - file_path = File.expand_path('../../gitleaks.toml', __dir__) - Tomlrb.load_file(file_path) - end - - def keyword_match(gitaly_blob) + def keyword_match(gitaly_blob, secrets_config_data) secrets_keywords = [] - @@secrets_config_data["rules"].each do |rule| + secrets_config_data["rules"].each do |rule| pattern = rule["keywords"] secrets_keywords << pattern end @@ -50,8 +45,7 @@ def keyword_match(gitaly_blob) end end - def regex_match(gitaly_blob, secrets_pattern_set) - # binding.pry_shell + def regex_match(gitaly_blob, secrets_config_data, secrets_pattern_set) gitaly_blob_data = gitaly_blob.data split_strings = gitaly_blob_data.split("\n") @@ -62,8 +56,9 @@ def regex_match(gitaly_blob, secrets_pattern_set) next unless patterns.any? matched_patterns = [] + patterns.each do |p| - matched_patterns << @@secrets_config_data["rules"][p]["id"] + matched_patterns << secrets_config_data["rules"][p]["id"] end found_secrets[i + 1] = matched_patterns end diff --git a/gems/gitlab-secret_detection/spec/gitlab/secret_detection/scan_spec.rb b/gems/gitlab-secret_detection/spec/gitlab/secret_detection/scan_spec.rb index 37477bfe0facb4..ddbefe3443f06e 100644 --- a/gems/gitlab-secret_detection/spec/gitlab/secret_detection/scan_spec.rb +++ b/gems/gitlab-secret_detection/spec/gitlab/secret_detection/scan_spec.rb @@ -1,7 +1,6 @@ # frozen_string_literal: true require 'spec_helper' -require 'pry' RSpec.describe Gitlab::SecretDetection::Scan do file_data = { @@ -26,6 +25,10 @@ expect(Gitlab::SecretDetection::VERSION).not_to be_nil end + it "parses the toml file" do + expect(described_class.new.parse_file).not_to be_nil + end + context "when it creates RE2 patterns from file data" do before do allow(described_class).to receive(:parse_file).and_return(file_data) @@ -45,15 +48,17 @@ blob = Struct.new(:data).new("no secret") it "does not match" do - expect(described_class.new.secrets_scan(blob, pattern_set)).to be_nil + expect(described_class.new.secrets_scan(gitaly_blob: blob, secrets_config_data: file_data, + secrets_pattern_set: pattern_set)).to be_nil end end context "when the blob contains a secret" do - blob = Struct.new(:data).new("glpat" + "12312312312312312312") # rubocop:disable Style/StringConcatenation + blob = Struct.new(:data).new("glpat-" + "12312312312312312312") # rubocop:disable Style/StringConcatenation -- Creates a gitleaks false positive it "matches glpat" do - expect(described_class.new.secrets_scan(blob, pattern_set)).to eq({ 1 => ["gitlab_personal_access_token"] }) + expect(described_class.new.secrets_scan(gitaly_blob: blob, secrets_config_data: file_data, + secrets_pattern_set: pattern_set)).to eq({ 1 => ["gitlab_personal_access_token"] }) end end end -- GitLab From 5d411df7e032da182d560c4faf5e4c9ac9eb75d2 Mon Sep 17 00:00:00 2001 From: Serena Fang Date: Thu, 9 Nov 2023 11:52:28 -0600 Subject: [PATCH 06/18] Update gemfile platform --- gems/gitlab-secret_detection/Gemfile.lock | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gems/gitlab-secret_detection/Gemfile.lock b/gems/gitlab-secret_detection/Gemfile.lock index 36a71e5fde4022..b3196d63418314 100644 --- a/gems/gitlab-secret_detection/Gemfile.lock +++ b/gems/gitlab-secret_detection/Gemfile.lock @@ -47,6 +47,7 @@ GEM minitest (5.20.0) mutex_m (0.2.0) method_source (1.0.0) + mini_portile2 (2.8.5) parallel (1.23.0) parser (3.2.2.4) ast (~> 2.4.1) @@ -61,7 +62,8 @@ GEM racc (1.7.1) rack (3.0.8) rainbow (3.1.1) - re2 (2.3.0-arm64-darwin) + re2 (2.3.0) + mini_portile2 (~> 2.8.5) regexp_parser (2.8.2) rexml (3.2.6) rspec (3.12.0) -- GitLab From 678b93de437e433088fb3be72f0f51c063366cae Mon Sep 17 00:00:00 2001 From: Serena Fang Date: Tue, 14 Nov 2023 15:40:02 -0600 Subject: [PATCH 07/18] Apply reviewer suggestions --- .../lib/gitlab/secret_detection/scan.rb | 54 ++++++++++--------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb index 432bd7e99ddaeb..a8a3356ec443b0 100644 --- a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb +++ b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb @@ -6,36 +6,47 @@ module Gitlab module SecretDetection class Scan + def initialize + @secrets_config_data = parse_file + @secrets_pattern_set = create_patterns + @logger = Gitlab::Git::Logger.build + end + + def secrets_scan(gitaly_blob) + return unless keyword_match(gitaly_blob) + + regex_match(gitaly_blob) + end + + private + def parse_file file_path = File.expand_path('../../gitleaks.toml', __dir__) Tomlrb.load_file(file_path) + rescue StandardError => e + logger.error(message: e.message) + false end def create_patterns - secrets_config_data = parse_file - secrets_pattern_set = RE2::Set.new + @secrets_pattern_set = RE2::Set.new - secrets_config_data["rules"].each do |rule| + @secrets_config_data["rules"].each do |rule| pattern = rule["regex"] - secrets_pattern_set.add(pattern) + @secrets_pattern_set.add(pattern) end - secrets_pattern_set.compile - secrets_pattern_set + @secrets_pattern_set.compile + @secrets_pattern_set + rescue StandardError => e + logger.error(message: e.message) + false end - def secrets_scan(gitaly_blob:, secrets_config_data:, secrets_pattern_set:) - return unless keyword_match(gitaly_blob, secrets_config_data) - - regex_match(gitaly_blob, secrets_config_data, secrets_pattern_set) - end - - private - - def keyword_match(gitaly_blob, secrets_config_data) + def keyword_match(gitaly_blob) secrets_keywords = [] - secrets_config_data["rules"].each do |rule| + @secrets_config_data["rules"].each do |rule| pattern = rule["keywords"] secrets_keywords << pattern end @@ -45,30 +56,25 @@ def keyword_match(gitaly_blob, secrets_config_data) end end - def regex_match(gitaly_blob, secrets_config_data, secrets_pattern_set) + def regex_match(gitaly_blob) gitaly_blob_data = gitaly_blob.data split_strings = gitaly_blob_data.split("\n") found_secrets = {} split_strings.each_with_index do |s, i| - patterns = secrets_pattern_set.match(s) + patterns = @secrets_pattern_set.match(s) next unless patterns.any? matched_patterns = [] patterns.each do |p| - matched_patterns << secrets_config_data["rules"][p]["id"] + matched_patterns << @secrets_config_data["rules"][p]["id"] end found_secrets[i + 1] = matched_patterns end found_secrets end - - def error(message) - # we don't call this anywhere yet but it could be useful later - ServiceResponse.error(message: message) - end end end end -- GitLab From 77ff49e0c65308153f9dda2b118d56cc24685d11 Mon Sep 17 00:00:00 2001 From: Serena Fang Date: Tue, 14 Nov 2023 15:42:02 -0600 Subject: [PATCH 08/18] Reduce gitleaks toml --- .../gitlab-secret_detection/lib/gitleaks.toml | 935 ------------------ 1 file changed, 935 deletions(-) diff --git a/gems/gitlab-secret_detection/lib/gitleaks.toml b/gems/gitlab-secret_detection/lib/gitleaks.toml index 53c65a0468e7c6..a83dd9f6e32ff6 100644 --- a/gems/gitlab-secret_detection/lib/gitleaks.toml +++ b/gems/gitlab-secret_detection/lib/gitleaks.toml @@ -45,941 +45,6 @@ keywords = [ "feed_token", ] -[[rules]] -id = "AWS" -description = "AWS Access Token" -regex = '''AKIA[0-9A-Z]{16}''' -tags = ["aws", "revocation_type"] -keywords = [ - "AKIA", -] - -# Cryptographic keys -[[rules]] -id = "PKCS8 private key" -description = "PKCS8 private key" -regex = '''-----BEGIN PRIVATE KEY-----''' -keywords = [ - "-----BEGIN PRIVATE KEY-----", -] - -[[rules]] -id = "RSA private key" -description = "RSA private key" -regex = '''-----BEGIN RSA PRIVATE KEY-----''' -keywords = [ - "-----BEGIN RSA PRIVATE KEY-----", -] - -[[rules]] -id = "SSH private key" -description = "SSH private key" -regex = '''-----BEGIN OPENSSH PRIVATE KEY-----''' -keywords = [ - "-----BEGIN OPENSSH PRIVATE KEY-----", -] - -[[rules]] -id = "PGP private key" -description = "PGP private key" -regex = '''-----BEGIN PGP PRIVATE KEY BLOCK-----''' -keywords = [ - "-----BEGIN PGP PRIVATE KEY BLOCK-----", -] - -[[rules]] -description = "systemd machine-id" -id = "systemd-machine-id" -path = '''^machine-id$''' -regex = '''^[0-9a-f]{32}\n$''' -entropy = 3.5 - -[[rules]] -id = "Github Personal Access Token" -description = "Github Personal Access Token" -regex = '''ghp_[0-9a-zA-Z]{36}''' -keywords = [ - "ghp_", -] - -[[rules]] -id = "Github OAuth Access Token" -description = "Github OAuth Access Token" -regex = '''gho_[0-9a-zA-Z]{36}''' -keywords = [ - "gho_", -] - -[[rules]] -id = "SSH (DSA) private key" -description = "SSH (DSA) private key" -regex = '''-----BEGIN DSA PRIVATE KEY-----''' -keywords = [ - "-----BEGIN DSA PRIVATE KEY-----", -] - -[[rules]] -id = "SSH (EC) private key" -description = "SSH (EC) private key" -regex = '''-----BEGIN EC PRIVATE KEY-----''' -keywords = [ - "-----BEGIN EC PRIVATE KEY-----", -] - - -[[rules]] -id = "Github App Token" -description = "Github App Token" -regex = '''(ghu|ghs)_[0-9a-zA-Z]{36}''' -keywords = [ - "ghu_", - "ghs_" -] - -[[rules]] -id = "Github Refresh Token" -description = "Github Refresh Token" -regex = '''ghr_[0-9a-zA-Z]{76}''' -keywords = [ - "ghr_" -] - -[[rules]] -id = "Shopify shared secret" -description = "Shopify shared secret" -regex = '''shpss_[a-fA-F0-9]{32}''' -keywords = [ - "shpss_" -] - -[[rules]] -id = "Shopify access token" -description = "Shopify access token" -regex = '''shpat_[a-fA-F0-9]{32}''' -keywords = [ - "shpat_" -] - -[[rules]] -id = "Shopify custom app access token" -description = "Shopify custom app access token" -regex = '''shpca_[a-fA-F0-9]{32}''' -keywords = [ - "shpca_" -] - -[[rules]] -id = "Shopify private app access token" -description = "Shopify private app access token" -regex = '''shppa_[a-fA-F0-9]{32}''' -keywords = [ - "shppa_" -] - -[[rules]] -id = "Slack token" -description = "Slack token" -regex = '''xox[baprs]-([0-9a-zA-Z]{10,48})?''' -keywords = [ - "xoxb","xoxa","xoxp","xoxr","xoxs", -] - -[[rules]] -id = "Stripe" -description = "Stripe" -regex = '''(?i)(sk|pk)_(test|live)_[0-9a-z]{10,32}''' -keywords = [ - "sk_test","pk_test","sk_live","pk_live", -] - -[[rules]] -id = "PyPI upload token" -description = "PyPI upload token" -regex = '''pypi-AgEIcHlwaS5vcmc[A-Za-z0-9-_]{50,1000}''' -tags = ["pypi", "revocation_type"] -keywords = [ - "pypi-AgEIcHlwaS5vcmc", -] - -[[rules]] -id = "Google (GCP) Service-account" -description = "Google (GCP) Service-account" -tags = ["gitlab_partner_token", "revocation_type"] -regex = '''\"private_key\":\s*\"-{5}BEGIN PRIVATE KEY-{5}[\s\S]*?",''' -keywords = [ - "service_account", -] - -[[rules]] -id = "GCP API key" -description = "GCP API keys can be misused to gain API quota from billed projects" -tags = ["gitlab_partner_token", "revocation_type"] -regex = '''(?i)\b(AIza[0-9A-Za-z-_]{35})(?:['|\"|\n|\r|\s|\x60|;]|$)''' -secretGroup = 1 -keywords = [ - "AIza", -] - -[[rules]] -id = "GCP OAuth client secret" -description = "GCP OAuth client secrets can be misused to spoof your application" -tags = ["gitlab_partner_token", "revocation_type"] -regex = '''GOCSPX-[a-zA-Z0-9_-]{28}''' -keywords = [ - "GOCSPX-", -] - -[[rules]] -# demo of this regex not matching passwords in urls that contain env vars: -# https://regex101.com/r/rT9Lv9/6 -id = "Password in URL" -description = "Password in URL" -regex = '''[a-zA-Z]{3,10}:\/\/[^$][^:@\/\n]{3,20}:[^$][^:@\n\/]{3,40}@.{1,100}''' - - -[[rules]] -id = "Heroku API Key" -description = "Heroku API Key" -regex = '''(?i)(?:heroku)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:=|\|\|:|<=|=>|:)(?:'|\"|\s|=|\x60){0,5}([0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12})(?:['|\"|\n|\r|\s|\x60]|$)''' -secretGroup = 1 -keywords = [ - "heroku", -] - -[[rules]] -id = "Slack Webhook" -description = "Slack Webhook" -regex = '''https://hooks.slack.com/services/T[a-zA-Z0-9_]{8}/B[a-zA-Z0-9_]{8,12}/[a-zA-Z0-9_]{24}''' -keywords = [ - "https://hooks.slack.com/services", -] - -[[rules]] -id = "Twilio API Key" -description = "Twilio API Key" -regex = '''SK[0-9a-fA-F]{32}''' -keywords = [ - "SK", - "twilio" -] - -[[rules]] -id = "Age secret key" -description = "Age secret key" -regex = '''AGE-SECRET-KEY-1[QPZRY9X8GF2TVDW0S3JN54KHCE6MUA7L]{58}''' -keywords = [ - "AGE-SECRET-KEY-1", -] - -[[rules]] -id = "Facebook token" -description = "Facebook token" -regex = '''(?i)(facebook[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-f0-9]{32})['\"]''' -secretGroup = 3 -keywords = [ - "facebook", -] - -[[rules]] -id = "Twitter token" -description = "Twitter token" -regex = '''(?i)(twitter[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-f0-9]{35,44})['\"]''' -secretGroup = 3 -keywords = [ - "twitter", -] - -[[rules]] -id = "Adobe Client ID (Oauth Web)" -description = "Adobe Client ID (Oauth Web)" -regex = '''(?i)(adobe[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-f0-9]{32})['\"]''' -secretGroup = 3 -keywords = [ - "adobe", -] - -[[rules]] -id = "Adobe Client Secret" -description = "Adobe Client Secret" -regex = '''(p8e-)(?i)[a-z0-9]{32}''' -keywords = [ - "adobe", - "p8e-," -] - -[[rules]] -id = "Alibaba AccessKey ID" -description = "Alibaba AccessKey ID" -regex = '''(LTAI)(?i)[a-z0-9]{20}''' -keywords = [ - "LTAI", -] - -[[rules]] -id = "Alibaba Secret Key" -description = "Alibaba Secret Key" -regex = '''(?i)(alibaba[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{30})['\"]''' -secretGroup = 3 -keywords = [ - "alibaba", -] - -[[rules]] -id = "Asana Client ID" -description = "Asana Client ID" -regex = '''(?i)(asana[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9]{16})['\"]''' -secretGroup = 3 -keywords = [ - "asana", -] - -[[rules]] -id = "Asana Client Secret" -description = "Asana Client Secret" -regex = '''(?i)(asana[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{32})['\"]''' -secretGroup = 3 -keywords = [ - "asana", -] - -[[rules]] -id = "Atlassian API token" -description = "Atlassian API token" -regex = '''(?i)(atlassian[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{24})['\"]''' -secretGroup = 3 -keywords = [ - "atlassian", -] - -[[rules]] -id = "Bitbucket client ID" -description = "Bitbucket client ID" -regex = '''(?i)(bitbucket[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{32})['\"]''' -secretGroup = 3 -keywords = [ - "bitbucket", -] - -[[rules]] -id = "Bitbucket client secret" -description = "Bitbucket client secret" -regex = '''(?i)(bitbucket[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9_\-]{64})['\"]''' -secretGroup = 3 -keywords = [ - "bitbucket", -] - -[[rules]] -id = "Beamer API token" -description = "Beamer API token" -regex = '''(?i)(beamer[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"](b_[a-z0-9=_\-]{44})['\"]''' -secretGroup = 3 -keywords = [ - "beamer", -] - -[[rules]] -id = "Clojars API token" -description = "Clojars API token" -regex = '''(CLOJARS_)(?i)[a-z0-9]{60}''' -keywords = [ - "CLOJARS_", -] - -[[rules]] -id = "Contentful delivery API token" -description = "Contentful delivery API token" -regex = '''(?i)(contentful[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9\-=_]{43})['\"]''' -secretGroup = 3 -keywords = [ - "contentful", -] - -[[rules]] -id = "Contentful preview API token" -description = "Contentful preview API token" -regex = '''(?i)(contentful[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9\-=_]{43})['\"]''' -secretGroup = 3 -keywords = [ - "contentful", -] - -[[rules]] -id = "Databricks API token" -description = "Databricks API token" -regex = '''dapi[a-h0-9]{32}''' -keywords = [ - "dapi", - "databricks" -] - -[[rules]] -description = "DigitalOcean OAuth Access Token" -id = "digitalocean-access-token" -regex = '''(?i)\b(doo_v1_[a-f0-9]{64})(?:['|\"|\n|\r|\s|\x60|;]|$)''' -secretGroup = 1 -keywords = [ - "doo_v1_", -] - -[[rules]] -description = "DigitalOcean Personal Access Token" -id = "digitalocean-pat" -regex = '''(?i)\b(dop_v1_[a-f0-9]{64})(?:['|\"|\n|\r|\s|\x60|;]|$)''' -secretGroup = 1 -keywords = [ - "dop_v1_", -] - -[[rules]] -description = "DigitalOcean OAuth Refresh Token" -id = "digitalocean-refresh-token" -regex = '''(?i)\b(dor_v1_[a-f0-9]{64})(?:['|\"|\n|\r|\s|\x60|;]|$)''' -secretGroup = 1 -keywords = [ - "dor_v1_", -] - -[[rules]] -id = "Discord API key" -description = "Discord API key" -regex = '''(?i)(discord[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{64})['\"]''' -secretGroup = 3 -keywords = [ - "discord", -] - -[[rules]] -id = "Discord client ID" -description = "Discord client ID" -regex = '''(?i)(discord[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9]{18})['\"]''' -secretGroup = 3 -keywords = [ - "discord", -] - -[[rules]] -id = "Discord client secret" -description = "Discord client secret" -regex = '''(?i)(discord[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9=_\-]{32})['\"]''' -secretGroup = 3 -keywords = [ - "discord", -] - -[[rules]] -id = "Doppler API token" -description = "Doppler API token" -regex = '''['\"](dp\.pt\.)(?i)[a-z0-9]{43}['\"]''' -keywords = [ - "doppler", -] - -[[rules]] -id = "Dropbox API secret/key" -description = "Dropbox API secret/key" -regex = '''(?i)(dropbox[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{15})['\"]''' -keywords = [ - "dropbox", -] - -[[rules]] -id = "Dropbox short lived API token" -description = "Dropbox short lived API token" -regex = '''(?i)(dropbox[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"](sl\.[a-z0-9\-=_]{135})['\"]''' -keywords = [ - "dropbox", -] - -[[rules]] -id = "Dropbox long lived API token" -description = "Dropbox long lived API token" -regex = '''(?i)(dropbox[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"][a-z0-9]{11}(AAAAAAAAAA)[a-z0-9\-_=]{43}['\"]''' -keywords = [ - "dropbox", -] - -[[rules]] -id = "Duffel API token" -description = "Duffel API token" -regex = '''['\"]duffel_(test|live)_(?i)[a-z0-9_-]{43}['\"]''' -keywords = [ - "duffel", -] - -[[rules]] -id = "Dynatrace API token" -description = "Dynatrace API token" -regex = '''['\"]dt0c01\.(?i)[a-z0-9]{24}\.[a-z0-9]{64}['\"]''' -keywords = [ - "dt0c01", -] - -[[rules]] -id = "EasyPost API token" -description = "EasyPost API token" -regex = '''['\"]EZAK(?i)[a-z0-9]{54}['\"]''' -keywords = [ - "EZAK", -] - - -[[rules]] -id = "EasyPost test API token" -description = "EasyPost test API token" -regex = '''['\"]EZTK(?i)[a-z0-9]{54}['\"]''' -keywords = [ - "EZTK", -] - -[[rules]] -id = "Fastly API token" -description = "Fastly API token" -regex = '''(?i)(fastly[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9\-=_]{32})['\"]''' -secretGroup = 3 -keywords = [ - "fastly", -] - -[[rules]] -id = "Finicity client secret" -description = "Finicity client secret" -regex = '''(?i)(finicity[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{20})['\"]''' -secretGroup = 3 -keywords = [ - "finicity", -] - -[[rules]] -id = "Finicity API token" -description = "Finicity API token" -regex = '''(?i)(finicity[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-f0-9]{32})['\"]''' -secretGroup = 3 -keywords = [ - "finicity", -] - -[[rules]] -id = "Flutterwave public key" -description = "Flutterwave public key" -regex = '''FLWPUBK_TEST-(?i)[a-h0-9]{32}-X''' -keywords = [ - "FLWPUBK_TEST", -] - -[[rules]] -id = "Flutterwave secret key" -description = "Flutterwave secret key" -regex = '''FLWSECK_TEST-(?i)[a-h0-9]{32}-X''' -keywords = [ - "FLWSECK_TEST", -] - -[[rules]] -id = "Flutterwave encrypted key" -description = "Flutterwave encrypted key" -regex = '''FLWSECK_TEST[a-h0-9]{12}''' -keywords = [ - "FLWSECK_TEST", -] - -[[rules]] -id = "Frame.io API token" -description = "Frame.io API token" -regex = '''fio-u-(?i)[a-z0-9-_=]{64}''' -keywords = [ - "fio-u-", -] - -[[rules]] -id = "GoCardless API token" -description = "GoCardless API token" -regex = '''['\"]live_(?i)[a-z0-9-_=]{40}['\"]''' -keywords = [ - "gocardless", -] - -[[rules]] -id = "Grafana API token" -description = "Grafana API token" -regex = '''['\"]eyJrIjoi(?i)[a-z0-9-_=]{72,92}['\"]''' -keywords = [ - "grafana", -] - -[[rules]] -id = "Hashicorp Terraform user/org API token" -description = "Hashicorp Terraform user/org API token" -regex = '''['\"](?i)[a-z0-9]{14}\.atlasv1\.[a-z0-9-_=]{60,70}['\"]''' -keywords = [ - "atlasv1", - "hashicorp", - "terraform" -] - -[[rules]] -id = "Hashicorp Vault batch token" -description = "Hashicorp Vault batch token" -regex = '''b\.AAAAAQ[0-9a-zA-Z_-]{156}''' -keywords = [ - "hashicorp", - "AAAAAQ", - "vault" -] - -[[rules]] -id = "Hubspot API token" -description = "Hubspot API token" -regex = '''(?i)(hubspot[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{8}-[a-h0-9]{4}-[a-h0-9]{4}-[a-h0-9]{4}-[a-h0-9]{12})['\"]''' -secretGroup = 3 -keywords = [ - "hubspot", -] - -[[rules]] -id = "Intercom API token" -description = "Intercom API token" -regex = '''(?i)(intercom[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9=_]{60})['\"]''' -secretGroup = 3 -keywords = [ - "intercom", -] - -[[rules]] -id = "Intercom client secret/ID" -description = "Intercom client secret/ID" -regex = '''(?i)(intercom[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{8}-[a-h0-9]{4}-[a-h0-9]{4}-[a-h0-9]{4}-[a-h0-9]{12})['\"]''' -secretGroup = 3 -keywords = [ - "intercom", -] - -[[rules]] -id = "Ionic API token" -description = "Ionic API token" -regex = '''ion_(?i)[a-z0-9]{42}''' -keywords = [ - "ion_", -] - -[[rules]] -id = "Linear API token" -description = "Linear API token" -regex = '''lin_api_(?i)[a-z0-9]{40}''' -keywords = [ - "lin_api_", -] - -[[rules]] -id = "Linear client secret/ID" -description = "Linear client secret/ID" -regex = '''(?i)(linear[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-f0-9]{32})['\"]''' -secretGroup = 3 -keywords = [ - "linear", -] - -[[rules]] -id = "Lob API Key" -description = "Lob API Key" -regex = '''(?i)(lob[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]((live|test)_[a-f0-9]{35})['\"]''' -secretGroup = 3 -keywords = [ - "lob", -] - -[[rules]] -id = "Lob Publishable API Key" -description = "Lob Publishable API Key" -regex = '''(?i)(lob[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]((test|live)_pub_[a-f0-9]{31})['\"]''' -secretGroup = 3 -keywords = [ - "lob", -] - -[[rules]] -id = "Mailchimp API key" -description = "Mailchimp API key" -regex = '''(?i)(mailchimp[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-f0-9]{32}-us20)['\"]''' -secretGroup = 3 -keywords = [ - "mailchimp", -] - -[[rules]] -id = "Mailgun private API token" -description = "Mailgun private API token" -regex = '''(?i)(mailgun[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"](key-[a-f0-9]{32})['\"]''' -secretGroup = 3 -keywords = [ - "mailgun", -] - -[[rules]] -id = "Mailgun public validation key" -description = "Mailgun public validation key" -regex = '''(?i)(mailgun[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"](pubkey-[a-f0-9]{32})['\"]''' -secretGroup = 3 -keywords = [ - "mailgun", -] - -[[rules]] -id = "Mailgun webhook signing key" -description = "Mailgun webhook signing key" -regex = '''(?i)(mailgun[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{32}-[a-h0-9]{8}-[a-h0-9]{8})['\"]''' -secretGroup = 3 -keywords = [ - "mailgun", -] - -[[rules]] -id = "Mapbox API token" -description = "Mapbox API token" -regex = '''(?i)(pk\.[a-z0-9]{60}\.[a-z0-9]{22})''' -keywords = [ - "mapbox", -] - -[[rules]] -id = "messagebird-api-token" -description = "MessageBird API token" -regex = '''(?i)(messagebird[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{25})['\"]''' -secretGroup = 3 -keywords = [ - "messagebird", -] - -[[rules]] -id = "MessageBird API client ID" -description = "MessageBird API client ID" -regex = '''(?i)(messagebird[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{8}-[a-h0-9]{4}-[a-h0-9]{4}-[a-h0-9]{4}-[a-h0-9]{12})['\"]''' -secretGroup = 3 -keywords = [ - "messagebird", -] - -[[rules]] -id = "New Relic user API Key" -description = "New Relic user API Key" -regex = '''['\"](NRAK-[A-Z0-9]{27})['\"]''' -keywords = [ - "NRAK", -] - -[[rules]] -id = "New Relic user API ID" -description = "New Relic user API ID" -regex = '''(?i)(newrelic[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([A-Z0-9]{64})['\"]''' -secretGroup = 3 -keywords = [ - "newrelic", -] - -[[rules]] -id = "New Relic ingest browser API token" -description = "New Relic ingest browser API token" -regex = '''['\"](NRJS-[a-f0-9]{19})['\"]''' -keywords = [ - "NRJS", -] - -[[rules]] -id = "npm access token" -description = "npm access token" -regex = '''['\"](npm_(?i)[a-z0-9]{36})['\"]''' -keywords = [ - "npm_", -] - -[[rules]] -id = "Planetscale password" -description = "Planetscale password" -regex = '''pscale_pw_(?i)[a-z0-9\-_\.]{43}''' -keywords = [ - "pscale_pw_", -] - -[[rules]] -id = "Planetscale API token" -description = "Planetscale API token" -regex = '''pscale_tkn_(?i)[a-z0-9\-_\.]{43}''' -keywords = [ - "pscale_tkn_", -] - -[[rules]] -id = "Postman API token" -description = "Postman API token" -regex = '''PMAK-(?i)[a-f0-9]{24}\-[a-f0-9]{34}''' -keywords = [ - "PMAK-", -] - -[[rules]] -id = "Pulumi API token" -description = "Pulumi API token" -regex = '''pul-[a-f0-9]{40}''' -keywords = [ - "pul-", -] - -[[rules]] -id = "Rubygem API token" -description = "Rubygem API token" -regex = '''rubygems_[a-f0-9]{48}''' -keywords = [ - "rubygems_", -] - -[[rules]] -id = "Segment Public API token" -description = "Segment Public API token" -regex = '''sgp_[a-zA-Z0-9]{64}''' -keywords = [ - "sgp_", -] - -[[rules]] -id = "Sendgrid API token" -description = "Sendgrid API token" -regex = '''SG\.(?i)[a-z0-9_\-\.]{66}''' -keywords = [ - "sendgrid", -] - -[[rules]] -id = "Sendinblue API token" -description = "Sendinblue API token" -regex = '''xkeysib-[a-f0-9]{64}\-(?i)[a-z0-9]{16}''' -keywords = [ - "xkeysib-", -] - -[[rules]] -id = "Sendinblue SMTP token" -description = "Sendinblue SMTP token" -regex = '''xsmtpsib-[a-f0-9]{64}\-(?i)[a-z0-9]{16}''' -keywords = [ - "xsmtpsib-", -] - -[[rules]] -id = "Shippo API token" -description = "Shippo API token" -regex = '''shippo_(live|test)_[a-f0-9]{40}''' -keywords = [ - "shippo_", -] - -[[rules]] -id = "Linkedin Client secret" -description = "Linkedin Client secret" -regex = '''(?i)(linkedin[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z]{16})['\"]''' -secretGroup = 3 -keywords = [ - "linkedin", -] - -[[rules]] -id = "Linkedin Client ID" -description = "Linkedin Client ID" -regex = '''(?i)(linkedin[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{14})['\"]''' -secretGroup = 3 -keywords = [ - "linkedin", -] - -[[rules]] -id = "Twitch API token" -description = "Twitch API token" -regex = '''(?i)(twitch[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{30})['\"]''' -secretGroup = 3 -keywords = [ - "twitch", -] - -[[rules]] -id = "Typeform API token" -description = "Typeform API token" -regex = '''(?i)(typeform[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}(tfp_[a-z0-9\-_\.=]{59})''' -secretGroup = 3 -keywords = [ - "typeform", -] - -[[rules]] -id = "Yandex.Cloud IAM Cookie v1 - 1" -description = "Yandex.Cloud IAM Cookie v1" -regex = '''\bc1\.[A-Z0-9a-z_-]+[=]{0,2}\.[A-Z0-9a-z_-]{86}[=]{0,2}['|\"|\n|\r|\s|\x60]''' -keywords = [ - "yandex", -] - -[[rules]] -id = "Yandex.Cloud IAM Cookie v1 - 2" -description = "Yandex.Cloud IAM Token v1" -regex = '''\bt1\.[A-Z0-9a-z_-]+[=]{0,2}\.[A-Z0-9a-z_-]{86}[=]{0,2}['|\"|\n|\r|\s|\x60]''' -keywords = [ - "yandex", -] - -[[rules]] -id = "Yandex.Cloud IAM Cookie v1 - 3" -description = "Yandex.Cloud IAM API key v1" -regex = '''\bAQVN[A-Za-z0-9_\-]{35,38}['|\"|\n|\r|\s|\x60]''' -keywords = [ - "yandex", -] - -[[rules]] -id = "Yandex.Cloud AWS API compatible Access Secret" -description = "Yandex.Cloud AWS API compatible Access Secret" -regex = '''\bYC[a-zA-Z0-9_\-]{38}['|\"|\n|\r|\s|\x60]''' -keywords = [ - "yandex", -] - -[[rules]] -id = "Meta access token" -description = "Meta access token" -regex = '''\bEA[a-zA-Z0-9]{90,400}['|\"|\n|\r|\s|\x60]''' -keywords = [ - "EA", -] - -[[rules]] -id = "Oculus access token" -description = "Oculus access token" -regex = '''\bOC[a-zA-Z0-9]{90,400}['|\"|\n|\r|\s|\x60]''' -keywords = [ - "OC", -] - -[[rules]] -id = "Instagram access token" -description = "Instagram access token" -regex = '''\bIG[a-zA-Z0-9]{90,400}['|\"|\n|\r|\s|\x60]''' -keywords = [ - "IG", -] - -[[rules]] -id = "CircleCI access tokens" -description = "CircleCI access tokens" -regex = '''\bCCI(?:PAT|PRJ)_[a-zA-Z0-9]{22}_[a-f0-9]{40}''' -keywords = [ - "CircleCI" -] - -[[rules]] -description = "Open AI API key" -id = "open ai token" -regex = '''\bsk-[a-zA-Z0-9]{48}\b''' -keywords = [ - "sk-", -] - [allowlist] description = "global allow lists" paths = [ -- GitLab From 06cc4b804dcd634f0afaae0020ec6722a7da3505 Mon Sep 17 00:00:00 2001 From: Serena Fang Date: Tue, 14 Nov 2023 15:48:20 -0600 Subject: [PATCH 09/18] Edit spec and scan cleanups --- gems/gitlab-secret_detection/Gemfile.lock | 4 ---- .../lib/gitlab/secret_detection/scan.rb | 12 ++++++------ .../spec/gitlab/secret_detection/scan_spec.rb | 6 ++---- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/gems/gitlab-secret_detection/Gemfile.lock b/gems/gitlab-secret_detection/Gemfile.lock index b3196d63418314..8189479cdb72cb 100644 --- a/gems/gitlab-secret_detection/Gemfile.lock +++ b/gems/gitlab-secret_detection/Gemfile.lock @@ -6,11 +6,7 @@ PATH GEM remote: https://rubygems.org/ specs: -<<<<<<< HEAD activesupport (7.1.2) -======= - activesupport (7.1.1) ->>>>>>> 6e381c3897a (Add gitlab styles dependency) base64 bigdecimal concurrent-ruby (~> 1.0, >= 1.0.2) diff --git a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb index a8a3356ec443b0..e62e661c26a77f 100644 --- a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb +++ b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb @@ -9,7 +9,7 @@ class Scan def initialize @secrets_config_data = parse_file @secrets_pattern_set = create_patterns - @logger = Gitlab::Git::Logger.build + # @logger = Gitlab::Git::Logger.build end def secrets_scan(gitaly_blob) @@ -18,13 +18,13 @@ def secrets_scan(gitaly_blob) regex_match(gitaly_blob) end - private + # private def parse_file file_path = File.expand_path('../../gitleaks.toml', __dir__) Tomlrb.load_file(file_path) - rescue StandardError => e - logger.error(message: e.message) + rescue StandardError + # @logger.error(message: e.message) false end @@ -38,8 +38,8 @@ def create_patterns @secrets_pattern_set.compile @secrets_pattern_set - rescue StandardError => e - logger.error(message: e.message) + rescue StandardError + # @logger.error(message: e.message) false end diff --git a/gems/gitlab-secret_detection/spec/gitlab/secret_detection/scan_spec.rb b/gems/gitlab-secret_detection/spec/gitlab/secret_detection/scan_spec.rb index ddbefe3443f06e..2e7f67b49234b6 100644 --- a/gems/gitlab-secret_detection/spec/gitlab/secret_detection/scan_spec.rb +++ b/gems/gitlab-secret_detection/spec/gitlab/secret_detection/scan_spec.rb @@ -48,8 +48,7 @@ blob = Struct.new(:data).new("no secret") it "does not match" do - expect(described_class.new.secrets_scan(gitaly_blob: blob, secrets_config_data: file_data, - secrets_pattern_set: pattern_set)).to be_nil + expect(described_class.new.secrets_scan(gitaly_blob: blob)).to be_nil end end @@ -57,8 +56,7 @@ blob = Struct.new(:data).new("glpat-" + "12312312312312312312") # rubocop:disable Style/StringConcatenation -- Creates a gitleaks false positive it "matches glpat" do - expect(described_class.new.secrets_scan(gitaly_blob: blob, secrets_config_data: file_data, - secrets_pattern_set: pattern_set)).to eq({ 1 => ["gitlab_personal_access_token"] }) + expect(described_class.new.secrets_scan(gitaly_blob: blob)).to eq({ 1 => ["gitlab_personal_access_token"] }) end end end -- GitLab From 47c48322a17960fb567458e9a2b6a618c621586c Mon Sep 17 00:00:00 2001 From: Serena Fang Date: Wed, 15 Nov 2023 18:12:50 -0600 Subject: [PATCH 10/18] Apply reviewer suggestions --- gems/gitlab-secret_detection/Gemfile.lock | 9 ++---- .../gitlab-secret_detection.gemspec | 1 - .../lib/gitlab/secret_detection/scan.rb | 9 +++--- .../gitlab-secret_detection/lib/gitleaks.toml | 4 +-- .../spec/gitlab/secret_detection/scan_spec.rb | 28 +++++++++++++------ 5 files changed, 27 insertions(+), 24 deletions(-) diff --git a/gems/gitlab-secret_detection/Gemfile.lock b/gems/gitlab-secret_detection/Gemfile.lock index 8189479cdb72cb..659ff5319fc155 100644 --- a/gems/gitlab-secret_detection/Gemfile.lock +++ b/gems/gitlab-secret_detection/Gemfile.lock @@ -40,10 +40,9 @@ GEM i18n (1.14.1) concurrent-ruby (~> 1.0) json (2.6.3) + mini_portile2 (2.8.5) minitest (5.20.0) mutex_m (0.2.0) - method_source (1.0.0) - mini_portile2 (2.8.5) parallel (1.23.0) parser (3.2.2.4) ast (~> 2.4.1) @@ -52,9 +51,6 @@ GEM coderay parser unparser - pry (0.14.2) - coderay (~> 1.1) - method_source (~> 1.0) racc (1.7.1) rack (3.0.8) rainbow (3.1.1) @@ -122,9 +118,9 @@ GEM rubocop-factory_bot (~> 2.22) ruby-progressbar (1.13.0) ruby2_keywords (0.0.5) + tomlrb (2.0.3) tzinfo (2.0.6) concurrent-ruby (~> 1.0) - tomlrb (2.0.3) unicode-display_width (2.5.0) unparser (0.6.9) diff-lcs (~> 1.3) @@ -136,7 +132,6 @@ PLATFORMS DEPENDENCIES gitlab-secret_detection! gitlab-styles (~> 10.1.0) - pry re2 rspec (~> 3.0) rspec-benchmark (~> 0.6.0) diff --git a/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec b/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec index 16f162ece825aa..36aa81d66bba6b 100644 --- a/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec +++ b/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec @@ -25,7 +25,6 @@ Gem::Specification.new do |spec| spec.require_paths = ["lib"] spec.add_development_dependency "gitlab-styles", "~> 10.1.0" - spec.add_development_dependency "pry" spec.add_development_dependency "re2" spec.add_development_dependency "rspec", "~> 3.0" spec.add_development_dependency "rspec-benchmark", "~> 0.6.0" diff --git a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb index e62e661c26a77f..e9cc83cc531c34 100644 --- a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb +++ b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb @@ -2,6 +2,7 @@ require 'tomlrb' require 're2' +require 'logger' module Gitlab module SecretDetection @@ -9,7 +10,7 @@ class Scan def initialize @secrets_config_data = parse_file @secrets_pattern_set = create_patterns - # @logger = Gitlab::Git::Logger.build + @logger = ::Logger.new($stdout) end def secrets_scan(gitaly_blob) @@ -18,13 +19,11 @@ def secrets_scan(gitaly_blob) regex_match(gitaly_blob) end - # private - def parse_file file_path = File.expand_path('../../gitleaks.toml', __dir__) Tomlrb.load_file(file_path) rescue StandardError - # @logger.error(message: e.message) + @logger.error(message: e.message) false end @@ -39,7 +38,7 @@ def create_patterns @secrets_pattern_set.compile @secrets_pattern_set rescue StandardError - # @logger.error(message: e.message) + @logger.error(message: e.message) false end diff --git a/gems/gitlab-secret_detection/lib/gitleaks.toml b/gems/gitlab-secret_detection/lib/gitleaks.toml index a83dd9f6e32ff6..f00688674bf7eb 100644 --- a/gems/gitlab-secret_detection/lib/gitleaks.toml +++ b/gems/gitlab-secret_detection/lib/gitleaks.toml @@ -39,10 +39,10 @@ keywords = [ [[rules]] id = "gitlab_feed_token" description = "GitLab Feed Token" -regex = '''feed_token=[0-9a-zA-Z_\-]{20}''' +regex = '''glft=[0-9a-zA-Z_\-]{20}''' tags = ["gitlab"] keywords = [ - "feed_token", + "glft", ] [allowlist] diff --git a/gems/gitlab-secret_detection/spec/gitlab/secret_detection/scan_spec.rb b/gems/gitlab-secret_detection/spec/gitlab/secret_detection/scan_spec.rb index 2e7f67b49234b6..cd141000125d2e 100644 --- a/gems/gitlab-secret_detection/spec/gitlab/secret_detection/scan_spec.rb +++ b/gems/gitlab-secret_detection/spec/gitlab/secret_detection/scan_spec.rb @@ -3,6 +3,8 @@ require 'spec_helper' RSpec.describe Gitlab::SecretDetection::Scan do + let(:scan) { described_class.new } + file_data = { "title" => "gitleaks config", "rules" => [ @@ -19,36 +21,44 @@ ] } - let(:pattern_set) { described_class.new.create_patterns } - it "has a version number" do expect(Gitlab::SecretDetection::VERSION).not_to be_nil end it "parses the toml file" do - expect(described_class.new.parse_file).not_to be_nil + expect(scan.parse_file).not_to be_nil end context "when it creates RE2 patterns from file data" do before do - allow(described_class).to receive(:parse_file).and_return(file_data) + allow(scan).to receive(:parse_file).and_return(file_data) end it "successfully creates RE2 patterns" do - expect(pattern_set).not_to be_nil + expect(scan.create_patterns).not_to be_nil end end context "when matching patterns" do before do - allow(described_class).to receive(:parse_file).and_return(file_data) + allow(scan).to receive(:parse_file).and_return(file_data) end context 'when the blob does not contain a secret' do - blob = Struct.new(:data).new("no secret") + blob = Struct.new(:data).new("no secrets") it "does not match" do - expect(described_class.new.secrets_scan(gitaly_blob: blob)).to be_nil + expect(scan.secrets_scan(blob)).to be_nil + end + + it "attempts to keyword match" do + expect(scan).to receive(:keyword_match) + scan.secrets_scan(blob) + end + + it "does not attempt to regex match" do + expect(scan).not_to receive(:regex_match) + scan.secrets_scan(blob) end end @@ -56,7 +66,7 @@ blob = Struct.new(:data).new("glpat-" + "12312312312312312312") # rubocop:disable Style/StringConcatenation -- Creates a gitleaks false positive it "matches glpat" do - expect(described_class.new.secrets_scan(gitaly_blob: blob)).to eq({ 1 => ["gitlab_personal_access_token"] }) + expect(scan.secrets_scan(blob)).to eq({ 1 => ["gitlab_personal_access_token"] }) end end end -- GitLab From c1c1954de1610e7ab148c66b97a8549314c9666f Mon Sep 17 00:00:00 2001 From: Serena Fang Date: Wed, 15 Nov 2023 18:16:57 -0600 Subject: [PATCH 11/18] Rescue error e --- .../lib/gitlab/secret_detection/scan.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb index e9cc83cc531c34..6a28e20b578932 100644 --- a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb +++ b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb @@ -22,7 +22,7 @@ def secrets_scan(gitaly_blob) def parse_file file_path = File.expand_path('../../gitleaks.toml', __dir__) Tomlrb.load_file(file_path) - rescue StandardError + rescue StandardError => e @logger.error(message: e.message) false end @@ -37,7 +37,7 @@ def create_patterns @secrets_pattern_set.compile @secrets_pattern_set - rescue StandardError + rescue StandardError => e @logger.error(message: e.message) false end -- GitLab From 8b4e8b60fda15358894f55668edc5c51ac75a3a8 Mon Sep 17 00:00:00 2001 From: Serena Fang Date: Wed, 8 Nov 2023 16:49:40 -0600 Subject: [PATCH 12/18] Secret detection gem scan logic Changelog: added --- gems/gitlab-secret_detection/Gemfile.lock | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gems/gitlab-secret_detection/Gemfile.lock b/gems/gitlab-secret_detection/Gemfile.lock index 659ff5319fc155..98734b973faa8d 100644 --- a/gems/gitlab-secret_detection/Gemfile.lock +++ b/gems/gitlab-secret_detection/Gemfile.lock @@ -43,6 +43,7 @@ GEM mini_portile2 (2.8.5) minitest (5.20.0) mutex_m (0.2.0) + method_source (1.0.0) parallel (1.23.0) parser (3.2.2.4) ast (~> 2.4.1) @@ -51,6 +52,9 @@ GEM coderay parser unparser + pry (0.14.2) + coderay (~> 1.1) + method_source (~> 1.0) racc (1.7.1) rack (3.0.8) rainbow (3.1.1) @@ -121,6 +125,7 @@ GEM tomlrb (2.0.3) tzinfo (2.0.6) concurrent-ruby (~> 1.0) + tomlrb (2.0.3) unicode-display_width (2.5.0) unparser (0.6.9) diff-lcs (~> 1.3) -- GitLab From 9fce2c0694c16700dbe9fc01f0c4e59a0c3e92db Mon Sep 17 00:00:00 2001 From: Serena Fang Date: Thu, 9 Nov 2023 15:40:46 -0600 Subject: [PATCH 13/18] Connect push check to secret detection gem Changelog: added --- .../gitlab/checks/push_rules/secrets_check.rb | 80 ++++++++++++++++--- 1 file changed, 71 insertions(+), 9 deletions(-) diff --git a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb index c0fa9d868dbe09..d6ad0afd4b2e98 100644 --- a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb +++ b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb @@ -5,18 +5,80 @@ module Gitlab module Checks module PushRules class SecretsCheck < ::Gitlab::Checks::BaseBulkChecker + ERROR_MESSAGE = "Secrets check failed:" + LOG_MESSAGE = "Checking if any files contain secrets..." + + SKIP_PATTERN = /\[(secret[ _-]detection[ _-]skip|skip[ _-]secret[ _-]detection)\]/i + def validate! - # Return early and not perform the check if: - # 1. unless application setting is enabled (regardless of whether it's a gitlab dedicated instance or not) - # 2. feature flag is disabled for this project (when instance type is not gitlab dedicated) - # 3. no push rule exist - # 4. license is not ultimate - return unless ::Gitlab::CurrentSettings.pre_receive_secret_detection_enabled + # let's skip this, for dev convenience + # return unless push_rule + + # To show how a [skip secret detection] flag might work, + # we will go over the commit message and see if we can + # match it with the regex pattern above + return if skip_secret_detection? + + # Log that we're running secrets check + logger.log_timed(LOG_MESSAGE) do + # Maybe move all code below into its own check class? (similar to FileSizeCheck::AnyOversizedBlobs) + all_blobs = project.repository.list_all_blobs( + # Limit blobs size to 10MB, see https://gitlab.com/gitlab-org/gitlab/-/issues/422574#note_1524129330 + # we could also check if the blob is a binary through `blob.binary` but we would have to fetch + # all blobs first, so it might be better to limit the blob size early on here instead. + bytes_limit: 1_000_000_0, + dynamic_timeout: logger.time_left + ) + + # Enumerate through blobs and check secrets + found_secrets = [] + # limiting to 5 for dev convenience + # binding.pry_shell + + secret_detection = ::Gitlab::SecretDetection::Scan.new + secrets_pattern_set = secret_detection.create_patterns + secrets_config_data = secret_detection.parse_file + + # secrets_check = ::Gitlab::SecretDetection::Scan.new + # try processing all the blobs + all_blobs.first(5).each_with_index do |blob, i| + # use hush to scan known file with a secret + # this should generate 5 secret findings, but we won't see them anywhere, we'll just know they happened + # rubocop:disable Layout/LineLength + # finding = `cat ~/code/secure/pocs/secret-detection-go-poc/testdata/blobs/small | ~/code/secure/pocs/secret-detection-go-poc/hush stdin` + # binding.pry_shell + finding = secret_detection.secrets_scan(gitaly_blob: blob, secrets_config_data: secrets_config_data, secrets_pattern_set: secrets_pattern_set) + # rubocop:enable Layout/LineLength + # rubocop:disable Style/Next + + if finding.present? + found_secrets << finding + # rubocop:disable Gitlab/RailsLogger + Rails.logger.debug("out(#{i}):#{finding}") + # rubocop:enable Gitlab/RailsLogger + end + # rubocop:enable Style/Next + end + + # Rails.logger.debug("let's stop this here") if found_secrets.present? + if found_secrets.present? + secrets_hash = found_secrets.first # found_secrets is an array so let's unpack it for convenience + secrets_error_messsage = "" + + secrets_hash.each do |line, secret| + secrets_error_messsage += " Found #{secret} on line #{line}\n" + end + + raise ::Gitlab::GitAccess::ForbiddenError, "#{ERROR_MESSAGE}\n#{secrets_error_messsage}" + # raise ::Gitlab::GitAccess::ForbiddenError, ERROR_MESSAGE if [].empty? + end + end + end - return if ::Gitlab::CurrentSettings.gitlab_dedicated_instance != true && - ::Feature.disabled?(:pre_receive_secret_detection_push_check, push_rule.project) + private - return unless push_rule && push_rule.project.licensed_feature_available?(:pre_receive_secret_detection) + def skip_secret_detection? + changes_access.commits.any? { |commit| commit.safe_message =~ SKIP_PATTERN } end end end -- GitLab From bdf6063ec385a5f34c2b70e67a4d7d2340f428d6 Mon Sep 17 00:00:00 2001 From: Serena Fang Date: Wed, 15 Nov 2023 18:53:25 -0600 Subject: [PATCH 14/18] Add rubocop disable comments --- ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb index d6ad0afd4b2e98..aca83b57ad18b2 100644 --- a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb +++ b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb @@ -44,16 +44,16 @@ def validate! all_blobs.first(5).each_with_index do |blob, i| # use hush to scan known file with a secret # this should generate 5 secret findings, but we won't see them anywhere, we'll just know they happened - # rubocop:disable Layout/LineLength + # rubocop:disable Layout/LineLength -- WIP, will likely remove these rubocop disables later # finding = `cat ~/code/secure/pocs/secret-detection-go-poc/testdata/blobs/small | ~/code/secure/pocs/secret-detection-go-poc/hush stdin` # binding.pry_shell finding = secret_detection.secrets_scan(gitaly_blob: blob, secrets_config_data: secrets_config_data, secrets_pattern_set: secrets_pattern_set) # rubocop:enable Layout/LineLength - # rubocop:disable Style/Next + # rubocop:disable Style/Next -- WIP, will likely remove these rubocop disables later if finding.present? found_secrets << finding - # rubocop:disable Gitlab/RailsLogger + # rubocop:disable Gitlab/RailsLogger -- WIP, will likely remove these rubocop disables later Rails.logger.debug("out(#{i}):#{finding}") # rubocop:enable Gitlab/RailsLogger end -- GitLab From 3205e0d4946a47f2d9535260fadf5a14fdf26955 Mon Sep 17 00:00:00 2001 From: Serena Fang Date: Wed, 15 Nov 2023 18:56:52 -0600 Subject: [PATCH 15/18] Fix gemfile lock --- gems/gitlab-secret_detection/Gemfile.lock | 5 ----- 1 file changed, 5 deletions(-) diff --git a/gems/gitlab-secret_detection/Gemfile.lock b/gems/gitlab-secret_detection/Gemfile.lock index 98734b973faa8d..659ff5319fc155 100644 --- a/gems/gitlab-secret_detection/Gemfile.lock +++ b/gems/gitlab-secret_detection/Gemfile.lock @@ -43,7 +43,6 @@ GEM mini_portile2 (2.8.5) minitest (5.20.0) mutex_m (0.2.0) - method_source (1.0.0) parallel (1.23.0) parser (3.2.2.4) ast (~> 2.4.1) @@ -52,9 +51,6 @@ GEM coderay parser unparser - pry (0.14.2) - coderay (~> 1.1) - method_source (~> 1.0) racc (1.7.1) rack (3.0.8) rainbow (3.1.1) @@ -125,7 +121,6 @@ GEM tomlrb (2.0.3) tzinfo (2.0.6) concurrent-ruby (~> 1.0) - tomlrb (2.0.3) unicode-display_width (2.5.0) unparser (0.6.9) diff-lcs (~> 1.3) -- GitLab From 9f604af4a8718e7afd2c1d8f45615da03e778a52 Mon Sep 17 00:00:00 2001 From: Serena Fang Date: Thu, 16 Nov 2023 02:28:36 -0600 Subject: [PATCH 16/18] WIP secrets check --- .../gitlab/checks/push_rules/secrets_check.rb | 123 +++++++++++------- 1 file changed, 74 insertions(+), 49 deletions(-) diff --git a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb index aca83b57ad18b2..8b013d99737b6e 100644 --- a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb +++ b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb @@ -11,68 +11,93 @@ class SecretsCheck < ::Gitlab::Checks::BaseBulkChecker SKIP_PATTERN = /\[(secret[ _-]detection[ _-]skip|skip[ _-]secret[ _-]detection)\]/i def validate! - # let's skip this, for dev convenience - # return unless push_rule + # Return early and not perform the check if: + # 1. unless application setting is enabled (regardless of whether it's a gitlab dedicated instance or not) + # 2. feature flag is disabled for this project (when instance type is not gitlab dedicated) + # 3. no push rule exist + # 4. license is not ultimate + # return unless ::Gitlab::CurrentSettings.pre_receive_secret_detection_enabled - # To show how a [skip secret detection] flag might work, - # we will go over the commit message and see if we can - # match it with the regex pattern above return if skip_secret_detection? # Log that we're running secrets check logger.log_timed(LOG_MESSAGE) do # Maybe move all code below into its own check class? (similar to FileSizeCheck::AnyOversizedBlobs) - all_blobs = project.repository.list_all_blobs( - # Limit blobs size to 10MB, see https://gitlab.com/gitlab-org/gitlab/-/issues/422574#note_1524129330 - # we could also check if the blob is a binary through `blob.binary` but we would have to fetch - # all blobs first, so it might be better to limit the blob size early on here instead. - bytes_limit: 1_000_000_0, - dynamic_timeout: logger.time_left - ) - - # Enumerate through blobs and check secrets - found_secrets = [] - # limiting to 5 for dev convenience - # binding.pry_shell - - secret_detection = ::Gitlab::SecretDetection::Scan.new - secrets_pattern_set = secret_detection.create_patterns - secrets_config_data = secret_detection.parse_file - - # secrets_check = ::Gitlab::SecretDetection::Scan.new - # try processing all the blobs - all_blobs.first(5).each_with_index do |blob, i| - # use hush to scan known file with a secret - # this should generate 5 secret findings, but we won't see them anywhere, we'll just know they happened - # rubocop:disable Layout/LineLength -- WIP, will likely remove these rubocop disables later - # finding = `cat ~/code/secure/pocs/secret-detection-go-poc/testdata/blobs/small | ~/code/secure/pocs/secret-detection-go-poc/hush stdin` - # binding.pry_shell - finding = secret_detection.secrets_scan(gitaly_blob: blob, secrets_config_data: secrets_config_data, secrets_pattern_set: secrets_pattern_set) - # rubocop:enable Layout/LineLength - # rubocop:disable Style/Next -- WIP, will likely remove these rubocop disables later - - if finding.present? - found_secrets << finding - # rubocop:disable Gitlab/RailsLogger -- WIP, will likely remove these rubocop disables later - Rails.logger.debug("out(#{i}):#{finding}") - # rubocop:enable Gitlab/RailsLogger - end - # rubocop:enable Style/Next - end + all_blobs = get_all_blobs + + found_secrets = scan_for_secrets(all_blobs) + show_message(found_secrets) if found_secrets.any? # Rails.logger.debug("let's stop this here") if found_secrets.present? - if found_secrets.present? - secrets_hash = found_secrets.first # found_secrets is an array so let's unpack it for convenience - secrets_error_messsage = "" + end + end + + def get_all_commits + new_revs = changes_access.changes.pluck(:newrev).compact # rubocop:disable CodeReuse/ActiveRecord -- WIP, will likely remove these rubocop disables later + new_blobs = project.repository.new_blobs( + new_revs, + dynamic_timeout: logger.time_left + ) + + arr = {} + new_blobs.each do |new_blob| + arr["file_path"] = new_blob.path + arr["commit_id"] = new_blob.commit_id + end + arr + end + + def get_all_blobs + project.repository.list_all_blobs( + # Limit blobs size to 10MB, see https://gitlab.com/gitlab-org/gitlab/-/issues/422574#note_1524129330 + # we could also check if the blob is a binary through `blob.binary` but we would have to fetch + # all blobs first, so it might be better to limit the blob size early on here instead. + bytes_limit: 1_000_000_0, + dynamic_timeout: logger.time_left + ) + end + + def scan_for_secrets(all_blobs) + found_secrets = Hash.new { |secret, details| secret[details] = {} } - secrets_hash.each do |line, secret| - secrets_error_messsage += " Found #{secret} on line #{line}\n" + all_blobs.first(5).each_with_index do |blob, i| + finding = ::Gitlab::SecretDetection::Scan.new.secrets_scan(blob) + # rubocop:disable Style/Next -- WIP, will likely remove these rubocop disables later + # binding.pry_shell + if finding.present? + finding.each do |line, secret| + found_secrets[i]["line_number"] = line + found_secrets[i]["secret_type"] = secret end - raise ::Gitlab::GitAccess::ForbiddenError, "#{ERROR_MESSAGE}\n#{secrets_error_messsage}" - # raise ::Gitlab::GitAccess::ForbiddenError, ERROR_MESSAGE if [].empty? + path_arr = get_all_commits + + if path_arr.present? + found_secrets[i]["file_path"] = path_arr["file_path"] + found_secrets[i]["commit_id"] = path_arr["commit_id"] + + end + # rubocop:disable Gitlab/RailsLogger -- WIP, will likely remove these rubocop disables later + Rails.logger.debug("out(#{i}):#{finding}") + # rubocop:enable Gitlab/RailsLogger end + # rubocop:enable Style/Next end + found_secrets + end + + def show_message(found_secrets) + # binding.pry_shell + secrets_error_messsage = "" + + found_secrets.each do |_, secret| + # rubocop:disable Layout/LineLength -- WIP, will likely remove these rubocop disables later + secrets_error_messsage += " Found #{secret['secret_type']} in file #{secret['file_path']} on line #{secret['line_number']} in commit #{secret['commit_id']}\n" + # rubocop:enable Layout/LineLength + end + + raise ::Gitlab::GitAccess::ForbiddenError, "#{ERROR_MESSAGE}\n#{secrets_error_messsage}" + # raise ::Gitlab::GitAccess::ForbiddenError, ERROR_MESSAGE if [].empty? end private -- GitLab From 043869bd7c94e077b1a669d37329d9164192d60f Mon Sep 17 00:00:00 2001 From: Serena Fang Date: Thu, 16 Nov 2023 23:02:58 -0600 Subject: [PATCH 17/18] Updates secret check --- .../gitlab/checks/push_rules/secrets_check.rb | 110 ++++++++++-------- .../lib/gitlab/secret_detection/scan.rb | 47 ++++---- lib/gitlab/git/repository.rb | 2 +- 3 files changed, 87 insertions(+), 72 deletions(-) diff --git a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb index 8b013d99737b6e..fd8e33cb83618c 100644 --- a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb +++ b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb @@ -7,6 +7,7 @@ module PushRules class SecretsCheck < ::Gitlab::Checks::BaseBulkChecker ERROR_MESSAGE = "Secrets check failed:" LOG_MESSAGE = "Checking if any files contain secrets..." + BLOB_BYTES_LIMIT = 1024 # Limit is 1MB to start with. SKIP_PATTERN = /\[(secret[ _-]detection[ _-]skip|skip[ _-]secret[ _-]detection)\]/i @@ -18,6 +19,11 @@ def validate! # 4. license is not ultimate # return unless ::Gitlab::CurrentSettings.pre_receive_secret_detection_enabled + # return if ::Gitlab::CurrentSettings.gitlab_dedicated_instance != true && + # ::Feature.disabled?(:pre_receive_secret_detection_push_check, project) + + # return unless push_rule && project.licensed_feature_available?(:pre_receive_secret_detection) + return if skip_secret_detection? # Log that we're running secrets check @@ -32,76 +38,80 @@ def validate! end end - def get_all_commits - new_revs = changes_access.changes.pluck(:newrev).compact # rubocop:disable CodeReuse/ActiveRecord -- WIP, will likely remove these rubocop disables later - new_blobs = project.repository.new_blobs( - new_revs, - dynamic_timeout: logger.time_left - ) - - arr = {} - new_blobs.each do |new_blob| - arr["file_path"] = new_blob.path - arr["commit_id"] = new_blob.commit_id - end - arr - end - def get_all_blobs - project.repository.list_all_blobs( - # Limit blobs size to 10MB, see https://gitlab.com/gitlab-org/gitlab/-/issues/422574#note_1524129330 - # we could also check if the blob is a binary through `blob.binary` but we would have to fetch - # all blobs first, so it might be better to limit the blob size early on here instead. - bytes_limit: 1_000_000_0, - dynamic_timeout: logger.time_left - ) + blobs = + if ignore_alternate_directories? + # filter_existing( + project.repository.list_all_blobs( + bytes_limit: BLOB_BYTES_LIMIT, + dynamic_timeout: logger.time_left, + ignore_alternate_object_directories: true + ) + # ) + else + revisions = changes_access.newrev + + project.repository.list_blobs( + revisions, + bytes_limit: BLOB_BYTES_LIMIT, + with_paths: true + ) + end + + # filter out binary blobs + blobs.reject(&:binary) end def scan_for_secrets(all_blobs) - found_secrets = Hash.new { |secret, details| secret[details] = {} } - - all_blobs.first(5).each_with_index do |blob, i| - finding = ::Gitlab::SecretDetection::Scan.new.secrets_scan(blob) - # rubocop:disable Style/Next -- WIP, will likely remove these rubocop disables later - # binding.pry_shell - if finding.present? - finding.each do |line, secret| - found_secrets[i]["line_number"] = line - found_secrets[i]["secret_type"] = secret - end - - path_arr = get_all_commits + found_secrets = {} - if path_arr.present? - found_secrets[i]["file_path"] = path_arr["file_path"] - found_secrets[i]["commit_id"] = path_arr["commit_id"] + all_blobs.each_with_index do |_blob, _i| + finding = ::Gitlab::SecretDetection::Scan.new.secrets_scan(all_blobs) + next unless finding.present? + finding.each do |key, secrets_hash| + secrets_hash.each do |line, secrets| + found_secrets[key] ||= [] + found_secrets[key] << { "line_number" => line, "secret_type" => secrets } end - # rubocop:disable Gitlab/RailsLogger -- WIP, will likely remove these rubocop disables later - Rails.logger.debug("out(#{i}):#{finding}") - # rubocop:enable Gitlab/RailsLogger end - # rubocop:enable Style/Next end found_secrets end def show_message(found_secrets) - # binding.pry_shell - secrets_error_messsage = "" + secrets_error_message = "" - found_secrets.each do |_, secret| - # rubocop:disable Layout/LineLength -- WIP, will likely remove these rubocop disables later - secrets_error_messsage += " Found #{secret['secret_type']} in file #{secret['file_path']} on line #{secret['line_number']} in commit #{secret['commit_id']}\n" - # rubocop:enable Layout/LineLength + found_secrets.each do |_, secrets_array| + secrets_array.each do |secret| + # rubocop:disable Layout/LineLength -- WIP, will likely remove these rubocop disables later + secrets_error_message += " Found #{secret['secret_type'].first} on line #{secret['line_number']}\n" + # rubocop:enable Layout/LineLength + end end - raise ::Gitlab::GitAccess::ForbiddenError, "#{ERROR_MESSAGE}\n#{secrets_error_messsage}" - # raise ::Gitlab::GitAccess::ForbiddenError, ERROR_MESSAGE if [].empty? + raise ::Gitlab::GitAccess::ForbiddenError, "#{ERROR_MESSAGE}\n#{secrets_error_message}" end private + def ignore_alternate_directories? + git_env = ::Gitlab::Git::HookEnv.all(project.repository.gl_repository) + git_env['GIT_OBJECT_DIRECTORY_RELATIVE'].present? + end + + def filter_existing(blobs) + # Filter out already existing blobs from blobs returned by `ListAllBlobs()`. + gitaly_repo = project.repository.gitaly_repository.dup.tap { |repo| repo.git_object_directory = "" } + + map_blob_id_to_existence = project.repository.gitaly_commit_client.object_existence_map( + blobs.map(&:id), + gitaly_repo: gitaly_repo + ) + + blobs.reject { |blob| map_blob_id_to_existence[blob.id].present? } + end + def skip_secret_detection? changes_access.commits.any? { |commit| commit.safe_message =~ SKIP_PATTERN } end diff --git a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb index 6a28e20b578932..048b8ac2815a02 100644 --- a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb +++ b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb @@ -13,10 +13,10 @@ def initialize @logger = ::Logger.new($stdout) end - def secrets_scan(gitaly_blob) - return unless keyword_match(gitaly_blob) + def secrets_scan(blobs) + return unless keyword_match(blobs) - regex_match(gitaly_blob) + regex_match(blobs) end def parse_file @@ -42,35 +42,40 @@ def create_patterns false end - def keyword_match(gitaly_blob) + def keyword_match(blobs) secrets_keywords = [] - @secrets_config_data["rules"].each do |rule| - pattern = rule["keywords"] - secrets_keywords << pattern - end + blobs.each do |blob| + @secrets_config_data["rules"].each do |rule| + pattern = rule["keywords"] + secrets_keywords << pattern + end - secrets_keywords.compact.flatten.any? do |keyword| - break true if gitaly_blob.data.include?(keyword) + secrets_keywords.compact.flatten.any? do |keyword| + break true if blob.data.include?(keyword) + end end end - def regex_match(gitaly_blob) - gitaly_blob_data = gitaly_blob.data - split_strings = gitaly_blob_data.split("\n") + def regex_match(blobs) + # binding.pry_shell + found_secrets = Hash.new { |secret, details| secret[details] = {} } - found_secrets = {} + blobs.each_with_index do |blob, i| + gitaly_blob_data = blob.data + split_strings = gitaly_blob_data.split("\n") - split_strings.each_with_index do |s, i| - patterns = @secrets_pattern_set.match(s) - next unless patterns.any? + split_strings.each_with_index do |string, l| + patterns = @secrets_pattern_set.match(string) + next unless patterns.any? - matched_patterns = [] + matched_patterns = [] - patterns.each do |p| - matched_patterns << @secrets_config_data["rules"][p]["id"] + patterns.each do |p| + matched_patterns << @secrets_config_data["rules"][p]["id"] + end + found_secrets[i][l + 1] = matched_patterns end - found_secrets[i + 1] = matched_patterns end found_secrets end diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index db6e6b4d00b2b3..e701e14a3c09e4 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -46,7 +46,7 @@ def initialize(error_code) attr_reader :storage, :gl_repository, :gl_project_path, :container - delegate :list_all_blobs, to: :gitaly_blob_client + delegate :list_all_blobs, :list_blobs, to: :gitaly_blob_client # This remote name has to be stable for all types of repositories that # can join an object pool. If it's structure ever changes, a migration -- GitLab From b7e94ae7933217c1b97bb654892ca0064d277f70 Mon Sep 17 00:00:00 2001 From: Serena Fang Date: Tue, 21 Nov 2023 21:43:04 -0600 Subject: [PATCH 18/18] Fix spacing --- ee/lib/ee/gitlab/checks/push_rule_check.rb | 2 +- .../gitlab/checks/push_rules/secrets_check.rb | 26 ++--- .../lib/gitlab/secret_detection/scan.rb | 97 +++++++++++-------- 3 files changed, 63 insertions(+), 62 deletions(-) diff --git a/ee/lib/ee/gitlab/checks/push_rule_check.rb b/ee/lib/ee/gitlab/checks/push_rule_check.rb index 67a7c9b9dff236..8dc61203963cfe 100644 --- a/ee/lib/ee/gitlab/checks/push_rule_check.rb +++ b/ee/lib/ee/gitlab/checks/push_rule_check.rb @@ -5,7 +5,7 @@ module Gitlab module Checks class PushRuleCheck < ::Gitlab::Checks::BaseBulkChecker def validate! - return unless push_rule + # return unless push_rule if ::Feature.enabled?(:parallel_push_checks, project, type: :ops) run_checks_in_parallel! diff --git a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb index fd8e33cb83618c..07a4a4e15b9248 100644 --- a/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb +++ b/ee/lib/ee/gitlab/checks/push_rules/secrets_check.rb @@ -63,33 +63,19 @@ def get_all_blobs end def scan_for_secrets(all_blobs) - found_secrets = {} - - all_blobs.each_with_index do |_blob, _i| - finding = ::Gitlab::SecretDetection::Scan.new.secrets_scan(all_blobs) - next unless finding.present? - - finding.each do |key, secrets_hash| - secrets_hash.each do |line, secrets| - found_secrets[key] ||= [] - found_secrets[key] << { "line_number" => line, "secret_type" => secrets } - end - end - end - found_secrets + ::Gitlab::SecretDetection::Scan.new.secrets_scan(all_blobs) end def show_message(found_secrets) secrets_error_message = "" - found_secrets.each do |_, secrets_array| - secrets_array.each do |secret| - # rubocop:disable Layout/LineLength -- WIP, will likely remove these rubocop disables later - secrets_error_message += " Found #{secret['secret_type'].first} on line #{secret['line_number']}\n" - # rubocop:enable Layout/LineLength + found_secrets[:result].each_key do |blob_id| + secrets_error_message += " Blob ID: #{blob_id}\n" + + found_secrets[:result][blob_id].each do |secret| + secrets_error_message += " Found #{secret['secret_type']} on line #{secret['line_number']}\n" end end - raise ::Gitlab::GitAccess::ForbiddenError, "#{ERROR_MESSAGE}\n#{secrets_error_message}" end diff --git a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb index 048b8ac2815a02..263f2d4dd178ea 100644 --- a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb +++ b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb @@ -3,78 +3,93 @@ require 'tomlrb' require 're2' require 'logger' +require 'set' module Gitlab module SecretDetection class Scan - def initialize + DEFAULT_SCAN_TIMEOUT = 50.seconds.freeze + BLOB_SCAN_TIMEOUT = 10.seconds.freeze + + # status codes + # success: { status: 0, results: { blob_id: [ { line_number:, secret_type:, status: } ] } } + # failure: { status: 1 } + SUCCESS = '0' + SCAN_TIMEOUT = '1' # whole scan times out + BLOB_TIMEOUT = '2' # individual blob scan times out + INITIALIZATION_ERROR = '3' # parse_file or create_patterns_and_keywords error + SCAN_ERROR = '4' # error while scanning + + def initialize(logger: ::Logger) @secrets_config_data = parse_file - @secrets_pattern_set = create_patterns - @logger = ::Logger.new($stdout) + @secrets_pattern_set, @secrets_keywords = create_patterns_and_keywords + @logger = logger end - def secrets_scan(blobs) - return unless keyword_match(blobs) + def secrets_scan(blobs, timeout: DEFAULT_SCAN_TIMEOUT) + Timeout.timeout(timeout) do + matched_blobs = keyword_match(blobs) + found_secrets = regex_match(matched_blobs) - regex_match(blobs) + break { status: '0', result: found_secrets } if found_secrets.any? + + { status: '1' } + end end def parse_file file_path = File.expand_path('../../gitleaks.toml', __dir__) Tomlrb.load_file(file_path) - rescue StandardError => e - @logger.error(message: e.message) - false + rescue StandardError + INITIALIZATION_ERROR end - def create_patterns + def create_patterns_and_keywords @secrets_pattern_set = RE2::Set.new + @secrets_keywords = [] @secrets_config_data["rules"].each do |rule| - pattern = rule["regex"] - @secrets_pattern_set.add(pattern) + @secrets_pattern_set.add(rule["regex"]) + @secrets_keywords << rule["keywords"] end @secrets_pattern_set.compile - @secrets_pattern_set - rescue StandardError => e - @logger.error(message: e.message) - false + + [@secrets_pattern_set, @secrets_keywords.compact.flatten] + rescue StandardError + INITIALIZATION_ERROR end def keyword_match(blobs) - secrets_keywords = [] - + secrets_set = Set.new(@secrets_keywords) + matched_blobs = [] blobs.each do |blob| - @secrets_config_data["rules"].each do |rule| - pattern = rule["keywords"] - secrets_keywords << pattern - end - - secrets_keywords.compact.flatten.any? do |keyword| - break true if blob.data.include?(keyword) - end + matched_blobs << blob if secrets_set.any? { |keyword| blob.data.freeze.include?(keyword) } end + + matched_blobs end def regex_match(blobs) - # binding.pry_shell - found_secrets = Hash.new { |secret, details| secret[details] = {} } - - blobs.each_with_index do |blob, i| - gitaly_blob_data = blob.data - split_strings = gitaly_blob_data.split("\n") + found_secrets = Hash.new { |blob, secret| blob[secret] = [] } - split_strings.each_with_index do |string, l| - patterns = @secrets_pattern_set.match(string) - next unless patterns.any? - - matched_patterns = [] - - patterns.each do |p| - matched_patterns << @secrets_config_data["rules"][p]["id"] + blobs.each do |blob| + Timeout.timeout(BLOB_SCAN_TIMEOUT) do + gitaly_blob_data = blob.data + + gitaly_blob_data.each_line.with_index do |string, line| + patterns = @secrets_pattern_set.match(string) + next unless patterns.any? + + patterns.each do |pattern| + secret_info = { + "line_number" => line + 1, + "secret_type" => [@secrets_config_data["rules"][pattern]["description"]], + "status" => 0 + } + found_secrets[blob.id] << secret_info + end end - found_secrets[i][l + 1] = matched_patterns end end found_secrets -- GitLab