From 977be17cdf8e69daf90408b3dd22ce37445d0293 Mon Sep 17 00:00:00 2001
From: Adam Cohen
Date: Mon, 20 Oct 2025 14:31:44 -0400
Subject: [PATCH 01/16] Initial migration to restore incorrect vuln states

Changelog: added
EE: true
---
 ...restore_incorrect_vulnerability_states.yml |  8 +++++
 ..._restore_incorrect_vulnerability_states.rb | 35 +++++++++++++++++++
 .../restore_incorrect_vulnerability_states.rb | 22 ++++++++++++
 ...ore_incorrect_vulnerability_states_spec.rb |  7 ++++
 ...ore_incorrect_vulnerability_states_spec.rb | 27 ++++++++++++++
 5 files changed, 99 insertions(+)
 create mode 100644 db/docs/batched_background_migrations/restore_incorrect_vulnerability_states.yml
 create mode 100644 db/post_migrate/20251020182838_queue_restore_incorrect_vulnerability_states.rb
 create mode 100644 lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb
 create mode 100644 spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb
 create mode 100644 spec/migrations/20251020182838_queue_restore_incorrect_vulnerability_states_spec.rb

diff --git a/db/docs/batched_background_migrations/restore_incorrect_vulnerability_states.yml b/db/docs/batched_background_migrations/restore_incorrect_vulnerability_states.yml
new file mode 100644
index 00000000000000..0d3cdba0f47b27
--- /dev/null
+++ b/db/docs/batched_background_migrations/restore_incorrect_vulnerability_states.yml
@@ -0,0 +1,8 @@
+---
+migration_job_name: RestoreIncorrectVulnerabilityStates
+description: Restores incorrect vulnerability states as a result of a bug in GitLab Semgrep v6.7.0
+feature_category: static_application_security_testing
+introduced_by_url: # URL of the MR (or issue/commit) that introduced the migration
+milestone: '18.6'
+queued_migration_version: 20251020182838
+finalized_by: # version of the migration that finalized this BBM
diff --git a/db/post_migrate/20251020182838_queue_restore_incorrect_vulnerability_states.rb b/db/post_migrate/20251020182838_queue_restore_incorrect_vulnerability_states.rb
new file mode 100644
index 00000000000000..d6c5da59be8ce2
--- /dev/null
+++ b/db/post_migrate/20251020182838_queue_restore_incorrect_vulnerability_states.rb
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+
+# See https://docs.gitlab.com/ee/development/database/batched_background_migrations.html
+# for more information on when/how to queue batched background migrations
+
+# Update below commented lines with appropriate values.
+
+class QueueRestoreIncorrectVulnerabilityStates < Gitlab::Database::Migration[2.3]
+  milestone '18.6'
+
+  # Select the applicable gitlab schema for your batched background migration
+  restrict_gitlab_migration # gitlab_schema: :gitlab_main_org / :gitlab_ci / ...
+
+  MIGRATION = "RestoreIncorrectVulnerabilityStates"
+  # BATCH_SIZE = 1000
+  # SUB_BATCH_SIZE = 100
+
+  def up
+    # If you are requeueing an already executed migration, you need to delete the prior batched migration record
+    # for the new enqueue to be executed, else, you can delete this line.
+    # delete_batched_background_migration(MIGRATION, :vulnerability_occurrences, :id, [])
+
+    queue_batched_background_migration(
+      MIGRATION,
+      :vulnerability_occurrences,
+      :id,
+      batch_size: BATCH_SIZE,
+      sub_batch_size: SUB_BATCH_SIZE
+    )
+  end
+
+  def down
+    delete_batched_background_migration(MIGRATION, :vulnerability_occurrences, :id, [])
+  end
+end
diff --git a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb
new file mode 100644
index 00000000000000..b38bf9bc69bbb8
--- /dev/null
+++ b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb
@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+
+# See https://docs.gitlab.com/ee/development/database/batched_background_migrations.html
+# for more information on how to use batched background migrations
+
+# Update below commented lines with appropriate values.
+
+module Gitlab
+  module BackgroundMigration
+    class RestoreIncorrectVulnerabilityStates < BatchedMigrationJob
+      # operation_name :my_operation # This is used as the key on collecting metrics
+      # scope_to ->(relation) { relation.where(column: "value") }
+      feature_category :static_application_security_testing
+
+      def perform
+        each_sub_batch do |sub_batch|
+          # Your action on each sub_batch
+        end
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb
new file mode 100644
index 00000000000000..9daa4797c64eb4
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb
@@ -0,0 +1,7 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::BackgroundMigration::RestoreIncorrectVulnerabilityStates, feature_category: :static_application_security_testing do
+  # Tests go here
+end
diff --git a/spec/migrations/20251020182838_queue_restore_incorrect_vulnerability_states_spec.rb b/spec/migrations/20251020182838_queue_restore_incorrect_vulnerability_states_spec.rb
new file mode 100644
index 00000000000000..17e5f6537eb5f5
--- /dev/null
+++ b/spec/migrations/20251020182838_queue_restore_incorrect_vulnerability_states_spec.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+require_migration!
+
+RSpec.describe QueueRestoreIncorrectVulnerabilityStates, migration: :gitlab_?, feature_category: :static_application_security_testing do
+  # let!(:batched_migration) { described_class::MIGRATION }
+
+  # it 'schedules a new batched migration' do
+  #   reversible_migration do |migration|
+  #     migration.before -> {
+  #       expect(batched_migration).not_to have_scheduled_batched_migration
+  #     }
+
+  #     migration.after -> {
+  #       expect(batched_migration).to have_scheduled_batched_migration(
+  #         gitlab_schema: # :gitlab_main_org / :gitlab_ci / ...
+  #         table_name: :vulnerability_occurrences,
+  #         column_name: :id,
+  #         interval: described_class::DELAY_INTERVAL,
+  #         batch_size: described_class::BATCH_SIZE,
+  #         sub_batch_size: described_class::SUB_BATCH_SIZE
+  #       )
+  #     }
+  #   end
+  # end
+end
-- 
GitLab


From 64e75eff9d9b5a2d77803ac60d3a137db7784fdb Mon Sep 17 00:00:00 2001
From: Adam Cohen
Date: Mon, 20 Oct 2025 15:44:15 -0400
Subject: [PATCH 02/16] Start working on specs

---
 ..._queue_restore_incorrect_vulnerability_states.rb |  4 +++-
 .../restore_incorrect_vulnerability_states.rb       |  1 +
 .../restore_incorrect_vulnerability_states_spec.rb  | 13 ++++++++++++-
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/db/post_migrate/20251020182838_queue_restore_incorrect_vulnerability_states.rb b/db/post_migrate/20251020182838_queue_restore_incorrect_vulnerability_states.rb
index d6c5da59be8ce2..39e3ff480c378e 100644
--- a/db/post_migrate/20251020182838_queue_restore_incorrect_vulnerability_states.rb
+++ b/db/post_migrate/20251020182838_queue_restore_incorrect_vulnerability_states.rb
@@ -9,7 +9,9 @@ class QueueRestoreIncorrectVulnerabilityStates < Gitlab::Database::Migration[2.3
   milestone '18.6'
 
   # Select the applicable gitlab schema for your batched background migration
-  restrict_gitlab_migration # gitlab_schema: :gitlab_main_org / :gitlab_ci / ...
+  restrict_gitlab_migration gitlab_schema: :gitlab_sec
+
+  # restrict_gitlab_migration # gitlab_schema: :gitlab_main_org / :gitlab_ci / ...
 
   MIGRATION = "RestoreIncorrectVulnerabilityStates"
   # BATCH_SIZE = 1000
   # SUB_BATCH_SIZE = 100
diff --git a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb
index b38bf9bc69bbb8..3784fa95912f44 100644
--- a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb
+++ b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb
@@ -8,6 +8,7 @@
 module Gitlab
   module BackgroundMigration
     class RestoreIncorrectVulnerabilityStates < BatchedMigrationJob
+      operation_name :restore_incorrect_vulnerability_states
       # operation_name :my_operation # This is used as the key on collecting metrics
       # scope_to ->(relation) { relation.where(column: "value") }
       feature_category :static_application_security_testing
diff --git a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb
index 9daa4797c64eb4..bc399394de2894 100644
--- a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb
+++ b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb
@@ -3,5 +3,16 @@
 require 'spec_helper'
 
 RSpec.describe Gitlab::BackgroundMigration::RestoreIncorrectVulnerabilityStates, feature_category: :static_application_security_testing do
-  # Tests go here
+  let(:namespaces) { table(:namespaces) }
+  let(:projects) { table(:projects) }
+  let(:project) { projects.create!(namespace_id: namespace.id, project_namespace_id: namespace.id) }
+  let(:known_keys) { Set.new }
+
+  let!(:artifact) { create(:ee_ci_job_artifact, :sast) }
+
+  it 'restores vulnerabilities' do
+    Security::StoreScanService.execute(artifact, known_keys, false)
+
+    # create_vulnerability(project_id: project.id, external_id: "semgrep_id")
+  end
 end
-- 
GitLab


From 254affe3f4a87cdb2def561730a3c090afcc485b Mon Sep 17 00:00:00 2001
From: Adam Cohen
Date: Mon, 27 Oct 2025 12:06:08 -0400
Subject: [PATCH 03/16] Add new traits and ingestion/custom_spec

---
.../ingestion/ingest_reports_service.rb | 1 + ee/spec/factories/ci/job_artifacts.rb | 24 + ...bilities-incorrect-primary-identifier.json | 768 ++++++++++++++++++ ...port-semgrep-multiple-vulnerabilities.json | 768 ++++++++++++++++++ .../security/ingestion/custom_spec.rb | 66 ++ ...ore_incorrect_vulnerability_states_spec.rb | 23 +- 6 files changed, 1648 insertions(+), 2 deletions(-) create mode 100644 ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-multiple-vulnerabilities-incorrect-primary-identifier.json create mode 100644 ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-multiple-vulnerabilities.json create mode 100644 ee/spec/services/security/ingestion/custom_spec.rb diff --git a/ee/app/services/security/ingestion/ingest_reports_service.rb b/ee/app/services/security/ingestion/ingest_reports_service.rb index be280277c14329..056f8846d40a77 100644 --- a/ee/app/services/security/ingestion/ingest_reports_service.rb +++ b/ee/app/services/security/ingestion/ingest_reports_service.rb @@ -16,6 +16,7 @@ def initialize(pipeline) end def execute + # binding.pry store_reports mark_resolved_vulnerabilities auto_dismiss_vulnerabilities diff --git a/ee/spec/factories/ci/job_artifacts.rb b/ee/spec/factories/ci/job_artifacts.rb index 981800cf943c8c..ff134369b4d8bb 100644 --- a/ee/spec/factories/ci/job_artifacts.rb +++ b/ee/spec/factories/ci/job_artifacts.rb @@ -121,6 +121,30 @@ end end + trait :sast_semgrep_multiple_vulnerabilities do + file_type { :sast } + file_format { :raw } + + after(:build) do |artifact, _| + artifact.file = fixture_file_upload( + Rails.root.join('ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-multiple-vulnerabilities.json'), + 'application/json' + ) + end + end + + trait :sast_semgrep_multiple_vulnerabilities_incorrect_primary_identifier do + file_type { :sast } + file_format { :raw } + + after(:build) do |artifact, _| + artifact.file = fixture_file_upload( + Rails.root.join('ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-multiple-vulnerabilities-incorrect-primary-identifier.json'), + 'application/json' + ) + end + end + trait :dast_with_evidence do file_type { :dast } file_format { :raw } diff --git a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-multiple-vulnerabilities-incorrect-primary-identifier.json b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-multiple-vulnerabilities-incorrect-primary-identifier.json new file mode 100644 index 00000000000000..1c337c35eef83a --- /dev/null +++ b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-multiple-vulnerabilities-incorrect-primary-identifier.json @@ -0,0 +1,768 @@ +{ + "version": "15.1.4", + "vulnerabilities": [ + { + "id": "6a8d97c532a32e7bb9e1d93b3300977b8a7e75f9ddcc5bac4edaa6cda3603833", + "category": "sast", + "name": "Deserialization of untrusted data", + "description": "The application was found using an unsafe version of `yaml` load which is vulnerable to\ndeserialization attacks. Deserialization attacks exploit the process of reading serialized\ndata and turning it back\ninto an object. 
By constructing malicious objects and serializing them, an adversary may\nattempt to:\n\n- Inject code that is executed upon object construction, which occurs during the\ndeserialization process.\n- Exploit mass assignment by including fields that are not normally a part of the serialized\ndata but are read in during deserialization.\n\nTo remediate this issue, use `safe_load()` or call `yaml.load()` with the `Loader` argument\nset to\n`yaml.SafeLoader`.\n\nExample loading YAML using `safe_load`:\n```\nimport yaml\n\n# Use safe_load to load data into an intermediary object\nintermediary_object = yaml.safe_load(\"\"\"user:\n name: 'test user'\"\"\"\n)\n# Create our real object, copying over only the necessary fields\nuser_object = {'user': {\n # Assign the deserialized data from intermediary object\n 'name': intermediary_object['user']['name'],\n # Add in protected data in object definition (or set it from a class constructor)\n 'is_admin': False,\n }\n}\n# Work with user_object\n# ...\n```\n\nFor more details on deserialization attacks in general, see OWASP's guide:\n- https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html\n", + "cve": "semgrep_id:bandit.B506:329:329", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 329 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-502", + "value": "502", + "url": "https://cwe.mitre.org/data/definitions/502.html" + }, + { + "type": "owasp", + "name": "A08:2021 - Software and Data Integrity Failures", + "value": "A08:2021" + }, + { + "type": "owasp", + "name": "A8:2017 - Insecure Deserialization", + "value": "A8:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B506", + "value": "B506" + }, + { + "type": "semgrep_id", + "name": "bandit.B506", + "value": "bandit.B506", + "url": "https://semgrep.dev/r/gitlab.bandit.B506" + } + ] + }, + { + "id": "185f6aa5aece728c2b94f16ff36ea99339dbeb39a027964d65a0e544b439529d", + "category": "sast", + "name": "Improper neutralization of special elements used in an SQL Command ('SQL Injection')", + "description": "SQL Injection is a critical vulnerability that can lead to data or system compromise. By\ndynamically generating SQL query strings, user input may be able to influence the logic of\nthe SQL statement. This could lead to an adversary accessing information they should\nnot have access to, or in some circumstances, being able to execute OS functionality or code.\n\nReplace all dynamically generated SQL queries with parameterized queries. In situations where\ndynamic queries must be created, never use direct user input, but instead use a map or\ndictionary of valid values and resolve them using a user supplied key.\n\nFor example, some database drivers do not allow parameterized queries for `\u003e` or `\u003c` comparison\noperators. In these cases, do not use a user supplied `\u003e` or `\u003c` value, but rather have the\nuser\nsupply a `gt` or `lt` value. The alphabetical values are then used to look up the `\u003e` and `\u003c`\nvalues to be used in the construction of the dynamic query. 
The same goes for other queries\nwhere\ncolumn or table names are required but cannot be parameterized.\n\nExample using `PreparedStatement` queries:\n```\nimport sqlite3\n\n# Create a new database (in memory)\ncon = sqlite3.connect(\":memory:\")\n# Get a cursor from the connection\ncur = con.cursor()\n# Create a tuple of the value to be used in the parameterized query\nparams = ('user-input',)\n# execute the statement, passing in the params for the value\ncur.execute(\"select name from sqlite_master where name = ?\", params)\n# work with the result\nresult = cur.fetchall()\n```\n\nFor more information on SQL Injection see OWASP:\nhttps://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html\n", + "cve": "semgrep_id:bandit.B608:265:265", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 265 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-89", + "value": "89", + "url": "https://cwe.mitre.org/data/definitions/89.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + }, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B608", + "value": "B608" + }, + { + "type": "semgrep_id", + "name": "bandit.B608", + "value": "bandit.B608", + "url": "https://semgrep.dev/r/gitlab.bandit.B608" + } + ] + }, + { + "id": "afb3a18f344a72ed01c842afd1939b4c33b150ba50234001d8eb34ce72a977f4", + "category": "sast", + "name": "Improper neutralization of directives in dynamically evaluated code ('Eval Injection')", + "description": "The application was found calling the `eval` function OR Function()\n constructor OR setTimeout() OR setInterval() methods. If the\n\n variables or strings or functions passed to these methods contains user-supplied input, an adversary could attempt to execute arbitrary\n\n JavaScript\n\n code. This could lead to a full system compromise in Node applications or Cross-site Scripting\n\n (XSS) in web applications.\n\n\n To remediate this issue, remove all calls to above methods and consider alternative methods for\n\n executing\n\n the necessary business logic. There is almost no safe method of calling `eval` or other above stated sinks with\n\n user-supplied input.\n\n Instead, consider alternative methods such as using property accessors to dynamically access\n\n values.\n\n\n Example using property accessors to dynamically access an object's property:\n\n ```\n\n // Define an object\n\n const obj = {key1: 'value1', key2: 'value2'};\n\n // Get key dynamically from user input\n\n const key = getUserInput();\n\n // Check if the key exists in our object and return it, or a default empty string\n\n const value = (obj.hasOwnProperty(key)) ? 
obj[key] : '';\n\n // Work with the value\n\n ```\n\n\n For more information on why not to use `eval`, and alternatives see:\n\n - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/eval#never_use_eval!\n\n Other References:\n\n - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Function/Function\n\n - https://developer.mozilla.org/en-US/docs/Web/API/setTimeout\n\n - https://developer.mozilla.org/en-US/docs/Web/API/setInterval\n", + "cve": "semgrep_id:eslint.detect-eval-with-expression:10:10", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/static/main.js", + "start_line": 10 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-95", + "value": "95", + "url": "https://cwe.mitre.org/data/definitions/95.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + }, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + }, + { + "type": "eslint_rule_id", + "name": "ESLint rule ID/detect-eval-with-expression", + "value": "detect-eval-with-expression" + }, + { + "type": "semgrep_id", + "name": "eslint.detect-eval-with-expression", + "value": "eslint.detect-eval-with-expression", + "url": "https://semgrep.dev/r/gitlab.eslint.detect-eval-with-expression" + } + ] + }, + { + "id": "4e7633d40f31f6398b4c7ffc4bf481ba6fe627c34042d7439b71259e6ea9b32c", + "category": "sast", + "name": "Improper neutralization of special elements used in an SQL Command ('SQL Injection')", + "description": "Detected user input used to manually construct a SQL string. This is usually\nbad practice because manual construction could accidentally result in a SQL\ninjection. An attacker could use a SQL injection to steal or modify contents\nof the database. Instead, use a parameterized query which is available\nby default in most database engines. Alternatively, consider using an\nobject-relational mapper (ORM) such as SQLAlchemy which will protect your queries.\n\nSQL Injections are a critical type of vulnerability that can lead to data \nor system compromise. By dynamically generating SQL query strings, user \ninput may be able to influence the logic of an SQL statement. \nThis could lead to an malicious parties accessing information they should not \nhave access to, or in some circumstances, being able to execute OS functionality\nor code.\n\nReplace all dynamically generated SQL queries with parameterized queries. \nIn situations where dynamic queries must be created, never use direct user input,\nbut instead use a map or dictionary of valid values and resolve them using a user \nsupplied key.\n\nFor example, some database drivers do not allow parameterized queries for \n`\u003e` or `\u003c` comparison operators. In these cases, do not use a user supplied \n`\u003e` or `\u003c` value, but rather have the user supply a `gt` or `lt` value. \nThe alphabetical values are then used to look up the `\u003e` and `\u003c` values to be used \nin the construction of the dynamic query. The same goes for other queries where \ncolumn or table names are required but cannot be parameterized.\nData that is possible user-controlled from a python request is passed\nto `execute()` function. 
To remediate this issue, use SQLAlchemy statements\nwhich are built with query parameterization and therefore not vulnerable \nto sql injection.\n\nIf for some reason this is not feasible, ensure calls including user-supplied \ndata pass it in to the `params` parameter of the `execute()` method.\nBelow is an example using `execute()`, passing in user-supplied data as `params`. \nThis will treat the query as a parameterized query and `params` as strictly data, \npreventing any possibility of SQL Injection.\n\n```\nname = request.args.get('name')\nreq = text('SELECT * FROM student WHERE firstname = :x')\nresult = db.session.execute(req, {\"x\":name})\n```\nFor more information on QuerySets see:\n- https://docs.djangoproject.com/en/4.2/ref/models/querysets/#queryset-api\nFor more information on SQL Injections see OWASP:\n- https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html\n", + "cve": "semgrep_id:python_flask_rule-flask-tainted-sql-string:261:261", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 261 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-89", + "value": "89", + "url": "https://cwe.mitre.org/data/definitions/89.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + }, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + }, + { + "type": "semgrep_id", + "name": "python_flask_rule-flask-tainted-sql-string", + "value": "python_flask_rule-flask-tainted-sql-string" + } + ] + }, + { + "id": "819fa95af305ebbf12f83f5cd85ce6b9720a22a112869bb6ef76bec8fe449d62", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:17:18", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 17, + "end_line": 18 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + }, + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + } + ] + }, + { + "id": "10ea0fe99f1cb7743ecc12fd2a83cb76853523f53e8f24f688daddd2d5687e32", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:28:29", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 28, + "end_line": 29 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + }, + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + } + ] + }, + { + "id": "61c61d9440d5a9c4b76ca89d7a6146b50dfdce4d5ec3e93d42fe255c67bf4684", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:36:37", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 36, + "end_line": 37 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + }, + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + } + ] + }, + { + "id": "7fef73eeb450ba731ada304710b14f4ca65790c4d571ebad2af3ee5191e5b42f", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:44:45", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 44, + "end_line": 45 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + }, + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + } + ] + }, + { + "id": "e81f87450a35ed038550bfe4f56dcff5bebd9c5ca5f309b6144de063cb99e1b2", + "category": "sast", + "name": "Use of a broken or risky cryptographic algorithm", + "description": "The application was found using an insecure or risky digest or signature algorithm. MD2, MD4,\n MD5 and SHA1 hash algorithms have been found to be vulnerable to producing collisions.\n\nThis means\nthat two different values, when hashed, can lead to the same hash value. 
If the application is\ntrying\nto use these hash methods for storing passwords, then it is recommended to switch to a\npassword hashing\nalgorithm such as Argon2id or PBKDF2.\n\nNote that the `Crypto` and `Cryptodome` Python packages are no longer recommended for\nnew applications, instead consider using the [cryptography](https://cryptography.io/) package.\n\nExample of creating a SHA-384 hash using the `cryptography` package:\n```\nfrom cryptography.hazmat.primitives import hashes\n# Create a SHA384 digest\ndigest = hashes.Hash(hashes.SHA384())\n# Update the digest with some initial data\ndigest.update(b\"some data to hash\")\n# Add more data to the digest\ndigest.update(b\"some more data\")\n# Finalize the digest as bytes\nresult = digest.finalize()\n```\n\nFor more information on secure password storage see OWASP:\n- https://cheatsheetseries.owasp.org/cheatsheets/Password_Storage_Cheat_Sheet.html\n\nFor more information on the cryptography module see:\n- https://cryptography.io/en/latest/\n", + "cve": "semgrep_id:bandit.B303-1:141:141", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 141 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-327", + "value": "327", + "url": "https://cwe.mitre.org/data/definitions/327.html" + }, + { + "type": "owasp", + "name": "A02:2021 - Cryptographic Failures", + "value": "A02:2021" + }, + { + "type": "owasp", + "name": "A3:2017 - Sensitive Data Exposure", + "value": "A3:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B303", + "value": "B303" + }, + { + "type": "semgrep_id", + "name": "bandit.B303-1", + "value": "bandit.B303-1", + "url": "https://semgrep.dev/r/gitlab.bandit.B303-1" + } + ] + }, + { + "id": "3f8a15b8ea5a1e062262c837c4b5c763320c40622f50183f04fa2e584fc05e13", + "category": "sast", + "name": "Improper certificate validation", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B501:17:18", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 17, + "end_line": 18 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + }, + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + } + ] + }, + { + "id": "8b6a98da4410a8abe0a3338ec5db34f4a9a48d0716ba296dcda0e93b63a5766f", + "category": "sast", + "name": "Improper certificate validation", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B501:28:29", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 28, + "end_line": 29 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + }, + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + } + ] + }, + { + "id": "3b65f8017d6b3a73a5f6e7d1c0e9e78aa0daf817f06234985a9d011da1a9d804", + "category": "sast", + "name": "Improper certificate validation", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B501:36:37", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 36, + "end_line": 37 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + }, + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + } + ] + }, + { + "id": "878843d5b4edf0042e3066429a4cac5f66f8c7ad72b40056601fbb191fa13214", + "category": "sast", + "name": "Improper certificate validation", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B501:44:45", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 44, + "end_line": 45 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + }, + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + } + ] + }, + { + "id": "6cac58319f88ad3a1cb16df9c1272049ea0f909fa5fc3f67508148fda3ce5e2c", + "category": "sast", + "name": "Regular expression with non-literal value", + "description": "The `RegExp` constructor was called with a non-literal value. If an adversary were able to\nsupply a malicious regex, they could cause a Regular Expression Denial of Service (ReDoS)\nagainst the application. In Node applications, this could cause the entire application to no\nlonger be responsive to other users' requests.\n\nTo remediate this issue, never allow user-supplied regular expressions. Instead, the regular \nexpression should be hardcoded. If this is not possible, consider using an alternative regular\nexpression engine such as [node-re2](https://www.npmjs.com/package/re2). 
RE2 is a safe alternative \nthat does not support backtracking, which is what leads to ReDoS.\n\nExample using re2 which does not support backtracking (Note: it is still recommended to\nnever use user-supplied input):\n```\n// Import the re2 module\nconst RE2 = require('re2');\n\nfunction match(userSuppliedRegex, userInput) {\n // Create a RE2 object with the user supplied regex, this is relatively safe\n // due to RE2 not supporting backtracking which can be abused to cause long running\n // queries\n var re = new RE2(userSuppliedRegex);\n // Execute the regular expression against some userInput\n var result = re.exec(userInput);\n // Work with the result\n}\n```\n\nFor more information on Regular Expression DoS see:\n- https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS\n", + "cve": "semgrep_id:eslint.detect-non-literal-regexp:15:15", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/static/main.js", + "start_line": 15 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-185", + "value": "185", + "url": "https://cwe.mitre.org/data/definitions/185.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + }, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + }, + { + "type": "eslint_rule_id", + "name": "ESLint rule ID/detect-non-literal-regexp", + "value": "detect-non-literal-regexp" + }, + { + "type": "semgrep_id", + "name": "eslint.detect-non-literal-regexp", + "value": "eslint.detect-non-literal-regexp", + "url": "https://semgrep.dev/r/gitlab.eslint.detect-non-literal-regexp" + } + ] + }, + { + "id": "512131f12839cd51c58aaabf643870dc262bf169f0af15a47d0d073fcfd449ac", + "category": "sast", + "name": "Use of cryptographically weak pseudo-random number generator (PRNG)", + "description": "Depending on the context, generating weak random numbers may expose cryptographic functions,\nwhich rely on these numbers, to be exploitable. 
When generating numbers for sensitive values\nsuch as tokens, nonces, and cryptographic keys, it is recommended that the `secrets` module\nbe used instead.\n\nExample using the secrets module:\n```\nimport secrets\n\n# Generate a secure random 64 byte array\nrandom_bytes = secrets.token_bytes(64)\nprint(random_bytes)\n\n# Generate a secure random 64 byte array as a hex string\nrandom_bytes_hex = secrets.token_hex(64)\n\n# Generate a secure random 64 byte array base64 encoded for use in URLs\nrandom_string = secrets.token_urlsafe(64)\n```\n\nFor more information on the `secrets` module see:\n- https://docs.python.org/3/library/secrets.html\n", + "cve": "semgrep_id:bandit.B311:295:295", + "severity": "Low", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 295 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-338", + "value": "338", + "url": "https://cwe.mitre.org/data/definitions/338.html" + }, + { + "type": "owasp", + "name": "A02:2021 - Cryptographic Failures", + "value": "A02:2021" + }, + { + "type": "owasp", + "name": "A3:2017 - Sensitive Data Exposure", + "value": "A3:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B311", + "value": "B311" + }, + { + "type": "semgrep_id", + "name": "bandit.B311", + "value": "bandit.B311", + "url": "https://semgrep.dev/r/gitlab.bandit.B311" + } + ] + }, + { + "id": "6cf069d55d47c54f5b2363af43f3c7a2d71ef25e04751111b6566fe89b90c8aa", + "category": "sast", + "name": "Use of cryptographically weak pseudo-random number generator (PRNG)", + "description": "Depending on the context, generating weak random numbers may expose cryptographic functions,\nwhich rely on these numbers, to be exploitable. When generating numbers for sensitive values\nsuch as tokens, nonces, and cryptographic keys, it is recommended that the `secrets` module\nbe used instead.\n\nExample using the secrets module:\n```\nimport secrets\n\n# Generate a secure random 64 byte array\nrandom_bytes = secrets.token_bytes(64)\nprint(random_bytes)\n\n# Generate a secure random 64 byte array as a hex string\nrandom_bytes_hex = secrets.token_hex(64)\n\n# Generate a secure random 64 byte array base64 encoded for use in URLs\nrandom_string = secrets.token_urlsafe(64)\n```\n\nFor more information on the `secrets` module see:\n- https://docs.python.org/3/library/secrets.html\n", + "cve": "semgrep_id:bandit.B311:319:319", + "severity": "Low", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 319 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-338", + "value": "338", + "url": "https://cwe.mitre.org/data/definitions/338.html" + }, + { + "type": "owasp", + "name": "A02:2021 - Cryptographic Failures", + "value": "A02:2021" + }, + { + "type": "owasp", + "name": "A3:2017 - Sensitive Data Exposure", + "value": "A3:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B311", + "value": "B311" + }, + { + "type": "semgrep_id", + "name": "bandit.B311", + "value": "bandit.B311", + "url": "https://semgrep.dev/r/gitlab.bandit.B311" + } + ] + } + ], + "scan": { + "analyzer": { + "id": "semgrep", + "name": "Semgrep", + "url": "https://gitlab.com/gitlab-org/security-products/analyzers/semgrep", + "vendor": { + "name": "GitLab" + }, + "version": "6.6.2" + }, + "scanner": { + "id": "semgrep", + "name": "Semgrep", + "url": "https://github.com/returntocorp/semgrep", + "vendor": { + "name": "GitLab" + }, + "version": "1.118.0" + }, + "type": 
"sast", + "start_time": "2025-09-29T21:06:41", + "end_time": "2025-09-29T21:06:48", + "status": "success", + "observability": { + "events": [ + { + "event": "collect_sast_scan_metrics_from_pipeline", + "property": "5c418ec4-3b29-4631-bbbc-61e76f3f2396", + "label": "semgrep", + "value": 0, + "version": "6.6.2", + "exit_code": 0, + "override_count": 0, + "passthrough_count": 0, + "custom_exclude_path_count": 0, + "time_s": 6, + "file_count": 4 + } + ] + } + } +} diff --git a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-multiple-vulnerabilities.json b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-multiple-vulnerabilities.json new file mode 100644 index 00000000000000..fe4134e1491bda --- /dev/null +++ b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-multiple-vulnerabilities.json @@ -0,0 +1,768 @@ +{ + "version": "15.1.4", + "vulnerabilities": [ + { + "id": "6a8d97c532a32e7bb9e1d93b3300977b8a7e75f9ddcc5bac4edaa6cda3603833", + "category": "sast", + "name": "Deserialization of untrusted data", + "description": "The application was found using an unsafe version of `yaml` load which is vulnerable to\ndeserialization attacks. Deserialization attacks exploit the process of reading serialized\ndata and turning it back\ninto an object. By constructing malicious objects and serializing them, an adversary may\nattempt to:\n\n- Inject code that is executed upon object construction, which occurs during the\ndeserialization process.\n- Exploit mass assignment by including fields that are not normally a part of the serialized\ndata but are read in during deserialization.\n\nTo remediate this issue, use `safe_load()` or call `yaml.load()` with the `Loader` argument\nset to\n`yaml.SafeLoader`.\n\nExample loading YAML using `safe_load`:\n```\nimport yaml\n\n# Use safe_load to load data into an intermediary object\nintermediary_object = yaml.safe_load(\"\"\"user:\n name: 'test user'\"\"\"\n)\n# Create our real object, copying over only the necessary fields\nuser_object = {'user': {\n # Assign the deserialized data from intermediary object\n 'name': intermediary_object['user']['name'],\n # Add in protected data in object definition (or set it from a class constructor)\n 'is_admin': False,\n }\n}\n# Work with user_object\n# ...\n```\n\nFor more details on deserialization attacks in general, see OWASP's guide:\n- https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html\n", + "cve": "semgrep_id:bandit.B506:329:329", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 329 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B506", + "value": "bandit.B506", + "url": "https://semgrep.dev/r/gitlab.bandit.B506" + }, + { + "type": "cwe", + "name": "CWE-502", + "value": "502", + "url": "https://cwe.mitre.org/data/definitions/502.html" + }, + { + "type": "owasp", + "name": "A08:2021 - Software and Data Integrity Failures", + "value": "A08:2021" + }, + { + "type": "owasp", + "name": "A8:2017 - Insecure Deserialization", + "value": "A8:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B506", + "value": "B506" + } + ] + }, + { + "id": "185f6aa5aece728c2b94f16ff36ea99339dbeb39a027964d65a0e544b439529d", + "category": "sast", + "name": "Improper neutralization of special elements used in an SQL Command ('SQL Injection')", + "description": "SQL Injection is a critical vulnerability that can lead to data or system compromise. 
By\ndynamically generating SQL query strings, user input may be able to influence the logic of\nthe SQL statement. This could lead to an adversary accessing information they should\nnot have access to, or in some circumstances, being able to execute OS functionality or code.\n\nReplace all dynamically generated SQL queries with parameterized queries. In situations where\ndynamic queries must be created, never use direct user input, but instead use a map or\ndictionary of valid values and resolve them using a user supplied key.\n\nFor example, some database drivers do not allow parameterized queries for `\u003e` or `\u003c` comparison\noperators. In these cases, do not use a user supplied `\u003e` or `\u003c` value, but rather have the\nuser\nsupply a `gt` or `lt` value. The alphabetical values are then used to look up the `\u003e` and `\u003c`\nvalues to be used in the construction of the dynamic query. The same goes for other queries\nwhere\ncolumn or table names are required but cannot be parameterized.\n\nExample using `PreparedStatement` queries:\n```\nimport sqlite3\n\n# Create a new database (in memory)\ncon = sqlite3.connect(\":memory:\")\n# Get a cursor from the connection\ncur = con.cursor()\n# Create a tuple of the value to be used in the parameterized query\nparams = ('user-input',)\n# execute the statement, passing in the params for the value\ncur.execute(\"select name from sqlite_master where name = ?\", params)\n# work with the result\nresult = cur.fetchall()\n```\n\nFor more information on SQL Injection see OWASP:\nhttps://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html\n", + "cve": "semgrep_id:bandit.B608:265:265", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 265 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B608", + "value": "bandit.B608", + "url": "https://semgrep.dev/r/gitlab.bandit.B608" + }, + { + "type": "cwe", + "name": "CWE-89", + "value": "89", + "url": "https://cwe.mitre.org/data/definitions/89.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + }, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B608", + "value": "B608" + } + ] + }, + { + "id": "afb3a18f344a72ed01c842afd1939b4c33b150ba50234001d8eb34ce72a977f4", + "category": "sast", + "name": "Improper neutralization of directives in dynamically evaluated code ('Eval Injection')", + "description": "The application was found calling the `eval` function OR Function()\n constructor OR setTimeout() OR setInterval() methods. If the\n\n variables or strings or functions passed to these methods contains user-supplied input, an adversary could attempt to execute arbitrary\n\n JavaScript\n\n code. This could lead to a full system compromise in Node applications or Cross-site Scripting\n\n (XSS) in web applications.\n\n\n To remediate this issue, remove all calls to above methods and consider alternative methods for\n\n executing\n\n the necessary business logic. 
There is almost no safe method of calling `eval` or other above stated sinks with\n\n user-supplied input.\n\n Instead, consider alternative methods such as using property accessors to dynamically access\n\n values.\n\n\n Example using property accessors to dynamically access an object's property:\n\n ```\n\n // Define an object\n\n const obj = {key1: 'value1', key2: 'value2'};\n\n // Get key dynamically from user input\n\n const key = getUserInput();\n\n // Check if the key exists in our object and return it, or a default empty string\n\n const value = (obj.hasOwnProperty(key)) ? obj[key] : '';\n\n // Work with the value\n\n ```\n\n\n For more information on why not to use `eval`, and alternatives see:\n\n - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/eval#never_use_eval!\n\n Other References:\n\n - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Function/Function\n\n - https://developer.mozilla.org/en-US/docs/Web/API/setTimeout\n\n - https://developer.mozilla.org/en-US/docs/Web/API/setInterval\n", + "cve": "semgrep_id:eslint.detect-eval-with-expression:10:10", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/static/main.js", + "start_line": 10 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "eslint.detect-eval-with-expression", + "value": "eslint.detect-eval-with-expression", + "url": "https://semgrep.dev/r/gitlab.eslint.detect-eval-with-expression" + }, + { + "type": "cwe", + "name": "CWE-95", + "value": "95", + "url": "https://cwe.mitre.org/data/definitions/95.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + }, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + }, + { + "type": "eslint_rule_id", + "name": "ESLint rule ID/detect-eval-with-expression", + "value": "detect-eval-with-expression" + } + ] + }, + { + "id": "4e7633d40f31f6398b4c7ffc4bf481ba6fe627c34042d7439b71259e6ea9b32c", + "category": "sast", + "name": "Improper neutralization of special elements used in an SQL Command ('SQL Injection')", + "description": "Detected user input used to manually construct a SQL string. This is usually\nbad practice because manual construction could accidentally result in a SQL\ninjection. An attacker could use a SQL injection to steal or modify contents\nof the database. Instead, use a parameterized query which is available\nby default in most database engines. Alternatively, consider using an\nobject-relational mapper (ORM) such as SQLAlchemy which will protect your queries.\n\nSQL Injections are a critical type of vulnerability that can lead to data \nor system compromise. By dynamically generating SQL query strings, user \ninput may be able to influence the logic of an SQL statement. \nThis could lead to an malicious parties accessing information they should not \nhave access to, or in some circumstances, being able to execute OS functionality\nor code.\n\nReplace all dynamically generated SQL queries with parameterized queries. \nIn situations where dynamic queries must be created, never use direct user input,\nbut instead use a map or dictionary of valid values and resolve them using a user \nsupplied key.\n\nFor example, some database drivers do not allow parameterized queries for \n`\u003e` or `\u003c` comparison operators. In these cases, do not use a user supplied \n`\u003e` or `\u003c` value, but rather have the user supply a `gt` or `lt` value. 
\nThe alphabetical values are then used to look up the `\u003e` and `\u003c` values to be used \nin the construction of the dynamic query. The same goes for other queries where \ncolumn or table names are required but cannot be parameterized.\nData that is possible user-controlled from a python request is passed\nto `execute()` function. To remediate this issue, use SQLAlchemy statements\nwhich are built with query parameterization and therefore not vulnerable \nto sql injection.\n\nIf for some reason this is not feasible, ensure calls including user-supplied \ndata pass it in to the `params` parameter of the `execute()` method.\nBelow is an example using `execute()`, passing in user-supplied data as `params`. \nThis will treat the query as a parameterized query and `params` as strictly data, \npreventing any possibility of SQL Injection.\n\n```\nname = request.args.get('name')\nreq = text('SELECT * FROM student WHERE firstname = :x')\nresult = db.session.execute(req, {\"x\":name})\n```\nFor more information on QuerySets see:\n- https://docs.djangoproject.com/en/4.2/ref/models/querysets/#queryset-api\nFor more information on SQL Injections see OWASP:\n- https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html\n", + "cve": "semgrep_id:python_flask_rule-flask-tainted-sql-string:261:261", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 261 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "python_flask_rule-flask-tainted-sql-string", + "value": "python_flask_rule-flask-tainted-sql-string" + }, + { + "type": "cwe", + "name": "CWE-89", + "value": "89", + "url": "https://cwe.mitre.org/data/definitions/89.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + }, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + } + ] + }, + { + "id": "819fa95af305ebbf12f83f5cd85ce6b9720a22a112869bb6ef76bec8fe449d62", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:17:18", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 17, + "end_line": 18 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + }, + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + } + ] + }, + { + "id": "10ea0fe99f1cb7743ecc12fd2a83cb76853523f53e8f24f688daddd2d5687e32", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:28:29", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 28, + "end_line": 29 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + }, + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + } + ] + }, + { + "id": "61c61d9440d5a9c4b76ca89d7a6146b50dfdce4d5ec3e93d42fe255c67bf4684", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:36:37", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 36, + "end_line": 37 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + }, + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + } + ] + }, + { + "id": "7fef73eeb450ba731ada304710b14f4ca65790c4d571ebad2af3ee5191e5b42f", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:44:45", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 44, + "end_line": 45 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + }, + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + } + ] + }, + { + "id": "e81f87450a35ed038550bfe4f56dcff5bebd9c5ca5f309b6144de063cb99e1b2", + "category": "sast", + "name": "Use of a broken or risky cryptographic algorithm", + "description": "The application was found using an insecure or risky digest or signature algorithm. MD2, MD4,\n MD5 and SHA1 hash algorithms have been found to be vulnerable to producing collisions.\n\nThis means\nthat two different values, when hashed, can lead to the same hash value. 
If the application is\ntrying\nto use these hash methods for storing passwords, then it is recommended to switch to a\npassword hashing\nalgorithm such as Argon2id or PBKDF2.\n\nNote that the `Crypto` and `Cryptodome` Python packages are no longer recommended for\nnew applications, instead consider using the [cryptography](https://cryptography.io/) package.\n\nExample of creating a SHA-384 hash using the `cryptography` package:\n```\nfrom cryptography.hazmat.primitives import hashes\n# Create a SHA384 digest\ndigest = hashes.Hash(hashes.SHA384())\n# Update the digest with some initial data\ndigest.update(b\"some data to hash\")\n# Add more data to the digest\ndigest.update(b\"some more data\")\n# Finalize the digest as bytes\nresult = digest.finalize()\n```\n\nFor more information on secure password storage see OWASP:\n- https://cheatsheetseries.owasp.org/cheatsheets/Password_Storage_Cheat_Sheet.html\n\nFor more information on the cryptography module see:\n- https://cryptography.io/en/latest/\n", + "cve": "semgrep_id:bandit.B303-1:141:141", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 141 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B303-1", + "value": "bandit.B303-1", + "url": "https://semgrep.dev/r/gitlab.bandit.B303-1" + }, + { + "type": "cwe", + "name": "CWE-327", + "value": "327", + "url": "https://cwe.mitre.org/data/definitions/327.html" + }, + { + "type": "owasp", + "name": "A02:2021 - Cryptographic Failures", + "value": "A02:2021" + }, + { + "type": "owasp", + "name": "A3:2017 - Sensitive Data Exposure", + "value": "A3:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B303", + "value": "B303" + } + ] + }, + { + "id": "3f8a15b8ea5a1e062262c837c4b5c763320c40622f50183f04fa2e584fc05e13", + "category": "sast", + "name": "Improper certificate validation", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B501:17:18", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 17, + "end_line": 18 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + }, + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + } + ] + }, + { + "id": "8b6a98da4410a8abe0a3338ec5db34f4a9a48d0716ba296dcda0e93b63a5766f", + "category": "sast", + "name": "Improper certificate validation", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B501:28:29", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 28, + "end_line": 29 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + }, + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + } + ] + }, + { + "id": "3b65f8017d6b3a73a5f6e7d1c0e9e78aa0daf817f06234985a9d011da1a9d804", + "category": "sast", + "name": "Improper certificate validation", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B501:36:37", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 36, + "end_line": 37 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + }, + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + } + ] + }, + { + "id": "878843d5b4edf0042e3066429a4cac5f66f8c7ad72b40056601fbb191fa13214", + "category": "sast", + "name": "Improper certificate validation", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B501:44:45", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 44, + "end_line": 45 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + }, + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + } + ] + }, + { + "id": "6cac58319f88ad3a1cb16df9c1272049ea0f909fa5fc3f67508148fda3ce5e2c", + "category": "sast", + "name": "Regular expression with non-literal value", + "description": "The `RegExp` constructor was called with a non-literal value. If an adversary were able to\nsupply a malicious regex, they could cause a Regular Expression Denial of Service (ReDoS)\nagainst the application. In Node applications, this could cause the entire application to no\nlonger be responsive to other users' requests.\n\nTo remediate this issue, never allow user-supplied regular expressions. Instead, the regular \nexpression should be hardcoded. If this is not possible, consider using an alternative regular\nexpression engine such as [node-re2](https://www.npmjs.com/package/re2). 
RE2 is a safe alternative \nthat does not support backtracking, which is what leads to ReDoS.\n\nExample using re2 which does not support backtracking (Note: it is still recommended to\nnever use user-supplied input):\n```\n// Import the re2 module\nconst RE2 = require('re2');\n\nfunction match(userSuppliedRegex, userInput) {\n // Create a RE2 object with the user supplied regex, this is relatively safe\n // due to RE2 not supporting backtracking which can be abused to cause long running\n // queries\n var re = new RE2(userSuppliedRegex);\n // Execute the regular expression against some userInput\n var result = re.exec(userInput);\n // Work with the result\n}\n```\n\nFor more information on Regular Expression DoS see:\n- https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS\n", + "cve": "semgrep_id:eslint.detect-non-literal-regexp:15:15", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/static/main.js", + "start_line": 15 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "eslint.detect-non-literal-regexp", + "value": "eslint.detect-non-literal-regexp", + "url": "https://semgrep.dev/r/gitlab.eslint.detect-non-literal-regexp" + }, + { + "type": "cwe", + "name": "CWE-185", + "value": "185", + "url": "https://cwe.mitre.org/data/definitions/185.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + }, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + }, + { + "type": "eslint_rule_id", + "name": "ESLint rule ID/detect-non-literal-regexp", + "value": "detect-non-literal-regexp" + } + ] + }, + { + "id": "512131f12839cd51c58aaabf643870dc262bf169f0af15a47d0d073fcfd449ac", + "category": "sast", + "name": "Use of cryptographically weak pseudo-random number generator (PRNG)", + "description": "Depending on the context, generating weak random numbers may expose cryptographic functions,\nwhich rely on these numbers, to be exploitable. 
When generating numbers for sensitive values\nsuch as tokens, nonces, and cryptographic keys, it is recommended that the `secrets` module\nbe used instead.\n\nExample using the secrets module:\n```\nimport secrets\n\n# Generate a secure random 64 byte array\nrandom_bytes = secrets.token_bytes(64)\nprint(random_bytes)\n\n# Generate a secure random 64 byte array as a hex string\nrandom_bytes_hex = secrets.token_hex(64)\n\n# Generate a secure random 64 byte array base64 encoded for use in URLs\nrandom_string = secrets.token_urlsafe(64)\n```\n\nFor more information on the `secrets` module see:\n- https://docs.python.org/3/library/secrets.html\n", + "cve": "semgrep_id:bandit.B311:295:295", + "severity": "Low", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 295 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B311", + "value": "bandit.B311", + "url": "https://semgrep.dev/r/gitlab.bandit.B311" + }, + { + "type": "cwe", + "name": "CWE-338", + "value": "338", + "url": "https://cwe.mitre.org/data/definitions/338.html" + }, + { + "type": "owasp", + "name": "A02:2021 - Cryptographic Failures", + "value": "A02:2021" + }, + { + "type": "owasp", + "name": "A3:2017 - Sensitive Data Exposure", + "value": "A3:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B311", + "value": "B311" + } + ] + }, + { + "id": "6cf069d55d47c54f5b2363af43f3c7a2d71ef25e04751111b6566fe89b90c8aa", + "category": "sast", + "name": "Use of cryptographically weak pseudo-random number generator (PRNG)", + "description": "Depending on the context, generating weak random numbers may expose cryptographic functions,\nwhich rely on these numbers, to be exploitable. When generating numbers for sensitive values\nsuch as tokens, nonces, and cryptographic keys, it is recommended that the `secrets` module\nbe used instead.\n\nExample using the secrets module:\n```\nimport secrets\n\n# Generate a secure random 64 byte array\nrandom_bytes = secrets.token_bytes(64)\nprint(random_bytes)\n\n# Generate a secure random 64 byte array as a hex string\nrandom_bytes_hex = secrets.token_hex(64)\n\n# Generate a secure random 64 byte array base64 encoded for use in URLs\nrandom_string = secrets.token_urlsafe(64)\n```\n\nFor more information on the `secrets` module see:\n- https://docs.python.org/3/library/secrets.html\n", + "cve": "semgrep_id:bandit.B311:319:319", + "severity": "Low", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 319 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B311", + "value": "bandit.B311", + "url": "https://semgrep.dev/r/gitlab.bandit.B311" + }, + { + "type": "cwe", + "name": "CWE-338", + "value": "338", + "url": "https://cwe.mitre.org/data/definitions/338.html" + }, + { + "type": "owasp", + "name": "A02:2021 - Cryptographic Failures", + "value": "A02:2021" + }, + { + "type": "owasp", + "name": "A3:2017 - Sensitive Data Exposure", + "value": "A3:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B311", + "value": "B311" + } + ] + } + ], + "scan": { + "analyzer": { + "id": "semgrep", + "name": "Semgrep", + "url": "https://gitlab.com/gitlab-org/security-products/analyzers/semgrep", + "vendor": { + "name": "GitLab" + }, + "version": "6.6.2" + }, + "scanner": { + "id": "semgrep", + "name": "Semgrep", + "url": "https://github.com/returntocorp/semgrep", + "vendor": { + "name": "GitLab" + }, + "version": "1.118.0" + }, + "type": 
"sast", + "start_time": "2025-09-29T21:06:41", + "end_time": "2025-09-29T21:06:48", + "status": "success", + "observability": { + "events": [ + { + "event": "collect_sast_scan_metrics_from_pipeline", + "property": "5c418ec4-3b29-4631-bbbc-61e76f3f2396", + "label": "semgrep", + "value": 0, + "version": "6.6.2", + "exit_code": 0, + "override_count": 0, + "passthrough_count": 0, + "custom_exclude_path_count": 0, + "time_s": 6, + "file_count": 4 + } + ] + } + } +} diff --git a/ee/spec/services/security/ingestion/custom_spec.rb b/ee/spec/services/security/ingestion/custom_spec.rb new file mode 100644 index 00000000000000..2b6c80220ea96e --- /dev/null +++ b/ee/spec/services/security/ingestion/custom_spec.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe 'my custom spec', feature_category: :vulnerability_management do + let(:project) { create(:project) } + let(:user) { create(:user) } + let(:pipeline1) { create(:ci_pipeline, user: user, project: project) } + let(:sast_build) { create(:ee_ci_build, :success, pipeline: pipeline1, project: project) } + let!(:sast_semgrep_artifact_with_correct_primary_identifiers1) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities, job: sast_build) } + + let(:pipeline2) { create(:ci_pipeline, user: user, project: project) } + let(:sast_build2) { create(:ee_ci_build, :success, pipeline: pipeline2) } + let!(:sast_semgrep_artifact_with_incorrect_primary_identifiers) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities_incorrect_primary_identifier, job: sast_build2) } + + let(:pipeline3) { create(:ci_pipeline, user: user, project: project) } + let(:sast_build3) { create(:ee_ci_build, :success, pipeline: pipeline3) } + let!(:sast_semgrep_artifact_with_correct_primary_identifiers2) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities, job: sast_build3) } + + let(:known_keys) { Set.new } + + before do + stub_licensed_features(sast: true, security_dashboard: true) + project.add_maintainer(user) + end + + it 'restores vulnerabilities' do + Security::StoreScansService.execute(pipeline1) + # Security::StoreScanService.execute(sast_semgrep_artifact, known_keys, false) + Security::Ingestion::IngestReportsService.execute(pipeline1) + + puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITY.COUNT: #{Vulnerability.count.inspect}|), "XXXXXXXXXXXXXXXX" + + Vulnerability.where(project_id: project.id).each { |v| puts "Resolving vuln #{v.id}"; ::Vulnerabilities::ResolveService.new(user, v, "first resolution").execute } + Vulnerability.where(project_id: project.id, severity: 'high').each { |v| puts "Confirming vuln #{v.id}"; Vulnerabilities::ConfirmService.new(user, v, "confirming").execute } + Vulnerability.where(project_id: project.id, severity: 'low').each { |v| puts "Dismissing vuln #{v.id}"; Vulnerabilities::DismissService.new(user, v, "dismissing", 'acceptable_risk').execute } + Vulnerability.where(project_id: project.id, severity: 'medium')[0..4].each {|v| puts "Resolving again vuln #{v.id}"; ::Vulnerabilities::ResolveService.new(user, v, "last resolution").execute } + + # binding.pry + + Security::StoreScansService.execute(pipeline2) + Security::Ingestion::IngestReportsService.execute(pipeline2) + + # binding.pry + + puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITY.COUNT: #{Vulnerability.count.inspect}|), "XXXXXXXXXXXXXXXX" + + Security::StoreScansService.execute(pipeline3) + Security::Ingestion::IngestReportsService.execute(pipeline3) + + binding.pry + puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITY.COUNT: 
#{Vulnerability.count.inspect}|), "XXXXXXXXXXXXXXXX" + + high_severity_states = Vulnerability.where(project_id: project.id, severity: 'high', resolved_on_default_branch: false).map { |v| v.finding.state } + expect(high_severity_states).to eq(["confirmed", "confirmed", "confirmed", "confirmed"]) + + + # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITY.COUNT: #{Vulnerability.count.inspect}|), "XXXXXXXXXXXXXXXX" + # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::FINDING.COUNT: #{Vulnerabilities::Finding.count.inspect}|), "XXXXXXXXXXXXXXXX" + # puts "XXXXXXXXXXXXXXXX", (%|SECURITY::FINDING.COUNT: #{Security::Finding.count.inspect}|), "XXXXXXXXXXXXXXXX" + # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::IDENTIFIER.COUNT: #{Vulnerabilities::Identifier.count.inspect}|), "XXXXXXXXXXXXXXXX" + # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::SCANNER.COUNT: #{Vulnerabilities::Scanner.count.inspect}|), "XXXXXXXXXXXXXXXX" + # puts "XXXXXXXXXXXXXXXX", (%|SECURITY::FINDING.COUNT: #{Security::Finding.count.inspect}|), "XXXXXXXXXXXXXXXX" + # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::FINDINGIDENTIFIER.COUNT: #{Vulnerabilities::FindingIdentifier.count.inspect}|), "XXXXXXXXXXXXXXXX" + end +end diff --git a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb index bc399394de2894..f19a109bd3ba74 100644 --- a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb +++ b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb @@ -5,14 +5,33 @@ RSpec.describe Gitlab::BackgroundMigration::RestoreIncorrectVulnerabilityStates, feature_category: :static_application_security_testing do let(:namespaces) { table(:namespaces) } let(:projects) { table(:projects) } + let(:vulnerabilities) { table(:vulnerabilities) } let(:project) { projects.create!(namespace_id: namespace.id, project_namespace_id: namespace.id) } let(:known_keys) { Set.new } - let!(:artifact) { create(:ee_ci_job_artifact, :sast) } + # let!(:sast_semgrep_artifact) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities) } + # let!(:sast_semgrep_artifact_with_incorrect_primary_identifiers) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities_incorrect_primary_identifier) } + + let(:user) { create(:user) } + let(:pipeline) { create(:ci_pipeline, user: user) } + let(:sast_build) { create(:ee_ci_build, :success, pipeline: pipeline) } + let(:sast_semgrep_artifact) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities, job: sast_build) } + + before do + stub_licensed_features(sast: true) + end it 'restores vulnerabilities' do - Security::StoreScanService.execute(artifact, known_keys, false) + # ActiveRecord::Base.logger = Logger.new(STDOUT) + Security::StoreScanService.execute(sast_semgrep_artifact, known_keys, false) + puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITY.COUNT: #{Vulnerability.count.inspect}|), "XXXXXXXXXXXXXXXX" + puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::FINDING.COUNT: #{Vulnerabilities::Finding.count.inspect}|), "XXXXXXXXXXXXXXXX" + puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::IDENTIFIER.COUNT: #{Vulnerabilities::Identifier.count.inspect}|), "XXXXXXXXXXXXXXXX" + puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::SCANNER.COUNT: #{Vulnerabilities::Scanner.count.inspect}|), "XXXXXXXXXXXXXXXX" + puts "XXXXXXXXXXXXXXXX", (%|SECURITY::FINDING.COUNT: #{Security::Finding.count.inspect}|), "XXXXXXXXXXXXXXXX" + puts "XXXXXXXXXXXXXXXX", 
(%|VULNERABILITIES::FINDINGIDENTIFIER.COUNT: #{Vulnerabilities::FindingIdentifier.count.inspect}|), "XXXXXXXXXXXXXXXX" + # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES.LENGTH: #{vulnerabilities.all.length.inspect}|), "XXXXXXXXXXXXXXXX" # create_vulnerability(project_id: project.id, external_id: "semgrep_id") end end -- GitLab From 8ec7edcb0ec4063916a99b8f04902640a6d0767a Mon Sep 17 00:00:00 2001 From: Adam Cohen Date: Mon, 27 Oct 2025 17:09:03 -0400 Subject: [PATCH 04/16] Tests pass now --- .../security/ingestion/custom_spec.rb | 81 ++++++++++++++--- .../restore_incorrect_vulnerability_states.rb | 91 +++++++++++++++++++ ...ore_incorrect_vulnerability_states_spec.rb | 81 +++++++++++++---- 3 files changed, 225 insertions(+), 28 deletions(-) diff --git a/ee/spec/services/security/ingestion/custom_spec.rb b/ee/spec/services/security/ingestion/custom_spec.rb index 2b6c80220ea96e..430eca4703c890 100644 --- a/ee/spec/services/security/ingestion/custom_spec.rb +++ b/ee/spec/services/security/ingestion/custom_spec.rb @@ -29,31 +29,27 @@ # Security::StoreScanService.execute(sast_semgrep_artifact, known_keys, false) Security::Ingestion::IngestReportsService.execute(pipeline1) - puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITY.COUNT: #{Vulnerability.count.inspect}|), "XXXXXXXXXXXXXXXX" - Vulnerability.where(project_id: project.id).each { |v| puts "Resolving vuln #{v.id}"; ::Vulnerabilities::ResolveService.new(user, v, "first resolution").execute } Vulnerability.where(project_id: project.id, severity: 'high').each { |v| puts "Confirming vuln #{v.id}"; Vulnerabilities::ConfirmService.new(user, v, "confirming").execute } Vulnerability.where(project_id: project.id, severity: 'low').each { |v| puts "Dismissing vuln #{v.id}"; Vulnerabilities::DismissService.new(user, v, "dismissing", 'acceptable_risk').execute } Vulnerability.where(project_id: project.id, severity: 'medium')[0..4].each {|v| puts "Resolving again vuln #{v.id}"; ::Vulnerabilities::ResolveService.new(user, v, "last resolution").execute } - # binding.pry - Security::StoreScansService.execute(pipeline2) Security::Ingestion::IngestReportsService.execute(pipeline2) - # binding.pry - - puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITY.COUNT: #{Vulnerability.count.inspect}|), "XXXXXXXXXXXXXXXX" - Security::StoreScansService.execute(pipeline3) Security::Ingestion::IngestReportsService.execute(pipeline3) - binding.pry - puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITY.COUNT: #{Vulnerability.count.inspect}|), "XXXXXXXXXXXXXXXX" + reset_vulnerability_states(project.id) + + expect(Vulnerability.where(project_id: project.id, severity: 'high', resolved_on_default_branch: false).map { |v| v.finding.state }) + .to eq(["confirmed"] * 4) - high_severity_states = Vulnerability.where(project_id: project.id, severity: 'high', resolved_on_default_branch: false).map { |v| v.finding.state } - expect(high_severity_states).to eq(["confirmed", "confirmed", "confirmed", "confirmed"]) + expect(Vulnerability.where(project_id: project.id, severity: 'low', resolved_on_default_branch: false).map { |v| v.finding.state }) + .to eq(["dismissed"] * 2) + expect(Vulnerability.where(project_id: project.id, severity: 'medium', resolved_on_default_branch: false).map { |v| v.finding.state }) + .to eq(["resolved"] * 10) # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITY.COUNT: #{Vulnerability.count.inspect}|), "XXXXXXXXXXXXXXXX" # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::FINDING.COUNT: #{Vulnerabilities::Finding.count.inspect}|), "XXXXXXXXXXXXXXXX" @@ -64,3 +60,64 @@ # puts "XXXXXXXXXXXXXXXX", 
(%|VULNERABILITIES::FINDINGIDENTIFIER.COUNT: #{Vulnerabilities::FindingIdentifier.count.inspect}|), "XXXXXXXXXXXXXXXX" end end + +def reset_vulnerability_states(project_id) + vulnerabilities = Vulnerability.joins(findings: :primary_identifier) + .where(project_id: project_id) + .where.not(vulnerability_identifiers: { external_type: 'semgrep_id' }) + + vulnerabilities.each do |vulnerability| + latest_transition = vulnerability.state_transitions + .where.not(author_id: nil) + .order(created_at: :desc) + .first + next unless latest_transition + + matching_finding = find_matching_finding(vulnerability.finding) + next unless matching_finding + + apply_state_transition(matching_finding.vulnerability, latest_transition) + end +end + +private + +def find_matching_finding(original_finding) + # Try finding by corrected metadata + corrected_metadata = reorder_identifiers(original_finding.raw_metadata) + finding = Vulnerabilities::Finding.find_by(raw_metadata: corrected_metadata) + return finding if finding + + # Fallback to attribute matching + puts "Unable to find vulnerability finding using raw metadata, attempting backup strategy" + Vulnerabilities::Finding.where( + severity: original_finding.severity, + report_type: original_finding.report_type, + location_fingerprint: original_finding.location_fingerprint, + name: original_finding.name, + metadata_version: original_finding.metadata_version + ).where.not(id: original_finding.id).first.tap do |result| + puts "Unable to find match with backup strategy" unless result + end +end + +def reorder_identifiers(raw_metadata) + metadata = JSON.parse(raw_metadata) + metadata['identifiers'] = metadata['identifiers'].partition { |id| id['type'] == 'semgrep_id' }.flatten(1) + JSON.generate(metadata) +end + +def apply_state_transition(vulnerability, transition) + return if vulnerability.state == transition.to_state + + author = User.find(transition.author_id) + + case transition.to_state + when "resolved" + ::Vulnerabilities::ResolveService.new(author, vulnerability, transition.comment).execute + when "confirmed" + ::Vulnerabilities::ConfirmService.new(author, vulnerability, transition.comment).execute + when "dismissed" + ::Vulnerabilities::DismissService.new(author, vulnerability, transition.comment, transition.dismissal_reason).execute + end +end diff --git a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb index 3784fa95912f44..1abbf19e4b97e6 100644 --- a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb +++ b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb @@ -13,11 +13,102 @@ class RestoreIncorrectVulnerabilityStates < BatchedMigrationJob # scope_to ->(relation) { relation.where(column: "value") } feature_category :static_application_security_testing + project_ids = ActiveRecord::Base.connection.execute(<<~SQL).to_a.flatten + SELECT DISTINCT vo.project_id + FROM vulnerability_occurrences vo + JOIN vulnerability_identifiers vi ON vo.primary_identifier_id = vi.id + JOIN vulnerability_scanners vs ON vo.scanner_id = vs.id + WHERE vi.external_type != 'semgrep_id' + AND vs.external_id = 'semgrep' + AND vs.project_id = vo.project_id + AND vo.report_type = 0 + SQL + + # project_ids = Vulnerabilities::Finding + # .joins(:primary_identifier, :scanner) + # .where.not(vulnerability_identifiers: { external_type: 'semgrep_id' }) + # .where( + # vulnerability_scanners: { external_id: 'semgrep' }, + # report_type: 0 + # ) + 
# .where('vulnerability_scanners.project_id = vulnerability_occurrences.project_id') + # .distinct + # .pluck(:project_id) + def perform each_sub_batch do |sub_batch| # Your action on each sub_batch end end + + def reset_vulnerability_states(project_id) + vulnerabilities = Vulnerability.joins(findings: :primary_identifier) + .where(project_id: project_id) + .where.not(vulnerability_identifiers: { external_type: 'semgrep_id' }) + + vulnerabilities.each do |vulnerability| + latest_transition = vulnerability.state_transitions + .where.not(author_id: nil) + .order(created_at: :desc) + .first + next unless latest_transition + + matching_finding = find_matching_finding(vulnerability.finding) + next unless matching_finding + + apply_state_transition(matching_finding.vulnerability, latest_transition) + end + end + + private + + def find_matching_finding(original_finding) + # Try finding by corrected metadata + corrected_metadata = reorder_identifiers(original_finding.raw_metadata) + finding = Vulnerabilities::Finding.find_by(raw_metadata: corrected_metadata) + return finding if finding + + # Fallback to attribute matching + puts "Unable to find vulnerability finding using raw metadata, attempting backup strategy" + + + Vulnerabilities::Finding.where( + severity: original_finding.severity, + # /Users/adam/Documents/programming/gitlab/gdk/gitlab/app/models/concerns/enums/vulnerability.rb + # REPORT_TYPES = { + # sast: 0, + # secret_detection: 4 + # }.with_indifferent_access.freeze + report_type: original_finding.report_type, + location_fingerprint: original_finding.location_fingerprint, + name: original_finding.name, + metadata_version: original_finding.metadata_version + ).where.not(id: original_finding.id).first.tap do |result| + puts "Unable to find match with backup strategy" unless result + end + end + + def reorder_identifiers(raw_metadata) + metadata = JSON.parse(raw_metadata) + metadata['identifiers'] = metadata['identifiers'].partition { |id| id['type'] == 'semgrep_id' }.flatten(1) + JSON.generate(metadata) + end + + def apply_state_transition(vulnerability, transition) + return if vulnerability.state == transition.to_state + + author = User.find(transition.author_id) + + case transition.to_state + when "resolved" + ::Vulnerabilities::ResolveService.new(author, vulnerability, transition.comment).execute + when "confirmed" + ::Vulnerabilities::ConfirmService.new(author, vulnerability, transition.comment).execute + when "dismissed" + ::Vulnerabilities::DismissService.new(author, vulnerability, transition.comment, transition.dismissal_reason).execute + end + end + end end end diff --git a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb index f19a109bd3ba74..c8ee5cbf36eec8 100644 --- a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb +++ b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb @@ -12,26 +12,75 @@ # let!(:sast_semgrep_artifact) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities) } # let!(:sast_semgrep_artifact_with_incorrect_primary_identifiers) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities_incorrect_primary_identifier) } + let(:project) { create(:project) } let(:user) { create(:user) } - let(:pipeline) { create(:ci_pipeline, user: user) } - let(:sast_build) { create(:ee_ci_build, :success, pipeline: pipeline) } - let(:sast_semgrep_artifact) { 
create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities, job: sast_build) } + let(:pipeline1) { create(:ci_pipeline, user: user, project: project) } + let(:sast_build) { create(:ee_ci_build, :success, pipeline: pipeline1, project: project) } + let!(:sast_semgrep_artifact_with_correct_primary_identifiers1) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities, job: sast_build) } + + let(:pipeline2) { create(:ci_pipeline, user: user, project: project) } + let(:sast_build2) { create(:ee_ci_build, :success, pipeline: pipeline2) } + let!(:sast_semgrep_artifact_with_incorrect_primary_identifiers) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities_incorrect_primary_identifier, job: sast_build2) } + + let(:pipeline3) { create(:ci_pipeline, user: user, project: project) } + let(:sast_build3) { create(:ee_ci_build, :success, pipeline: pipeline3) } + let!(:sast_semgrep_artifact_with_correct_primary_identifiers2) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities, job: sast_build3) } + + let(:known_keys) { Set.new } before do - stub_licensed_features(sast: true) + stub_licensed_features(sast: true, security_dashboard: true) + project.add_maintainer(user) end - it 'restores vulnerabilities' do - # ActiveRecord::Base.logger = Logger.new(STDOUT) - Security::StoreScanService.execute(sast_semgrep_artifact, known_keys, false) - puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITY.COUNT: #{Vulnerability.count.inspect}|), "XXXXXXXXXXXXXXXX" - puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::FINDING.COUNT: #{Vulnerabilities::Finding.count.inspect}|), "XXXXXXXXXXXXXXXX" - puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::IDENTIFIER.COUNT: #{Vulnerabilities::Identifier.count.inspect}|), "XXXXXXXXXXXXXXXX" - puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::SCANNER.COUNT: #{Vulnerabilities::Scanner.count.inspect}|), "XXXXXXXXXXXXXXXX" - puts "XXXXXXXXXXXXXXXX", (%|SECURITY::FINDING.COUNT: #{Security::Finding.count.inspect}|), "XXXXXXXXXXXXXXXX" - puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::FINDINGIDENTIFIER.COUNT: #{Vulnerabilities::FindingIdentifier.count.inspect}|), "XXXXXXXXXXXXXXXX" - - # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES.LENGTH: #{vulnerabilities.all.length.inspect}|), "XXXXXXXXXXXXXXXX" - # create_vulnerability(project_id: project.id, external_id: "semgrep_id") + # use a method instead of a subject to avoid rspec memoization + def perform_migration + described_class.new( + start_id: vulnerability_reads.minimum(:id), + end_id: vulnerability_reads.maximum(:id), + batch_table: :vulnerability_reads, + batch_column: :id, + sub_batch_size: sub_batch_size, + pause_ms: 0, + connection: ActiveRecord::Base.connection + ).perform + end + + describe "#perform", feature_category: :static_application_security_testing do + it 'restores vulnerabilities' do + Security::StoreScansService.execute(pipeline1) + # Security::StoreScanService.execute(sast_semgrep_artifact, known_keys, false) + Security::Ingestion::IngestReportsService.execute(pipeline1) + + Vulnerability.where(project_id: project.id).each { |v| puts "Resolving vuln #{v.id}"; ::Vulnerabilities::ResolveService.new(user, v, "first resolution").execute } + Vulnerability.where(project_id: project.id, severity: 'high').each { |v| puts "Confirming vuln #{v.id}"; Vulnerabilities::ConfirmService.new(user, v, "confirming").execute } + Vulnerability.where(project_id: project.id, severity: 'low').each { |v| puts "Dismissing vuln #{v.id}"; Vulnerabilities::DismissService.new(user, v, "dismissing", 'acceptable_risk').execute } + 
Vulnerability.where(project_id: project.id, severity: 'medium')[0..4].each {|v| puts "Resolving again vuln #{v.id}"; ::Vulnerabilities::ResolveService.new(user, v, "last resolution").execute } + + Security::StoreScansService.execute(pipeline2) + Security::Ingestion::IngestReportsService.execute(pipeline2) + + Security::StoreScansService.execute(pipeline3) + Security::Ingestion::IngestReportsService.execute(pipeline3) + + reset_vulnerability_states(project.id) + + expect(Vulnerability.where(project_id: project.id, severity: 'high', resolved_on_default_branch: false).map { |v| v.finding.state }) + .to eq(["confirmed"] * 4) + + expect(Vulnerability.where(project_id: project.id, severity: 'low', resolved_on_default_branch: false).map { |v| v.finding.state }) + .to eq(["dismissed"] * 2) + + expect(Vulnerability.where(project_id: project.id, severity: 'medium', resolved_on_default_branch: false).map { |v| v.finding.state }) + .to eq(["resolved"] * 10) + + # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITY.COUNT: #{Vulnerability.count.inspect}|), "XXXXXXXXXXXXXXXX" + # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::FINDING.COUNT: #{Vulnerabilities::Finding.count.inspect}|), "XXXXXXXXXXXXXXXX" + # puts "XXXXXXXXXXXXXXXX", (%|SECURITY::FINDING.COUNT: #{Security::Finding.count.inspect}|), "XXXXXXXXXXXXXXXX" + # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::IDENTIFIER.COUNT: #{Vulnerabilities::Identifier.count.inspect}|), "XXXXXXXXXXXXXXXX" + # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::SCANNER.COUNT: #{Vulnerabilities::Scanner.count.inspect}|), "XXXXXXXXXXXXXXXX" + # puts "XXXXXXXXXXXXXXXX", (%|SECURITY::FINDING.COUNT: #{Security::Finding.count.inspect}|), "XXXXXXXXXXXXXXXX" + # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::FINDINGIDENTIFIER.COUNT: #{Vulnerabilities::FindingIdentifier.count.inspect}|), "XXXXXXXXXXXXXXXX" + end end end -- GitLab From c8631402649038f8be98d62fd0b0bf4adbd71600 Mon Sep 17 00:00:00 2001 From: Adam Cohen Date: Thu, 30 Oct 2025 19:54:22 -0400 Subject: [PATCH 05/16] add restore-vulnerability-states-for-project.rb --- restore-vulnerability-states-for-project.rb | 215 ++++++++++++++++++++ 1 file changed, 215 insertions(+) create mode 100755 restore-vulnerability-states-for-project.rb diff --git a/restore-vulnerability-states-for-project.rb b/restore-vulnerability-states-for-project.rb new file mode 100755 index 00000000000000..eda8950a0ddf60 --- /dev/null +++ b/restore-vulnerability-states-for-project.rb @@ -0,0 +1,215 @@ +#!/usr/bin/env ruby +require "bundler/inline" +gemfile(true) do + source "https://rubygems.org" + gem "activerecord" + gem "pg" +end +require 'active_record' + +MAIN_DB = ENV["MAIN_CONNECTION_STRING"] +SEC_DB = ENV["SEC_CONNECTION_STRING"] + +class MainRecord < ActiveRecord::Base + self.abstract_class = true + establish_connection(MAIN_DB) +end + +class SecApplicationRecord < ActiveRecord::Base + self.abstract_class = true + establish_connection(SEC_DB) +end + +class Route < MainRecord + belongs_to :source, polymorphic: true +end + +class Project < MainRecord + has_one :route, as: :source + + def full_path + route&.path + end +end + +class User < MainRecord +end + +module Enums + module Vulnerability + VULNERABILITY_STATES = { + detected: 1, + confirmed: 4, + resolved: 3, + dismissed: 2 + }.with_indifferent_access.freeze + + def self.vulnerability_state_by_value(value) + VULNERABILITY_STATES.key(value) + end + end +end + +module Vulnerabilities + class StateTransition < SecApplicationRecord + self.table_name = 'vulnerability_state_transitions' + + belongs_to 
:vulnerability, class_name: 'Vulnerability', inverse_of: :state_transitions + belongs_to :vulnerability_occurrence, optional: true, class_name: 'Vulnerabilities::Finding' + end + + class Finding < SecApplicationRecord + self.table_name = 'vulnerability_occurrences' + + belongs_to :primary_identifier, class_name: 'Vulnerabilities::Identifier', foreign_key: 'primary_identifier_id' + belongs_to :scanner, class_name: 'Vulnerabilities::Scanner', foreign_key: 'scanner_id' + belongs_to :vulnerability, class_name: 'Vulnerability', inverse_of: :findings, foreign_key: 'vulnerability_id' + end + + class Identifier < SecApplicationRecord + self.table_name = 'vulnerability_identifiers' + end + + class Scanner < SecApplicationRecord + self.table_name = 'vulnerability_scanners' + end +end + +class Vulnerability < SecApplicationRecord + has_many :findings, class_name: '::Vulnerabilities::Finding', inverse_of: :vulnerability + has_many :state_transitions, class_name: '::Vulnerabilities::StateTransition', inverse_of: :vulnerability + + def finding + @finding ||= findings.first + end +end + +# Set timeouts on both connections +MainRecord.connection.execute("SET statement_timeout = '30min';") +SecApplicationRecord.connection.execute("SET statement_timeout = '30min';") + +def reset_vulnerability_states(project_id) + puts "Searching for vulnerabilities with incorrect primary identifier" + vulnerabilities = Vulnerability.joins(findings: :primary_identifier) + .where(project_id: project_id) + .where.not(vulnerability_identifiers: { external_type: 'semgrep_id' }) + + num_vulnerabilities_found = vulnerabilities.length + puts "Found #{num_vulnerabilities_found} vulnerabilities with incorrect primary identifier" + + vulnerabilities.each_with_index do |vulnerability, idx| + puts "Processing vulnerability #{idx} of #{vulnerabilities.count} [#{((idx.to_f/num_vulnerabilities_found)*100).round(2)}%]" + + latest_transition = vulnerability.state_transitions + .where.not(author_id: nil) + .order(created_at: :desc) + .first + + unless latest_transition + puts "Unable to find transition for vulnerability #{vulnerability.id}" + next + end + + puts "Found transition for vulnerability #{vulnerability.id}" + + matching_finding = find_matching_finding(vulnerability.finding) + next unless matching_finding + + apply_state_transition(matching_finding.vulnerability, latest_transition) + end +end + +def reset_vulnerability_states_sql(project_id) + sql = <<-SQL + WITH incorrect_vulns AS ( + SELECT DISTINCT + vo.id as finding_id, + vo.vulnerability_id, + vo.severity, + vo.report_type, + vo.location_fingerprint, + vo.name, + vo.metadata_version + FROM vulnerability_occurrences vo + JOIN vulnerability_identifiers vi ON vo.primary_identifier_id = vi.id + WHERE vo.project_id = #{project_id} + AND vi.external_type != 'semgrep_id' + ), + matched_findings AS ( + SELECT + iv.vulnerability_id as original_vuln_id, + vo2.vulnerability_id as matched_vuln_id + FROM incorrect_vulns iv + JOIN vulnerability_occurrences vo2 ON ( + vo2.severity = iv.severity + AND vo2.report_type = iv.report_type + AND vo2.location_fingerprint = iv.location_fingerprint + AND vo2.name = iv.name + AND vo2.metadata_version = iv.metadata_version + AND vo2.id != iv.finding_id + AND vo2.project_id = #{project_id} + ) + ) + SELECT * FROM matched_findings; + SQL + + SecApplicationRecord.connection.execute(sql) +end + +def find_matching_finding(original_finding) + # Try finding by corrected metadata + puts "Searching for original vulnerability finding for #{original_finding.id}" + 
+ puts " severity: #{original_finding.severity}" + puts " report_type: #{original_finding.report_type}" + puts " location_fingerprint: #{original_finding.location_fingerprint&.unpack('H*')&.first}" + puts " name: #{original_finding.name}" + puts " metadata_version: #{original_finding.metadata_version}" + puts " id not: #{original_finding.id}" + + Vulnerabilities::Finding.where( + severity: original_finding.severity, + report_type: original_finding.report_type, + location_fingerprint: original_finding.location_fingerprint, + name: original_finding.name, + metadata_version: original_finding.metadata_version + ).where.not(id: original_finding.id).first.tap do |result| + puts "Unable to find match with backup strategy" unless result + end +end + +def reorder_identifiers(raw_metadata) + metadata = JSON.parse(raw_metadata) + metadata['identifiers'] = metadata['identifiers'].partition { |id| id['type'] == 'semgrep_id' }.flatten(1) + JSON.generate(metadata) +end + +def apply_state_transition(vulnerability, transition) + # skip this vulnerability if the it already has the correct state + if vulnerability.state == transition.to_state + puts "Current vulnerability state #{Enums::Vulnerability.vulnerability_state_by_value(vulnerability.state)} matches original state: #{Enums::Vulnerability.vulnerability_state_by_value(transition.to_state)}, skipping" + return + end + + author = User.find(transition.author_id) + + case transition.to_state + when "resolved" + puts "Resolving vulnerability #{vulnerability.id} with comment #{transition.comment}" + # ::Vulnerabilities::ResolveService.new(author, vulnerability, transition.comment).execute + when "confirmed" + puts "Confirming vulnerability #{vulnerability.id} with comment #{transition.comment}" + # ::Vulnerabilities::ConfirmService.new(author, vulnerability, transition.comment).execute + when "dismissed" + puts "Dismissing vulnerability #{vulnerability.id} with comment #{transition.comment}" + # ::Vulnerabilities::DismissService.new(author, vulnerability, transition.comment, transition.dismissal_reason).execute + end +end + +reset_vulnerability_states(57498926) +# result = reset_vulnerability_states_sql(57498926) + +# result.each do |row| +# # or access specific columns: +# puts "Original: #{row['original_vuln_id']}, Matched: #{row['matched_vuln_id']}" +# end -- GitLab From ff814cb25b0f246445e511f88021b8b83e0cc7e7 Mon Sep 17 00:00:00 2001 From: Adam Cohen Date: Fri, 31 Oct 2025 13:14:23 -0400 Subject: [PATCH 06/16] Add report type to reset_vulnerability_states --- .../restore_incorrect_vulnerability_states.rb | 13 ++++++++++++- restore-vulnerability-states-for-project.rb | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb index 1abbf19e4b97e6..67e46c53cea728 100644 --- a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb +++ b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb @@ -75,10 +75,21 @@ def find_matching_finding(original_finding) Vulnerabilities::Finding.where( severity: original_finding.severity, # /Users/adam/Documents/programming/gitlab/gdk/gitlab/app/models/concerns/enums/vulnerability.rb + # /Users/adam/Documents/programming/gitlab/gdk/gitlab/ee/app/models/concerns/ee/enums/vulnerability.rb + # REPORT_TYPES = { # sast: 0, + # dependency_scanning: 1, + # container_scanning: 2, + # dast: 3, # secret_detection: 4 - # 
}.with_indifferent_access.freeze + # coverage_fuzzing: 5, + # api_fuzzing: 6, + # cluster_image_scanning: 7, + # container_scanning_for_registry: 8, + # generic: 99 + # }.freeze + report_type: original_finding.report_type, location_fingerprint: original_finding.location_fingerprint, name: original_finding.name, diff --git a/restore-vulnerability-states-for-project.rb b/restore-vulnerability-states-for-project.rb index eda8950a0ddf60..7f9aa506b41657 100755 --- a/restore-vulnerability-states-for-project.rb +++ b/restore-vulnerability-states-for-project.rb @@ -92,6 +92,7 @@ def reset_vulnerability_states(project_id) puts "Searching for vulnerabilities with incorrect primary identifier" vulnerabilities = Vulnerability.joins(findings: :primary_identifier) .where(project_id: project_id) + .where(report_type: 0) .where.not(vulnerability_identifiers: { external_type: 'semgrep_id' }) num_vulnerabilities_found = vulnerabilities.length -- GitLab From 980ad8477b0973ef51d56cfe2b00d3131aab2259 Mon Sep 17 00:00:00 2001 From: Adam Cohen Date: Wed, 5 Nov 2025 11:53:16 -0500 Subject: [PATCH 07/16] Spec now passes --- .../restore_incorrect_vulnerability_states.rb | 296 +++++++++++----- restore-vulnerability-states-for-project.rb | 334 ++++++++++++------ ...ore_incorrect_vulnerability_states_spec.rb | 29 +- 3 files changed, 457 insertions(+), 202 deletions(-) diff --git a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb index 67e46c53cea728..6711442efa2a78 100644 --- a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb +++ b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb @@ -13,113 +13,231 @@ class RestoreIncorrectVulnerabilityStates < BatchedMigrationJob # scope_to ->(relation) { relation.where(column: "value") } feature_category :static_application_security_testing - project_ids = ActiveRecord::Base.connection.execute(<<~SQL).to_a.flatten - SELECT DISTINCT vo.project_id - FROM vulnerability_occurrences vo - JOIN vulnerability_identifiers vi ON vo.primary_identifier_id = vi.id - JOIN vulnerability_scanners vs ON vo.scanner_id = vs.id - WHERE vi.external_type != 'semgrep_id' - AND vs.external_id = 'semgrep' - AND vs.project_id = vo.project_id - AND vo.report_type = 0 - SQL - - # project_ids = Vulnerabilities::Finding - # .joins(:primary_identifier, :scanner) - # .where.not(vulnerability_identifiers: { external_type: 'semgrep_id' }) - # .where( - # vulnerability_scanners: { external_id: 'semgrep' }, - # report_type: 0 - # ) - # .where('vulnerability_scanners.project_id = vulnerability_occurrences.project_id') - # .distinct - # .pluck(:project_id) + module Migratable + module Enums + module Vulnerability + REPORT_TYPES = { + sast: 0, + }.freeze + + VULNERABILITY_STATES = { + detected: 1, + confirmed: 4, + resolved: 3, + dismissed: 2 + }.with_indifferent_access.freeze + + def self.vulnerability_states + VULNERABILITY_STATES + end + end + end + + module Vulnerabilities + class StateTransition < SecApplicationRecord + self.table_name = 'vulnerability_state_transitions' + + enum :from_state, ::Enums::Vulnerability.vulnerability_states, prefix: true + enum :to_state, ::Enums::Vulnerability.vulnerability_states, prefix: true + + belongs_to :vulnerability, class_name: 'Vulnerability', inverse_of: :state_transitions + belongs_to :vulnerability_occurrence, optional: true, class_name: 'Vulnerabilities::Finding' + end + + class Finding < SecApplicationRecord + 
self.table_name = 'vulnerability_occurrences' + + belongs_to :primary_identifier, class_name: 'Vulnerabilities::Identifier', foreign_key: 'primary_identifier_id' + belongs_to :scanner, class_name: 'Vulnerabilities::Scanner', foreign_key: 'scanner_id' + belongs_to :vulnerability, class_name: 'Vulnerability', inverse_of: :findings, foreign_key: 'vulnerability_id' + end + + class Identifier < SecApplicationRecord + self.table_name = 'vulnerability_identifiers' + end + + class Scanner < SecApplicationRecord + self.table_name = 'vulnerability_scanners' + end + end + + class Vulnerability < SecApplicationRecord + has_many :findings, class_name: '::Vulnerabilities::Finding', inverse_of: :vulnerability + has_many :state_transitions, class_name: '::Vulnerabilities::StateTransition', inverse_of: :vulnerability + + enum :state, ::Enums::Vulnerability.vulnerability_states + + def finding + @finding ||= findings.first + end + end + end + + # project_ids = ActiveRecord::Base.connection.execute(<<~SQL).to_a.flatten + # SELECT DISTINCT vo.project_id + # FROM vulnerability_occurrences vo + # JOIN vulnerability_identifiers vi ON vo.primary_identifier_id = vi.id + # JOIN vulnerability_scanners vs ON vo.scanner_id = vs.id + # WHERE vi.external_type in ('cwe', 'owasp') + # AND vs.external_id = 'semgrep' + # AND vs.project_id = vo.project_id + # AND vo.report_type = 0 + # SQL def perform - each_sub_batch do |sub_batch| - # Your action on each sub_batch + user_id = Users::Internal.security_bot.id + + vulnerabilities_by_project = Migratable::Vulnerability + .joins(findings: [:primary_identifier, :scanner]) + .where(report_type: 0) + .where(vulnerability_identifiers: { external_type: ['cwe', 'owasp'] }) + .where(vulnerability_scanners: { external_id: 'semgrep' }) + .where('vulnerability_scanners.project_id = vulnerability_occurrences.project_id') + .includes( + findings: [:primary_identifier], + state_transitions: [] + ) + .group_by(&:project_id) + + # project_ids = Migratable::Vulnerabilities::Finding + # .joins(:primary_identifier, :scanner) + # .where(vulnerability_identifiers: { external_type: ['cwe', 'owasp'] }) + # .where( + # vulnerability_scanners: { external_id: 'semgrep' }, + # report_type: 0 + # ) + # .where('vulnerability_scanners.project_id = vulnerability_occurrences.project_id') + # .distinct + # .pluck(:project_id) + + # project_ids.each do |project_id| + # reset_vulnerability_states(project_id) + # end + + # each_sub_batch do |sub_batch| + # # Your action on each sub_batch + # end + + vulnerabilities_by_project.each_with_index do |(project_id, vulnerabilities), idx| + reset_vulnerability_states(project_id, vulnerabilities) end end - def reset_vulnerability_states(project_id) - vulnerabilities = Vulnerability.joins(findings: :primary_identifier) - .where(project_id: project_id) - .where.not(vulnerability_identifiers: { external_type: 'semgrep_id' }) + def reset_vulnerability_states(project_id, vulnerabilities) + puts "Searching for vulnerabilities with incorrect primary identifier for project id #{project_id}" + + num_vulnerabilities_found = vulnerabilities.length + puts "Found #{num_vulnerabilities_found} vulnerabilities with incorrect primary identifier" + + # Build lookup keys for all original findings + original_findings_data = vulnerabilities.map do |vuln| + finding = vuln.finding + { + vulnerability: vuln, + finding: finding, + lookup_key: [ + finding.severity, + finding.report_type, + finding.location_fingerprint&.unpack('H*')&.first, + finding.name, + finding.metadata_version + ] + } + end 
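+        # Editorial aside (illustration only, not part of the patch): the lookup key
+        # stores the binary location_fingerprint column as hex via unpack('H*'); the
+        # inverse, pack('H*'), turns the hex string back into the raw bytes. A minimal
+        # round trip, assuming the stored value is a raw SHA1 digest of a hypothetical
+        # location string:
+        #
+        #   require 'digest'
+        #
+        #   raw = Digest::SHA1.digest('app/models/user.rb:42') # 20 raw bytes, as stored
+        #   hex = raw.unpack('H*').first                        # 40-character hex string
+        #   [hex].pack('H*') == raw                             # => true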
- vulnerabilities.each do |vulnerability| - latest_transition = vulnerability.state_transitions - .where.not(author_id: nil) - .order(created_at: :desc) - .first - next unless latest_transition + # Batch query for all matching findings + puts "Batch querying for matching findings..." + + conditions = original_findings_data.map do |data| + finding = data[:finding] + { + severity: finding.severity, + report_type: finding.report_type, + location_fingerprint: finding.location_fingerprint, + name: finding.name, + metadata_version: finding.metadata_version + } + end - matching_finding = find_matching_finding(vulnerability.finding) - next unless matching_finding + # Get all original finding IDs to exclude + original_finding_ids = original_findings_data.map { |d| d[:finding].id } - apply_state_transition(matching_finding.vulnerability, latest_transition) + if original_finding_ids.empty? + puts "Unable to find vulnerabilities for project #{project_id}" + return end - end - private - - def find_matching_finding(original_finding) - # Try finding by corrected metadata - corrected_metadata = reorder_identifiers(original_finding.raw_metadata) - finding = Vulnerabilities::Finding.find_by(raw_metadata: corrected_metadata) - return finding if finding - - # Fallback to attribute matching - puts "Unable to find vulnerability finding using raw metadata, attempting backup strategy" - - - Vulnerabilities::Finding.where( - severity: original_finding.severity, - # /Users/adam/Documents/programming/gitlab/gdk/gitlab/app/models/concerns/enums/vulnerability.rb - # /Users/adam/Documents/programming/gitlab/gdk/gitlab/ee/app/models/concerns/ee/enums/vulnerability.rb - - # REPORT_TYPES = { - # sast: 0, - # dependency_scanning: 1, - # container_scanning: 2, - # dast: 3, - # secret_detection: 4 - # coverage_fuzzing: 5, - # api_fuzzing: 6, - # cluster_image_scanning: 7, - # container_scanning_for_registry: 8, - # generic: 99 - # }.freeze - - report_type: original_finding.report_type, - location_fingerprint: original_finding.location_fingerprint, - name: original_finding.name, - metadata_version: original_finding.metadata_version - ).where.not(id: original_finding.id).first.tap do |result| - puts "Unable to find match with backup strategy" unless result + # Build a complex OR query + query = nil + conditions.each do |cond| + subquery = Vulnerabilities::Finding.where(cond) + query = query ? 
query.or(subquery) : subquery end - end - def reorder_identifiers(raw_metadata) - metadata = JSON.parse(raw_metadata) - metadata['identifiers'] = metadata['identifiers'].partition { |id| id['type'] == 'semgrep_id' }.flatten(1) - JSON.generate(metadata) - end + matching_findings = query + .where.not(id: original_finding_ids) + .where(project_id: project_id) # Add project filter if applicable + .includes(:vulnerability) + .to_a + + # Build lookup hash for matched findings + matched_findings_hash = matching_findings.group_by do |finding| + [ + finding.severity, + finding.report_type, + finding.location_fingerprint&.unpack('H*')&.first, + finding.name, + finding.metadata_version + ] + end - def apply_state_transition(vulnerability, transition) - return if vulnerability.state == transition.to_state + # Process each vulnerability with pre-fetched data + original_findings_data.each_with_index do |data, idx| + puts "Processing vulnerability #{idx} of #{num_vulnerabilities_found} [#{((idx.to_f/num_vulnerabilities_found)*100).round(2)}%]" - author = User.find(transition.author_id) + vulnerability = data[:vulnerability] + finding = data[:finding] - case transition.to_state - when "resolved" - ::Vulnerabilities::ResolveService.new(author, vulnerability, transition.comment).execute - when "confirmed" - ::Vulnerabilities::ConfirmService.new(author, vulnerability, transition.comment).execute - when "dismissed" - ::Vulnerabilities::DismissService.new(author, vulnerability, transition.comment, transition.dismissal_reason).execute + latest_transition = vulnerability.state_transitions + .select { |t| t.author_id.present? } + .max_by(&:created_at) + + # Look up matching finding from hash + matched_findings = matched_findings_hash[data[:lookup_key]] + matching_finding = matched_findings&.first + + unless latest_transition + if matching_finding + puts "Unable to find vulnerability_state_transition for `semgrep 6.7.0` vulnerability ID #{vulnerability.id} (corresponding `semgrep >= 6.7.1` ID: #{matching_finding.vulnerability.id})" + else + puts "Unable to find vulnerability_state_transition for `semgrep 6.7.0` vulnerability ID #{vulnerability.id} (no corresponding `semgrep >= 6.7.1` ID)" + end + next + end + + unless matching_finding + puts "Unable to find match for `semgrep 6.7.0` vulnerability ID #{vulnerability.id}" + next + end + + puts "`semgrep 6.7.0` ID #{vulnerability.id} matches `semgrep >= 6.7.1` vulnerability ID #{matching_finding.vulnerability.id}" + + apply_state_transition(project_id, vulnerability, finding, matching_finding.vulnerability, matching_finding, latest_transition) end end + def apply_state_transition(project_id, old_vulnerability, old_finding, current_vulnerability, current_finding, transition) + puts "Updating incorrect primary identifier #{old_finding.primary_identifier_id} to correct primary identifier #{current_finding.primary_identifier_id}" + puts "Deleting vulerability #{current_vulnerability.id}" + old_finding.update(primary_identifier_id: current_finding.primary_identifier_id) + old_vulnerability.update( + state: transition.to_state, + resolved_on_default_branch: current_vulnerability.resolved_on_default_branch, + present_on_default_branch: current_vulnerability.present_on_default_branch + ) + current_vulnerability.destroy + old_vulnerability.state_transitions.where(author_id: nil).destroy_all + end end end end diff --git a/restore-vulnerability-states-for-project.rb b/restore-vulnerability-states-for-project.rb index 7f9aa506b41657..c4cdd8e517d598 100755 --- 
a/restore-vulnerability-states-for-project.rb +++ b/restore-vulnerability-states-for-project.rb @@ -2,6 +2,8 @@ require "bundler/inline" gemfile(true) do source "https://rubygems.org" + gem 'pry' + gem 'pry-byebug' gem "activerecord" gem "pg" end @@ -37,6 +39,10 @@ class User < MainRecord module Enums module Vulnerability + REPORT_TYPES = { + sast: 0, + }.freeze + VULNERABILITY_STATES = { detected: 1, confirmed: 4, @@ -44,8 +50,8 @@ module Vulnerability dismissed: 2 }.with_indifferent_access.freeze - def self.vulnerability_state_by_value(value) - VULNERABILITY_STATES.key(value) + def self.vulnerability_states + VULNERABILITY_STATES end end end @@ -54,6 +60,9 @@ module Vulnerabilities class StateTransition < SecApplicationRecord self.table_name = 'vulnerability_state_transitions' + enum :from_state, ::Enums::Vulnerability.vulnerability_states, prefix: true + enum :to_state, ::Enums::Vulnerability.vulnerability_states, prefix: true + belongs_to :vulnerability, class_name: 'Vulnerability', inverse_of: :state_transitions belongs_to :vulnerability_occurrence, optional: true, class_name: 'Vulnerabilities::Finding' end @@ -79,6 +88,8 @@ class Vulnerability < SecApplicationRecord has_many :findings, class_name: '::Vulnerabilities::Finding', inverse_of: :vulnerability has_many :state_transitions, class_name: '::Vulnerabilities::StateTransition', inverse_of: :vulnerability + enum :state, ::Enums::Vulnerability.vulnerability_states + def finding @finding ||= findings.first end @@ -88,129 +99,234 @@ def finding MainRecord.connection.execute("SET statement_timeout = '30min';") SecApplicationRecord.connection.execute("SET statement_timeout = '30min';") -def reset_vulnerability_states(project_id) - puts "Searching for vulnerabilities with incorrect primary identifier" - vulnerabilities = Vulnerability.joins(findings: :primary_identifier) - .where(project_id: project_id) - .where(report_type: 0) - .where.not(vulnerability_identifiers: { external_type: 'semgrep_id' }) +class Processor + def initialize(project_id = nil) + @transitions_by_project = Hash.new { |h, k| h[k] = { resolved: [], confirmed: [], dismissed: [] } } + @project_id = project_id + end + + def execute + process_everything = false + + if process_everything + affected_project_ids = Vulnerabilities::Finding + .joins(:primary_identifier) + .joins(:scanner) + .where(vulnerability_identifiers: { external_type: ['cwe', 'owasp'] }) + .where(vulnerability_scanners: { external_id: 'semgrep' }) + .where('vulnerability_scanners.project_id = vulnerability_occurrences.project_id') + .where(report_type: 0) + .distinct + .pluck(:project_id) + + affected_project_ids.each_with_index do |project_id, idx| + puts "" + puts "Processing project #{idx+1} of #{affected_project_ids.length}" + reset_vulnerability_states(project_id) + puts "" + puts "" + end + end + + + if project_id + reset_vulnerability_states(project_id) + else + # affected_project_ids = File.read('clario-clinical-affected-project-ids.txt').split("\n") + affected_project_ids = File.read('ds365ai-affected-project-ids.txt').split("\n") - num_vulnerabilities_found = vulnerabilities.length - puts "Found #{num_vulnerabilities_found} vulnerabilities with incorrect primary identifier" + puts "Found #{affected_project_ids.length} affected project ids" - vulnerabilities.each_with_index do |vulnerability, idx| - puts "Processing vulnerability #{idx} of #{vulnerabilities.count} [#{((idx.to_f/num_vulnerabilities_found)*100).round(2)}%]" + affected_project_ids.each_with_index do |project_id, idx| + puts "" + 
puts "Processing project #{idx+1} of #{affected_project_ids.length}" + reset_vulnerability_states(project_id) + puts "" + puts "" + end + end - latest_transition = vulnerability.state_transitions - .where.not(author_id: nil) - .order(created_at: :desc) - .first + print_transition_summary + end - unless latest_transition - puts "Unable to find transition for vulnerability #{vulnerability.id}" - next + attr_accessor :transitions_by_project + attr_reader :project_id + + def reset_vulnerability_states(project_id) + puts "Searching for vulnerabilities with incorrect primary identifier for project id #{project_id}" + + # Eager load all the data we need upfront + vulnerabilities = Vulnerability.joins(findings: :primary_identifier) + .where(project_id: project_id) + .where(report_type: 0) + .where(vulnerability_identifiers: { external_type: ['cwe', 'owasp'] }) + .includes( + findings: [:primary_identifier], + state_transitions: [] + ) + + num_vulnerabilities_found = vulnerabilities.length + puts "Found #{num_vulnerabilities_found} vulnerabilities with incorrect primary identifier" + + # Build lookup keys for all original findings + original_findings_data = vulnerabilities.map do |vuln| + finding = vuln.finding + { + vulnerability: vuln, + finding: finding, + lookup_key: [ + finding.severity, + finding.report_type, + finding.location_fingerprint&.unpack('H*')&.first, + finding.name, + finding.metadata_version + ] + } end - puts "Found transition for vulnerability #{vulnerability.id}" + # Batch query for all matching findings + puts "Batch querying for matching findings..." + + conditions = original_findings_data.map do |data| + finding = data[:finding] + { + severity: finding.severity, + report_type: finding.report_type, + location_fingerprint: finding.location_fingerprint, + name: finding.name, + metadata_version: finding.metadata_version + } + end - matching_finding = find_matching_finding(vulnerability.finding) - next unless matching_finding + # Get all original finding IDs to exclude + original_finding_ids = original_findings_data.map { |d| d[:finding].id } - apply_state_transition(matching_finding.vulnerability, latest_transition) - end -end + if original_finding_ids.empty? + puts "Unable to find vulnerabilities for project #{project_id}" + return + end -def reset_vulnerability_states_sql(project_id) - sql = <<-SQL - WITH incorrect_vulns AS ( - SELECT DISTINCT - vo.id as finding_id, - vo.vulnerability_id, - vo.severity, - vo.report_type, - vo.location_fingerprint, - vo.name, - vo.metadata_version - FROM vulnerability_occurrences vo - JOIN vulnerability_identifiers vi ON vo.primary_identifier_id = vi.id - WHERE vo.project_id = #{project_id} - AND vi.external_type != 'semgrep_id' - ), - matched_findings AS ( - SELECT - iv.vulnerability_id as original_vuln_id, - vo2.vulnerability_id as matched_vuln_id - FROM incorrect_vulns iv - JOIN vulnerability_occurrences vo2 ON ( - vo2.severity = iv.severity - AND vo2.report_type = iv.report_type - AND vo2.location_fingerprint = iv.location_fingerprint - AND vo2.name = iv.name - AND vo2.metadata_version = iv.metadata_version - AND vo2.id != iv.finding_id - AND vo2.project_id = #{project_id} - ) - ) - SELECT * FROM matched_findings; - SQL - - SecApplicationRecord.connection.execute(sql) -end + # Build a complex OR query + query = nil + conditions.each do |cond| + subquery = Vulnerabilities::Finding.where(cond) + query = query ? 
query.or(subquery) : subquery + end -def find_matching_finding(original_finding) - # Try finding by corrected metadata - puts "Searching for original vulnerability finding for #{original_finding.id}" - - puts " severity: #{original_finding.severity}" - puts " report_type: #{original_finding.report_type}" - puts " location_fingerprint: #{original_finding.location_fingerprint&.unpack('H*')&.first}" - puts " name: #{original_finding.name}" - puts " metadata_version: #{original_finding.metadata_version}" - puts " id not: #{original_finding.id}" - - Vulnerabilities::Finding.where( - severity: original_finding.severity, - report_type: original_finding.report_type, - location_fingerprint: original_finding.location_fingerprint, - name: original_finding.name, - metadata_version: original_finding.metadata_version - ).where.not(id: original_finding.id).first.tap do |result| - puts "Unable to find match with backup strategy" unless result + matching_findings = query + .where.not(id: original_finding_ids) + .where(project_id: project_id) # Add project filter if applicable + .includes(:vulnerability) + .to_a + + # Build lookup hash for matched findings + matched_findings_hash = matching_findings.group_by do |finding| + [ + finding.severity, + finding.report_type, + finding.location_fingerprint&.unpack('H*')&.first, + finding.name, + finding.metadata_version + ] + end + + # Process each vulnerability with pre-fetched data + original_findings_data.each_with_index do |data, idx| + puts "Processing vulnerability #{idx} of #{num_vulnerabilities_found} [#{((idx.to_f/num_vulnerabilities_found)*100).round(2)}%]" + + vulnerability = data[:vulnerability] + finding = data[:finding] + + latest_transition = vulnerability.state_transitions + .select { |t| t.author_id.present? 
} + .max_by(&:created_at) + + # Look up matching finding from hash + matched_findings = matched_findings_hash[data[:lookup_key]] + matching_finding = matched_findings&.first + + unless latest_transition + if matching_finding + puts "Unable to find vulnerability_state_transition for `semgrep 6.7.0` vulnerability ID #{vulnerability.id} (corresponding `semgrep >= 6.7.1` ID: #{matching_finding.vulnerability.id})" + else + puts "Unable to find vulnerability_state_transition for `semgrep 6.7.0` vulnerability ID #{vulnerability.id} (no corresponding `semgrep >= 6.7.1` ID)" + end + next + end + + unless matching_finding + puts "Unable to find match for `semgrep 6.7.0` vulnerability ID #{vulnerability.id}" + next + end + + puts "`semgrep 6.7.0` ID #{vulnerability.id} matches `semgrep >= 6.7.1` vulnerability ID #{matching_finding.vulnerability.id}" + + apply_state_transition(project_id, vulnerability, finding, matching_finding.vulnerability, matching_finding, latest_transition) + end end -end -def reorder_identifiers(raw_metadata) - metadata = JSON.parse(raw_metadata) - metadata['identifiers'] = metadata['identifiers'].partition { |id| id['type'] == 'semgrep_id' }.flatten(1) - JSON.generate(metadata) -end + private -def apply_state_transition(vulnerability, transition) - # skip this vulnerability if the it already has the correct state - if vulnerability.state == transition.to_state - puts "Current vulnerability state #{Enums::Vulnerability.vulnerability_state_by_value(vulnerability.state)} matches original state: #{Enums::Vulnerability.vulnerability_state_by_value(transition.to_state)}, skipping" - return + def apply_state_transition(project_id, old_vulnerability, old_finding, current_vulnerability, current_finding, transition) + # skip this vulnerability if the it already has the correct state + if current_vulnerability.state == transition.to_state + puts "Current vulnerability state #{current_vulnerability.state} matches original state: #{transition.to_state}, skipping" + return + end + + puts "Updating old primary identifier #{old_finding.primary_identifier_id} to new primary identifier #{current_finding.primary_identifier_id}" + puts "Deleting vulerability #{current_vulnerability.id}" + + author = User.find(transition.author_id) + + case transition.to_state + when "resolved" + puts "Resolving vulnerability #{current_vulnerability.id} with comment #{transition.comment}" + transitions_by_project[project_id][:resolved] << { + "current_vulnerability_id" => current_vulnerability.id, "old_vulnerability_id" => old_vulnerability.id, "comment" => transition.comment + } + # ::Vulnerabilities::ResolveService.new(author, vulnerability, transition.comment).execute + when "confirmed" + puts "Confirming vulnerability #{current_vulnerability.id} with comment #{transition.comment}" + transitions_by_project[project_id][:confirmed] << { + "current_vulnerability_id" => current_vulnerability.id, "old_vulnerability_id" => old_vulnerability.id, "comment" => transition.comment + } + # ::Vulnerabilities::ConfirmService.new(author, vulnerability, transition.comment).execute + when "dismissed" + puts "Dismissing vulnerability #{current_vulnerability.id} with comment #{transition.comment}" + transitions_by_project[project_id][:dismissed] << { + "current_vulnerability_id" => current_vulnerability.id, "old_vulnerability_id" => old_vulnerability.id, "comment" => transition.comment + } + # ::Vulnerabilities::DismissService.new(author, vulnerability, transition.comment, transition.dismissal_reason).execute + else + puts 
"Unknown transition '#{transition.to_state}' for vulnerability #{vulnerability.id}" + end end - author = User.find(transition.author_id) - - case transition.to_state - when "resolved" - puts "Resolving vulnerability #{vulnerability.id} with comment #{transition.comment}" - # ::Vulnerabilities::ResolveService.new(author, vulnerability, transition.comment).execute - when "confirmed" - puts "Confirming vulnerability #{vulnerability.id} with comment #{transition.comment}" - # ::Vulnerabilities::ConfirmService.new(author, vulnerability, transition.comment).execute - when "dismissed" - puts "Dismissing vulnerability #{vulnerability.id} with comment #{transition.comment}" - # ::Vulnerabilities::DismissService.new(author, vulnerability, transition.comment, transition.dismissal_reason).execute + def print_transition_summary + puts "\n=== Transition Summary ===" + transitions_by_project.each do |project_id, states| + project = Project.find(project_id) + puts "\nProject https://gitlab.com/#{project.full_path} (ID: #{project_id})" + puts " Resolved: #{states[:resolved].count} vulnerabilities" if states[:resolved].any? + states[:resolved].each do |resolved| + puts %| ID: #{resolved["current_vulnerability_id"]} (Original ID: #{resolved["old_vulnerability_id"]}), Comment: '#{resolved["comment"]}'| + end + puts " Confirmed: #{states[:confirmed].count} vulnerabilities" if states[:confirmed].any? + states[:confirmed].each do |confirmed| + puts %| ID: #{confirmed["current_vulnerability_id"]} (Original ID: #{confirmed["old_vulnerability_id"]}), Comment: '#{confirmed["comment"]}'| + end + puts " Dismissed: #{states[:dismissed].count} vulnerabilities" if states[:dismissed].any? + states[:dismissed].each do |dismissed| + puts %| ID: #{dismissed["current_vulnerability_id"]} (Original ID: #{dismissed["old_vulnerability_id"]}), Comment: '#{dismissed["comment"]}'| + end + end end end -reset_vulnerability_states(57498926) -# result = reset_vulnerability_states_sql(57498926) +Processor.new(75802210).execute +# Processor.new.execute -# result.each do |row| -# # or access specific columns: -# puts "Original: #{row['original_vuln_id']}, Matched: #{row['matched_vuln_id']}" -# end +# reset_vulnerability_states(57498926) +# reset_vulnerability_states(60454917) +# reset_vulnerability_states(75802210) # my personal project diff --git a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb index c8ee5cbf36eec8..e33f5d89e4befb 100644 --- a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb +++ b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb @@ -33,12 +33,14 @@ project.add_maintainer(user) end + let(:sub_batch_size) { vulnerabilities.count } + # use a method instead of a subject to avoid rspec memoization def perform_migration described_class.new( - start_id: vulnerability_reads.minimum(:id), - end_id: vulnerability_reads.maximum(:id), - batch_table: :vulnerability_reads, + start_id: vulnerabilities.minimum(:id), + end_id: vulnerabilities.maximum(:id), + batch_table: :vulnerabilities, batch_column: :id, sub_batch_size: sub_batch_size, pause_ms: 0, @@ -52,18 +54,37 @@ def perform_migration # Security::StoreScanService.execute(sast_semgrep_artifact, known_keys, false) Security::Ingestion::IngestReportsService.execute(pipeline1) + puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITY.COUNT: #{Vulnerability.count.inspect}|), "XXXXXXXXXXXXXXXX" + 
Vulnerability.where(project_id: project.id).each { |v| puts "Resolving vuln #{v.id}"; ::Vulnerabilities::ResolveService.new(user, v, "first resolution").execute } Vulnerability.where(project_id: project.id, severity: 'high').each { |v| puts "Confirming vuln #{v.id}"; Vulnerabilities::ConfirmService.new(user, v, "confirming").execute } Vulnerability.where(project_id: project.id, severity: 'low').each { |v| puts "Dismissing vuln #{v.id}"; Vulnerabilities::DismissService.new(user, v, "dismissing", 'acceptable_risk').execute } Vulnerability.where(project_id: project.id, severity: 'medium')[0..4].each {|v| puts "Resolving again vuln #{v.id}"; ::Vulnerabilities::ResolveService.new(user, v, "last resolution").execute } + # binding.pry + Security::StoreScansService.execute(pipeline2) Security::Ingestion::IngestReportsService.execute(pipeline2) + puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITY.COUNT: #{Vulnerability.count.inspect}|), "XXXXXXXXXXXXXXXX" + Security::StoreScansService.execute(pipeline3) Security::Ingestion::IngestReportsService.execute(pipeline3) - reset_vulnerability_states(project.id) + puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITY.COUNT: #{Vulnerability.count.inspect}|), "XXXXXXXXXXXXXXXX" + + perform_migration + + expect(Vulnerability.count).to eq(16) + types = Vulnerability.all.map { |v| v.finding.primary_identifier.external_type } + expect(types).to eq(["semgrep_id"] * 16) + + # Vulnerability.all.each do |v| + # puts "XXXXXXXXXXXXXXXX", (%|V.ID: #{v.id.inspect}|), "XXXXXXXXXXXXXXXX" + # puts "XXXXXXXXXXXXXXXX", (%|V.ID: #{v.resolved_on_default_branch.inspect}|), "XXXXXXXXXXXXXXXX" + # puts "XXXXXXXXXXXXXXXX", (%|V.FINDING.STATE: #{v.finding.state.inspect}|), "XXXXXXXXXXXXXXXX" + # puts "XXXXXXXXXXXXXXXX", (%|V.STATE_TRANSITIONS: #{v.state_transitions.inspect}|), "XXXXXXXXXXXXXXXX" + # end expect(Vulnerability.where(project_id: project.id, severity: 'high', resolved_on_default_branch: false).map { |v| v.finding.state }) .to eq(["confirmed"] * 4) -- GitLab From e8668117bb5d5b47e41916b47c0bf55cc42e8d8e Mon Sep 17 00:00:00 2001 From: Adam Cohen Date: Wed, 5 Nov 2025 17:17:51 -0500 Subject: [PATCH 08/16] Handle when semgrep 6.7.1 has not been run --- .../restore_incorrect_vulnerability_states.rb | 186 ++++++------------ ...ore_incorrect_vulnerability_states_spec.rb | 122 ++++++------ 2 files changed, 129 insertions(+), 179 deletions(-) diff --git a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb index 6711442efa2a78..fb36378fef0ef1 100644 --- a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb +++ b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb @@ -5,6 +5,8 @@ # Update below commented lines with appropriate values. 
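+# Editorial aside (illustration only): JSON is required because the job re-parses each
+# finding's raw_metadata and moves the semgrep_id identifier back to the front, roughly:
+#
+#   metadata = JSON.parse(raw_metadata) # raw_metadata comes from the finding
+#   semgrep, others = metadata['identifiers'].partition { |id| id['type'] == 'semgrep_id' }
+#   metadata['identifiers'] = semgrep + others
+#   JSON.generate(metadata)
+#
+# This mirrors the earlier reorder_identifiers helper; the sort_metadata method added
+# below additionally sorts the non-semgrep identifiers by value.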
+require 'json' + module Gitlab module BackgroundMigration class RestoreIncorrectVulnerabilityStates < BatchedMigrationJob @@ -73,20 +75,7 @@ def finding end end - # project_ids = ActiveRecord::Base.connection.execute(<<~SQL).to_a.flatten - # SELECT DISTINCT vo.project_id - # FROM vulnerability_occurrences vo - # JOIN vulnerability_identifiers vi ON vo.primary_identifier_id = vi.id - # JOIN vulnerability_scanners vs ON vo.scanner_id = vs.id - # WHERE vi.external_type in ('cwe', 'owasp') - # AND vs.external_id = 'semgrep' - # AND vs.project_id = vo.project_id - # AND vo.report_type = 0 - # SQL - def perform - user_id = Users::Internal.security_bot.id - vulnerabilities_by_project = Migratable::Vulnerability .joins(findings: [:primary_identifier, :scanner]) .where(report_type: 0) @@ -99,25 +88,6 @@ def perform ) .group_by(&:project_id) - # project_ids = Migratable::Vulnerabilities::Finding - # .joins(:primary_identifier, :scanner) - # .where(vulnerability_identifiers: { external_type: ['cwe', 'owasp'] }) - # .where( - # vulnerability_scanners: { external_id: 'semgrep' }, - # report_type: 0 - # ) - # .where('vulnerability_scanners.project_id = vulnerability_occurrences.project_id') - # .distinct - # .pluck(:project_id) - - # project_ids.each do |project_id| - # reset_vulnerability_states(project_id) - # end - - # each_sub_batch do |sub_batch| - # # Your action on each sub_batch - # end - vulnerabilities_by_project.each_with_index do |(project_id, vulnerabilities), idx| reset_vulnerability_states(project_id, vulnerabilities) end @@ -129,114 +99,82 @@ def reset_vulnerability_states(project_id, vulnerabilities) num_vulnerabilities_found = vulnerabilities.length puts "Found #{num_vulnerabilities_found} vulnerabilities with incorrect primary identifier" - # Build lookup keys for all original findings - original_findings_data = vulnerabilities.map do |vuln| - finding = vuln.finding - { - vulnerability: vuln, - finding: finding, - lookup_key: [ - finding.severity, - finding.report_type, - finding.location_fingerprint&.unpack('H*')&.first, - finding.name, - finding.metadata_version - ] - } - end + # Process each vulnerability with pre-fetched data + vulnerabilities.each_with_index do |vulnerability_with_incorrect_primary_id, idx| + puts "Processing vulnerability #{idx} of #{num_vulnerabilities_found} [#{((idx.to_f/num_vulnerabilities_found)*100).round(2)}%]" - # Batch query for all matching findings - puts "Batch querying for matching findings..." - - conditions = original_findings_data.map do |data| - finding = data[:finding] - { - severity: finding.severity, - report_type: finding.report_type, - location_fingerprint: finding.location_fingerprint, - name: finding.name, - metadata_version: finding.metadata_version - } - end + latest_transition = vulnerability_with_incorrect_primary_id.state_transitions + .select { |t| t.author_id.present? } + .max_by(&:created_at) - # Get all original finding IDs to exclude - original_finding_ids = original_findings_data.map { |d| d[:finding].id } + current_vulnerability_finding = get_matching_finding(vulnerability_with_incorrect_primary_id.finding) - if original_finding_ids.empty? - puts "Unable to find vulnerabilities for project #{project_id}" - return + reset_vulnerability_state(project_id, vulnerability_with_incorrect_primary_id, current_vulnerability_finding, latest_transition) end + end - # Build a complex OR query - query = nil - conditions.each do |cond| - subquery = Vulnerabilities::Finding.where(cond) - query = query ? 
query.or(subquery) : subquery - end + def get_matching_finding(current_vulnerability_finding) + Vulnerabilities::Finding.where( + severity: current_vulnerability_finding.severity, + report_type: current_vulnerability_finding.report_type, + location_fingerprint: current_vulnerability_finding.location_fingerprint, + name: current_vulnerability_finding.name, + metadata_version: current_vulnerability_finding.metadata_version + ) + .where.not(id: current_vulnerability_finding.id) + .where(project_id: current_vulnerability_finding.project_id) + .first + end - matching_findings = query - .where.not(id: original_finding_ids) - .where(project_id: project_id) # Add project filter if applicable - .includes(:vulnerability) - .to_a - - # Build lookup hash for matched findings - matched_findings_hash = matching_findings.group_by do |finding| - [ - finding.severity, - finding.report_type, - finding.location_fingerprint&.unpack('H*')&.first, - finding.name, - finding.metadata_version - ] - end + def sort_metadata(raw_metadata) + metadata = JSON.parse(raw_metadata) - # Process each vulnerability with pre-fetched data - original_findings_data.each_with_index do |data, idx| - puts "Processing vulnerability #{idx} of #{num_vulnerabilities_found} [#{((idx.to_f/num_vulnerabilities_found)*100).round(2)}%]" + # Find the semgrep_id identifier and separate it from the rest + identifiers = metadata["identifiers"] + semgrep_identifier = identifiers.find { |id| id["type"] == "semgrep_id" } + other_identifiers = identifiers.reject { |id| id["type"] == "semgrep_id" } - vulnerability = data[:vulnerability] - finding = data[:finding] + # Sort the other identifiers by their "value" field + other_identifiers.sort! { |a, b| a["value"] <=> b["value"] } - latest_transition = vulnerability.state_transitions - .select { |t| t.author_id.present? 
} - .max_by(&:created_at) + # Reconstruct the identifiers array with semgrep_id first, then sorted others + metadata["identifiers"] = [semgrep_identifier] + other_identifiers + end - # Look up matching finding from hash - matched_findings = matched_findings_hash[data[:lookup_key]] - matching_finding = matched_findings&.first + def reset_vulnerability_state(project_id, vulnerability_with_incorrect_primary_id, current_vulnerability_finding, transition) + current_vulnerability = current_vulnerability_finding&.vulnerability + finding_with_incorrect_primary_id = vulnerability_with_incorrect_primary_id.finding - unless latest_transition - if matching_finding - puts "Unable to find vulnerability_state_transition for `semgrep 6.7.0` vulnerability ID #{vulnerability.id} (corresponding `semgrep >= 6.7.1` ID: #{matching_finding.vulnerability.id})" - else - puts "Unable to find vulnerability_state_transition for `semgrep 6.7.0` vulnerability ID #{vulnerability.id} (no corresponding `semgrep >= 6.7.1` ID)" - end - next - end + sorted_metadata = sort_metadata(finding_with_incorrect_primary_id.raw_metadata) + semgrep_identifier = sorted_metadata[0] - unless matching_finding - puts "Unable to find match for `semgrep 6.7.0` vulnerability ID #{vulnerability.id}" - next - end + correct_primary_identifier = Vulnerabilities::Identifier.find_by( + project_id: project_id, fingerprint: Digest::SHA1.hexdigest("#{semgrep_identifier['type']}:#{semgrep_identifier['value']}") + ) - puts "`semgrep 6.7.0` ID #{vulnerability.id} matches `semgrep >= 6.7.1` vulnerability ID #{matching_finding.vulnerability.id}" + puts "Updating incorrect primary identifier #{vulnerability_with_incorrect_primary_id.finding.primary_identifier_id} to correct primary identifier #{correct_primary_identifier.id}" + finding_with_incorrect_primary_id.update(primary_identifier_id: correct_primary_identifier.id) - apply_state_transition(project_id, vulnerability, finding, matching_finding.vulnerability, matching_finding, latest_transition) + attributes = {} + attributes[:state] = transition.to_state if transition + + if current_vulnerability + attributes[:resolved_on_default_branch] = current_vulnerability.resolved_on_default_branch + attributes[:present_on_default_branch] = current_vulnerability.present_on_default_branch end - end - def apply_state_transition(project_id, old_vulnerability, old_finding, current_vulnerability, current_finding, transition) - puts "Updating incorrect primary identifier #{old_finding.primary_identifier_id} to correct primary identifier #{current_finding.primary_identifier_id}" - puts "Deleting vulerability #{current_vulnerability.id}" - old_finding.update(primary_identifier_id: current_finding.primary_identifier_id) - old_vulnerability.update( - state: transition.to_state, - resolved_on_default_branch: current_vulnerability.resolved_on_default_branch, - present_on_default_branch: current_vulnerability.present_on_default_branch - ) - current_vulnerability.destroy - old_vulnerability.state_transitions.where(author_id: nil).destroy_all + finding_with_incorrect_primary_id.update(raw_metadata: sorted_metadata.to_json) + + vulnerability_with_incorrect_primary_id.update(attributes) + + if current_vulnerability_finding + puts "Deleting vulerability finding (occurrences) #{current_vulnerability_finding.id}" + current_vulnerability_finding.destroy + puts "Deleting vulerability #{current_vulnerability.id}" + current_vulnerability.destroy + end + + vulnerability_with_incorrect_primary_id.state_transitions.where(author_id: 
nil).destroy_all end end end diff --git a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb index e33f5d89e4befb..e7b214cc10b894 100644 --- a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb +++ b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb @@ -3,28 +3,22 @@ require 'spec_helper' RSpec.describe Gitlab::BackgroundMigration::RestoreIncorrectVulnerabilityStates, feature_category: :static_application_security_testing do - let(:namespaces) { table(:namespaces) } - let(:projects) { table(:projects) } let(:vulnerabilities) { table(:vulnerabilities) } - let(:project) { projects.create!(namespace_id: namespace.id, project_namespace_id: namespace.id) } let(:known_keys) { Set.new } - - # let!(:sast_semgrep_artifact) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities) } - # let!(:sast_semgrep_artifact_with_incorrect_primary_identifiers) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities_incorrect_primary_identifier) } - let(:project) { create(:project) } let(:user) { create(:user) } - let(:pipeline1) { create(:ci_pipeline, user: user, project: project) } - let(:sast_build) { create(:ee_ci_build, :success, pipeline: pipeline1, project: project) } - let!(:sast_semgrep_artifact_with_correct_primary_identifiers1) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities, job: sast_build) } - let(:pipeline2) { create(:ci_pipeline, user: user, project: project) } - let(:sast_build2) { create(:ee_ci_build, :success, pipeline: pipeline2) } - let!(:sast_semgrep_artifact_with_incorrect_primary_identifiers) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities_incorrect_primary_identifier, job: sast_build2) } + let(:initial_pipeline) { create(:ci_pipeline, user: user, project: project) } + let(:initial_sast_build) { create(:ee_ci_build, :success, pipeline: initial_pipeline, project: project) } + let!(:initial_sast_artifact) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities, job: initial_sast_build) } - let(:pipeline3) { create(:ci_pipeline, user: user, project: project) } - let(:sast_build3) { create(:ee_ci_build, :success, pipeline: pipeline3) } - let!(:sast_semgrep_artifact_with_correct_primary_identifiers2) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities, job: sast_build3) } + let(:corrupted_pipeline) { create(:ci_pipeline, user: user, project: project) } + let(:corrupted_sast_build) { create(:ee_ci_build, :success, pipeline: corrupted_pipeline) } + let!(:corrupted_sast_artifact) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities_incorrect_primary_identifier, job: corrupted_sast_build) } + + let(:restored_pipeline) { create(:ci_pipeline, user: user, project: project) } + let(:restored_sast_build) { create(:ee_ci_build, :success, pipeline: restored_pipeline) } + let!(:restored_sast_artifact) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities, job: restored_sast_build) } let(:known_keys) { Set.new } @@ -49,59 +43,77 @@ def perform_migration end describe "#perform", feature_category: :static_application_security_testing do - it 'restores vulnerabilities' do - Security::StoreScansService.execute(pipeline1) - # Security::StoreScanService.execute(sast_semgrep_artifact, known_keys, false) - Security::Ingestion::IngestReportsService.execute(pipeline1) + context 'when ingesting reports in 
sequence: correct identifiers → incorrect identifiers' do + before do + Security::StoreScansService.execute(initial_pipeline) + Security::Ingestion::IngestReportsService.execute(initial_pipeline) + + Vulnerability.where(project_id: project.id).each { |v| puts "Resolving vuln #{v.id}"; ::Vulnerabilities::ResolveService.new(user, v, "first resolution").execute } + Vulnerability.where(project_id: project.id, severity: 'high').each { |v| puts "Confirming vuln #{v.id}"; Vulnerabilities::ConfirmService.new(user, v, "confirming").execute } + Vulnerability.where(project_id: project.id, severity: 'low').each { |v| puts "Dismissing vuln #{v.id}"; Vulnerabilities::DismissService.new(user, v, "dismissing", 'acceptable_risk').execute } + Vulnerability.where(project_id: project.id, severity: 'medium')[0..4].each {|v| puts "Resolving again vuln #{v.id}"; ::Vulnerabilities::ResolveService.new(user, v, "last resolution").execute } - puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITY.COUNT: #{Vulnerability.count.inspect}|), "XXXXXXXXXXXXXXXX" + Security::StoreScansService.execute(corrupted_pipeline) + Security::Ingestion::IngestReportsService.execute(corrupted_pipeline) + end - Vulnerability.where(project_id: project.id).each { |v| puts "Resolving vuln #{v.id}"; ::Vulnerabilities::ResolveService.new(user, v, "first resolution").execute } - Vulnerability.where(project_id: project.id, severity: 'high').each { |v| puts "Confirming vuln #{v.id}"; Vulnerabilities::ConfirmService.new(user, v, "confirming").execute } - Vulnerability.where(project_id: project.id, severity: 'low').each { |v| puts "Dismissing vuln #{v.id}"; Vulnerabilities::DismissService.new(user, v, "dismissing", 'acceptable_risk').execute } - Vulnerability.where(project_id: project.id, severity: 'medium')[0..4].each {|v| puts "Resolving again vuln #{v.id}"; ::Vulnerabilities::ResolveService.new(user, v, "last resolution").execute } + it 'restores vulnerability states' do + perform_migration - # binding.pry + types = Vulnerability.all.map { |v| v.finding.primary_identifier.external_type } + expect(types).to eq(["semgrep_id"] * 16) - Security::StoreScansService.execute(pipeline2) - Security::Ingestion::IngestReportsService.execute(pipeline2) + expect(Vulnerability.where(project_id: project.id, severity: 'high', resolved_on_default_branch: false).map { |v| v.finding.state }) + .to eq(["confirmed"] * 4) + + expect(Vulnerability.where(project_id: project.id, severity: 'low', resolved_on_default_branch: false).map { |v| v.finding.state }) + .to eq(["dismissed"] * 2) + + expect(Vulnerability.where(project_id: project.id, severity: 'medium', resolved_on_default_branch: false).map { |v| v.finding.state }) + .to eq(["resolved"] * 10) + end + + it 'does not delete any vulnerabilities' do + expect { perform_migration }.not_to change { Vulnerability.count } + end + end - puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITY.COUNT: #{Vulnerability.count.inspect}|), "XXXXXXXXXXXXXXXX" + context 'when ingesting reports in sequence: correct identifiers → incorrect identifiers → correct identifiers' do + before do + Security::StoreScansService.execute(initial_pipeline) + Security::Ingestion::IngestReportsService.execute(initial_pipeline) - Security::StoreScansService.execute(pipeline3) - Security::Ingestion::IngestReportsService.execute(pipeline3) + Vulnerability.where(project_id: project.id).each { |v| puts "Resolving vuln #{v.id}"; ::Vulnerabilities::ResolveService.new(user, v, "first resolution").execute } + Vulnerability.where(project_id: project.id, severity: 'high').each { |v| 
puts "Confirming vuln #{v.id}"; Vulnerabilities::ConfirmService.new(user, v, "confirming").execute } + Vulnerability.where(project_id: project.id, severity: 'low').each { |v| puts "Dismissing vuln #{v.id}"; Vulnerabilities::DismissService.new(user, v, "dismissing", 'acceptable_risk').execute } + Vulnerability.where(project_id: project.id, severity: 'medium')[0..4].each {|v| puts "Resolving again vuln #{v.id}"; ::Vulnerabilities::ResolveService.new(user, v, "last resolution").execute } - puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITY.COUNT: #{Vulnerability.count.inspect}|), "XXXXXXXXXXXXXXXX" + Security::StoreScansService.execute(corrupted_pipeline) + Security::Ingestion::IngestReportsService.execute(corrupted_pipeline) - perform_migration + Security::StoreScansService.execute(restored_pipeline) + Security::Ingestion::IngestReportsService.execute(restored_pipeline) + end - expect(Vulnerability.count).to eq(16) - types = Vulnerability.all.map { |v| v.finding.primary_identifier.external_type } - expect(types).to eq(["semgrep_id"] * 16) + it 'restores vulnerability states' do + perform_migration - # Vulnerability.all.each do |v| - # puts "XXXXXXXXXXXXXXXX", (%|V.ID: #{v.id.inspect}|), "XXXXXXXXXXXXXXXX" - # puts "XXXXXXXXXXXXXXXX", (%|V.ID: #{v.resolved_on_default_branch.inspect}|), "XXXXXXXXXXXXXXXX" - # puts "XXXXXXXXXXXXXXXX", (%|V.FINDING.STATE: #{v.finding.state.inspect}|), "XXXXXXXXXXXXXXXX" - # puts "XXXXXXXXXXXXXXXX", (%|V.STATE_TRANSITIONS: #{v.state_transitions.inspect}|), "XXXXXXXXXXXXXXXX" - # end + types = Vulnerability.all.map { |v| v.finding.primary_identifier.external_type } + expect(types).to eq(["semgrep_id"] * 16) - expect(Vulnerability.where(project_id: project.id, severity: 'high', resolved_on_default_branch: false).map { |v| v.finding.state }) - .to eq(["confirmed"] * 4) + expect(Vulnerability.where(project_id: project.id, severity: 'high', resolved_on_default_branch: false).map { |v| v.finding.state }) + .to eq(["confirmed"] * 4) - expect(Vulnerability.where(project_id: project.id, severity: 'low', resolved_on_default_branch: false).map { |v| v.finding.state }) - .to eq(["dismissed"] * 2) + expect(Vulnerability.where(project_id: project.id, severity: 'low', resolved_on_default_branch: false).map { |v| v.finding.state }) + .to eq(["dismissed"] * 2) - expect(Vulnerability.where(project_id: project.id, severity: 'medium', resolved_on_default_branch: false).map { |v| v.finding.state }) - .to eq(["resolved"] * 10) + expect(Vulnerability.where(project_id: project.id, severity: 'medium', resolved_on_default_branch: false).map { |v| v.finding.state }) + .to eq(["resolved"] * 10) + end - # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITY.COUNT: #{Vulnerability.count.inspect}|), "XXXXXXXXXXXXXXXX" - # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::FINDING.COUNT: #{Vulnerabilities::Finding.count.inspect}|), "XXXXXXXXXXXXXXXX" - # puts "XXXXXXXXXXXXXXXX", (%|SECURITY::FINDING.COUNT: #{Security::Finding.count.inspect}|), "XXXXXXXXXXXXXXXX" - # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::IDENTIFIER.COUNT: #{Vulnerabilities::Identifier.count.inspect}|), "XXXXXXXXXXXXXXXX" - # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::SCANNER.COUNT: #{Vulnerabilities::Scanner.count.inspect}|), "XXXXXXXXXXXXXXXX" - # puts "XXXXXXXXXXXXXXXX", (%|SECURITY::FINDING.COUNT: #{Security::Finding.count.inspect}|), "XXXXXXXXXXXXXXXX" - # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::FINDINGIDENTIFIER.COUNT: #{Vulnerabilities::FindingIdentifier.count.inspect}|), "XXXXXXXXXXXXXXXX" + it 'deletes duplicate vulnerabilities' do 
+ expect { perform_migration }.to change { Vulnerability.count }.from(32).to(16) + end end end end -- GitLab From 8bb8864d8c3dd550fe5bf1bef0721a01e5a1a5d9 Mon Sep 17 00:00:00 2001 From: Adam Cohen Date: Wed, 5 Nov 2025 18:24:14 -0500 Subject: [PATCH 09/16] Ensure only Migratable classes are used --- .../restore_incorrect_vulnerability_states.rb | 29 ++++++++++++++++--- restore-vulnerability-states-for-project.rb | 11 +++++-- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb index fb36378fef0ef1..6a8aa975e53b2d 100644 --- a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb +++ b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb @@ -32,6 +32,21 @@ module Vulnerability def self.vulnerability_states VULNERABILITY_STATES end + + SEVERITY_LEVELS = { + # undefined: 0, no longer applicable + info: 1, + unknown: 2, + # experimental: 3, formerly used by confidence, no longer applicable + low: 4, + medium: 5, + high: 6, + critical: 7 + }.with_indifferent_access.freeze + + def self.severity_levels + SEVERITY_LEVELS + end end end @@ -49,6 +64,9 @@ class StateTransition < SecApplicationRecord class Finding < SecApplicationRecord self.table_name = 'vulnerability_occurrences' + enum :report_type, ::Enums::Vulnerability.report_types + enum :severity, ::Enums::Vulnerability.severity_levels, prefix: :severity + belongs_to :primary_identifier, class_name: 'Vulnerabilities::Identifier', foreign_key: 'primary_identifier_id' belongs_to :scanner, class_name: 'Vulnerabilities::Scanner', foreign_key: 'scanner_id' belongs_to :vulnerability, class_name: 'Vulnerability', inverse_of: :findings, foreign_key: 'vulnerability_id' @@ -114,10 +132,12 @@ def reset_vulnerability_states(project_id, vulnerabilities) end def get_matching_finding(current_vulnerability_finding) - Vulnerabilities::Finding.where( + binary_location_fingerprint = [current_vulnerability_finding.location_fingerprint].pack('H*') + + Migratable::Vulnerabilities::Finding.where( severity: current_vulnerability_finding.severity, report_type: current_vulnerability_finding.report_type, - location_fingerprint: current_vulnerability_finding.location_fingerprint, + location_fingerprint: binary_location_fingerprint, name: current_vulnerability_finding.name, metadata_version: current_vulnerability_finding.metadata_version ) @@ -148,8 +168,9 @@ def reset_vulnerability_state(project_id, vulnerability_with_incorrect_primary_i sorted_metadata = sort_metadata(finding_with_incorrect_primary_id.raw_metadata) semgrep_identifier = sorted_metadata[0] - correct_primary_identifier = Vulnerabilities::Identifier.find_by( - project_id: project_id, fingerprint: Digest::SHA1.hexdigest("#{semgrep_identifier['type']}:#{semgrep_identifier['value']}") + binary_fingerprint = [Digest::SHA1.hexdigest("#{semgrep_identifier['type']}:#{semgrep_identifier['value']}")].pack('H*') + correct_primary_identifier = Migratable::Vulnerabilities::Identifier.find_by( + project_id: project_id, fingerprint: binary_fingerprint ) puts "Updating incorrect primary identifier #{vulnerability_with_incorrect_primary_id.finding.primary_identifier_id} to correct primary identifier #{correct_primary_identifier.id}" diff --git a/restore-vulnerability-states-for-project.rb b/restore-vulnerability-states-for-project.rb index c4cdd8e517d598..3f635c56e90bb1 100755 --- a/restore-vulnerability-states-for-project.rb 
+++ b/restore-vulnerability-states-for-project.rb @@ -1,4 +1,9 @@ #!/usr/bin/env ruby + +# frozen_string_literal: true + +# rubocop:disable all + require "bundler/inline" gemfile(true) do source "https://rubygems.org" @@ -128,12 +133,10 @@ def execute end end - if project_id reset_vulnerability_states(project_id) else - # affected_project_ids = File.read('clario-clinical-affected-project-ids.txt').split("\n") - affected_project_ids = File.read('ds365ai-affected-project-ids.txt').split("\n") + affected_project_ids = File.read(ARGV[0]).split("\n") puts "Found #{affected_project_ids.length} affected project ids" @@ -330,3 +333,5 @@ def print_transition_summary # reset_vulnerability_states(57498926) # reset_vulnerability_states(60454917) # reset_vulnerability_states(75802210) # my personal project + +# rubocop:enable all -- GitLab From 45e187f64eef659b7f625c01991231c76d3be96b Mon Sep 17 00:00:00 2001 From: Adam Cohen Date: Wed, 5 Nov 2025 18:57:46 -0500 Subject: [PATCH 10/16] Get specs and code working --- ...restore_incorrect_vulnerability_states.yml | 6 +- ..._restore_incorrect_vulnerability_states.rb | 16 +- db/schema_migrations/20251020182838 | 1 + .../ingestion/ingest_reports_service.rb | 1 - ee/spec/factories/ci/job_artifacts.rb | 28 +- ...mgrep-6.6.2-multiple-vulnerabilities.json} | 281 ++-- ...bilities-incorrect-primary-identifier.json | 677 ++++++++ ...rabilities-correct-primary-identifier.json | 137 ++ ...abilities-correct-primary-identifier.json} | 141 +- .../security/ingestion/custom_spec.rb | 123 -- .../restore_incorrect_vulnerability_states.rb | 844 ++++++++-- restore-vulnerability-states-for-project.rb | 337 ---- ...orrect_vulnerability_states_manual_spec.rb | 1411 +++++++++++++++++ ...ore_incorrect_vulnerability_states_spec.rb | 119 -- ...ore_incorrect_vulnerability_states_spec.rb | 38 +- 15 files changed, 3142 insertions(+), 1018 deletions(-) create mode 100644 db/schema_migrations/20251020182838 rename ee/spec/fixtures/security_reports/master/{gl-sast-report-semgrep-multiple-vulnerabilities-incorrect-primary-identifier.json => gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json} (76%) create mode 100644 ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.0-multiple-vulnerabilities-incorrect-primary-identifier.json create mode 100644 ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-additional-vulnerabilities-correct-primary-identifier.json rename ee/spec/fixtures/security_reports/master/{gl-sast-report-semgrep-multiple-vulnerabilities.json => gl-sast-report-semgrep-6.7.1-multiple-vulnerabilities-correct-primary-identifier.json} (75%) delete mode 100644 ee/spec/services/security/ingestion/custom_spec.rb delete mode 100755 restore-vulnerability-states-for-project.rb create mode 100644 spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_manual_spec.rb delete mode 100644 spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb diff --git a/db/docs/batched_background_migrations/restore_incorrect_vulnerability_states.yml b/db/docs/batched_background_migrations/restore_incorrect_vulnerability_states.yml index 0d3cdba0f47b27..e52246df146ff4 100644 --- a/db/docs/batched_background_migrations/restore_incorrect_vulnerability_states.yml +++ b/db/docs/batched_background_migrations/restore_incorrect_vulnerability_states.yml @@ -1,8 +1,8 @@ --- migration_job_name: RestoreIncorrectVulnerabilityStates -description: Restores incorrect vulnerability states as a result of a bug in GitLab Semgrep v6.7.0 
+description: Restores incorrect vulnerability states caused by a bug in GitLab Semgrep v6.7.0 feature_category: static_application_security_testing -introduced_by_url: # URL of the MR (or issue/commit) that introduced the migration -milestone: '18.6' +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/211669 +milestone: '18.7' queued_migration_version: 20251020182838 finalized_by: # version of the migration that finalized this BBM diff --git a/db/post_migrate/20251020182838_queue_restore_incorrect_vulnerability_states.rb b/db/post_migrate/20251020182838_queue_restore_incorrect_vulnerability_states.rb index 39e3ff480c378e..002f76b814782f 100644 --- a/db/post_migrate/20251020182838_queue_restore_incorrect_vulnerability_states.rb +++ b/db/post_migrate/20251020182838_queue_restore_incorrect_vulnerability_states.rb @@ -6,7 +6,7 @@ # Update below commented lines with appropriate values. class QueueRestoreIncorrectVulnerabilityStates < Gitlab::Database::Migration[2.3] - milestone '18.6' + milestone '18.7' # Select the applicable gitlab schema for your batched background migration restrict_gitlab_migration gitlab_schema: :gitlab_sec @@ -14,24 +14,26 @@ class QueueRestoreIncorrectVulnerabilityStates < Gitlab::Database::Migration[2.3 # restrict_gitlab_migration # gitlab_schema: :gitlab_main_org / :gitlab_ci / ... MIGRATION = "RestoreIncorrectVulnerabilityStates" - # BATCH_SIZE = 1000 - # SUB_BATCH_SIZE = 100 + DELAY_INTERVAL = 2.minutes + BATCH_SIZE = 1000 + SUB_BATCH_SIZE = 100 def up # If you are requeueing an already executed migration, you need to delete the prior batched migration record # for the new enqueue to be executed, else, you can delete this line. - # delete_batched_background_migration(MIGRATION, :vulnerability_occurrences, :id, []) + # delete_batched_background_migration(MIGRATION, :vulnerability_reads, :vulnerability_id, []) + # Use vulnerability_reads (denormalized table) to avoid scanning the 200M row vulnerability_occurrences table queue_batched_background_migration( MIGRATION, - :vulnerability_occurrences, - :id, + :vulnerability_reads, + :vulnerability_id, batch_size: BATCH_SIZE, sub_batch_size: SUB_BATCH_SIZE ) end def down - delete_batched_background_migration(MIGRATION, :vulnerability_occurrences, :id, []) + delete_batched_background_migration(MIGRATION, :vulnerability_reads, :vulnerability_id, []) end end diff --git a/db/schema_migrations/20251020182838 b/db/schema_migrations/20251020182838 new file mode 100644 index 00000000000000..6129d2c00d6cfe --- /dev/null +++ b/db/schema_migrations/20251020182838 @@ -0,0 +1 @@ +f3f0d068a7817c20f208f77d6851259f8551cea91532a12bbc623c53277e5df2 \ No newline at end of file diff --git a/ee/app/services/security/ingestion/ingest_reports_service.rb b/ee/app/services/security/ingestion/ingest_reports_service.rb index 056f8846d40a77..be280277c14329 100644 --- a/ee/app/services/security/ingestion/ingest_reports_service.rb +++ b/ee/app/services/security/ingestion/ingest_reports_service.rb @@ -16,7 +16,6 @@ def initialize(pipeline) end def execute - # binding.pry store_reports mark_resolved_vulnerabilities auto_dismiss_vulnerabilities diff --git a/ee/spec/factories/ci/job_artifacts.rb b/ee/spec/factories/ci/job_artifacts.rb index ff134369b4d8bb..b776dad2a1dff7 100644 --- a/ee/spec/factories/ci/job_artifacts.rb +++ b/ee/spec/factories/ci/job_artifacts.rb @@ -127,7 +127,7 @@ after(:build) do |artifact, _| artifact.file = fixture_file_upload( - 
Rails.root.join('ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-multiple-vulnerabilities.json'), + Rails.root.join('ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json'), 'application/json' ) end @@ -139,7 +139,31 @@ after(:build) do |artifact, _| artifact.file = fixture_file_upload( - Rails.root.join('ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-multiple-vulnerabilities-incorrect-primary-identifier.json'), + Rails.root.join('ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.0-multiple-vulnerabilities-incorrect-primary-identifier.json'), + 'application/json' + ) + end + end + + trait :sast_semgrep_multiple_vulnerabilities_correct_primary_identifier do + file_type { :sast } + file_format { :raw } + + after(:build) do |artifact, _| + artifact.file = fixture_file_upload( + Rails.root.join('ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-multiple-vulnerabilities-correct-primary-identifier.json'), + 'application/json' + ) + end + end + + trait :sast_semgrep_additional_vulnerabilities_correct_primary_identifier do + file_type { :sast } + file_format { :raw } + + after(:build) do |artifact, _| + artifact.file = fixture_file_upload( + Rails.root.join('ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-additional-vulnerabilities-correct-primary-identifier.json'), 'application/json' ) end diff --git a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-multiple-vulnerabilities-incorrect-primary-identifier.json b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json similarity index 76% rename from ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-multiple-vulnerabilities-incorrect-primary-identifier.json rename to ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json index 1c337c35eef83a..e036030261518f 100644 --- a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-multiple-vulnerabilities-incorrect-primary-identifier.json +++ b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json @@ -4,7 +4,7 @@ { "id": "6a8d97c532a32e7bb9e1d93b3300977b8a7e75f9ddcc5bac4edaa6cda3603833", "category": "sast", - "name": "Deserialization of untrusted data", + "name": "Deserialization of untrusted data. Vulnerability to be confirmed.", "description": "The application was found using an unsafe version of `yaml` load which is vulnerable to\ndeserialization attacks. Deserialization attacks exploit the process of reading serialized\ndata and turning it back\ninto an object. 
By constructing malicious objects and serializing them, an adversary may\nattempt to:\n\n- Inject code that is executed upon object construction, which occurs during the\ndeserialization process.\n- Exploit mass assignment by including fields that are not normally a part of the serialized\ndata but are read in during deserialization.\n\nTo remediate this issue, use `safe_load()` or call `yaml.load()` with the `Loader` argument\nset to\n`yaml.SafeLoader`.\n\nExample loading YAML using `safe_load`:\n```\nimport yaml\n\n# Use safe_load to load data into an intermediary object\nintermediary_object = yaml.safe_load(\"\"\"user:\n name: 'test user'\"\"\"\n)\n# Create our real object, copying over only the necessary fields\nuser_object = {'user': {\n # Assign the deserialized data from intermediary object\n 'name': intermediary_object['user']['name'],\n # Add in protected data in object definition (or set it from a class constructor)\n 'is_admin': False,\n }\n}\n# Work with user_object\n# ...\n```\n\nFor more details on deserialization attacks in general, see OWASP's guide:\n- https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html\n", "cve": "semgrep_id:bandit.B506:329:329", "severity": "High", @@ -17,6 +17,12 @@ "start_line": 329 }, "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B506", + "value": "bandit.B506", + "url": "https://semgrep.dev/r/gitlab.bandit.B506" + }, { "type": "cwe", "name": "CWE-502", @@ -37,20 +43,14 @@ "type": "bandit_test_id", "name": "Bandit Test ID B506", "value": "B506" - }, - { - "type": "semgrep_id", - "name": "bandit.B506", - "value": "bandit.B506", - "url": "https://semgrep.dev/r/gitlab.bandit.B506" } ] }, { "id": "185f6aa5aece728c2b94f16ff36ea99339dbeb39a027964d65a0e544b439529d", "category": "sast", - "name": "Improper neutralization of special elements used in an SQL Command ('SQL Injection')", - "description": "SQL Injection is a critical vulnerability that can lead to data or system compromise. By\ndynamically generating SQL query strings, user input may be able to influence the logic of\nthe SQL statement. This could lead to an adversary accessing information they should\nnot have access to, or in some circumstances, being able to execute OS functionality or code.\n\nReplace all dynamically generated SQL queries with parameterized queries. In situations where\ndynamic queries must be created, never use direct user input, but instead use a map or\ndictionary of valid values and resolve them using a user supplied key.\n\nFor example, some database drivers do not allow parameterized queries for `\u003e` or `\u003c` comparison\noperators. In these cases, do not use a user supplied `\u003e` or `\u003c` value, but rather have the\nuser\nsupply a `gt` or `lt` value. The alphabetical values are then used to look up the `\u003e` and `\u003c`\nvalues to be used in the construction of the dynamic query. 
The same goes for other queries\nwhere\ncolumn or table names are required but cannot be parameterized.\n\nExample using `PreparedStatement` queries:\n```\nimport sqlite3\n\n# Create a new database (in memory)\ncon = sqlite3.connect(\":memory:\")\n# Get a cursor from the connection\ncur = con.cursor()\n# Create a tuple of the value to be used in the parameterized query\nparams = ('user-input',)\n# execute the statement, passing in the params for the value\ncur.execute(\"select name from sqlite_master where name = ?\", params)\n# work with the result\nresult = cur.fetchall()\n```\n\nFor more information on SQL Injection see OWASP:\nhttps://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html\n", + "name": "Improper neutralization of special elements used in an SQL Command ('SQL Injection'). Vulnerability to be confirmed.", + "description": "SQL Injection is a critical vulnerability that can lead to data or system compromise.", "cve": "semgrep_id:bandit.B608:265:265", "severity": "High", "scanner": { @@ -62,6 +62,12 @@ "start_line": 265 }, "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B608", + "value": "bandit.B608", + "url": "https://semgrep.dev/r/gitlab.bandit.B608" + }, { "type": "cwe", "name": "CWE-89", @@ -82,19 +88,13 @@ "type": "bandit_test_id", "name": "Bandit Test ID B608", "value": "B608" - }, - { - "type": "semgrep_id", - "name": "bandit.B608", - "value": "bandit.B608", - "url": "https://semgrep.dev/r/gitlab.bandit.B608" } ] }, { "id": "afb3a18f344a72ed01c842afd1939b4c33b150ba50234001d8eb34ce72a977f4", "category": "sast", - "name": "Improper neutralization of directives in dynamically evaluated code ('Eval Injection')", + "name": "Improper neutralization of directives in dynamically evaluated code ('Eval Injection'). Vulnerability to be confirmed.", "description": "The application was found calling the `eval` function OR Function()\n constructor OR setTimeout() OR setInterval() methods. If the\n\n variables or strings or functions passed to these methods contains user-supplied input, an adversary could attempt to execute arbitrary\n\n JavaScript\n\n code. This could lead to a full system compromise in Node applications or Cross-site Scripting\n\n (XSS) in web applications.\n\n\n To remediate this issue, remove all calls to above methods and consider alternative methods for\n\n executing\n\n the necessary business logic. There is almost no safe method of calling `eval` or other above stated sinks with\n\n user-supplied input.\n\n Instead, consider alternative methods such as using property accessors to dynamically access\n\n values.\n\n\n Example using property accessors to dynamically access an object's property:\n\n ```\n\n // Define an object\n\n const obj = {key1: 'value1', key2: 'value2'};\n\n // Get key dynamically from user input\n\n const key = getUserInput();\n\n // Check if the key exists in our object and return it, or a default empty string\n\n const value = (obj.hasOwnProperty(key)) ? 
obj[key] : '';\n\n // Work with the value\n\n ```\n\n\n For more information on why not to use `eval`, and alternatives see:\n\n - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/eval#never_use_eval!\n\n Other References:\n\n - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Function/Function\n\n - https://developer.mozilla.org/en-US/docs/Web/API/setTimeout\n\n - https://developer.mozilla.org/en-US/docs/Web/API/setInterval\n", "cve": "semgrep_id:eslint.detect-eval-with-expression:10:10", "severity": "High", @@ -107,6 +107,12 @@ "start_line": 10 }, "identifiers": [ + { + "type": "semgrep_id", + "name": "eslint.detect-eval-with-expression", + "value": "eslint.detect-eval-with-expression", + "url": "https://semgrep.dev/r/gitlab.eslint.detect-eval-with-expression" + }, { "type": "cwe", "name": "CWE-95", @@ -127,20 +133,14 @@ "type": "eslint_rule_id", "name": "ESLint rule ID/detect-eval-with-expression", "value": "detect-eval-with-expression" - }, - { - "type": "semgrep_id", - "name": "eslint.detect-eval-with-expression", - "value": "eslint.detect-eval-with-expression", - "url": "https://semgrep.dev/r/gitlab.eslint.detect-eval-with-expression" } ] }, { "id": "4e7633d40f31f6398b4c7ffc4bf481ba6fe627c34042d7439b71259e6ea9b32c", "category": "sast", - "name": "Improper neutralization of special elements used in an SQL Command ('SQL Injection')", - "description": "Detected user input used to manually construct a SQL string. This is usually\nbad practice because manual construction could accidentally result in a SQL\ninjection. An attacker could use a SQL injection to steal or modify contents\nof the database. Instead, use a parameterized query which is available\nby default in most database engines. Alternatively, consider using an\nobject-relational mapper (ORM) such as SQLAlchemy which will protect your queries.\n\nSQL Injections are a critical type of vulnerability that can lead to data \nor system compromise. By dynamically generating SQL query strings, user \ninput may be able to influence the logic of an SQL statement. \nThis could lead to an malicious parties accessing information they should not \nhave access to, or in some circumstances, being able to execute OS functionality\nor code.\n\nReplace all dynamically generated SQL queries with parameterized queries. \nIn situations where dynamic queries must be created, never use direct user input,\nbut instead use a map or dictionary of valid values and resolve them using a user \nsupplied key.\n\nFor example, some database drivers do not allow parameterized queries for \n`\u003e` or `\u003c` comparison operators. In these cases, do not use a user supplied \n`\u003e` or `\u003c` value, but rather have the user supply a `gt` or `lt` value. \nThe alphabetical values are then used to look up the `\u003e` and `\u003c` values to be used \nin the construction of the dynamic query. The same goes for other queries where \ncolumn or table names are required but cannot be parameterized.\nData that is possible user-controlled from a python request is passed\nto `execute()` function. To remediate this issue, use SQLAlchemy statements\nwhich are built with query parameterization and therefore not vulnerable \nto sql injection.\n\nIf for some reason this is not feasible, ensure calls including user-supplied \ndata pass it in to the `params` parameter of the `execute()` method.\nBelow is an example using `execute()`, passing in user-supplied data as `params`. 
\nThis will treat the query as a parameterized query and `params` as strictly data, \npreventing any possibility of SQL Injection.\n\n```\nname = request.args.get('name')\nreq = text('SELECT * FROM student WHERE firstname = :x')\nresult = db.session.execute(req, {\"x\":name})\n```\nFor more information on QuerySets see:\n- https://docs.djangoproject.com/en/4.2/ref/models/querysets/#queryset-api\nFor more information on SQL Injections see OWASP:\n- https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html\n", + "name": "Improper neutralization of special elements used in an SQL Command ('SQL Injection'). Vulnerability to be confirmed.", + "description": "Detected user input used to manually construct a SQL string. This is usually\nbad practice because manual construction could accidentally result in a SQL\ninjection. An attacker could use a SQL injection to steal or modify contents\nof the database. Instead, use a parameterized query which is available\nby default in most database engines. Alternatively, consider using an\nobject-relational mapper (ORM) such as SQLAlchemy which will protect your queries.\n\nSQL Injections are a critical type of vulnerability that can lead to data \nor system compromise. By dynamically generating SQL query strings, user \ninput may be able to influence the logic of an SQL statement. \nThis could lead to an malicious parties accessing information they should not \nhave access to, or in some circumstances, being able to execute OS functionality\nor code.\n\nReplace all dynamically generated SQL queries with parameterized queries. \nIn situations where dynamic queries must be created, never use direct user input,\nbut instead use a map or dictionary of valid values and resolve them using a user \nsupplied key.\n\nFor example, some database drivers do not allow parameterized queries for \n`>` or `<` comparison operators. In these cases, do not use a user supplied \n`>` or `<` value, but rather have the user supply a `gt` or `lt` value. \nThe alphabetical values are then used to look up the `>` and `<` values to be used \nin the construction of the dynamic query. The same goes for other queries where \ncolumn or table names are required but cannot be parameterized.\nData that is possible user-controlled from a python request is passed\nto `execute()` function. To remediate this issue, use SQLAlchemy statements\nwhich are built with query parameterization and therefore not vulnerable \nto sql injection.\n\nIf for some reason this is not feasible, ensure calls including user-supplied \ndata pass it in to the `params` parameter of the `execute()` method.\nBelow is an example using `execute()`, passing in user-supplied data as `params`. 
\nThis will treat the query as a parameterized query and `params` as strictly data, \npreventing any possibility of SQL Injection.\n\n```\nname = request.args.get('name')\nreq = text('SELECT * FROM student WHERE firstname = :x')\nresult = db.session.execute(req, {\"x\":name})\n```\nFor more information on QuerySets see:\n- https://docs.djangoproject.com/en/4.2/ref/models/querysets/#queryset-api\nFor more information on SQL Injections see OWASP:\n- https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html\n", "cve": "semgrep_id:python_flask_rule-flask-tainted-sql-string:261:261", "severity": "High", "scanner": { @@ -152,6 +152,11 @@ "start_line": 261 }, "identifiers": [ + { + "type": "semgrep_id", + "name": "python_flask_rule-flask-tainted-sql-string", + "value": "python_flask_rule-flask-tainted-sql-string" + }, { "type": "cwe", "name": "CWE-89", @@ -167,11 +172,6 @@ "type": "owasp", "name": "A1:2017 - Injection", "value": "A1:2017" - }, - { - "type": "semgrep_id", - "name": "python_flask_rule-flask-tainted-sql-string", - "value": "python_flask_rule-flask-tainted-sql-string" } ] }, @@ -181,7 +181,7 @@ "name": "Allocation of resources without limits or throttling", "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", "cve": "semgrep_id:bandit.B113:17:18", - "severity": "Medium", + "severity": "Critical", "scanner": { "id": "semgrep", "name": "Semgrep" @@ -192,6 +192,12 @@ "end_line": 18 }, "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + }, { "type": "cwe", "name": "CWE-770", @@ -212,12 +218,6 @@ "type": "bandit_test_id", "name": "Bandit Test ID B113", "value": "B113" - }, - { - "type": "semgrep_id", - "name": "bandit.B113", - "value": "bandit.B113", - "url": "https://semgrep.dev/r/gitlab.bandit.B113" } ] }, @@ -227,7 +227,7 @@ "name": "Allocation of resources without limits or throttling", "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", "cve": "semgrep_id:bandit.B113:28:29", - "severity": "Medium", + "severity": "Critical", "scanner": { "id": "semgrep", "name": "Semgrep" @@ -238,6 +238,12 @@ "end_line": 29 }, "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + }, { "type": "cwe", "name": "CWE-770", @@ -258,12 +264,6 @@ "type": "bandit_test_id", "name": "Bandit Test ID B113", "value": "B113" - }, - { - "type": "semgrep_id", - "name": "bandit.B113", - "value": "bandit.B113", - "url": "https://semgrep.dev/r/gitlab.bandit.B113" } ] }, @@ -273,7 +273,7 @@ "name": "Allocation of resources without limits or throttling", "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", "cve": "semgrep_id:bandit.B113:36:37", - "severity": "Medium", + "severity": "Critical", "scanner": { "id": "semgrep", "name": "Semgrep" @@ -284,6 +284,12 @@ "end_line": 37 }, "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + }, { "type": "cwe", "name": "CWE-770", @@ -304,12 +310,6 @@ "type": "bandit_test_id", "name": "Bandit Test ID B113", "value": "B113" - }, - { - "type": "semgrep_id", - "name": "bandit.B113", - "value": "bandit.B113", - "url": "https://semgrep.dev/r/gitlab.bandit.B113" } ] }, @@ -319,7 +319,7 @@ "name": "Allocation of resources without limits or throttling", "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", "cve": "semgrep_id:bandit.B113:44:45", - "severity": "Medium", + "severity": "Critical", "scanner": { "id": "semgrep", "name": "Semgrep" @@ -330,6 +330,12 @@ "end_line": 45 }, "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + }, { "type": "cwe", "name": "CWE-770", @@ -350,19 +356,13 @@ "type": "bandit_test_id", "name": "Bandit Test ID B113", "value": "B113" - }, - { - "type": "semgrep_id", - "name": "bandit.B113", - "value": "bandit.B113", - "url": "https://semgrep.dev/r/gitlab.bandit.B113" } ] }, { "id": "e81f87450a35ed038550bfe4f56dcff5bebd9c5ca5f309b6144de063cb99e1b2", "category": "sast", - "name": "Use of a broken or risky cryptographic algorithm", + "name": "Use of a broken or risky cryptographic algorithm. Vulnerability to be resolved.", "description": "The application was found using an insecure or risky digest or signature algorithm. MD2, MD4,\n MD5 and SHA1 hash algorithms have been found to be vulnerable to producing collisions.\n\nThis means\nthat two different values, when hashed, can lead to the same hash value. If the application is\ntrying\nto use these hash methods for storing passwords, then it is recommended to switch to a\npassword hashing\nalgorithm such as Argon2id or PBKDF2.\n\nNote that the `Crypto` and `Cryptodome` Python packages are no longer recommended for\nnew applications, instead consider using the [cryptography](https://cryptography.io/) package.\n\nExample of creating a SHA-384 hash using the `cryptography` package:\n```\nfrom cryptography.hazmat.primitives import hashes\n# Create a SHA384 digest\ndigest = hashes.Hash(hashes.SHA384())\n# Update the digest with some initial data\ndigest.update(b\"some data to hash\")\n# Add more data to the digest\ndigest.update(b\"some more data\")\n# Finalize the digest as bytes\nresult = digest.finalize()\n```\n\nFor more information on secure password storage see OWASP:\n- https://cheatsheetseries.owasp.org/cheatsheets/Password_Storage_Cheat_Sheet.html\n\nFor more information on the cryptography module see:\n- https://cryptography.io/en/latest/\n", "cve": "semgrep_id:bandit.B303-1:141:141", "severity": "Medium", @@ -375,6 +375,12 @@ "start_line": 141 }, "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B303-1", + "value": "bandit.B303-1", + "url": "https://semgrep.dev/r/gitlab.bandit.B303-1" + }, { "type": "cwe", "name": "CWE-327", @@ -395,19 +401,13 @@ "type": "bandit_test_id", "name": "Bandit Test ID B303", "value": "B303" - }, - { - "type": "semgrep_id", - "name": "bandit.B303-1", - "value": "bandit.B303-1", - "url": "https://semgrep.dev/r/gitlab.bandit.B303-1" } ] }, { "id": "3f8a15b8ea5a1e062262c837c4b5c763320c40622f50183f04fa2e584fc05e13", "category": "sast", - "name": "Improper certificate validation", + "name": "Improper certificate validation. 
Vulnerability to be resolved.", "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", "cve": "semgrep_id:bandit.B501:17:18", "severity": "Medium", @@ -421,6 +421,12 @@ "end_line": 18 }, "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + }, { "type": "cwe", "name": "CWE-295", @@ -441,19 +447,13 @@ "type": "bandit_test_id", "name": "Bandit Test ID B501", "value": "B501" - }, - { - "type": "semgrep_id", - "name": "bandit.B501", - "value": "bandit.B501", - "url": "https://semgrep.dev/r/gitlab.bandit.B501" } ] }, { "id": "8b6a98da4410a8abe0a3338ec5db34f4a9a48d0716ba296dcda0e93b63a5766f", "category": "sast", - "name": "Improper certificate validation", + "name": "Improper certificate validation. Vulnerability to be resolved.", "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", "cve": "semgrep_id:bandit.B501:28:29", "severity": "Medium", @@ -467,6 +467,12 @@ "end_line": 29 }, "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + }, { "type": "cwe", "name": "CWE-295", @@ -487,19 +493,13 @@ "type": "bandit_test_id", "name": "Bandit Test ID B501", "value": "B501" - }, - { - "type": "semgrep_id", - "name": "bandit.B501", - "value": "bandit.B501", - "url": "https://semgrep.dev/r/gitlab.bandit.B501" } ] }, { "id": "3b65f8017d6b3a73a5f6e7d1c0e9e78aa0daf817f06234985a9d011da1a9d804", "category": "sast", - "name": "Improper certificate validation", + "name": "Improper certificate validation. Vulnerability to be resolved.", "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", "cve": "semgrep_id:bandit.B501:36:37", "severity": "Medium", @@ -513,52 +513,12 @@ "end_line": 37 }, "identifiers": [ - { - "type": "cwe", - "name": "CWE-295", - "value": "295", - "url": "https://cwe.mitre.org/data/definitions/295.html" - }, - { - "type": "owasp", - "name": "A07:2021 - Identification and Authentication Failures", - "value": "A07:2021" - }, - { - "type": "owasp", - "name": "A2:2017 - Broken Authentication", - "value": "A2:2017" - }, - { - "type": "bandit_test_id", - "name": "Bandit Test ID B501", - "value": "B501" - }, { "type": "semgrep_id", "name": "bandit.B501", "value": "bandit.B501", "url": "https://semgrep.dev/r/gitlab.bandit.B501" - } - ] - }, - { - "id": "878843d5b4edf0042e3066429a4cac5f66f8c7ad72b40056601fbb191fa13214", - "category": "sast", - "name": "Improper certificate validation", - "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", - "cve": "semgrep_id:bandit.B501:44:45", - "severity": "Medium", - "scanner": { - "id": "semgrep", - "name": "Semgrep" - }, - "location": { - "file": "tests/e2e_zap.py", - "start_line": 44, - "end_line": 45 - }, - "identifiers": [ + }, { "type": "cwe", "name": "CWE-295", @@ -579,64 +539,13 @@ "type": "bandit_test_id", "name": "Bandit Test ID B501", "value": "B501" - }, - { - "type": "semgrep_id", - "name": "bandit.B501", - "value": "bandit.B501", - "url": "https://semgrep.dev/r/gitlab.bandit.B501" - } - ] - }, - { - "id": "6cac58319f88ad3a1cb16df9c1272049ea0f909fa5fc3f67508148fda3ce5e2c", - "category": "sast", - "name": "Regular expression with non-literal value", - "description": "The `RegExp` constructor was called with a non-literal value. If an adversary were able to\nsupply a malicious regex, they could cause a Regular Expression Denial of Service (ReDoS)\nagainst the application. 
In Node applications, this could cause the entire application to no\nlonger be responsive to other users' requests.\n\nTo remediate this issue, never allow user-supplied regular expressions. Instead, the regular \nexpression should be hardcoded. If this is not possible, consider using an alternative regular\nexpression engine such as [node-re2](https://www.npmjs.com/package/re2). RE2 is a safe alternative \nthat does not support backtracking, which is what leads to ReDoS.\n\nExample using re2 which does not support backtracking (Note: it is still recommended to\nnever use user-supplied input):\n```\n// Import the re2 module\nconst RE2 = require('re2');\n\nfunction match(userSuppliedRegex, userInput) {\n // Create a RE2 object with the user supplied regex, this is relatively safe\n // due to RE2 not supporting backtracking which can be abused to cause long running\n // queries\n var re = new RE2(userSuppliedRegex);\n // Execute the regular expression against some userInput\n var result = re.exec(userInput);\n // Work with the result\n}\n```\n\nFor more information on Regular Expression DoS see:\n- https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS\n", - "cve": "semgrep_id:eslint.detect-non-literal-regexp:15:15", - "severity": "Medium", - "scanner": { - "id": "semgrep", - "name": "Semgrep" - }, - "location": { - "file": "app/static/main.js", - "start_line": 15 - }, - "identifiers": [ - { - "type": "cwe", - "name": "CWE-185", - "value": "185", - "url": "https://cwe.mitre.org/data/definitions/185.html" - }, - { - "type": "owasp", - "name": "A03:2021 - Injection", - "value": "A03:2021" - }, - { - "type": "owasp", - "name": "A1:2017 - Injection", - "value": "A1:2017" - }, - { - "type": "eslint_rule_id", - "name": "ESLint rule ID/detect-non-literal-regexp", - "value": "detect-non-literal-regexp" - }, - { - "type": "semgrep_id", - "name": "eslint.detect-non-literal-regexp", - "value": "eslint.detect-non-literal-regexp", - "url": "https://semgrep.dev/r/gitlab.eslint.detect-non-literal-regexp" } ] }, { "id": "512131f12839cd51c58aaabf643870dc262bf169f0af15a47d0d073fcfd449ac", "category": "sast", - "name": "Use of cryptographically weak pseudo-random number generator (PRNG)", + "name": "Use of cryptographically weak pseudo-random number generator (PRNG). Vulnerability to be dismissed.", "description": "Depending on the context, generating weak random numbers may expose cryptographic functions,\nwhich rely on these numbers, to be exploitable. 
When generating numbers for sensitive values\nsuch as tokens, nonces, and cryptographic keys, it is recommended that the `secrets` module\nbe used instead.\n\nExample using the secrets module:\n```\nimport secrets\n\n# Generate a secure random 64 byte array\nrandom_bytes = secrets.token_bytes(64)\nprint(random_bytes)\n\n# Generate a secure random 64 byte array as a hex string\nrandom_bytes_hex = secrets.token_hex(64)\n\n# Generate a secure random 64 byte array base64 encoded for use in URLs\nrandom_string = secrets.token_urlsafe(64)\n```\n\nFor more information on the `secrets` module see:\n- https://docs.python.org/3/library/secrets.html\n", "cve": "semgrep_id:bandit.B311:295:295", "severity": "Low", @@ -649,6 +558,12 @@ "start_line": 295 }, "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B311", + "value": "bandit.B311", + "url": "https://semgrep.dev/r/gitlab.bandit.B311" + }, { "type": "cwe", "name": "CWE-338", @@ -669,20 +584,14 @@ "type": "bandit_test_id", "name": "Bandit Test ID B311", "value": "B311" - }, - { - "type": "semgrep_id", - "name": "bandit.B311", - "value": "bandit.B311", - "url": "https://semgrep.dev/r/gitlab.bandit.B311" } ] }, { "id": "6cf069d55d47c54f5b2363af43f3c7a2d71ef25e04751111b6566fe89b90c8aa", "category": "sast", - "name": "Use of cryptographically weak pseudo-random number generator (PRNG)", - "description": "Depending on the context, generating weak random numbers may expose cryptographic functions,\nwhich rely on these numbers, to be exploitable. When generating numbers for sensitive values\nsuch as tokens, nonces, and cryptographic keys, it is recommended that the `secrets` module\nbe used instead.\n\nExample using the secrets module:\n```\nimport secrets\n\n# Generate a secure random 64 byte array\nrandom_bytes = secrets.token_bytes(64)\nprint(random_bytes)\n\n# Generate a secure random 64 byte array as a hex string\nrandom_bytes_hex = secrets.token_hex(64)\n\n# Generate a secure random 64 byte array base64 encoded for use in URLs\nrandom_string = secrets.token_urlsafe(64)\n```\n\nFor more information on the `secrets` module see:\n- https://docs.python.org/3/library/secrets.html\n", + "name": "Use of cryptographically weak pseudo-random number generator (PRNG). 
Vulnerability to be dismissed.", + "description": "Depending on the context, generating weak random numbers may expose cryptographic functions", "cve": "semgrep_id:bandit.B311:319:319", "severity": "Low", "scanner": { @@ -694,6 +603,12 @@ "start_line": 319 }, "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B311", + "value": "bandit.B311", + "url": "https://semgrep.dev/r/gitlab.bandit.B311" + }, { "type": "cwe", "name": "CWE-338", @@ -714,12 +629,6 @@ "type": "bandit_test_id", "name": "Bandit Test ID B311", "value": "B311" - }, - { - "type": "semgrep_id", - "name": "bandit.B311", - "value": "bandit.B311", - "url": "https://semgrep.dev/r/gitlab.bandit.B311" } ] } diff --git a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.0-multiple-vulnerabilities-incorrect-primary-identifier.json b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.0-multiple-vulnerabilities-incorrect-primary-identifier.json new file mode 100644 index 00000000000000..8108a9f46c97c0 --- /dev/null +++ b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.0-multiple-vulnerabilities-incorrect-primary-identifier.json @@ -0,0 +1,677 @@ +{ + "version": "15.2.2", + "vulnerabilities": [ + { + "id": "ab0e702014cb7fcd3b9bcbed45425c3cfd65a237fbe8b51fd676ac675060bba9", + "category": "sast", + "name": "Deserialization of untrusted data. Vulnerability to be confirmed.", + "description": "The application was found using an unsafe version of `yaml` load which is vulnerable to\ndeserialization attacks. Deserialization attacks exploit the process of reading serialized\ndata and turning it back\ninto an object. By constructing malicious objects and serializing them, an adversary may\nattempt to:\n\n- Inject code that is executed upon object construction, which occurs during the\ndeserialization process.\n- Exploit mass assignment by including fields that are not normally a part of the serialized\ndata but are read in during deserialization.\n\nTo remediate this issue, use `safe_load()` or call `yaml.load()` with the `Loader` argument\nset to\n`yaml.SafeLoader`.\n\nExample loading YAML using `safe_load`:\n```\nimport yaml\n\n# Use safe_load to load data into an intermediary object\nintermediary_object = yaml.safe_load(\"\"\"user:\n name: 'test user'\"\"\"\n)\n# Create our real object, copying over only the necessary fields\nuser_object = {'user': {\n # Assign the deserialized data from intermediary object\n 'name': intermediary_object['user']['name'],\n # Add in protected data in object definition (or set it from a class constructor)\n 'is_admin': False,\n }\n}\n# Work with user_object\n# ...\n```\n\nFor more details on deserialization attacks in general, see OWASP's guide:\n- https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html\n", + "cve": "semgrep_id:bandit.B506:329:329", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 329 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-502", + "value": "502", + "url": "https://cwe.mitre.org/data/definitions/502.html" + }, + { + "type": "owasp", + "name": "A08:2021 - Software and Data Integrity Failures", + "value": "A08:2021" + }, + { + "type": "owasp", + "name": "A8:2017 - Insecure Deserialization", + "value": "A8:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B506", + "value": "B506" + }, + { + "type": "semgrep_id", + "name": "bandit.B506", + "value": "bandit.B506", + "url": 
"https://semgrep.dev/r/gitlab.bandit.B506" + } + ] + }, + { + "id": "4cc9c82ff0d985defd2801e1be40f784c149f8a70f5c1325c5d1979b13771bc1", + "category": "sast", + "name": "Improper neutralization of special elements used in an SQL Command ('SQL Injection'). Vulnerability to be confirmed.", + "description": "SQL Injection is a critical vulnerability that can lead to data or system compromise.", + "cve": "semgrep_id:bandit.B608:265:265", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 265 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-89", + "value": "89", + "url": "https://cwe.mitre.org/data/definitions/89.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + }, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B608", + "value": "B608" + }, + { + "type": "semgrep_id", + "name": "bandit.B608", + "value": "bandit.B608", + "url": "https://semgrep.dev/r/gitlab.bandit.B608" + } + ] + }, + { + "id": "cfe69fc86bf76db2922260ecebb865819886315c51309ff8d52d7eaf9d48e501", + "category": "sast", + "name": "Improper neutralization of directives in dynamically evaluated code ('Eval Injection'). Vulnerability to be confirmed.", + "description": "The application was found calling the `eval` function OR Function()\n constructor OR setTimeout() OR setInterval() methods. If the\n\n variables or strings or functions passed to these methods contains user-supplied input, an adversary could attempt to execute arbitrary\n\n JavaScript\n\n code. This could lead to a full system compromise in Node applications or Cross-site Scripting\n\n (XSS) in web applications.\n\n\n To remediate this issue, remove all calls to above methods and consider alternative methods for\n\n executing\n\n the necessary business logic. There is almost no safe method of calling `eval` or other above stated sinks with\n\n user-supplied input.\n\n Instead, consider alternative methods such as using property accessors to dynamically access\n\n values.\n\n\n Example using property accessors to dynamically access an object's property:\n\n ```\n\n // Define an object\n\n const obj = {key1: 'value1', key2: 'value2'};\n\n // Get key dynamically from user input\n\n const key = getUserInput();\n\n // Check if the key exists in our object and return it, or a default empty string\n\n const value = (obj.hasOwnProperty(key)) ? 
obj[key] : '';\n\n // Work with the value\n\n ```\n\n\n For more information on why not to use `eval`, and alternatives see:\n\n - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/eval#never_use_eval!\n\n Other References:\n\n - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Function/Function\n\n - https://developer.mozilla.org/en-US/docs/Web/API/setTimeout\n\n - https://developer.mozilla.org/en-US/docs/Web/API/setInterval\n", + "cve": "semgrep_id:eslint.detect-eval-with-expression:10:10", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/static/main.js", + "start_line": 10 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-95", + "value": "95", + "url": "https://cwe.mitre.org/data/definitions/95.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + }, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + }, + { + "type": "eslint_rule_id", + "name": "ESLint rule ID/detect-eval-with-expression", + "value": "detect-eval-with-expression" + }, + { + "type": "semgrep_id", + "name": "eslint.detect-eval-with-expression", + "value": "eslint.detect-eval-with-expression", + "url": "https://semgrep.dev/r/gitlab.eslint.detect-eval-with-expression" + } + ] + }, + { + "id": "c30477f6181fd32ad367377123762e5b4f52cdd4e69951591fe704d5f35aeb86", + "category": "sast", + "name": "Improper neutralization of special elements used in an SQL Command ('SQL Injection'). Vulnerability to be confirmed.", + "description": "Detected user input used to manually construct a SQL string. This is usually\nbad practice because manual construction could accidentally result in a SQL\ninjection. An attacker could use a SQL injection to steal or modify contents\nof the database. Instead, use a parameterized query which is available\nby default in most database engines. Alternatively, consider using an\nobject-relational mapper (ORM) such as SQLAlchemy which will protect your queries.\n\nSQL Injections are a critical type of vulnerability that can lead to data \nor system compromise. By dynamically generating SQL query strings, user \ninput may be able to influence the logic of an SQL statement. \nThis could lead to an malicious parties accessing information they should not \nhave access to, or in some circumstances, being able to execute OS functionality\nor code.\n\nReplace all dynamically generated SQL queries with parameterized queries. \nIn situations where dynamic queries must be created, never use direct user input,\nbut instead use a map or dictionary of valid values and resolve them using a user \nsupplied key.\n\nFor example, some database drivers do not allow parameterized queries for \n`>` or `<` comparison operators. In these cases, do not use a user supplied \n`>` or `<` value, but rather have the user supply a `gt` or `lt` value. \nThe alphabetical values are then used to look up the `>` and `<` values to be used \nin the construction of the dynamic query. The same goes for other queries where \ncolumn or table names are required but cannot be parameterized.\nData that is possible user-controlled from a python request is passed\nto `execute()` function. 
To remediate this issue, use SQLAlchemy statements\nwhich are built with query parameterization and therefore not vulnerable \nto sql injection.\n\nIf for some reason this is not feasible, ensure calls including user-supplied \ndata pass it in to the `params` parameter of the `execute()` method.\nBelow is an example using `execute()`, passing in user-supplied data as `params`. \nThis will treat the query as a parameterized query and `params` as strictly data, \npreventing any possibility of SQL Injection.\n\n```\nname = request.args.get('name')\nreq = text('SELECT * FROM student WHERE firstname = :x')\nresult = db.session.execute(req, {\"x\":name})\n```\nFor more information on QuerySets see:\n- https://docs.djangoproject.com/en/4.2/ref/models/querysets/#queryset-api\nFor more information on SQL Injections see OWASP:\n- https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html\n", + "cve": "semgrep_id:python_flask_rule-flask-tainted-sql-string:261:261", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 261 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-89", + "value": "89", + "url": "https://cwe.mitre.org/data/definitions/89.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + }, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + }, + { + "type": "semgrep_id", + "name": "python_flask_rule-flask-tainted-sql-string", + "value": "python_flask_rule-flask-tainted-sql-string" + } + ] + }, + { + "id": "ea8e06bf1648e96a1ff75b3ebe4a702b83367b6959dad867ccc026cd067b5834", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:17:18", + "severity": "Critical", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 17, + "end_line": 18 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + }, + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + } + ] + }, + { + "id": "e97ebd3a747951b1b7499a76dd141ca96bbbdd784604ec615fbd46c9cce75780", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:28:29", + "severity": "Critical", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 28, + "end_line": 29 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + }, + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + } + ] + }, + { + "id": "6c95ad34402b92a5f731e94d44be3c76afa4162a99d22d431b4a803fdc78b294", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:36:37", + "severity": "Critical", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 36, + "end_line": 37 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + }, + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + } + ] + }, + { + "id": "9f07ee2737e73db366bbe53b2e4fce15c13c325ab73d162703984509f9192153", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:44:45", + "severity": "Critical", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 44, + "end_line": 45 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + }, + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + } + ] + }, + { + "id": "9a277ce32137e458b38b1da405ed4515505b286beea2c29ade5ccb770b250555", + "category": "sast", + "name": "Use of a broken or risky cryptographic algorithm. Vulnerability to be resolved.", + "description": "The application was found using an insecure or risky digest or signature algorithm. MD2, MD4,\n MD5 and SHA1 hash algorithms have been found to be vulnerable to producing collisions.\n\nThis means\nthat two different values, when hashed, can lead to the same hash value. 
If the application is\ntrying\nto use these hash methods for storing passwords, then it is recommended to switch to a\npassword hashing\nalgorithm such as Argon2id or PBKDF2.\n\nNote that the `Crypto` and `Cryptodome` Python packages are no longer recommended for\nnew applications, instead consider using the [cryptography](https://cryptography.io/) package.\n\nExample of creating a SHA-384 hash using the `cryptography` package:\n```\nfrom cryptography.hazmat.primitives import hashes\n# Create a SHA384 digest\ndigest = hashes.Hash(hashes.SHA384())\n# Update the digest with some initial data\ndigest.update(b\"some data to hash\")\n# Add more data to the digest\ndigest.update(b\"some more data\")\n# Finalize the digest as bytes\nresult = digest.finalize()\n```\n\nFor more information on secure password storage see OWASP:\n- https://cheatsheetseries.owasp.org/cheatsheets/Password_Storage_Cheat_Sheet.html\n\nFor more information on the cryptography module see:\n- https://cryptography.io/en/latest/\n", + "cve": "semgrep_id:bandit.B303-1:141:141", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 141 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-327", + "value": "327", + "url": "https://cwe.mitre.org/data/definitions/327.html" + }, + { + "type": "owasp", + "name": "A02:2021 - Cryptographic Failures", + "value": "A02:2021" + }, + { + "type": "owasp", + "name": "A3:2017 - Sensitive Data Exposure", + "value": "A3:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B303", + "value": "B303" + }, + { + "type": "semgrep_id", + "name": "bandit.B303-1", + "value": "bandit.B303-1", + "url": "https://semgrep.dev/r/gitlab.bandit.B303-1" + } + ] + }, + { + "id": "f01149fe2f18418cea98677f89a23274464d51abacf55e33c3dd5db227b35919", + "category": "sast", + "name": "Improper certificate validation. Vulnerability to be resolved.", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B501:17:18", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 17, + "end_line": 18 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + }, + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + } + ] + }, + { + "id": "6cdb150a93524217a68ea2a8f0540cae3065b7b62ad4be3f6e4b69089a4f6b0e", + "category": "sast", + "name": "Improper certificate validation. Vulnerability to be resolved.", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B501:28:29", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 28, + "end_line": 29 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + }, + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + } + ] + }, + { + "id": "c2ccb4205c8ee740b3459780633598fed6342aded59395213f3735d85c5d1f8f", + "category": "sast", + "name": "Improper certificate validation. Vulnerability to be resolved.", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B501:36:37", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 36, + "end_line": 37 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + }, + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + } + ] + }, + { + "id": "1c298e2f336e61d323cd8d407fd703a721c103b78eab94d7b07b44e87f563fb0", + "category": "sast", + "name": "Use of cryptographically weak pseudo-random number generator (PRNG). Vulnerability to be dismissed.", + "description": "Depending on the context, generating weak random numbers may expose cryptographic functions,\nwhich rely on these numbers, to be exploitable. 
When generating numbers for sensitive values\nsuch as tokens, nonces, and cryptographic keys, it is recommended that the `secrets` module\nbe used instead.\n\nExample using the secrets module:\n```\nimport secrets\n\n# Generate a secure random 64 byte array\nrandom_bytes = secrets.token_bytes(64)\nprint(random_bytes)\n\n# Generate a secure random 64 byte array as a hex string\nrandom_bytes_hex = secrets.token_hex(64)\n\n# Generate a secure random 64 byte array base64 encoded for use in URLs\nrandom_string = secrets.token_urlsafe(64)\n```\n\nFor more information on the `secrets` module see:\n- https://docs.python.org/3/library/secrets.html\n", + "cve": "semgrep_id:bandit.B311:295:295", + "severity": "Low", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 295 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-338", + "value": "338", + "url": "https://cwe.mitre.org/data/definitions/338.html" + }, + { + "type": "owasp", + "name": "A02:2021 - Cryptographic Failures", + "value": "A02:2021" + }, + { + "type": "owasp", + "name": "A3:2017 - Sensitive Data Exposure", + "value": "A3:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B311", + "value": "B311" + }, + { + "type": "semgrep_id", + "name": "bandit.B311", + "value": "bandit.B311", + "url": "https://semgrep.dev/r/gitlab.bandit.B311" + } + ] + }, + { + "id": "c551792feebfb8e0cea54dd6af035f5d3fd71a5b1f8af102fb1de5a479405a01", + "category": "sast", + "name": "Use of cryptographically weak pseudo-random number generator (PRNG). Vulnerability to be dismissed.", + "description": "Depending on the context, generating weak random numbers may expose cryptographic functions", + "cve": "semgrep_id:bandit.B311:319:319", + "severity": "Low", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 319 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-338", + "value": "338", + "url": "https://cwe.mitre.org/data/definitions/338.html" + }, + { + "type": "owasp", + "name": "A02:2021 - Cryptographic Failures", + "value": "A02:2021" + }, + { + "type": "owasp", + "name": "A3:2017 - Sensitive Data Exposure", + "value": "A3:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B311", + "value": "B311" + }, + { + "type": "semgrep_id", + "name": "bandit.B311", + "value": "bandit.B311", + "url": "https://semgrep.dev/r/gitlab.bandit.B311" + } + ] + } + ], + "scan": { + "analyzer": { + "id": "semgrep", + "name": "Semgrep", + "url": "https://gitlab.com/gitlab-org/security-products/analyzers/semgrep", + "vendor": { + "name": "GitLab" + }, + "version": "6.7.0" + }, + "scanner": { + "id": "semgrep", + "name": "Semgrep", + "url": "https://github.com/returntocorp/semgrep", + "vendor": { + "name": "GitLab" + }, + "version": "1.118.0" + }, + "type": "sast", + "start_time": "2025-11-07T18:27:55", + "end_time": "2025-11-07T18:28:02", + "status": "success", + "observability": { + "events": [ + { + "event": "collect_sast_scan_metrics_from_pipeline", + "property": "12d9d108-d5b4-47fb-87ea-1789284a5930", + "label": "semgrep", + "value": 0, + "version": "6.7.0", + "exit_code": 0, + "override_count": 0, + "passthrough_count": 0, + "custom_exclude_path_count": 0, + "time_s": 6, + "file_count": 4 + } + ] + } + } +} diff --git a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-additional-vulnerabilities-correct-primary-identifier.json 
b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-additional-vulnerabilities-correct-primary-identifier.json new file mode 100644 index 00000000000000..6b6e9f51bd005f --- /dev/null +++ b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-additional-vulnerabilities-correct-primary-identifier.json @@ -0,0 +1,137 @@ +{ + "version": "15.2.2", + "vulnerabilities": [ + { + "id": "i878843d5b4edf0042e3066429a4cac5f66f8c7ad72b40056601fbb191fa13214", + "category": "sast", + "name": "Additional vulerability 1", + "description": "The application was found using the `requests` module without configuring a timeout value.", + "cve": "semgrep_id:bandit.B501:44:45", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 44, + "end_line": 45 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + }, + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + } + ] + }, + { + "id": "6cac58319f88ad3a1cb16df9c1272049ea0f909fa5fc3f67508148fda3ce5e2c", + "category": "sast", + "name": "Additional vulnerability 2", + "description": "The `RegExp` constructor was called with a non-literal value.", + "cve": "semgrep_id:eslint.detect-non-literal-regexp:15:15", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/static/main.js", + "start_line": 15 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "eslint.detect-non-literal-regexp", + "value": "eslint.detect-non-literal-regexp", + "url": "https://semgrep.dev/r/gitlab.eslint.detect-non-literal-regexp" + }, + { + "type": "cwe", + "name": "CWE-185", + "value": "185", + "url": "https://cwe.mitre.org/data/definitions/185.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + }, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + }, + { + "type": "eslint_rule_id", + "name": "ESLint rule ID/detect-non-literal-regexp", + "value": "detect-non-literal-regexp" + } + ] + } + ], + "scan": { + "analyzer": { + "id": "semgrep", + "name": "Semgrep", + "url": "https://gitlab.com/gitlab-org/security-products/analyzers/semgrep", + "vendor": { + "name": "GitLab" + }, + "version": "6.7.1" + }, + "scanner": { + "id": "semgrep", + "name": "Semgrep", + "url": "https://github.com/returntocorp/semgrep", + "vendor": { + "name": "GitLab" + }, + "version": "1.118.0" + }, + "type": "sast", + "start_time": "2025-11-07T18:35:36", + "end_time": "2025-11-07T18:35:41", + "status": "success", + "observability": { + "events": [ + { + "event": "collect_sast_scan_metrics_from_pipeline", + "property": "fcdda559-730c-40ab-9aea-0356dbb429b6", + "label": "semgrep", + "value": 0, + "version": "6.7.1", + "exit_code": 2, + "override_count": 0, + "passthrough_count": 0, + "custom_exclude_path_count": 0, + "time_s": 5, + "file_count": 4 + } + ] + } + } +} diff --git a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-multiple-vulnerabilities.json 
b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-multiple-vulnerabilities-correct-primary-identifier.json similarity index 75% rename from ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-multiple-vulnerabilities.json rename to ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-multiple-vulnerabilities-correct-primary-identifier.json index fe4134e1491bda..5cbb16ff896b76 100644 --- a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-multiple-vulnerabilities.json +++ b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-multiple-vulnerabilities-correct-primary-identifier.json @@ -1,10 +1,10 @@ { - "version": "15.1.4", + "version": "15.2.2", "vulnerabilities": [ { "id": "6a8d97c532a32e7bb9e1d93b3300977b8a7e75f9ddcc5bac4edaa6cda3603833", "category": "sast", - "name": "Deserialization of untrusted data", + "name": "Deserialization of untrusted data. Vulnerability to be confirmed.", "description": "The application was found using an unsafe version of `yaml` load which is vulnerable to\ndeserialization attacks. Deserialization attacks exploit the process of reading serialized\ndata and turning it back\ninto an object. By constructing malicious objects and serializing them, an adversary may\nattempt to:\n\n- Inject code that is executed upon object construction, which occurs during the\ndeserialization process.\n- Exploit mass assignment by including fields that are not normally a part of the serialized\ndata but are read in during deserialization.\n\nTo remediate this issue, use `safe_load()` or call `yaml.load()` with the `Loader` argument\nset to\n`yaml.SafeLoader`.\n\nExample loading YAML using `safe_load`:\n```\nimport yaml\n\n# Use safe_load to load data into an intermediary object\nintermediary_object = yaml.safe_load(\"\"\"user:\n name: 'test user'\"\"\"\n)\n# Create our real object, copying over only the necessary fields\nuser_object = {'user': {\n # Assign the deserialized data from intermediary object\n 'name': intermediary_object['user']['name'],\n # Add in protected data in object definition (or set it from a class constructor)\n 'is_admin': False,\n }\n}\n# Work with user_object\n# ...\n```\n\nFor more details on deserialization attacks in general, see OWASP's guide:\n- https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html\n", "cve": "semgrep_id:bandit.B506:329:329", "severity": "High", @@ -49,8 +49,8 @@ { "id": "185f6aa5aece728c2b94f16ff36ea99339dbeb39a027964d65a0e544b439529d", "category": "sast", - "name": "Improper neutralization of special elements used in an SQL Command ('SQL Injection')", - "description": "SQL Injection is a critical vulnerability that can lead to data or system compromise. By\ndynamically generating SQL query strings, user input may be able to influence the logic of\nthe SQL statement. This could lead to an adversary accessing information they should\nnot have access to, or in some circumstances, being able to execute OS functionality or code.\n\nReplace all dynamically generated SQL queries with parameterized queries. In situations where\ndynamic queries must be created, never use direct user input, but instead use a map or\ndictionary of valid values and resolve them using a user supplied key.\n\nFor example, some database drivers do not allow parameterized queries for `\u003e` or `\u003c` comparison\noperators. In these cases, do not use a user supplied `\u003e` or `\u003c` value, but rather have the\nuser\nsupply a `gt` or `lt` value. 
The alphabetical values are then used to look up the `\u003e` and `\u003c`\nvalues to be used in the construction of the dynamic query. The same goes for other queries\nwhere\ncolumn or table names are required but cannot be parameterized.\n\nExample using `PreparedStatement` queries:\n```\nimport sqlite3\n\n# Create a new database (in memory)\ncon = sqlite3.connect(\":memory:\")\n# Get a cursor from the connection\ncur = con.cursor()\n# Create a tuple of the value to be used in the parameterized query\nparams = ('user-input',)\n# execute the statement, passing in the params for the value\ncur.execute(\"select name from sqlite_master where name = ?\", params)\n# work with the result\nresult = cur.fetchall()\n```\n\nFor more information on SQL Injection see OWASP:\nhttps://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html\n", + "name": "Improper neutralization of special elements used in an SQL Command ('SQL Injection'). Vulnerability to be confirmed.", + "description": "SQL Injection is a critical vulnerability that can lead to data or system compromise.", "cve": "semgrep_id:bandit.B608:265:265", "severity": "High", "scanner": { @@ -94,7 +94,7 @@ { "id": "afb3a18f344a72ed01c842afd1939b4c33b150ba50234001d8eb34ce72a977f4", "category": "sast", - "name": "Improper neutralization of directives in dynamically evaluated code ('Eval Injection')", + "name": "Improper neutralization of directives in dynamically evaluated code ('Eval Injection'). Vulnerability to be confirmed.", "description": "The application was found calling the `eval` function OR Function()\n constructor OR setTimeout() OR setInterval() methods. If the\n\n variables or strings or functions passed to these methods contains user-supplied input, an adversary could attempt to execute arbitrary\n\n JavaScript\n\n code. This could lead to a full system compromise in Node applications or Cross-site Scripting\n\n (XSS) in web applications.\n\n\n To remediate this issue, remove all calls to above methods and consider alternative methods for\n\n executing\n\n the necessary business logic. There is almost no safe method of calling `eval` or other above stated sinks with\n\n user-supplied input.\n\n Instead, consider alternative methods such as using property accessors to dynamically access\n\n values.\n\n\n Example using property accessors to dynamically access an object's property:\n\n ```\n\n // Define an object\n\n const obj = {key1: 'value1', key2: 'value2'};\n\n // Get key dynamically from user input\n\n const key = getUserInput();\n\n // Check if the key exists in our object and return it, or a default empty string\n\n const value = (obj.hasOwnProperty(key)) ? 
obj[key] : '';\n\n // Work with the value\n\n ```\n\n\n For more information on why not to use `eval`, and alternatives see:\n\n - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/eval#never_use_eval!\n\n Other References:\n\n - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Function/Function\n\n - https://developer.mozilla.org/en-US/docs/Web/API/setTimeout\n\n - https://developer.mozilla.org/en-US/docs/Web/API/setInterval\n", "cve": "semgrep_id:eslint.detect-eval-with-expression:10:10", "severity": "High", @@ -139,8 +139,8 @@ { "id": "4e7633d40f31f6398b4c7ffc4bf481ba6fe627c34042d7439b71259e6ea9b32c", "category": "sast", - "name": "Improper neutralization of special elements used in an SQL Command ('SQL Injection')", - "description": "Detected user input used to manually construct a SQL string. This is usually\nbad practice because manual construction could accidentally result in a SQL\ninjection. An attacker could use a SQL injection to steal or modify contents\nof the database. Instead, use a parameterized query which is available\nby default in most database engines. Alternatively, consider using an\nobject-relational mapper (ORM) such as SQLAlchemy which will protect your queries.\n\nSQL Injections are a critical type of vulnerability that can lead to data \nor system compromise. By dynamically generating SQL query strings, user \ninput may be able to influence the logic of an SQL statement. \nThis could lead to an malicious parties accessing information they should not \nhave access to, or in some circumstances, being able to execute OS functionality\nor code.\n\nReplace all dynamically generated SQL queries with parameterized queries. \nIn situations where dynamic queries must be created, never use direct user input,\nbut instead use a map or dictionary of valid values and resolve them using a user \nsupplied key.\n\nFor example, some database drivers do not allow parameterized queries for \n`\u003e` or `\u003c` comparison operators. In these cases, do not use a user supplied \n`\u003e` or `\u003c` value, but rather have the user supply a `gt` or `lt` value. \nThe alphabetical values are then used to look up the `\u003e` and `\u003c` values to be used \nin the construction of the dynamic query. The same goes for other queries where \ncolumn or table names are required but cannot be parameterized.\nData that is possible user-controlled from a python request is passed\nto `execute()` function. To remediate this issue, use SQLAlchemy statements\nwhich are built with query parameterization and therefore not vulnerable \nto sql injection.\n\nIf for some reason this is not feasible, ensure calls including user-supplied \ndata pass it in to the `params` parameter of the `execute()` method.\nBelow is an example using `execute()`, passing in user-supplied data as `params`. \nThis will treat the query as a parameterized query and `params` as strictly data, \npreventing any possibility of SQL Injection.\n\n```\nname = request.args.get('name')\nreq = text('SELECT * FROM student WHERE firstname = :x')\nresult = db.session.execute(req, {\"x\":name})\n```\nFor more information on QuerySets see:\n- https://docs.djangoproject.com/en/4.2/ref/models/querysets/#queryset-api\nFor more information on SQL Injections see OWASP:\n- https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html\n", + "name": "Improper neutralization of special elements used in an SQL Command ('SQL Injection'). 
Vulnerability to be confirmed.", + "description": "Detected user input used to manually construct a SQL string. This is usually\nbad practice because manual construction could accidentally result in a SQL\ninjection. An attacker could use a SQL injection to steal or modify contents\nof the database. Instead, use a parameterized query which is available\nby default in most database engines. Alternatively, consider using an\nobject-relational mapper (ORM) such as SQLAlchemy which will protect your queries.\n\nSQL Injections are a critical type of vulnerability that can lead to data \nor system compromise. By dynamically generating SQL query strings, user \ninput may be able to influence the logic of an SQL statement. \nThis could lead to an malicious parties accessing information they should not \nhave access to, or in some circumstances, being able to execute OS functionality\nor code.\n\nReplace all dynamically generated SQL queries with parameterized queries. \nIn situations where dynamic queries must be created, never use direct user input,\nbut instead use a map or dictionary of valid values and resolve them using a user \nsupplied key.\n\nFor example, some database drivers do not allow parameterized queries for \n`>` or `<` comparison operators. In these cases, do not use a user supplied \n`>` or `<` value, but rather have the user supply a `gt` or `lt` value. \nThe alphabetical values are then used to look up the `>` and `<` values to be used \nin the construction of the dynamic query. The same goes for other queries where \ncolumn or table names are required but cannot be parameterized.\nData that is possible user-controlled from a python request is passed\nto `execute()` function. To remediate this issue, use SQLAlchemy statements\nwhich are built with query parameterization and therefore not vulnerable \nto sql injection.\n\nIf for some reason this is not feasible, ensure calls including user-supplied \ndata pass it in to the `params` parameter of the `execute()` method.\nBelow is an example using `execute()`, passing in user-supplied data as `params`. \nThis will treat the query as a parameterized query and `params` as strictly data, \npreventing any possibility of SQL Injection.\n\n```\nname = request.args.get('name')\nreq = text('SELECT * FROM student WHERE firstname = :x')\nresult = db.session.execute(req, {\"x\":name})\n```\nFor more information on QuerySets see:\n- https://docs.djangoproject.com/en/4.2/ref/models/querysets/#queryset-api\nFor more information on SQL Injections see OWASP:\n- https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html\n", "cve": "semgrep_id:python_flask_rule-flask-tainted-sql-string:261:261", "severity": "High", "scanner": { @@ -181,7 +181,7 @@ "name": "Allocation of resources without limits or throttling", "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", "cve": "semgrep_id:bandit.B113:17:18", - "severity": "Medium", + "severity": "Critical", "scanner": { "id": "semgrep", "name": "Semgrep" @@ -227,7 +227,7 @@ "name": "Allocation of resources without limits or throttling", "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", "cve": "semgrep_id:bandit.B113:28:29", - "severity": "Medium", + "severity": "Critical", "scanner": { "id": "semgrep", "name": "Semgrep" @@ -273,7 +273,7 @@ "name": "Allocation of resources without limits or throttling", "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", "cve": "semgrep_id:bandit.B113:36:37", - "severity": "Medium", + "severity": "Critical", "scanner": { "id": "semgrep", "name": "Semgrep" @@ -319,7 +319,7 @@ "name": "Allocation of resources without limits or throttling", "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", "cve": "semgrep_id:bandit.B113:44:45", - "severity": "Medium", + "severity": "Critical", "scanner": { "id": "semgrep", "name": "Semgrep" @@ -362,7 +362,7 @@ { "id": "e81f87450a35ed038550bfe4f56dcff5bebd9c5ca5f309b6144de063cb99e1b2", "category": "sast", - "name": "Use of a broken or risky cryptographic algorithm", + "name": "Use of a broken or risky cryptographic algorithm. Vulnerability to be resolved.", "description": "The application was found using an insecure or risky digest or signature algorithm. MD2, MD4,\n MD5 and SHA1 hash algorithms have been found to be vulnerable to producing collisions.\n\nThis means\nthat two different values, when hashed, can lead to the same hash value. If the application is\ntrying\nto use these hash methods for storing passwords, then it is recommended to switch to a\npassword hashing\nalgorithm such as Argon2id or PBKDF2.\n\nNote that the `Crypto` and `Cryptodome` Python packages are no longer recommended for\nnew applications, instead consider using the [cryptography](https://cryptography.io/) package.\n\nExample of creating a SHA-384 hash using the `cryptography` package:\n```\nfrom cryptography.hazmat.primitives import hashes\n# Create a SHA384 digest\ndigest = hashes.Hash(hashes.SHA384())\n# Update the digest with some initial data\ndigest.update(b\"some data to hash\")\n# Add more data to the digest\ndigest.update(b\"some more data\")\n# Finalize the digest as bytes\nresult = digest.finalize()\n```\n\nFor more information on secure password storage see OWASP:\n- https://cheatsheetseries.owasp.org/cheatsheets/Password_Storage_Cheat_Sheet.html\n\nFor more information on the cryptography module see:\n- https://cryptography.io/en/latest/\n", "cve": "semgrep_id:bandit.B303-1:141:141", "severity": "Medium", @@ -407,7 +407,7 @@ { "id": "3f8a15b8ea5a1e062262c837c4b5c763320c40622f50183f04fa2e584fc05e13", "category": "sast", - "name": "Improper certificate validation", + "name": "Improper certificate validation. Vulnerability to be resolved.", "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", "cve": "semgrep_id:bandit.B501:17:18", "severity": "Medium", @@ -453,7 +453,7 @@ { "id": "8b6a98da4410a8abe0a3338ec5db34f4a9a48d0716ba296dcda0e93b63a5766f", "category": "sast", - "name": "Improper certificate validation", + "name": "Improper certificate validation. Vulnerability to be resolved.", "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", "cve": "semgrep_id:bandit.B501:28:29", "severity": "Medium", @@ -499,7 +499,7 @@ { "id": "3b65f8017d6b3a73a5f6e7d1c0e9e78aa0daf817f06234985a9d011da1a9d804", "category": "sast", - "name": "Improper certificate validation", + "name": "Improper certificate validation. Vulnerability to be resolved.", "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", "cve": "semgrep_id:bandit.B501:36:37", "severity": "Medium", @@ -542,101 +542,10 @@ } ] }, - { - "id": "878843d5b4edf0042e3066429a4cac5f66f8c7ad72b40056601fbb191fa13214", - "category": "sast", - "name": "Improper certificate validation", - "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", - "cve": "semgrep_id:bandit.B501:44:45", - "severity": "Medium", - "scanner": { - "id": "semgrep", - "name": "Semgrep" - }, - "location": { - "file": "tests/e2e_zap.py", - "start_line": 44, - "end_line": 45 - }, - "identifiers": [ - { - "type": "semgrep_id", - "name": "bandit.B501", - "value": "bandit.B501", - "url": "https://semgrep.dev/r/gitlab.bandit.B501" - }, - { - "type": "cwe", - "name": "CWE-295", - "value": "295", - "url": "https://cwe.mitre.org/data/definitions/295.html" - }, - { - "type": "owasp", - "name": "A07:2021 - Identification and Authentication Failures", - "value": "A07:2021" - }, - { - "type": "owasp", - "name": "A2:2017 - Broken Authentication", - "value": "A2:2017" - }, - { - "type": "bandit_test_id", - "name": "Bandit Test ID B501", - "value": "B501" - } - ] - }, - { - "id": "6cac58319f88ad3a1cb16df9c1272049ea0f909fa5fc3f67508148fda3ce5e2c", - "category": "sast", - "name": "Regular expression with non-literal value", - "description": "The `RegExp` constructor was called with a non-literal value. If an adversary were able to\nsupply a malicious regex, they could cause a Regular Expression Denial of Service (ReDoS)\nagainst the application. In Node applications, this could cause the entire application to no\nlonger be responsive to other users' requests.\n\nTo remediate this issue, never allow user-supplied regular expressions. Instead, the regular \nexpression should be hardcoded. If this is not possible, consider using an alternative regular\nexpression engine such as [node-re2](https://www.npmjs.com/package/re2). 
RE2 is a safe alternative \nthat does not support backtracking, which is what leads to ReDoS.\n\nExample using re2 which does not support backtracking (Note: it is still recommended to\nnever use user-supplied input):\n```\n// Import the re2 module\nconst RE2 = require('re2');\n\nfunction match(userSuppliedRegex, userInput) {\n // Create a RE2 object with the user supplied regex, this is relatively safe\n // due to RE2 not supporting backtracking which can be abused to cause long running\n // queries\n var re = new RE2(userSuppliedRegex);\n // Execute the regular expression against some userInput\n var result = re.exec(userInput);\n // Work with the result\n}\n```\n\nFor more information on Regular Expression DoS see:\n- https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS\n", - "cve": "semgrep_id:eslint.detect-non-literal-regexp:15:15", - "severity": "Medium", - "scanner": { - "id": "semgrep", - "name": "Semgrep" - }, - "location": { - "file": "app/static/main.js", - "start_line": 15 - }, - "identifiers": [ - { - "type": "semgrep_id", - "name": "eslint.detect-non-literal-regexp", - "value": "eslint.detect-non-literal-regexp", - "url": "https://semgrep.dev/r/gitlab.eslint.detect-non-literal-regexp" - }, - { - "type": "cwe", - "name": "CWE-185", - "value": "185", - "url": "https://cwe.mitre.org/data/definitions/185.html" - }, - { - "type": "owasp", - "name": "A03:2021 - Injection", - "value": "A03:2021" - }, - { - "type": "owasp", - "name": "A1:2017 - Injection", - "value": "A1:2017" - }, - { - "type": "eslint_rule_id", - "name": "ESLint rule ID/detect-non-literal-regexp", - "value": "detect-non-literal-regexp" - } - ] - }, { "id": "512131f12839cd51c58aaabf643870dc262bf169f0af15a47d0d073fcfd449ac", "category": "sast", - "name": "Use of cryptographically weak pseudo-random number generator (PRNG)", + "name": "Use of cryptographically weak pseudo-random number generator (PRNG). Vulnerability to be dismissed.", "description": "Depending on the context, generating weak random numbers may expose cryptographic functions,\nwhich rely on these numbers, to be exploitable. When generating numbers for sensitive values\nsuch as tokens, nonces, and cryptographic keys, it is recommended that the `secrets` module\nbe used instead.\n\nExample using the secrets module:\n```\nimport secrets\n\n# Generate a secure random 64 byte array\nrandom_bytes = secrets.token_bytes(64)\nprint(random_bytes)\n\n# Generate a secure random 64 byte array as a hex string\nrandom_bytes_hex = secrets.token_hex(64)\n\n# Generate a secure random 64 byte array base64 encoded for use in URLs\nrandom_string = secrets.token_urlsafe(64)\n```\n\nFor more information on the `secrets` module see:\n- https://docs.python.org/3/library/secrets.html\n", "cve": "semgrep_id:bandit.B311:295:295", "severity": "Low", @@ -681,8 +590,8 @@ { "id": "6cf069d55d47c54f5b2363af43f3c7a2d71ef25e04751111b6566fe89b90c8aa", "category": "sast", - "name": "Use of cryptographically weak pseudo-random number generator (PRNG)", - "description": "Depending on the context, generating weak random numbers may expose cryptographic functions,\nwhich rely on these numbers, to be exploitable. 
When generating numbers for sensitive values\nsuch as tokens, nonces, and cryptographic keys, it is recommended that the `secrets` module\nbe used instead.\n\nExample using the secrets module:\n```\nimport secrets\n\n# Generate a secure random 64 byte array\nrandom_bytes = secrets.token_bytes(64)\nprint(random_bytes)\n\n# Generate a secure random 64 byte array as a hex string\nrandom_bytes_hex = secrets.token_hex(64)\n\n# Generate a secure random 64 byte array base64 encoded for use in URLs\nrandom_string = secrets.token_urlsafe(64)\n```\n\nFor more information on the `secrets` module see:\n- https://docs.python.org/3/library/secrets.html\n", + "name": "Use of cryptographically weak pseudo-random number generator (PRNG). Vulnerability to be dismissed.", + "description": "Depending on the context, generating weak random numbers may expose cryptographic functions", "cve": "semgrep_id:bandit.B311:319:319", "severity": "Low", "scanner": { @@ -732,7 +641,7 @@ "vendor": { "name": "GitLab" }, - "version": "6.6.2" + "version": "6.7.1" }, "scanner": { "id": "semgrep", @@ -744,22 +653,22 @@ "version": "1.118.0" }, "type": "sast", - "start_time": "2025-09-29T21:06:41", - "end_time": "2025-09-29T21:06:48", + "start_time": "2025-11-07T18:35:36", + "end_time": "2025-11-07T18:35:41", "status": "success", "observability": { "events": [ { "event": "collect_sast_scan_metrics_from_pipeline", - "property": "5c418ec4-3b29-4631-bbbc-61e76f3f2396", + "property": "fcdda559-730c-40ab-9aea-0356dbb429b6", "label": "semgrep", "value": 0, - "version": "6.6.2", - "exit_code": 0, + "version": "6.7.1", + "exit_code": 2, "override_count": 0, "passthrough_count": 0, "custom_exclude_path_count": 0, - "time_s": 6, + "time_s": 5, "file_count": 4 } ] diff --git a/ee/spec/services/security/ingestion/custom_spec.rb b/ee/spec/services/security/ingestion/custom_spec.rb deleted file mode 100644 index 430eca4703c890..00000000000000 --- a/ee/spec/services/security/ingestion/custom_spec.rb +++ /dev/null @@ -1,123 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe 'my custom spec', feature_category: :vulnerability_management do - let(:project) { create(:project) } - let(:user) { create(:user) } - let(:pipeline1) { create(:ci_pipeline, user: user, project: project) } - let(:sast_build) { create(:ee_ci_build, :success, pipeline: pipeline1, project: project) } - let!(:sast_semgrep_artifact_with_correct_primary_identifiers1) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities, job: sast_build) } - - let(:pipeline2) { create(:ci_pipeline, user: user, project: project) } - let(:sast_build2) { create(:ee_ci_build, :success, pipeline: pipeline2) } - let!(:sast_semgrep_artifact_with_incorrect_primary_identifiers) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities_incorrect_primary_identifier, job: sast_build2) } - - let(:pipeline3) { create(:ci_pipeline, user: user, project: project) } - let(:sast_build3) { create(:ee_ci_build, :success, pipeline: pipeline3) } - let!(:sast_semgrep_artifact_with_correct_primary_identifiers2) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities, job: sast_build3) } - - let(:known_keys) { Set.new } - - before do - stub_licensed_features(sast: true, security_dashboard: true) - project.add_maintainer(user) - end - - it 'restores vulnerabilities' do - Security::StoreScansService.execute(pipeline1) - # Security::StoreScanService.execute(sast_semgrep_artifact, known_keys, false) - 
Security::Ingestion::IngestReportsService.execute(pipeline1) - - Vulnerability.where(project_id: project.id).each { |v| puts "Resolving vuln #{v.id}"; ::Vulnerabilities::ResolveService.new(user, v, "first resolution").execute } - Vulnerability.where(project_id: project.id, severity: 'high').each { |v| puts "Confirming vuln #{v.id}"; Vulnerabilities::ConfirmService.new(user, v, "confirming").execute } - Vulnerability.where(project_id: project.id, severity: 'low').each { |v| puts "Dismissing vuln #{v.id}"; Vulnerabilities::DismissService.new(user, v, "dismissing", 'acceptable_risk').execute } - Vulnerability.where(project_id: project.id, severity: 'medium')[0..4].each {|v| puts "Resolving again vuln #{v.id}"; ::Vulnerabilities::ResolveService.new(user, v, "last resolution").execute } - - Security::StoreScansService.execute(pipeline2) - Security::Ingestion::IngestReportsService.execute(pipeline2) - - Security::StoreScansService.execute(pipeline3) - Security::Ingestion::IngestReportsService.execute(pipeline3) - - reset_vulnerability_states(project.id) - - expect(Vulnerability.where(project_id: project.id, severity: 'high', resolved_on_default_branch: false).map { |v| v.finding.state }) - .to eq(["confirmed"] * 4) - - expect(Vulnerability.where(project_id: project.id, severity: 'low', resolved_on_default_branch: false).map { |v| v.finding.state }) - .to eq(["dismissed"] * 2) - - expect(Vulnerability.where(project_id: project.id, severity: 'medium', resolved_on_default_branch: false).map { |v| v.finding.state }) - .to eq(["resolved"] * 10) - - # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITY.COUNT: #{Vulnerability.count.inspect}|), "XXXXXXXXXXXXXXXX" - # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::FINDING.COUNT: #{Vulnerabilities::Finding.count.inspect}|), "XXXXXXXXXXXXXXXX" - # puts "XXXXXXXXXXXXXXXX", (%|SECURITY::FINDING.COUNT: #{Security::Finding.count.inspect}|), "XXXXXXXXXXXXXXXX" - # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::IDENTIFIER.COUNT: #{Vulnerabilities::Identifier.count.inspect}|), "XXXXXXXXXXXXXXXX" - # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::SCANNER.COUNT: #{Vulnerabilities::Scanner.count.inspect}|), "XXXXXXXXXXXXXXXX" - # puts "XXXXXXXXXXXXXXXX", (%|SECURITY::FINDING.COUNT: #{Security::Finding.count.inspect}|), "XXXXXXXXXXXXXXXX" - # puts "XXXXXXXXXXXXXXXX", (%|VULNERABILITIES::FINDINGIDENTIFIER.COUNT: #{Vulnerabilities::FindingIdentifier.count.inspect}|), "XXXXXXXXXXXXXXXX" - end -end - -def reset_vulnerability_states(project_id) - vulnerabilities = Vulnerability.joins(findings: :primary_identifier) - .where(project_id: project_id) - .where.not(vulnerability_identifiers: { external_type: 'semgrep_id' }) - - vulnerabilities.each do |vulnerability| - latest_transition = vulnerability.state_transitions - .where.not(author_id: nil) - .order(created_at: :desc) - .first - next unless latest_transition - - matching_finding = find_matching_finding(vulnerability.finding) - next unless matching_finding - - apply_state_transition(matching_finding.vulnerability, latest_transition) - end -end - -private - -def find_matching_finding(original_finding) - # Try finding by corrected metadata - corrected_metadata = reorder_identifiers(original_finding.raw_metadata) - finding = Vulnerabilities::Finding.find_by(raw_metadata: corrected_metadata) - return finding if finding - - # Fallback to attribute matching - puts "Unable to find vulnerability finding using raw metadata, attempting backup strategy" - Vulnerabilities::Finding.where( - severity: original_finding.severity, - report_type: 
original_finding.report_type, - location_fingerprint: original_finding.location_fingerprint, - name: original_finding.name, - metadata_version: original_finding.metadata_version - ).where.not(id: original_finding.id).first.tap do |result| - puts "Unable to find match with backup strategy" unless result - end -end - -def reorder_identifiers(raw_metadata) - metadata = JSON.parse(raw_metadata) - metadata['identifiers'] = metadata['identifiers'].partition { |id| id['type'] == 'semgrep_id' }.flatten(1) - JSON.generate(metadata) -end - -def apply_state_transition(vulnerability, transition) - return if vulnerability.state == transition.to_state - - author = User.find(transition.author_id) - - case transition.to_state - when "resolved" - ::Vulnerabilities::ResolveService.new(author, vulnerability, transition.comment).execute - when "confirmed" - ::Vulnerabilities::ConfirmService.new(author, vulnerability, transition.comment).execute - when "dismissed" - ::Vulnerabilities::DismissService.new(author, vulnerability, transition.comment, transition.dismissal_reason).execute - end -end diff --git a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb index 6a8aa975e53b2d..b4237d6216fa7b 100644 --- a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb +++ b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb @@ -1,52 +1,74 @@ # frozen_string_literal: true -# See https://docs.gitlab.com/ee/development/database/batched_background_migrations.html -# for more information on how to use batched background migrations - -# Update below commented lines with appropriate values. - -require 'json' - +# rubocop:disable Metrics/ClassLength -- we need to keep the logic in a single class module Gitlab module BackgroundMigration class RestoreIncorrectVulnerabilityStates < BatchedMigrationJob operation_name :restore_incorrect_vulnerability_states - # operation_name :my_operation # This is used as the key on collecting metrics - # scope_to ->(relation) { relation.where(column: "value") } feature_category :static_application_security_testing + TRANSITION_COMMENT_TEMPLATE = '%{original_comment} (original comment automatically copied ' \ + 'from transition %{transition_id} to fix semgrep 6.7.0 bug)' + + SYSTEM_NOTE_TEMPLATE = 'changed vulnerability status from %{from_state} to %{to_state} with the ' \ + 'following comment: "%{original_comment}" (original comment automatically copied from transition ' \ + '%{transition_id} to fix semgrep 6.7.0 bug)' + + SUB_BATCH_SIZE = 100 + + # This migration fixes corrupted vulnerability data introduced by semgrep v6.7.0, + # released on 16 September 2025 at 20:30:02 UTC module Migratable + class Note < ApplicationRecord + has_one :system_note_metadata + end + + class SystemNoteMetadata < ApplicationRecord + belongs_to :note + end + + class Project < ApplicationRecord + end + module Enums + module Security + def self.scan_types = { sast: 1 } + end + module Vulnerability - REPORT_TYPES = { - sast: 0, - }.freeze + REPORT_TYPES = { sast: 0 }.with_indifferent_access.freeze VULNERABILITY_STATES = { detected: 1, - confirmed: 4, + dismissed: 2, resolved: 3, - dismissed: 2 + confirmed: 4 }.with_indifferent_access.freeze - def self.vulnerability_states - VULNERABILITY_STATES - end - SEVERITY_LEVELS = { - # undefined: 0, no longer applicable info: 1, unknown: 2, - # experimental: 3, formerly used by confidence, no longer applicable low: 4, medium: 5, high: 6, 
critical: 7 }.with_indifferent_access.freeze - def self.severity_levels - SEVERITY_LEVELS - end + def self.vulnerability_states = VULNERABILITY_STATES + def self.report_types = REPORT_TYPES + def self.severity_levels = SEVERITY_LEVELS + end + end + + module Security + class Finding < SecApplicationRecord + self.table_name = 'security_findings' + + belongs_to :vulnerability_finding, + class_name: 'Migratable::Vulnerabilities::Finding', + primary_key: :uuid, + foreign_key: :uuid, + inverse_of: :security_findings end end @@ -54,38 +76,62 @@ module Vulnerabilities class StateTransition < SecApplicationRecord self.table_name = 'vulnerability_state_transitions' - enum :from_state, ::Enums::Vulnerability.vulnerability_states, prefix: true - enum :to_state, ::Enums::Vulnerability.vulnerability_states, prefix: true + enum :from_state, Migratable::Enums::Vulnerability.vulnerability_states, prefix: true + enum :to_state, Migratable::Enums::Vulnerability.vulnerability_states, prefix: true - belongs_to :vulnerability, class_name: 'Vulnerability', inverse_of: :state_transitions - belongs_to :vulnerability_occurrence, optional: true, class_name: 'Vulnerabilities::Finding' + belongs_to :vulnerability, class_name: 'Migratable::Vulnerability', inverse_of: :state_transitions + belongs_to :vulnerability_occurrence, optional: true, class_name: 'Migratable::Vulnerabilities::Finding' end class Finding < SecApplicationRecord self.table_name = 'vulnerability_occurrences' - enum :report_type, ::Enums::Vulnerability.report_types - enum :severity, ::Enums::Vulnerability.severity_levels, prefix: :severity + enum :report_type, Migratable::Enums::Vulnerability.report_types + enum :severity, Migratable::Enums::Vulnerability.severity_levels, prefix: :severity + + belongs_to :primary_identifier, class_name: 'Migratable::Vulnerabilities::Identifier' + belongs_to :scanner, class_name: 'Migratable::Vulnerabilities::Scanner' + belongs_to :vulnerability, class_name: 'Migratable::Vulnerability', inverse_of: :findings - belongs_to :primary_identifier, class_name: 'Vulnerabilities::Identifier', foreign_key: 'primary_identifier_id' - belongs_to :scanner, class_name: 'Vulnerabilities::Scanner', foreign_key: 'scanner_id' - belongs_to :vulnerability, class_name: 'Vulnerability', inverse_of: :findings, foreign_key: 'vulnerability_id' + has_many :security_findings, + class_name: 'Migratable::Security::Finding', + primary_key: :uuid, + foreign_key: :uuid, + inverse_of: :vulnerability_finding end class Identifier < SecApplicationRecord self.table_name = 'vulnerability_identifiers' + + def self.sha1_fingerprint(identifier) + fingerprint_string = "#{identifier['type']}:#{identifier['value']}" + [Digest::SHA1.hexdigest(fingerprint_string)].pack('H*') # rubocop:disable Fips/SHA1 -- we must use SHA1, since this is how the fingerprint is stored in the DB + end end class Scanner < SecApplicationRecord self.table_name = 'vulnerability_scanners' end + + class Read < ::SecApplicationRecord + self.table_name = "vulnerability_reads" + self.primary_key = :vulnerability_id + + belongs_to :vulnerability, inverse_of: :vulnerability_read + end end class Vulnerability < SecApplicationRecord - has_many :findings, class_name: '::Vulnerabilities::Finding', inverse_of: :vulnerability - has_many :state_transitions, class_name: '::Vulnerabilities::StateTransition', inverse_of: :vulnerability + has_many :notes + has_many :findings, class_name: 'Migratable::Vulnerabilities::Finding', inverse_of: :vulnerability + has_many :state_transitions, class_name: 
'Migratable::Vulnerabilities::StateTransition', + inverse_of: :vulnerability + has_one :vulnerability_read, class_name: 'Migratable::Vulnerabilities::Read', inverse_of: :vulnerability + + belongs_to :project - enum :state, ::Enums::Vulnerability.vulnerability_states + enum :state, Migratable::Enums::Vulnerability.vulnerability_states + enum :severity, Migratable::Enums::Vulnerability.severity_levels, prefix: :severity def finding @finding ||= findings.first @@ -94,109 +140,697 @@ def finding end def perform - vulnerabilities_by_project = Migratable::Vulnerability - .joins(findings: [:primary_identifier, :scanner]) - .where(report_type: 0) - .where(vulnerability_identifiers: { external_type: ['cwe', 'owasp'] }) - .where(vulnerability_scanners: { external_id: 'semgrep' }) - .where('vulnerability_scanners.project_id = vulnerability_occurrences.project_id') - .includes( - findings: [:primary_identifier], - state_transitions: [] - ) - .group_by(&:project_id) + # CRITICAL: Use vulnerability_reads as efficient pre-filter to avoid scanning + # the 200M row vulnerability_occurrences table which causes production timeouts + + # Step 1: Filter vulnerability_reads (small, indexed) for semgrep SAST vulnerabilities + # WITHOUT joining to vulnerability_occurrences yet + each_sub_batch( + batching_scope: ->(relation) do + # Only filter by scanner and report type here - avoid joining large tables + relation + .joins('INNER JOIN vulnerability_scanners ON vulnerability_scanners.id = vulnerability_reads.scanner_id') + .where(vulnerability_scanners: { external_id: 'semgrep' }) + .where(vulnerability_reads: { + report_type: Migratable::Enums::Vulnerability.report_types[:sast] + }) + .select('vulnerability_reads.vulnerability_id') + .distinct + end + ) do |sub_batch| + # Step 2: Get candidate vulnerability IDs from the filtered vulnerability_reads + candidate_vulnerability_ids = sub_batch.pluck(:vulnerability_id).uniq - vulnerabilities_by_project.each_with_index do |(project_id, vulnerabilities), idx| - reset_vulnerability_states(project_id, vulnerabilities) + next if candidate_vulnerability_ids.empty? + + # Step 3: Load ONLY the filtered vulnerabilities with their findings + # Now vulnerability_occurrences queries are constrained to a small set of IDs + # This is where we finally touch the 200M row table, but only for specific IDs + process_vulnerabilities_batch(candidate_vulnerability_ids) end end - def reset_vulnerability_states(project_id, vulnerabilities) - puts "Searching for vulnerabilities with incorrect primary identifier for project id #{project_id}" + def process_vulnerabilities_batch(candidate_vulnerability_ids) + # Load vulnerabilities with CWE or OWASP primary identifiers (affected by the bug) + # This join to vulnerability_occurrences is now safe because it's constrained by + # the small set of candidate_vulnerability_ids + vulnerabilities = Migratable::Vulnerability + .where(id: candidate_vulnerability_ids) + .joins(findings: :primary_identifier) + .where(vulnerability_identifiers: { external_type: %w[cwe owasp] }) + .includes( + findings: [:primary_identifier, :security_findings], + state_transitions: [], + vulnerability_read: {}, + project: {} + ) + + return if vulnerabilities.empty? 
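+        # Illustrative sketch only (not SQL emitted verbatim by this migration): the relation
+        # built above translates to roughly the following query, constrained to the small set
+        # of candidate IDs so the 200M row vulnerability_occurrences table is never scanned:
+        #
+        #   SELECT vulnerabilities.*
+        #   FROM vulnerabilities
+        #   INNER JOIN vulnerability_occurrences
+        #     ON vulnerability_occurrences.vulnerability_id = vulnerabilities.id
+        #   INNER JOIN vulnerability_identifiers
+        #     ON vulnerability_identifiers.id = vulnerability_occurrences.primary_identifier_id
+        #   WHERE vulnerabilities.id IN (<candidate_vulnerability_ids>)
+        #     AND vulnerability_identifiers.external_type IN ('cwe', 'owasp')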
+ + log_info("Processing batch of #{vulnerabilities.length} vulnerabilities") + + restore_vulnerability_states_bulk(vulnerabilities) + end - num_vulnerabilities_found = vulnerabilities.length - puts "Found #{num_vulnerabilities_found} vulnerabilities with incorrect primary identifier" + # rubocop:disable Metrics/AbcSize, Metrics/MethodLength -- Complex bulk processing logic required for performance + def restore_vulnerability_states_bulk(vulnerabilities) + num_vulnerabilities = vulnerabilities.length + return if num_vulnerabilities == 0 + + current_time = Time.current + + # Bulk data collections + state_transitions_to_insert = [] + notes_to_insert = [] + system_note_metadata_to_insert = [] + vulnerabilities_to_update = [] + security_findings_to_update = [] + orphaned_security_findings_to_delete = [] + vulnerability_reads_to_update = [] + vulnerability_findings_to_update = [] + + # Group vulnerabilities by project for efficient identifier lookups + vulns_by_project = vulnerabilities.group_by(&:project_id) + + # Batch-load correct findings (fixes N+1 #1) + correct_findings_lookup = batch_find_correct_findings(vulnerabilities) + + # Preload identifiers for UUID updates (fixes N+1 #2) - per project + identifiers_lookup = {} + vulns_by_project.each do |project_id, project_vulns| + identifiers_lookup.merge!( + preload_identifiers_for_vulns(project_id, project_vulns, correct_findings_lookup) + ) + end - # Process each vulnerability with pre-fetched data - vulnerabilities.each_with_index do |vulnerability_with_incorrect_primary_id, idx| - puts "Processing vulnerability #{idx} of #{num_vulnerabilities_found} [#{((idx.to_f/num_vulnerabilities_found)*100).round(2)}%]" + # rubocop:disable Metrics/BlockLength -- Processing each vulnerability requires complex logic + vulnerabilities.each_with_index do |vuln_with_incorrect_id, idx| + log_progress(idx + 1, num_vulnerabilities) + + latest_transition = vuln_with_incorrect_id.state_transitions + .select { |t| t.author_id.present? 
} + .max_by(&:created_at) + + correct_finding = correct_findings_lookup[vuln_with_incorrect_id.id] + + # if semgrep v6.7.1 has been executed, then we'll have a correct_finding + if correct_finding + collect_state_transition_records( + correct_finding.vulnerability, + latest_transition, + current_time, + state_transitions_to_insert, + notes_to_insert, + system_note_metadata_to_insert, + vulnerabilities_to_update, + vulnerability_reads_to_update + ) + else + # semgrep v6.7.1 has not been executed yet, existing vulnerability records contain + # corrupted primary identifier values + collect_uuid_update_data( + vuln_with_incorrect_id, + latest_transition, + identifiers_lookup, + current_time, + state_transitions_to_insert, + notes_to_insert, + system_note_metadata_to_insert, + vulnerabilities_to_update, + security_findings_to_update, + orphaned_security_findings_to_delete, + vulnerability_reads_to_update, + vulnerability_findings_to_update + ) + end + end + # rubocop:enable Metrics/BlockLength + + # Bulk insert and update operations + bulk_insert_state_transitions(state_transitions_to_insert) + inserted_note_ids = bulk_insert_notes(notes_to_insert) + bulk_insert_system_note_metadata(system_note_metadata_to_insert, inserted_note_ids) + + bulk_update_vulnerabilities(vulnerabilities_to_update) + bulk_update_security_findings(security_findings_to_update) + bulk_delete_security_findings(orphaned_security_findings_to_delete) + bulk_update_vulnerability_reads(vulnerability_reads_to_update) + bulk_update_vulnerability_findings(vulnerability_findings_to_update) + end + # rubocop:enable Metrics/AbcSize, Metrics/MethodLength + + # Batch-loads correct findings for all vulnerabilities in a single query + # Uses a lateral join pattern to find matching findings efficiently + # CRITICAL: Constrains vulnerability_occurrences search using vulnerability_reads to avoid full table scan + # rubocop:disable Metrics/AbcSize, Metrics/MethodLength -- Complex SQL generation required for performance + def batch_find_correct_findings(vulnerabilities) + return {} if vulnerabilities.empty? + + # Build structured data with named keys for clarity + search_params = vulnerabilities.map do |vuln| + f = vuln.finding + { + vulnerability_id: vuln.id, + exclude_finding_id: f.id, + severity: Migratable::Enums::Vulnerability.severity_levels[f.severity], + report_type: Migratable::Enums::Vulnerability.report_types[f.report_type], + location_fingerprint: f.location_fingerprint, + name: f.name, + description: f.description, + metadata_version: f.metadata_version, + project_id: f.project_id + } + end - latest_transition = vulnerability_with_incorrect_primary_id.state_transitions - .select { |t| t.author_id.present? } - .max_by(&:created_at) + # Get unique project IDs to constrain the search + project_ids = search_params.pluck(:project_id).uniq + + # Pre-filter vulnerability_occurrences using vulnerability_reads to get a small candidate set + # This avoids scanning the full table + candidate_occurrence_ids = Migratable::Vulnerabilities::Read + .where(project_id: project_ids, report_type: 0) + .joins( + 'INNER JOIN vulnerability_occurrences ' \ + 'ON vulnerability_occurrences.vulnerability_id = vulnerability_reads.vulnerability_id' + ) + .pluck('vulnerability_occurrences.id') + + return {} if candidate_occurrence_ids.empty? 
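      # Notes on the SQL assembled below (describing the existing statement, no
      # behaviour change implied):
      # - location_fingerprint is stored as binary, so each VALUES row renders it
      #   as a Postgres bytea literal: unpack1('H*') yields the hex string, e.g.
      #     p[:location_fingerprint].unpack1('H*')  # => "1eae8e196458ba7c..."
      #   and wrapping it as '\x<hex>'::bytea converts it back to bytes server-side.
      # - SELECT DISTINCT ON (sp.vulnerability_id) ... ORDER BY sp.vulnerability_id, vo.id
      #   keeps exactly one matching occurrence per vulnerability, preferring the
      #   lowest occurrence id when several rows satisfy all of the match columns.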
+ + conn = ::SecApplicationRecord.connection + + values_sql = search_params.map do |p| + location_fingerprint_hex = "'\\x#{p[:location_fingerprint].unpack1('H*')}'::bytea" + + "(#{p[:vulnerability_id]}, #{p[:exclude_finding_id]}, #{p[:severity]}, #{p[:report_type]}, " \ + "#{location_fingerprint_hex}, #{conn.quote(p[:name])}, #{conn.quote(p[:description])}, " \ + "#{conn.quote(p[:metadata_version])}, #{p[:project_id]})" + end.join(', ') + + candidate_ids_sql = candidate_occurrence_ids.join(', ') + + sql = <<~SQL + WITH search_params AS ( + SELECT * FROM ( + VALUES #{values_sql} + ) AS t(vulnerability_id, exclude_finding_id, severity, report_type, location_fingerprint, name, description, metadata_version, project_id) + ) + SELECT DISTINCT ON (sp.vulnerability_id) + sp.vulnerability_id, + vo.id AS finding_id + FROM search_params sp + JOIN vulnerability_occurrences vo ON + vo.id IN (#{candidate_ids_sql}) + AND vo.severity = sp.severity + AND vo.report_type = sp.report_type + AND vo.location_fingerprint = sp.location_fingerprint + AND vo.name = sp.name + AND vo.description = sp.description + AND vo.metadata_version = sp.metadata_version + AND vo.project_id = sp.project_id + AND vo.id != sp.exclude_finding_id + ORDER BY sp.vulnerability_id, vo.id + SQL + + results = conn.execute(sql) + finding_ids = results.pluck('finding_id') + + return {} if finding_ids.empty? + + # Load findings with vulnerability and project associations (fixes N+1 #3 and #4) + findings_by_id = Migratable::Vulnerabilities::Finding + .where(id: finding_ids) + .includes(vulnerability: :project) + .index_by(&:id) + + # Build lookup by vulnerability ID + results.each_with_object({}) do |row, lookup| + lookup[row['vulnerability_id']] = findings_by_id[row['finding_id']] + end + end + # rubocop:enable Metrics/AbcSize, Metrics/MethodLength + + # Preload all identifiers needed for UUID updates + def preload_identifiers_for_vulns(project_id, vulnerabilities, correct_findings_lookup) + # Only compute fingerprints for vulns that don't have a correct finding + fingerprint_to_vuln = {} + + vulnerabilities.each do |vuln| + next if correct_findings_lookup[vuln.id] + + finding = vuln.finding + metadata = reorder_metadata_with_correct_primary_id(finding.raw_metadata) + semgrep_identifier = metadata["identifiers"][0] - current_vulnerability_finding = get_matching_finding(vulnerability_with_incorrect_primary_id.finding) + fingerprint = Migratable::Vulnerabilities::Identifier.sha1_fingerprint(semgrep_identifier) - reset_vulnerability_state(project_id, vulnerability_with_incorrect_primary_id, current_vulnerability_finding, latest_transition) + fingerprint_to_vuln[fingerprint] ||= [] + fingerprint_to_vuln[fingerprint] << vuln.id end - end - def get_matching_finding(current_vulnerability_finding) - binary_location_fingerprint = [current_vulnerability_finding.location_fingerprint].pack('H*') + return {} if fingerprint_to_vuln.empty? 
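      # Shape of the intermediate structures in this method (ids illustrative only):
      #   fingerprint_to_vuln  #=> { <sha1 of the semgrep_id identifier> => [101, 205] }
      #   identifiers          #=> { <same sha1> => #<Identifier id: 42> }
      # and the hash returned below maps each vulnerability id directly to its
      # correct semgrep_id-based identifier, loaded in a single query per project:
      #   { 101 => #<Identifier id: 42>, 205 => #<Identifier id: 42> }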
- Migratable::Vulnerabilities::Finding.where( - severity: current_vulnerability_finding.severity, - report_type: current_vulnerability_finding.report_type, - location_fingerprint: binary_location_fingerprint, - name: current_vulnerability_finding.name, - metadata_version: current_vulnerability_finding.metadata_version - ) - .where.not(id: current_vulnerability_finding.id) - .where(project_id: current_vulnerability_finding.project_id) - .first - end + # Single query to load all needed identifiers + identifiers = Migratable::Vulnerabilities::Identifier + .where(project_id: project_id, fingerprint: fingerprint_to_vuln.keys) + .index_by(&:fingerprint) - def sort_metadata(raw_metadata) - metadata = JSON.parse(raw_metadata) + # Build lookup by vulnerability ID + fingerprint_to_vuln.each_with_object({}) do |(fingerprint, vulnerability_ids), lookup| + identifier = identifiers[fingerprint] + vulnerability_ids.each { |vid| lookup[vid] = identifier } + end + end - # Find the semgrep_id identifier and separate it from the rest + def reorder_metadata_with_correct_primary_id(raw_metadata) + metadata = Gitlab::Json.parse(raw_metadata) identifiers = metadata["identifiers"] semgrep_identifier = identifiers.find { |id| id["type"] == "semgrep_id" } other_identifiers = identifiers.reject { |id| id["type"] == "semgrep_id" } - - # Sort the other identifiers by their "value" field - other_identifiers.sort! { |a, b| a["value"] <=> b["value"] } - - # Reconstruct the identifiers array with semgrep_id first, then sorted others + other_identifiers.sort_by! { |a| a["value"] } metadata["identifiers"] = [semgrep_identifier] + other_identifiers + metadata end - def reset_vulnerability_state(project_id, vulnerability_with_incorrect_primary_id, current_vulnerability_finding, transition) - current_vulnerability = current_vulnerability_finding&.vulnerability - finding_with_incorrect_primary_id = vulnerability_with_incorrect_primary_id.finding + def reorder_finding_data_with_correct_primary_id(finding_data) + identifiers = finding_data["identifiers"] + semgrep_identifier = identifiers.find { |id| id["external_type"] == "semgrep_id" } + other_identifiers = identifiers.reject { |id| id["external_type"] == "semgrep_id" } + other_identifiers.sort_by! 
{ |a| a["external_id"] } + finding_data["identifiers"] = [semgrep_identifier] + other_identifiers + finding_data + end + + # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/ParameterLists -- Data collection for UUID updates + def collect_uuid_update_data( + vuln_with_incorrect_id, + latest_transition, + identifiers_lookup, + current_time, + state_transitions_to_insert, + notes_to_insert, + system_note_metadata_to_insert, + vulnerabilities_to_update, + security_findings_to_update, + orphaned_security_findings_to_delete, + vulnerability_reads_to_update, + vulnerability_findings_to_update + ) + finding_with_incorrect_id = vuln_with_incorrect_id.finding + + metadata_with_correct_id = reorder_metadata_with_correct_primary_id(finding_with_incorrect_id.raw_metadata) + + security_finding = finding_with_incorrect_id.security_findings.first + finding_data_with_correct_id = + (reorder_finding_data_with_correct_primary_id(security_finding.finding_data) if security_finding) + + correct_identifier = identifiers_lookup[vuln_with_incorrect_id.id] + unless correct_identifier + log_warning("Missing correct identifier for vulnerability #{vuln_with_incorrect_id.id}") + return + end + + correct_uuid = Gitlab::UUID.v5( + [ + Migratable::Enums::Vulnerability.report_types.key(vuln_with_incorrect_id.report_type), + correct_identifier.fingerprint.unpack1('H*'), + finding_with_incorrect_id.location_fingerprint.unpack1('H*'), + vuln_with_incorrect_id.project_id + ].join("-") + ) + + finding_with_incorrect_id.security_findings.each do |sf| + orphaned_security_findings_to_delete << sf.id - sorted_metadata = sort_metadata(finding_with_incorrect_primary_id.raw_metadata) - semgrep_identifier = sorted_metadata[0] + security_findings_to_update << { + id: sf.id, + partition_number: sf.partition_number, + uuid: correct_uuid, + finding_data: finding_data_with_correct_id + } + end - binary_fingerprint = [Digest::SHA1.hexdigest("#{semgrep_identifier['type']}:#{semgrep_identifier['value']}")].pack('H*') - correct_primary_identifier = Migratable::Vulnerabilities::Identifier.find_by( - project_id: project_id, fingerprint: binary_fingerprint + if vuln_with_incorrect_id.vulnerability_read + vulnerability_reads_to_update << { + vulnerability_id: vuln_with_incorrect_id.id, + uuid: correct_uuid, + dismissal_reason: nil + } + end + + vulnerability_findings_to_update << { + id: finding_with_incorrect_id.id, + primary_identifier_id: correct_identifier.id, + raw_metadata: metadata_with_correct_id.to_json, + uuid: correct_uuid + } + + collect_state_transition_records( + vuln_with_incorrect_id, + latest_transition, + current_time, + state_transitions_to_insert, + notes_to_insert, + system_note_metadata_to_insert, + vulnerabilities_to_update, + nil ) + end + # rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/ParameterLists - puts "Updating incorrect primary identifier #{vulnerability_with_incorrect_primary_id.finding.primary_identifier_id} to correct primary identifier #{correct_primary_identifier.id}" - finding_with_incorrect_primary_id.update(primary_identifier_id: correct_primary_identifier.id) + def bulk_insert_state_transitions(data) + return if data.empty? - attributes = {} - attributes[:state] = transition.to_state if transition + log_info("Bulk inserting #{data.length} state transitions") + data.each_slice(SUB_BATCH_SIZE) do |batch| + Migratable::Vulnerabilities::StateTransition.insert_all(batch) + end + end + + def bulk_insert_notes(data) + return {} if data.empty? 
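      # The mapping built below keys each inserted note's id by its discussion_id:
      # insert_all is called with returning: [:id, :discussion_id], so row[0] is
      # the new note id and row[1] its discussion_id. bulk_insert_system_note_metadata
      # later resolves note_id via record[:note_data][:discussion_id], which is why
      # build_note_data generates the discussion_id up front in Ruby rather than
      # relying on a database-assigned value.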
+ + log_info("Bulk inserting #{data.length} notes") + data.each_slice(SUB_BATCH_SIZE).each_with_object({}) do |batch, mapping| + result = Migratable::Note.insert_all(batch, returning: [:id, :discussion_id]) + result.rows.each { |row| mapping[row[1]] = row[0] } + end + end + + def bulk_insert_system_note_metadata(data, inserted_note_ids) + return if data.empty? + + log_info("Bulk inserting #{data.length} system note metadata records") + + metadata_records = data.map do |record| + { + note_id: inserted_note_ids[record[:note_data][:discussion_id]], + namespace_id: record[:namespace_id], + action: record[:action], + created_at: record[:created_at], + updated_at: record[:updated_at] + } + end + + metadata_records.each_slice(SUB_BATCH_SIZE) do |batch| + Migratable::SystemNoteMetadata.insert_all(batch) + end + end + + # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockLength -- UPDATE FROM VALUES requires building multiple column sets + def bulk_update_vulnerabilities(updates) + return if updates.empty? + + log_info("Bulk updating #{updates.length} vulnerabilities") + conn = ::SecApplicationRecord.connection + + updates.each_slice(SUB_BATCH_SIZE) do |batch| + values_sql = batch.map do |u| + confirmed_by = u[:confirmed_by_id] ? u[:confirmed_by_id].to_s : 'NULL::bigint' + confirmed_at = u[:confirmed_at] ? "#{conn.quote(u[:confirmed_at])}::timestamptz" : 'NULL::timestamptz' + resolved_by = u[:resolved_by_id] ? u[:resolved_by_id].to_s : 'NULL::bigint' + resolved_at = u[:resolved_at] ? "#{conn.quote(u[:resolved_at])}::timestamptz" : 'NULL::timestamptz' + dismissed_by = u[:dismissed_by_id] ? u[:dismissed_by_id].to_s : 'NULL::bigint' + dismissed_at = u[:dismissed_at] ? "#{conn.quote(u[:dismissed_at])}::timestamptz" : 'NULL::timestamptz' + + "(#{u[:id]}::bigint, #{u[:state]}::smallint, #{confirmed_by}, #{confirmed_at}, " \ + "#{resolved_by}, #{resolved_at}, #{dismissed_by}, #{dismissed_at})" + end.join(', ') + + sql = <<~SQL + UPDATE vulnerabilities AS v + SET + state = t.state, + confirmed_by_id = COALESCE(t.confirmed_by_id, v.confirmed_by_id), + confirmed_at = COALESCE(t.confirmed_at, v.confirmed_at), + resolved_by_id = COALESCE(t.resolved_by_id, v.resolved_by_id), + resolved_at = COALESCE(t.resolved_at, v.resolved_at), + dismissed_by_id = COALESCE(t.dismissed_by_id, v.dismissed_by_id), + dismissed_at = COALESCE(t.dismissed_at, v.dismissed_at) + FROM ( + VALUES #{values_sql} + ) AS t(id, state, confirmed_by_id, confirmed_at, resolved_by_id, resolved_at, dismissed_by_id, dismissed_at) + WHERE v.id = t.id + SQL + + conn.execute(sql) + end + end + # rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockLength + + def bulk_delete_security_findings(orphaned_security_finding_ids_to_delete) + return if orphaned_security_finding_ids_to_delete.empty? 
+ + log_info("Bulk deleting #{orphaned_security_finding_ids_to_delete.length} security findings") + conn = ::SecApplicationRecord.connection + + orphaned_security_finding_ids_to_delete.each_slice(SUB_BATCH_SIZE) do + |orphaned_security_finding_ids_to_delete_batch| - if current_vulnerability - attributes[:resolved_on_default_branch] = current_vulnerability.resolved_on_default_branch - attributes[:present_on_default_branch] = current_vulnerability.present_on_default_branch + security_finding_ids_to_delete = orphaned_security_finding_ids_to_delete_batch.join(", ") + + sql = <<~SQL + DELETE from security_findings + WHERE id in (#{security_finding_ids_to_delete}) + SQL + + conn.execute(sql) end + end - finding_with_incorrect_primary_id.update(raw_metadata: sorted_metadata.to_json) + def bulk_update_security_findings(data) + return if data.empty? + + log_info("Bulk updating #{data.length} security findings") + conn = ::SecApplicationRecord.connection + + data.each_slice(SUB_BATCH_SIZE) do |batch| + values_sql = batch.map do |r| + id = Array(r[:id]).first.to_i + partition_number = r[:partition_number].to_i + escaped_data = conn.quote(r[:finding_data].to_json) + "(#{id}::bigint, #{partition_number}::integer, '#{r[:uuid]}'::uuid, #{escaped_data}::jsonb)" + end.join(', ') + + sql = <<~SQL + UPDATE security_findings AS sf + SET + uuid = t.uuid, + finding_data = t.finding_data + FROM ( + VALUES #{values_sql} + ) AS t(id, partition_number, uuid, finding_data) + WHERE sf.id = t.id AND sf.partition_number = t.partition_number + SQL + + conn.execute(sql) + end + end - vulnerability_with_incorrect_primary_id.update(attributes) + def bulk_update_vulnerability_reads(data) + return if data.empty? + + log_info("Bulk updating #{data.length} vulnerability reads") + conn = ::SecApplicationRecord.connection + + data.each_slice(SUB_BATCH_SIZE) do |batch| + values_sql = batch.map do |r| + uuid_value = r[:uuid] ? "'#{r[:uuid]}'::uuid" : "NULL::uuid" + dismissal_value = r[:dismissal_reason] ? r[:dismissal_reason].to_s : "NULL::integer" + "(#{r[:vulnerability_id]}, #{uuid_value}, #{dismissal_value})" + end.join(', ') + + sql = <<~SQL + UPDATE vulnerability_reads AS vr + SET + uuid = COALESCE(t.uuid, vr.uuid), + dismissal_reason = COALESCE(t.dismissal_reason, vr.dismissal_reason) + FROM ( + VALUES #{values_sql} + ) AS t(vulnerability_id, uuid, dismissal_reason) + WHERE vr.vulnerability_id = t.vulnerability_id + SQL + + conn.execute(sql) + end + end - if current_vulnerability_finding - puts "Deleting vulerability finding (occurrences) #{current_vulnerability_finding.id}" - current_vulnerability_finding.destroy - puts "Deleting vulerability #{current_vulnerability.id}" - current_vulnerability.destroy + def bulk_update_vulnerability_findings(updates) + return if updates.empty? 
+ + log_info("Bulk updating #{updates.length} vulnerability findings") + conn = ::SecApplicationRecord.connection + + updates.each_slice(SUB_BATCH_SIZE) do |batch| + values_sql = batch.map do |u| + escaped_metadata = conn.quote(u[:raw_metadata]) + "(#{u[:id]}, #{u[:primary_identifier_id]}, '#{u[:uuid]}'::uuid, #{escaped_metadata})" + end.join(', ') + + sql = <<~SQL + UPDATE vulnerability_occurrences AS vo + SET + primary_identifier_id = t.primary_identifier_id, + uuid = t.uuid, + raw_metadata = t.raw_metadata + FROM ( + VALUES #{values_sql} + ) AS t(id, primary_identifier_id, uuid, raw_metadata) + WHERE vo.id = t.id + SQL + + conn.execute(sql) end + end + + def collect_state_transition_records( + vulnerability, + latest_transition, + current_time, + state_transitions_to_insert, + notes_to_insert, + system_note_metadata_to_insert, + vulnerabilities_to_update, + vulnerability_reads_to_update + ) + return unless latest_transition + return if vulnerability.state == latest_transition.to_state + + state_transitions_to_insert << build_state_transition(vulnerability, latest_transition, current_time) + + note_data = build_note_data(vulnerability, latest_transition, current_time) + notes_to_insert << note_data + + system_note_metadata_to_insert << build_system_note_metadata( + vulnerability, latest_transition, current_time, note_data) + + vulnerabilities_to_update << build_vulnerability_update( + vulnerability, latest_transition, current_time) + + return unless vulnerability_reads_to_update + + vulnerability_reads_to_update << build_vulnerability_read_update(vulnerability, latest_transition) + end + + def build_state_transition(vulnerability, latest_transition, current_time) + { + vulnerability_id: vulnerability.id, + dismissal_reason: latest_transition.dismissal_reason, + author_id: latest_transition.author_id, + from_state: Migratable::Enums::Vulnerability.vulnerability_states[vulnerability.state], + to_state: Migratable::Enums::Vulnerability.vulnerability_states[latest_transition.to_state], + created_at: current_time, + updated_at: current_time, + comment: format(TRANSITION_COMMENT_TEMPLATE, { + original_comment: latest_transition.comment, + transition_id: latest_transition.id + }) + } + end + + def build_note_data(vulnerability, latest_transition, current_time) + note_text = format(SYSTEM_NOTE_TEMPLATE, { + original_comment: latest_transition.comment, + from_state: vulnerability.state.titleize, + to_state: latest_transition.to_state.titleize, + transition_id: latest_transition.id + }) + + { + note: note_text, + noteable_type: 'Vulnerability', + author_id: latest_transition.author_id, + created_at: current_time, + updated_at: current_time, + project_id: vulnerability.project_id, + noteable_id: vulnerability.id, + system: true, + discussion_id: discussion_id(vulnerability.id), + namespace_id: vulnerability.project.project_namespace_id + } + end + + def build_system_note_metadata(vulnerability, latest_transition, current_time, note_data) + { + namespace_id: vulnerability.project.project_namespace_id, + action: "vulnerability_#{latest_transition.to_state}", + created_at: current_time, + updated_at: current_time, + note_data: note_data + } + end + + def build_vulnerability_update(vulnerability, latest_transition, current_time) + vuln_update = { + id: vulnerability.id, + state: Migratable::Enums::Vulnerability.vulnerability_states[latest_transition.to_state] + } + + add_state_specific_attributes(vuln_update, latest_transition, current_time) + + vuln_update + end + + def 
add_state_specific_attributes(vuln_update, latest_transition, current_time) + case latest_transition.to_state + when "confirmed" + vuln_update[:confirmed_by_id] = latest_transition.author_id + vuln_update[:confirmed_at] = current_time + when "resolved" + vuln_update[:resolved_by_id] = latest_transition.author_id + vuln_update[:resolved_at] = current_time + when "dismissed" + vuln_update[:dismissed_by_id] = latest_transition.author_id + vuln_update[:dismissed_at] = current_time + end + end + + def build_vulnerability_read_update(vulnerability, latest_transition) + { + vulnerability_id: vulnerability.id, + uuid: nil, + dismissal_reason: latest_transition.dismissal_reason + } + end + + def discussion_id(vulnerability_id) + # rubocop:disable Fips/SHA1 -- required for parity with app/models/discussion.rb + Digest::SHA1.hexdigest("discussion-vulnerability-#{vulnerability_id}-#{SecureRandom.hex}") + # rubocop:enable Fips/SHA1 + end + + # Logging helpers - use structured logger in production, puts in tests + def log_info(message) + if defined?(Gitlab::AppJsonLogger) + Gitlab::AppJsonLogger.info( + message: message, + class: self.class.name + ) + else + puts message + end + end + + def log_warning(message) + if defined?(Gitlab::AppJsonLogger) + Gitlab::AppJsonLogger.warn( + message: message, + class: self.class.name + ) + else + puts "WARNING: #{message}" + end + end + + def log_progress(current, total, context = "") + return unless current % 100 == 0 || current == total - vulnerability_with_incorrect_primary_id.state_transitions.where(author_id: nil).destroy_all + percentage = ((current.to_f / total) * 100).round(2) + message = "Processing #{current} of #{total} [#{percentage}%]" + message += " - #{context}" unless context.empty? + log_info(message) end end end end +# rubocop:enable Metrics/ClassLength diff --git a/restore-vulnerability-states-for-project.rb b/restore-vulnerability-states-for-project.rb deleted file mode 100755 index 3f635c56e90bb1..00000000000000 --- a/restore-vulnerability-states-for-project.rb +++ /dev/null @@ -1,337 +0,0 @@ -#!/usr/bin/env ruby - -# frozen_string_literal: true - -# rubocop:disable all - -require "bundler/inline" -gemfile(true) do - source "https://rubygems.org" - gem 'pry' - gem 'pry-byebug' - gem "activerecord" - gem "pg" -end -require 'active_record' - -MAIN_DB = ENV["MAIN_CONNECTION_STRING"] -SEC_DB = ENV["SEC_CONNECTION_STRING"] - -class MainRecord < ActiveRecord::Base - self.abstract_class = true - establish_connection(MAIN_DB) -end - -class SecApplicationRecord < ActiveRecord::Base - self.abstract_class = true - establish_connection(SEC_DB) -end - -class Route < MainRecord - belongs_to :source, polymorphic: true -end - -class Project < MainRecord - has_one :route, as: :source - - def full_path - route&.path - end -end - -class User < MainRecord -end - -module Enums - module Vulnerability - REPORT_TYPES = { - sast: 0, - }.freeze - - VULNERABILITY_STATES = { - detected: 1, - confirmed: 4, - resolved: 3, - dismissed: 2 - }.with_indifferent_access.freeze - - def self.vulnerability_states - VULNERABILITY_STATES - end - end -end - -module Vulnerabilities - class StateTransition < SecApplicationRecord - self.table_name = 'vulnerability_state_transitions' - - enum :from_state, ::Enums::Vulnerability.vulnerability_states, prefix: true - enum :to_state, ::Enums::Vulnerability.vulnerability_states, prefix: true - - belongs_to :vulnerability, class_name: 'Vulnerability', inverse_of: :state_transitions - belongs_to :vulnerability_occurrence, optional: true, 
class_name: 'Vulnerabilities::Finding' - end - - class Finding < SecApplicationRecord - self.table_name = 'vulnerability_occurrences' - - belongs_to :primary_identifier, class_name: 'Vulnerabilities::Identifier', foreign_key: 'primary_identifier_id' - belongs_to :scanner, class_name: 'Vulnerabilities::Scanner', foreign_key: 'scanner_id' - belongs_to :vulnerability, class_name: 'Vulnerability', inverse_of: :findings, foreign_key: 'vulnerability_id' - end - - class Identifier < SecApplicationRecord - self.table_name = 'vulnerability_identifiers' - end - - class Scanner < SecApplicationRecord - self.table_name = 'vulnerability_scanners' - end -end - -class Vulnerability < SecApplicationRecord - has_many :findings, class_name: '::Vulnerabilities::Finding', inverse_of: :vulnerability - has_many :state_transitions, class_name: '::Vulnerabilities::StateTransition', inverse_of: :vulnerability - - enum :state, ::Enums::Vulnerability.vulnerability_states - - def finding - @finding ||= findings.first - end -end - -# Set timeouts on both connections -MainRecord.connection.execute("SET statement_timeout = '30min';") -SecApplicationRecord.connection.execute("SET statement_timeout = '30min';") - -class Processor - def initialize(project_id = nil) - @transitions_by_project = Hash.new { |h, k| h[k] = { resolved: [], confirmed: [], dismissed: [] } } - @project_id = project_id - end - - def execute - process_everything = false - - if process_everything - affected_project_ids = Vulnerabilities::Finding - .joins(:primary_identifier) - .joins(:scanner) - .where(vulnerability_identifiers: { external_type: ['cwe', 'owasp'] }) - .where(vulnerability_scanners: { external_id: 'semgrep' }) - .where('vulnerability_scanners.project_id = vulnerability_occurrences.project_id') - .where(report_type: 0) - .distinct - .pluck(:project_id) - - affected_project_ids.each_with_index do |project_id, idx| - puts "" - puts "Processing project #{idx+1} of #{affected_project_ids.length}" - reset_vulnerability_states(project_id) - puts "" - puts "" - end - end - - if project_id - reset_vulnerability_states(project_id) - else - affected_project_ids = File.read(ARGV[0]).split("\n") - - puts "Found #{affected_project_ids.length} affected project ids" - - affected_project_ids.each_with_index do |project_id, idx| - puts "" - puts "Processing project #{idx+1} of #{affected_project_ids.length}" - reset_vulnerability_states(project_id) - puts "" - puts "" - end - end - - print_transition_summary - end - - attr_accessor :transitions_by_project - attr_reader :project_id - - def reset_vulnerability_states(project_id) - puts "Searching for vulnerabilities with incorrect primary identifier for project id #{project_id}" - - # Eager load all the data we need upfront - vulnerabilities = Vulnerability.joins(findings: :primary_identifier) - .where(project_id: project_id) - .where(report_type: 0) - .where(vulnerability_identifiers: { external_type: ['cwe', 'owasp'] }) - .includes( - findings: [:primary_identifier], - state_transitions: [] - ) - - num_vulnerabilities_found = vulnerabilities.length - puts "Found #{num_vulnerabilities_found} vulnerabilities with incorrect primary identifier" - - # Build lookup keys for all original findings - original_findings_data = vulnerabilities.map do |vuln| - finding = vuln.finding - { - vulnerability: vuln, - finding: finding, - lookup_key: [ - finding.severity, - finding.report_type, - finding.location_fingerprint&.unpack('H*')&.first, - finding.name, - finding.metadata_version - ] - } - end - - # Batch query for 
all matching findings - puts "Batch querying for matching findings..." - - conditions = original_findings_data.map do |data| - finding = data[:finding] - { - severity: finding.severity, - report_type: finding.report_type, - location_fingerprint: finding.location_fingerprint, - name: finding.name, - metadata_version: finding.metadata_version - } - end - - # Get all original finding IDs to exclude - original_finding_ids = original_findings_data.map { |d| d[:finding].id } - - if original_finding_ids.empty? - puts "Unable to find vulnerabilities for project #{project_id}" - return - end - - # Build a complex OR query - query = nil - conditions.each do |cond| - subquery = Vulnerabilities::Finding.where(cond) - query = query ? query.or(subquery) : subquery - end - - matching_findings = query - .where.not(id: original_finding_ids) - .where(project_id: project_id) # Add project filter if applicable - .includes(:vulnerability) - .to_a - - # Build lookup hash for matched findings - matched_findings_hash = matching_findings.group_by do |finding| - [ - finding.severity, - finding.report_type, - finding.location_fingerprint&.unpack('H*')&.first, - finding.name, - finding.metadata_version - ] - end - - # Process each vulnerability with pre-fetched data - original_findings_data.each_with_index do |data, idx| - puts "Processing vulnerability #{idx} of #{num_vulnerabilities_found} [#{((idx.to_f/num_vulnerabilities_found)*100).round(2)}%]" - - vulnerability = data[:vulnerability] - finding = data[:finding] - - latest_transition = vulnerability.state_transitions - .select { |t| t.author_id.present? } - .max_by(&:created_at) - - # Look up matching finding from hash - matched_findings = matched_findings_hash[data[:lookup_key]] - matching_finding = matched_findings&.first - - unless latest_transition - if matching_finding - puts "Unable to find vulnerability_state_transition for `semgrep 6.7.0` vulnerability ID #{vulnerability.id} (corresponding `semgrep >= 6.7.1` ID: #{matching_finding.vulnerability.id})" - else - puts "Unable to find vulnerability_state_transition for `semgrep 6.7.0` vulnerability ID #{vulnerability.id} (no corresponding `semgrep >= 6.7.1` ID)" - end - next - end - - unless matching_finding - puts "Unable to find match for `semgrep 6.7.0` vulnerability ID #{vulnerability.id}" - next - end - - puts "`semgrep 6.7.0` ID #{vulnerability.id} matches `semgrep >= 6.7.1` vulnerability ID #{matching_finding.vulnerability.id}" - - apply_state_transition(project_id, vulnerability, finding, matching_finding.vulnerability, matching_finding, latest_transition) - end - end - - private - - def apply_state_transition(project_id, old_vulnerability, old_finding, current_vulnerability, current_finding, transition) - # skip this vulnerability if the it already has the correct state - if current_vulnerability.state == transition.to_state - puts "Current vulnerability state #{current_vulnerability.state} matches original state: #{transition.to_state}, skipping" - return - end - - puts "Updating old primary identifier #{old_finding.primary_identifier_id} to new primary identifier #{current_finding.primary_identifier_id}" - puts "Deleting vulerability #{current_vulnerability.id}" - - author = User.find(transition.author_id) - - case transition.to_state - when "resolved" - puts "Resolving vulnerability #{current_vulnerability.id} with comment #{transition.comment}" - transitions_by_project[project_id][:resolved] << { - "current_vulnerability_id" => current_vulnerability.id, "old_vulnerability_id" => 
old_vulnerability.id, "comment" => transition.comment - } - # ::Vulnerabilities::ResolveService.new(author, vulnerability, transition.comment).execute - when "confirmed" - puts "Confirming vulnerability #{current_vulnerability.id} with comment #{transition.comment}" - transitions_by_project[project_id][:confirmed] << { - "current_vulnerability_id" => current_vulnerability.id, "old_vulnerability_id" => old_vulnerability.id, "comment" => transition.comment - } - # ::Vulnerabilities::ConfirmService.new(author, vulnerability, transition.comment).execute - when "dismissed" - puts "Dismissing vulnerability #{current_vulnerability.id} with comment #{transition.comment}" - transitions_by_project[project_id][:dismissed] << { - "current_vulnerability_id" => current_vulnerability.id, "old_vulnerability_id" => old_vulnerability.id, "comment" => transition.comment - } - # ::Vulnerabilities::DismissService.new(author, vulnerability, transition.comment, transition.dismissal_reason).execute - else - puts "Unknown transition '#{transition.to_state}' for vulnerability #{vulnerability.id}" - end - end - - def print_transition_summary - puts "\n=== Transition Summary ===" - transitions_by_project.each do |project_id, states| - project = Project.find(project_id) - puts "\nProject https://gitlab.com/#{project.full_path} (ID: #{project_id})" - puts " Resolved: #{states[:resolved].count} vulnerabilities" if states[:resolved].any? - states[:resolved].each do |resolved| - puts %| ID: #{resolved["current_vulnerability_id"]} (Original ID: #{resolved["old_vulnerability_id"]}), Comment: '#{resolved["comment"]}'| - end - puts " Confirmed: #{states[:confirmed].count} vulnerabilities" if states[:confirmed].any? - states[:confirmed].each do |confirmed| - puts %| ID: #{confirmed["current_vulnerability_id"]} (Original ID: #{confirmed["old_vulnerability_id"]}), Comment: '#{confirmed["comment"]}'| - end - puts " Dismissed: #{states[:dismissed].count} vulnerabilities" if states[:dismissed].any? 
- states[:dismissed].each do |dismissed| - puts %| ID: #{dismissed["current_vulnerability_id"]} (Original ID: #{dismissed["old_vulnerability_id"]}), Comment: '#{dismissed["comment"]}'| - end - end - end -end - -Processor.new(75802210).execute -# Processor.new.execute - -# reset_vulnerability_states(57498926) -# reset_vulnerability_states(60454917) -# reset_vulnerability_states(75802210) # my personal project - -# rubocop:enable all diff --git a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_manual_spec.rb b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_manual_spec.rb new file mode 100644 index 00000000000000..852467823f9751 --- /dev/null +++ b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_manual_spec.rb @@ -0,0 +1,1411 @@ +# frozen_string_literal: true + +require 'spec_helper' + +# rubocop:disable RSpec/MultipleMemoizedHelpers -- We need this many for this background migration +RSpec.describe Gitlab::BackgroundMigration::RestoreIncorrectVulnerabilityStates, feature_category: :static_application_security_testing do + let(:pipelines_table) { ci_partitioned_table(:p_ci_pipelines) } + let(:builds_table) { ci_partitioned_table(:p_ci_builds) } + let(:notes_table) { table(:notes) } + let(:system_note_metadata_table) { table(:system_note_metadata) } + let(:vulnerabilities_table) { table(:vulnerabilities, database: :sec) } + let(:vulnerability_state_transitions_table) { table(:vulnerability_state_transitions, database: :sec) } + let(:vulnerability_identifiers_table) { table(:vulnerability_identifiers, database: :sec) } + let(:vulnerability_finding_identifiers_table) { table(:vulnerability_occurrence_identifiers, database: :sec) } + let(:organizations_table) { table(:organizations) } + let(:vulnerability_findings_table) { table(:vulnerability_occurrences, database: :sec) } + let(:security_findings_table) { table(:security_findings, database: :sec) } + let(:security_scans) { table(:security_scans, database: :sec) } + let(:vulnerability_reads_table) { table(:vulnerability_reads, database: :sec) } + let(:vulnerability_scanners) { table(:vulnerability_scanners, database: :sec) } + let(:sub_batch_size) { vulnerabilities_table.count } + let(:namespaces_table) { table(:namespaces) } + let(:projects_table) { table(:projects) } + let(:user) do + table(:users).create!(username: 'john_doe', email: 'johndoe@gitlab.com', projects_limit: 2, + organization_id: organization.id) + end + + let(:acceptable_risk_dismissal_int) { 0 } + + let(:severity_level_low_int) { 4 } + let(:severity_level_medium_int) { 5 } + let(:severity_level_high_int) { 6 } + + let(:resolved_state_string) { 'resolved' } + let(:detected_state_string) { 'detected' } + let(:dismissed_state_string) { 'dismissed' } + let(:confirmed_state_string) { 'confirmed' } + + let(:detected_state_int) { 1 } + let(:dismissed_state_int) { 2 } + let(:confirmed_state_int) { 4 } + let(:resolved_state_int) { 3 } + + let(:sast_report_type_string) { described_class::Migratable::Enums::Vulnerability.report_types.key(0) } + let(:sast_report_type_int) { described_class::Migratable::Enums::Vulnerability.report_types['sast'] } + + let(:organization) { organizations_table.create!(name: 'Organization', path: 'organization') } + let!(:group_namespace) do + namespaces_table.create!( + name: 'Project One', + path: 'project-one', + type: 'Group', + organization_id: organization.id + ).tap { |namespace| namespace.update!(traversal_ids: [namespace.id]) } + end + + let(:project_id) { nil } + 
let(:project) { create_project(name: 'gitlab', group: group_namespace, id: project_id) } + let(:current_time) { Time.current } + + let(:vulnerability_scanner) do + vulnerability_scanners.create!(created_at: current_time, updated_at: current_time, + project_id: project.id, external_id: 'semgrep', name: 'Semgrep', vendor: 'GitLab') + end + + def create_project(name:, group:, id: nil) + project_namespace = namespaces_table.create!( + name: name, + path: name, + type: 'Project', + organization_id: organization.id + ) + + projects_table.create!({ + id: id, + namespace_id: group.id, + project_namespace_id: project_namespace.id, + organization_id: organization.id, + name: name, + path: name + }.compact) + end + + # use a method instead of a subject to avoid rspec memoization + def perform_migration + described_class.new( + start_id: vulnerability_reads_table.minimum(:vulnerability_id), + end_id: vulnerability_reads_table.maximum(:vulnerability_id), + batch_table: :vulnerability_reads, + batch_column: :vulnerability_id, + sub_batch_size: sub_batch_size, + pause_ms: 0, + connection: ::SecApplicationRecord.connection + ).perform + end + + describe "#perform", feature_category: :static_application_security_testing do + let(:vulnerabilities_to_be_confirmed) do + vulnerabilities_table.where(project_id: project.id) + .where("title LIKE ?", "%Vulnerability to be confirmed.").order(:id) + end + + let(:vulnerabilities_to_be_resolved) do + vulnerabilities_table.where(project_id: project.id) + .where("title LIKE ?", "%Vulnerability to be resolved.").order(:id) + end + + let(:vulnerabilities_to_be_dismissed) do + vulnerabilities_table.where(project_id: project.id) + .where("title LIKE ?", "%Vulnerability to be dismissed.").order(:id) + end + + let(:uuids_before_corruption) { [] } + let(:corrupted_vulnerabilities) { [] } + let(:duplicated_vulnerabilities) { [] } + + context 'when performing sanity checks against spec data' do + context 'when creating vulnerabilities' do + let(:project_id) { 112 } + + it 'creates vulnerability occurrences with uuid and location_fingerprint ' \ + 'values that correspond to the hardcoded project id' do + expect { create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') }.to change { + vulnerability_findings_table.where( + uuid: 'fa338ae7-51e1-5211-8e02-34359ce9544d', + location_fingerprint: ['1eae8e196458ba7c9f60d97e55a77e1ca9d7d7f5'].pack('H*'), + project_id: project_id + ).count + }.from(0).to(1) + .and change { + vulnerability_findings_table.count + }.by(14) + .and change { + security_findings_table.where( + uuid: 'fa338ae7-51e1-5211-8e02-34359ce9544d', + project_id: project_id + ).count + }.from(0).to(1) + .and change { + vulnerability_reads_table.where( + uuid: 'fa338ae7-51e1-5211-8e02-34359ce9544d', + project_id: project_id + ).count + }.from(0).to(1) + + vulnerability_findings_table.find_each { |vf| expect(vf.raw_metadata).to be_present } + security_findings_table.find_each { |sf| expect(sf.finding_data).to be_present } + + security_finding_data = security_findings_table + .find_by(uuid: 'fa338ae7-51e1-5211-8e02-34359ce9544d').finding_data + expect(security_finding_data).to eq({ + "name" => "Improper neutralization of special elements used in an SQL Command ('SQL Injection'). 
" \ + "Vulnerability to be confirmed.", + "links" => [], "assets" => [], "details" => {}, "evidence" => nil, + "location" => { "file" => "app/app.py", "start_line" => 265 }, "solution" => nil, + "description" => "SQL Injection is a critical vulnerability that can lead to data or system compromise.", + "identifiers" => [ + { "url" => "https://semgrep.dev/r/gitlab.bandit.B608", "name" => "bandit.B608", + "external_id" => "bandit.B608", "fingerprint" => "5fc4137cf46497245dba266eaf656ee07eb154b3", + "external_type" => "semgrep_id" }, + { "url" => "https://cwe.mitre.org/data/definitions/89.html", "name" => "CWE-89", + "external_id" => "89", "fingerprint" => "b74f6bacf3f4d4f92c6f4da6584963e4148b91e6", + "external_type" => "cwe" }, + { "url" => nil, "name" => "A03:2021 - Injection", "external_id" => "A03:2021", + "fingerprint" => "a8e828eea3aba35916401da9304619f0a218119b", "external_type" => "owasp" }, + { "url" => nil, "name" => "A1:2017 - Injection", "external_id" => "A1:2017", + "fingerprint" => "08de3511f2132da4d24f1b8b1d3ca14368a0259b", "external_type" => "owasp" }, + { "url" => nil, "name" => "Bandit Test ID B608", "external_id" => "B608", + "fingerprint" => "2efed5393435ae741114b2200f17077e81954270", "external_type" => "bandit_test_id" } + ], "false_positive?" => false, "raw_source_code_extract" => nil, "remediation_byte_offsets" => [] + }) + end + end + + context 'when changing vulnerability state' do + it 'resolves vulnerabilities' do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + + vulnerabilities_to_be_resolved = vulnerabilities_table.where(severity: severity_level_medium_int) + + expect do + resolve_vulnerabilities(vulnerabilities: vulnerabilities_to_be_resolved, comment: 'resolving') + end + .to change { + vulnerabilities_to_be_resolved.map(&:state) + }.from([detected_state_int] * vulnerabilities_to_be_resolved.count) + .to([resolved_state_int] * vulnerabilities_to_be_resolved.count) + .and change { + vulnerability_reads_table.where(vulnerability_id: vulnerabilities_to_be_resolved.map(&:id)).map(&:state) + }.from([detected_state_int] * vulnerabilities_to_be_resolved.count) + .to([resolved_state_int] * vulnerabilities_to_be_resolved.count) + .and change { + vulnerability_state_transitions_table.where(from_state: detected_state_int, + to_state: resolved_state_int, project_id: project.id).count + }.from(0).to(vulnerabilities_to_be_resolved.count) + .and change { + notes_table.count + }.from(0).to(vulnerabilities_to_be_resolved.count) + .and change { + system_note_metadata_table.count + }.from(0).to(vulnerabilities_to_be_resolved.count) + .and change { + vulnerabilities_to_be_resolved.pluck(:resolved_by_id) + }.from([nil] * vulnerabilities_to_be_resolved.count) + .to([user.id] * vulnerabilities_to_be_resolved.count) + .and change { + vulnerabilities_to_be_resolved.pluck(:resolved_at) + }.from([nil] * vulnerabilities_to_be_resolved.count) + end + + it 'dismisses vulnerabilities' do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + + vulnerabilities_to_be_dismissed = vulnerabilities_table.where(severity: severity_level_medium_int) + + expect do + dismiss_vulnerabilities(vulnerabilities: vulnerabilities_to_be_dismissed, + comment: 'dismissing', dismissal_reason: acceptable_risk_dismissal_int) + end + .to change { + vulnerabilities_to_be_dismissed.map(&:state) + }.from([detected_state_int] * vulnerabilities_to_be_dismissed.count) + .to([dismissed_state_int] * vulnerabilities_to_be_dismissed.count) + .and change 
{ + vulnerability_reads_table.where(vulnerability_id: vulnerabilities_to_be_dismissed.map(&:id)).map(&:state) + }.from([detected_state_int] * vulnerabilities_to_be_dismissed.count) + .to([dismissed_state_int] * vulnerabilities_to_be_dismissed.count) + .and change { + vulnerability_state_transitions_table.where(from_state: detected_state_int, + to_state: dismissed_state_int, project_id: project.id).count + }.from(0).to(vulnerabilities_to_be_dismissed.count) + .and change { + notes_table.count + }.from(0).to(vulnerabilities_to_be_dismissed.count) + .and change { + system_note_metadata_table.count + }.from(0).to(vulnerabilities_to_be_dismissed.count) + .and change { + vulnerabilities_to_be_dismissed.pluck(:dismissed_by_id) + }.from([nil] * vulnerabilities_to_be_dismissed.count) + .to([user.id] * vulnerabilities_to_be_dismissed.count) + .and change { + vulnerabilities_to_be_dismissed.pluck(:dismissed_at) + }.from([nil] * vulnerabilities_to_be_dismissed.count) + .and change { + vulnerability_reads_table.where(vulnerability_id: vulnerabilities_to_be_dismissed.pluck(:id)) + .pluck(:dismissal_reason) + }.from([nil] * vulnerabilities_to_be_dismissed.count) + .to([acceptable_risk_dismissal_int] * vulnerabilities_to_be_dismissed.count) + end + end + + context 'when corrupting vulnerabilities' do + it 'resets the state for resolved vulnerabilities to detected' do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + resolve_vulnerabilities( + vulnerabilities: vulnerabilities_table.where(severity: severity_level_medium_int), comment: 'resolving' + ) + + expect { corrupt_vulnerabilities }.to change { + vulnerabilities_to_be_resolved.pluck(:state) + }.from([resolved_state_int] * vulnerabilities_to_be_resolved.count) + .to([detected_state_int] * vulnerabilities_to_be_resolved.count) + end + + it 'does not reset the state for confirmed or dismissed vulnerabilities' do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + + dismiss_vulnerabilities(vulnerabilities: vulnerabilities_to_be_dismissed, + comment: 'dismissing', dismissal_reason: acceptable_risk_dismissal_int) + confirm_vulnerabilities(vulnerabilities: vulnerabilities_to_be_confirmed, comment: 'confirming') + + expect { corrupt_vulnerabilities }.to not_change { + vulnerabilities_to_be_dismissed.pluck(:state) + }.from([dismissed_state_int] * vulnerabilities_to_be_dismissed.count) + .and not_change { vulnerabilities_to_be_confirmed.pluck(:state) } + .from([confirmed_state_int] * vulnerabilities_to_be_confirmed.count) + end + + it 'reorders the raw_metadata identifiers for corrupted vulnerability findings, ' \ + 'placing cwe first, without altering other metadata' do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + + ordered_vulnerabilities = vulnerabilities_table.order(:id) + + metadata_before = ordered_vulnerabilities.to_h do |v| + finding = vulnerability_findings_table.find(v.finding_id) + [v.id, Gitlab::Json.parse(finding.raw_metadata)] + end + + corrupt_vulnerabilities + + aggregate_failures 'all findings should have reordered identifiers only' do + ordered_vulnerabilities.each do |vulnerability| + old_metadata = metadata_before[vulnerability.id] + finding = vulnerability_findings_table.find(vulnerability.finding_id) + new_metadata = Gitlab::Json.parse(finding.raw_metadata) + old_identifiers = old_metadata['identifiers'] + new_identifiers = new_metadata['identifiers'] + + expect(new_identifiers.first['type']).to eq('cwe') + 
expect(new_identifiers).to match_array(old_identifiers) + expect(new_metadata.except('identifiers')).to eq(old_metadata.except('identifiers')) + end + end + end + + it 'updates the UUID for corrupted vulnerability records' do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + + ordered_vulnerabilities = vulnerabilities_table.order(:id) + + original_uuids = ordered_vulnerabilities.map do |vuln| + finding = vulnerability_findings_table.find(vuln.finding_id) + primary_identifier = vulnerability_identifiers_table.find(finding.primary_identifier_id) + + Gitlab::UUID.v5( + [ + described_class::Migratable::Enums::Vulnerability.report_types.key(vuln.report_type), + primary_identifier.fingerprint.unpack1('H*'), + finding.location_fingerprint.unpack1('H*'), + vuln.project_id + ].join("-") + ) + end + + corrupt_uuids = ordered_vulnerabilities.map do |vuln| + finding = vulnerability_findings_table.find(vuln.finding_id) + identifiers = Gitlab::Json.parse(finding.raw_metadata)['identifiers'] + cwe_identifier = identifiers.find { |id| id['type'] == 'cwe' } + + binary_fingerprint = described_class::Migratable::Vulnerabilities::Identifier + .sha1_fingerprint(cwe_identifier) + + Gitlab::UUID.v5( + [ + described_class::Migratable::Enums::Vulnerability.report_types.key(vuln.report_type), + binary_fingerprint.unpack1('H*'), + finding.location_fingerprint.unpack1('H*'), + vuln.project_id + ].join("-") + ) + end + + expect { corrupt_vulnerabilities }.to change { + ordered_vulnerabilities.map do |v| + vulnerability_findings_table.find(v.finding_id).uuid + end + }.from(original_uuids).to(corrupt_uuids) + end + end + + context 'when no corrupt vulnerabilities exist' do + before do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + end + + it 'does not create or alter any existing records' do + expect { perform_migration }.to not_change { + vulnerabilities_table.all.pluck(:state) + } + .and not_change { security_findings_table.count } + .and not_change { notes_table.count } + .and not_change { system_note_metadata_table.count } + end + end + end + + # TODO: implement this + context 'when ingesting reports in sequence: correct identifiers' do + it 'performs the migration' do + expect(true).to be_truthy + end + end + + context 'when ingesting reports in sequence: incorrect identifiers' do + before do + create_vulnerabilities( + 'gl-sast-report-semgrep-6.7.0-multiple-vulnerabilities-incorrect-primary-identifier.json' + ) + + uuids_before_corruption.concat(vulnerability_findings_table.all.order(:id).pluck(:uuid)) + + confirm_vulnerabilities(vulnerabilities: vulnerabilities_to_be_confirmed, comment: 'confirming') + resolve_vulnerabilities(vulnerabilities: vulnerabilities_to_be_resolved, comment: 'resolving') + dismiss_vulnerabilities(vulnerabilities: vulnerabilities_to_be_dismissed, + comment: 'dismissing', dismissal_reason: acceptable_risk_dismissal_int) + end + + it 'does not change the state of any vulnerability records' do + expect { perform_migration }.to not_change { + vulnerabilities_table.order(:id).pluck(:state) + }.and not_change { + vulnerability_reads_table.order(:id).pluck(:state) + } + end + + it 'does not insert any state transition records' do + expect { perform_migration }.to not_change { + vulnerability_state_transitions_table.count + } + end + + it 'does not insert any note or system note metadata records' do + expect { perform_migration }.to not_change { + notes_table.count + }.and not_change { + 
system_note_metadata_table.count + } + end + + it 'restores the primary identifiers for corrupted vulnerability findings to the correct value' do + expect { perform_migration }.to change { + vulnerability_findings_table.all.map do |finding| + vulnerability_identifiers_table.find(finding.primary_identifier_id).external_type + end + }.from(["cwe"] * vulnerability_findings_table.count).to(["semgrep_id"] * vulnerability_findings_table.count) + end + + it 'deletes orphaned security findings where the first identifier has external_type "cwe"' do + findings_with_cwe_primary = security_findings_table.where( + "finding_data->'identifiers'->0->>'external_type' = ?", 'cwe' + ) + + expect { perform_migration }.to change { + findings_with_cwe_primary.count + }.to(0).and change { + security_findings_table.count + }.by(-findings_with_cwe_primary.count) + end + + it 'restores the UUID for corrupted vulnerability records' do + ordered_vulnerabilities = vulnerabilities_table.order(:id) + + corrupt_uuids = ordered_vulnerabilities.map do |vuln| + finding = vulnerability_findings_table.find(vuln.finding_id) + primary_identifier = vulnerability_identifiers_table.find(finding.primary_identifier_id) + + Gitlab::UUID.v5( + [ + described_class::Migratable::Enums::Vulnerability.report_types.key(vuln.report_type), + primary_identifier.fingerprint.unpack1('H*'), + finding.location_fingerprint.unpack1('H*'), + vuln.project_id + ].join("-") + ) + end + + expect { perform_migration }.to change { + ordered_vulnerabilities.map do |v| + vulnerability_findings_table.find(v.finding_id).uuid + end + }.from(corrupt_uuids) + .and change { + ordered_vulnerabilities.map do |v| + finding = vulnerability_findings_table.find(v.finding_id) + security_findings_table.where(uuid: finding.uuid).first&.uuid + end + }.from(corrupt_uuids) + .and change { + ordered_vulnerabilities.map do |v| + vulnerability_reads_table.find_by(vulnerability_id: v.id).uuid + end + }.from(corrupt_uuids) + end + + it 'reorders the raw_metadata identifiers for corrupted vulnerability findings, ' \ + 'placing semgrep_id first, without altering other metadata' do + ordered_vulnerabilities = vulnerabilities_table.order(:id) + + metadata_before = ordered_vulnerabilities.to_h do |v| + finding = vulnerability_findings_table.find(v.finding_id) + [v.id, Gitlab::Json.parse(finding.raw_metadata)] + end + + perform_migration + + aggregate_failures 'all findings should have reordered identifiers only' do + ordered_vulnerabilities.each do |vulnerability| + old_metadata = metadata_before[vulnerability.id] + finding = vulnerability_findings_table.find(vulnerability.finding_id) + new_metadata = Gitlab::Json.parse(finding.raw_metadata) + old_identifiers = old_metadata['identifiers'] + new_identifiers = new_metadata['identifiers'] + + expect(new_identifiers.first['type']).to eq('semgrep_id') + expect(new_identifiers).to match_array(old_identifiers) + expect(new_metadata.except('identifiers')).to eq(old_metadata.except('identifiers')) + end + end + end + end + + context 'when ingesting reports in sequence: correct identifiers → incorrect identifiers' do + before do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + + uuids_before_corruption.concat(vulnerability_findings_table.all.order(:id).pluck(:uuid)) + + confirm_vulnerabilities(vulnerabilities: vulnerabilities_to_be_confirmed, comment: 'confirming') + resolve_vulnerabilities(vulnerabilities: vulnerabilities_to_be_resolved, comment: 'resolving') + dismiss_vulnerabilities(vulnerabilities: 
vulnerabilities_to_be_dismissed, + comment: 'dismissing', dismissal_reason: acceptable_risk_dismissal_int) + + corrupt_vulnerabilities + end + + it 'deletes orphaned security findings where the first identifier has external_type "cwe"' do + findings_with_cwe_primary = security_findings_table.where( + "finding_data->'identifiers'->0->>'external_type' = ?", 'cwe' + ) + + expect { perform_migration }.to change { + findings_with_cwe_primary.count + }.to(0).and change { + security_findings_table.count + }.by(-findings_with_cwe_primary.count) + end + + it 'restores the primary identifiers for corrupted vulnerability findings to the correct value' do + expect { perform_migration }.to change { + vulnerability_findings_table.all.map do |finding| + vulnerability_identifiers_table.find(finding.primary_identifier_id).external_type + end + }.from(["cwe"] * vulnerability_findings_table.count).to(["semgrep_id"] * vulnerability_findings_table.count) + end + + it 'restores the UUID for corrupted vulnerability records to the correct value' do + ordered_vulnerabilities = vulnerabilities_table.order(:id) + + corrupt_uuids = ordered_vulnerabilities.map do |vuln| + finding = vulnerability_findings_table.find(vuln.finding_id) + primary_identifier = vulnerability_identifiers_table.find(finding.primary_identifier_id) + + Gitlab::UUID.v5( + [ + described_class::Migratable::Enums::Vulnerability.report_types.key(vuln.report_type), + primary_identifier.fingerprint.unpack1('H*'), + finding.location_fingerprint.unpack1('H*'), + vuln.project_id + ].join("-") + ) + end + + expect { perform_migration }.to change { + ordered_vulnerabilities.map do |v| + vulnerability_findings_table.find(v.finding_id).uuid + end + }.from(corrupt_uuids).to(uuids_before_corruption) + .and change { + ordered_vulnerabilities.map do |v| + finding = vulnerability_findings_table.find(v.finding_id) + security_findings_table.where(uuid: finding.uuid).first&.uuid + end + }.from(corrupt_uuids).to(uuids_before_corruption) + .and change { + ordered_vulnerabilities.map do |v| + vulnerability_reads_table.find_by(vulnerability_id: v.id).uuid + end + }.from(corrupt_uuids).to(uuids_before_corruption) + end + + it 'reorders the raw_metadata identifiers for corrupted vulnerability findings, ' \ + 'placing semgrep_id first, without altering other metadata' do + ordered_vulnerabilities = vulnerabilities_table.order(:id) + + metadata_before = ordered_vulnerabilities.to_h do |v| + finding = vulnerability_findings_table.find(v.finding_id) + [v.id, Gitlab::Json.parse(finding.raw_metadata)] + end + + perform_migration + + aggregate_failures 'all findings should have reordered identifiers only' do + ordered_vulnerabilities.each do |vulnerability| + old_metadata = metadata_before[vulnerability.id] + finding = vulnerability_findings_table.find(vulnerability.finding_id) + new_metadata = Gitlab::Json.parse(finding.raw_metadata) + old_identifiers = old_metadata['identifiers'] + new_identifiers = new_metadata['identifiers'] + + expect(new_identifiers.first['type']).to eq('semgrep_id') + expect(new_identifiers).to match_array(old_identifiers) + expect(new_metadata.except('identifiers')).to eq(old_metadata.except('identifiers')) + end + end + end + + it 'reorders the finding_data identifiers for corrupted vulnerability security findings, ' \ + 'placing semgrep_id first, without altering other finding_data' do + ordered_vulnerabilities = vulnerabilities_table.order(:id) + + finding_data_before = ordered_vulnerabilities.to_h do |v| + finding = 
vulnerability_findings_table.find(v.finding_id)
+          security_finding = security_findings_table.where(uuid: finding.uuid).first
+          [v.id, security_finding.finding_data]
+        end
+
+        perform_migration
+
+        aggregate_failures 'all security findings should have reordered identifiers only' do
+          ordered_vulnerabilities.each do |vulnerability|
+            old_finding_data = finding_data_before[vulnerability.id]
+            finding = vulnerability_findings_table.find(vulnerability.finding_id)
+            new_security_finding = security_findings_table.where(uuid: finding.uuid).first
+            new_finding_data = new_security_finding.finding_data
+
+            old_identifiers = old_finding_data['identifiers']
+            new_identifiers = new_finding_data['identifiers']
+
+            expect(new_identifiers.first['external_type']).to eq('semgrep_id')
+            expect(new_identifiers).to match_array(old_identifiers)
+            expect(new_finding_data.except('identifiers')).to eq(old_finding_data.except('identifiers'))
+          end
+        end
+      end
+
+      it 'inserts new vulnerability state transitions for the corrupted vulnerabilities that have ' \
+        'changed from detected to resolved' do
+        vulnerability_ids = vulnerabilities_to_be_resolved.map(&:id)
+
+        expect { perform_migration }.to change {
+          vulnerability_state_transitions_table
+            .where(vulnerability_id: vulnerability_ids)
+            .where("comment LIKE ?", "%original comment automatically copied from transition%")
+            .where(from_state: detected_state_int, to_state: resolved_state_int)
+            .count
+        }.by(vulnerabilities_to_be_resolved.count)
+      end
+
+      it 'restores the state for resolved corrupted vulnerabilities and vulnerability_reads' do
+        vulnerability_ids = vulnerabilities_to_be_resolved.map(&:id)
+
+        expect { perform_migration }.to change {
+          vulnerability_ids.map { |id| vulnerabilities_table.find(id).state }
+        }.from([detected_state_int] * vulnerabilities_to_be_resolved.length)
+          .to([resolved_state_int] * vulnerabilities_to_be_resolved.length)
+          .and change {
+            vulnerability_ids.map { |id| vulnerability_reads_table.find_by(vulnerability_id: id).state }
+          }.from([detected_state_int] * vulnerabilities_to_be_resolved.length)
+          .to([resolved_state_int] * vulnerabilities_to_be_resolved.length)
+      end
+
+      it 'does not change the state for confirmed and dismissed corrupted vulnerabilities, ' \
+        'because they are still correct' do
+        expect { perform_migration }.to not_change {
+          vulnerabilities_to_be_confirmed.pluck(:state)
+        }.from([confirmed_state_int] * vulnerabilities_to_be_confirmed.count)
+          .and not_change {
+            vulnerabilities_to_be_dismissed.pluck(:state)
+          }.from([dismissed_state_int] * vulnerabilities_to_be_dismissed.count)
+      end
+
+      it 'creates new system notes for the resolved vulnerabilities' do
+        total_vulnerabilities_changed = (
+          vulnerabilities_to_be_dismissed.count +
+            vulnerabilities_to_be_resolved.count +
+            # add vulnerabilities_to_be_resolved.count twice, because
+            # BulkCreateRedetectedNotesService adds a new record for
+            # each resolved vulnerability when the pipeline is corrupted
+            vulnerabilities_to_be_resolved.count +
+            vulnerabilities_to_be_confirmed.count
+        )
+
+        expect { perform_migration }.to change { notes_table.count }
+          .from(total_vulnerabilities_changed).to(total_vulnerabilities_changed + vulnerabilities_to_be_resolved.count)
+
+        notes = notes_table.where("note LIKE ?", "%original comment automatically copied from transition%")
+
+        aggregate_failures "checking note attributes" do
+          notes.order(:noteable_id).zip(vulnerabilities_to_be_resolved).each do |note, vulnerability|
+            expect(note.noteable_type).to eq('Vulnerability')
+            
expect(note.noteable_id).to eq(vulnerability.id)
+            expect(note.author_id).to eq(user.id)
+            expect(note.created_at).to be_a_kind_of(Time)
+            expect(note.updated_at).to be_a_kind_of(Time)
+            expect(note.project_id).to eq(project.id)
+            expect(note.system).to be_truthy
+            expect(note.namespace_id).to eq(project.project_namespace_id)
+            expect(note.discussion_id).to match(/[a-f0-9]{40}/)
+            expect(note.note).to match(
+              /changed vulnerability status from Detected to Resolved with the following comment: "resolving"/
+            )
+          end
+        end
+      end
+
+      it 'creates new system note metadata for the resolved vulnerabilities' do
+        expect { perform_migration }.to change { system_note_metadata_table.count }
+          .by(vulnerabilities_to_be_resolved.count)
+
+        notes = notes_table.where("note LIKE ?", "%original comment automatically copied from transition%")
+        system_note_metadata = system_note_metadata_table.where(note_id: notes.pluck(:id))
+
+        expect(system_note_metadata.count).to eq(notes.count)
+
+        aggregate_failures "checking system note metadata attributes" do
+          system_note_metadata.order(:note_id).zip(notes.order(:id)).each do |metadata, note|
+            expect(metadata.note_id).to eq(note.id)
+            expect(metadata.action).to eq('vulnerability_resolved')
+            expect(metadata.namespace_id).to eq(project.project_namespace_id)
+            expect(metadata.created_at).to be_a_kind_of(Time)
+            expect(metadata.updated_at).to be_a_kind_of(Time)
+          end
+        end
+      end
+
+      it 'does not create or delete any vulnerabilities or vulnerability findings' do
+        expect { perform_migration }.to not_change { vulnerabilities_table.count }
+          .and not_change { vulnerability_findings_table.count }
+      end
+
+      context 'when performing the migration twice' do
+        it 'is idempotent' do
+          expect { perform_migration }.to change {
+            vulnerability_findings_table.all.map do |finding|
+              vulnerability_identifiers_table.find(finding.primary_identifier_id).external_type
+            end
+          }.from(["cwe"] * vulnerabilities_table.count).to(["semgrep_id"] * vulnerabilities_table.count)
+
+          expect { perform_migration }.to(
+            not_change { vulnerability_state_transitions_table.count }
+            .and(not_change do
+              vulnerability_findings_table.all.map do |finding|
+                vulnerability_identifiers_table.find(finding.primary_identifier_id).external_type
+              end
+            end)
+            .and(not_change { notes_table.count })
+            .and(not_change { system_note_metadata_table.count })
+            .and(not_change { vulnerabilities_table.pluck(:state) })
+          )
+        end
+      end
+    end
+
+    context 'when ingesting reports in sequence: correct identifiers → incorrect identifiers → correct identifiers' do
+      before do
+        create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json')
+
+        uuids_before_corruption.concat(vulnerability_findings_table.all.order(:id).pluck(:uuid))
+
+        confirm_vulnerabilities(vulnerabilities: vulnerabilities_to_be_confirmed, comment: 'confirming')
+        resolve_vulnerabilities(vulnerabilities: vulnerabilities_to_be_resolved, comment: 'resolving')
+        dismiss_vulnerabilities(vulnerabilities: vulnerabilities_to_be_dismissed,
+          comment: 'dismissing', dismissal_reason: acceptable_risk_dismissal_int)
+
+        corrupted_vulnerabilities.concat(corrupt_vulnerabilities)
+        duplicated_vulnerabilities.concat(create_vulnerabilities(
+          'gl-sast-report-semgrep-6.7.1-multiple-vulnerabilities-correct-primary-identifier.json'
+        ))
+      end
+
+      it 'does not change the primary identifiers for corrupted or duplicate vulnerabilities' do
+        expect { perform_migration }.to not_change {
+          vulnerability_findings_table
+            .where(vulnerability_id: 
corrupted_vulnerabilities.pluck(:id)) + .map do |finding| + vulnerability_identifiers_table.find(finding.primary_identifier_id).external_type + end + }.from(["cwe"] * corrupted_vulnerabilities.length) + .and not_change { + vulnerability_findings_table + .where(vulnerability_id: duplicated_vulnerabilities.pluck(:id)) + .map do |finding| + vulnerability_identifiers_table.find(finding.primary_identifier_id).external_type + end + }.from(["semgrep_id"] * duplicated_vulnerabilities.length) + end + + it 'does not change the UUID for corrupted or duplicate vulnerabilities' do + corrupted_vuln_ids = corrupted_vulnerabilities.pluck(:id) + duplicated_vuln_ids = duplicated_vulnerabilities.pluck(:id) + + corrupted_vuln_finding_uuids = vulnerability_findings_table.where(vulnerability_id: corrupted_vuln_ids) + .pluck(:uuid) + + duplicated_vuln_finding_uuids = vulnerability_findings_table.where(vulnerability_id: duplicated_vuln_ids) + .pluck(:uuid) + + expect { perform_migration }.to not_change { + vulnerability_findings_table.where(vulnerability_id: corrupted_vuln_ids).order(:id).pluck(:uuid) + } + .and not_change { + security_findings_table.where(uuid: corrupted_vuln_finding_uuids).order(:id).pluck(:uuid) + } + .and not_change { + vulnerability_reads_table.where(vulnerability_id: corrupted_vuln_ids).order(:id).pluck(:uuid) + } + .and not_change { + vulnerability_findings_table.where(vulnerability_id: duplicated_vuln_ids).order(:id).pluck(:uuid) + } + .and not_change { + security_findings_table.where(uuid: duplicated_vuln_finding_uuids).order(:id).pluck(:uuid) + } + .and not_change { + vulnerability_reads_table.where(vulnerability_id: duplicated_vuln_ids).order(:id).pluck(:uuid) + } + end + + it 'does not change the state for corrupted vulnerabilities' do + corrupted_vuln_ids = corrupted_vulnerabilities.pluck(:id) + + expect { perform_migration }.to not_change { + vulnerabilities_table.where(id: corrupted_vuln_ids) + .where("title LIKE ?", "%Vulnerability to be confirmed.").order(:id).pluck(:state) + }.from([confirmed_state_int] * vulnerabilities_to_be_confirmed.length) + .and not_change { + vulnerabilities_table.where(id: corrupted_vuln_ids) + .where("title LIKE ?", "%Vulnerability to be resolved.").order(:id).pluck(:state) + }.from([detected_state_int] * vulnerabilities_to_be_resolved.length) + .and not_change { + vulnerabilities_table.where(id: corrupted_vuln_ids) + .where("title LIKE ?", "%Vulnerability to be dismissed.").order(:id).pluck(:state) + }.from([dismissed_state_int] * vulnerabilities_to_be_dismissed.length) + end + + it 'restores the state for duplicate vulnerabilities' do + duplicated_vuln_ids = duplicated_vulnerabilities.pluck(:id) + + expect { perform_migration }.to change { + vulnerabilities_table.where(id: duplicated_vuln_ids) + .where("title LIKE ?", "%Vulnerability to be confirmed.").order(:id).pluck(:state) + }.from([detected_state_int] * vulnerabilities_to_be_confirmed.length) + .to([confirmed_state_int] * vulnerabilities_to_be_confirmed.length) + .and change { + vulnerabilities_table.where(id: duplicated_vuln_ids) + .where("title LIKE ?", "%Vulnerability to be resolved.").order(:id).pluck(:state) + }.from([detected_state_int] * vulnerabilities_to_be_resolved.length) + .to([resolved_state_int] * vulnerabilities_to_be_resolved.length) + .and change { + vulnerabilities_table.where(id: duplicated_vuln_ids) + .where("title LIKE ?", "%Vulnerability to be dismissed.").order(:id).pluck(:state) + }.from([detected_state_int] * vulnerabilities_to_be_dismissed.length) + 
.to([dismissed_state_int] * vulnerabilities_to_be_dismissed.length) + end + + context 'when system notes and state transitions' do + where(:to_state, :comment) do + [ + %w[resolved resolving], + %w[confirmed confirming], + %w[dismissed dismissing] + ] + end + + with_them do + let(:title_pattern) { "%Vulnerability to be #{ApplicationRecord.sanitize_sql_like(to_state)}." } + let(:target_vulnerabilities) do + vulnerabilities_table + .where(id: duplicated_vulnerabilities.pluck(:id)) + .where("title LIKE ?", title_pattern) + .order(:id) + end + + let(:to_state_int) do + described_class::Migratable::Enums::Vulnerability.vulnerability_states[to_state] + end + + it "has target vulnerabilities for #{params[:to_state]}" do + expect(target_vulnerabilities.count).to be > 0 + end + + it "creates vulnerability state transitions for duplicate #{params[:to_state]} vulnerabilities" do + comment_pattern = "#{ApplicationRecord.sanitize_sql_like(comment)} (original comment automatically copied%" + + expect { perform_migration }.to change { + vulnerability_state_transitions_table.where("comment LIKE ?", comment_pattern).count + }.by(target_vulnerabilities.count) + + aggregate_failures "checking vulnerability state transition attributes" do + vulnerability_state_transitions_table + .where(vulnerability_id: target_vulnerabilities.pluck(:id)) + .order(:vulnerability_id) + .zip(target_vulnerabilities).each do |transition, vulnerability| + expect(transition.vulnerability_id).to eq(vulnerability.id) + expect(transition.to_state).to eq(to_state_int) + expect(transition.from_state).to eq(detected_state_int) + expect(transition.author_id).to eq(user.id) + expect(transition.created_at).to be_a_kind_of(Time) + expect(transition.updated_at).to be_a_kind_of(Time) + expect(transition.project_id).to eq(project.id) + expect(transition.comment).to match( + /#{comment} \(original comment automatically copied from transition [0-9]* to fix semgrep 6\.7\.0 bug/ + ) + expect(transition.dismissal_reason).to be_between(0, 4) if to_state_int == dismissed_state_int + end + end + end + + it "creates new system notes for #{params[:to_state]} vulnerabilities" do + note_pattern = "%changed vulnerability status from Detected to " \ + "#{ApplicationRecord.sanitize_sql_like(to_state.titleize)} with the following comment: " \ + "\"#{ApplicationRecord.sanitize_sql_like(comment)}\"%" + + expect { perform_migration }.to change { + notes_table.where('note LIKE ?', note_pattern).count + }.by(target_vulnerabilities.count) + + aggregate_failures "checking note attributes" do + notes_table + .where(noteable_id: target_vulnerabilities.pluck(:id)) + .order(:noteable_id) + .zip(target_vulnerabilities).each do |note, vulnerability| + expect(note.noteable_type).to eq('Vulnerability') + expect(note.noteable_id).to eq(vulnerability.id) + expect(note.author_id).to eq(user.id) + expect(note.created_at).to be_a_kind_of(Time) + expect(note.updated_at).to be_a_kind_of(Time) + expect(note.project_id).to eq(project.id) + expect(note.system).to be_truthy + expect(note.namespace_id).to eq(project.project_namespace_id) + expect(note.discussion_id).to match(/[a-f0-9]{40}/) + + expected_note = "changed vulnerability status from Detected to #{to_state.titleize} " \ + "with the following comment: \"#{comment}\"" + expect(note.note).to include(expected_note) + end + end + end + + it "creates new system note metadata for #{params[:to_state]} vulnerabilities" do + expect { perform_migration }.to change { + system_note_metadata_table.where(action: 
"vulnerability_#{to_state}").count + }.by(target_vulnerabilities.count) + + notes = notes_table.where( + "note LIKE ?", "%changed vulnerability status from Detected " \ + "to #{ApplicationRecord.sanitize_sql_like(to_state.titleize)}%" + ) + + system_note_metadata = system_note_metadata_table.where(note_id: notes.pluck(:id)) + expect(system_note_metadata.count).to eq(notes.count) + + aggregate_failures "checking system note metadata attributes" do + system_note_metadata.order(:note_id).zip(notes.order(:id)).each do |metadata, note| + expect(metadata.note_id).to eq(note.id) + expect(metadata.action).to eq("vulnerability_#{to_state}") + expect(metadata.namespace_id).to eq(project.project_namespace_id) + expect(metadata.created_at).to be_a_kind_of(Time) + expect(metadata.updated_at).to be_a_kind_of(Time) + end + end + end + end + end + + context 'when there are additional vulnerabilities that are not affected by the bug' do + let(:additional_vulnerabilities_to_be_confirmed) do + vulnerabilities_table.where("title LIKE ?", "Additional vulnerability%") + end + + before do + create_vulnerabilities( + 'gl-sast-report-semgrep-6.7.1-additional-vulnerabilities-correct-primary-identifier.json' + ) + + confirm_vulnerabilities(vulnerabilities: additional_vulnerabilities_to_be_confirmed, comment: 'confirming') + end + + it 'does not alter the additional vulnerabilities' do + expect { perform_migration }.to not_change { + additional_vulnerabilities_to_be_confirmed.pluck(:state) + }.from([confirmed_state_int]) + .and not_change { + vulnerability_findings_table.where(id: additional_vulnerabilities_to_be_confirmed.pluck(:finding_id)) + .pluck(:uuid) + } + .and not_change { + notes_table.where(noteable_id: additional_vulnerabilities_to_be_confirmed.pluck(:id)).count + } + .and not_change { + vulnerability_state_transitions_table + .where(vulnerability_id: additional_vulnerabilities_to_be_confirmed.pluck(:id)).count + } + .and not_change { + vulnerability_reads_table + .where(vulnerability_id: additional_vulnerabilities_to_be_confirmed.pluck(:id)).pluck(:uuid) + } + end + end + end + end +end +# rubocop:enable RSpec/MultipleMemoizedHelpers + +# replicates the changes that happen when executing semgrep 6.7.0 +def corrupt_vulnerabilities + new_time = Time.current + corrupt_scan = create_corrupt_scan + + reset_resolved_vulnerabilities_state(new_time, corrupt_scan.pipeline_id) + corrupt_all_vulnerability_identifiers(new_time, corrupt_scan) +end + +def create_corrupt_scan + corrupt_pipeline = pipelines_table.create!(project_id: project.id, partition_id: 100) + corrupt_build = builds_table.create!(partition_id: corrupt_pipeline.partition_id, + project_id: project.id, commit_id: corrupt_pipeline.id) + + security_scans.create!( + build_id: corrupt_build.id, + scan_type: described_class::Migratable::Enums::Security.scan_types[:sast], + pipeline_id: corrupt_pipeline.id, + project_id: project.id + ) +end + +def reset_resolved_vulnerabilities_state(new_time, corrupt_pipeline_id) + # resolved vulnerabilities are the only ones whose state is changed by the bug + vulnerabilities_table.where(state: resolved_state_int).find_each do |vulnerability| + reset_vulnerability_to_detected(vulnerability, new_time, corrupt_pipeline_id) + end +end + +def reset_vulnerability_to_detected(vulnerability, new_time, corrupt_pipeline_id) + from_state_int = vulnerability.state + + vulnerability.update!(state: detected_state_int, resolved_by_id: nil, resolved_at: nil) + + vulnerability_state_transitions_table.create!(vulnerability_id: 
vulnerability.id, from_state: from_state_int, + to_state: detected_state_int, created_at: new_time, updated_at: new_time, author_id: nil, project_id: project.id + ) + + vulnerability_reads_table.where(vulnerability_id: vulnerability.id).update!(state: detected_state_int) + + note = notes_table.create!( + note: "changed vulnerability status to Needs Triage because it was redetected in pipeline #{corrupt_pipeline_id}", + noteable_type: 'Vulnerability', + author_id: user.id, + created_at: new_time, + updated_at: new_time, + project_id: project.id, + noteable_id: vulnerability.id, + system: true, + discussion_id: nil, + namespace_id: group_namespace.id + ) + + system_note_metadata_table.create!( + action: 'vulnerability_detected', + created_at: note.created_at, + updated_at: note.updated_at, + note_id: note.id, + namespace_id: project.project_namespace_id + ) +end + +def corrupt_all_vulnerability_identifiers(new_time, corrupt_scan) + vulnerabilities = [] + + vulnerabilities_table.find_each do |vulnerability| + corrupt_vulnerability_identifiers(vulnerability, new_time, corrupt_scan) + vulnerabilities << vulnerability + end + + vulnerabilities +end + +def corrupt_vulnerability_identifiers(vulnerability, new_time, corrupt_scan) + vulnerability_finding = vulnerability_findings_table.find_by(vulnerability_id: vulnerability.id) + old_uuid = vulnerability_finding.uuid + + corrupt_metadata = reorder_metadata_with_incorrect_primary_id(vulnerability_finding.raw_metadata) + new_uuid = calculate_corrupt_uuid(corrupt_metadata, vulnerability_finding) + + update_vulnerability_with_corrupt_data(vulnerability, vulnerability_finding, new_uuid, corrupt_metadata, + new_time, corrupt_scan) + create_corrupt_security_finding(old_uuid, new_uuid, corrupt_scan) +end + +def calculate_corrupt_uuid(corrupt_metadata, vulnerability_finding) + corrupt_identifier = corrupt_metadata['identifiers'][0] + binary_fingerprint = described_class::Migratable::Vulnerabilities::Identifier.sha1_fingerprint(corrupt_identifier) + corrupt_primary_identifier = vulnerability_identifiers_table.find_by(fingerprint: binary_fingerprint) + + Gitlab::UUID.v5( + [ + sast_report_type_string, + corrupt_primary_identifier.fingerprint.unpack1('H*'), + vulnerability_finding.location_fingerprint.unpack1('H*'), + project.id + ].join("-") + ) +end + +def update_vulnerability_with_corrupt_data( + vulnerability, vulnerability_finding, new_uuid, corrupt_metadata, new_time, corrupt_scan +) + vulnerability_reads_table.where(vulnerability_id: vulnerability.id).update!(uuid: new_uuid) + + corrupt_identifier = corrupt_metadata['identifiers'][0] + binary_fingerprint = described_class::Migratable::Vulnerabilities::Identifier.sha1_fingerprint(corrupt_identifier) + corrupt_primary_identifier = vulnerability_identifiers_table.find_by(fingerprint: binary_fingerprint) + + vulnerability_finding.update!(uuid: new_uuid, raw_metadata: corrupt_metadata.to_json, metadata_version: '15.2.2', + updated_at: new_time, latest_pipeline_id: corrupt_scan.pipeline_id, + primary_identifier_id: corrupt_primary_identifier.id) +end + +def create_corrupt_security_finding(old_uuid, new_uuid, corrupt_scan) + old_security_finding = security_findings_table.find_by(uuid: old_uuid) + corrupt_finding_data = reorder_finding_data_with_incorrect_primary_id(old_security_finding.finding_data) + + security_findings_table.create!(uuid: new_uuid, scan_id: corrupt_scan.id, finding_data: corrupt_finding_data, + scanner_id: vulnerability_scanner.id, severity: old_security_finding.severity) +end + +def 
reorder_finding_data_with_incorrect_primary_id(finding_data) + finding_data["identifiers"].sort_by! { |a| a["external_id"] } + finding_data +end + +def reorder_metadata_with_incorrect_primary_id(raw_metadata) + metadata = Gitlab::Json.parse(raw_metadata) + metadata["identifiers"].sort_by! { |a| a["value"] } + metadata +end + +def create_vulnerability_read(vulnerability, vulnerability_finding) + vulnerability_reads_table.create!( + vulnerability_id: vulnerability.id, + uuid: vulnerability_finding.uuid, + project_id: vulnerability.project_id, + scanner_id: vulnerability_finding.scanner_id, + report_type: vulnerability.report_type, + severity: vulnerability.severity, + state: vulnerability.state, + vulnerability_occurrence_id: vulnerability_finding.id + ) +end + +def find_matching_finding(vulnerability_finding) + vulnerability_findings_table.where( + severity: vulnerability_finding.severity, + report_type: vulnerability_finding.report_type, + location_fingerprint: vulnerability_finding.location_fingerprint, + name: vulnerability_finding.name, + metadata_version: vulnerability_finding.metadata_version, + project_id: vulnerability_finding.project_id + ).where.not(id: vulnerability_finding.id).first +end + +def create_security_finding(vulnerability, vulnerability_finding, vulnerability_scanner, scan, identifiers) + security_findings_table.create!( + uuid: vulnerability_finding.uuid, + project_id: project.id, + scanner_id: vulnerability_scanner.id, + scan_id: scan.id, + severity: vulnerability.severity, + finding_data: finding_data_for(vulnerability_finding: vulnerability_finding, vulnerability_identifiers: identifiers) + ) +end + +def create_vulnerability(vulnerability_finding, vulnerability_name, vulnerability_severity) + vulnerabilities_table.create!( + project_id: project.id, author_id: user.id, + created_at: current_time, updated_at: current_time, title: vulnerability_name, + severity: described_class::Migratable::Enums::Vulnerability.severity_levels[vulnerability_severity.downcase], + detected_at: current_time, finding_id: vulnerability_finding.id, report_type: sast_report_type_int + ) +end + +def create_vulnerabilities(fixture_file) + pipeline = pipelines_table.create!(project_id: project.id, partition_id: 100) + build = builds_table.create!(partition_id: pipeline.partition_id, project_id: project.id, commit_id: pipeline.id) + + scan = security_scans.create!( + build_id: build.id, + scan_type: described_class::Migratable::Enums::Security.scan_types[:sast], + pipeline_id: pipeline.id, + project_id: project.id + ) + + parsed_fixture = Gitlab::Json.parse(File.read("ee/spec/fixtures/security_reports/master/#{fixture_file}")) + + vulnerabilities = [] + + parsed_fixture['vulnerabilities'].each do |report_vulnerability| + vulnerability_finding = create_vulnerability_finding( + vulnerability: report_vulnerability, project: project, scanner: vulnerability_scanner, + metadata_version: parsed_fixture['version'], pipeline: pipeline + ) + + vulnerability = create_vulnerability(vulnerability_finding, report_vulnerability['name'], + report_vulnerability['severity']) + + vulnerabilities << vulnerability + + create_security_finding(vulnerability, vulnerability_finding, vulnerability_scanner, + scan, report_vulnerability['identifiers']) + + create_vulnerability_read(vulnerability, vulnerability_finding) + + vulnerability_finding.update!(vulnerability_id: vulnerability.id) + + matching_finding = find_matching_finding(vulnerability_finding) + + next unless matching_finding + + 
vulnerability_reads_table.find_by(vulnerability_id: matching_finding.vulnerability_id) + .update!(resolved_on_default_branch: true) + vulnerabilities_table.find_by(finding_id: matching_finding.id).update!(resolved_on_default_branch: true) + end + + vulnerabilities +end + +def finding_data_for(vulnerability_finding:, vulnerability_identifiers:) + vulnerability_identifiers_with_fingerprint_data = vulnerability_identifiers.map do |identifier| + fingerprint_string = described_class::Migratable::Vulnerabilities::Identifier.sha1_fingerprint(identifier) + + { + external_id: identifier['value'], + external_type: identifier['type'], + name: identifier['name'], + url: identifier['url'], + fingerprint: fingerprint_string.unpack1('H*') + } + end + + { + name: vulnerability_finding.name, + links: [], + assets: [], + raw_source_code_extract: nil, + false_positive?: false, + remediation_byte_offsets: [], + evidence: nil, + description: vulnerability_finding.description, + solution: vulnerability_finding.solution, + location: vulnerability_finding.location, + identifiers: vulnerability_identifiers_with_fingerprint_data, + details: vulnerability_finding.details + } +end + +def create_vulnerability_identifiers(vulnerability:, project:) + vulnerability['identifiers'].map do |identifier| + fingerprint_string = described_class::Migratable::Vulnerabilities::Identifier.sha1_fingerprint(identifier) + + vulnerability_identifiers_table.find_or_create_by!( + project_id: project.id, + fingerprint: fingerprint_string + ) do |vi| + vi.created_at = current_time + vi.updated_at = current_time + vi.external_type = identifier['type'] + vi.external_id = identifier['value'] + vi.name = identifier['name'] + vi.url = identifier['url'] + end + end +end + +def dismissal_int_to_string(dismissal_int) + %w[acceptable_risk false_positive mitigating_control used_in_tests not_applicable][dismissal_int].titleize +end + +def resolve_vulnerabilities(vulnerabilities:, comment:) + change_vulnerabilities_state(vulnerabilities: vulnerabilities, + to_state_string: resolved_state_string, comment: comment) +end + +def confirm_vulnerabilities(vulnerabilities:, comment:) + change_vulnerabilities_state(vulnerabilities: vulnerabilities, + to_state_string: confirmed_state_string, comment: comment) +end + +def dismiss_vulnerabilities(vulnerabilities:, comment:, dismissal_reason:) + change_vulnerabilities_state(vulnerabilities: vulnerabilities, to_state_string: dismissed_state_string, + comment: comment, dismissal_reason_int: dismissal_reason) +end + +def change_vulnerabilities_state(vulnerabilities:, to_state_string:, comment:, dismissal_reason_int: nil) + new_time = Time.current + to_state_int = described_class::Migratable::Enums::Vulnerability.vulnerability_states[to_state_string] + + vulnerabilities.each do |vulnerability| + from_state_int = vulnerability.state + from_state_string = described_class::Migratable::Enums::Vulnerability.vulnerability_states + .key(from_state_int).titleize + + create_state_transition(vulnerability, from_state_int, to_state_int, comment, dismissal_reason_int, new_time) + note = create_system_note(vulnerability, from_state_string, to_state_string, comment, + dismissal_reason_int, new_time) + create_system_note_metadata(note, to_state_string, new_time) + end + + update_vulnerabilities(vulnerabilities, to_state_string, to_state_int, new_time) + vulnerability_reads_table.where(vulnerability_id: vulnerabilities.map(&:id)) + .update!(dismissal_reason: dismissal_reason_int) +end + +def create_state_transition(vulnerability, 
from_state_int, to_state_int, comment, dismissal_reason_int, time) + vulnerability_state_transitions_table.create!( + vulnerability_id: vulnerability.id, + from_state: from_state_int, + to_state: to_state_int, + created_at: time, + updated_at: time, + author_id: user.id, + comment: comment, + project_id: project.id, + dismissal_reason: dismissal_reason_int + ) +end + +def create_system_note(vulnerability, from_state_string, to_state_string, comment, dismissal_reason_int, time) + formatted_to_state = to_state_string.titleize + formatted_to_state += ": #{dismissal_int_to_string(dismissal_reason_int)}" if dismissal_reason_int + + notes_table.create!( + noteable_id: vulnerability.id, + noteable_type: 'Vulnerability', + author_id: user.id, + created_at: time, + updated_at: time, + project_id: project.id, + system: true, + namespace_id: project.project_namespace_id, + note: <<~NOTE.squish + changed vulnerability status from #{from_state_string} to #{formatted_to_state} + with the following comment: "#{comment}" + NOTE + ) +end + +def create_system_note_metadata(note, to_state_string, time) + system_note_metadata_table.create!( + action: "vulnerability_#{to_state_string}", + note_id: note.id, + namespace_id: project.project_namespace_id, + created_at: time, + updated_at: time + ) +end + +def update_vulnerabilities(vulnerabilities, to_state_string, to_state_int, time) + vulnerability_attributes = build_vulnerability_attributes(to_state_string, to_state_int, time) + vulnerabilities.update!(vulnerability_attributes) +end + +def build_vulnerability_attributes(to_state_string, to_state_int, time) + attributes = { state: to_state_int } + + case to_state_string + when confirmed_state_string + attributes[:confirmed_by_id] = user.id + attributes[:confirmed_at] = time + when resolved_state_string + attributes[:resolved_by_id] = user.id + attributes[:resolved_at] = time + when dismissed_state_string + attributes[:dismissed_by_id] = user.id + attributes[:dismissed_at] = time + end + + attributes +end + +def create_vulnerability_finding(vulnerability:, project:, scanner:, metadata_version:, pipeline:) + vulnerability_identifiers = create_vulnerability_identifiers(vulnerability: vulnerability, project: project) + primary_identifier = vulnerability_identifiers.find do |vi| + vi.external_type == vulnerability['identifiers'][0]['type'] && + vi.external_id == vulnerability['identifiers'][0]['value'] + end + + fingerprint_data = [ + vulnerability.dig('location', 'file'), + vulnerability.dig('location', 'start_line'), + vulnerability.dig('location', 'end_line') + ].join(":") + + location_fingerprint_hex = Digest::SHA1.hexdigest(fingerprint_data) # rubocop:disable Fips/SHA1 -- we must use SHA1, since this is how the fingerprint is stored in the DB + location_fingerprint_binary = [location_fingerprint_hex].pack('H*') + + uuid = Gitlab::UUID.v5( + [ + sast_report_type_string, + primary_identifier.fingerprint.unpack1('H*'), + location_fingerprint_hex, + project.id + ].join("-") + ) + + vulnerability_finding = vulnerability_findings_table.create!( + location_fingerprint: location_fingerprint_binary, + uuid: uuid, + description: vulnerability['description'], + location: vulnerability['location'], + raw_metadata: vulnerability.to_json, + created_at: current_time, updated_at: current_time, + severity: described_class::Migratable::Enums::Vulnerability.severity_levels[vulnerability['severity'].downcase], + report_type: sast_report_type_int, + project_id: project.id, + scanner_id: scanner.id, + primary_identifier_id: 
primary_identifier.id, + name: vulnerability['name'], + metadata_version: metadata_version, + initial_pipeline_id: pipeline.id, + latest_pipeline_id: pipeline.id + ) + + vulnerability_identifiers.each do |vi| + vulnerability_finding_identifiers_table.create!(created_at: current_time, updated_at: current_time, + identifier_id: vi.id, project_id: project.id, occurrence_id: vulnerability_finding.id) + end + + vulnerability_finding +end diff --git a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb deleted file mode 100644 index e7b214cc10b894..00000000000000 --- a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb +++ /dev/null @@ -1,119 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe Gitlab::BackgroundMigration::RestoreIncorrectVulnerabilityStates, feature_category: :static_application_security_testing do - let(:vulnerabilities) { table(:vulnerabilities) } - let(:known_keys) { Set.new } - let(:project) { create(:project) } - let(:user) { create(:user) } - - let(:initial_pipeline) { create(:ci_pipeline, user: user, project: project) } - let(:initial_sast_build) { create(:ee_ci_build, :success, pipeline: initial_pipeline, project: project) } - let!(:initial_sast_artifact) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities, job: initial_sast_build) } - - let(:corrupted_pipeline) { create(:ci_pipeline, user: user, project: project) } - let(:corrupted_sast_build) { create(:ee_ci_build, :success, pipeline: corrupted_pipeline) } - let!(:corrupted_sast_artifact) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities_incorrect_primary_identifier, job: corrupted_sast_build) } - - let(:restored_pipeline) { create(:ci_pipeline, user: user, project: project) } - let(:restored_sast_build) { create(:ee_ci_build, :success, pipeline: restored_pipeline) } - let!(:restored_sast_artifact) { create(:ee_ci_job_artifact, :sast_semgrep_multiple_vulnerabilities, job: restored_sast_build) } - - let(:known_keys) { Set.new } - - before do - stub_licensed_features(sast: true, security_dashboard: true) - project.add_maintainer(user) - end - - let(:sub_batch_size) { vulnerabilities.count } - - # use a method instead of a subject to avoid rspec memoization - def perform_migration - described_class.new( - start_id: vulnerabilities.minimum(:id), - end_id: vulnerabilities.maximum(:id), - batch_table: :vulnerabilities, - batch_column: :id, - sub_batch_size: sub_batch_size, - pause_ms: 0, - connection: ActiveRecord::Base.connection - ).perform - end - - describe "#perform", feature_category: :static_application_security_testing do - context 'when ingesting reports in sequence: correct identifiers → incorrect identifiers' do - before do - Security::StoreScansService.execute(initial_pipeline) - Security::Ingestion::IngestReportsService.execute(initial_pipeline) - - Vulnerability.where(project_id: project.id).each { |v| puts "Resolving vuln #{v.id}"; ::Vulnerabilities::ResolveService.new(user, v, "first resolution").execute } - Vulnerability.where(project_id: project.id, severity: 'high').each { |v| puts "Confirming vuln #{v.id}"; Vulnerabilities::ConfirmService.new(user, v, "confirming").execute } - Vulnerability.where(project_id: project.id, severity: 'low').each { |v| puts "Dismissing vuln #{v.id}"; Vulnerabilities::DismissService.new(user, v, "dismissing", 'acceptable_risk').execute } - 
Vulnerability.where(project_id: project.id, severity: 'medium')[0..4].each {|v| puts "Resolving again vuln #{v.id}"; ::Vulnerabilities::ResolveService.new(user, v, "last resolution").execute } - - Security::StoreScansService.execute(corrupted_pipeline) - Security::Ingestion::IngestReportsService.execute(corrupted_pipeline) - end - - it 'restores vulnerability states' do - perform_migration - - types = Vulnerability.all.map { |v| v.finding.primary_identifier.external_type } - expect(types).to eq(["semgrep_id"] * 16) - - expect(Vulnerability.where(project_id: project.id, severity: 'high', resolved_on_default_branch: false).map { |v| v.finding.state }) - .to eq(["confirmed"] * 4) - - expect(Vulnerability.where(project_id: project.id, severity: 'low', resolved_on_default_branch: false).map { |v| v.finding.state }) - .to eq(["dismissed"] * 2) - - expect(Vulnerability.where(project_id: project.id, severity: 'medium', resolved_on_default_branch: false).map { |v| v.finding.state }) - .to eq(["resolved"] * 10) - end - - it 'does not delete any vulnerabilities' do - expect { perform_migration }.not_to change { Vulnerability.count } - end - end - - context 'when ingesting reports in sequence: correct identifiers → incorrect identifiers → correct identifiers' do - before do - Security::StoreScansService.execute(initial_pipeline) - Security::Ingestion::IngestReportsService.execute(initial_pipeline) - - Vulnerability.where(project_id: project.id).each { |v| puts "Resolving vuln #{v.id}"; ::Vulnerabilities::ResolveService.new(user, v, "first resolution").execute } - Vulnerability.where(project_id: project.id, severity: 'high').each { |v| puts "Confirming vuln #{v.id}"; Vulnerabilities::ConfirmService.new(user, v, "confirming").execute } - Vulnerability.where(project_id: project.id, severity: 'low').each { |v| puts "Dismissing vuln #{v.id}"; Vulnerabilities::DismissService.new(user, v, "dismissing", 'acceptable_risk').execute } - Vulnerability.where(project_id: project.id, severity: 'medium')[0..4].each {|v| puts "Resolving again vuln #{v.id}"; ::Vulnerabilities::ResolveService.new(user, v, "last resolution").execute } - - Security::StoreScansService.execute(corrupted_pipeline) - Security::Ingestion::IngestReportsService.execute(corrupted_pipeline) - - Security::StoreScansService.execute(restored_pipeline) - Security::Ingestion::IngestReportsService.execute(restored_pipeline) - end - - it 'restores vulnerability states' do - perform_migration - - types = Vulnerability.all.map { |v| v.finding.primary_identifier.external_type } - expect(types).to eq(["semgrep_id"] * 16) - - expect(Vulnerability.where(project_id: project.id, severity: 'high', resolved_on_default_branch: false).map { |v| v.finding.state }) - .to eq(["confirmed"] * 4) - - expect(Vulnerability.where(project_id: project.id, severity: 'low', resolved_on_default_branch: false).map { |v| v.finding.state }) - .to eq(["dismissed"] * 2) - - expect(Vulnerability.where(project_id: project.id, severity: 'medium', resolved_on_default_branch: false).map { |v| v.finding.state }) - .to eq(["resolved"] * 10) - end - - it 'deletes duplicate vulnerabilities' do - expect { perform_migration }.to change { Vulnerability.count }.from(32).to(16) - end - end - end -end diff --git a/spec/migrations/20251020182838_queue_restore_incorrect_vulnerability_states_spec.rb b/spec/migrations/20251020182838_queue_restore_incorrect_vulnerability_states_spec.rb index 17e5f6537eb5f5..44ffe126e1e775 100644 --- 
a/spec/migrations/20251020182838_queue_restore_incorrect_vulnerability_states_spec.rb +++ b/spec/migrations/20251020182838_queue_restore_incorrect_vulnerability_states_spec.rb @@ -3,25 +3,25 @@ require 'spec_helper' require_migration! -RSpec.describe QueueRestoreIncorrectVulnerabilityStates, migration: :gitlab_?, feature_category: :static_application_security_testing do - # let!(:batched_migration) { described_class::MIGRATION } +RSpec.describe QueueRestoreIncorrectVulnerabilityStates, migration: :gitlab_sec, feature_category: :static_application_security_testing do + let!(:batched_migration) { described_class::MIGRATION } - # it 'schedules a new batched migration' do - # reversible_migration do |migration| - # migration.before -> { - # expect(batched_migration).not_to have_scheduled_batched_migration - # } + it 'schedules a new batched migration' do + reversible_migration do |migration| + migration.before -> { + expect(batched_migration).not_to have_scheduled_batched_migration + } - # migration.after -> { - # expect(batched_migration).to have_scheduled_batched_migration( - # gitlab_schema: # :gitlab_main_org / :gitlab_ci / ... - # table_name: :vulnerability_occurrences, - # column_name: :id, - # interval: described_class::DELAY_INTERVAL, - # batch_size: described_class::BATCH_SIZE, - # sub_batch_size: described_class::SUB_BATCH_SIZE - # ) - # } - # end - # end + migration.after -> { + expect(batched_migration).to have_scheduled_batched_migration( + gitlab_schema: :gitlab_sec, + table_name: :vulnerability_reads, + column_name: :vulnerability_id, + interval: described_class::DELAY_INTERVAL, + batch_size: described_class::BATCH_SIZE, + sub_batch_size: described_class::SUB_BATCH_SIZE + ) + } + end + end end -- GitLab From 2f677a6a23645ba85d37be8c148b5e3e95c429af Mon Sep 17 00:00:00 2001 From: Adam Cohen Date: Sun, 7 Dec 2025 19:55:25 +1100 Subject: [PATCH 11/16] Rename migration spec file --- ...ual_spec.rb => restore_incorrect_vulnerability_states_spec.rb} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename spec/lib/gitlab/background_migration/{restore_incorrect_vulnerability_states_manual_spec.rb => restore_incorrect_vulnerability_states_spec.rb} (100%) diff --git a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_manual_spec.rb b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb similarity index 100% rename from spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_manual_spec.rb rename to spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb -- GitLab From 732cbe75e3e3f23ea50dfaf771e3676c87cb0a45 Mon Sep 17 00:00:00 2001 From: Adam Cohen Date: Mon, 8 Dec 2025 11:50:37 +1100 Subject: [PATCH 12/16] Reorganize specs, add some more tests Process candidate occurrence ids in batch of 1000 --- ...emgrep-6.6.2-multiple-vulnerabilities.json | 2 +- ...bilities-incorrect-primary-identifier.json | 2 +- ...rabilities-correct-primary-identifier.json | 2 +- .../restore_incorrect_vulnerability_states.rb | 74 ++- ...ore_incorrect_vulnerability_states_spec.rb | 486 +++++++++--------- 5 files changed, 299 insertions(+), 267 deletions(-) diff --git a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json index e036030261518f..a2175b4e6e316d 100644 --- 
a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json +++ b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json @@ -49,7 +49,7 @@ { "id": "185f6aa5aece728c2b94f16ff36ea99339dbeb39a027964d65a0e544b439529d", "category": "sast", - "name": "Improper neutralization of special elements used in an SQL Command ('SQL Injection'). Vulnerability to be confirmed.", + "name": "Vulnerbility with issue link. Vulnerability to be confirmed.", "description": "SQL Injection is a critical vulnerability that can lead to data or system compromise.", "cve": "semgrep_id:bandit.B608:265:265", "severity": "High", diff --git a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.0-multiple-vulnerabilities-incorrect-primary-identifier.json b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.0-multiple-vulnerabilities-incorrect-primary-identifier.json index 8108a9f46c97c0..f3ad6be203367b 100644 --- a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.0-multiple-vulnerabilities-incorrect-primary-identifier.json +++ b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.0-multiple-vulnerabilities-incorrect-primary-identifier.json @@ -49,7 +49,7 @@ { "id": "4cc9c82ff0d985defd2801e1be40f784c149f8a70f5c1325c5d1979b13771bc1", "category": "sast", - "name": "Improper neutralization of special elements used in an SQL Command ('SQL Injection'). Vulnerability to be confirmed.", + "name": "Vulnerbility with issue link. Vulnerability to be confirmed.", "description": "SQL Injection is a critical vulnerability that can lead to data or system compromise.", "cve": "semgrep_id:bandit.B608:265:265", "severity": "High", diff --git a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-multiple-vulnerabilities-correct-primary-identifier.json b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-multiple-vulnerabilities-correct-primary-identifier.json index 5cbb16ff896b76..4180786039a98a 100644 --- a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-multiple-vulnerabilities-correct-primary-identifier.json +++ b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-multiple-vulnerabilities-correct-primary-identifier.json @@ -49,7 +49,7 @@ { "id": "185f6aa5aece728c2b94f16ff36ea99339dbeb39a027964d65a0e544b439529d", "category": "sast", - "name": "Improper neutralization of special elements used in an SQL Command ('SQL Injection'). Vulnerability to be confirmed.", + "name": "Vulnerbility with issue link. 
Vulnerability to be confirmed.", "description": "SQL Injection is a critical vulnerability that can lead to data or system compromise.", "cve": "semgrep_id:bandit.B608:265:265", "severity": "High", diff --git a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb index b4237d6216fa7b..46391f56200b30 100644 --- a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb +++ b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb @@ -73,6 +73,17 @@ class Finding < SecApplicationRecord end module Vulnerabilities + class IssueLink < SecApplicationRecord + self.table_name = 'vulnerability_issue_links' + + belongs_to :vulnerability + belongs_to :finding, foreign_key: :vulnerability_occurrence_id, + class_name: 'Migratable::Vulnerabilities::Finding', optional: true, inverse_of: false + belongs_to :issue + + enum :link_type, { related: 1, created: 2 } + end + class StateTransition < SecApplicationRecord self.table_name = 'vulnerability_state_transitions' @@ -317,6 +328,9 @@ def batch_find_correct_findings(vulnerabilities) return {} if candidate_occurrence_ids.empty? + log_info("Found #{candidate_occurrence_ids.length} candidate occurrence IDs " \ + "for #{vulnerabilities.length} vulnerabilities") + conn = ::SecApplicationRecord.connection values_sql = search_params.map do |p| @@ -327,33 +341,39 @@ def batch_find_correct_findings(vulnerabilities) "#{conn.quote(p[:metadata_version])}, #{p[:project_id]})" end.join(', ') - candidate_ids_sql = candidate_occurrence_ids.join(', ') + # Process candidate IDs in batches of 1000 to avoid unbounded IN clauses + all_results = [] + candidate_occurrence_ids.each_slice(1000) do |candidate_ids_batch| + candidate_ids_sql = candidate_ids_batch.join(', ') - sql = <<~SQL - WITH search_params AS ( - SELECT * FROM ( - VALUES #{values_sql} - ) AS t(vulnerability_id, exclude_finding_id, severity, report_type, location_fingerprint, name, description, metadata_version, project_id) - ) - SELECT DISTINCT ON (sp.vulnerability_id) - sp.vulnerability_id, - vo.id AS finding_id - FROM search_params sp - JOIN vulnerability_occurrences vo ON - vo.id IN (#{candidate_ids_sql}) - AND vo.severity = sp.severity - AND vo.report_type = sp.report_type - AND vo.location_fingerprint = sp.location_fingerprint - AND vo.name = sp.name - AND vo.description = sp.description - AND vo.metadata_version = sp.metadata_version - AND vo.project_id = sp.project_id - AND vo.id != sp.exclude_finding_id - ORDER BY sp.vulnerability_id, vo.id - SQL - - results = conn.execute(sql) - finding_ids = results.pluck('finding_id') + sql = <<~SQL + WITH search_params AS ( + SELECT * FROM ( + VALUES #{values_sql} + ) AS t(vulnerability_id, exclude_finding_id, severity, report_type, location_fingerprint, name, description, metadata_version, project_id) + ) + SELECT DISTINCT ON (sp.vulnerability_id) + sp.vulnerability_id, + vo.id AS finding_id + FROM search_params sp + JOIN vulnerability_occurrences vo ON + vo.id IN (#{candidate_ids_sql}) + AND vo.severity = sp.severity + AND vo.report_type = sp.report_type + AND vo.location_fingerprint = sp.location_fingerprint + AND vo.name = sp.name + AND vo.description = sp.description + AND vo.metadata_version = sp.metadata_version + AND vo.project_id = sp.project_id + AND vo.id != sp.exclude_finding_id + ORDER BY sp.vulnerability_id, vo.id + SQL + + batch_results = conn.execute(sql) + all_results.concat(batch_results.to_a) + end + + finding_ids = 
all_results.filter_map { |row| row['finding_id'] } return {} if finding_ids.empty? @@ -364,7 +384,7 @@ def batch_find_correct_findings(vulnerabilities) .index_by(&:id) # Build lookup by vulnerability ID - results.each_with_object({}) do |row, lookup| + all_results.each_with_object({}) do |row, lookup| lookup[row['vulnerability_id']] = findings_by_id[row['finding_id']] end end diff --git a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb index 852467823f9751..bd0ea029c0e682 100644 --- a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb +++ b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb @@ -64,6 +64,21 @@ project_id: project.id, external_id: 'semgrep', name: 'Semgrep', vendor: 'GitLab') end + let(:vulnerabilities_to_be_confirmed) do + vulnerabilities_table.where(project_id: project.id) + .where("title LIKE ?", "%Vulnerability to be confirmed.").order(:id) + end + + let(:vulnerabilities_to_be_resolved) do + vulnerabilities_table.where(project_id: project.id) + .where("title LIKE ?", "%Vulnerability to be resolved.").order(:id) + end + + let(:vulnerabilities_to_be_dismissed) do + vulnerabilities_table.where(project_id: project.id) + .where("title LIKE ?", "%Vulnerability to be dismissed.").order(:id) + end + def create_project(name:, group:, id: nil) project_namespace = namespaces_table.create!( name: name, @@ -95,282 +110,279 @@ def perform_migration ).perform end - describe "#perform", feature_category: :static_application_security_testing do - let(:vulnerabilities_to_be_confirmed) do - vulnerabilities_table.where(project_id: project.id) - .where("title LIKE ?", "%Vulnerability to be confirmed.").order(:id) - end - - let(:vulnerabilities_to_be_resolved) do - vulnerabilities_table.where(project_id: project.id) - .where("title LIKE ?", "%Vulnerability to be resolved.").order(:id) + describe 'when performing sanity checks against spec data' do + context 'when creating vulnerabilities' do + let(:project_id) { 112 } + + it 'creates vulnerability occurrences with uuid and location_fingerprint ' \ + 'values that correspond to the hardcoded project id' do + expect { create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') }.to change { + vulnerability_findings_table.where( + uuid: 'fa338ae7-51e1-5211-8e02-34359ce9544d', + location_fingerprint: ['1eae8e196458ba7c9f60d97e55a77e1ca9d7d7f5'].pack('H*'), + project_id: project_id + ).count + }.from(0).to(1) + .and change { + vulnerability_findings_table.count + }.by(14) + .and change { + security_findings_table.where( + uuid: 'fa338ae7-51e1-5211-8e02-34359ce9544d', + project_id: project_id + ).count + }.from(0).to(1) + .and change { + vulnerability_reads_table.where( + uuid: 'fa338ae7-51e1-5211-8e02-34359ce9544d', + project_id: project_id + ).count + }.from(0).to(1) + + vulnerability_findings_table.find_each { |vf| expect(vf.raw_metadata).to be_present } + security_findings_table.find_each { |sf| expect(sf.finding_data).to be_present } + + security_finding_data = security_findings_table + .find_by(uuid: 'fa338ae7-51e1-5211-8e02-34359ce9544d').finding_data + expect(security_finding_data).to eq({ + "name" => "Vulnerbility with issue link. 
Vulnerability to be confirmed.", + "links" => [], "assets" => [], "details" => {}, "evidence" => nil, + "location" => { "file" => "app/app.py", "start_line" => 265 }, "solution" => nil, + "description" => "SQL Injection is a critical vulnerability that can lead to data or system compromise.", + "identifiers" => [ + { "url" => "https://semgrep.dev/r/gitlab.bandit.B608", "name" => "bandit.B608", + "external_id" => "bandit.B608", "fingerprint" => "5fc4137cf46497245dba266eaf656ee07eb154b3", + "external_type" => "semgrep_id" }, + { "url" => "https://cwe.mitre.org/data/definitions/89.html", "name" => "CWE-89", + "external_id" => "89", "fingerprint" => "b74f6bacf3f4d4f92c6f4da6584963e4148b91e6", + "external_type" => "cwe" }, + { "url" => nil, "name" => "A03:2021 - Injection", "external_id" => "A03:2021", + "fingerprint" => "a8e828eea3aba35916401da9304619f0a218119b", "external_type" => "owasp" }, + { "url" => nil, "name" => "A1:2017 - Injection", "external_id" => "A1:2017", + "fingerprint" => "08de3511f2132da4d24f1b8b1d3ca14368a0259b", "external_type" => "owasp" }, + { "url" => nil, "name" => "Bandit Test ID B608", "external_id" => "B608", + "fingerprint" => "2efed5393435ae741114b2200f17077e81954270", "external_type" => "bandit_test_id" } + ], "false_positive?" => false, "raw_source_code_extract" => nil, "remediation_byte_offsets" => [] + }) + end end - let(:vulnerabilities_to_be_dismissed) do - vulnerabilities_table.where(project_id: project.id) - .where("title LIKE ?", "%Vulnerability to be dismissed.").order(:id) - end + context 'when changing vulnerability state' do + it 'resolves vulnerabilities' do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') - let(:uuids_before_corruption) { [] } - let(:corrupted_vulnerabilities) { [] } - let(:duplicated_vulnerabilities) { [] } + vulnerabilities_to_be_resolved = vulnerabilities_table.where(severity: severity_level_medium_int) - context 'when performing sanity checks against spec data' do - context 'when creating vulnerabilities' do - let(:project_id) { 112 } - - it 'creates vulnerability occurrences with uuid and location_fingerprint ' \ - 'values that correspond to the hardcoded project id' do - expect { create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') }.to change { - vulnerability_findings_table.where( - uuid: 'fa338ae7-51e1-5211-8e02-34359ce9544d', - location_fingerprint: ['1eae8e196458ba7c9f60d97e55a77e1ca9d7d7f5'].pack('H*'), - project_id: project_id - ).count - }.from(0).to(1) - .and change { - vulnerability_findings_table.count - }.by(14) - .and change { - security_findings_table.where( - uuid: 'fa338ae7-51e1-5211-8e02-34359ce9544d', - project_id: project_id - ).count - }.from(0).to(1) - .and change { - vulnerability_reads_table.where( - uuid: 'fa338ae7-51e1-5211-8e02-34359ce9544d', - project_id: project_id - ).count - }.from(0).to(1) - - vulnerability_findings_table.find_each { |vf| expect(vf.raw_metadata).to be_present } - security_findings_table.find_each { |sf| expect(sf.finding_data).to be_present } - - security_finding_data = security_findings_table - .find_by(uuid: 'fa338ae7-51e1-5211-8e02-34359ce9544d').finding_data - expect(security_finding_data).to eq({ - "name" => "Improper neutralization of special elements used in an SQL Command ('SQL Injection'). 
" \ - "Vulnerability to be confirmed.", - "links" => [], "assets" => [], "details" => {}, "evidence" => nil, - "location" => { "file" => "app/app.py", "start_line" => 265 }, "solution" => nil, - "description" => "SQL Injection is a critical vulnerability that can lead to data or system compromise.", - "identifiers" => [ - { "url" => "https://semgrep.dev/r/gitlab.bandit.B608", "name" => "bandit.B608", - "external_id" => "bandit.B608", "fingerprint" => "5fc4137cf46497245dba266eaf656ee07eb154b3", - "external_type" => "semgrep_id" }, - { "url" => "https://cwe.mitre.org/data/definitions/89.html", "name" => "CWE-89", - "external_id" => "89", "fingerprint" => "b74f6bacf3f4d4f92c6f4da6584963e4148b91e6", - "external_type" => "cwe" }, - { "url" => nil, "name" => "A03:2021 - Injection", "external_id" => "A03:2021", - "fingerprint" => "a8e828eea3aba35916401da9304619f0a218119b", "external_type" => "owasp" }, - { "url" => nil, "name" => "A1:2017 - Injection", "external_id" => "A1:2017", - "fingerprint" => "08de3511f2132da4d24f1b8b1d3ca14368a0259b", "external_type" => "owasp" }, - { "url" => nil, "name" => "Bandit Test ID B608", "external_id" => "B608", - "fingerprint" => "2efed5393435ae741114b2200f17077e81954270", "external_type" => "bandit_test_id" } - ], "false_positive?" => false, "raw_source_code_extract" => nil, "remediation_byte_offsets" => [] - }) + expect do + resolve_vulnerabilities(vulnerabilities: vulnerabilities_to_be_resolved, comment: 'resolving') end + .to change { + vulnerabilities_to_be_resolved.map(&:state) + }.from([detected_state_int] * vulnerabilities_to_be_resolved.count) + .to([resolved_state_int] * vulnerabilities_to_be_resolved.count) + .and change { + vulnerability_reads_table.where(vulnerability_id: vulnerabilities_to_be_resolved.map(&:id)).map(&:state) + }.from([detected_state_int] * vulnerabilities_to_be_resolved.count) + .to([resolved_state_int] * vulnerabilities_to_be_resolved.count) + .and change { + vulnerability_state_transitions_table.where(from_state: detected_state_int, + to_state: resolved_state_int, project_id: project.id).count + }.from(0).to(vulnerabilities_to_be_resolved.count) + .and change { + notes_table.count + }.from(0).to(vulnerabilities_to_be_resolved.count) + .and change { + system_note_metadata_table.count + }.from(0).to(vulnerabilities_to_be_resolved.count) + .and change { + vulnerabilities_to_be_resolved.pluck(:resolved_by_id) + }.from([nil] * vulnerabilities_to_be_resolved.count) + .to([user.id] * vulnerabilities_to_be_resolved.count) + .and change { + vulnerabilities_to_be_resolved.pluck(:resolved_at) + }.from([nil] * vulnerabilities_to_be_resolved.count) end - context 'when changing vulnerability state' do - it 'resolves vulnerabilities' do - create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + it 'dismisses vulnerabilities' do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') - vulnerabilities_to_be_resolved = vulnerabilities_table.where(severity: severity_level_medium_int) + vulnerabilities_to_be_dismissed = vulnerabilities_table.where(severity: severity_level_medium_int) - expect do - resolve_vulnerabilities(vulnerabilities: vulnerabilities_to_be_resolved, comment: 'resolving') - end - .to change { - vulnerabilities_to_be_resolved.map(&:state) - }.from([detected_state_int] * vulnerabilities_to_be_resolved.count) - .to([resolved_state_int] * vulnerabilities_to_be_resolved.count) - .and change { - vulnerability_reads_table.where(vulnerability_id: 
vulnerabilities_to_be_resolved.map(&:id)).map(&:state) - }.from([detected_state_int] * vulnerabilities_to_be_resolved.count) - .to([resolved_state_int] * vulnerabilities_to_be_resolved.count) - .and change { - vulnerability_state_transitions_table.where(from_state: detected_state_int, - to_state: resolved_state_int, project_id: project.id).count - }.from(0).to(vulnerabilities_to_be_resolved.count) - .and change { - notes_table.count - }.from(0).to(vulnerabilities_to_be_resolved.count) - .and change { - system_note_metadata_table.count - }.from(0).to(vulnerabilities_to_be_resolved.count) - .and change { - vulnerabilities_to_be_resolved.pluck(:resolved_by_id) - }.from([nil] * vulnerabilities_to_be_resolved.count) - .to([user.id] * vulnerabilities_to_be_resolved.count) - .and change { - vulnerabilities_to_be_resolved.pluck(:resolved_at) - }.from([nil] * vulnerabilities_to_be_resolved.count) + expect do + dismiss_vulnerabilities(vulnerabilities: vulnerabilities_to_be_dismissed, + comment: 'dismissing', dismissal_reason: acceptable_risk_dismissal_int) end + .to change { + vulnerabilities_to_be_dismissed.map(&:state) + }.from([detected_state_int] * vulnerabilities_to_be_dismissed.count) + .to([dismissed_state_int] * vulnerabilities_to_be_dismissed.count) + .and change { + vulnerability_reads_table.where(vulnerability_id: vulnerabilities_to_be_dismissed.map(&:id)).map(&:state) + }.from([detected_state_int] * vulnerabilities_to_be_dismissed.count) + .to([dismissed_state_int] * vulnerabilities_to_be_dismissed.count) + .and change { + vulnerability_state_transitions_table.where(from_state: detected_state_int, + to_state: dismissed_state_int, project_id: project.id).count + }.from(0).to(vulnerabilities_to_be_dismissed.count) + .and change { + notes_table.count + }.from(0).to(vulnerabilities_to_be_dismissed.count) + .and change { + system_note_metadata_table.count + }.from(0).to(vulnerabilities_to_be_dismissed.count) + .and change { + vulnerabilities_to_be_dismissed.pluck(:dismissed_by_id) + }.from([nil] * vulnerabilities_to_be_dismissed.count) + .to([user.id] * vulnerabilities_to_be_dismissed.count) + .and change { + vulnerabilities_to_be_dismissed.pluck(:dismissed_at) + }.from([nil] * vulnerabilities_to_be_dismissed.count) + .and change { + vulnerability_reads_table.where(vulnerability_id: vulnerabilities_to_be_dismissed.pluck(:id)) + .pluck(:dismissal_reason) + }.from([nil] * vulnerabilities_to_be_dismissed.count) + .to([acceptable_risk_dismissal_int] * vulnerabilities_to_be_dismissed.count) + end + end - it 'dismisses vulnerabilities' do - create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') - - vulnerabilities_to_be_dismissed = vulnerabilities_table.where(severity: severity_level_medium_int) + context 'when corrupting vulnerabilities' do + it 'resets the state for resolved vulnerabilities to detected' do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + resolve_vulnerabilities( + vulnerabilities: vulnerabilities_table.where(severity: severity_level_medium_int), comment: 'resolving' + ) - expect do - dismiss_vulnerabilities(vulnerabilities: vulnerabilities_to_be_dismissed, - comment: 'dismissing', dismissal_reason: acceptable_risk_dismissal_int) - end - .to change { - vulnerabilities_to_be_dismissed.map(&:state) - }.from([detected_state_int] * vulnerabilities_to_be_dismissed.count) - .to([dismissed_state_int] * vulnerabilities_to_be_dismissed.count) - .and change { - vulnerability_reads_table.where(vulnerability_id: 
vulnerabilities_to_be_dismissed.map(&:id)).map(&:state) - }.from([detected_state_int] * vulnerabilities_to_be_dismissed.count) - .to([dismissed_state_int] * vulnerabilities_to_be_dismissed.count) - .and change { - vulnerability_state_transitions_table.where(from_state: detected_state_int, - to_state: dismissed_state_int, project_id: project.id).count - }.from(0).to(vulnerabilities_to_be_dismissed.count) - .and change { - notes_table.count - }.from(0).to(vulnerabilities_to_be_dismissed.count) - .and change { - system_note_metadata_table.count - }.from(0).to(vulnerabilities_to_be_dismissed.count) - .and change { - vulnerabilities_to_be_dismissed.pluck(:dismissed_by_id) - }.from([nil] * vulnerabilities_to_be_dismissed.count) - .to([user.id] * vulnerabilities_to_be_dismissed.count) - .and change { - vulnerabilities_to_be_dismissed.pluck(:dismissed_at) - }.from([nil] * vulnerabilities_to_be_dismissed.count) - .and change { - vulnerability_reads_table.where(vulnerability_id: vulnerabilities_to_be_dismissed.pluck(:id)) - .pluck(:dismissal_reason) - }.from([nil] * vulnerabilities_to_be_dismissed.count) - .to([acceptable_risk_dismissal_int] * vulnerabilities_to_be_dismissed.count) - end + expect { corrupt_vulnerabilities }.to change { + vulnerabilities_to_be_resolved.pluck(:state) + }.from([resolved_state_int] * vulnerabilities_to_be_resolved.count) + .to([detected_state_int] * vulnerabilities_to_be_resolved.count) end - context 'when corrupting vulnerabilities' do - it 'resets the state for resolved vulnerabilities to detected' do - create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') - resolve_vulnerabilities( - vulnerabilities: vulnerabilities_table.where(severity: severity_level_medium_int), comment: 'resolving' - ) + it 'does not reset the state for confirmed or dismissed vulnerabilities' do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') - expect { corrupt_vulnerabilities }.to change { - vulnerabilities_to_be_resolved.pluck(:state) - }.from([resolved_state_int] * vulnerabilities_to_be_resolved.count) - .to([detected_state_int] * vulnerabilities_to_be_resolved.count) - end + dismiss_vulnerabilities(vulnerabilities: vulnerabilities_to_be_dismissed, + comment: 'dismissing', dismissal_reason: acceptable_risk_dismissal_int) + confirm_vulnerabilities(vulnerabilities: vulnerabilities_to_be_confirmed, comment: 'confirming') - it 'does not reset the state for confirmed or dismissed vulnerabilities' do - create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + expect { corrupt_vulnerabilities }.to not_change { + vulnerabilities_to_be_dismissed.pluck(:state) + }.from([dismissed_state_int] * vulnerabilities_to_be_dismissed.count) + .and not_change { vulnerabilities_to_be_confirmed.pluck(:state) } + .from([confirmed_state_int] * vulnerabilities_to_be_confirmed.count) + end - dismiss_vulnerabilities(vulnerabilities: vulnerabilities_to_be_dismissed, - comment: 'dismissing', dismissal_reason: acceptable_risk_dismissal_int) - confirm_vulnerabilities(vulnerabilities: vulnerabilities_to_be_confirmed, comment: 'confirming') + it 'reorders the raw_metadata identifiers for corrupted vulnerability findings, ' \ + 'placing cwe first, without altering other metadata' do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') - expect { corrupt_vulnerabilities }.to not_change { - vulnerabilities_to_be_dismissed.pluck(:state) - }.from([dismissed_state_int] * 
vulnerabilities_to_be_dismissed.count) - .and not_change { vulnerabilities_to_be_confirmed.pluck(:state) } - .from([confirmed_state_int] * vulnerabilities_to_be_confirmed.count) + ordered_vulnerabilities = vulnerabilities_table.order(:id) + + metadata_before = ordered_vulnerabilities.to_h do |v| + finding = vulnerability_findings_table.find(v.finding_id) + [v.id, Gitlab::Json.parse(finding.raw_metadata)] end - it 'reorders the raw_metadata identifiers for corrupted vulnerability findings, ' \ - 'placing cwe first, without altering other metadata' do - create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + corrupt_vulnerabilities - ordered_vulnerabilities = vulnerabilities_table.order(:id) + aggregate_failures 'all findings should have reordered identifiers only' do + ordered_vulnerabilities.each do |vulnerability| + old_metadata = metadata_before[vulnerability.id] + finding = vulnerability_findings_table.find(vulnerability.finding_id) + new_metadata = Gitlab::Json.parse(finding.raw_metadata) + old_identifiers = old_metadata['identifiers'] + new_identifiers = new_metadata['identifiers'] - metadata_before = ordered_vulnerabilities.to_h do |v| - finding = vulnerability_findings_table.find(v.finding_id) - [v.id, Gitlab::Json.parse(finding.raw_metadata)] + expect(new_identifiers.first['type']).to eq('cwe') + expect(new_identifiers).to match_array(old_identifiers) + expect(new_metadata.except('identifiers')).to eq(old_metadata.except('identifiers')) end + end + end - corrupt_vulnerabilities + it 'updates the UUID for corrupted vulnerability records' do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') - aggregate_failures 'all findings should have reordered identifiers only' do - ordered_vulnerabilities.each do |vulnerability| - old_metadata = metadata_before[vulnerability.id] - finding = vulnerability_findings_table.find(vulnerability.finding_id) - new_metadata = Gitlab::Json.parse(finding.raw_metadata) - old_identifiers = old_metadata['identifiers'] - new_identifiers = new_metadata['identifiers'] + ordered_vulnerabilities = vulnerabilities_table.order(:id) - expect(new_identifiers.first['type']).to eq('cwe') - expect(new_identifiers).to match_array(old_identifiers) - expect(new_metadata.except('identifiers')).to eq(old_metadata.except('identifiers')) - end - end - end + original_uuids = ordered_vulnerabilities.map do |vuln| + finding = vulnerability_findings_table.find(vuln.finding_id) + primary_identifier = vulnerability_identifiers_table.find(finding.primary_identifier_id) - it 'updates the UUID for corrupted vulnerability records' do - create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + Gitlab::UUID.v5( + [ + described_class::Migratable::Enums::Vulnerability.report_types.key(vuln.report_type), + primary_identifier.fingerprint.unpack1('H*'), + finding.location_fingerprint.unpack1('H*'), + vuln.project_id + ].join("-") + ) + end - ordered_vulnerabilities = vulnerabilities_table.order(:id) + corrupt_uuids = ordered_vulnerabilities.map do |vuln| + finding = vulnerability_findings_table.find(vuln.finding_id) + identifiers = Gitlab::Json.parse(finding.raw_metadata)['identifiers'] + cwe_identifier = identifiers.find { |id| id['type'] == 'cwe' } - original_uuids = ordered_vulnerabilities.map do |vuln| - finding = vulnerability_findings_table.find(vuln.finding_id) - primary_identifier = vulnerability_identifiers_table.find(finding.primary_identifier_id) + binary_fingerprint = 
described_class::Migratable::Vulnerabilities::Identifier + .sha1_fingerprint(cwe_identifier) - Gitlab::UUID.v5( - [ - described_class::Migratable::Enums::Vulnerability.report_types.key(vuln.report_type), - primary_identifier.fingerprint.unpack1('H*'), - finding.location_fingerprint.unpack1('H*'), - vuln.project_id - ].join("-") - ) - end + Gitlab::UUID.v5( + [ + described_class::Migratable::Enums::Vulnerability.report_types.key(vuln.report_type), + binary_fingerprint.unpack1('H*'), + finding.location_fingerprint.unpack1('H*'), + vuln.project_id + ].join("-") + ) + end - corrupt_uuids = ordered_vulnerabilities.map do |vuln| - finding = vulnerability_findings_table.find(vuln.finding_id) - identifiers = Gitlab::Json.parse(finding.raw_metadata)['identifiers'] - cwe_identifier = identifiers.find { |id| id['type'] == 'cwe' } - - binary_fingerprint = described_class::Migratable::Vulnerabilities::Identifier - .sha1_fingerprint(cwe_identifier) - - Gitlab::UUID.v5( - [ - described_class::Migratable::Enums::Vulnerability.report_types.key(vuln.report_type), - binary_fingerprint.unpack1('H*'), - finding.location_fingerprint.unpack1('H*'), - vuln.project_id - ].join("-") - ) + expect { corrupt_vulnerabilities }.to change { + ordered_vulnerabilities.map do |v| + vulnerability_findings_table.find(v.finding_id).uuid end - - expect { corrupt_vulnerabilities }.to change { - ordered_vulnerabilities.map do |v| - vulnerability_findings_table.find(v.finding_id).uuid - end - }.from(original_uuids).to(corrupt_uuids) - end + }.from(original_uuids).to(corrupt_uuids) end + end - context 'when no corrupt vulnerabilities exist' do - before do - create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') - end + context 'when no corrupt vulnerabilities exist' do + before do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + end - it 'does not create or alter any existing records' do - expect { perform_migration }.to not_change { - vulnerabilities_table.all.pluck(:state) - } - .and not_change { security_findings_table.count } - .and not_change { notes_table.count } - .and not_change { system_note_metadata_table.count } - end + it 'does not create or alter any existing records' do + expect { perform_migration }.to not_change { + vulnerabilities_table.all.pluck(:state) + } + .and not_change { security_findings_table.count } + .and not_change { notes_table.count } + .and not_change { system_note_metadata_table.count } end end + end + + describe "#perform", feature_category: :static_application_security_testing do + let(:uuids_before_corruption) { [] } + let(:corrupted_vulnerabilities) { [] } + let(:duplicated_vulnerabilities) { [] } - # TODO: implement this context 'when ingesting reports in sequence: correct identifiers' do - it 'performs the migration' do - expect(true).to be_truthy + before do + create_vulnerabilities( + 'gl-sast-report-semgrep-6.7.1-multiple-vulnerabilities-correct-primary-identifier.json' + ) + end + + it 'does not change the state or UUIDs of any vulnerability records' do + expect { perform_migration }.to not_change { + vulnerabilities_table.order(:id).pluck(:state) + }.and not_change { + vulnerability_reads_table.order(:id).pluck(:state) + }.and not_change { + vulnerability_reads_table.order(:id).pluck(:uuid) + }.and not_change { + vulnerability_findings_table.order(:id).pluck(:uuid) + } end end -- GitLab From 778dd768fd93b7fa9bae54573c9d4b4a0c8f4c76 Mon Sep 17 00:00:00 2001 From: Adam Cohen Date: Mon, 8 Dec 2025 19:43:00 +1100 Subject: 
[PATCH 13/16] Start working on create_vulnerability_issue_link Remove description from lookup data --- ...rabilities-correct-primary-identifier.json | 2 +- .../restore_incorrect_vulnerability_states.rb | 8 +++----- ...ore_incorrect_vulnerability_states_spec.rb | 20 +++++++++++++++++++ 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-additional-vulnerabilities-correct-primary-identifier.json b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-additional-vulnerabilities-correct-primary-identifier.json index 6b6e9f51bd005f..c2646614f728a0 100644 --- a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-additional-vulnerabilities-correct-primary-identifier.json +++ b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-additional-vulnerabilities-correct-primary-identifier.json @@ -2,7 +2,7 @@ "version": "15.2.2", "vulnerabilities": [ { - "id": "i878843d5b4edf0042e3066429a4cac5f66f8c7ad72b40056601fbb191fa13214", + "id": "878843d5b4edf0042e3066429a4cac5f66f8c7ad72b40056601fbb191fa13214", "category": "sast", "name": "Additional vulerability 1", "description": "The application was found using the `requests` module without configuring a timeout value.", diff --git a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb index 46391f56200b30..8de9ed87f0e899 100644 --- a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb +++ b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb @@ -307,7 +307,6 @@ def batch_find_correct_findings(vulnerabilities) report_type: Migratable::Enums::Vulnerability.report_types[f.report_type], location_fingerprint: f.location_fingerprint, name: f.name, - description: f.description, metadata_version: f.metadata_version, project_id: f.project_id } @@ -337,7 +336,7 @@ def batch_find_correct_findings(vulnerabilities) location_fingerprint_hex = "'\\x#{p[:location_fingerprint].unpack1('H*')}'::bytea" "(#{p[:vulnerability_id]}, #{p[:exclude_finding_id]}, #{p[:severity]}, #{p[:report_type]}, " \ - "#{location_fingerprint_hex}, #{conn.quote(p[:name])}, #{conn.quote(p[:description])}, " \ + "#{location_fingerprint_hex}, #{conn.quote(p[:name])}, " \ "#{conn.quote(p[:metadata_version])}, #{p[:project_id]})" end.join(', ') @@ -350,7 +349,7 @@ def batch_find_correct_findings(vulnerabilities) WITH search_params AS ( SELECT * FROM ( VALUES #{values_sql} - ) AS t(vulnerability_id, exclude_finding_id, severity, report_type, location_fingerprint, name, description, metadata_version, project_id) + ) AS t(vulnerability_id, exclude_finding_id, severity, report_type, location_fingerprint, name, metadata_version, project_id) ) SELECT DISTINCT ON (sp.vulnerability_id) sp.vulnerability_id, @@ -362,14 +361,13 @@ def batch_find_correct_findings(vulnerabilities) AND vo.report_type = sp.report_type AND vo.location_fingerprint = sp.location_fingerprint AND vo.name = sp.name - AND vo.description = sp.description AND vo.metadata_version = sp.metadata_version AND vo.project_id = sp.project_id AND vo.id != sp.exclude_finding_id ORDER BY sp.vulnerability_id, vo.id SQL - batch_results = conn.execute(sql) + batch_results = conn.exec_query(sql) all_results.concat(batch_results.to_a) end diff --git a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb 
b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb index bd0ea029c0e682..4e6f57dc83916f 100644 --- a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb +++ b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb @@ -9,6 +9,7 @@ let(:notes_table) { table(:notes) } let(:system_note_metadata_table) { table(:system_note_metadata) } let(:vulnerabilities_table) { table(:vulnerabilities, database: :sec) } + let(:vulnerability_issue_links_table) { table(:vulnerability_issue_links, database: :sec) } let(:vulnerability_state_transitions_table) { table(:vulnerability_state_transitions, database: :sec) } let(:vulnerability_identifiers_table) { table(:vulnerability_identifiers, database: :sec) } let(:vulnerability_finding_identifiers_table) { table(:vulnerability_occurrence_identifiers, database: :sec) } @@ -37,6 +38,8 @@ let(:dismissed_state_string) { 'dismissed' } let(:confirmed_state_string) { 'confirmed' } + let(:created_link_type_int) { 2 } + let(:detected_state_int) { 1 } let(:dismissed_state_int) { 2 } let(:confirmed_state_int) { 4 } @@ -515,6 +518,10 @@ def perform_migration dismiss_vulnerabilities(vulnerabilities: vulnerabilities_to_be_dismissed, comment: 'dismissing', dismissal_reason: acceptable_risk_dismissal_int) + create_vulnerability_issue_link( + vulnerability: vulnerabilities_table.find_by("title LIKE ?", "Vulnerbility with issue link%") + ) + corrupt_vulnerabilities end @@ -996,6 +1003,19 @@ def perform_migration end # rubocop:enable RSpec/MultipleMemoizedHelpers +def create_vulnerability_issue_link(vulnerability:) + new_time = Time.current + + vulnerability_finding = vulnerability_findings_table.find_by(id: vulnerability.finding_id) + + vulnerability_issue_links_table.create!( + vulnerability_id: vulnerability.id, link_type: created_link_type_int, + vulnerability_occurrence_id: vulnerability_finding.id, + created_at: new_time, updated_at: new_time, project_id: project.id, + issue_id: rand(1..1000) + ) +end + # replicates the changes that happen when executing semgrep 6.7.0 def corrupt_vulnerabilities new_time = Time.current -- GitLab From c22987b4cc88f10969ce01aa9ec7bdc3af654ed6 Mon Sep 17 00:00:00 2001 From: Adam Cohen Date: Sat, 13 Dec 2025 16:17:01 +1100 Subject: [PATCH 14/16] Replace all activerecord calls with raw sql --- .../restore_incorrect_vulnerability_states.rb | 581 ++++++++++-------- ...ore_incorrect_vulnerability_states_spec.rb | 5 +- 2 files changed, 320 insertions(+), 266 deletions(-) diff --git a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb index 8de9ed87f0e899..e53c4a8ee044ba 100644 --- a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb +++ b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb @@ -151,272 +151,305 @@ def finding end def perform - # CRITICAL: Use vulnerability_reads as efficient pre-filter to avoid scanning - # the 200M row vulnerability_occurrences table which causes production timeouts - - # Step 1: Filter vulnerability_reads (small, indexed) for semgrep SAST vulnerabilities - # WITHOUT joining to vulnerability_occurrences yet - each_sub_batch( - batching_scope: ->(relation) do - # Only filter by scanner and report type here - avoid joining large tables - relation - .joins('INNER JOIN vulnerability_scanners ON vulnerability_scanners.id = vulnerability_reads.scanner_id') - .where(vulnerability_scanners: 
{ external_id: 'semgrep' }) - .where(vulnerability_reads: { - report_type: Migratable::Enums::Vulnerability.report_types[:sast] - }) - .select('vulnerability_reads.vulnerability_id') - .distinct + each_sub_batch do |vulnerability_reads_batch| + affected_vulnerabilities = fetch_affected_vulnerability_data(vulnerability_reads_batch) + + if affected_vulnerabilities.empty? + puts "No vulnerabilities found for batch: #{vulnerability_reads_batch.first.id} to #{vulnerability_reads_batch.last.id}" + next end - ) do |sub_batch| - # Step 2: Get candidate vulnerability IDs from the filtered vulnerability_reads - candidate_vulnerability_ids = sub_batch.pluck(:vulnerability_id).uniq - next if candidate_vulnerability_ids.empty? + correct_findings_lookup = bulk_find_correct_findings(affected_vulnerabilities) + # TODO: put state_transitions_lookup and project_namespace_lookup into affected_vulnerabilities + # each affected_vulnerability should contain the latest state and project_namespace_id + state_transitions_lookup = bulk_lookup_state_transitions(affected_vulnerabilities) + project_namespace_lookup = bulk_lookup_project_namespaces(affected_vulnerabilities) + + identifiers_lookup = bulk_fetch_correct_primary_identifiers( + affected_vulnerabilities, correct_findings_lookup + ) - # Step 3: Load ONLY the filtered vulnerabilities with their findings - # Now vulnerability_occurrences queries are constrained to a small set of IDs - # This is where we finally touch the 200M row table, but only for specific IDs - process_vulnerabilities_batch(candidate_vulnerability_ids) + current_time = Time.current + + # Bulk data collections + state_transitions_to_insert = [] + notes_to_insert = [] + system_note_metadata_to_insert = [] + vulnerabilities_to_update = [] + security_findings_to_update = [] + orphaned_security_findings_to_delete = [] + vulnerability_reads_to_update = [] + vulnerability_findings_to_update = [] + + affected_vulnerabilities.each do |vulnerability_id, affected_vulnerability| + latest_transition = state_transitions_lookup[vulnerability_id] + correct_finding_data = correct_findings_lookup[vulnerability_id] + + # binding.pry if latest_transition && latest_transition['to_state'] == 2 + + # if semgrep v6.7.1 has been executed, then we'll have a correct_finding + if correct_finding_data + collect_state_transition_records( + project_namespace_lookup, + correct_finding_data, + latest_transition, + current_time, + state_transitions_to_insert, + notes_to_insert, + system_note_metadata_to_insert, + vulnerabilities_to_update, + vulnerability_reads_to_update + ) + else + # semgrep v6.7.1 has not been executed yet, existing vulnerability records contain + # corrupted primary identifier values + collect_uuid_update_data( + project_namespace_lookup, + affected_vulnerability, + latest_transition, + identifiers_lookup, + current_time, + state_transitions_to_insert, + notes_to_insert, + system_note_metadata_to_insert, + vulnerabilities_to_update, + security_findings_to_update, + orphaned_security_findings_to_delete, + vulnerability_reads_to_update, + vulnerability_findings_to_update + ) + end + end + + bulk_insert_state_transitions(state_transitions_to_insert) + inserted_note_ids = bulk_insert_notes(notes_to_insert) + bulk_insert_system_note_metadata(system_note_metadata_to_insert, inserted_note_ids) + + bulk_update_vulnerabilities(vulnerabilities_to_update) + bulk_update_security_findings(security_findings_to_update) + bulk_delete_security_findings(orphaned_security_findings_to_delete) + 
bulk_update_vulnerability_reads(vulnerability_reads_to_update) + bulk_update_vulnerability_findings(vulnerability_findings_to_update) end end - def process_vulnerabilities_batch(candidate_vulnerability_ids) - # Load vulnerabilities with CWE or OWASP primary identifiers (affected by the bug) - # This join to vulnerability_occurrences is now safe because it's constrained by - # the small set of candidate_vulnerability_ids - vulnerabilities = Migratable::Vulnerability - .where(id: candidate_vulnerability_ids) - .joins(findings: :primary_identifier) - .where(vulnerability_identifiers: { external_type: %w[cwe owasp] }) - .includes( - findings: [:primary_identifier, :security_findings], - state_transitions: [], - vulnerability_read: {}, - project: {} - ) + # fetches the correct primary identifiers for the given affected_vulnerabilities which + # have an incorrect primary identifier. The correct primary identifier is determined + # by: + # + # 1. Parsing the raw_metadata from the vulnerability finding, then extracting the identifier + # from the raw_metadata where `type = semgrep_id` + # 2. Generating the fingerprint using the type and value from the identifier + # 3. Using the fingerprint to lookup the correct primary identifier from the database + def bulk_fetch_correct_primary_identifiers(affected_vulnerabilities, correct_findings_lookup) + fingerprint_to_vuln = Hash.new { |hash, key| hash[key] = [] } + vulnerability_lookup = {} - return if vulnerabilities.empty? + affected_vulnerabilities_by_project = affected_vulnerabilities + .values.group_by { |av| av['project_id'] } - log_info("Processing batch of #{vulnerabilities.length} vulnerabilities") + puts "Found #{affected_vulnerabilities_by_project.length} distinct projects with vulnerabilities " \ + "out of #{affected_vulnerabilities.length} total vulnerabilities" - restore_vulnerability_states_bulk(vulnerabilities) - end + affected_vulnerabilities_by_project.each do |project_id, affected_vulnerabilities_for_project| + fingerprint_to_vuln.clear - # rubocop:disable Metrics/AbcSize, Metrics/MethodLength -- Complex bulk processing logic required for performance - def restore_vulnerability_states_bulk(vulnerabilities) - num_vulnerabilities = vulnerabilities.length - return if num_vulnerabilities == 0 - - current_time = Time.current - - # Bulk data collections - state_transitions_to_insert = [] - notes_to_insert = [] - system_note_metadata_to_insert = [] - vulnerabilities_to_update = [] - security_findings_to_update = [] - orphaned_security_findings_to_delete = [] - vulnerability_reads_to_update = [] - vulnerability_findings_to_update = [] - - # Group vulnerabilities by project for efficient identifier lookups - vulns_by_project = vulnerabilities.group_by(&:project_id) - - # Batch-load correct findings (fixes N+1 #1) - correct_findings_lookup = batch_find_correct_findings(vulnerabilities) - - # Preload identifiers for UUID updates (fixes N+1 #2) - per project - identifiers_lookup = {} - vulns_by_project.each do |project_id, project_vulns| - identifiers_lookup.merge!( - preload_identifiers_for_vulns(project_id, project_vulns, correct_findings_lookup) - ) - end + affected_vulnerabilities_for_project.each do |affected_vulnerability| + vulnerability_id = affected_vulnerability['vulnerability_id'] - # rubocop:disable Metrics/BlockLength -- Processing each vulnerability requires complex logic - vulnerabilities.each_with_index do |vuln_with_incorrect_id, idx| - log_progress(idx + 1, num_vulnerabilities) - - latest_transition = 
vuln_with_incorrect_id.state_transitions - .select { |t| t.author_id.present? } - .max_by(&:created_at) - - correct_finding = correct_findings_lookup[vuln_with_incorrect_id.id] - - # if semgrep v6.7.1 has been executed, then we'll have a correct_finding - if correct_finding - collect_state_transition_records( - correct_finding.vulnerability, - latest_transition, - current_time, - state_transitions_to_insert, - notes_to_insert, - system_note_metadata_to_insert, - vulnerabilities_to_update, - vulnerability_reads_to_update - ) - else - # semgrep v6.7.1 has not been executed yet, existing vulnerability records contain - # corrupted primary identifier values - collect_uuid_update_data( - vuln_with_incorrect_id, - latest_transition, - identifiers_lookup, - current_time, - state_transitions_to_insert, - notes_to_insert, - system_note_metadata_to_insert, - vulnerabilities_to_update, - security_findings_to_update, - orphaned_security_findings_to_delete, - vulnerability_reads_to_update, - vulnerability_findings_to_update - ) + # we don't need to find the correct primary identifier if we already have a correct finding, + # since that contains the correct primary identifier + next if correct_findings_lookup[vulnerability_id] + + parsed_metadata = Gitlab::Json.parse(affected_vulnerability['raw_metadata']) + semgrep_identifier = parsed_metadata["identifiers"].find { |id| id['type'] == 'semgrep_id' } + + fingerprint = Migratable::Vulnerabilities::Identifier.sha1_fingerprint(semgrep_identifier).unpack1('H*') + + fingerprint_to_vuln[fingerprint] << vulnerability_id + end + + next if fingerprint_to_vuln.empty? + + fingerprints_sql = fingerprint_to_vuln.keys.map { |fp| "'\\x#{fp}'::bytea" }.join(', ') + + sql = <<~SQL + SELECT id, project_id, fingerprint, external_type, external_id + FROM vulnerability_identifiers + WHERE project_id = #{project_id} + AND fingerprint IN (#{fingerprints_sql}) + SQL + + results = SecApplicationRecord.connection.execute(sql) + identifiers = results.index_by { |row| row['fingerprint'] } + + # Build lookup with both raw_metadata and identifier + fingerprint_to_vuln.each do |(fingerprint, vulnerability_ids)| + vulnerability_ids.each do |vulnerability_id| + vulnerability_lookup[vulnerability_id] = identifiers["\\x#{fingerprint}"] + end end end - # rubocop:enable Metrics/BlockLength - - # Bulk insert and update operations - bulk_insert_state_transitions(state_transitions_to_insert) - inserted_note_ids = bulk_insert_notes(notes_to_insert) - bulk_insert_system_note_metadata(system_note_metadata_to_insert, inserted_note_ids) - - bulk_update_vulnerabilities(vulnerabilities_to_update) - bulk_update_security_findings(security_findings_to_update) - bulk_delete_security_findings(orphaned_security_findings_to_delete) - bulk_update_vulnerability_reads(vulnerability_reads_to_update) - bulk_update_vulnerability_findings(vulnerability_findings_to_update) + + vulnerability_lookup + end + + def bulk_lookup_project_namespaces(affected_vulnerabilities) + project_ids_sql = affected_vulnerabilities.values.pluck('project_id').uniq.join(', ') + + sql = <<-SQL + SELECT id as project_id, project_namespace_id + FROM projects + WHERE id IN (#{project_ids_sql}) + SQL + + results = ApplicationRecord.connection.execute(sql) + + results.each_with_object({}) do |row, hash| + hash[row['project_id'].to_i] = row['project_namespace_id'].to_i + end end - # rubocop:enable Metrics/AbcSize, Metrics/MethodLength - # Batch-loads correct findings for all vulnerabilities in a single query - # Uses a lateral join pattern to 
find matching findings efficiently - # CRITICAL: Constrains vulnerability_occurrences search using vulnerability_reads to avoid full table scan - # rubocop:disable Metrics/AbcSize, Metrics/MethodLength -- Complex SQL generation required for performance - def batch_find_correct_findings(vulnerabilities) - return {} if vulnerabilities.empty? + def bulk_lookup_state_transitions(affected_vulnerabilities) + vulnerability_ids_sql = affected_vulnerabilities.keys.join(', ') + + sql = <<-SQL + SELECT DISTINCT ON (vst.vulnerability_id) + vst.* + FROM vulnerability_state_transitions vst + WHERE vst.vulnerability_id IN (#{vulnerability_ids_sql}) + AND vst.author_id IS NOT NULL + ORDER BY vst.vulnerability_id, vst.created_at DESC; + SQL + results = SecApplicationRecord.connection.execute(sql) + + results.each_with_object({}) do |row, hash| + hash[row['vulnerability_id']] = row + end + end + + # determine if there exists a _new_ duplicate vulnerability finding which matches + # all the same data as the _old_ vulnerability finding stored in vulnerability_data. + # If this duplicate finding exists, it means that semgrep 6.7.1 has been executed, + # and the state of the new duplicate finding has been initialized to `detected`. We can + # use the state transitions from the old vulnerability finding to set the correct + # state for the new duplicate vulnerability finding. + def bulk_find_correct_findings(vulnerability_data) # Build structured data with named keys for clarity - search_params = vulnerabilities.map do |vuln| - f = vuln.finding + search_params = vulnerability_data.map do |_, data| { - vulnerability_id: vuln.id, - exclude_finding_id: f.id, - severity: Migratable::Enums::Vulnerability.severity_levels[f.severity], - report_type: Migratable::Enums::Vulnerability.report_types[f.report_type], - location_fingerprint: f.location_fingerprint, - name: f.name, - metadata_version: f.metadata_version, - project_id: f.project_id + vulnerability_id: data['vulnerability_id'], + exclude_finding_id: data['finding_id'], + severity: data['severity'], + report_type: data['report_type'], + location_fingerprint: data['location_fingerprint'], + scanner_id: data['scanner_id'], + name: data['name'], + metadata_version: data['metadata_version'], + project_id: data['project_id'] } end # Get unique project IDs to constrain the search - project_ids = search_params.pluck(:project_id).uniq + project_ids = search_params.pluck(:project_id).uniq.join(', ') # Pre-filter vulnerability_occurrences using vulnerability_reads to get a small candidate set # This avoids scanning the full table - candidate_occurrence_ids = Migratable::Vulnerabilities::Read - .where(project_id: project_ids, report_type: 0) - .joins( - 'INNER JOIN vulnerability_occurrences ' \ - 'ON vulnerability_occurrences.vulnerability_id = vulnerability_reads.vulnerability_id' - ) - .pluck('vulnerability_occurrences.id') + sql = <<-SQL + SELECT vo.id + FROM vulnerability_reads vr + INNER JOIN vulnerability_occurrences vo ON vo.vulnerability_id = vr.vulnerability_id + WHERE vr.project_id IN (#{project_ids}) + AND vr.report_type = 0 + SQL - return {} if candidate_occurrence_ids.empty? 
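+      # Both the state-transition lookup above and the duplicate search below lean on Postgres
+      # DISTINCT ON, which keeps exactly one row per value of the listed expression, chosen by
+      # the ORDER BY. A minimal sketch of the pattern (illustration only, simplified columns):
+      #
+      #   SELECT DISTINCT ON (vulnerability_id) *
+      #   FROM vulnerability_state_transitions
+      #   WHERE author_id IS NOT NULL
+      #   ORDER BY vulnerability_id, created_at DESC
+      #
+      # i.e. "for each vulnerability, the most recent transition made by a user", which is the
+      # state this migration restores.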
+ conn = SecApplicationRecord.connection - log_info("Found #{candidate_occurrence_ids.length} candidate occurrence IDs " \ - "for #{vulnerabilities.length} vulnerabilities") + occurrences_for_project_ids = conn.execute(sql) - conn = ::SecApplicationRecord.connection + return {} if occurrences_for_project_ids.ntuples == 0 - values_sql = search_params.map do |p| - location_fingerprint_hex = "'\\x#{p[:location_fingerprint].unpack1('H*')}'::bytea" + candidate_occurrence_ids = occurrences_for_project_ids.to_a.map { |data| data['id'] } + values_sql = search_params.map do |p| "(#{p[:vulnerability_id]}, #{p[:exclude_finding_id]}, #{p[:severity]}, #{p[:report_type]}, " \ - "#{location_fingerprint_hex}, #{conn.quote(p[:name])}, " \ + "#{p[:scanner_id]}, '#{p[:location_fingerprint]}'::bytea, #{conn.quote(p[:name])}, " \ "#{conn.quote(p[:metadata_version])}, #{p[:project_id]})" end.join(', ') - # Process candidate IDs in batches of 1000 to avoid unbounded IN clauses + # Process vulnerability IDs in batches of 1000 to avoid unbounded IN clauses all_results = [] - candidate_occurrence_ids.each_slice(1000) do |candidate_ids_batch| - candidate_ids_sql = candidate_ids_batch.join(', ') + candidate_occurrence_ids.each_slice(1000) do |candidate_occurrence_ids_batch| + candidate_occurrence_ids_sql = candidate_occurrence_ids_batch.join(', ') sql = <<~SQL WITH search_params AS ( SELECT * FROM ( VALUES #{values_sql} - ) AS t(vulnerability_id, exclude_finding_id, severity, report_type, location_fingerprint, name, metadata_version, project_id) + ) AS t(vulnerability_id, exclude_finding_id, severity, report_type, scanner_id, location_fingerprint, name, metadata_version, project_id) ) SELECT DISTINCT ON (sp.vulnerability_id) - sp.vulnerability_id, - vo.id AS finding_id + sp.vulnerability_id as original_vulnerability_id, + vo.id AS finding_id, + vo.raw_metadata, + vr_original.state AS original_state, + vr_duplicate.state AS duplicate_state, + vr_duplicate.vulnerability_id AS duplicate_vulnerability_id, + vr_original.project_id FROM search_params sp JOIN vulnerability_occurrences vo ON - vo.id IN (#{candidate_ids_sql}) + vo.id IN (#{candidate_occurrence_ids_sql}) AND vo.severity = sp.severity AND vo.report_type = sp.report_type + AND vo.scanner_id = sp.scanner_id AND vo.location_fingerprint = sp.location_fingerprint AND vo.name = sp.name AND vo.metadata_version = sp.metadata_version AND vo.project_id = sp.project_id AND vo.id != sp.exclude_finding_id + JOIN vulnerability_reads vr_original ON vr_original.vulnerability_id = sp.vulnerability_id + JOIN vulnerability_reads vr_duplicate ON vr_duplicate.vulnerability_id = vo.vulnerability_id ORDER BY sp.vulnerability_id, vo.id SQL - batch_results = conn.exec_query(sql) + batch_results = conn.execute(sql) all_results.concat(batch_results.to_a) end - finding_ids = all_results.filter_map { |row| row['finding_id'] } - - return {} if finding_ids.empty? 
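+      # Identifier fingerprints are stored as 20-byte SHA1 digests (bytea), so the comparisons
+      # above move between the raw bytes and their hex form. A small sketch of the round trip,
+      # using a made-up identifier purely for illustration:
+      #
+      #   identifier  = { 'type' => 'semgrep_id', 'value' => 'bandit.B608' }
+      #   raw         = Migratable::Vulnerabilities::Identifier.sha1_fingerprint(identifier) # 20 raw bytes
+      #   hex         = raw.unpack1('H*')                                                    # 40-char hex
+      #   sql_literal = "'\\x#{hex}'::bytea"                                                 # bytea literal
+      #
+      # The same hex form is what feeds Gitlab::UUID.v5 when a finding's UUID has to be
+      # recomputed from report type, primary identifier fingerprint, location fingerprint and
+      # project id.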
- - # Load findings with vulnerability and project associations (fixes N+1 #3 and #4) - findings_by_id = Migratable::Vulnerabilities::Finding - .where(id: finding_ids) - .includes(vulnerability: :project) - .index_by(&:id) - - # Build lookup by vulnerability ID - all_results.each_with_object({}) do |row, lookup| - lookup[row['vulnerability_id']] = findings_by_id[row['finding_id']] + all_results.each_with_object({}) do |row, hash| + hash[row['original_vulnerability_id'].to_i] = { + 'vulnerability_id' => row['duplicate_vulnerability_id'].to_i, + 'project_id' => row['project_id'].to_i, + 'finding_id' => row['finding_id'].to_i, + 'raw_metadata' => row['raw_metadata'], + 'original_state' => row['original_state'].to_i, + 'state' => row['duplicate_state'].to_i # the current state + } end end - # rubocop:enable Metrics/AbcSize, Metrics/MethodLength - - # Preload all identifiers needed for UUID updates - def preload_identifiers_for_vulns(project_id, vulnerabilities, correct_findings_lookup) - # Only compute fingerprints for vulns that don't have a correct finding - fingerprint_to_vuln = {} - vulnerabilities.each do |vuln| - next if correct_findings_lookup[vuln.id] - - finding = vuln.finding - metadata = reorder_metadata_with_correct_primary_id(finding.raw_metadata) - semgrep_identifier = metadata["identifiers"][0] - - fingerprint = Migratable::Vulnerabilities::Identifier.sha1_fingerprint(semgrep_identifier) - - fingerprint_to_vuln[fingerprint] ||= [] - fingerprint_to_vuln[fingerprint] << vuln.id - end - - return {} if fingerprint_to_vuln.empty? - - # Single query to load all needed identifiers - identifiers = Migratable::Vulnerabilities::Identifier - .where(project_id: project_id, fingerprint: fingerprint_to_vuln.keys) - .index_by(&:fingerprint) - - # Build lookup by vulnerability ID - fingerprint_to_vuln.each_with_object({}) do |(fingerprint, vulnerability_ids), lookup| - identifier = identifiers[fingerprint] - vulnerability_ids.each { |vid| lookup[vid] = identifier } + def fetch_affected_vulnerability_data(vulnerability_reads) + vulnerability_ids = vulnerability_reads.pluck(:vulnerability_id).join(', ') + sql = <<-SQL + SELECT + v.id as vulnerability_id, v.report_type, v.severity, v.project_id, v.state, + vo.id as finding_id, vo.location_fingerprint, vo.name, vo.metadata_version, vo.scanner_id, vo.raw_metadata, + sf.id as security_finding_id, sf.partition_number, sf.finding_data, + vi.external_type, vi.external_id + FROM vulnerabilities v + INNER JOIN vulnerability_occurrences vo ON vo.vulnerability_id = v.id + INNER JOIN vulnerability_identifiers vi ON vi.id = vo.primary_identifier_id + INNER JOIN vulnerability_scanners vs ON vs.external_id = 'semgrep' and vs.project_id = v.project_id + INNER JOIN security_findings sf ON sf.uuid = vo.uuid + WHERE v.id IN (#{vulnerability_ids}) + AND vi.external_type IN ('cwe', 'owasp') + SQL + + results = SecApplicationRecord.connection.execute(sql) + + results.each_with_object({}) do |row, hash| + hash[row['vulnerability_id'].to_i] = row end end @@ -431,16 +464,18 @@ def reorder_metadata_with_correct_primary_id(raw_metadata) end def reorder_finding_data_with_correct_primary_id(finding_data) - identifiers = finding_data["identifiers"] + parsed_finding_data = Gitlab::Json.parse(finding_data) + identifiers = parsed_finding_data["identifiers"] semgrep_identifier = identifiers.find { |id| id["external_type"] == "semgrep_id" } other_identifiers = identifiers.reject { |id| id["external_type"] == "semgrep_id" } other_identifiers.sort_by! 
{ |a| a["external_id"] } - finding_data["identifiers"] = [semgrep_identifier] + other_identifiers - finding_data + parsed_finding_data["identifiers"] = [semgrep_identifier] + other_identifiers + parsed_finding_data end # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/ParameterLists -- Data collection for UUID updates def collect_uuid_update_data( + project_namespace_lookup, vuln_with_incorrect_id, latest_transition, identifiers_lookup, @@ -454,56 +489,56 @@ def collect_uuid_update_data( vulnerability_reads_to_update, vulnerability_findings_to_update ) - finding_with_incorrect_id = vuln_with_incorrect_id.finding - - metadata_with_correct_id = reorder_metadata_with_correct_primary_id(finding_with_incorrect_id.raw_metadata) + metadata_with_correct_id = reorder_metadata_with_correct_primary_id(vuln_with_incorrect_id['raw_metadata']) - security_finding = finding_with_incorrect_id.security_findings.first finding_data_with_correct_id = - (reorder_finding_data_with_correct_primary_id(security_finding.finding_data) if security_finding) + reorder_finding_data_with_correct_primary_id(vuln_with_incorrect_id['finding_data']) - correct_identifier = identifiers_lookup[vuln_with_incorrect_id.id] + correct_identifier = identifiers_lookup[vuln_with_incorrect_id['vulnerability_id']] unless correct_identifier - log_warning("Missing correct identifier for vulnerability #{vuln_with_incorrect_id.id}") + log_warning("Missing correct identifier for vulnerability #{vuln_with_incorrect_id['vulnerability_id']}") return end correct_uuid = Gitlab::UUID.v5( [ - Migratable::Enums::Vulnerability.report_types.key(vuln_with_incorrect_id.report_type), - correct_identifier.fingerprint.unpack1('H*'), - finding_with_incorrect_id.location_fingerprint.unpack1('H*'), - vuln_with_incorrect_id.project_id + Migratable::Enums::Vulnerability.report_types.key(vuln_with_incorrect_id['report_type']), + correct_identifier['fingerprint'].gsub("\\x", ""), + vuln_with_incorrect_id['location_fingerprint'].gsub("\\x", ""), + vuln_with_incorrect_id['project_id'] ].join("-") ) - finding_with_incorrect_id.security_findings.each do |sf| - orphaned_security_findings_to_delete << sf.id + # TODO: we're looping through multiple security findings here, + # but the finding_data_with_correct_id is from only the first security_finding + # finding_with_incorrect_id.security_findings.each do |sf| + # TODO: looks like we're both deleting the record with sf.id + # _and_ updating it? 
Do one or the other + orphaned_security_findings_to_delete << vuln_with_incorrect_id['security_finding_id'] security_findings_to_update << { - id: sf.id, - partition_number: sf.partition_number, + id: vuln_with_incorrect_id['security_finding_id'], + partition_number: vuln_with_incorrect_id['partition_number'], uuid: correct_uuid, finding_data: finding_data_with_correct_id } - end + # end - if vuln_with_incorrect_id.vulnerability_read - vulnerability_reads_to_update << { - vulnerability_id: vuln_with_incorrect_id.id, - uuid: correct_uuid, - dismissal_reason: nil - } - end + vulnerability_reads_to_update << { + vulnerability_id: vuln_with_incorrect_id['vulnerability_id'], + uuid: correct_uuid, + dismissal_reason: nil + } vulnerability_findings_to_update << { - id: finding_with_incorrect_id.id, - primary_identifier_id: correct_identifier.id, + id: vuln_with_incorrect_id['finding_id'], + primary_identifier_id: correct_identifier['id'], raw_metadata: metadata_with_correct_id.to_json, uuid: correct_uuid } collect_state_transition_records( + project_namespace_lookup, vuln_with_incorrect_id, latest_transition, current_time, @@ -702,6 +737,7 @@ def bulk_update_vulnerability_findings(updates) end def collect_state_transition_records( + project_namespace_lookup, vulnerability, latest_transition, current_time, @@ -712,15 +748,15 @@ def collect_state_transition_records( vulnerability_reads_to_update ) return unless latest_transition - return if vulnerability.state == latest_transition.to_state + return if vulnerability['state'] == latest_transition['to_state'] state_transitions_to_insert << build_state_transition(vulnerability, latest_transition, current_time) - note_data = build_note_data(vulnerability, latest_transition, current_time) + note_data = build_note_data(project_namespace_lookup, vulnerability, latest_transition, current_time) notes_to_insert << note_data system_note_metadata_to_insert << build_system_note_metadata( - vulnerability, latest_transition, current_time, note_data) + project_namespace_lookup, vulnerability, latest_transition, current_time, note_data) vulnerabilities_to_update << build_vulnerability_update( vulnerability, latest_transition, current_time) @@ -732,46 +768,59 @@ def collect_state_transition_records( def build_state_transition(vulnerability, latest_transition, current_time) { - vulnerability_id: vulnerability.id, - dismissal_reason: latest_transition.dismissal_reason, - author_id: latest_transition.author_id, - from_state: Migratable::Enums::Vulnerability.vulnerability_states[vulnerability.state], - to_state: Migratable::Enums::Vulnerability.vulnerability_states[latest_transition.to_state], + vulnerability_id: vulnerability['vulnerability_id'], + dismissal_reason: latest_transition['dismissal_reason'], + author_id: latest_transition['author_id'], + from_state: vulnerability['state'], + to_state: latest_transition['to_state'], + project_id: latest_transition['project_id'], created_at: current_time, updated_at: current_time, comment: format(TRANSITION_COMMENT_TEMPLATE, { - original_comment: latest_transition.comment, - transition_id: latest_transition.id + original_comment: latest_transition['comment'], + transition_id: latest_transition['id'] }) } end - def build_note_data(vulnerability, latest_transition, current_time) + def build_note_data(project_namespace_lookup, vulnerability, latest_transition, current_time) + from_state_string = Migratable::Enums::Vulnerability.vulnerability_states.key(vulnerability['state']) + to_state_string = 
Migratable::Enums::Vulnerability.vulnerability_states.key(latest_transition['to_state']) + note_text = format(SYSTEM_NOTE_TEMPLATE, { - original_comment: latest_transition.comment, - from_state: vulnerability.state.titleize, - to_state: latest_transition.to_state.titleize, - transition_id: latest_transition.id + original_comment: latest_transition['comment'], + from_state: from_state_string.titleize, + to_state: to_state_string.titleize, + transition_id: latest_transition['id'] }) { note: note_text, noteable_type: 'Vulnerability', - author_id: latest_transition.author_id, + author_id: latest_transition['author_id'], created_at: current_time, updated_at: current_time, - project_id: vulnerability.project_id, - noteable_id: vulnerability.id, + project_id: vulnerability['project_id'], + noteable_id: vulnerability['vulnerability_id'], system: true, - discussion_id: discussion_id(vulnerability.id), - namespace_id: vulnerability.project.project_namespace_id + discussion_id: discussion_id(vulnerability['vulnerability_id']), + namespace_id: project_namespace_lookup[vulnerability['project_id']] } end - def build_system_note_metadata(vulnerability, latest_transition, current_time, note_data) + def build_system_note_metadata( + project_namespace_lookup, + vulnerability, + latest_transition, + current_time, + note_data + ) + # binding.pry + to_state_string = Migratable::Enums::Vulnerability.vulnerability_states.key(latest_transition['to_state']) + { - namespace_id: vulnerability.project.project_namespace_id, - action: "vulnerability_#{latest_transition.to_state}", + namespace_id: project_namespace_lookup[vulnerability['project_id']], + action: "vulnerability_#{to_state_string}", created_at: current_time, updated_at: current_time, note_data: note_data @@ -780,8 +829,8 @@ def build_system_note_metadata(vulnerability, latest_transition, current_time, n def build_vulnerability_update(vulnerability, latest_transition, current_time) vuln_update = { - id: vulnerability.id, - state: Migratable::Enums::Vulnerability.vulnerability_states[latest_transition.to_state] + id: vulnerability['vulnerability_id'], + state: latest_transition['to_state'] } add_state_specific_attributes(vuln_update, latest_transition, current_time) @@ -790,24 +839,26 @@ def build_vulnerability_update(vulnerability, latest_transition, current_time) end def add_state_specific_attributes(vuln_update, latest_transition, current_time) - case latest_transition.to_state + author_id = latest_transition['author_id'] + + case latest_transition['to_state'] when "confirmed" - vuln_update[:confirmed_by_id] = latest_transition.author_id + vuln_update[:confirmed_by_id] = author_id vuln_update[:confirmed_at] = current_time when "resolved" - vuln_update[:resolved_by_id] = latest_transition.author_id + vuln_update[:resolved_by_id] = author_id vuln_update[:resolved_at] = current_time when "dismissed" - vuln_update[:dismissed_by_id] = latest_transition.author_id + vuln_update[:dismissed_by_id] = author_id vuln_update[:dismissed_at] = current_time end end def build_vulnerability_read_update(vulnerability, latest_transition) { - vulnerability_id: vulnerability.id, + vulnerability_id: vulnerability['vulnerability_id'], uuid: nil, - dismissal_reason: latest_transition.dismissal_reason + dismissal_reason: latest_transition['dismissal_reason'] } end diff --git a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb index 4e6f57dc83916f..4bab2d44e76a2d 
100644 --- a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb +++ b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb @@ -642,6 +642,7 @@ def perform_migration expect { perform_migration }.to change { vulnerability_state_transitions_table .where(vulnerability_id: vulnerability_ids) + .where(project_id: project.id) .where("comment LIKE ?", "%original comment automatically copied from transition%") .where(from_state: detected_state_int, to_state: resolved_state_int) .count @@ -765,6 +766,7 @@ def perform_migration comment: 'dismissing', dismissal_reason: acceptable_risk_dismissal_int) corrupted_vulnerabilities.concat(corrupt_vulnerabilities) + duplicated_vulnerabilities.concat(create_vulnerabilities( 'gl-sast-report-semgrep-6.7.1-multiple-vulnerabilities-correct-primary-identifier.json' )) @@ -941,7 +943,8 @@ def perform_migration it "creates new system note metadata for #{params[:to_state]} vulnerabilities" do expect { perform_migration }.to change { system_note_metadata_table.where(action: "vulnerability_#{to_state}").count - }.by(target_vulnerabilities.count) + }.by(target_vulnerabilities.count), + "Expected to create #{target_vulnerabilities.count} system note metadata records for state '#{to_state}'" notes = notes_table.where( "note LIKE ?", "%changed vulnerability status from Detected " \ -- GitLab From 7b1d0ea801acd382fbe9d423a8e37908ceb30475 Mon Sep 17 00:00:00 2001 From: Adam Cohen Date: Sun, 14 Dec 2025 11:52:01 +1100 Subject: [PATCH 15/16] Replace activerecord update calls with raw sql Remove double query from bulk_fetch_duplicate_data --- .../restore_incorrect_vulnerability_states.rb | 282 +++++++----------- 1 file changed, 105 insertions(+), 177 deletions(-) diff --git a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb index e53c4a8ee044ba..37fcdb21e5ed04 100644 --- a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb +++ b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb @@ -19,17 +19,6 @@ class RestoreIncorrectVulnerabilityStates < BatchedMigrationJob # This migration fixes corrupted vulnerability data introduced by semgrep v6.7.0, # released on 16 September 2025 at 20:30:02 UTC module Migratable - class Note < ApplicationRecord - has_one :system_note_metadata - end - - class SystemNoteMetadata < ApplicationRecord - belongs_to :note - end - - class Project < ApplicationRecord - end - module Enums module Security def self.scan_types = { sast: 1 } @@ -60,98 +49,19 @@ def self.severity_levels = SEVERITY_LEVELS end end - module Security - class Finding < SecApplicationRecord - self.table_name = 'security_findings' - - belongs_to :vulnerability_finding, - class_name: 'Migratable::Vulnerabilities::Finding', - primary_key: :uuid, - foreign_key: :uuid, - inverse_of: :security_findings - end - end - module Vulnerabilities - class IssueLink < SecApplicationRecord - self.table_name = 'vulnerability_issue_links' - - belongs_to :vulnerability - belongs_to :finding, foreign_key: :vulnerability_occurrence_id, - class_name: 'Migratable::Vulnerabilities::Finding', optional: true, inverse_of: false - belongs_to :issue - - enum :link_type, { related: 1, created: 2 } - end - - class StateTransition < SecApplicationRecord - self.table_name = 'vulnerability_state_transitions' - - enum :from_state, Migratable::Enums::Vulnerability.vulnerability_states, prefix: true - enum :to_state, 
Migratable::Enums::Vulnerability.vulnerability_states, prefix: true - - belongs_to :vulnerability, class_name: 'Migratable::Vulnerability', inverse_of: :state_transitions - belongs_to :vulnerability_occurrence, optional: true, class_name: 'Migratable::Vulnerabilities::Finding' - end - - class Finding < SecApplicationRecord - self.table_name = 'vulnerability_occurrences' - - enum :report_type, Migratable::Enums::Vulnerability.report_types - enum :severity, Migratable::Enums::Vulnerability.severity_levels, prefix: :severity - - belongs_to :primary_identifier, class_name: 'Migratable::Vulnerabilities::Identifier' - belongs_to :scanner, class_name: 'Migratable::Vulnerabilities::Scanner' - belongs_to :vulnerability, class_name: 'Migratable::Vulnerability', inverse_of: :findings - - has_many :security_findings, - class_name: 'Migratable::Security::Finding', - primary_key: :uuid, - foreign_key: :uuid, - inverse_of: :vulnerability_finding - end - class Identifier < SecApplicationRecord - self.table_name = 'vulnerability_identifiers' - def self.sha1_fingerprint(identifier) fingerprint_string = "#{identifier['type']}:#{identifier['value']}" [Digest::SHA1.hexdigest(fingerprint_string)].pack('H*') # rubocop:disable Fips/SHA1 -- we must use SHA1, since this is how the fingerprint is stored in the DB end end - - class Scanner < SecApplicationRecord - self.table_name = 'vulnerability_scanners' - end - - class Read < ::SecApplicationRecord - self.table_name = "vulnerability_reads" - self.primary_key = :vulnerability_id - - belongs_to :vulnerability, inverse_of: :vulnerability_read - end - end - - class Vulnerability < SecApplicationRecord - has_many :notes - has_many :findings, class_name: 'Migratable::Vulnerabilities::Finding', inverse_of: :vulnerability - has_many :state_transitions, class_name: 'Migratable::Vulnerabilities::StateTransition', - inverse_of: :vulnerability - has_one :vulnerability_read, class_name: 'Migratable::Vulnerabilities::Read', inverse_of: :vulnerability - - belongs_to :project - - enum :state, Migratable::Enums::Vulnerability.vulnerability_states - enum :severity, Migratable::Enums::Vulnerability.severity_levels, prefix: :severity - - def finding - @finding ||= findings.first - end end end def perform each_sub_batch do |vulnerability_reads_batch| + # find all vulnerabilities that are affected by the bug affected_vulnerabilities = fetch_affected_vulnerability_data(vulnerability_reads_batch) if affected_vulnerabilities.empty? 
@@ -159,14 +69,14 @@ def perform next end - correct_findings_lookup = bulk_find_correct_findings(affected_vulnerabilities) + duplicate_data_lookup = bulk_fetch_duplicate_data(affected_vulnerabilities) # TODO: put state_transitions_lookup and project_namespace_lookup into affected_vulnerabilities # each affected_vulnerability should contain the latest state and project_namespace_id state_transitions_lookup = bulk_lookup_state_transitions(affected_vulnerabilities) project_namespace_lookup = bulk_lookup_project_namespaces(affected_vulnerabilities) identifiers_lookup = bulk_fetch_correct_primary_identifiers( - affected_vulnerabilities, correct_findings_lookup + affected_vulnerabilities, duplicate_data_lookup ) current_time = Time.current @@ -183,15 +93,15 @@ def perform affected_vulnerabilities.each do |vulnerability_id, affected_vulnerability| latest_transition = state_transitions_lookup[vulnerability_id] - correct_finding_data = correct_findings_lookup[vulnerability_id] + duplicate_data = duplicate_data_lookup[vulnerability_id] # binding.pry if latest_transition && latest_transition['to_state'] == 2 # if semgrep v6.7.1 has been executed, then we'll have a correct_finding - if correct_finding_data + if duplicate_data collect_state_transition_records( project_namespace_lookup, - correct_finding_data, + duplicate_data, latest_transition, current_time, state_transitions_to_insert, @@ -248,8 +158,8 @@ def bulk_fetch_correct_primary_identifiers(affected_vulnerabilities, correct_fin affected_vulnerabilities_by_project = affected_vulnerabilities .values.group_by { |av| av['project_id'] } - puts "Found #{affected_vulnerabilities_by_project.length} distinct projects with vulnerabilities " \ - "out of #{affected_vulnerabilities.length} total vulnerabilities" + log_info("Found #{affected_vulnerabilities_by_project.length} distinct projects with vulnerabilities " \ + "out of #{affected_vulnerabilities.length} total vulnerabilities") affected_vulnerabilities_by_project.each do |project_id, affected_vulnerabilities_for_project| fingerprint_to_vuln.clear @@ -335,11 +245,10 @@ def bulk_lookup_state_transitions(affected_vulnerabilities) # and the state of the new duplicate finding has been initialized to `detected`. We can # use the state transitions from the old vulnerability finding to set the correct # state for the new duplicate vulnerability finding. - def bulk_find_correct_findings(vulnerability_data) - # Build structured data with named keys for clarity - search_params = vulnerability_data.map do |_, data| + def bulk_fetch_duplicate_data(affected_vulnerabilities) + search_params = affected_vulnerabilities.map do |vulnerability_id, data| { - vulnerability_id: data['vulnerability_id'], + vulnerability_id: vulnerability_id, exclude_finding_id: data['finding_id'], severity: data['severity'], report_type: data['report_type'], @@ -351,80 +260,59 @@ def bulk_find_correct_findings(vulnerability_data) } end - # Get unique project IDs to constrain the search - project_ids = search_params.pluck(:project_id).uniq.join(', ') - - # Pre-filter vulnerability_occurrences using vulnerability_reads to get a small candidate set - # This avoids scanning the full table - sql = <<-SQL - SELECT vo.id - FROM vulnerability_reads vr - INNER JOIN vulnerability_occurrences vo ON vo.vulnerability_id = vr.vulnerability_id - WHERE vr.project_id IN (#{project_ids}) - AND vr.report_type = 0 - SQL + return {} if search_params.empty? 
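The pre-filter query that previously built a candidate id list is dropped below because the search tuples can be joined in a single statement, which is what the commit message means by removing the double query. A standalone illustration of that technique, using made-up table and column names (items, item_id, level):

# Standalone illustration only: join in-memory tuples via a VALUES list so the
# lookup happens in one round trip. `items`, `item_id` and `level` are made up.
conn = SecApplicationRecord.connection
pairs = [[1, 'low'], [2, 'high']]

values_sql = pairs.map { |id, level| "(#{Integer(id)}, #{conn.quote(level)})" }.join(', ')

conn.execute(<<~SQL)
  WITH params AS (
    SELECT * FROM (VALUES #{values_sql}) AS t(item_id, level)
  )
  SELECT items.id, params.level
  FROM params
  JOIN items ON items.id = params.item_id
SQL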
conn = SecApplicationRecord.connection - occurrences_for_project_ids = conn.execute(sql) - - return {} if occurrences_for_project_ids.ntuples == 0 - - candidate_occurrence_ids = occurrences_for_project_ids.to_a.map { |data| data['id'] } - values_sql = search_params.map do |p| "(#{p[:vulnerability_id]}, #{p[:exclude_finding_id]}, #{p[:severity]}, #{p[:report_type]}, " \ "#{p[:scanner_id]}, '#{p[:location_fingerprint]}'::bytea, #{conn.quote(p[:name])}, " \ "#{conn.quote(p[:metadata_version])}, #{p[:project_id]})" end.join(', ') - # Process vulnerability IDs in batches of 1000 to avoid unbounded IN clauses - all_results = [] - candidate_occurrence_ids.each_slice(1000) do |candidate_occurrence_ids_batch| - candidate_occurrence_ids_sql = candidate_occurrence_ids_batch.join(', ') - - sql = <<~SQL - WITH search_params AS ( - SELECT * FROM ( - VALUES #{values_sql} - ) AS t(vulnerability_id, exclude_finding_id, severity, report_type, scanner_id, location_fingerprint, name, metadata_version, project_id) - ) - SELECT DISTINCT ON (sp.vulnerability_id) - sp.vulnerability_id as original_vulnerability_id, - vo.id AS finding_id, - vo.raw_metadata, - vr_original.state AS original_state, - vr_duplicate.state AS duplicate_state, - vr_duplicate.vulnerability_id AS duplicate_vulnerability_id, - vr_original.project_id - FROM search_params sp - JOIN vulnerability_occurrences vo ON - vo.id IN (#{candidate_occurrence_ids_sql}) - AND vo.severity = sp.severity - AND vo.report_type = sp.report_type - AND vo.scanner_id = sp.scanner_id - AND vo.location_fingerprint = sp.location_fingerprint - AND vo.name = sp.name - AND vo.metadata_version = sp.metadata_version - AND vo.project_id = sp.project_id - AND vo.id != sp.exclude_finding_id - JOIN vulnerability_reads vr_original ON vr_original.vulnerability_id = sp.vulnerability_id - JOIN vulnerability_reads vr_duplicate ON vr_duplicate.vulnerability_id = vo.vulnerability_id - ORDER BY sp.vulnerability_id, vo.id - SQL + sql = <<~SQL + WITH search_params AS ( + SELECT * FROM ( + VALUES #{values_sql} + ) AS t(vulnerability_id, exclude_finding_id, severity, report_type, scanner_id, location_fingerprint, name, metadata_version, project_id) + ) + SELECT DISTINCT ON (sp.vulnerability_id) + sp.vulnerability_id as original_vulnerability_id, + vo.id AS finding_id, + vo.raw_metadata, + vr_original.state AS original_state, + vr_duplicate.state AS duplicate_state, + vr_duplicate.vulnerability_id AS duplicate_vulnerability_id, + vr_original.project_id + FROM search_params sp + JOIN vulnerability_reads vr_candidate ON + vr_candidate.project_id = sp.project_id + AND vr_candidate.report_type = 0 + JOIN vulnerability_occurrences vo ON + vo.vulnerability_id = vr_candidate.vulnerability_id + AND vo.severity = sp.severity + AND vo.report_type = sp.report_type + AND vo.scanner_id = sp.scanner_id + AND vo.location_fingerprint = sp.location_fingerprint + AND vo.name = sp.name + AND vo.metadata_version = sp.metadata_version + AND vo.project_id = sp.project_id + AND vo.id != sp.exclude_finding_id + JOIN vulnerability_reads vr_original ON vr_original.vulnerability_id = sp.vulnerability_id + JOIN vulnerability_reads vr_duplicate ON vr_duplicate.vulnerability_id = vo.vulnerability_id + ORDER BY sp.vulnerability_id, vo.id + SQL - batch_results = conn.execute(sql) - all_results.concat(batch_results.to_a) - end + results = conn.execute(sql) - all_results.each_with_object({}) do |row, hash| + results.each_with_object({}) do |row, hash| hash[row['original_vulnerability_id'].to_i] = { 'vulnerability_id' => 
row['duplicate_vulnerability_id'].to_i, 'project_id' => row['project_id'].to_i, 'finding_id' => row['finding_id'].to_i, 'raw_metadata' => row['raw_metadata'], 'original_state' => row['original_state'].to_i, - 'state' => row['duplicate_state'].to_i # the current state + 'state' => row['duplicate_state'].to_i } end end @@ -555,18 +443,55 @@ def bulk_insert_state_transitions(data) return if data.empty? log_info("Bulk inserting #{data.length} state transitions") - data.each_slice(SUB_BATCH_SIZE) do |batch| - Migratable::Vulnerabilities::StateTransition.insert_all(batch) - end + + conn = SecApplicationRecord.connection + + values_sql = data.map do |record| + dismissal_reason = record[:dismissal_reason] ? record[:dismissal_reason].to_s : 'NULL::smallint' + comment = record[:comment] ? conn.quote(record[:comment]) : 'NULL::text' + + "(#{record[:vulnerability_id]}, #{record[:from_state]}, #{record[:to_state]}, " \ + "#{record[:author_id]}, #{dismissal_reason}, #{record[:project_id]}, " \ + "#{conn.quote(record[:created_at])}::timestamptz, #{conn.quote(record[:updated_at])}::timestamptz, " \ + "#{comment})" + end.join(', ') + + sql = <<~SQL + INSERT INTO vulnerability_state_transitions + (vulnerability_id, from_state, to_state, author_id, dismissal_reason, project_id, created_at, updated_at, comment) + VALUES #{values_sql} + SQL + + conn.execute(sql) end def bulk_insert_notes(data) return {} if data.empty? log_info("Bulk inserting #{data.length} notes") - data.each_slice(SUB_BATCH_SIZE).each_with_object({}) do |batch, mapping| - result = Migratable::Note.insert_all(batch, returning: [:id, :discussion_id]) - result.rows.each { |row| mapping[row[1]] = row[0] } + + conn = ApplicationRecord.connection + + values_sql = data.map do |record| + "(#{conn.quote(record[:note])}, #{conn.quote(record[:noteable_type])}, " \ + "#{record[:author_id]}, #{conn.quote(record[:created_at])}::timestamptz, " \ + "#{conn.quote(record[:updated_at])}::timestamptz, #{record[:project_id]}, " \ + "#{record[:noteable_id]}, #{record[:system]}, #{conn.quote(record[:discussion_id])}, " \ + "#{record[:namespace_id]})" + end.join(', ') + + sql = <<~SQL + INSERT INTO notes + (note, noteable_type, author_id, created_at, updated_at, project_id, + noteable_id, system, discussion_id, namespace_id) + VALUES #{values_sql} + RETURNING id, discussion_id + SQL + + result = conn.execute(sql) + + result.each_with_object({}) do |row, mapping| + mapping[row['discussion_id']] = row['id'].to_i end end @@ -575,19 +500,22 @@ def bulk_insert_system_note_metadata(data, inserted_note_ids) log_info("Bulk inserting #{data.length} system note metadata records") - metadata_records = data.map do |record| - { - note_id: inserted_note_ids[record[:note_data][:discussion_id]], - namespace_id: record[:namespace_id], - action: record[:action], - created_at: record[:created_at], - updated_at: record[:updated_at] - } - end + conn = ApplicationRecord.connection - metadata_records.each_slice(SUB_BATCH_SIZE) do |batch| - Migratable::SystemNoteMetadata.insert_all(batch) - end + values_sql = data.map do |record| + note_id = inserted_note_ids[record[:note_data][:discussion_id]] + + "(#{note_id}, #{record[:namespace_id]}, #{conn.quote(record[:action])}, " \ + "#{conn.quote(record[:created_at])}::timestamptz, #{conn.quote(record[:updated_at])}::timestamptz)" + end.join(', ') + + sql = <<~SQL + INSERT INTO system_note_metadata + (note_id, namespace_id, action, created_at, updated_at) + VALUES #{values_sql} + SQL + + conn.execute(sql) end # rubocop:disable Metrics/AbcSize, 
Metrics/MethodLength, Metrics/BlockLength -- UPDATE FROM VALUES requires building multiple column sets -- GitLab From 96eb7bfb5841c5988084cc4d4a801de5171866e1 Mon Sep 17 00:00:00 2001 From: Adam Cohen Date: Mon, 15 Dec 2025 21:46:56 +1100 Subject: [PATCH 16/16] Combine all affected data in single structure Refactoring to fix rubocop violations Remove all rubocop violations Remove SecApplicationRecord from identifiers class Do not delete orphaned security findings Allow migrating with project/namespace_id Remove QueueRestoreIncorrectVulnerabilityStates Remove queued_migration_version Fix typo and test --- ...restore_incorrect_vulnerability_states.yml | 6 +- ..._restore_incorrect_vulnerability_states.rb | 39 -- db/schema_migrations/20251020182838 | 1 - ...estore_incorrect_vulnerability_states.rake | 27 + .../restore_incorrect_vulnerability_states.rb | 102 +++ ...rabilities-correct-primary-identifier.json | 2 +- ...ncorrect_vulnerability_states_rake_spec.rb | 85 +++ ...ore_incorrect_vulnerability_states_spec.rb | 153 +++++ .../restore_incorrect_vulnerability_states.rb | 585 ++++++++++-------- ...ore_incorrect_vulnerability_states_spec.rb | 66 +- ...ore_incorrect_vulnerability_states_spec.rb | 27 - 11 files changed, 746 insertions(+), 347 deletions(-) delete mode 100644 db/post_migrate/20251020182838_queue_restore_incorrect_vulnerability_states.rb delete mode 100644 db/schema_migrations/20251020182838 create mode 100644 ee/lib/tasks/gitlab/vulnerabilities/restore_incorrect_vulnerability_states.rake create mode 100644 ee/lib/vulnerabilities/rake/restore_incorrect_vulnerability_states.rb create mode 100644 ee/spec/lib/tasks/vulnerabilities/restore_incorrect_vulnerability_states_rake_spec.rb create mode 100644 ee/spec/lib/vulnerabilities/rake/restore_incorrect_vulnerability_states_spec.rb delete mode 100644 spec/migrations/20251020182838_queue_restore_incorrect_vulnerability_states_spec.rb diff --git a/db/docs/batched_background_migrations/restore_incorrect_vulnerability_states.yml b/db/docs/batched_background_migrations/restore_incorrect_vulnerability_states.yml index e52246df146ff4..1824c8738d8da1 100644 --- a/db/docs/batched_background_migrations/restore_incorrect_vulnerability_states.yml +++ b/db/docs/batched_background_migrations/restore_incorrect_vulnerability_states.yml @@ -3,6 +3,6 @@ migration_job_name: RestoreIncorrectVulnerabilityStates description: Restores incorrect vulnerability states caused by a bug in GitLab Semgrep v6.7.0 feature_category: static_application_security_testing introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/211669 -milestone: '18.7' -queued_migration_version: 20251020182838 -finalized_by: # version of the migration that finalized this BBM +milestone: '18.8' +queued_migration_version: +finalized_by: 00000000000000 # This migration was enqueued via rake task and can't be finalized. diff --git a/db/post_migrate/20251020182838_queue_restore_incorrect_vulnerability_states.rb b/db/post_migrate/20251020182838_queue_restore_incorrect_vulnerability_states.rb deleted file mode 100644 index 002f76b814782f..00000000000000 --- a/db/post_migrate/20251020182838_queue_restore_incorrect_vulnerability_states.rb +++ /dev/null @@ -1,39 +0,0 @@ -# frozen_string_literal: true - -# See https://docs.gitlab.com/ee/development/database/batched_background_migrations.html -# for more information on when/how to queue batched background migrations - -# Update below commented lines with appropriate values. 
- -class QueueRestoreIncorrectVulnerabilityStates < Gitlab::Database::Migration[2.3] - milestone '18.7' - - # Select the applicable gitlab schema for your batched background migration - restrict_gitlab_migration gitlab_schema: :gitlab_sec - - # restrict_gitlab_migration # gitlab_schema: :gitlab_main_org / :gitlab_ci / ... - - MIGRATION = "RestoreIncorrectVulnerabilityStates" - DELAY_INTERVAL = 2.minutes - BATCH_SIZE = 1000 - SUB_BATCH_SIZE = 100 - - def up - # If you are requeueing an already executed migration, you need to delete the prior batched migration record - # for the new enqueue to be executed, else, you can delete this line. - # delete_batched_background_migration(MIGRATION, :vulnerability_reads, :vulnerability_id, []) - - # Use vulnerability_reads (denormalized table) to avoid scanning the 200M row vulnerability_occurrences table - queue_batched_background_migration( - MIGRATION, - :vulnerability_reads, - :vulnerability_id, - batch_size: BATCH_SIZE, - sub_batch_size: SUB_BATCH_SIZE - ) - end - - def down - delete_batched_background_migration(MIGRATION, :vulnerability_reads, :vulnerability_id, []) - end -end diff --git a/db/schema_migrations/20251020182838 b/db/schema_migrations/20251020182838 deleted file mode 100644 index 6129d2c00d6cfe..00000000000000 --- a/db/schema_migrations/20251020182838 +++ /dev/null @@ -1 +0,0 @@ -f3f0d068a7817c20f208f77d6851259f8551cea91532a12bbc623c53277e5df2 \ No newline at end of file diff --git a/ee/lib/tasks/gitlab/vulnerabilities/restore_incorrect_vulnerability_states.rake b/ee/lib/tasks/gitlab/vulnerabilities/restore_incorrect_vulnerability_states.rake new file mode 100644 index 00000000000000..da98cfaaab203f --- /dev/null +++ b/ee/lib/tasks/gitlab/vulnerabilities/restore_incorrect_vulnerability_states.rake @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +namespace :gitlab do + namespace :vulnerabilities do + desc 'Restore vulnerability states affected by https://gitlab.com/gitlab-org/gitlab/-/issues/577229 for a namespace' + task :restore_incorrect_vulnerability_states_for_namespace, [:namespace_id] => :environment do |_, args| + Vulnerabilities::Rake::RestoreIncorrectVulnerabilityStates.new(namespace_id: args[:namespace_id]).execute + end + + desc 'Restore vulnerability states affected by https://gitlab.com/gitlab-org/gitlab/-/issues/577229 for a project' + task :restore_incorrect_vulnerability_states_for_project, [:project_id] => :environment do |_, args| + Vulnerabilities::Rake::RestoreIncorrectVulnerabilityStates.new(project_id: args[:project_id]).execute + end + + task 'restore_incorrect_vulnerability_states_for_namespace:revert', [:namespace_id] => :environment do |_, args| + Vulnerabilities::Rake::RestoreIncorrectVulnerabilityStates.new( + namespace_id: args[:namespace_id], revert: true + ).execute + end + + task 'restore_incorrect_vulnerability_states_for_project:revert', [:project_id] => :environment do |_, args| + Vulnerabilities::Rake::RestoreIncorrectVulnerabilityStates.new( + project_id: args[:project_id], revert: true + ).execute + end + end +end diff --git a/ee/lib/vulnerabilities/rake/restore_incorrect_vulnerability_states.rb b/ee/lib/vulnerabilities/rake/restore_incorrect_vulnerability_states.rb new file mode 100644 index 00000000000000..2976e5aca80fee --- /dev/null +++ b/ee/lib/vulnerabilities/rake/restore_incorrect_vulnerability_states.rb @@ -0,0 +1,102 @@ +# frozen_string_literal: true + +module Vulnerabilities + module Rake + class RestoreIncorrectVulnerabilityStates + include 
Gitlab::Database::Migrations::BatchedBackgroundMigrationHelpers + + MIGRATION = 'RestoreIncorrectVulnerabilityStates' + INSTANCE_ARG = 'instance' + + def initialize(namespace_id: nil, project_id: nil, revert: false) + @namespace_id = namespace_id + @project_id = project_id + @revert = revert + end + + attr_reader :namespace_id, :project_id, :revert + + def execute + validate_args! + + Gitlab::Database::SharedModel.using_connection(connection) do + if revert + delete_migration + else + queue_migration + end + end + end + + def allowed_gitlab_schemas + [:gitlab_sec] + end + + private + + def validate_args! + abort "One of namespace_id or project_id must be provided" if namespace_id.nil? && project_id.nil? + + validate_namespace! if namespace_id.present? + validate_project! if project_id.present? + end + + def validate_project! + abort "Error: Expected project_id '#{project_id}' to be a number." unless /\d+/.match?(project_id) + + project = Project.find_by_id(project_id) + abort "Project:#{project_id} not found." if project.blank? + end + + def validate_namespace! + unless /(\d+|instance)/.match?(namespace_id) + abort "Error: Expected namespace_id '#{namespace_id}' to be a number.\nUse " \ + "`gitlab-rake 'gitlab:vulnerabilities:restore_incorrect_vulnerability_states_for_namespace[instance]'` " \ + "to perform an instance migration." + end + + return true if instance_migration? + + namespace = Namespace.find_by_id(namespace_id) + abort "Namespace:#{namespace_id} not found." if namespace.blank? + abort 'Namespace must be top-level.' if namespace.parent.present? + end + + def queue_migration + queue_batched_background_migration( + MIGRATION, + :vulnerability_reads, + :vulnerability_id, + job_args, + gitlab_schema: :gitlab_sec + ) + + puts "Enqueued background migration: #{MIGRATION}, job_args: #{job_args}" + end + + def delete_migration + delete_batched_background_migration(MIGRATION, :vulnerability_reads, :vulnerability_id, [job_args]) + + puts "Deleted background migration: #{MIGRATION}, job_args: #{job_args}" + end + + def job_args + return { namespace_id: namespace_id } if namespace_id.present? + + { project_id: project_id.to_i } + end + + def instance_migration? 
+ namespace_id == INSTANCE_ARG + end + + def version + Time.now.utc.strftime("%Y%m%d%H%M%S") + end + + def connection + SecApplicationRecord.connection + end + end + end +end diff --git a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-additional-vulnerabilities-correct-primary-identifier.json b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-additional-vulnerabilities-correct-primary-identifier.json index c2646614f728a0..1ac0093aef4fc5 100644 --- a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-additional-vulnerabilities-correct-primary-identifier.json +++ b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-additional-vulnerabilities-correct-primary-identifier.json @@ -4,7 +4,7 @@ { "id": "878843d5b4edf0042e3066429a4cac5f66f8c7ad72b40056601fbb191fa13214", "category": "sast", - "name": "Additional vulerability 1", + "name": "Additional vulnerability 1", "description": "The application was found using the `requests` module without configuring a timeout value.", "cve": "semgrep_id:bandit.B501:44:45", "severity": "Medium", diff --git a/ee/spec/lib/tasks/vulnerabilities/restore_incorrect_vulnerability_states_rake_spec.rb b/ee/spec/lib/tasks/vulnerabilities/restore_incorrect_vulnerability_states_rake_spec.rb new file mode 100644 index 00000000000000..8b41ada8e75c8e --- /dev/null +++ b/ee/spec/lib/tasks/vulnerabilities/restore_incorrect_vulnerability_states_rake_spec.rb @@ -0,0 +1,85 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe 'Restore vulnerability states rake task', feature_category: :vulnerability_management do + include RakeHelpers + + before_all do + Rake.application.rake_require 'ee/lib/tasks/gitlab/vulnerabilities/restore_incorrect_vulnerability_states', + [Rails.root.to_s] + Rake::Task.define_task(:environment) + end + + context 'when using a namespace' do + describe 'restore_incorrect_vulnerability_states_for_namespace' do + let(:args) { ['123456'] } + let(:expected_args) { { namespace_id: '123456' } } + + subject(:task) do + run_rake_task('gitlab:vulnerabilities:restore_incorrect_vulnerability_states_for_namespace', args) + end + + it 'calls rake service with args' do + expect_next_instance_of(Vulnerabilities::Rake::RestoreIncorrectVulnerabilityStates, expected_args) do |instance| + expect(instance).to receive(:execute) + end + + task + end + end + + describe 'restore_incorrect_vulnerability_states_for_namespace:revert' do + let(:args) { ['123456'] } + let(:expected_args) { { namespace_id: '123456', revert: true } } + + subject(:task) do + run_rake_task('gitlab:vulnerabilities:restore_incorrect_vulnerability_states_for_namespace:revert', args) + end + + it 'calls rake service with args' do + expect_next_instance_of(Vulnerabilities::Rake::RestoreIncorrectVulnerabilityStates, expected_args) do |instance| + expect(instance).to receive(:execute) + end + + task + end + end + end + + context 'when using a project_id' do + describe 'restore_incorrect_vulnerability_states_for_project' do + let(:args) { ['123456'] } + let(:expected_args) { { project_id: '123456' } } + + subject(:task) do + run_rake_task('gitlab:vulnerabilities:restore_incorrect_vulnerability_states_for_project', args) + end + + it 'calls rake service with args' do + expect_next_instance_of(Vulnerabilities::Rake::RestoreIncorrectVulnerabilityStates, expected_args) do |instance| + expect(instance).to receive(:execute) + end + + task + end + end + + describe 
'restore_incorrect_vulnerability_states_for_project:revert' do + let(:args) { ['123456'] } + let(:expected_args) { { project_id: '123456', revert: true } } + + subject(:task) do + run_rake_task('gitlab:vulnerabilities:restore_incorrect_vulnerability_states_for_project:revert', args) + end + + it 'calls rake service with args' do + expect_next_instance_of(Vulnerabilities::Rake::RestoreIncorrectVulnerabilityStates, expected_args) do |instance| + expect(instance).to receive(:execute) + end + + task + end + end + end +end diff --git a/ee/spec/lib/vulnerabilities/rake/restore_incorrect_vulnerability_states_spec.rb b/ee/spec/lib/vulnerabilities/rake/restore_incorrect_vulnerability_states_spec.rb new file mode 100644 index 00000000000000..bab843b2c0cb56 --- /dev/null +++ b/ee/spec/lib/vulnerabilities/rake/restore_incorrect_vulnerability_states_spec.rb @@ -0,0 +1,153 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Vulnerabilities::Rake::RestoreIncorrectVulnerabilityStates, feature_category: :vulnerability_management do + include MigrationsHelpers + + let(:args) { nil } + + describe 'execute' do + let(:batched_migration) { described_class::MIGRATION } + let(:connection) { SecApplicationRecord.connection } + + def up + described_class.new(**args).execute + end + + def down + described_class.new(**args, revert: true).execute + end + + context 'when migrating by namespace_id' do + let(:args) { { namespace_id: namespace_id } } + + context 'when performing an instance migration' do + let(:namespace_id) { 'instance' } + + it 'schedules migration' do + up + + Gitlab::Database::SharedModel.using_connection(connection) do + expect(batched_migration).to have_scheduled_batched_migration( + table_name: :vulnerability_reads, + column_name: :vulnerability_id, + gitlab_schema: :gitlab_sec, + job_arguments: [args] + ) + end + + down + + Gitlab::Database::SharedModel.using_connection(connection) do + expect(batched_migration).not_to have_scheduled_batched_migration + end + end + end + + context 'when migrating a namespace' do + let_it_be(:namespace) { create(:namespace) } + let_it_be(:namespace_id) { namespace.id.to_s } + + it 'schedules migration with parsed namespace_id' do + up + + Gitlab::Database::SharedModel.using_connection(connection) do + expect(batched_migration).to have_scheduled_batched_migration( + table_name: :vulnerability_reads, + column_name: :vulnerability_id, + gitlab_schema: :gitlab_sec, + job_arguments: [args] + ) + end + + down + + Gitlab::Database::SharedModel.using_connection(connection) do + expect(batched_migration).not_to have_scheduled_batched_migration + end + end + end + + describe 'validations' do + context 'when namespace_id is not a number' do + let(:namespace_id) { 'foo' } + + it 'prints error and exits' do + expect { up }.to raise_error(SystemExit) + .and output("Error: Expected namespace_id 'foo' to be a number.\n" \ + "Use `gitlab-rake 'gitlab:vulnerabilities:restore_incorrect_vulnerability_states_for_namespace" \ + "[instance]'` to perform an instance migration.\n").to_stderr + end + end + + context 'when namespace_id does not exist' do + let(:namespace_id) { non_existing_record_id.to_s } + + it 'prints error and exits' do + expect { up }.to raise_error(SystemExit) + .and output("Namespace:#{namespace_id} not found.\n").to_stderr + end + end + + context 'when namespace is a subgroup' do + let_it_be(:namespace) { create(:group, :nested) } + let_it_be(:namespace_id) { namespace.id.to_s } + + it 'prints error and exits' do + expect { up }.to 
raise_error(SystemExit) + .and output("Namespace must be top-level.\n").to_stderr + end + end + end + end + + context 'when migrating by project_id' do + let(:args) { { project_id: project_id } } + + context 'when migrating a project' do + let_it_be(:project) { create(:project) } + let_it_be(:project_id) { project.id.to_s } + + it 'schedules migration with parsed project_id' do + up + + Gitlab::Database::SharedModel.using_connection(connection) do + expect(batched_migration).to have_scheduled_batched_migration( + table_name: :vulnerability_reads, + column_name: :vulnerability_id, + gitlab_schema: :gitlab_sec, + job_arguments: [{ project_id: project_id.to_i }] + ) + end + + down + + Gitlab::Database::SharedModel.using_connection(connection) do + expect(batched_migration).not_to have_scheduled_batched_migration + end + end + end + + describe 'validations' do + context 'when project_id is not a number' do + let(:project_id) { 'foo' } + + it 'prints error and exits' do + expect { up }.to raise_error(SystemExit) + .and output("Error: Expected project_id 'foo' to be a number.\n").to_stderr + end + end + + context 'when project_id does not exist' do + let(:project_id) { non_existing_record_id.to_s } + + it 'prints error and exits' do + expect { up }.to raise_error(SystemExit) + .and output("Project:#{project_id} not found.\n").to_stderr + end + end + end + end + end +end diff --git a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb index 37fcdb21e5ed04..2d4fdbb06ba554 100644 --- a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb +++ b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb @@ -4,6 +4,7 @@ module Gitlab module BackgroundMigration class RestoreIncorrectVulnerabilityStates < BatchedMigrationJob + job_arguments :job_args operation_name :restore_incorrect_vulnerability_states feature_category :static_application_security_testing @@ -50,7 +51,7 @@ def self.severity_levels = SEVERITY_LEVELS end module Vulnerabilities - class Identifier < SecApplicationRecord + class Identifier def self.sha1_fingerprint(identifier) fingerprint_string = "#{identifier['type']}:#{identifier['value']}" [Digest::SHA1.hexdigest(fingerprint_string)].pack('H*') # rubocop:disable Fips/SHA1 -- we must use SHA1, since this is how the fingerprint is stored in the DB @@ -59,87 +60,130 @@ def self.sha1_fingerprint(identifier) end end + def scoped_vulnerability_reads(vulnerability_reads) + relation = vulnerability_reads.where(report_type: Migratable::Enums::Vulnerability.report_types[:sast]) + project_id = job_args['project_id'] + namespace_id = job_args['namespace_id'] + + if project_id + log_info("Migrating affected vulnerabilities with project_id #{project_id}") + return relation.where(project_id: project_id) + end + + if namespace_id == 'instance' + log_info("Migrating affected vulnerabilities for entire instance") + return relation + end + + log_info("Migrating affected vulnerabilities with namespace_id #{namespace_id}") + relation + .where(vulnerability_reads.arel_table[:traversal_ids].gteq([namespace_id.to_i])) + .where(vulnerability_reads.arel_table[:traversal_ids].lt([namespace_id.to_i.next])) + end + def perform each_sub_batch do |vulnerability_reads_batch| - # find all vulnerabilities that are affected by the bug - affected_vulnerabilities = fetch_affected_vulnerability_data(vulnerability_reads_batch) + log_info("Processing sub-batch with 
#{vulnerability_reads_batch.count} records") + vulnerability_reads = scoped_vulnerability_reads(vulnerability_reads_batch) - if affected_vulnerabilities.empty? - puts "No vulnerabilities found for batch: #{vulnerability_reads_batch.first.id} to #{vulnerability_reads_batch.last.id}" - next - end + next if vulnerability_reads.blank? - duplicate_data_lookup = bulk_fetch_duplicate_data(affected_vulnerabilities) - # TODO: put state_transitions_lookup and project_namespace_lookup into affected_vulnerabilities - # each affected_vulnerability should contain the latest state and project_namespace_id - state_transitions_lookup = bulk_lookup_state_transitions(affected_vulnerabilities) - project_namespace_lookup = bulk_lookup_project_namespaces(affected_vulnerabilities) + process_vulnerability_batch(vulnerability_reads_batch) + end + end - identifiers_lookup = bulk_fetch_correct_primary_identifiers( - affected_vulnerabilities, duplicate_data_lookup - ) + def process_vulnerability_batch(vulnerability_reads_batch) + # find all vulnerabilities that are affected by the bug, and combine all related data + affected_vulnerability_data = build_affected_vulnerability_data(vulnerability_reads_batch) - current_time = Time.current - - # Bulk data collections - state_transitions_to_insert = [] - notes_to_insert = [] - system_note_metadata_to_insert = [] - vulnerabilities_to_update = [] - security_findings_to_update = [] - orphaned_security_findings_to_delete = [] - vulnerability_reads_to_update = [] - vulnerability_findings_to_update = [] - - affected_vulnerabilities.each do |vulnerability_id, affected_vulnerability| - latest_transition = state_transitions_lookup[vulnerability_id] - duplicate_data = duplicate_data_lookup[vulnerability_id] - - # binding.pry if latest_transition && latest_transition['to_state'] == 2 - - # if semgrep v6.7.1 has been executed, then we'll have a correct_finding - if duplicate_data - collect_state_transition_records( - project_namespace_lookup, - duplicate_data, - latest_transition, - current_time, - state_transitions_to_insert, - notes_to_insert, - system_note_metadata_to_insert, - vulnerabilities_to_update, - vulnerability_reads_to_update - ) - else - # semgrep v6.7.1 has not been executed yet, existing vulnerability records contain - # corrupted primary identifier values - collect_uuid_update_data( - project_namespace_lookup, - affected_vulnerability, - latest_transition, - identifiers_lookup, - current_time, - state_transitions_to_insert, - notes_to_insert, - system_note_metadata_to_insert, - vulnerabilities_to_update, - security_findings_to_update, - orphaned_security_findings_to_delete, - vulnerability_reads_to_update, - vulnerability_findings_to_update - ) - end + if affected_vulnerability_data.empty? 
+ log_info("No vulnerabilities found for batch") + return + end + + collections = initialize_collections + collect_updates_for_vulnerabilities(affected_vulnerability_data, collections) + persist_all_updates(collections) + end + + def initialize_collections + { + state_transitions: [], + notes: [], + system_note_metadata: [], + vulnerabilities: [], + security_findings: [], + vulnerability_reads: [], + vulnerability_findings: [] + } + end + + def collect_updates_for_vulnerabilities(affected_vulnerability_data, collections) + current_time = Time.current + + affected_vulnerability_data.each_value do |affected_vuln_data| + # If semgrep v6.7.1 has been executed, then we'll have a duplicate vulnerability + if affected_vuln_data[:duplicate] + collect_state_restoration_data( + affected_vuln_data, + current_time, + collections[:state_transitions], + collections[:notes], + collections[:system_note_metadata], + collections[:vulnerabilities], + collections[:vulnerability_reads] + ) + else + # semgrep v6.7.1 has not been executed yet, existing vulnerability records contain + # corrupted primary identifier values + collect_full_correction_data( + affected_vuln_data, + current_time, + collections[:state_transitions], + collections[:notes], + collections[:system_note_metadata], + collections[:vulnerabilities], + collections[:vulnerability_reads], + collections[:security_findings], + collections[:vulnerability_findings] + ) end + end + end - bulk_insert_state_transitions(state_transitions_to_insert) - inserted_note_ids = bulk_insert_notes(notes_to_insert) - bulk_insert_system_note_metadata(system_note_metadata_to_insert, inserted_note_ids) + def persist_all_updates(collections) + bulk_insert_state_transitions(collections[:state_transitions]) + inserted_note_ids = bulk_insert_notes(collections[:notes]) + bulk_insert_system_note_metadata(collections[:system_note_metadata], inserted_note_ids) - bulk_update_vulnerabilities(vulnerabilities_to_update) - bulk_update_security_findings(security_findings_to_update) - bulk_delete_security_findings(orphaned_security_findings_to_delete) - bulk_update_vulnerability_reads(vulnerability_reads_to_update) - bulk_update_vulnerability_findings(vulnerability_findings_to_update) + bulk_update_vulnerabilities(collections[:vulnerabilities]) + bulk_update_security_findings(collections[:security_findings]) + bulk_update_vulnerability_reads(collections[:vulnerability_reads]) + bulk_update_vulnerability_findings(collections[:vulnerability_findings]) + end + + def build_affected_vulnerability_data(vulnerability_reads_batch) + affected_vulnerabilities = fetch_affected_vulnerability_data(vulnerability_reads_batch) + return {} if affected_vulnerabilities.empty? 
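A short aside on the namespace scoping in scoped_vulnerability_reads above: the two traversal_ids comparisons select every vulnerability_reads row whose namespace path starts with the given top-level namespace. A rough sketch of the SQL this is intended to produce, assuming traversal_ids is a Postgres integer array as on vulnerability_reads:

# Rough sketch only: the half-open array range that matches a namespace subtree.
# For namespace 42 this yields traversal_ids >= ARRAY[42] AND traversal_ids < ARRAY[43],
# which matches [42], [42, 7], [42, 7, 99] and so on, but not [41, 42] or [43].
def namespace_scope_sql(namespace_id)
  id = Integer(namespace_id)
  "traversal_ids >= ARRAY[#{id}] AND traversal_ids < ARRAY[#{id + 1}]"
end

puts namespace_scope_sql(42)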
+ + # fetch all related data + duplicate_data = fetch_duplicate_data(affected_vulnerabilities) + state_transitions = fetch_state_transitions(affected_vulnerabilities) + project_namespaces = fetch_project_namespaces(affected_vulnerabilities) + correct_identifiers = fetch_correct_primary_identifiers(affected_vulnerabilities, duplicate_data) + + # combine all related vulnerability data into a single object + affected_vulnerabilities.each_with_object({}) do |(vulnerability_id, affected_vulnerability), combined| + combined[vulnerability_id] = { + # original corrupted vulnerability data, which exists because semgrep 6.7.0 has been executed + corrupted: affected_vulnerability, + # duplicate vulnerability data, which only exists if semgrep 6.7.0 AND 6.7.1 have been executed + duplicate: duplicate_data[vulnerability_id], + # shared data + project_namespace_id: project_namespaces[affected_vulnerability[:project_id]], + latest_transition: state_transitions[vulnerability_id], + correct_identifier: correct_identifiers[vulnerability_id] + } end end @@ -151,61 +195,91 @@ def perform # from the raw_metadata where `type = semgrep_id` # 2. Generating the fingerprint using the type and value from the identifier # 3. Using the fingerprint to lookup the correct primary identifier from the database - def bulk_fetch_correct_primary_identifiers(affected_vulnerabilities, correct_findings_lookup) - fingerprint_to_vuln = Hash.new { |hash, key| hash[key] = [] } + def fetch_correct_primary_identifiers(affected_vulnerabilities, duplicate_data) vulnerability_lookup = {} - affected_vulnerabilities_by_project = affected_vulnerabilities - .values.group_by { |av| av['project_id'] } + affected_vulnerabilities_by_project = group_vulnerabilities_by_project(affected_vulnerabilities) + + affected_vulnerabilities_by_project.each do |project_id, vulnerabilities_for_project| + project_identifiers = fetch_identifiers_for_project( + project_id, + vulnerabilities_for_project, + duplicate_data + ) + vulnerability_lookup.merge!(project_identifiers) + end + + vulnerability_lookup + end - log_info("Found #{affected_vulnerabilities_by_project.length} distinct projects with vulnerabilities " \ + def group_vulnerabilities_by_project(affected_vulnerabilities) + grouped = affected_vulnerabilities.values.group_by { |av| av[:project_id] } + + log_info("Found #{grouped.length} distinct projects with vulnerabilities " \ "out of #{affected_vulnerabilities.length} total vulnerabilities") - affected_vulnerabilities_by_project.each do |project_id, affected_vulnerabilities_for_project| - fingerprint_to_vuln.clear + grouped + end - affected_vulnerabilities_for_project.each do |affected_vulnerability| - vulnerability_id = affected_vulnerability['vulnerability_id'] + def fetch_identifiers_for_project(project_id, vulnerabilities, duplicate_data) + fingerprint_to_vuln = build_fingerprint_mapping(vulnerabilities, duplicate_data) + return {} if fingerprint_to_vuln.empty? 
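The identifier helpers that follow recover the correct primary identifier by fingerprint. The round trip from an identifier to the bytea literal used in that lookup, sketched standalone with a made-up identifier value:

# Standalone sketch of the fingerprint round trip; the identifier value is an example.
require 'digest'

identifier = { 'type' => 'semgrep_id', 'value' => 'bandit.B501' }

# Same derivation as Migratable::Vulnerabilities::Identifier.sha1_fingerprint:
# SHA1 over "type:value", packed into the 20-byte binary stored in the column.
binary = [Digest::SHA1.hexdigest("#{identifier['type']}:#{identifier['value']}")].pack('H*')

# For the SQL lookup the binary is expanded back to hex and written as a bytea
# literal, matched against vulnerability_identifiers.fingerprint.
hex = binary.unpack1('H*')
puts "'\\x#{hex}'::bytea"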
- # we don't need to find the correct primary identifier if we already have a correct finding, - # since that contains the correct primary identifier - next if correct_findings_lookup[vulnerability_id] + identifiers = query_identifiers_by_fingerprint(project_id, fingerprint_to_vuln.keys) + map_identifiers_to_vulnerabilities(fingerprint_to_vuln, identifiers) + end - parsed_metadata = Gitlab::Json.parse(affected_vulnerability['raw_metadata']) - semgrep_identifier = parsed_metadata["identifiers"].find { |id| id['type'] == 'semgrep_id' } + def build_fingerprint_mapping(vulnerabilities, duplicate_data) + fingerprint_to_vuln = Hash.new { |hash, key| hash[key] = [] } - fingerprint = Migratable::Vulnerabilities::Identifier.sha1_fingerprint(semgrep_identifier).unpack1('H*') + vulnerabilities.each do |vulnerability| + vulnerability_id = vulnerability[:vulnerability_id] + # we don't need to find the correct primary identifier if we have a duplicate record, + # since that already contains the correct primary identifier + next if duplicate_data[vulnerability_id] - fingerprint_to_vuln[fingerprint] << vulnerability_id - end + fingerprint = extract_semgrep_fingerprint(vulnerability[:raw_metadata]) + fingerprint_to_vuln[fingerprint] << vulnerability_id + end - next if fingerprint_to_vuln.empty? + fingerprint_to_vuln + end - fingerprints_sql = fingerprint_to_vuln.keys.map { |fp| "'\\x#{fp}'::bytea" }.join(', ') + def extract_semgrep_fingerprint(raw_metadata) + parsed_metadata = Gitlab::Json.parse(raw_metadata) + semgrep_identifier = parsed_metadata["identifiers"].find { |id| id['type'] == 'semgrep_id' } + Migratable::Vulnerabilities::Identifier.sha1_fingerprint(semgrep_identifier).unpack1('H*') + end - sql = <<~SQL - SELECT id, project_id, fingerprint, external_type, external_id - FROM vulnerability_identifiers - WHERE project_id = #{project_id} - AND fingerprint IN (#{fingerprints_sql}) - SQL + def query_identifiers_by_fingerprint(project_id, fingerprints) + fingerprints_sql = fingerprints.map { |fp| "'\\x#{fp}'::bytea" }.join(', ') + + sql = <<~SQL + SELECT id, project_id, fingerprint, external_type, external_id + FROM vulnerability_identifiers + WHERE project_id = #{project_id} + AND fingerprint IN (#{fingerprints_sql}) + SQL + + results = SecApplicationRecord.connection.execute(sql) + results.index_by { |row| row['fingerprint'] } + end - results = SecApplicationRecord.connection.execute(sql) - identifiers = results.index_by { |row| row['fingerprint'] } + def map_identifiers_to_vulnerabilities(fingerprint_to_vuln, identifiers) + vulnerability_lookup = {} - # Build lookup with both raw_metadata and identifier - fingerprint_to_vuln.each do |(fingerprint, vulnerability_ids)| - vulnerability_ids.each do |vulnerability_id| - vulnerability_lookup[vulnerability_id] = identifiers["\\x#{fingerprint}"] - end + fingerprint_to_vuln.each do |fingerprint, vulnerability_ids| + identifier = identifiers["\\x#{fingerprint}"] + vulnerability_ids.each do |vulnerability_id| + vulnerability_lookup[vulnerability_id] = identifier end end vulnerability_lookup end - def bulk_lookup_project_namespaces(affected_vulnerabilities) - project_ids_sql = affected_vulnerabilities.values.pluck('project_id').uniq.join(', ') + def fetch_project_namespaces(affected_vulnerabilities) + project_ids_sql = affected_vulnerabilities.values.pluck(:project_id).uniq.join(', ') sql = <<-SQL SELECT id as project_id, project_namespace_id @@ -220,12 +294,13 @@ def bulk_lookup_project_namespaces(affected_vulnerabilities) end end - def 
bulk_lookup_state_transitions(affected_vulnerabilities) + def fetch_state_transitions(affected_vulnerabilities) vulnerability_ids_sql = affected_vulnerabilities.keys.join(', ') sql = <<-SQL SELECT DISTINCT ON (vst.vulnerability_id) - vst.* + vst.vulnerability_id, vst.id, vst.to_state, vst.from_state, vst.author_id, vst.comment, + vst.dismissal_reason, vst.project_id FROM vulnerability_state_transitions vst WHERE vst.vulnerability_id IN (#{vulnerability_ids_sql}) AND vst.author_id IS NOT NULL @@ -235,42 +310,68 @@ def bulk_lookup_state_transitions(affected_vulnerabilities) results = SecApplicationRecord.connection.execute(sql) results.each_with_object({}) do |row, hash| - hash[row['vulnerability_id']] = row + hash[row['vulnerability_id']] = { + id: row['id'].to_i, + to_state: row['to_state'].to_i, + from_state: row['from_state'].to_i, + author_id: row['author_id'].to_i, + comment: row['comment'], + dismissal_reason: row['dismissal_reason'], + project_id: row['project_id'].to_i + } end end # determine if there exists a _new_ duplicate vulnerability finding which matches - # all the same data as the _old_ vulnerability finding stored in vulnerability_data. + # all the same data as the _old_ vulnerability finding stored in affected_vulnerabilities. # If this duplicate finding exists, it means that semgrep 6.7.1 has been executed, # and the state of the new duplicate finding has been initialized to `detected`. We can # use the state transitions from the old vulnerability finding to set the correct # state for the new duplicate vulnerability finding. - def bulk_fetch_duplicate_data(affected_vulnerabilities) - search_params = affected_vulnerabilities.map do |vulnerability_id, data| + def fetch_duplicate_data(affected_vulnerabilities) + search_params = build_search_params(affected_vulnerabilities) + return {} if search_params.empty? + + results = query_duplicate_vulnerabilities(search_params) + build_duplicate_data_hash(results) + end + + private + + def build_search_params(affected_vulnerabilities) + affected_vulnerabilities.map do |vulnerability_id, affected_vulnerability_data| { vulnerability_id: vulnerability_id, - exclude_finding_id: data['finding_id'], - severity: data['severity'], - report_type: data['report_type'], - location_fingerprint: data['location_fingerprint'], - scanner_id: data['scanner_id'], - name: data['name'], - metadata_version: data['metadata_version'], - project_id: data['project_id'] + exclude_finding_id: affected_vulnerability_data[:finding_id], + severity: affected_vulnerability_data[:severity], + report_type: affected_vulnerability_data[:report_type], + location_fingerprint: affected_vulnerability_data[:location_fingerprint], + scanner_id: affected_vulnerability_data[:scanner_id], + name: affected_vulnerability_data[:name], + metadata_version: affected_vulnerability_data[:metadata_version], + project_id: affected_vulnerability_data[:project_id] } end + end - return {} if search_params.empty? 
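One detail of the duplicate search worth spelling out: SELECT DISTINCT ON (sp.vulnerability_id) combined with ORDER BY sp.vulnerability_id, vo.id keeps only the first matching row per vulnerability, so each corrupted record maps to at most one duplicate. A toy example of the same Postgres idiom, with inline data so it is self-contained:

# Toy illustration of the DISTINCT ON idiom: one row per group, picked by the
# ORDER BY tie-breaker. The inline VALUES rows stand in for real tables.
conn = SecApplicationRecord.connection

conn.execute(<<~SQL)
  SELECT DISTINCT ON (duplicates.original_id)
    duplicates.original_id,
    duplicates.candidate_id
  FROM (VALUES (1, 10), (1, 11), (2, 20)) AS duplicates(original_id, candidate_id)
  ORDER BY duplicates.original_id, duplicates.candidate_id
  -- returns (1, 10) and (2, 20): the lowest candidate_id per original_id
SQL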
- + def query_duplicate_vulnerabilities(search_params) conn = SecApplicationRecord.connection + values_sql = build_search_params_values_sql(search_params, conn) + + sql = build_duplicate_search_sql(values_sql) + conn.execute(sql) + end - values_sql = search_params.map do |p| + def build_search_params_values_sql(search_params, conn) + search_params.map do |p| "(#{p[:vulnerability_id]}, #{p[:exclude_finding_id]}, #{p[:severity]}, #{p[:report_type]}, " \ "#{p[:scanner_id]}, '#{p[:location_fingerprint]}'::bytea, #{conn.quote(p[:name])}, " \ "#{conn.quote(p[:metadata_version])}, #{p[:project_id]})" end.join(', ') + end - sql = <<~SQL + def build_duplicate_search_sql(values_sql) + <<~SQL WITH search_params AS ( SELECT * FROM ( VALUES #{values_sql} @@ -302,17 +403,17 @@ def bulk_fetch_duplicate_data(affected_vulnerabilities) JOIN vulnerability_reads vr_duplicate ON vr_duplicate.vulnerability_id = vo.vulnerability_id ORDER BY sp.vulnerability_id, vo.id SQL + end - results = conn.execute(sql) - + def build_duplicate_data_hash(results) results.each_with_object({}) do |row, hash| hash[row['original_vulnerability_id'].to_i] = { - 'vulnerability_id' => row['duplicate_vulnerability_id'].to_i, - 'project_id' => row['project_id'].to_i, - 'finding_id' => row['finding_id'].to_i, - 'raw_metadata' => row['raw_metadata'], - 'original_state' => row['original_state'].to_i, - 'state' => row['duplicate_state'].to_i + vulnerability_id: row['duplicate_vulnerability_id'].to_i, + project_id: row['project_id'].to_i, + finding_id: row['finding_id'].to_i, + raw_metadata: row['raw_metadata'], + original_state: row['original_state'].to_i, + state: row['duplicate_state'].to_i } end end @@ -323,8 +424,7 @@ def fetch_affected_vulnerability_data(vulnerability_reads) SELECT v.id as vulnerability_id, v.report_type, v.severity, v.project_id, v.state, vo.id as finding_id, vo.location_fingerprint, vo.name, vo.metadata_version, vo.scanner_id, vo.raw_metadata, - sf.id as security_finding_id, sf.partition_number, sf.finding_data, - vi.external_type, vi.external_id + sf.id as security_finding_id, sf.partition_number, sf.finding_data FROM vulnerabilities v INNER JOIN vulnerability_occurrences vo ON vo.vulnerability_id = v.id INNER JOIN vulnerability_identifiers vi ON vi.id = vo.primary_identifier_id @@ -337,10 +437,29 @@ def fetch_affected_vulnerability_data(vulnerability_reads) results = SecApplicationRecord.connection.execute(sql) results.each_with_object({}) do |row, hash| - hash[row['vulnerability_id'].to_i] = row + hash[row['vulnerability_id'].to_i] = build_affected_data_from_row(row) end end + def build_affected_data_from_row(row) + { + vulnerability_id: row['vulnerability_id'], + project_id: row['project_id'], + state: row['state'], + severity: row['severity'], + report_type: row['report_type'], + finding_id: row['finding_id'], + location_fingerprint: row['location_fingerprint'], + name: row['name'], + metadata_version: row['metadata_version'], + scanner_id: row['scanner_id'], + raw_metadata: row['raw_metadata'], + security_finding_id: row['security_finding_id'], + partition_number: row['partition_number'], + finding_data: row['finding_data'] + } + end + def reorder_metadata_with_correct_primary_id(raw_metadata) metadata = Gitlab::Json.parse(raw_metadata) identifiers = metadata["identifiers"] @@ -361,74 +480,60 @@ def reorder_finding_data_with_correct_primary_id(finding_data) parsed_finding_data end - # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/ParameterLists -- Data collection for UUID updates - def 
collect_uuid_update_data( - project_namespace_lookup, - vuln_with_incorrect_id, - latest_transition, - identifiers_lookup, + # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists -- Data collection for full updates + def collect_full_correction_data( + vuln_data, current_time, state_transitions_to_insert, notes_to_insert, system_note_metadata_to_insert, vulnerabilities_to_update, - security_findings_to_update, - orphaned_security_findings_to_delete, vulnerability_reads_to_update, + security_findings_to_update, vulnerability_findings_to_update ) - metadata_with_correct_id = reorder_metadata_with_correct_primary_id(vuln_with_incorrect_id['raw_metadata']) - - finding_data_with_correct_id = - reorder_finding_data_with_correct_primary_id(vuln_with_incorrect_id['finding_data']) - - correct_identifier = identifiers_lookup[vuln_with_incorrect_id['vulnerability_id']] + correct_identifier = vuln_data[:correct_identifier] unless correct_identifier - log_warning("Missing correct identifier for vulnerability #{vuln_with_incorrect_id['vulnerability_id']}") + log_warning("Missing correct identifier for vulnerability #{vuln_data[:vulnerability_id]}") return end + corrupted_data = vuln_data[:corrupted] + + metadata_with_correct_id = reorder_metadata_with_correct_primary_id(corrupted_data[:raw_metadata]) + finding_data_with_correct_id = reorder_finding_data_with_correct_primary_id(corrupted_data[:finding_data]) + correct_uuid = Gitlab::UUID.v5( [ - Migratable::Enums::Vulnerability.report_types.key(vuln_with_incorrect_id['report_type']), + Migratable::Enums::Vulnerability.report_types.key(corrupted_data[:report_type]), correct_identifier['fingerprint'].gsub("\\x", ""), - vuln_with_incorrect_id['location_fingerprint'].gsub("\\x", ""), - vuln_with_incorrect_id['project_id'] + corrupted_data[:location_fingerprint].gsub("\\x", ""), + corrupted_data[:project_id] ].join("-") ) - # TODO: we're looping through multiple security findings here, - # but the finding_data_with_correct_id is from only the first security_finding - # finding_with_incorrect_id.security_findings.each do |sf| - # TODO: looks like we're both deleting the record with sf.id - # _and_ updating it? 
Do one or the other - orphaned_security_findings_to_delete << vuln_with_incorrect_id['security_finding_id'] - - security_findings_to_update << { - id: vuln_with_incorrect_id['security_finding_id'], - partition_number: vuln_with_incorrect_id['partition_number'], - uuid: correct_uuid, - finding_data: finding_data_with_correct_id - } - # end + security_findings_to_update << { + id: corrupted_data[:security_finding_id], + partition_number: corrupted_data[:partition_number], + uuid: correct_uuid, + finding_data: finding_data_with_correct_id + } vulnerability_reads_to_update << { - vulnerability_id: vuln_with_incorrect_id['vulnerability_id'], + vulnerability_id: corrupted_data[:vulnerability_id], uuid: correct_uuid, dismissal_reason: nil } vulnerability_findings_to_update << { - id: vuln_with_incorrect_id['finding_id'], + id: corrupted_data[:finding_id], primary_identifier_id: correct_identifier['id'], raw_metadata: metadata_with_correct_id.to_json, uuid: correct_uuid } - collect_state_transition_records( - project_namespace_lookup, - vuln_with_incorrect_id, - latest_transition, + collect_state_restoration_data( + vuln_data, current_time, state_transitions_to_insert, notes_to_insert, @@ -437,7 +542,7 @@ def collect_uuid_update_data( nil ) end - # rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/ParameterLists + # rubocop:enable Metrics/MethodLength, Metrics/ParameterLists def bulk_insert_state_transitions(data) return if data.empty? @@ -559,26 +664,6 @@ def bulk_update_vulnerabilities(updates) end # rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockLength - def bulk_delete_security_findings(orphaned_security_finding_ids_to_delete) - return if orphaned_security_finding_ids_to_delete.empty? - - log_info("Bulk deleting #{orphaned_security_finding_ids_to_delete.length} security findings") - conn = ::SecApplicationRecord.connection - - orphaned_security_finding_ids_to_delete.each_slice(SUB_BATCH_SIZE) do - |orphaned_security_finding_ids_to_delete_batch| - - security_finding_ids_to_delete = orphaned_security_finding_ids_to_delete_batch.join(", ") - - sql = <<~SQL - DELETE from security_findings - WHERE id in (#{security_finding_ids_to_delete}) - SQL - - conn.execute(sql) - end - end - def bulk_update_security_findings(data) return if data.empty? 
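Per the rubocop note earlier in this file ("UPDATE FROM VALUES requires building multiple column sets"), the bulk_update_* helpers referenced around this hunk rely on the UPDATE ... FROM (VALUES ...) pattern; their bodies are not shown in this diff. A minimal standalone sketch of that Postgres pattern, with an illustrative single column and values:

# Minimal sketch of the UPDATE ... FROM (VALUES ...) pattern the bulk_update_*
# helpers rely on. The values and the single updated column are illustrative.
conn = SecApplicationRecord.connection

updates = [{ id: 1, state: 3 }, { id: 2, state: 4 }]
values_sql = updates.map { |u| "(#{Integer(u[:id])}, #{Integer(u[:state])})" }.join(', ')

conn.execute(<<~SQL)
  UPDATE vulnerabilities v
  SET state = data.state
  FROM (VALUES #{values_sql}) AS data(id, state)
  WHERE v.id = data.id
SQL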
@@ -664,10 +749,9 @@ def bulk_update_vulnerability_findings(updates) end end - def collect_state_transition_records( - project_namespace_lookup, - vulnerability, - latest_transition, + # this method is called both for duplicate vulnerabilities and affected vulnerabilities + def collect_state_restoration_data( + vuln_data, current_time, state_transitions_to_insert, notes_to_insert, @@ -675,79 +759,86 @@ def collect_state_transition_records( vulnerabilities_to_update, vulnerability_reads_to_update ) + latest_transition = vuln_data[:latest_transition] + # we can only restore the vulnerability state if a state transition exists, otherwise, we have + # no idea what the actual state should be return unless latest_transition - return if vulnerability['state'] == latest_transition['to_state'] - state_transitions_to_insert << build_state_transition(vulnerability, latest_transition, current_time) + current_state = value_for_field(vuln_data, :state) - note_data = build_note_data(project_namespace_lookup, vulnerability, latest_transition, current_time) + # don't make any changes if the state has not changed + return if current_state == latest_transition[:to_state] + + state_transitions_to_insert << build_state_transition(vuln_data, current_time) + + note_data = build_note_data(vuln_data, current_time) notes_to_insert << note_data - system_note_metadata_to_insert << build_system_note_metadata( - project_namespace_lookup, vulnerability, latest_transition, current_time, note_data) + system_note_metadata_to_insert << build_system_note_metadata(vuln_data, current_time, note_data) - vulnerabilities_to_update << build_vulnerability_update( - vulnerability, latest_transition, current_time) + vulnerabilities_to_update << build_vulnerability_update(vuln_data, current_time) - return unless vulnerability_reads_to_update + vulnerability_reads_to_update << build_vulnerability_read_update(vuln_data) if vulnerability_reads_to_update + end - vulnerability_reads_to_update << build_vulnerability_read_update(vulnerability, latest_transition) + def value_for_field(vuln_data, field) + vuln_data.dig(:duplicate, field) || vuln_data[:corrupted][field] end - def build_state_transition(vulnerability, latest_transition, current_time) + def build_state_transition(vuln_data, current_time) + latest_transition = vuln_data[:latest_transition] + { - vulnerability_id: vulnerability['vulnerability_id'], - dismissal_reason: latest_transition['dismissal_reason'], - author_id: latest_transition['author_id'], - from_state: vulnerability['state'], - to_state: latest_transition['to_state'], - project_id: latest_transition['project_id'], + vulnerability_id: value_for_field(vuln_data, :vulnerability_id), + dismissal_reason: latest_transition[:dismissal_reason], + author_id: latest_transition[:author_id], + from_state: value_for_field(vuln_data, :state), + to_state: latest_transition[:to_state], + project_id: value_for_field(vuln_data, :project_id), created_at: current_time, updated_at: current_time, comment: format(TRANSITION_COMMENT_TEMPLATE, { - original_comment: latest_transition['comment'], - transition_id: latest_transition['id'] + original_comment: latest_transition[:comment], + transition_id: latest_transition[:id] }) } end - def build_note_data(project_namespace_lookup, vulnerability, latest_transition, current_time) - from_state_string = Migratable::Enums::Vulnerability.vulnerability_states.key(vulnerability['state']) - to_state_string = Migratable::Enums::Vulnerability.vulnerability_states.key(latest_transition['to_state']) + def 
build_note_data(vuln_data, current_time) + latest_transition = vuln_data[:latest_transition] + from_state_string = Migratable::Enums::Vulnerability.vulnerability_states.key( + value_for_field(vuln_data, :state) + ) + to_state_string = Migratable::Enums::Vulnerability.vulnerability_states.key(latest_transition[:to_state]) + vulnerability_id = value_for_field(vuln_data, :vulnerability_id) note_text = format(SYSTEM_NOTE_TEMPLATE, { - original_comment: latest_transition['comment'], + original_comment: latest_transition[:comment], from_state: from_state_string.titleize, to_state: to_state_string.titleize, - transition_id: latest_transition['id'] + transition_id: latest_transition[:id] }) { note: note_text, noteable_type: 'Vulnerability', - author_id: latest_transition['author_id'], + author_id: latest_transition[:author_id], created_at: current_time, updated_at: current_time, - project_id: vulnerability['project_id'], - noteable_id: vulnerability['vulnerability_id'], + project_id: value_for_field(vuln_data, :project_id), + noteable_id: vulnerability_id, system: true, - discussion_id: discussion_id(vulnerability['vulnerability_id']), - namespace_id: project_namespace_lookup[vulnerability['project_id']] + discussion_id: discussion_id(vulnerability_id), + namespace_id: vuln_data[:project_namespace_id] } end - def build_system_note_metadata( - project_namespace_lookup, - vulnerability, - latest_transition, - current_time, - note_data - ) - # binding.pry - to_state_string = Migratable::Enums::Vulnerability.vulnerability_states.key(latest_transition['to_state']) + def build_system_note_metadata(vuln_data, current_time, note_data) + latest_transition = vuln_data[:latest_transition] + to_state_string = Migratable::Enums::Vulnerability.vulnerability_states.key(latest_transition[:to_state]) { - namespace_id: project_namespace_lookup[vulnerability['project_id']], + namespace_id: vuln_data[:project_namespace_id], action: "vulnerability_#{to_state_string}", created_at: current_time, updated_at: current_time, @@ -755,10 +846,12 @@ def build_system_note_metadata( } end - def build_vulnerability_update(vulnerability, latest_transition, current_time) + def build_vulnerability_update(vuln_data, current_time) + latest_transition = vuln_data[:latest_transition] + vuln_update = { - id: vulnerability['vulnerability_id'], - state: latest_transition['to_state'] + id: value_for_field(vuln_data, :vulnerability_id), + state: latest_transition[:to_state] } add_state_specific_attributes(vuln_update, latest_transition, current_time) @@ -767,9 +860,9 @@ def build_vulnerability_update(vulnerability, latest_transition, current_time) end def add_state_specific_attributes(vuln_update, latest_transition, current_time) - author_id = latest_transition['author_id'] + author_id = latest_transition[:author_id] - case latest_transition['to_state'] + case latest_transition[:to_state] when "confirmed" vuln_update[:confirmed_by_id] = author_id vuln_update[:confirmed_at] = current_time @@ -782,11 +875,13 @@ def add_state_specific_attributes(vuln_update, latest_transition, current_time) end end - def build_vulnerability_read_update(vulnerability, latest_transition) + def build_vulnerability_read_update(vuln_data) + latest_transition = vuln_data[:latest_transition] + { - vulnerability_id: vulnerability['vulnerability_id'], + vulnerability_id: value_for_field(vuln_data, :vulnerability_id), uuid: nil, - dismissal_reason: latest_transition['dismissal_reason'] + dismissal_reason: latest_transition[:dismissal_reason] } end diff --git 
index 4bab2d44e76a2d..3098868cd222dd 100644
--- a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb
+++ b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb
@@ -27,7 +27,7 @@
       organization_id: organization.id)
   end
 
-  let(:acceptable_risk_dismissal_int) { 0 }
+  let(:mitigating_control_dismissal_int) { 2 }
 
   let(:severity_level_low_int) { 4 }
   let(:severity_level_medium_int) { 5 }
@@ -82,6 +82,9 @@
       .where("title LIKE ?", "%Vulnerability to be dismissed.").order(:id)
   end
 
+  # TODO: also add tests for { 'namespace_id' => namespace.id } and { 'namespace_id' => 'instance' }
+  let(:job_args) { { 'project_id' => project.id } }
+
   def create_project(name:, group:, id: nil)
     project_namespace = namespaces_table.create!(
       name: name,
@@ -108,6 +111,7 @@ def perform_migration
       batch_table: :vulnerability_reads,
       batch_column: :vulnerability_id,
       sub_batch_size: sub_batch_size,
+      job_arguments: [job_args],
       pause_ms: 0,
       connection: ::SecApplicationRecord.connection
     ).perform
@@ -213,7 +217,7 @@ def perform_migration
 
       expect do
         dismiss_vulnerabilities(vulnerabilities: vulnerabilities_to_be_dismissed,
-          comment: 'dismissing', dismissal_reason: acceptable_risk_dismissal_int)
+          comment: 'dismissing', dismissal_reason: mitigating_control_dismissal_int)
       end
         .to change {
           vulnerabilities_to_be_dismissed.map(&:state)
@@ -244,7 +248,7 @@ def perform_migration
         vulnerability_reads_table.where(vulnerability_id: vulnerabilities_to_be_dismissed.pluck(:id))
           .pluck(:dismissal_reason)
       }.from([nil] * vulnerabilities_to_be_dismissed.count)
-        .to([acceptable_risk_dismissal_int] * vulnerabilities_to_be_dismissed.count)
+        .to([mitigating_control_dismissal_int] * vulnerabilities_to_be_dismissed.count)
     end
   end
 
@@ -265,7 +269,7 @@ def perform_migration
       create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json')
 
       dismiss_vulnerabilities(vulnerabilities: vulnerabilities_to_be_dismissed,
-        comment: 'dismissing', dismissal_reason: acceptable_risk_dismissal_int)
+        comment: 'dismissing', dismissal_reason: mitigating_control_dismissal_int)
       confirm_vulnerabilities(vulnerabilities: vulnerabilities_to_be_confirmed, comment: 'confirming')
 
       expect { corrupt_vulnerabilities }.to not_change {
@@ -400,7 +404,7 @@ def perform_migration
       confirm_vulnerabilities(vulnerabilities: vulnerabilities_to_be_confirmed, comment: 'confirming')
       resolve_vulnerabilities(vulnerabilities: vulnerabilities_to_be_resolved, comment: 'resolving')
       dismiss_vulnerabilities(vulnerabilities: vulnerabilities_to_be_dismissed,
-        comment: 'dismissing', dismissal_reason: acceptable_risk_dismissal_int)
+        comment: 'dismissing', dismissal_reason: mitigating_control_dismissal_int)
     end
 
     it 'does not change the state of any vulnerability records' do
@@ -433,16 +437,8 @@ def perform_migration
       }.from(["cwe"] * vulnerability_findings_table.count).to(["semgrep_id"] * vulnerability_findings_table.count)
     end
 
-    it 'deletes orphaned security findings where the first identifier has external_type "cwe"' do
-      findings_with_cwe_primary = security_findings_table.where(
-        "finding_data->'identifiers'->0->>'external_type' = ?", 'cwe'
-      )
-
-      expect { perform_migration }.to change {
-        findings_with_cwe_primary.count
-      }.to(0).and change {
-        security_findings_table.count
-      }.by(-findings_with_cwe_primary.count)
+    it 'does not delete any security findings' do
+      expect { perform_migration }.to not_change { security_findings_table.count }
     end
 
     it 'restores the UUID for corrupted vulnerability records' do
@@ -516,7 +512,7 @@ def perform_migration
       confirm_vulnerabilities(vulnerabilities: vulnerabilities_to_be_confirmed, comment: 'confirming')
       resolve_vulnerabilities(vulnerabilities: vulnerabilities_to_be_resolved, comment: 'resolving')
       dismiss_vulnerabilities(vulnerabilities: vulnerabilities_to_be_dismissed,
-        comment: 'dismissing', dismissal_reason: acceptable_risk_dismissal_int)
+        comment: 'dismissing', dismissal_reason: mitigating_control_dismissal_int)
 
       create_vulnerability_issue_link(
         vulnerability: vulnerabilities_table.find_by("title LIKE ?", "Vulnerbility with issue link%")
@@ -525,16 +521,8 @@ def perform_migration
       corrupt_vulnerabilities
     end
 
-    it 'deletes orphaned security findings where the first identifier has external_type "cwe"' do
-      findings_with_cwe_primary = security_findings_table.where(
-        "finding_data->'identifiers'->0->>'external_type' = ?", 'cwe'
-      )
-
-      expect { perform_migration }.to change {
-        findings_with_cwe_primary.count
-      }.to(0).and change {
-        security_findings_table.count
-      }.by(-findings_with_cwe_primary.count)
+    it 'does not delete any security findings' do
+      expect { perform_migration }.to not_change { security_findings_table.count }
     end
 
     it 'restores the primary identifiers for corrupted vulnerability findings to the correct value' do
@@ -763,7 +751,7 @@ def perform_migration
       confirm_vulnerabilities(vulnerabilities: vulnerabilities_to_be_confirmed, comment: 'confirming')
       resolve_vulnerabilities(vulnerabilities: vulnerabilities_to_be_resolved, comment: 'resolving')
       dismiss_vulnerabilities(vulnerabilities: vulnerabilities_to_be_dismissed,
-        comment: 'dismissing', dismissal_reason: acceptable_risk_dismissal_int)
+        comment: 'dismissing', dismissal_reason: mitigating_control_dismissal_int)
 
       corrupted_vulnerabilities.concat(corrupt_vulnerabilities)
 
@@ -819,6 +807,21 @@ def perform_migration
       }
     end
 
+    it 'updates the vulnerability_reads.dismissal_reason for duplicate vulnerabilities' do
+      dismissed_vulns = vulnerabilities_table
+        .where(id: duplicated_vulnerabilities.pluck(:id))
+        .where("title LIKE ?", "%Vulnerability to be dismissed.")
+
+      dismissed_vuln_count = dismissed_vulns.count
+
+      expect { perform_migration }.to change {
+        vulnerability_reads_table
+          .where(vulnerability_id: dismissed_vulns.select(:id))
+          .pluck(:dismissal_reason)
+      }.from([nil] * dismissed_vuln_count)
+        .to([mitigating_control_dismissal_int] * dismissed_vuln_count)
+    end
+
     it 'does not change the state for corrupted vulnerabilities' do
       corrupted_vuln_ids = corrupted_vulnerabilities.pluck(:id)
 
@@ -856,7 +859,7 @@ def perform_migration
         .to([dismissed_state_int] * vulnerabilities_to_be_dismissed.length)
     end
 
-    context 'when system notes and state transitions' do
+    context 'when notes and state transitions' do
       where(:to_state, :comment) do
         [
           %w[resolved resolving],
@@ -909,14 +912,15 @@ def perform_migration
         end
       end
 
-      it "creates new system notes for #{params[:to_state]} vulnerabilities" do
+      it "creates new notes for #{params[:to_state]} vulnerabilities" do
         note_pattern = "%changed vulnerability status from Detected to " \
           "#{ApplicationRecord.sanitize_sql_like(to_state.titleize)} with the following comment: " \
           "\"#{ApplicationRecord.sanitize_sql_like(comment)}\"%"
 
         expect { perform_migration }.to change {
           notes_table.where('note LIKE ?', note_pattern).count
-        }.by(target_vulnerabilities.count)
+        }.by(target_vulnerabilities.count),
+          "Expected to create #{target_vulnerabilities.count} notes for state '#{to_state}'"
 
         aggregate_failures "checking note attributes" do
           notes_table
@@ -983,7 +987,7 @@ def perform_migration
     it 'does not alter the additional vulnerabilities' do
       expect { perform_migration }.to not_change {
         additional_vulnerabilities_to_be_confirmed.pluck(:state)
-      }.from([confirmed_state_int])
+      }.from([confirmed_state_int] * additional_vulnerabilities_to_be_confirmed.count)
         .and not_change {
           vulnerability_findings_table.where(id: additional_vulnerabilities_to_be_confirmed.pluck(:finding_id))
             .pluck(:uuid)
diff --git a/spec/migrations/20251020182838_queue_restore_incorrect_vulnerability_states_spec.rb b/spec/migrations/20251020182838_queue_restore_incorrect_vulnerability_states_spec.rb
deleted file mode 100644
index 44ffe126e1e775..00000000000000
--- a/spec/migrations/20251020182838_queue_restore_incorrect_vulnerability_states_spec.rb
+++ /dev/null
@@ -1,27 +0,0 @@
-# frozen_string_literal: true
-
-require 'spec_helper'
-require_migration!
-
-RSpec.describe QueueRestoreIncorrectVulnerabilityStates, migration: :gitlab_sec, feature_category: :static_application_security_testing do
-  let!(:batched_migration) { described_class::MIGRATION }
-
-  it 'schedules a new batched migration' do
-    reversible_migration do |migration|
-      migration.before -> {
-        expect(batched_migration).not_to have_scheduled_batched_migration
-      }
-
-      migration.after -> {
-        expect(batched_migration).to have_scheduled_batched_migration(
-          gitlab_schema: :gitlab_sec,
-          table_name: :vulnerability_reads,
-          column_name: :vulnerability_id,
-          interval: described_class::DELAY_INTERVAL,
-          batch_size: described_class::BATCH_SIZE,
-          sub_batch_size: described_class::SUB_BATCH_SIZE
-        )
-      }
-    end
-  end
-end
-- 
GitLab