diff --git a/db/docs/batched_background_migrations/restore_incorrect_vulnerability_states.yml b/db/docs/batched_background_migrations/restore_incorrect_vulnerability_states.yml new file mode 100644 index 0000000000000000000000000000000000000000..1824c8738d8da1897f588e113bd27c169644f0f4 --- /dev/null +++ b/db/docs/batched_background_migrations/restore_incorrect_vulnerability_states.yml @@ -0,0 +1,8 @@ +--- +migration_job_name: RestoreIncorrectVulnerabilityStates +description: Restores incorrect vulnerability states caused by a bug in GitLab Semgrep v6.7.0 +feature_category: static_application_security_testing +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/211669 +milestone: '18.8' +queued_migration_version: +finalized_by: 00000000000000 # This migration was enqueued via rake task and can't be finalized. diff --git a/ee/lib/tasks/gitlab/vulnerabilities/restore_incorrect_vulnerability_states.rake b/ee/lib/tasks/gitlab/vulnerabilities/restore_incorrect_vulnerability_states.rake new file mode 100644 index 0000000000000000000000000000000000000000..da98cfaaab203fde83fe41564d5f395f9f20aa9e --- /dev/null +++ b/ee/lib/tasks/gitlab/vulnerabilities/restore_incorrect_vulnerability_states.rake @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +namespace :gitlab do + namespace :vulnerabilities do + desc 'Restore vulnerability states affected by https://gitlab.com/gitlab-org/gitlab/-/issues/577229 for a namespace' + task :restore_incorrect_vulnerability_states_for_namespace, [:namespace_id] => :environment do |_, args| + Vulnerabilities::Rake::RestoreIncorrectVulnerabilityStates.new(namespace_id: args[:namespace_id]).execute + end + + desc 'Restore vulnerability states affected by https://gitlab.com/gitlab-org/gitlab/-/issues/577229 for a project' + task :restore_incorrect_vulnerability_states_for_project, [:project_id] => :environment do |_, args| + Vulnerabilities::Rake::RestoreIncorrectVulnerabilityStates.new(project_id: args[:project_id]).execute + 
end + + task 'restore_incorrect_vulnerability_states_for_namespace:revert', [:namespace_id] => :environment do |_, args| + Vulnerabilities::Rake::RestoreIncorrectVulnerabilityStates.new( + namespace_id: args[:namespace_id], revert: true + ).execute + end + + task 'restore_incorrect_vulnerability_states_for_project:revert', [:project_id] => :environment do |_, args| + Vulnerabilities::Rake::RestoreIncorrectVulnerabilityStates.new( + project_id: args[:project_id], revert: true + ).execute + end + end +end diff --git a/ee/lib/vulnerabilities/rake/restore_incorrect_vulnerability_states.rb b/ee/lib/vulnerabilities/rake/restore_incorrect_vulnerability_states.rb new file mode 100644 index 0000000000000000000000000000000000000000..2976e5aca80fee915fe0a7e3432fb798b18f9efa --- /dev/null +++ b/ee/lib/vulnerabilities/rake/restore_incorrect_vulnerability_states.rb @@ -0,0 +1,102 @@ +# frozen_string_literal: true + +module Vulnerabilities + module Rake + class RestoreIncorrectVulnerabilityStates + include Gitlab::Database::Migrations::BatchedBackgroundMigrationHelpers + + MIGRATION = 'RestoreIncorrectVulnerabilityStates' + INSTANCE_ARG = 'instance' + + def initialize(namespace_id: nil, project_id: nil, revert: false) + @namespace_id = namespace_id + @project_id = project_id + @revert = revert + end + + attr_reader :namespace_id, :project_id, :revert + + def execute + validate_args! + + Gitlab::Database::SharedModel.using_connection(connection) do + if revert + delete_migration + else + queue_migration + end + end + end + + def allowed_gitlab_schemas + [:gitlab_sec] + end + + private + + def validate_args! + abort "One of namespace_id or project_id must be provided" if namespace_id.nil? && project_id.nil? + + validate_namespace! if namespace_id.present? + validate_project! if project_id.present? + end + + def validate_project! + abort "Error: Expected project_id '#{project_id}' to be a number." 
unless /\d+/.match?(project_id) + + project = Project.find_by_id(project_id) + abort "Project:#{project_id} not found." if project.blank? + end + + def validate_namespace! + unless /(\d+|instance)/.match?(namespace_id) + abort "Error: Expected namespace_id '#{namespace_id}' to be a number.\nUse " \ + "`gitlab-rake 'gitlab:vulnerabilities:restore_incorrect_vulnerability_states_for_namespace[instance]'` " \ + "to perform an instance migration." + end + + return true if instance_migration? + + namespace = Namespace.find_by_id(namespace_id) + abort "Namespace:#{namespace_id} not found." if namespace.blank? + abort 'Namespace must be top-level.' if namespace.parent.present? + end + + def queue_migration + queue_batched_background_migration( + MIGRATION, + :vulnerability_reads, + :vulnerability_id, + job_args, + gitlab_schema: :gitlab_sec + ) + + puts "Enqueued background migration: #{MIGRATION}, job_args: #{job_args}" + end + + def delete_migration + delete_batched_background_migration(MIGRATION, :vulnerability_reads, :vulnerability_id, [job_args]) + + puts "Deleted background migration: #{MIGRATION}, job_args: #{job_args}" + end + + def job_args + return { namespace_id: namespace_id } if namespace_id.present? + + { project_id: project_id.to_i } + end + + def instance_migration? 
+ namespace_id == INSTANCE_ARG + end + + def version + Time.now.utc.strftime("%Y%m%d%H%M%S") + end + + def connection + SecApplicationRecord.connection + end + end + end +end diff --git a/ee/spec/factories/ci/job_artifacts.rb b/ee/spec/factories/ci/job_artifacts.rb index 981800cf943c8cb73cf842273b0fe87ea3913064..b776dad2a1dff75d6de379107fa3f6113ddc04f8 100644 --- a/ee/spec/factories/ci/job_artifacts.rb +++ b/ee/spec/factories/ci/job_artifacts.rb @@ -121,6 +121,54 @@ end end + trait :sast_semgrep_multiple_vulnerabilities do + file_type { :sast } + file_format { :raw } + + after(:build) do |artifact, _| + artifact.file = fixture_file_upload( + Rails.root.join('ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json'), + 'application/json' + ) + end + end + + trait :sast_semgrep_multiple_vulnerabilities_incorrect_primary_identifier do + file_type { :sast } + file_format { :raw } + + after(:build) do |artifact, _| + artifact.file = fixture_file_upload( + Rails.root.join('ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.0-multiple-vulnerabilities-incorrect-primary-identifier.json'), + 'application/json' + ) + end + end + + trait :sast_semgrep_multiple_vulnerabilities_correct_primary_identifier do + file_type { :sast } + file_format { :raw } + + after(:build) do |artifact, _| + artifact.file = fixture_file_upload( + Rails.root.join('ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-multiple-vulnerabilities-correct-primary-identifier.json'), + 'application/json' + ) + end + end + + trait :sast_semgrep_additional_vulnerabilities_correct_primary_identifier do + file_type { :sast } + file_format { :raw } + + after(:build) do |artifact, _| + artifact.file = fixture_file_upload( + Rails.root.join('ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-additional-vulnerabilities-correct-primary-identifier.json'), + 'application/json' + ) + end + end + trait :dast_with_evidence do 
file_type { :dast } file_format { :raw } diff --git a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json new file mode 100644 index 0000000000000000000000000000000000000000..a2175b4e6e316dd3c7b36d4a73843c395e89d08d --- /dev/null +++ b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json @@ -0,0 +1,677 @@ +{ + "version": "15.1.4", + "vulnerabilities": [ + { + "id": "6a8d97c532a32e7bb9e1d93b3300977b8a7e75f9ddcc5bac4edaa6cda3603833", + "category": "sast", + "name": "Deserialization of untrusted data. Vulnerability to be confirmed.", + "description": "The application was found using an unsafe version of `yaml` load which is vulnerable to\ndeserialization attacks. Deserialization attacks exploit the process of reading serialized\ndata and turning it back\ninto an object. By constructing malicious objects and serializing them, an adversary may\nattempt to:\n\n- Inject code that is executed upon object construction, which occurs during the\ndeserialization process.\n- Exploit mass assignment by including fields that are not normally a part of the serialized\ndata but are read in during deserialization.\n\nTo remediate this issue, use `safe_load()` or call `yaml.load()` with the `Loader` argument\nset to\n`yaml.SafeLoader`.\n\nExample loading YAML using `safe_load`:\n```\nimport yaml\n\n# Use safe_load to load data into an intermediary object\nintermediary_object = yaml.safe_load(\"\"\"user:\n name: 'test user'\"\"\"\n)\n# Create our real object, copying over only the necessary fields\nuser_object = {'user': {\n # Assign the deserialized data from intermediary object\n 'name': intermediary_object['user']['name'],\n # Add in protected data in object definition (or set it from a class constructor)\n 'is_admin': False,\n }\n}\n# Work with user_object\n# ...\n```\n\nFor more details 
on deserialization attacks in general, see OWASP's guide:\n- https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html\n", + "cve": "semgrep_id:bandit.B506:329:329", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 329 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B506", + "value": "bandit.B506", + "url": "https://semgrep.dev/r/gitlab.bandit.B506" + }, + { + "type": "cwe", + "name": "CWE-502", + "value": "502", + "url": "https://cwe.mitre.org/data/definitions/502.html" + }, + { + "type": "owasp", + "name": "A08:2021 - Software and Data Integrity Failures", + "value": "A08:2021" + }, + { + "type": "owasp", + "name": "A8:2017 - Insecure Deserialization", + "value": "A8:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B506", + "value": "B506" + } + ] + }, + { + "id": "185f6aa5aece728c2b94f16ff36ea99339dbeb39a027964d65a0e544b439529d", + "category": "sast", + "name": "Vulnerbility with issue link. 
Vulnerability to be confirmed.", + "description": "SQL Injection is a critical vulnerability that can lead to data or system compromise.", + "cve": "semgrep_id:bandit.B608:265:265", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 265 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B608", + "value": "bandit.B608", + "url": "https://semgrep.dev/r/gitlab.bandit.B608" + }, + { + "type": "cwe", + "name": "CWE-89", + "value": "89", + "url": "https://cwe.mitre.org/data/definitions/89.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + }, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B608", + "value": "B608" + } + ] + }, + { + "id": "afb3a18f344a72ed01c842afd1939b4c33b150ba50234001d8eb34ce72a977f4", + "category": "sast", + "name": "Improper neutralization of directives in dynamically evaluated code ('Eval Injection'). Vulnerability to be confirmed.", + "description": "The application was found calling the `eval` function OR Function()\n constructor OR setTimeout() OR setInterval() methods. If the\n\n variables or strings or functions passed to these methods contains user-supplied input, an adversary could attempt to execute arbitrary\n\n JavaScript\n\n code. This could lead to a full system compromise in Node applications or Cross-site Scripting\n\n (XSS) in web applications.\n\n\n To remediate this issue, remove all calls to above methods and consider alternative methods for\n\n executing\n\n the necessary business logic. 
There is almost no safe method of calling `eval` or other above stated sinks with\n\n user-supplied input.\n\n Instead, consider alternative methods such as using property accessors to dynamically access\n\n values.\n\n\n Example using property accessors to dynamically access an object's property:\n\n ```\n\n // Define an object\n\n const obj = {key1: 'value1', key2: 'value2'};\n\n // Get key dynamically from user input\n\n const key = getUserInput();\n\n // Check if the key exists in our object and return it, or a default empty string\n\n const value = (obj.hasOwnProperty(key)) ? obj[key] : '';\n\n // Work with the value\n\n ```\n\n\n For more information on why not to use `eval`, and alternatives see:\n\n - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/eval#never_use_eval!\n\n Other References:\n\n - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Function/Function\n\n - https://developer.mozilla.org/en-US/docs/Web/API/setTimeout\n\n - https://developer.mozilla.org/en-US/docs/Web/API/setInterval\n", + "cve": "semgrep_id:eslint.detect-eval-with-expression:10:10", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/static/main.js", + "start_line": 10 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "eslint.detect-eval-with-expression", + "value": "eslint.detect-eval-with-expression", + "url": "https://semgrep.dev/r/gitlab.eslint.detect-eval-with-expression" + }, + { + "type": "cwe", + "name": "CWE-95", + "value": "95", + "url": "https://cwe.mitre.org/data/definitions/95.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + }, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + }, + { + "type": "eslint_rule_id", + "name": "ESLint rule ID/detect-eval-with-expression", + "value": "detect-eval-with-expression" + } + ] + }, + { + "id": 
"4e7633d40f31f6398b4c7ffc4bf481ba6fe627c34042d7439b71259e6ea9b32c", + "category": "sast", + "name": "Improper neutralization of special elements used in an SQL Command ('SQL Injection'). Vulnerability to be confirmed.", + "description": "Detected user input used to manually construct a SQL string. This is usually\nbad practice because manual construction could accidentally result in a SQL\ninjection. An attacker could use a SQL injection to steal or modify contents\nof the database. Instead, use a parameterized query which is available\nby default in most database engines. Alternatively, consider using an\nobject-relational mapper (ORM) such as SQLAlchemy which will protect your queries.\n\nSQL Injections are a critical type of vulnerability that can lead to data \nor system compromise. By dynamically generating SQL query strings, user \ninput may be able to influence the logic of an SQL statement. \nThis could lead to an malicious parties accessing information they should not \nhave access to, or in some circumstances, being able to execute OS functionality\nor code.\n\nReplace all dynamically generated SQL queries with parameterized queries. \nIn situations where dynamic queries must be created, never use direct user input,\nbut instead use a map or dictionary of valid values and resolve them using a user \nsupplied key.\n\nFor example, some database drivers do not allow parameterized queries for \n`>` or `<` comparison operators. In these cases, do not use a user supplied \n`>` or `<` value, but rather have the user supply a `gt` or `lt` value. \nThe alphabetical values are then used to look up the `>` and `<` values to be used \nin the construction of the dynamic query. The same goes for other queries where \ncolumn or table names are required but cannot be parameterized.\nData that is possible user-controlled from a python request is passed\nto `execute()` function. 
To remediate this issue, use SQLAlchemy statements\nwhich are built with query parameterization and therefore not vulnerable \nto sql injection.\n\nIf for some reason this is not feasible, ensure calls including user-supplied \ndata pass it in to the `params` parameter of the `execute()` method.\nBelow is an example using `execute()`, passing in user-supplied data as `params`. \nThis will treat the query as a parameterized query and `params` as strictly data, \npreventing any possibility of SQL Injection.\n\n```\nname = request.args.get('name')\nreq = text('SELECT * FROM student WHERE firstname = :x')\nresult = db.session.execute(req, {\"x\":name})\n```\nFor more information on QuerySets see:\n- https://docs.djangoproject.com/en/4.2/ref/models/querysets/#queryset-api\nFor more information on SQL Injections see OWASP:\n- https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html\n", + "cve": "semgrep_id:python_flask_rule-flask-tainted-sql-string:261:261", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 261 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "python_flask_rule-flask-tainted-sql-string", + "value": "python_flask_rule-flask-tainted-sql-string" + }, + { + "type": "cwe", + "name": "CWE-89", + "value": "89", + "url": "https://cwe.mitre.org/data/definitions/89.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + }, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + } + ] + }, + { + "id": "819fa95af305ebbf12f83f5cd85ce6b9720a22a112869bb6ef76bec8fe449d62", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:17:18", + "severity": "Critical", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 17, + "end_line": 18 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + }, + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + } + ] + }, + { + "id": "10ea0fe99f1cb7743ecc12fd2a83cb76853523f53e8f24f688daddd2d5687e32", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:28:29", + "severity": "Critical", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 28, + "end_line": 29 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + }, + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + } + ] + }, + { + "id": "61c61d9440d5a9c4b76ca89d7a6146b50dfdce4d5ec3e93d42fe255c67bf4684", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:36:37", + "severity": "Critical", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 36, + "end_line": 37 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + }, + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + } + ] + }, + { + "id": "7fef73eeb450ba731ada304710b14f4ca65790c4d571ebad2af3ee5191e5b42f", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:44:45", + "severity": "Critical", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 44, + "end_line": 45 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + }, + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + } + ] + }, + { + "id": "e81f87450a35ed038550bfe4f56dcff5bebd9c5ca5f309b6144de063cb99e1b2", + "category": "sast", + "name": "Use of a broken or risky cryptographic algorithm. Vulnerability to be resolved.", + "description": "The application was found using an insecure or risky digest or signature algorithm. MD2, MD4,\n MD5 and SHA1 hash algorithms have been found to be vulnerable to producing collisions.\n\nThis means\nthat two different values, when hashed, can lead to the same hash value. 
If the application is\ntrying\nto use these hash methods for storing passwords, then it is recommended to switch to a\npassword hashing\nalgorithm such as Argon2id or PBKDF2.\n\nNote that the `Crypto` and `Cryptodome` Python packages are no longer recommended for\nnew applications, instead consider using the [cryptography](https://cryptography.io/) package.\n\nExample of creating a SHA-384 hash using the `cryptography` package:\n```\nfrom cryptography.hazmat.primitives import hashes\n# Create a SHA384 digest\ndigest = hashes.Hash(hashes.SHA384())\n# Update the digest with some initial data\ndigest.update(b\"some data to hash\")\n# Add more data to the digest\ndigest.update(b\"some more data\")\n# Finalize the digest as bytes\nresult = digest.finalize()\n```\n\nFor more information on secure password storage see OWASP:\n- https://cheatsheetseries.owasp.org/cheatsheets/Password_Storage_Cheat_Sheet.html\n\nFor more information on the cryptography module see:\n- https://cryptography.io/en/latest/\n", + "cve": "semgrep_id:bandit.B303-1:141:141", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 141 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B303-1", + "value": "bandit.B303-1", + "url": "https://semgrep.dev/r/gitlab.bandit.B303-1" + }, + { + "type": "cwe", + "name": "CWE-327", + "value": "327", + "url": "https://cwe.mitre.org/data/definitions/327.html" + }, + { + "type": "owasp", + "name": "A02:2021 - Cryptographic Failures", + "value": "A02:2021" + }, + { + "type": "owasp", + "name": "A3:2017 - Sensitive Data Exposure", + "value": "A3:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B303", + "value": "B303" + } + ] + }, + { + "id": "3f8a15b8ea5a1e062262c837c4b5c763320c40622f50183f04fa2e584fc05e13", + "category": "sast", + "name": "Improper certificate validation. 
Vulnerability to be resolved.", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B501:17:18", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 17, + "end_line": 18 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + }, + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + } + ] + }, + { + "id": "8b6a98da4410a8abe0a3338ec5db34f4a9a48d0716ba296dcda0e93b63a5766f", + "category": "sast", + "name": "Improper certificate validation. 
Vulnerability to be resolved.", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B501:28:29", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 28, + "end_line": 29 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + }, + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + } + ] + }, + { + "id": "3b65f8017d6b3a73a5f6e7d1c0e9e78aa0daf817f06234985a9d011da1a9d804", + "category": "sast", + "name": "Improper certificate validation. 
Vulnerability to be resolved.", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B501:36:37", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 36, + "end_line": 37 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + }, + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + } + ] + }, + { + "id": "512131f12839cd51c58aaabf643870dc262bf169f0af15a47d0d073fcfd449ac", + "category": "sast", + "name": "Use of cryptographically weak pseudo-random number generator (PRNG). 
Vulnerability to be dismissed.", + "description": "Depending on the context, generating weak random numbers may expose cryptographic functions,\nwhich rely on these numbers, to be exploitable. When generating numbers for sensitive values\nsuch as tokens, nonces, and cryptographic keys, it is recommended that the `secrets` module\nbe used instead.\n\nExample using the secrets module:\n```\nimport secrets\n\n# Generate a secure random 64 byte array\nrandom_bytes = secrets.token_bytes(64)\nprint(random_bytes)\n\n# Generate a secure random 64 byte array as a hex string\nrandom_bytes_hex = secrets.token_hex(64)\n\n# Generate a secure random 64 byte array base64 encoded for use in URLs\nrandom_string = secrets.token_urlsafe(64)\n```\n\nFor more information on the `secrets` module see:\n- https://docs.python.org/3/library/secrets.html\n", + "cve": "semgrep_id:bandit.B311:295:295", + "severity": "Low", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 295 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B311", + "value": "bandit.B311", + "url": "https://semgrep.dev/r/gitlab.bandit.B311" + }, + { + "type": "cwe", + "name": "CWE-338", + "value": "338", + "url": "https://cwe.mitre.org/data/definitions/338.html" + }, + { + "type": "owasp", + "name": "A02:2021 - Cryptographic Failures", + "value": "A02:2021" + }, + { + "type": "owasp", + "name": "A3:2017 - Sensitive Data Exposure", + "value": "A3:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B311", + "value": "B311" + } + ] + }, + { + "id": "6cf069d55d47c54f5b2363af43f3c7a2d71ef25e04751111b6566fe89b90c8aa", + "category": "sast", + "name": "Use of cryptographically weak pseudo-random number generator (PRNG). 
Vulnerability to be dismissed.", + "description": "Depending on the context, generating weak random numbers may expose cryptographic functions", + "cve": "semgrep_id:bandit.B311:319:319", + "severity": "Low", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 319 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B311", + "value": "bandit.B311", + "url": "https://semgrep.dev/r/gitlab.bandit.B311" + }, + { + "type": "cwe", + "name": "CWE-338", + "value": "338", + "url": "https://cwe.mitre.org/data/definitions/338.html" + }, + { + "type": "owasp", + "name": "A02:2021 - Cryptographic Failures", + "value": "A02:2021" + }, + { + "type": "owasp", + "name": "A3:2017 - Sensitive Data Exposure", + "value": "A3:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B311", + "value": "B311" + } + ] + } + ], + "scan": { + "analyzer": { + "id": "semgrep", + "name": "Semgrep", + "url": "https://gitlab.com/gitlab-org/security-products/analyzers/semgrep", + "vendor": { + "name": "GitLab" + }, + "version": "6.6.2" + }, + "scanner": { + "id": "semgrep", + "name": "Semgrep", + "url": "https://github.com/returntocorp/semgrep", + "vendor": { + "name": "GitLab" + }, + "version": "1.118.0" + }, + "type": "sast", + "start_time": "2025-09-29T21:06:41", + "end_time": "2025-09-29T21:06:48", + "status": "success", + "observability": { + "events": [ + { + "event": "collect_sast_scan_metrics_from_pipeline", + "property": "5c418ec4-3b29-4631-bbbc-61e76f3f2396", + "label": "semgrep", + "value": 0, + "version": "6.6.2", + "exit_code": 0, + "override_count": 0, + "passthrough_count": 0, + "custom_exclude_path_count": 0, + "time_s": 6, + "file_count": 4 + } + ] + } + } +} diff --git a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.0-multiple-vulnerabilities-incorrect-primary-identifier.json 
b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.0-multiple-vulnerabilities-incorrect-primary-identifier.json new file mode 100644 index 0000000000000000000000000000000000000000..f3ad6be203367bbac1ea739a577772b0584932ff --- /dev/null +++ b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.0-multiple-vulnerabilities-incorrect-primary-identifier.json @@ -0,0 +1,677 @@ +{ + "version": "15.2.2", + "vulnerabilities": [ + { + "id": "ab0e702014cb7fcd3b9bcbed45425c3cfd65a237fbe8b51fd676ac675060bba9", + "category": "sast", + "name": "Deserialization of untrusted data. Vulnerability to be confirmed.", + "description": "The application was found using an unsafe version of `yaml` load which is vulnerable to\ndeserialization attacks. Deserialization attacks exploit the process of reading serialized\ndata and turning it back\ninto an object. By constructing malicious objects and serializing them, an adversary may\nattempt to:\n\n- Inject code that is executed upon object construction, which occurs during the\ndeserialization process.\n- Exploit mass assignment by including fields that are not normally a part of the serialized\ndata but are read in during deserialization.\n\nTo remediate this issue, use `safe_load()` or call `yaml.load()` with the `Loader` argument\nset to\n`yaml.SafeLoader`.\n\nExample loading YAML using `safe_load`:\n```\nimport yaml\n\n# Use safe_load to load data into an intermediary object\nintermediary_object = yaml.safe_load(\"\"\"user:\n name: 'test user'\"\"\"\n)\n# Create our real object, copying over only the necessary fields\nuser_object = {'user': {\n # Assign the deserialized data from intermediary object\n 'name': intermediary_object['user']['name'],\n # Add in protected data in object definition (or set it from a class constructor)\n 'is_admin': False,\n }\n}\n# Work with user_object\n# ...\n```\n\nFor more details on deserialization attacks in general, see OWASP's guide:\n- 
https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html\n", + "cve": "semgrep_id:bandit.B506:329:329", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 329 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-502", + "value": "502", + "url": "https://cwe.mitre.org/data/definitions/502.html" + }, + { + "type": "owasp", + "name": "A08:2021 - Software and Data Integrity Failures", + "value": "A08:2021" + }, + { + "type": "owasp", + "name": "A8:2017 - Insecure Deserialization", + "value": "A8:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B506", + "value": "B506" + }, + { + "type": "semgrep_id", + "name": "bandit.B506", + "value": "bandit.B506", + "url": "https://semgrep.dev/r/gitlab.bandit.B506" + } + ] + }, + { + "id": "4cc9c82ff0d985defd2801e1be40f784c149f8a70f5c1325c5d1979b13771bc1", + "category": "sast", + "name": "Vulnerbility with issue link. Vulnerability to be confirmed.", + "description": "SQL Injection is a critical vulnerability that can lead to data or system compromise.", + "cve": "semgrep_id:bandit.B608:265:265", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 265 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-89", + "value": "89", + "url": "https://cwe.mitre.org/data/definitions/89.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + }, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B608", + "value": "B608" + }, + { + "type": "semgrep_id", + "name": "bandit.B608", + "value": "bandit.B608", + "url": "https://semgrep.dev/r/gitlab.bandit.B608" + } + ] + }, + { + "id": "cfe69fc86bf76db2922260ecebb865819886315c51309ff8d52d7eaf9d48e501", + "category": "sast", + "name": "Improper 
neutralization of directives in dynamically evaluated code ('Eval Injection'). Vulnerability to be confirmed.", + "description": "The application was found calling the `eval` function OR Function()\n constructor OR setTimeout() OR setInterval() methods. If the\n\n variables or strings or functions passed to these methods contains user-supplied input, an adversary could attempt to execute arbitrary\n\n JavaScript\n\n code. This could lead to a full system compromise in Node applications or Cross-site Scripting\n\n (XSS) in web applications.\n\n\n To remediate this issue, remove all calls to above methods and consider alternative methods for\n\n executing\n\n the necessary business logic. There is almost no safe method of calling `eval` or other above stated sinks with\n\n user-supplied input.\n\n Instead, consider alternative methods such as using property accessors to dynamically access\n\n values.\n\n\n Example using property accessors to dynamically access an object's property:\n\n ```\n\n // Define an object\n\n const obj = {key1: 'value1', key2: 'value2'};\n\n // Get key dynamically from user input\n\n const key = getUserInput();\n\n // Check if the key exists in our object and return it, or a default empty string\n\n const value = (obj.hasOwnProperty(key)) ? 
obj[key] : '';\n\n // Work with the value\n\n ```\n\n\n For more information on why not to use `eval`, and alternatives see:\n\n - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/eval#never_use_eval!\n\n Other References:\n\n - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Function/Function\n\n - https://developer.mozilla.org/en-US/docs/Web/API/setTimeout\n\n - https://developer.mozilla.org/en-US/docs/Web/API/setInterval\n", + "cve": "semgrep_id:eslint.detect-eval-with-expression:10:10", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/static/main.js", + "start_line": 10 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-95", + "value": "95", + "url": "https://cwe.mitre.org/data/definitions/95.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + }, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + }, + { + "type": "eslint_rule_id", + "name": "ESLint rule ID/detect-eval-with-expression", + "value": "detect-eval-with-expression" + }, + { + "type": "semgrep_id", + "name": "eslint.detect-eval-with-expression", + "value": "eslint.detect-eval-with-expression", + "url": "https://semgrep.dev/r/gitlab.eslint.detect-eval-with-expression" + } + ] + }, + { + "id": "c30477f6181fd32ad367377123762e5b4f52cdd4e69951591fe704d5f35aeb86", + "category": "sast", + "name": "Improper neutralization of special elements used in an SQL Command ('SQL Injection'). Vulnerability to be confirmed.", + "description": "Detected user input used to manually construct a SQL string. This is usually\nbad practice because manual construction could accidentally result in a SQL\ninjection. An attacker could use a SQL injection to steal or modify contents\nof the database. Instead, use a parameterized query which is available\nby default in most database engines. 
Alternatively, consider using an\nobject-relational mapper (ORM) such as SQLAlchemy which will protect your queries.\n\nSQL Injections are a critical type of vulnerability that can lead to data \nor system compromise. By dynamically generating SQL query strings, user \ninput may be able to influence the logic of an SQL statement. \nThis could lead to an malicious parties accessing information they should not \nhave access to, or in some circumstances, being able to execute OS functionality\nor code.\n\nReplace all dynamically generated SQL queries with parameterized queries. \nIn situations where dynamic queries must be created, never use direct user input,\nbut instead use a map or dictionary of valid values and resolve them using a user \nsupplied key.\n\nFor example, some database drivers do not allow parameterized queries for \n`>` or `<` comparison operators. In these cases, do not use a user supplied \n`>` or `<` value, but rather have the user supply a `gt` or `lt` value. \nThe alphabetical values are then used to look up the `>` and `<` values to be used \nin the construction of the dynamic query. The same goes for other queries where \ncolumn or table names are required but cannot be parameterized.\nData that is possible user-controlled from a python request is passed\nto `execute()` function. To remediate this issue, use SQLAlchemy statements\nwhich are built with query parameterization and therefore not vulnerable \nto sql injection.\n\nIf for some reason this is not feasible, ensure calls including user-supplied \ndata pass it in to the `params` parameter of the `execute()` method.\nBelow is an example using `execute()`, passing in user-supplied data as `params`. 
\nThis will treat the query as a parameterized query and `params` as strictly data, \npreventing any possibility of SQL Injection.\n\n```\nname = request.args.get('name')\nreq = text('SELECT * FROM student WHERE firstname = :x')\nresult = db.session.execute(req, {\"x\":name})\n```\nFor more information on QuerySets see:\n- https://docs.djangoproject.com/en/4.2/ref/models/querysets/#queryset-api\nFor more information on SQL Injections see OWASP:\n- https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html\n", + "cve": "semgrep_id:python_flask_rule-flask-tainted-sql-string:261:261", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 261 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-89", + "value": "89", + "url": "https://cwe.mitre.org/data/definitions/89.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + }, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + }, + { + "type": "semgrep_id", + "name": "python_flask_rule-flask-tainted-sql-string", + "value": "python_flask_rule-flask-tainted-sql-string" + } + ] + }, + { + "id": "ea8e06bf1648e96a1ff75b3ebe4a702b83367b6959dad867ccc026cd067b5834", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:17:18", + "severity": "Critical", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 17, + "end_line": 18 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + }, + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + } + ] + }, + { + "id": "e97ebd3a747951b1b7499a76dd141ca96bbbdd784604ec615fbd46c9cce75780", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:28:29", + "severity": "Critical", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 28, + "end_line": 29 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + }, + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + } + ] + }, + { + "id": "6c95ad34402b92a5f731e94d44be3c76afa4162a99d22d431b4a803fdc78b294", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:36:37", + "severity": "Critical", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 36, + "end_line": 37 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + }, + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + } + ] + }, + { + "id": "9f07ee2737e73db366bbe53b2e4fce15c13c325ab73d162703984509f9192153", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:44:45", + "severity": "Critical", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 44, + "end_line": 45 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + }, + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + } + ] + }, + { + "id": "9a277ce32137e458b38b1da405ed4515505b286beea2c29ade5ccb770b250555", + "category": "sast", + "name": "Use of a broken or risky cryptographic algorithm. Vulnerability to be resolved.", + "description": "The application was found using an insecure or risky digest or signature algorithm. MD2, MD4,\n MD5 and SHA1 hash algorithms have been found to be vulnerable to producing collisions.\n\nThis means\nthat two different values, when hashed, can lead to the same hash value. 
If the application is\ntrying\nto use these hash methods for storing passwords, then it is recommended to switch to a\npassword hashing\nalgorithm such as Argon2id or PBKDF2.\n\nNote that the `Crypto` and `Cryptodome` Python packages are no longer recommended for\nnew applications, instead consider using the [cryptography](https://cryptography.io/) package.\n\nExample of creating a SHA-384 hash using the `cryptography` package:\n```\nfrom cryptography.hazmat.primitives import hashes\n# Create a SHA384 digest\ndigest = hashes.Hash(hashes.SHA384())\n# Update the digest with some initial data\ndigest.update(b\"some data to hash\")\n# Add more data to the digest\ndigest.update(b\"some more data\")\n# Finalize the digest as bytes\nresult = digest.finalize()\n```\n\nFor more information on secure password storage see OWASP:\n- https://cheatsheetseries.owasp.org/cheatsheets/Password_Storage_Cheat_Sheet.html\n\nFor more information on the cryptography module see:\n- https://cryptography.io/en/latest/\n", + "cve": "semgrep_id:bandit.B303-1:141:141", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 141 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-327", + "value": "327", + "url": "https://cwe.mitre.org/data/definitions/327.html" + }, + { + "type": "owasp", + "name": "A02:2021 - Cryptographic Failures", + "value": "A02:2021" + }, + { + "type": "owasp", + "name": "A3:2017 - Sensitive Data Exposure", + "value": "A3:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B303", + "value": "B303" + }, + { + "type": "semgrep_id", + "name": "bandit.B303-1", + "value": "bandit.B303-1", + "url": "https://semgrep.dev/r/gitlab.bandit.B303-1" + } + ] + }, + { + "id": "f01149fe2f18418cea98677f89a23274464d51abacf55e33c3dd5db227b35919", + "category": "sast", + "name": "Improper certificate validation. 
Vulnerability to be resolved.", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B501:17:18", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 17, + "end_line": 18 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + }, + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + } + ] + }, + { + "id": "6cdb150a93524217a68ea2a8f0540cae3065b7b62ad4be3f6e4b69089a4f6b0e", + "category": "sast", + "name": "Improper certificate validation. 
Vulnerability to be resolved.", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B501:28:29", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 28, + "end_line": 29 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + }, + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + } + ] + }, + { + "id": "c2ccb4205c8ee740b3459780633598fed6342aded59395213f3735d85c5d1f8f", + "category": "sast", + "name": "Improper certificate validation. 
Vulnerability to be resolved.", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B501:36:37", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 36, + "end_line": 37 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + }, + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + } + ] + }, + { + "id": "1c298e2f336e61d323cd8d407fd703a721c103b78eab94d7b07b44e87f563fb0", + "category": "sast", + "name": "Use of cryptographically weak pseudo-random number generator (PRNG). 
Vulnerability to be dismissed.", + "description": "Depending on the context, generating weak random numbers may expose cryptographic functions,\nwhich rely on these numbers, to be exploitable. When generating numbers for sensitive values\nsuch as tokens, nonces, and cryptographic keys, it is recommended that the `secrets` module\nbe used instead.\n\nExample using the secrets module:\n```\nimport secrets\n\n# Generate a secure random 64 byte array\nrandom_bytes = secrets.token_bytes(64)\nprint(random_bytes)\n\n# Generate a secure random 64 byte array as a hex string\nrandom_bytes_hex = secrets.token_hex(64)\n\n# Generate a secure random 64 byte array base64 encoded for use in URLs\nrandom_string = secrets.token_urlsafe(64)\n```\n\nFor more information on the `secrets` module see:\n- https://docs.python.org/3/library/secrets.html\n", + "cve": "semgrep_id:bandit.B311:295:295", + "severity": "Low", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 295 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-338", + "value": "338", + "url": "https://cwe.mitre.org/data/definitions/338.html" + }, + { + "type": "owasp", + "name": "A02:2021 - Cryptographic Failures", + "value": "A02:2021" + }, + { + "type": "owasp", + "name": "A3:2017 - Sensitive Data Exposure", + "value": "A3:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B311", + "value": "B311" + }, + { + "type": "semgrep_id", + "name": "bandit.B311", + "value": "bandit.B311", + "url": "https://semgrep.dev/r/gitlab.bandit.B311" + } + ] + }, + { + "id": "c551792feebfb8e0cea54dd6af035f5d3fd71a5b1f8af102fb1de5a479405a01", + "category": "sast", + "name": "Use of cryptographically weak pseudo-random number generator (PRNG). 
Vulnerability to be dismissed.", + "description": "Depending on the context, generating weak random numbers may expose cryptographic functions", + "cve": "semgrep_id:bandit.B311:319:319", + "severity": "Low", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 319 + }, + "identifiers": [ + { + "type": "cwe", + "name": "CWE-338", + "value": "338", + "url": "https://cwe.mitre.org/data/definitions/338.html" + }, + { + "type": "owasp", + "name": "A02:2021 - Cryptographic Failures", + "value": "A02:2021" + }, + { + "type": "owasp", + "name": "A3:2017 - Sensitive Data Exposure", + "value": "A3:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B311", + "value": "B311" + }, + { + "type": "semgrep_id", + "name": "bandit.B311", + "value": "bandit.B311", + "url": "https://semgrep.dev/r/gitlab.bandit.B311" + } + ] + } + ], + "scan": { + "analyzer": { + "id": "semgrep", + "name": "Semgrep", + "url": "https://gitlab.com/gitlab-org/security-products/analyzers/semgrep", + "vendor": { + "name": "GitLab" + }, + "version": "6.7.0" + }, + "scanner": { + "id": "semgrep", + "name": "Semgrep", + "url": "https://github.com/returntocorp/semgrep", + "vendor": { + "name": "GitLab" + }, + "version": "1.118.0" + }, + "type": "sast", + "start_time": "2025-11-07T18:27:55", + "end_time": "2025-11-07T18:28:02", + "status": "success", + "observability": { + "events": [ + { + "event": "collect_sast_scan_metrics_from_pipeline", + "property": "12d9d108-d5b4-47fb-87ea-1789284a5930", + "label": "semgrep", + "value": 0, + "version": "6.7.0", + "exit_code": 0, + "override_count": 0, + "passthrough_count": 0, + "custom_exclude_path_count": 0, + "time_s": 6, + "file_count": 4 + } + ] + } + } +} diff --git a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-additional-vulnerabilities-correct-primary-identifier.json 
b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-additional-vulnerabilities-correct-primary-identifier.json new file mode 100644 index 0000000000000000000000000000000000000000..1ac0093aef4fc54140bb4f2f8b2ba18959c0cfa3 --- /dev/null +++ b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-additional-vulnerabilities-correct-primary-identifier.json @@ -0,0 +1,137 @@ +{ + "version": "15.2.2", + "vulnerabilities": [ + { + "id": "878843d5b4edf0042e3066429a4cac5f66f8c7ad72b40056601fbb191fa13214", + "category": "sast", + "name": "Additional vulnerability 1", + "description": "The application was found using the `requests` module without configuring a timeout value.", + "cve": "semgrep_id:bandit.B501:44:45", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 44, + "end_line": 45 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + }, + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + } + ] + }, + { + "id": "6cac58319f88ad3a1cb16df9c1272049ea0f909fa5fc3f67508148fda3ce5e2c", + "category": "sast", + "name": "Additional vulnerability 2", + "description": "The `RegExp` constructor was called with a non-literal value.", + "cve": "semgrep_id:eslint.detect-non-literal-regexp:15:15", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/static/main.js", + "start_line": 15 + }, + "identifiers": [ + { + "type": "semgrep_id", + 
"name": "eslint.detect-non-literal-regexp", + "value": "eslint.detect-non-literal-regexp", + "url": "https://semgrep.dev/r/gitlab.eslint.detect-non-literal-regexp" + }, + { + "type": "cwe", + "name": "CWE-185", + "value": "185", + "url": "https://cwe.mitre.org/data/definitions/185.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + }, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + }, + { + "type": "eslint_rule_id", + "name": "ESLint rule ID/detect-non-literal-regexp", + "value": "detect-non-literal-regexp" + } + ] + } + ], + "scan": { + "analyzer": { + "id": "semgrep", + "name": "Semgrep", + "url": "https://gitlab.com/gitlab-org/security-products/analyzers/semgrep", + "vendor": { + "name": "GitLab" + }, + "version": "6.7.1" + }, + "scanner": { + "id": "semgrep", + "name": "Semgrep", + "url": "https://github.com/returntocorp/semgrep", + "vendor": { + "name": "GitLab" + }, + "version": "1.118.0" + }, + "type": "sast", + "start_time": "2025-11-07T18:35:36", + "end_time": "2025-11-07T18:35:41", + "status": "success", + "observability": { + "events": [ + { + "event": "collect_sast_scan_metrics_from_pipeline", + "property": "fcdda559-730c-40ab-9aea-0356dbb429b6", + "label": "semgrep", + "value": 0, + "version": "6.7.1", + "exit_code": 2, + "override_count": 0, + "passthrough_count": 0, + "custom_exclude_path_count": 0, + "time_s": 5, + "file_count": 4 + } + ] + } + } +} diff --git a/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-multiple-vulnerabilities-correct-primary-identifier.json b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-multiple-vulnerabilities-correct-primary-identifier.json new file mode 100644 index 0000000000000000000000000000000000000000..4180786039a98aff5c23502a9ee1925f814e2e5d --- /dev/null +++ b/ee/spec/fixtures/security_reports/master/gl-sast-report-semgrep-6.7.1-multiple-vulnerabilities-correct-primary-identifier.json @@ -0,0 +1,677 
@@ +{ + "version": "15.2.2", + "vulnerabilities": [ + { + "id": "6a8d97c532a32e7bb9e1d93b3300977b8a7e75f9ddcc5bac4edaa6cda3603833", + "category": "sast", + "name": "Deserialization of untrusted data. Vulnerability to be confirmed.", + "description": "The application was found using an unsafe version of `yaml` load which is vulnerable to\ndeserialization attacks. Deserialization attacks exploit the process of reading serialized\ndata and turning it back\ninto an object. By constructing malicious objects and serializing them, an adversary may\nattempt to:\n\n- Inject code that is executed upon object construction, which occurs during the\ndeserialization process.\n- Exploit mass assignment by including fields that are not normally a part of the serialized\ndata but are read in during deserialization.\n\nTo remediate this issue, use `safe_load()` or call `yaml.load()` with the `Loader` argument\nset to\n`yaml.SafeLoader`.\n\nExample loading YAML using `safe_load`:\n```\nimport yaml\n\n# Use safe_load to load data into an intermediary object\nintermediary_object = yaml.safe_load(\"\"\"user:\n name: 'test user'\"\"\"\n)\n# Create our real object, copying over only the necessary fields\nuser_object = {'user': {\n # Assign the deserialized data from intermediary object\n 'name': intermediary_object['user']['name'],\n # Add in protected data in object definition (or set it from a class constructor)\n 'is_admin': False,\n }\n}\n# Work with user_object\n# ...\n```\n\nFor more details on deserialization attacks in general, see OWASP's guide:\n- https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html\n", + "cve": "semgrep_id:bandit.B506:329:329", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 329 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B506", + "value": "bandit.B506", + "url": "https://semgrep.dev/r/gitlab.bandit.B506" + }, + { + 
"type": "cwe", + "name": "CWE-502", + "value": "502", + "url": "https://cwe.mitre.org/data/definitions/502.html" + }, + { + "type": "owasp", + "name": "A08:2021 - Software and Data Integrity Failures", + "value": "A08:2021" + }, + { + "type": "owasp", + "name": "A8:2017 - Insecure Deserialization", + "value": "A8:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B506", + "value": "B506" + } + ] + }, + { + "id": "185f6aa5aece728c2b94f16ff36ea99339dbeb39a027964d65a0e544b439529d", + "category": "sast", + "name": "Vulnerbility with issue link. Vulnerability to be confirmed.", + "description": "SQL Injection is a critical vulnerability that can lead to data or system compromise.", + "cve": "semgrep_id:bandit.B608:265:265", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 265 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B608", + "value": "bandit.B608", + "url": "https://semgrep.dev/r/gitlab.bandit.B608" + }, + { + "type": "cwe", + "name": "CWE-89", + "value": "89", + "url": "https://cwe.mitre.org/data/definitions/89.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + }, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B608", + "value": "B608" + } + ] + }, + { + "id": "afb3a18f344a72ed01c842afd1939b4c33b150ba50234001d8eb34ce72a977f4", + "category": "sast", + "name": "Improper neutralization of directives in dynamically evaluated code ('Eval Injection'). Vulnerability to be confirmed.", + "description": "The application was found calling the `eval` function OR Function()\n constructor OR setTimeout() OR setInterval() methods. If the\n\n variables or strings or functions passed to these methods contains user-supplied input, an adversary could attempt to execute arbitrary\n\n JavaScript\n\n code. 
This could lead to a full system compromise in Node applications or Cross-site Scripting\n\n (XSS) in web applications.\n\n\n To remediate this issue, remove all calls to above methods and consider alternative methods for\n\n executing\n\n the necessary business logic. There is almost no safe method of calling `eval` or other above stated sinks with\n\n user-supplied input.\n\n Instead, consider alternative methods such as using property accessors to dynamically access\n\n values.\n\n\n Example using property accessors to dynamically access an object's property:\n\n ```\n\n // Define an object\n\n const obj = {key1: 'value1', key2: 'value2'};\n\n // Get key dynamically from user input\n\n const key = getUserInput();\n\n // Check if the key exists in our object and return it, or a default empty string\n\n const value = (obj.hasOwnProperty(key)) ? obj[key] : '';\n\n // Work with the value\n\n ```\n\n\n For more information on why not to use `eval`, and alternatives see:\n\n - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/eval#never_use_eval!\n\n Other References:\n\n - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Function/Function\n\n - https://developer.mozilla.org/en-US/docs/Web/API/setTimeout\n\n - https://developer.mozilla.org/en-US/docs/Web/API/setInterval\n", + "cve": "semgrep_id:eslint.detect-eval-with-expression:10:10", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/static/main.js", + "start_line": 10 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "eslint.detect-eval-with-expression", + "value": "eslint.detect-eval-with-expression", + "url": "https://semgrep.dev/r/gitlab.eslint.detect-eval-with-expression" + }, + { + "type": "cwe", + "name": "CWE-95", + "value": "95", + "url": "https://cwe.mitre.org/data/definitions/95.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + 
}, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + }, + { + "type": "eslint_rule_id", + "name": "ESLint rule ID/detect-eval-with-expression", + "value": "detect-eval-with-expression" + } + ] + }, + { + "id": "4e7633d40f31f6398b4c7ffc4bf481ba6fe627c34042d7439b71259e6ea9b32c", + "category": "sast", + "name": "Improper neutralization of special elements used in an SQL Command ('SQL Injection'). Vulnerability to be confirmed.", + "description": "Detected user input used to manually construct a SQL string. This is usually\nbad practice because manual construction could accidentally result in a SQL\ninjection. An attacker could use a SQL injection to steal or modify contents\nof the database. Instead, use a parameterized query which is available\nby default in most database engines. Alternatively, consider using an\nobject-relational mapper (ORM) such as SQLAlchemy which will protect your queries.\n\nSQL Injections are a critical type of vulnerability that can lead to data \nor system compromise. By dynamically generating SQL query strings, user \ninput may be able to influence the logic of an SQL statement. \nThis could lead to an malicious parties accessing information they should not \nhave access to, or in some circumstances, being able to execute OS functionality\nor code.\n\nReplace all dynamically generated SQL queries with parameterized queries. \nIn situations where dynamic queries must be created, never use direct user input,\nbut instead use a map or dictionary of valid values and resolve them using a user \nsupplied key.\n\nFor example, some database drivers do not allow parameterized queries for \n`>` or `<` comparison operators. In these cases, do not use a user supplied \n`>` or `<` value, but rather have the user supply a `gt` or `lt` value. \nThe alphabetical values are then used to look up the `>` and `<` values to be used \nin the construction of the dynamic query. 
The same goes for other queries where \ncolumn or table names are required but cannot be parameterized.\nData that is possible user-controlled from a python request is passed\nto `execute()` function. To remediate this issue, use SQLAlchemy statements\nwhich are built with query parameterization and therefore not vulnerable \nto sql injection.\n\nIf for some reason this is not feasible, ensure calls including user-supplied \ndata pass it in to the `params` parameter of the `execute()` method.\nBelow is an example using `execute()`, passing in user-supplied data as `params`. \nThis will treat the query as a parameterized query and `params` as strictly data, \npreventing any possibility of SQL Injection.\n\n```\nname = request.args.get('name')\nreq = text('SELECT * FROM student WHERE firstname = :x')\nresult = db.session.execute(req, {\"x\":name})\n```\nFor more information on QuerySets see:\n- https://docs.djangoproject.com/en/4.2/ref/models/querysets/#queryset-api\nFor more information on SQL Injections see OWASP:\n- https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html\n", + "cve": "semgrep_id:python_flask_rule-flask-tainted-sql-string:261:261", + "severity": "High", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 261 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "python_flask_rule-flask-tainted-sql-string", + "value": "python_flask_rule-flask-tainted-sql-string" + }, + { + "type": "cwe", + "name": "CWE-89", + "value": "89", + "url": "https://cwe.mitre.org/data/definitions/89.html" + }, + { + "type": "owasp", + "name": "A03:2021 - Injection", + "value": "A03:2021" + }, + { + "type": "owasp", + "name": "A1:2017 - Injection", + "value": "A1:2017" + } + ] + }, + { + "id": "819fa95af305ebbf12f83f5cd85ce6b9720a22a112869bb6ef76bec8fe449d62", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The 
application was found using the `requests` module without configuring a timeout value for\nconnections. This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:17:18", + "severity": "Critical", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 17, + "end_line": 18 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + }, + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + } + ] + }, + { + "id": "10ea0fe99f1cb7743ecc12fd2a83cb76853523f53e8f24f688daddd2d5687e32", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:28:29", + "severity": "Critical", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 28, + "end_line": 29 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + }, + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + } + ] + }, + { + "id": "61c61d9440d5a9c4b76ca89d7a6146b50dfdce4d5ec3e93d42fe255c67bf4684", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:36:37", + "severity": "Critical", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 36, + "end_line": 37 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + }, + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + } + ] + }, + { + "id": "7fef73eeb450ba731ada304710b14f4ca65790c4d571ebad2af3ee5191e5b42f", + "category": "sast", + "name": "Allocation of resources without limits or throttling", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. 
This could lead to uncontrolled resource consumption where the application could\nrun out of\nsocket descriptors, effectively causing a Denial of Service (DoS).\n\nTo remediate this issue, pass in a `timeout=` argument to each `requests` call.\n\nExample using a timeout for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds\nresponse = requests.get('https://example.com', timeout=10)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B113:44:45", + "severity": "Critical", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 44, + "end_line": 45 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B113", + "value": "bandit.B113", + "url": "https://semgrep.dev/r/gitlab.bandit.B113" + }, + { + "type": "cwe", + "name": "CWE-770", + "value": "770", + "url": "https://cwe.mitre.org/data/definitions/770.html" + }, + { + "type": "owasp", + "name": "A05:2021 - Security Misconfiguration", + "value": "A05:2021" + }, + { + "type": "owasp", + "name": "A6:2017 - Security Misconfiguration", + "value": "A6:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B113", + "value": "B113" + } + ] + }, + { + "id": "e81f87450a35ed038550bfe4f56dcff5bebd9c5ca5f309b6144de063cb99e1b2", + "category": "sast", + "name": "Use of a broken or risky cryptographic algorithm. Vulnerability to be resolved.", + "description": "The application was found using an insecure or risky digest or signature algorithm. MD2, MD4,\n MD5 and SHA1 hash algorithms have been found to be vulnerable to producing collisions.\n\nThis means\nthat two different values, when hashed, can lead to the same hash value. 
If the application is\ntrying\nto use these hash methods for storing passwords, then it is recommended to switch to a\npassword hashing\nalgorithm such as Argon2id or PBKDF2.\n\nNote that the `Crypto` and `Cryptodome` Python packages are no longer recommended for\nnew applications, instead consider using the [cryptography](https://cryptography.io/) package.\n\nExample of creating a SHA-384 hash using the `cryptography` package:\n```\nfrom cryptography.hazmat.primitives import hashes\n# Create a SHA384 digest\ndigest = hashes.Hash(hashes.SHA384())\n# Update the digest with some initial data\ndigest.update(b\"some data to hash\")\n# Add more data to the digest\ndigest.update(b\"some more data\")\n# Finalize the digest as bytes\nresult = digest.finalize()\n```\n\nFor more information on secure password storage see OWASP:\n- https://cheatsheetseries.owasp.org/cheatsheets/Password_Storage_Cheat_Sheet.html\n\nFor more information on the cryptography module see:\n- https://cryptography.io/en/latest/\n", + "cve": "semgrep_id:bandit.B303-1:141:141", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 141 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B303-1", + "value": "bandit.B303-1", + "url": "https://semgrep.dev/r/gitlab.bandit.B303-1" + }, + { + "type": "cwe", + "name": "CWE-327", + "value": "327", + "url": "https://cwe.mitre.org/data/definitions/327.html" + }, + { + "type": "owasp", + "name": "A02:2021 - Cryptographic Failures", + "value": "A02:2021" + }, + { + "type": "owasp", + "name": "A3:2017 - Sensitive Data Exposure", + "value": "A3:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B303", + "value": "B303" + } + ] + }, + { + "id": "3f8a15b8ea5a1e062262c837c4b5c763320c40622f50183f04fa2e584fc05e13", + "category": "sast", + "name": "Improper certificate validation. 
Vulnerability to be resolved.", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B501:17:18", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 17, + "end_line": 18 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + }, + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + } + ] + }, + { + "id": "8b6a98da4410a8abe0a3338ec5db34f4a9a48d0716ba296dcda0e93b63a5766f", + "category": "sast", + "name": "Improper certificate validation. 
Vulnerability to be resolved.", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B501:28:29", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 28, + "end_line": 29 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + }, + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + } + ] + }, + { + "id": "3b65f8017d6b3a73a5f6e7d1c0e9e78aa0daf817f06234985a9d011da1a9d804", + "category": "sast", + "name": "Improper certificate validation. 
Vulnerability to be resolved.", + "description": "The application was found using the `requests` module without configuring a timeout value for\nconnections. The `verify=False` argument has been set, which effectively disables the\nvalidation\nof server certificates.\n\nThis allows for an adversary who is in between the application and the target host to intercept\npotentially sensitive information or transmit malicious data.\n\nTo remediate this issue either remove the `verify=False` argument, or set `verify=True`to each\n`requests` call.\n\nExample verifying server certificates for an HTTP GET request:\n```\n# Issue a GET request to https://example.com with a timeout of 10 seconds and verify the\n# server certificate explicitly.\nresponse = requests.get('https://example.com', timeout=10, verify=True)\n# Work with the response object\n# ...\n```\n\nFor more information on using the requests module see:\n- https://requests.readthedocs.io/en/latest/api/\n", + "cve": "semgrep_id:bandit.B501:36:37", + "severity": "Medium", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "tests/e2e_zap.py", + "start_line": 36, + "end_line": 37 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B501", + "value": "bandit.B501", + "url": "https://semgrep.dev/r/gitlab.bandit.B501" + }, + { + "type": "cwe", + "name": "CWE-295", + "value": "295", + "url": "https://cwe.mitre.org/data/definitions/295.html" + }, + { + "type": "owasp", + "name": "A07:2021 - Identification and Authentication Failures", + "value": "A07:2021" + }, + { + "type": "owasp", + "name": "A2:2017 - Broken Authentication", + "value": "A2:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B501", + "value": "B501" + } + ] + }, + { + "id": "512131f12839cd51c58aaabf643870dc262bf169f0af15a47d0d073fcfd449ac", + "category": "sast", + "name": "Use of cryptographically weak pseudo-random number generator (PRNG). 
Vulnerability to be dismissed.", + "description": "Depending on the context, generating weak random numbers may expose cryptographic functions,\nwhich rely on these numbers, to be exploitable. When generating numbers for sensitive values\nsuch as tokens, nonces, and cryptographic keys, it is recommended that the `secrets` module\nbe used instead.\n\nExample using the secrets module:\n```\nimport secrets\n\n# Generate a secure random 64 byte array\nrandom_bytes = secrets.token_bytes(64)\nprint(random_bytes)\n\n# Generate a secure random 64 byte array as a hex string\nrandom_bytes_hex = secrets.token_hex(64)\n\n# Generate a secure random 64 byte array base64 encoded for use in URLs\nrandom_string = secrets.token_urlsafe(64)\n```\n\nFor more information on the `secrets` module see:\n- https://docs.python.org/3/library/secrets.html\n", + "cve": "semgrep_id:bandit.B311:295:295", + "severity": "Low", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 295 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B311", + "value": "bandit.B311", + "url": "https://semgrep.dev/r/gitlab.bandit.B311" + }, + { + "type": "cwe", + "name": "CWE-338", + "value": "338", + "url": "https://cwe.mitre.org/data/definitions/338.html" + }, + { + "type": "owasp", + "name": "A02:2021 - Cryptographic Failures", + "value": "A02:2021" + }, + { + "type": "owasp", + "name": "A3:2017 - Sensitive Data Exposure", + "value": "A3:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B311", + "value": "B311" + } + ] + }, + { + "id": "6cf069d55d47c54f5b2363af43f3c7a2d71ef25e04751111b6566fe89b90c8aa", + "category": "sast", + "name": "Use of cryptographically weak pseudo-random number generator (PRNG). 
Vulnerability to be dismissed.", + "description": "Depending on the context, generating weak random numbers may expose cryptographic functions", + "cve": "semgrep_id:bandit.B311:319:319", + "severity": "Low", + "scanner": { + "id": "semgrep", + "name": "Semgrep" + }, + "location": { + "file": "app/app.py", + "start_line": 319 + }, + "identifiers": [ + { + "type": "semgrep_id", + "name": "bandit.B311", + "value": "bandit.B311", + "url": "https://semgrep.dev/r/gitlab.bandit.B311" + }, + { + "type": "cwe", + "name": "CWE-338", + "value": "338", + "url": "https://cwe.mitre.org/data/definitions/338.html" + }, + { + "type": "owasp", + "name": "A02:2021 - Cryptographic Failures", + "value": "A02:2021" + }, + { + "type": "owasp", + "name": "A3:2017 - Sensitive Data Exposure", + "value": "A3:2017" + }, + { + "type": "bandit_test_id", + "name": "Bandit Test ID B311", + "value": "B311" + } + ] + } + ], + "scan": { + "analyzer": { + "id": "semgrep", + "name": "Semgrep", + "url": "https://gitlab.com/gitlab-org/security-products/analyzers/semgrep", + "vendor": { + "name": "GitLab" + }, + "version": "6.7.1" + }, + "scanner": { + "id": "semgrep", + "name": "Semgrep", + "url": "https://github.com/returntocorp/semgrep", + "vendor": { + "name": "GitLab" + }, + "version": "1.118.0" + }, + "type": "sast", + "start_time": "2025-11-07T18:35:36", + "end_time": "2025-11-07T18:35:41", + "status": "success", + "observability": { + "events": [ + { + "event": "collect_sast_scan_metrics_from_pipeline", + "property": "fcdda559-730c-40ab-9aea-0356dbb429b6", + "label": "semgrep", + "value": 0, + "version": "6.7.1", + "exit_code": 2, + "override_count": 0, + "passthrough_count": 0, + "custom_exclude_path_count": 0, + "time_s": 5, + "file_count": 4 + } + ] + } + } +} diff --git a/ee/spec/lib/tasks/vulnerabilities/restore_incorrect_vulnerability_states_rake_spec.rb b/ee/spec/lib/tasks/vulnerabilities/restore_incorrect_vulnerability_states_rake_spec.rb new file mode 100644 index 
0000000000000000000000000000000000000000..8b41ada8e75c8e7af419e54f74176ca9349011fb --- /dev/null +++ b/ee/spec/lib/tasks/vulnerabilities/restore_incorrect_vulnerability_states_rake_spec.rb @@ -0,0 +1,85 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe 'Restore vulnerability states rake task', feature_category: :vulnerability_management do + include RakeHelpers + + before_all do + Rake.application.rake_require 'ee/lib/tasks/gitlab/vulnerabilities/restore_incorrect_vulnerability_states', + [Rails.root.to_s] + Rake::Task.define_task(:environment) + end + + context 'when using a namespace' do + describe 'restore_incorrect_vulnerability_states_for_namespace' do + let(:args) { ['123456'] } + let(:expected_args) { { namespace_id: '123456' } } + + subject(:task) do + run_rake_task('gitlab:vulnerabilities:restore_incorrect_vulnerability_states_for_namespace', args) + end + + it 'calls rake service with args' do + expect_next_instance_of(Vulnerabilities::Rake::RestoreIncorrectVulnerabilityStates, expected_args) do |instance| + expect(instance).to receive(:execute) + end + + task + end + end + + describe 'restore_incorrect_vulnerability_states_for_namespace:revert' do + let(:args) { ['123456'] } + let(:expected_args) { { namespace_id: '123456', revert: true } } + + subject(:task) do + run_rake_task('gitlab:vulnerabilities:restore_incorrect_vulnerability_states_for_namespace:revert', args) + end + + it 'calls rake service with args' do + expect_next_instance_of(Vulnerabilities::Rake::RestoreIncorrectVulnerabilityStates, expected_args) do |instance| + expect(instance).to receive(:execute) + end + + task + end + end + end + + context 'when using a project_id' do + describe 'restore_incorrect_vulnerability_states_for_project' do + let(:args) { ['123456'] } + let(:expected_args) { { project_id: '123456' } } + + subject(:task) do + run_rake_task('gitlab:vulnerabilities:restore_incorrect_vulnerability_states_for_project', args) + end + + it 'calls rake 
service with args' do + expect_next_instance_of(Vulnerabilities::Rake::RestoreIncorrectVulnerabilityStates, expected_args) do |instance| + expect(instance).to receive(:execute) + end + + task + end + end + + describe 'restore_incorrect_vulnerability_states_for_project:revert' do + let(:args) { ['123456'] } + let(:expected_args) { { project_id: '123456', revert: true } } + + subject(:task) do + run_rake_task('gitlab:vulnerabilities:restore_incorrect_vulnerability_states_for_project:revert', args) + end + + it 'calls rake service with args' do + expect_next_instance_of(Vulnerabilities::Rake::RestoreIncorrectVulnerabilityStates, expected_args) do |instance| + expect(instance).to receive(:execute) + end + + task + end + end + end +end diff --git a/ee/spec/lib/vulnerabilities/rake/restore_incorrect_vulnerability_states_spec.rb b/ee/spec/lib/vulnerabilities/rake/restore_incorrect_vulnerability_states_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..bab843b2c0cb5697727d9cec9f983fab71f197c1 --- /dev/null +++ b/ee/spec/lib/vulnerabilities/rake/restore_incorrect_vulnerability_states_spec.rb @@ -0,0 +1,153 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Vulnerabilities::Rake::RestoreIncorrectVulnerabilityStates, feature_category: :vulnerability_management do + include MigrationsHelpers + + let(:args) { nil } + + describe 'execute' do + let(:batched_migration) { described_class::MIGRATION } + let(:connection) { SecApplicationRecord.connection } + + def up + described_class.new(**args).execute + end + + def down + described_class.new(**args, revert: true).execute + end + + context 'when migrating by namespace_id' do + let(:args) { { namespace_id: namespace_id } } + + context 'when performing an instance migration' do + let(:namespace_id) { 'instance' } + + it 'schedules migration' do + up + + Gitlab::Database::SharedModel.using_connection(connection) do + expect(batched_migration).to have_scheduled_batched_migration( + 
table_name: :vulnerability_reads, + column_name: :vulnerability_id, + gitlab_schema: :gitlab_sec, + job_arguments: [args] + ) + end + + down + + Gitlab::Database::SharedModel.using_connection(connection) do + expect(batched_migration).not_to have_scheduled_batched_migration + end + end + end + + context 'when migrating a namespace' do + let_it_be(:namespace) { create(:namespace) } + let_it_be(:namespace_id) { namespace.id.to_s } + + it 'schedules migration with parsed namespace_id' do + up + + Gitlab::Database::SharedModel.using_connection(connection) do + expect(batched_migration).to have_scheduled_batched_migration( + table_name: :vulnerability_reads, + column_name: :vulnerability_id, + gitlab_schema: :gitlab_sec, + job_arguments: [args] + ) + end + + down + + Gitlab::Database::SharedModel.using_connection(connection) do + expect(batched_migration).not_to have_scheduled_batched_migration + end + end + end + + describe 'validations' do + context 'when namespace_id is not a number' do + let(:namespace_id) { 'foo' } + + it 'prints error and exits' do + expect { up }.to raise_error(SystemExit) + .and output("Error: Expected namespace_id 'foo' to be a number.\n" \ + "Use `gitlab-rake 'gitlab:vulnerabilities:restore_incorrect_vulnerability_states_for_namespace" \ + "[instance]'` to perform an instance migration.\n").to_stderr + end + end + + context 'when namespace_id does not exist' do + let(:namespace_id) { non_existing_record_id.to_s } + + it 'prints error and exits' do + expect { up }.to raise_error(SystemExit) + .and output("Namespace:#{namespace_id} not found.\n").to_stderr + end + end + + context 'when namespace is a subgroup' do + let_it_be(:namespace) { create(:group, :nested) } + let_it_be(:namespace_id) { namespace.id.to_s } + + it 'prints error and exits' do + expect { up }.to raise_error(SystemExit) + .and output("Namespace must be top-level.\n").to_stderr + end + end + end + end + + context 'when migrating by project_id' do + let(:args) { { project_id: 
project_id } } + + context 'when migrating a project' do + let_it_be(:project) { create(:project) } + let_it_be(:project_id) { project.id.to_s } + + it 'schedules migration with parsed project_id' do + up + + Gitlab::Database::SharedModel.using_connection(connection) do + expect(batched_migration).to have_scheduled_batched_migration( + table_name: :vulnerability_reads, + column_name: :vulnerability_id, + gitlab_schema: :gitlab_sec, + job_arguments: [{ project_id: project_id.to_i }] + ) + end + + down + + Gitlab::Database::SharedModel.using_connection(connection) do + expect(batched_migration).not_to have_scheduled_batched_migration + end + end + end + + describe 'validations' do + context 'when project_id is not a number' do + let(:project_id) { 'foo' } + + it 'prints error and exits' do + expect { up }.to raise_error(SystemExit) + .and output("Error: Expected project_id 'foo' to be a number.\n").to_stderr + end + end + + context 'when project_id does not exist' do + let(:project_id) { non_existing_record_id.to_s } + + it 'prints error and exits' do + expect { up }.to raise_error(SystemExit) + .and output("Project:#{project_id} not found.\n").to_stderr + end + end + end + end + end +end diff --git a/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb new file mode 100644 index 0000000000000000000000000000000000000000..2d4fdbb06ba5542193583bb2e2927a0ff8db2ada --- /dev/null +++ b/lib/gitlab/background_migration/restore_incorrect_vulnerability_states.rb @@ -0,0 +1,928 @@ +# frozen_string_literal: true + +# rubocop:disable Metrics/ClassLength -- we need to keep the logic in a single class +module Gitlab + module BackgroundMigration + class RestoreIncorrectVulnerabilityStates < BatchedMigrationJob + job_arguments :job_args + operation_name :restore_incorrect_vulnerability_states + feature_category :static_application_security_testing + + TRANSITION_COMMENT_TEMPLATE = 
'%{original_comment} (original comment automatically copied ' \ + 'from transition %{transition_id} to fix semgrep 6.7.0 bug)' + + SYSTEM_NOTE_TEMPLATE = 'changed vulnerability status from %{from_state} to %{to_state} with the ' \ + 'following comment: "%{original_comment}" (original comment automatically copied from transition ' \ + '%{transition_id} to fix semgrep 6.7.0 bug)' + + SUB_BATCH_SIZE = 100 + + # This migration fixes corrupted vulnerability data introduced by semgrep v6.7.0, + # released on 16 September 2025 at 20:30:02 UTC + module Migratable + module Enums + module Security + def self.scan_types = { sast: 1 } + end + + module Vulnerability + REPORT_TYPES = { sast: 0 }.with_indifferent_access.freeze + + VULNERABILITY_STATES = { + detected: 1, + dismissed: 2, + resolved: 3, + confirmed: 4 + }.with_indifferent_access.freeze + + SEVERITY_LEVELS = { + info: 1, + unknown: 2, + low: 4, + medium: 5, + high: 6, + critical: 7 + }.with_indifferent_access.freeze + + def self.vulnerability_states = VULNERABILITY_STATES + def self.report_types = REPORT_TYPES + def self.severity_levels = SEVERITY_LEVELS + end + end + + module Vulnerabilities + class Identifier + def self.sha1_fingerprint(identifier) + fingerprint_string = "#{identifier['type']}:#{identifier['value']}" + [Digest::SHA1.hexdigest(fingerprint_string)].pack('H*') # rubocop:disable Fips/SHA1 -- we must use SHA1, since this is how the fingerprint is stored in the DB + end + end + end + end + + def scoped_vulnerability_reads(vulnerability_reads) + relation = vulnerability_reads.where(report_type: Migratable::Enums::Vulnerability.report_types[:sast]) + project_id = job_args['project_id'] + namespace_id = job_args['namespace_id'] + + if project_id + log_info("Migrating affected vulnerabilities with project_id #{project_id}") + return relation.where(project_id: project_id) + end + + if namespace_id == 'instance' + log_info("Migrating affected vulnerabilities for entire instance") + return relation + end + + 
log_info("Migrating affected vulnerabilities with namespace_id #{namespace_id}") + relation + .where(vulnerability_reads.arel_table[:traversal_ids].gteq([namespace_id.to_i])) + .where(vulnerability_reads.arel_table[:traversal_ids].lt([namespace_id.to_i.next])) + end + + def perform + each_sub_batch do |vulnerability_reads_batch| + log_info("Processing sub-batch with #{vulnerability_reads_batch.count} records") + vulnerability_reads = scoped_vulnerability_reads(vulnerability_reads_batch) + + next if vulnerability_reads.blank? + + process_vulnerability_batch(vulnerability_reads_batch) + end + end + + def process_vulnerability_batch(vulnerability_reads_batch) + # find all vulnerabilities that are affected by the bug, and combine all related data + affected_vulnerability_data = build_affected_vulnerability_data(vulnerability_reads_batch) + + if affected_vulnerability_data.empty? + log_info("No vulnerabilities found for batch") + return + end + + collections = initialize_collections + collect_updates_for_vulnerabilities(affected_vulnerability_data, collections) + persist_all_updates(collections) + end + + def initialize_collections + { + state_transitions: [], + notes: [], + system_note_metadata: [], + vulnerabilities: [], + security_findings: [], + vulnerability_reads: [], + vulnerability_findings: [] + } + end + + def collect_updates_for_vulnerabilities(affected_vulnerability_data, collections) + current_time = Time.current + + affected_vulnerability_data.each_value do |affected_vuln_data| + # If semgrep v6.7.1 has been executed, then we'll have a duplicate vulnerability + if affected_vuln_data[:duplicate] + collect_state_restoration_data( + affected_vuln_data, + current_time, + collections[:state_transitions], + collections[:notes], + collections[:system_note_metadata], + collections[:vulnerabilities], + collections[:vulnerability_reads] + ) + else + # semgrep v6.7.1 has not been executed yet, existing vulnerability records contain + # corrupted primary identifier 
values + collect_full_correction_data( + affected_vuln_data, + current_time, + collections[:state_transitions], + collections[:notes], + collections[:system_note_metadata], + collections[:vulnerabilities], + collections[:vulnerability_reads], + collections[:security_findings], + collections[:vulnerability_findings] + ) + end + end + end + + def persist_all_updates(collections) + bulk_insert_state_transitions(collections[:state_transitions]) + inserted_note_ids = bulk_insert_notes(collections[:notes]) + bulk_insert_system_note_metadata(collections[:system_note_metadata], inserted_note_ids) + + bulk_update_vulnerabilities(collections[:vulnerabilities]) + bulk_update_security_findings(collections[:security_findings]) + bulk_update_vulnerability_reads(collections[:vulnerability_reads]) + bulk_update_vulnerability_findings(collections[:vulnerability_findings]) + end + + def build_affected_vulnerability_data(vulnerability_reads_batch) + affected_vulnerabilities = fetch_affected_vulnerability_data(vulnerability_reads_batch) + return {} if affected_vulnerabilities.empty? 
+ + # fetch all related data + duplicate_data = fetch_duplicate_data(affected_vulnerabilities) + state_transitions = fetch_state_transitions(affected_vulnerabilities) + project_namespaces = fetch_project_namespaces(affected_vulnerabilities) + correct_identifiers = fetch_correct_primary_identifiers(affected_vulnerabilities, duplicate_data) + + # combine all related vulnerability data into a single object + affected_vulnerabilities.each_with_object({}) do |(vulnerability_id, affected_vulnerability), combined| + combined[vulnerability_id] = { + # original corrupted vulnerability data, which exists because semgrep 6.7.0 has been executed + corrupted: affected_vulnerability, + # duplicate vulnerability data, which only exists if semgrep 6.7.0 AND 6.7.1 have been executed + duplicate: duplicate_data[vulnerability_id], + # shared data + project_namespace_id: project_namespaces[affected_vulnerability[:project_id]], + latest_transition: state_transitions[vulnerability_id], + correct_identifier: correct_identifiers[vulnerability_id] + } + end + end + + # fetches the correct primary identifiers for the given affected_vulnerabilities which + # have an incorrect primary identifier. The correct primary identifier is determined + # by: + # + # 1. Parsing the raw_metadata from the vulnerability finding, then extracting the identifier + # from the raw_metadata where `type = semgrep_id` + # 2. Generating the fingerprint using the type and value from the identifier + # 3. 
Using the fingerprint to lookup the correct primary identifier from the database + def fetch_correct_primary_identifiers(affected_vulnerabilities, duplicate_data) + vulnerability_lookup = {} + + affected_vulnerabilities_by_project = group_vulnerabilities_by_project(affected_vulnerabilities) + + affected_vulnerabilities_by_project.each do |project_id, vulnerabilities_for_project| + project_identifiers = fetch_identifiers_for_project( + project_id, + vulnerabilities_for_project, + duplicate_data + ) + vulnerability_lookup.merge!(project_identifiers) + end + + vulnerability_lookup + end + + def group_vulnerabilities_by_project(affected_vulnerabilities) + grouped = affected_vulnerabilities.values.group_by { |av| av[:project_id] } + + log_info("Found #{grouped.length} distinct projects with vulnerabilities " \ + "out of #{affected_vulnerabilities.length} total vulnerabilities") + + grouped + end + + def fetch_identifiers_for_project(project_id, vulnerabilities, duplicate_data) + fingerprint_to_vuln = build_fingerprint_mapping(vulnerabilities, duplicate_data) + return {} if fingerprint_to_vuln.empty? 
+ + identifiers = query_identifiers_by_fingerprint(project_id, fingerprint_to_vuln.keys) + map_identifiers_to_vulnerabilities(fingerprint_to_vuln, identifiers) + end + + def build_fingerprint_mapping(vulnerabilities, duplicate_data) + fingerprint_to_vuln = Hash.new { |hash, key| hash[key] = [] } + + vulnerabilities.each do |vulnerability| + vulnerability_id = vulnerability[:vulnerability_id] + # we don't need to find the correct primary identifier if we have a duplicate record, + # since that already contains the correct primary identifier + next if duplicate_data[vulnerability_id] + + fingerprint = extract_semgrep_fingerprint(vulnerability[:raw_metadata]) + fingerprint_to_vuln[fingerprint] << vulnerability_id + end + + fingerprint_to_vuln + end + + def extract_semgrep_fingerprint(raw_metadata) + parsed_metadata = Gitlab::Json.parse(raw_metadata) + semgrep_identifier = parsed_metadata["identifiers"].find { |id| id['type'] == 'semgrep_id' } + Migratable::Vulnerabilities::Identifier.sha1_fingerprint(semgrep_identifier).unpack1('H*') + end + + def query_identifiers_by_fingerprint(project_id, fingerprints) + fingerprints_sql = fingerprints.map { |fp| "'\\x#{fp}'::bytea" }.join(', ') + + sql = <<~SQL + SELECT id, project_id, fingerprint, external_type, external_id + FROM vulnerability_identifiers + WHERE project_id = #{project_id} + AND fingerprint IN (#{fingerprints_sql}) + SQL + + results = SecApplicationRecord.connection.execute(sql) + results.index_by { |row| row['fingerprint'] } + end + + def map_identifiers_to_vulnerabilities(fingerprint_to_vuln, identifiers) + vulnerability_lookup = {} + + fingerprint_to_vuln.each do |fingerprint, vulnerability_ids| + identifier = identifiers["\\x#{fingerprint}"] + vulnerability_ids.each do |vulnerability_id| + vulnerability_lookup[vulnerability_id] = identifier + end + end + + vulnerability_lookup + end + + def fetch_project_namespaces(affected_vulnerabilities) + project_ids_sql = 
affected_vulnerabilities.values.pluck(:project_id).uniq.join(', ') + + sql = <<-SQL + SELECT id as project_id, project_namespace_id + FROM projects + WHERE id IN (#{project_ids_sql}) + SQL + + results = ApplicationRecord.connection.execute(sql) + + results.each_with_object({}) do |row, hash| + hash[row['project_id'].to_i] = row['project_namespace_id'].to_i + end + end + + def fetch_state_transitions(affected_vulnerabilities) + vulnerability_ids_sql = affected_vulnerabilities.keys.join(', ') + + sql = <<-SQL + SELECT DISTINCT ON (vst.vulnerability_id) + vst.vulnerability_id, vst.id, vst.to_state, vst.from_state, vst.author_id, vst.comment, + vst.dismissal_reason, vst.project_id + FROM vulnerability_state_transitions vst + WHERE vst.vulnerability_id IN (#{vulnerability_ids_sql}) + AND vst.author_id IS NOT NULL + ORDER BY vst.vulnerability_id, vst.created_at DESC; + SQL + + results = SecApplicationRecord.connection.execute(sql) + + results.each_with_object({}) do |row, hash| + hash[row['vulnerability_id']] = { + id: row['id'].to_i, + to_state: row['to_state'].to_i, + from_state: row['from_state'].to_i, + author_id: row['author_id'].to_i, + comment: row['comment'], + dismissal_reason: row['dismissal_reason'], + project_id: row['project_id'].to_i + } + end + end + + # determine if there exists a _new_ duplicate vulnerability finding which matches + # all the same data as the _old_ vulnerability finding stored in affected_vulnerabilities. + # If this duplicate finding exists, it means that semgrep 6.7.1 has been executed, + # and the state of the new duplicate finding has been initialized to `detected`. We can + # use the state transitions from the old vulnerability finding to set the correct + # state for the new duplicate vulnerability finding. + def fetch_duplicate_data(affected_vulnerabilities) + search_params = build_search_params(affected_vulnerabilities) + return {} if search_params.empty? 
+ + results = query_duplicate_vulnerabilities(search_params) + build_duplicate_data_hash(results) + end + + private + + def build_search_params(affected_vulnerabilities) + affected_vulnerabilities.map do |vulnerability_id, affected_vulnerability_data| + { + vulnerability_id: vulnerability_id, + exclude_finding_id: affected_vulnerability_data[:finding_id], + severity: affected_vulnerability_data[:severity], + report_type: affected_vulnerability_data[:report_type], + location_fingerprint: affected_vulnerability_data[:location_fingerprint], + scanner_id: affected_vulnerability_data[:scanner_id], + name: affected_vulnerability_data[:name], + metadata_version: affected_vulnerability_data[:metadata_version], + project_id: affected_vulnerability_data[:project_id] + } + end + end + + def query_duplicate_vulnerabilities(search_params) + conn = SecApplicationRecord.connection + values_sql = build_search_params_values_sql(search_params, conn) + + sql = build_duplicate_search_sql(values_sql) + conn.execute(sql) + end + + def build_search_params_values_sql(search_params, conn) + search_params.map do |p| + "(#{p[:vulnerability_id]}, #{p[:exclude_finding_id]}, #{p[:severity]}, #{p[:report_type]}, " \ + "#{p[:scanner_id]}, '#{p[:location_fingerprint]}'::bytea, #{conn.quote(p[:name])}, " \ + "#{conn.quote(p[:metadata_version])}, #{p[:project_id]})" + end.join(', ') + end + + def build_duplicate_search_sql(values_sql) + <<~SQL + WITH search_params AS ( + SELECT * FROM ( + VALUES #{values_sql} + ) AS t(vulnerability_id, exclude_finding_id, severity, report_type, scanner_id, location_fingerprint, name, metadata_version, project_id) + ) + SELECT DISTINCT ON (sp.vulnerability_id) + sp.vulnerability_id as original_vulnerability_id, + vo.id AS finding_id, + vo.raw_metadata, + vr_original.state AS original_state, + vr_duplicate.state AS duplicate_state, + vr_duplicate.vulnerability_id AS duplicate_vulnerability_id, + vr_original.project_id + FROM search_params sp + JOIN vulnerability_reads 
vr_candidate ON + vr_candidate.project_id = sp.project_id + AND vr_candidate.report_type = 0 + JOIN vulnerability_occurrences vo ON + vo.vulnerability_id = vr_candidate.vulnerability_id + AND vo.severity = sp.severity + AND vo.report_type = sp.report_type + AND vo.scanner_id = sp.scanner_id + AND vo.location_fingerprint = sp.location_fingerprint + AND vo.name = sp.name + AND vo.metadata_version = sp.metadata_version + AND vo.project_id = sp.project_id + AND vo.id != sp.exclude_finding_id + JOIN vulnerability_reads vr_original ON vr_original.vulnerability_id = sp.vulnerability_id + JOIN vulnerability_reads vr_duplicate ON vr_duplicate.vulnerability_id = vo.vulnerability_id + ORDER BY sp.vulnerability_id, vo.id + SQL + end + + def build_duplicate_data_hash(results) + results.each_with_object({}) do |row, hash| + hash[row['original_vulnerability_id'].to_i] = { + vulnerability_id: row['duplicate_vulnerability_id'].to_i, + project_id: row['project_id'].to_i, + finding_id: row['finding_id'].to_i, + raw_metadata: row['raw_metadata'], + original_state: row['original_state'].to_i, + state: row['duplicate_state'].to_i + } + end + end + + def fetch_affected_vulnerability_data(vulnerability_reads) + vulnerability_ids = vulnerability_reads.pluck(:vulnerability_id).join(', ') + sql = <<-SQL + SELECT + v.id as vulnerability_id, v.report_type, v.severity, v.project_id, v.state, + vo.id as finding_id, vo.location_fingerprint, vo.name, vo.metadata_version, vo.scanner_id, vo.raw_metadata, + sf.id as security_finding_id, sf.partition_number, sf.finding_data + FROM vulnerabilities v + INNER JOIN vulnerability_occurrences vo ON vo.vulnerability_id = v.id + INNER JOIN vulnerability_identifiers vi ON vi.id = vo.primary_identifier_id + INNER JOIN vulnerability_scanners vs ON vs.external_id = 'semgrep' and vs.project_id = v.project_id + INNER JOIN security_findings sf ON sf.uuid = vo.uuid + WHERE v.id IN (#{vulnerability_ids}) + AND vi.external_type IN ('cwe', 'owasp') + SQL + + results = 
SecApplicationRecord.connection.execute(sql) + + results.each_with_object({}) do |row, hash| + hash[row['vulnerability_id'].to_i] = build_affected_data_from_row(row) + end + end + + def build_affected_data_from_row(row) + { + vulnerability_id: row['vulnerability_id'], + project_id: row['project_id'], + state: row['state'], + severity: row['severity'], + report_type: row['report_type'], + finding_id: row['finding_id'], + location_fingerprint: row['location_fingerprint'], + name: row['name'], + metadata_version: row['metadata_version'], + scanner_id: row['scanner_id'], + raw_metadata: row['raw_metadata'], + security_finding_id: row['security_finding_id'], + partition_number: row['partition_number'], + finding_data: row['finding_data'] + } + end + + def reorder_metadata_with_correct_primary_id(raw_metadata) + metadata = Gitlab::Json.parse(raw_metadata) + identifiers = metadata["identifiers"] + semgrep_identifier = identifiers.find { |id| id["type"] == "semgrep_id" } + other_identifiers = identifiers.reject { |id| id["type"] == "semgrep_id" } + other_identifiers.sort_by! { |a| a["value"] } + metadata["identifiers"] = [semgrep_identifier] + other_identifiers + metadata + end + + def reorder_finding_data_with_correct_primary_id(finding_data) + parsed_finding_data = Gitlab::Json.parse(finding_data) + identifiers = parsed_finding_data["identifiers"] + semgrep_identifier = identifiers.find { |id| id["external_type"] == "semgrep_id" } + other_identifiers = identifiers.reject { |id| id["external_type"] == "semgrep_id" } + other_identifiers.sort_by! 
{ |a| a["external_id"] } + parsed_finding_data["identifiers"] = [semgrep_identifier] + other_identifiers + parsed_finding_data + end + + # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists -- Data collection for full updates + def collect_full_correction_data( + vuln_data, + current_time, + state_transitions_to_insert, + notes_to_insert, + system_note_metadata_to_insert, + vulnerabilities_to_update, + vulnerability_reads_to_update, + security_findings_to_update, + vulnerability_findings_to_update + ) + correct_identifier = vuln_data[:correct_identifier] + unless correct_identifier + log_warning("Missing correct identifier for vulnerability #{vuln_data[:vulnerability_id]}") + return + end + + corrupted_data = vuln_data[:corrupted] + + metadata_with_correct_id = reorder_metadata_with_correct_primary_id(corrupted_data[:raw_metadata]) + finding_data_with_correct_id = reorder_finding_data_with_correct_primary_id(corrupted_data[:finding_data]) + + correct_uuid = Gitlab::UUID.v5( + [ + Migratable::Enums::Vulnerability.report_types.key(corrupted_data[:report_type]), + correct_identifier['fingerprint'].gsub("\\x", ""), + corrupted_data[:location_fingerprint].gsub("\\x", ""), + corrupted_data[:project_id] + ].join("-") + ) + + security_findings_to_update << { + id: corrupted_data[:security_finding_id], + partition_number: corrupted_data[:partition_number], + uuid: correct_uuid, + finding_data: finding_data_with_correct_id + } + + vulnerability_reads_to_update << { + vulnerability_id: corrupted_data[:vulnerability_id], + uuid: correct_uuid, + dismissal_reason: nil + } + + vulnerability_findings_to_update << { + id: corrupted_data[:finding_id], + primary_identifier_id: correct_identifier['id'], + raw_metadata: metadata_with_correct_id.to_json, + uuid: correct_uuid + } + + collect_state_restoration_data( + vuln_data, + current_time, + state_transitions_to_insert, + notes_to_insert, + system_note_metadata_to_insert, + vulnerabilities_to_update, + nil + ) + end + # 
rubocop:enable Metrics/MethodLength, Metrics/ParameterLists + + def bulk_insert_state_transitions(data) + return if data.empty? + + log_info("Bulk inserting #{data.length} state transitions") + + conn = SecApplicationRecord.connection + + values_sql = data.map do |record| + dismissal_reason = record[:dismissal_reason] ? record[:dismissal_reason].to_s : 'NULL::smallint' + comment = record[:comment] ? conn.quote(record[:comment]) : 'NULL::text' + + "(#{record[:vulnerability_id]}, #{record[:from_state]}, #{record[:to_state]}, " \ + "#{record[:author_id]}, #{dismissal_reason}, #{record[:project_id]}, " \ + "#{conn.quote(record[:created_at])}::timestamptz, #{conn.quote(record[:updated_at])}::timestamptz, " \ + "#{comment})" + end.join(', ') + + sql = <<~SQL + INSERT INTO vulnerability_state_transitions + (vulnerability_id, from_state, to_state, author_id, dismissal_reason, project_id, created_at, updated_at, comment) + VALUES #{values_sql} + SQL + + conn.execute(sql) + end + + def bulk_insert_notes(data) + return {} if data.empty? + + log_info("Bulk inserting #{data.length} notes") + + conn = ApplicationRecord.connection + + values_sql = data.map do |record| + "(#{conn.quote(record[:note])}, #{conn.quote(record[:noteable_type])}, " \ + "#{record[:author_id]}, #{conn.quote(record[:created_at])}::timestamptz, " \ + "#{conn.quote(record[:updated_at])}::timestamptz, #{record[:project_id]}, " \ + "#{record[:noteable_id]}, #{record[:system]}, #{conn.quote(record[:discussion_id])}, " \ + "#{record[:namespace_id]})" + end.join(', ') + + sql = <<~SQL + INSERT INTO notes + (note, noteable_type, author_id, created_at, updated_at, project_id, + noteable_id, system, discussion_id, namespace_id) + VALUES #{values_sql} + RETURNING id, discussion_id + SQL + + result = conn.execute(sql) + + result.each_with_object({}) do |row, mapping| + mapping[row['discussion_id']] = row['id'].to_i + end + end + + def bulk_insert_system_note_metadata(data, inserted_note_ids) + return if data.empty? 
+ + log_info("Bulk inserting #{data.length} system note metadata records") + + conn = ApplicationRecord.connection + + values_sql = data.map do |record| + note_id = inserted_note_ids[record[:note_data][:discussion_id]] + + "(#{note_id}, #{record[:namespace_id]}, #{conn.quote(record[:action])}, " \ + "#{conn.quote(record[:created_at])}::timestamptz, #{conn.quote(record[:updated_at])}::timestamptz)" + end.join(', ') + + sql = <<~SQL + INSERT INTO system_note_metadata + (note_id, namespace_id, action, created_at, updated_at) + VALUES #{values_sql} + SQL + + conn.execute(sql) + end + + # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockLength -- UPDATE FROM VALUES requires building multiple column sets + def bulk_update_vulnerabilities(updates) + return if updates.empty? + + log_info("Bulk updating #{updates.length} vulnerabilities") + conn = ::SecApplicationRecord.connection + + updates.each_slice(SUB_BATCH_SIZE) do |batch| + values_sql = batch.map do |u| + confirmed_by = u[:confirmed_by_id] ? u[:confirmed_by_id].to_s : 'NULL::bigint' + confirmed_at = u[:confirmed_at] ? "#{conn.quote(u[:confirmed_at])}::timestamptz" : 'NULL::timestamptz' + resolved_by = u[:resolved_by_id] ? u[:resolved_by_id].to_s : 'NULL::bigint' + resolved_at = u[:resolved_at] ? "#{conn.quote(u[:resolved_at])}::timestamptz" : 'NULL::timestamptz' + dismissed_by = u[:dismissed_by_id] ? u[:dismissed_by_id].to_s : 'NULL::bigint' + dismissed_at = u[:dismissed_at] ? 
"#{conn.quote(u[:dismissed_at])}::timestamptz" : 'NULL::timestamptz' + + "(#{u[:id]}::bigint, #{u[:state]}::smallint, #{confirmed_by}, #{confirmed_at}, " \ + "#{resolved_by}, #{resolved_at}, #{dismissed_by}, #{dismissed_at})" + end.join(', ') + + sql = <<~SQL + UPDATE vulnerabilities AS v + SET + state = t.state, + confirmed_by_id = COALESCE(t.confirmed_by_id, v.confirmed_by_id), + confirmed_at = COALESCE(t.confirmed_at, v.confirmed_at), + resolved_by_id = COALESCE(t.resolved_by_id, v.resolved_by_id), + resolved_at = COALESCE(t.resolved_at, v.resolved_at), + dismissed_by_id = COALESCE(t.dismissed_by_id, v.dismissed_by_id), + dismissed_at = COALESCE(t.dismissed_at, v.dismissed_at) + FROM ( + VALUES #{values_sql} + ) AS t(id, state, confirmed_by_id, confirmed_at, resolved_by_id, resolved_at, dismissed_by_id, dismissed_at) + WHERE v.id = t.id + SQL + + conn.execute(sql) + end + end + # rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockLength + + def bulk_update_security_findings(data) + return if data.empty? + + log_info("Bulk updating #{data.length} security findings") + conn = ::SecApplicationRecord.connection + + data.each_slice(SUB_BATCH_SIZE) do |batch| + values_sql = batch.map do |r| + id = Array(r[:id]).first.to_i + partition_number = r[:partition_number].to_i + escaped_data = conn.quote(r[:finding_data].to_json) + "(#{id}::bigint, #{partition_number}::integer, '#{r[:uuid]}'::uuid, #{escaped_data}::jsonb)" + end.join(', ') + + sql = <<~SQL + UPDATE security_findings AS sf + SET + uuid = t.uuid, + finding_data = t.finding_data + FROM ( + VALUES #{values_sql} + ) AS t(id, partition_number, uuid, finding_data) + WHERE sf.id = t.id AND sf.partition_number = t.partition_number + SQL + + conn.execute(sql) + end + end + + def bulk_update_vulnerability_reads(data) + return if data.empty? 
+ + log_info("Bulk updating #{data.length} vulnerability reads") + conn = ::SecApplicationRecord.connection + + data.each_slice(SUB_BATCH_SIZE) do |batch| + values_sql = batch.map do |r| + uuid_value = r[:uuid] ? "'#{r[:uuid]}'::uuid" : "NULL::uuid" + dismissal_value = r[:dismissal_reason] ? r[:dismissal_reason].to_s : "NULL::integer" + "(#{r[:vulnerability_id]}, #{uuid_value}, #{dismissal_value})" + end.join(', ') + + sql = <<~SQL + UPDATE vulnerability_reads AS vr + SET + uuid = COALESCE(t.uuid, vr.uuid), + dismissal_reason = COALESCE(t.dismissal_reason, vr.dismissal_reason) + FROM ( + VALUES #{values_sql} + ) AS t(vulnerability_id, uuid, dismissal_reason) + WHERE vr.vulnerability_id = t.vulnerability_id + SQL + + conn.execute(sql) + end + end + + def bulk_update_vulnerability_findings(updates) + return if updates.empty? + + log_info("Bulk updating #{updates.length} vulnerability findings") + conn = ::SecApplicationRecord.connection + + updates.each_slice(SUB_BATCH_SIZE) do |batch| + values_sql = batch.map do |u| + escaped_metadata = conn.quote(u[:raw_metadata]) + "(#{u[:id]}, #{u[:primary_identifier_id]}, '#{u[:uuid]}'::uuid, #{escaped_metadata})" + end.join(', ') + + sql = <<~SQL + UPDATE vulnerability_occurrences AS vo + SET + primary_identifier_id = t.primary_identifier_id, + uuid = t.uuid, + raw_metadata = t.raw_metadata + FROM ( + VALUES #{values_sql} + ) AS t(id, primary_identifier_id, uuid, raw_metadata) + WHERE vo.id = t.id + SQL + + conn.execute(sql) + end + end + + # this method is called both for duplicate vulnerabilities and affected vulnerabilities + def collect_state_restoration_data( + vuln_data, + current_time, + state_transitions_to_insert, + notes_to_insert, + system_note_metadata_to_insert, + vulnerabilities_to_update, + vulnerability_reads_to_update + ) + latest_transition = vuln_data[:latest_transition] + # we can only restore the vulnerability state if a state transition exists, otherwise, we have + # no idea what the actual state should be 
+ return unless latest_transition + + current_state = value_for_field(vuln_data, :state) + + # don't make any changes if the state has not changed + return if current_state == latest_transition[:to_state] + + state_transitions_to_insert << build_state_transition(vuln_data, current_time) + + note_data = build_note_data(vuln_data, current_time) + notes_to_insert << note_data + + system_note_metadata_to_insert << build_system_note_metadata(vuln_data, current_time, note_data) + + vulnerabilities_to_update << build_vulnerability_update(vuln_data, current_time) + + vulnerability_reads_to_update << build_vulnerability_read_update(vuln_data) if vulnerability_reads_to_update + end + + def value_for_field(vuln_data, field) + vuln_data.dig(:duplicate, field) || vuln_data[:corrupted][field] + end + + def build_state_transition(vuln_data, current_time) + latest_transition = vuln_data[:latest_transition] + + { + vulnerability_id: value_for_field(vuln_data, :vulnerability_id), + dismissal_reason: latest_transition[:dismissal_reason], + author_id: latest_transition[:author_id], + from_state: value_for_field(vuln_data, :state), + to_state: latest_transition[:to_state], + project_id: value_for_field(vuln_data, :project_id), + created_at: current_time, + updated_at: current_time, + comment: format(TRANSITION_COMMENT_TEMPLATE, { + original_comment: latest_transition[:comment], + transition_id: latest_transition[:id] + }) + } + end + + def build_note_data(vuln_data, current_time) + latest_transition = vuln_data[:latest_transition] + from_state_string = Migratable::Enums::Vulnerability.vulnerability_states.key( + value_for_field(vuln_data, :state) + ) + to_state_string = Migratable::Enums::Vulnerability.vulnerability_states.key(latest_transition[:to_state]) + vulnerability_id = value_for_field(vuln_data, :vulnerability_id) + + note_text = format(SYSTEM_NOTE_TEMPLATE, { + original_comment: latest_transition[:comment], + from_state: from_state_string.titleize, + to_state: 
to_state_string.titleize, + transition_id: latest_transition[:id] + }) + + { + note: note_text, + noteable_type: 'Vulnerability', + author_id: latest_transition[:author_id], + created_at: current_time, + updated_at: current_time, + project_id: value_for_field(vuln_data, :project_id), + noteable_id: vulnerability_id, + system: true, + discussion_id: discussion_id(vulnerability_id), + namespace_id: vuln_data[:project_namespace_id] + } + end + + def build_system_note_metadata(vuln_data, current_time, note_data) + latest_transition = vuln_data[:latest_transition] + to_state_string = Migratable::Enums::Vulnerability.vulnerability_states.key(latest_transition[:to_state]) + + { + namespace_id: vuln_data[:project_namespace_id], + action: "vulnerability_#{to_state_string}", + created_at: current_time, + updated_at: current_time, + note_data: note_data + } + end + + def build_vulnerability_update(vuln_data, current_time) + latest_transition = vuln_data[:latest_transition] + + vuln_update = { + id: value_for_field(vuln_data, :vulnerability_id), + state: latest_transition[:to_state] + } + + add_state_specific_attributes(vuln_update, latest_transition, current_time) + + vuln_update + end + + def add_state_specific_attributes(vuln_update, latest_transition, current_time) + author_id = latest_transition[:author_id] + + case latest_transition[:to_state] + when "confirmed" + vuln_update[:confirmed_by_id] = author_id + vuln_update[:confirmed_at] = current_time + when "resolved" + vuln_update[:resolved_by_id] = author_id + vuln_update[:resolved_at] = current_time + when "dismissed" + vuln_update[:dismissed_by_id] = author_id + vuln_update[:dismissed_at] = current_time + end + end + + def build_vulnerability_read_update(vuln_data) + latest_transition = vuln_data[:latest_transition] + + { + vulnerability_id: value_for_field(vuln_data, :vulnerability_id), + uuid: nil, + dismissal_reason: latest_transition[:dismissal_reason] + } + end + + def discussion_id(vulnerability_id) + # 
rubocop:disable Fips/SHA1 -- required for parity with app/models/discussion.rb + Digest::SHA1.hexdigest("discussion-vulnerability-#{vulnerability_id}-#{SecureRandom.hex}") + # rubocop:enable Fips/SHA1 + end + + # Logging helpers - use structured logger in production, puts in tests + def log_info(message) + if defined?(Gitlab::AppJsonLogger) + Gitlab::AppJsonLogger.info( + message: message, + class: self.class.name + ) + else + puts message + end + end + + def log_warning(message) + if defined?(Gitlab::AppJsonLogger) + Gitlab::AppJsonLogger.warn( + message: message, + class: self.class.name + ) + else + puts "WARNING: #{message}" + end + end + + def log_progress(current, total, context = "") + return unless current % 100 == 0 || current == total + + percentage = ((current.to_f / total) * 100).round(2) + message = "Processing #{current} of #{total} [#{percentage}%]" + message += " - #{context}" unless context.empty? + log_info(message) + end + end + end +end +# rubocop:enable Metrics/ClassLength diff --git a/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..3098868cd222dd0e4a9555e5fe97ba2a5bc830cd --- /dev/null +++ b/spec/lib/gitlab/background_migration/restore_incorrect_vulnerability_states_spec.rb @@ -0,0 +1,1450 @@ +# frozen_string_literal: true + +require 'spec_helper' + +# rubocop:disable RSpec/MultipleMemoizedHelpers -- We need this many for this background migration +RSpec.describe Gitlab::BackgroundMigration::RestoreIncorrectVulnerabilityStates, feature_category: :static_application_security_testing do + let(:pipelines_table) { ci_partitioned_table(:p_ci_pipelines) } + let(:builds_table) { ci_partitioned_table(:p_ci_builds) } + let(:notes_table) { table(:notes) } + let(:system_note_metadata_table) { table(:system_note_metadata) } + let(:vulnerabilities_table) { 
table(:vulnerabilities, database: :sec) } + let(:vulnerability_issue_links_table) { table(:vulnerability_issue_links, database: :sec) } + let(:vulnerability_state_transitions_table) { table(:vulnerability_state_transitions, database: :sec) } + let(:vulnerability_identifiers_table) { table(:vulnerability_identifiers, database: :sec) } + let(:vulnerability_finding_identifiers_table) { table(:vulnerability_occurrence_identifiers, database: :sec) } + let(:organizations_table) { table(:organizations) } + let(:vulnerability_findings_table) { table(:vulnerability_occurrences, database: :sec) } + let(:security_findings_table) { table(:security_findings, database: :sec) } + let(:security_scans) { table(:security_scans, database: :sec) } + let(:vulnerability_reads_table) { table(:vulnerability_reads, database: :sec) } + let(:vulnerability_scanners) { table(:vulnerability_scanners, database: :sec) } + let(:sub_batch_size) { vulnerabilities_table.count } + let(:namespaces_table) { table(:namespaces) } + let(:projects_table) { table(:projects) } + let(:user) do + table(:users).create!(username: 'john_doe', email: 'johndoe@gitlab.com', projects_limit: 2, + organization_id: organization.id) + end + + let(:mitigating_control_dismissal_int) { 2 } + + let(:severity_level_low_int) { 4 } + let(:severity_level_medium_int) { 5 } + let(:severity_level_high_int) { 6 } + + let(:resolved_state_string) { 'resolved' } + let(:detected_state_string) { 'detected' } + let(:dismissed_state_string) { 'dismissed' } + let(:confirmed_state_string) { 'confirmed' } + + let(:created_link_type_int) { 2 } + + let(:detected_state_int) { 1 } + let(:dismissed_state_int) { 2 } + let(:confirmed_state_int) { 4 } + let(:resolved_state_int) { 3 } + + let(:sast_report_type_string) { described_class::Migratable::Enums::Vulnerability.report_types.key(0) } + let(:sast_report_type_int) { described_class::Migratable::Enums::Vulnerability.report_types['sast'] } + + let(:organization) { organizations_table.create!(name: 
'Organization', path: 'organization') } + let!(:group_namespace) do + namespaces_table.create!( + name: 'Project One', + path: 'project-one', + type: 'Group', + organization_id: organization.id + ).tap { |namespace| namespace.update!(traversal_ids: [namespace.id]) } + end + + let(:project_id) { nil } + let(:project) { create_project(name: 'gitlab', group: group_namespace, id: project_id) } + let(:current_time) { Time.current } + + let(:vulnerability_scanner) do + vulnerability_scanners.create!(created_at: current_time, updated_at: current_time, + project_id: project.id, external_id: 'semgrep', name: 'Semgrep', vendor: 'GitLab') + end + + let(:vulnerabilities_to_be_confirmed) do + vulnerabilities_table.where(project_id: project.id) + .where("title LIKE ?", "%Vulnerability to be confirmed.").order(:id) + end + + let(:vulnerabilities_to_be_resolved) do + vulnerabilities_table.where(project_id: project.id) + .where("title LIKE ?", "%Vulnerability to be resolved.").order(:id) + end + + let(:vulnerabilities_to_be_dismissed) do + vulnerabilities_table.where(project_id: project.id) + .where("title LIKE ?", "%Vulnerability to be dismissed.").order(:id) + end + + # TODO: also add tests for { 'namespace_id' => namespace.id } and { `namespace_id => 'instance'` } + let(:job_args) { { 'project_id' => project.id } } + + def create_project(name:, group:, id: nil) + project_namespace = namespaces_table.create!( + name: name, + path: name, + type: 'Project', + organization_id: organization.id + ) + + projects_table.create!({ + id: id, + namespace_id: group.id, + project_namespace_id: project_namespace.id, + organization_id: organization.id, + name: name, + path: name + }.compact) + end + + # use a method instead of a subject to avoid rspec memoization + def perform_migration + described_class.new( + start_id: vulnerability_reads_table.minimum(:vulnerability_id), + end_id: vulnerability_reads_table.maximum(:vulnerability_id), + batch_table: :vulnerability_reads, + batch_column: 
:vulnerability_id, + sub_batch_size: sub_batch_size, + job_arguments: [job_args], + pause_ms: 0, + connection: ::SecApplicationRecord.connection + ).perform + end + + describe 'when performing sanity checks against spec data' do + context 'when creating vulnerabilities' do + let(:project_id) { 112 } + + it 'creates vulnerability occurrences with uuid and location_fingerprint ' \ + 'values that correspond to the hardcoded project id' do + expect { create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') }.to change { + vulnerability_findings_table.where( + uuid: 'fa338ae7-51e1-5211-8e02-34359ce9544d', + location_fingerprint: ['1eae8e196458ba7c9f60d97e55a77e1ca9d7d7f5'].pack('H*'), + project_id: project_id + ).count + }.from(0).to(1) + .and change { + vulnerability_findings_table.count + }.by(14) + .and change { + security_findings_table.where( + uuid: 'fa338ae7-51e1-5211-8e02-34359ce9544d', + project_id: project_id + ).count + }.from(0).to(1) + .and change { + vulnerability_reads_table.where( + uuid: 'fa338ae7-51e1-5211-8e02-34359ce9544d', + project_id: project_id + ).count + }.from(0).to(1) + + vulnerability_findings_table.find_each { |vf| expect(vf.raw_metadata).to be_present } + security_findings_table.find_each { |sf| expect(sf.finding_data).to be_present } + + security_finding_data = security_findings_table + .find_by(uuid: 'fa338ae7-51e1-5211-8e02-34359ce9544d').finding_data + expect(security_finding_data).to eq({ + "name" => "Vulnerbility with issue link. 
Vulnerability to be confirmed.", + "links" => [], "assets" => [], "details" => {}, "evidence" => nil, + "location" => { "file" => "app/app.py", "start_line" => 265 }, "solution" => nil, + "description" => "SQL Injection is a critical vulnerability that can lead to data or system compromise.", + "identifiers" => [ + { "url" => "https://semgrep.dev/r/gitlab.bandit.B608", "name" => "bandit.B608", + "external_id" => "bandit.B608", "fingerprint" => "5fc4137cf46497245dba266eaf656ee07eb154b3", + "external_type" => "semgrep_id" }, + { "url" => "https://cwe.mitre.org/data/definitions/89.html", "name" => "CWE-89", + "external_id" => "89", "fingerprint" => "b74f6bacf3f4d4f92c6f4da6584963e4148b91e6", + "external_type" => "cwe" }, + { "url" => nil, "name" => "A03:2021 - Injection", "external_id" => "A03:2021", + "fingerprint" => "a8e828eea3aba35916401da9304619f0a218119b", "external_type" => "owasp" }, + { "url" => nil, "name" => "A1:2017 - Injection", "external_id" => "A1:2017", + "fingerprint" => "08de3511f2132da4d24f1b8b1d3ca14368a0259b", "external_type" => "owasp" }, + { "url" => nil, "name" => "Bandit Test ID B608", "external_id" => "B608", + "fingerprint" => "2efed5393435ae741114b2200f17077e81954270", "external_type" => "bandit_test_id" } + ], "false_positive?" 
=> false, "raw_source_code_extract" => nil, "remediation_byte_offsets" => [] + }) + end + end + + context 'when changing vulnerability state' do + it 'resolves vulnerabilities' do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + + vulnerabilities_to_be_resolved = vulnerabilities_table.where(severity: severity_level_medium_int) + + expect do + resolve_vulnerabilities(vulnerabilities: vulnerabilities_to_be_resolved, comment: 'resolving') + end + .to change { + vulnerabilities_to_be_resolved.map(&:state) + }.from([detected_state_int] * vulnerabilities_to_be_resolved.count) + .to([resolved_state_int] * vulnerabilities_to_be_resolved.count) + .and change { + vulnerability_reads_table.where(vulnerability_id: vulnerabilities_to_be_resolved.map(&:id)).map(&:state) + }.from([detected_state_int] * vulnerabilities_to_be_resolved.count) + .to([resolved_state_int] * vulnerabilities_to_be_resolved.count) + .and change { + vulnerability_state_transitions_table.where(from_state: detected_state_int, + to_state: resolved_state_int, project_id: project.id).count + }.from(0).to(vulnerabilities_to_be_resolved.count) + .and change { + notes_table.count + }.from(0).to(vulnerabilities_to_be_resolved.count) + .and change { + system_note_metadata_table.count + }.from(0).to(vulnerabilities_to_be_resolved.count) + .and change { + vulnerabilities_to_be_resolved.pluck(:resolved_by_id) + }.from([nil] * vulnerabilities_to_be_resolved.count) + .to([user.id] * vulnerabilities_to_be_resolved.count) + .and change { + vulnerabilities_to_be_resolved.pluck(:resolved_at) + }.from([nil] * vulnerabilities_to_be_resolved.count) + end + + it 'dismisses vulnerabilities' do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + + vulnerabilities_to_be_dismissed = vulnerabilities_table.where(severity: severity_level_medium_int) + + expect do + dismiss_vulnerabilities(vulnerabilities: vulnerabilities_to_be_dismissed, + comment: 
'dismissing', dismissal_reason: mitigating_control_dismissal_int) + end + .to change { + vulnerabilities_to_be_dismissed.map(&:state) + }.from([detected_state_int] * vulnerabilities_to_be_dismissed.count) + .to([dismissed_state_int] * vulnerabilities_to_be_dismissed.count) + .and change { + vulnerability_reads_table.where(vulnerability_id: vulnerabilities_to_be_dismissed.map(&:id)).map(&:state) + }.from([detected_state_int] * vulnerabilities_to_be_dismissed.count) + .to([dismissed_state_int] * vulnerabilities_to_be_dismissed.count) + .and change { + vulnerability_state_transitions_table.where(from_state: detected_state_int, + to_state: dismissed_state_int, project_id: project.id).count + }.from(0).to(vulnerabilities_to_be_dismissed.count) + .and change { + notes_table.count + }.from(0).to(vulnerabilities_to_be_dismissed.count) + .and change { + system_note_metadata_table.count + }.from(0).to(vulnerabilities_to_be_dismissed.count) + .and change { + vulnerabilities_to_be_dismissed.pluck(:dismissed_by_id) + }.from([nil] * vulnerabilities_to_be_dismissed.count) + .to([user.id] * vulnerabilities_to_be_dismissed.count) + .and change { + vulnerabilities_to_be_dismissed.pluck(:dismissed_at) + }.from([nil] * vulnerabilities_to_be_dismissed.count) + .and change { + vulnerability_reads_table.where(vulnerability_id: vulnerabilities_to_be_dismissed.pluck(:id)) + .pluck(:dismissal_reason) + }.from([nil] * vulnerabilities_to_be_dismissed.count) + .to([mitigating_control_dismissal_int] * vulnerabilities_to_be_dismissed.count) + end + end + + context 'when corrupting vulnerabilities' do + it 'resets the state for resolved vulnerabilities to detected' do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + resolve_vulnerabilities( + vulnerabilities: vulnerabilities_table.where(severity: severity_level_medium_int), comment: 'resolving' + ) + + expect { corrupt_vulnerabilities }.to change { + vulnerabilities_to_be_resolved.pluck(:state) + 
}.from([resolved_state_int] * vulnerabilities_to_be_resolved.count) + .to([detected_state_int] * vulnerabilities_to_be_resolved.count) + end + + it 'does not reset the state for confirmed or dismissed vulnerabilities' do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + + dismiss_vulnerabilities(vulnerabilities: vulnerabilities_to_be_dismissed, + comment: 'dismissing', dismissal_reason: mitigating_control_dismissal_int) + confirm_vulnerabilities(vulnerabilities: vulnerabilities_to_be_confirmed, comment: 'confirming') + + expect { corrupt_vulnerabilities }.to not_change { + vulnerabilities_to_be_dismissed.pluck(:state) + }.from([dismissed_state_int] * vulnerabilities_to_be_dismissed.count) + .and not_change { vulnerabilities_to_be_confirmed.pluck(:state) } + .from([confirmed_state_int] * vulnerabilities_to_be_confirmed.count) + end + + it 'reorders the raw_metadata identifiers for corrupted vulnerability findings, ' \ + 'placing cwe first, without altering other metadata' do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + + ordered_vulnerabilities = vulnerabilities_table.order(:id) + + metadata_before = ordered_vulnerabilities.to_h do |v| + finding = vulnerability_findings_table.find(v.finding_id) + [v.id, Gitlab::Json.parse(finding.raw_metadata)] + end + + corrupt_vulnerabilities + + aggregate_failures 'all findings should have reordered identifiers only' do + ordered_vulnerabilities.each do |vulnerability| + old_metadata = metadata_before[vulnerability.id] + finding = vulnerability_findings_table.find(vulnerability.finding_id) + new_metadata = Gitlab::Json.parse(finding.raw_metadata) + old_identifiers = old_metadata['identifiers'] + new_identifiers = new_metadata['identifiers'] + + expect(new_identifiers.first['type']).to eq('cwe') + expect(new_identifiers).to match_array(old_identifiers) + expect(new_metadata.except('identifiers')).to eq(old_metadata.except('identifiers')) + end + end 
+ end + + it 'updates the UUID for corrupted vulnerability records' do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + + ordered_vulnerabilities = vulnerabilities_table.order(:id) + + original_uuids = ordered_vulnerabilities.map do |vuln| + finding = vulnerability_findings_table.find(vuln.finding_id) + primary_identifier = vulnerability_identifiers_table.find(finding.primary_identifier_id) + + Gitlab::UUID.v5( + [ + described_class::Migratable::Enums::Vulnerability.report_types.key(vuln.report_type), + primary_identifier.fingerprint.unpack1('H*'), + finding.location_fingerprint.unpack1('H*'), + vuln.project_id + ].join("-") + ) + end + + corrupt_uuids = ordered_vulnerabilities.map do |vuln| + finding = vulnerability_findings_table.find(vuln.finding_id) + identifiers = Gitlab::Json.parse(finding.raw_metadata)['identifiers'] + cwe_identifier = identifiers.find { |id| id['type'] == 'cwe' } + + binary_fingerprint = described_class::Migratable::Vulnerabilities::Identifier + .sha1_fingerprint(cwe_identifier) + + Gitlab::UUID.v5( + [ + described_class::Migratable::Enums::Vulnerability.report_types.key(vuln.report_type), + binary_fingerprint.unpack1('H*'), + finding.location_fingerprint.unpack1('H*'), + vuln.project_id + ].join("-") + ) + end + + expect { corrupt_vulnerabilities }.to change { + ordered_vulnerabilities.map do |v| + vulnerability_findings_table.find(v.finding_id).uuid + end + }.from(original_uuids).to(corrupt_uuids) + end + end + + context 'when no corrupt vulnerabilities exist' do + before do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + end + + it 'does not create or alter any existing records' do + expect { perform_migration }.to not_change { + vulnerabilities_table.all.pluck(:state) + } + .and not_change { security_findings_table.count } + .and not_change { notes_table.count } + .and not_change { system_note_metadata_table.count } + end + end + end + + describe 
"#perform", feature_category: :static_application_security_testing do + let(:uuids_before_corruption) { [] } + let(:corrupted_vulnerabilities) { [] } + let(:duplicated_vulnerabilities) { [] } + + context 'when ingesting reports in sequence: correct identifiers' do + before do + create_vulnerabilities( + 'gl-sast-report-semgrep-6.7.1-multiple-vulnerabilities-correct-primary-identifier.json' + ) + end + + it 'does not change the state or UUIDs of any vulnerability records' do + expect { perform_migration }.to not_change { + vulnerabilities_table.order(:id).pluck(:state) + }.and not_change { + vulnerability_reads_table.order(:id).pluck(:state) + }.and not_change { + vulnerability_reads_table.order(:id).pluck(:uuid) + }.and not_change { + vulnerability_findings_table.order(:id).pluck(:uuid) + } + end + end + + context 'when ingesting reports in sequence: incorrect identifiers' do + before do + create_vulnerabilities( + 'gl-sast-report-semgrep-6.7.0-multiple-vulnerabilities-incorrect-primary-identifier.json' + ) + + uuids_before_corruption.concat(vulnerability_findings_table.all.order(:id).pluck(:uuid)) + + confirm_vulnerabilities(vulnerabilities: vulnerabilities_to_be_confirmed, comment: 'confirming') + resolve_vulnerabilities(vulnerabilities: vulnerabilities_to_be_resolved, comment: 'resolving') + dismiss_vulnerabilities(vulnerabilities: vulnerabilities_to_be_dismissed, + comment: 'dismissing', dismissal_reason: mitigating_control_dismissal_int) + end + + it 'does not change the state of any vulnerability records' do + expect { perform_migration }.to not_change { + vulnerabilities_table.order(:id).pluck(:state) + }.and not_change { + vulnerability_reads_table.order(:id).pluck(:state) + } + end + + it 'does not insert any state transition records' do + expect { perform_migration }.to not_change { + vulnerability_state_transitions_table.count + } + end + + it 'does not insert any note or system note metadata records' do + expect { perform_migration }.to not_change { + 
notes_table.count + }.and not_change { + system_note_metadata_table.count + } + end + + it 'restores the primary identifiers for corrupted vulnerability findings to the correct value' do + expect { perform_migration }.to change { + vulnerability_findings_table.all.map do |finding| + vulnerability_identifiers_table.find(finding.primary_identifier_id).external_type + end + }.from(["cwe"] * vulnerability_findings_table.count).to(["semgrep_id"] * vulnerability_findings_table.count) + end + + it 'does not delete any security findings' do + expect { perform_migration }.to not_change { security_findings_table.count } + end + + it 'restores the UUID for corrupted vulnerability records' do + ordered_vulnerabilities = vulnerabilities_table.order(:id) + + corrupt_uuids = ordered_vulnerabilities.map do |vuln| + finding = vulnerability_findings_table.find(vuln.finding_id) + primary_identifier = vulnerability_identifiers_table.find(finding.primary_identifier_id) + + Gitlab::UUID.v5( + [ + described_class::Migratable::Enums::Vulnerability.report_types.key(vuln.report_type), + primary_identifier.fingerprint.unpack1('H*'), + finding.location_fingerprint.unpack1('H*'), + vuln.project_id + ].join("-") + ) + end + + expect { perform_migration }.to change { + ordered_vulnerabilities.map do |v| + vulnerability_findings_table.find(v.finding_id).uuid + end + }.from(corrupt_uuids) + .and change { + ordered_vulnerabilities.map do |v| + finding = vulnerability_findings_table.find(v.finding_id) + security_findings_table.where(uuid: finding.uuid).first&.uuid + end + }.from(corrupt_uuids) + .and change { + ordered_vulnerabilities.map do |v| + vulnerability_reads_table.find_by(vulnerability_id: v.id).uuid + end + }.from(corrupt_uuids) + end + + it 'reorders the raw_metadata identifiers for corrupted vulnerability findings, ' \ + 'placing semgrep_id first, without altering other metadata' do + ordered_vulnerabilities = vulnerabilities_table.order(:id) + + metadata_before = 
ordered_vulnerabilities.to_h do |v| + finding = vulnerability_findings_table.find(v.finding_id) + [v.id, Gitlab::Json.parse(finding.raw_metadata)] + end + + perform_migration + + aggregate_failures 'all findings should have reordered identifiers only' do + ordered_vulnerabilities.each do |vulnerability| + old_metadata = metadata_before[vulnerability.id] + finding = vulnerability_findings_table.find(vulnerability.finding_id) + new_metadata = Gitlab::Json.parse(finding.raw_metadata) + old_identifiers = old_metadata['identifiers'] + new_identifiers = new_metadata['identifiers'] + + expect(new_identifiers.first['type']).to eq('semgrep_id') + expect(new_identifiers).to match_array(old_identifiers) + expect(new_metadata.except('identifiers')).to eq(old_metadata.except('identifiers')) + end + end + end + end + + context 'when ingesting reports in sequence: correct identifiers → incorrect identifiers' do + before do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + + uuids_before_corruption.concat(vulnerability_findings_table.all.order(:id).pluck(:uuid)) + + confirm_vulnerabilities(vulnerabilities: vulnerabilities_to_be_confirmed, comment: 'confirming') + resolve_vulnerabilities(vulnerabilities: vulnerabilities_to_be_resolved, comment: 'resolving') + dismiss_vulnerabilities(vulnerabilities: vulnerabilities_to_be_dismissed, + comment: 'dismissing', dismissal_reason: mitigating_control_dismissal_int) + + create_vulnerability_issue_link( + vulnerability: vulnerabilities_table.find_by("title LIKE ?", "Vulnerbility with issue link%") + ) + + corrupt_vulnerabilities + end + + it 'does not delete any security findings' do + expect { perform_migration }.to not_change { security_findings_table.count } + end + + it 'restores the primary identifiers for corrupted vulnerability findings to the correct value' do + expect { perform_migration }.to change { + vulnerability_findings_table.all.map do |finding| + 
vulnerability_identifiers_table.find(finding.primary_identifier_id).external_type + end + }.from(["cwe"] * vulnerability_findings_table.count).to(["semgrep_id"] * vulnerability_findings_table.count) + end + + it 'restores the UUID for corrupted vulnerability records to the correct value' do + ordered_vulnerabilities = vulnerabilities_table.order(:id) + + corrupt_uuids = ordered_vulnerabilities.map do |vuln| + finding = vulnerability_findings_table.find(vuln.finding_id) + primary_identifier = vulnerability_identifiers_table.find(finding.primary_identifier_id) + + Gitlab::UUID.v5( + [ + described_class::Migratable::Enums::Vulnerability.report_types.key(vuln.report_type), + primary_identifier.fingerprint.unpack1('H*'), + finding.location_fingerprint.unpack1('H*'), + vuln.project_id + ].join("-") + ) + end + + expect { perform_migration }.to change { + ordered_vulnerabilities.map do |v| + vulnerability_findings_table.find(v.finding_id).uuid + end + }.from(corrupt_uuids).to(uuids_before_corruption) + .and change { + ordered_vulnerabilities.map do |v| + finding = vulnerability_findings_table.find(v.finding_id) + security_findings_table.where(uuid: finding.uuid).first&.uuid + end + }.from(corrupt_uuids).to(uuids_before_corruption) + .and change { + ordered_vulnerabilities.map do |v| + vulnerability_reads_table.find_by(vulnerability_id: v.id).uuid + end + }.from(corrupt_uuids).to(uuids_before_corruption) + end + + it 'reorders the raw_metadata identifiers for corrupted vulnerability findings, ' \ + 'placing semgrep_id first, without altering other metadata' do + ordered_vulnerabilities = vulnerabilities_table.order(:id) + + metadata_before = ordered_vulnerabilities.to_h do |v| + finding = vulnerability_findings_table.find(v.finding_id) + [v.id, Gitlab::Json.parse(finding.raw_metadata)] + end + + perform_migration + + aggregate_failures 'all findings should have reordered identifiers only' do + ordered_vulnerabilities.each do |vulnerability| + old_metadata = 
metadata_before[vulnerability.id] + finding = vulnerability_findings_table.find(vulnerability.finding_id) + new_metadata = Gitlab::Json.parse(finding.raw_metadata) + old_identifiers = old_metadata['identifiers'] + new_identifiers = new_metadata['identifiers'] + + expect(new_identifiers.first['type']).to eq('semgrep_id') + expect(new_identifiers).to match_array(old_identifiers) + expect(new_metadata.except('identifiers')).to eq(old_metadata.except('identifiers')) + end + end + end + + it 'reorders the finding_data identifiers for corrupted vulnerability security findings, ' \ + 'placing semgrep_id first, without altering other finding_data' do + ordered_vulnerabilities = vulnerabilities_table.order(:id) + + finding_data_before = ordered_vulnerabilities.to_h do |v| + finding = vulnerability_findings_table.find(v.finding_id) + security_finding = security_findings_table.where(uuid: finding.uuid).first + [v.id, security_finding.finding_data] + end + + perform_migration + + aggregate_failures 'all security findings should have reordered identifiers only' do + ordered_vulnerabilities.each do |vulnerability| + old_finding_data = finding_data_before[vulnerability.id] + finding = vulnerability_findings_table.find(vulnerability.finding_id) + new_security_finding = security_findings_table.where(uuid: finding.uuid).first + new_finding_data = new_security_finding.finding_data + + old_identifiers = old_finding_data['identifiers'] + new_identifiers = new_finding_data['identifiers'] + + expect(new_identifiers.first['external_type']).to eq('semgrep_id') + expect(new_identifiers).to match_array(old_identifiers) + expect(new_finding_data.except('identifiers')).to eq(old_finding_data.except('identifiers')) + end + end + end + + it 'inserts new vulnerability state transitions for the corrupted vulnerabilities that have ' \ + 'changed from detected to resolved' do + vulnerability_ids = vulnerabilities_to_be_resolved.map(&:id) + + expect { perform_migration }.to change { + 
vulnerability_state_transitions_table + .where(vulnerability_id: vulnerability_ids) + .where(project_id: project.id) + .where("comment LIKE ?", "%original comment automatically copied from transition%") + .where(from_state: detected_state_int, to_state: resolved_state_int) + .count + }.by(vulnerabilities_to_be_resolved.count) + end + + it 'restores the state for resolved corrupted vulnerabilities and vulnerability_reads' do + vulnerability_ids = vulnerabilities_to_be_resolved.map(&:id) + + expect { perform_migration }.to change { + vulnerability_ids.map { |id| vulnerabilities_table.find(id).state } + }.from([detected_state_int] * vulnerabilities_to_be_resolved.length) + .to([resolved_state_int] * vulnerabilities_to_be_resolved.length) + .and change { + vulnerability_ids.map { |id| vulnerability_reads_table.find_by(vulnerability_id: id).state } + }.from([detected_state_int] * vulnerabilities_to_be_resolved.length) + .to([resolved_state_int] * vulnerabilities_to_be_resolved.length) + end + + it 'does not change the state for confirmed and dismissed corrupted vulnerabilities, ' \ + 'because they are still correct' do + expect { perform_migration }.to not_change { + vulnerabilities_to_be_confirmed.pluck(:state) + }.from([confirmed_state_int] * vulnerabilities_to_be_confirmed.count) + .and not_change { + vulnerabilities_to_be_dismissed.map.pluck(:state) + }.from([dismissed_state_int] * vulnerabilities_to_be_dismissed.count) + end + + it 'creates new system notes for the resolved vulnerabilities' do + total_vulnerabilities_changed = ( + vulnerabilities_to_be_dismissed.count + + vulnerabilities_to_be_resolved.count + + # add vulnerabilities_to_be_resolved.count twice, because + # BulkCreateRedetectedNotesService adds a new record for + # each resolved vulnerability when the pipeline is corrupted + vulnerabilities_to_be_resolved.count + + vulnerabilities_to_be_confirmed.count + ) + + expect { perform_migration }.to change { notes_table.count } + 
.from(total_vulnerabilities_changed).to(total_vulnerabilities_changed + vulnerabilities_to_be_resolved.count) + + notes = notes_table.where("note LIKE ?", "%original comment automatically copied from transition%") + + aggregate_failures "checking note attributes" do + notes.order(:noteable_id).zip(vulnerabilities_to_be_resolved).each do |note, vulnerability| + expect(note.noteable_type).to eq('Vulnerability') + expect(note.noteable_id).to eq(vulnerability.id) + expect(note.author_id).to eq(user.id) + expect(note.created_at).to be_a_kind_of(Time) + expect(note.updated_at).to be_a_kind_of(Time) + expect(note.project_id).to eq(project.id) + expect(note.system).to be_truthy + expect(note.namespace_id).to eq(project.project_namespace_id) + expect(note.discussion_id).to match(/[a-f0-9]{40}/) + expect(note.note).to match( + /changed vulnerability status from Detected to Resolved with the following comment: "resolving"/ + ) + end + end + end + + it 'creates new system note metadata for the resolved vulnerabilities' do + expect { perform_migration }.to change { system_note_metadata_table.count } + .by(vulnerabilities_to_be_resolved.count) + + notes = Note.where("note LIKE ?", "%original comment automatically copied from transition%") + system_note_metadata = system_note_metadata_table.where(note_id: notes.pluck(:id)) + + expect(system_note_metadata.count).to eq(notes.count) + + aggregate_failures "checking system note metadata attributes" do + system_note_metadata.order(:note_id).zip(notes.order(:id)).each do |metadata, note| + expect(metadata.note_id).to eq(note.id) + expect(metadata.action).to eq('vulnerability_resolved') + expect(metadata.namespace_id).to eq(project.project_namespace_id) + expect(metadata.created_at).to be_a_kind_of(Time) + expect(metadata.updated_at).to be_a_kind_of(Time) + end + end + end + + it 'does not create or delete any vulnerabilities or vulnerability findings' do + expect { perform_migration }.to not_change { vulnerabilities_table.count } + 
.and not_change { vulnerability_findings_table.count } + end + + context 'when performing the migration twice' do + it 'is idempotent' do + expect { perform_migration }.to change { + vulnerability_findings_table.all.map do |finding| + vulnerability_identifiers_table.find(finding.primary_identifier_id).external_type + end + }.from(["cwe"] * vulnerabilities_table.count).to(["semgrep_id"] * vulnerabilities_table.count) + + expect { perform_migration }.to( + not_change { vulnerability_state_transitions_table.count } + .and(not_change do + vulnerability_findings_table.all.map do |finding| + vulnerability_identifiers_table.find(finding.primary_identifier_id).external_type + end + end) + .and(not_change { notes_table.count }) + .and(not_change { system_note_metadata_table.count }) + .and(not_change { vulnerabilities_table.pluck(:state) }) + ) + end + end + end + + context 'when ingesting reports in sequence: correct identifiers → incorrect identifiers → correct identifiers' do + before do + create_vulnerabilities('gl-sast-report-semgrep-6.6.2-multiple-vulnerabilities.json') + + uuids_before_corruption.concat(vulnerability_findings_table.all.order(:id).pluck(:uuid)) + + confirm_vulnerabilities(vulnerabilities: vulnerabilities_to_be_confirmed, comment: 'confirming') + resolve_vulnerabilities(vulnerabilities: vulnerabilities_to_be_resolved, comment: 'resolving') + dismiss_vulnerabilities(vulnerabilities: vulnerabilities_to_be_dismissed, + comment: 'dismissing', dismissal_reason: mitigating_control_dismissal_int) + + corrupted_vulnerabilities.concat(corrupt_vulnerabilities) + + duplicated_vulnerabilities.concat(create_vulnerabilities( + 'gl-sast-report-semgrep-6.7.1-multiple-vulnerabilities-correct-primary-identifier.json' + )) + end + + it 'does not change the primary identifiers for corrupted or duplicate vulnerabilities' do + expect { perform_migration }.to not_change { + vulnerability_findings_table + .where(vulnerability_id: corrupted_vulnerabilities.pluck(:id)) + .map 
do |finding| + vulnerability_identifiers_table.find(finding.primary_identifier_id).external_type + end + }.from(["cwe"] * corrupted_vulnerabilities.length) + .and not_change { + vulnerability_findings_table + .where(vulnerability_id: duplicated_vulnerabilities.pluck(:id)) + .map do |finding| + vulnerability_identifiers_table.find(finding.primary_identifier_id).external_type + end + }.from(["semgrep_id"] * duplicated_vulnerabilities.length) + end + + it 'does not change the UUID for corrupted or duplicate vulnerabilities' do + corrupted_vuln_ids = corrupted_vulnerabilities.pluck(:id) + duplicated_vuln_ids = duplicated_vulnerabilities.pluck(:id) + + corrupted_vuln_finding_uuids = vulnerability_findings_table.where(vulnerability_id: corrupted_vuln_ids) + .pluck(:uuid) + + duplicated_vuln_finding_uuids = vulnerability_findings_table.where(vulnerability_id: duplicated_vuln_ids) + .pluck(:uuid) + + expect { perform_migration }.to not_change { + vulnerability_findings_table.where(vulnerability_id: corrupted_vuln_ids).order(:id).pluck(:uuid) + } + .and not_change { + security_findings_table.where(uuid: corrupted_vuln_finding_uuids).order(:id).pluck(:uuid) + } + .and not_change { + vulnerability_reads_table.where(vulnerability_id: corrupted_vuln_ids).order(:id).pluck(:uuid) + } + .and not_change { + vulnerability_findings_table.where(vulnerability_id: duplicated_vuln_ids).order(:id).pluck(:uuid) + } + .and not_change { + security_findings_table.where(uuid: duplicated_vuln_finding_uuids).order(:id).pluck(:uuid) + } + .and not_change { + vulnerability_reads_table.where(vulnerability_id: duplicated_vuln_ids).order(:id).pluck(:uuid) + } + end + + it 'updates the vulnerability_reads.dismissal_reason for duplicate vulnerabilities' do + dismissed_vulns = vulnerabilities_table + .where(id: duplicated_vulnerabilities.pluck(:id)) + .where("title LIKE ?", "%Vulnerability to be dismissed.") + + dismissed_vuln_count = dismissed_vulns.count + + expect { perform_migration }.to change { + 
vulnerability_reads_table + .where(vulnerability_id: dismissed_vulns.select(:id)) + .pluck(:dismissal_reason) + }.from([nil] * dismissed_vuln_count) + .to([mitigating_control_dismissal_int] * dismissed_vuln_count) + end + + it 'does not change the state for corrupted vulnerabilities' do + corrupted_vuln_ids = corrupted_vulnerabilities.pluck(:id) + + expect { perform_migration }.to not_change { + vulnerabilities_table.where(id: corrupted_vuln_ids) + .where("title LIKE ?", "%Vulnerability to be confirmed.").order(:id).pluck(:state) + }.from([confirmed_state_int] * vulnerabilities_to_be_confirmed.length) + .and not_change { + vulnerabilities_table.where(id: corrupted_vuln_ids) + .where("title LIKE ?", "%Vulnerability to be resolved.").order(:id).pluck(:state) + }.from([detected_state_int] * vulnerabilities_to_be_resolved.length) + .and not_change { + vulnerabilities_table.where(id: corrupted_vuln_ids) + .where("title LIKE ?", "%Vulnerability to be dismissed.").order(:id).pluck(:state) + }.from([dismissed_state_int] * vulnerabilities_to_be_dismissed.length) + end + + it 'restores the state for duplicate vulnerabilities' do + duplicated_vuln_ids = duplicated_vulnerabilities.pluck(:id) + + expect { perform_migration }.to change { + vulnerabilities_table.where(id: duplicated_vuln_ids) + .where("title LIKE ?", "%Vulnerability to be confirmed.").order(:id).pluck(:state) + }.from([detected_state_int] * vulnerabilities_to_be_confirmed.length) + .to([confirmed_state_int] * vulnerabilities_to_be_confirmed.length) + .and change { + vulnerabilities_table.where(id: duplicated_vuln_ids) + .where("title LIKE ?", "%Vulnerability to be resolved.").order(:id).pluck(:state) + }.from([detected_state_int] * vulnerabilities_to_be_resolved.length) + .to([resolved_state_int] * vulnerabilities_to_be_resolved.length) + .and change { + vulnerabilities_table.where(id: duplicated_vuln_ids) + .where("title LIKE ?", "%Vulnerability to be dismissed.").order(:id).pluck(:state) + 
}.from([detected_state_int] * vulnerabilities_to_be_dismissed.length) + .to([dismissed_state_int] * vulnerabilities_to_be_dismissed.length) + end + + context 'when notes and state transitions' do + where(:to_state, :comment) do + [ + %w[resolved resolving], + %w[confirmed confirming], + %w[dismissed dismissing] + ] + end + + with_them do + let(:title_pattern) { "%Vulnerability to be #{ApplicationRecord.sanitize_sql_like(to_state)}." } + let(:target_vulnerabilities) do + vulnerabilities_table + .where(id: duplicated_vulnerabilities.pluck(:id)) + .where("title LIKE ?", title_pattern) + .order(:id) + end + + let(:to_state_int) do + described_class::Migratable::Enums::Vulnerability.vulnerability_states[to_state] + end + + it "has target vulnerabilities for #{params[:to_state]}" do + expect(target_vulnerabilities.count).to be > 0 + end + + it "creates vulnerability state transitions for duplicate #{params[:to_state]} vulnerabilities" do + comment_pattern = "#{ApplicationRecord.sanitize_sql_like(comment)} (original comment automatically copied%" + + expect { perform_migration }.to change { + vulnerability_state_transitions_table.where("comment LIKE ?", comment_pattern).count + }.by(target_vulnerabilities.count) + + aggregate_failures "checking vulnerability state transition attributes" do + vulnerability_state_transitions_table + .where(vulnerability_id: target_vulnerabilities.pluck(:id)) + .order(:vulnerability_id) + .zip(target_vulnerabilities).each do |transition, vulnerability| + expect(transition.vulnerability_id).to eq(vulnerability.id) + expect(transition.to_state).to eq(to_state_int) + expect(transition.from_state).to eq(detected_state_int) + expect(transition.author_id).to eq(user.id) + expect(transition.created_at).to be_a_kind_of(Time) + expect(transition.updated_at).to be_a_kind_of(Time) + expect(transition.project_id).to eq(project.id) + expect(transition.comment).to match( + /#{comment} \(original comment automatically copied from transition [0-9]* to fix 
semgrep 6\.7\.0 bug/ + ) + expect(transition.dismissal_reason).to be_between(0, 4) if to_state_int == dismissed_state_int + end + end + end + + it "creates new notes for #{params[:to_state]} vulnerabilities" do + note_pattern = "%changed vulnerability status from Detected to " \ + "#{ApplicationRecord.sanitize_sql_like(to_state.titleize)} with the following comment: " \ + "\"#{ApplicationRecord.sanitize_sql_like(comment)}\"%" + + expect { perform_migration }.to change { + notes_table.where('note LIKE ?', note_pattern).count + }.by(target_vulnerabilities.count), + "Expected to create #{target_vulnerabilities.count} notes for state '#{to_state}'" + + aggregate_failures "checking note attributes" do + notes_table + .where(noteable_id: target_vulnerabilities.pluck(:id)) + .order(:noteable_id) + .zip(target_vulnerabilities).each do |note, vulnerability| + expect(note.noteable_type).to eq('Vulnerability') + expect(note.noteable_id).to eq(vulnerability.id) + expect(note.author_id).to eq(user.id) + expect(note.created_at).to be_a_kind_of(Time) + expect(note.updated_at).to be_a_kind_of(Time) + expect(note.project_id).to eq(project.id) + expect(note.system).to be_truthy + expect(note.namespace_id).to eq(project.project_namespace_id) + expect(note.discussion_id).to match(/[a-f0-9]{40}/) + + expected_note = "changed vulnerability status from Detected to #{to_state.titleize} " \ + "with the following comment: \"#{comment}\"" + expect(note.note).to include(expected_note) + end + end + end + + it "creates new system note metadata for #{params[:to_state]} vulnerabilities" do + expect { perform_migration }.to change { + system_note_metadata_table.where(action: "vulnerability_#{to_state}").count + }.by(target_vulnerabilities.count), + "Expected to create #{target_vulnerabilities.count} system note metadata records for state '#{to_state}'" + + notes = notes_table.where( + "note LIKE ?", "%changed vulnerability status from Detected " \ + "to 
#{ApplicationRecord.sanitize_sql_like(to_state.titleize)}%" + ) + + system_note_metadata = system_note_metadata_table.where(note_id: notes.pluck(:id)) + expect(system_note_metadata.count).to eq(notes.count) + + aggregate_failures "checking system note metadata attributes" do + system_note_metadata.order(:note_id).zip(notes.order(:id)).each do |metadata, note| + expect(metadata.note_id).to eq(note.id) + expect(metadata.action).to eq("vulnerability_#{to_state}") + expect(metadata.namespace_id).to eq(project.project_namespace_id) + expect(metadata.created_at).to be_a_kind_of(Time) + expect(metadata.updated_at).to be_a_kind_of(Time) + end + end + end + end + end + + context 'when there are additional vulnerabilities that are not affected by the bug' do + let(:additional_vulnerabilities_to_be_confirmed) do + vulnerabilities_table.where("title LIKE ?", "Additional vulnerability%") + end + + before do + create_vulnerabilities( + 'gl-sast-report-semgrep-6.7.1-additional-vulnerabilities-correct-primary-identifier.json' + ) + + confirm_vulnerabilities(vulnerabilities: additional_vulnerabilities_to_be_confirmed, comment: 'confirming') + end + + it 'does not alter the additional vulnerabilities' do + expect { perform_migration }.to not_change { + additional_vulnerabilities_to_be_confirmed.pluck(:state) + }.from([confirmed_state_int] * additional_vulnerabilities_to_be_confirmed.count) + .and not_change { + vulnerability_findings_table.where(id: additional_vulnerabilities_to_be_confirmed.pluck(:finding_id)) + .pluck(:uuid) + } + .and not_change { + notes_table.where(noteable_id: additional_vulnerabilities_to_be_confirmed.pluck(:id)).count + } + .and not_change { + vulnerability_state_transitions_table + .where(vulnerability_id: additional_vulnerabilities_to_be_confirmed.pluck(:id)).count + } + .and not_change { + vulnerability_reads_table + .where(vulnerability_id: additional_vulnerabilities_to_be_confirmed.pluck(:id)).pluck(:uuid) + } + end + end + end + end +end +# 
# rubocop:enable RSpec/MultipleMemoizedHelpers

# Creates a vulnerability issue link row for the given vulnerability and the
# finding referenced by its `finding_id`.
#
# @param vulnerability [ActiveRecord::Base] row from `vulnerabilities_table`
# @return [ActiveRecord::Base] the created issue link row
# @raise [ActiveRecord::RecordNotFound] when the referenced finding does not exist
def create_vulnerability_issue_link(vulnerability:)
  new_time = Time.current

  # `find` (rather than `find_by`) so a missing finding fails here with a clear error
  # instead of a NoMethodError on nil further down.
  vulnerability_finding = vulnerability_findings_table.find(vulnerability.finding_id)

  vulnerability_issue_links_table.create!(
    vulnerability_id: vulnerability.id, link_type: created_link_type_int,
    vulnerability_occurrence_id: vulnerability_finding.id,
    created_at: new_time, updated_at: new_time, project_id: project.id,
    issue_id: rand(1..1000)
  )
end

# replicates the changes that happen when executing semgrep 6.7.0
#
# Creates a new scan, moves every resolved vulnerability back to detected and then
# rewrites the identifiers/UUIDs of every vulnerability.
#
# @return [Array<ActiveRecord::Base>] the vulnerabilities whose identifiers were rewritten
def corrupt_vulnerabilities
  new_time = Time.current
  corrupt_scan = create_corrupt_scan

  reset_resolved_vulnerabilities_state(new_time, corrupt_scan.pipeline_id)
  corrupt_all_vulnerability_identifiers(new_time, corrupt_scan)
end

# Creates a pipeline, a build and a SAST security scan for `project`.
#
# @return [ActiveRecord::Base] the created security scan row
def create_corrupt_scan
  corrupt_pipeline = pipelines_table.create!(project_id: project.id, partition_id: 100)
  corrupt_build = builds_table.create!(
    partition_id: corrupt_pipeline.partition_id,
    project_id: project.id,
    commit_id: corrupt_pipeline.id
  )

  security_scans.create!(
    build_id: corrupt_build.id,
    scan_type: described_class::Migratable::Enums::Security.scan_types[:sast],
    pipeline_id: corrupt_pipeline.id,
    project_id: project.id
  )
end

# Moves every currently resolved vulnerability back to the detected state.
#
# @param new_time [Time] timestamp applied to the rows created along the way
# @param corrupt_pipeline_id [Integer] pipeline id mentioned in the redetection note
def reset_resolved_vulnerabilities_state(new_time, corrupt_pipeline_id)
  # resolved vulnerabilities are the only ones whose state is changed by the bug
  vulnerabilities_table.where(state: resolved_state_int).find_each do |vulnerability|
    reset_vulnerability_to_detected(vulnerability, new_time, corrupt_pipeline_id)
  end
end

# Sets one vulnerability (and its vulnerability read) back to detected and records the
# matching state transition, system note and system note metadata.
#
# @param vulnerability [ActiveRecord::Base] row from `vulnerabilities_table`
# @param new_time [Time] timestamp for the created rows
# @param corrupt_pipeline_id [Integer] pipeline id interpolated into the note text
# @return [ActiveRecord::Base] the created system note metadata row
def reset_vulnerability_to_detected(vulnerability, new_time, corrupt_pipeline_id)
  # Capture the state before the update so the transition records the real origin.
  from_state_int = vulnerability.state

  vulnerability.update!(state: detected_state_int, resolved_by_id: nil, resolved_at: nil)

  vulnerability_state_transitions_table.create!(
    vulnerability_id: vulnerability.id, from_state: from_state_int,
    to_state: detected_state_int, created_at: new_time, updated_at: new_time,
    author_id: nil, project_id: project.id
  )

  vulnerability_reads_table.where(vulnerability_id: vulnerability.id).update!(state: detected_state_int)

  # NOTE(review): this note uses `group_namespace.id` while every other note in these
  # helpers uses `project.project_namespace_id` - confirm this difference is intended.
  note = notes_table.create!(
    note: "changed vulnerability status to Needs Triage because it was redetected in pipeline #{corrupt_pipeline_id}",
    noteable_type: 'Vulnerability',
    author_id: user.id,
    created_at: new_time,
    updated_at: new_time,
    project_id: project.id,
    noteable_id: vulnerability.id,
    system: true,
    discussion_id: nil,
    namespace_id: group_namespace.id
  )

  system_note_metadata_table.create!(
    action: 'vulnerability_detected',
    created_at: note.created_at,
    updated_at: note.updated_at,
    note_id: note.id,
    namespace_id: project.project_namespace_id
  )
end

# Rewrites the identifiers of every vulnerability in `vulnerabilities_table`.
#
# @param new_time [Time] timestamp written to the updated findings
# @param corrupt_scan [ActiveRecord::Base] scan the new security findings are attached to
# @return [Array<ActiveRecord::Base>] every vulnerability that was processed
def corrupt_all_vulnerability_identifiers(new_time, corrupt_scan)
  # `find_each` without a block returns an Enumerator, so `map` collects the processed rows.
  vulnerabilities_table.find_each.map do |vulnerability|
    corrupt_vulnerability_identifiers(vulnerability, new_time, corrupt_scan)
    vulnerability
  end
end

# Rewrites the finding, vulnerability read and security finding of one vulnerability so
# that they use the re-sorted identifier list and the UUID derived from it.
#
# @param vulnerability [ActiveRecord::Base] row from `vulnerabilities_table`
# @param new_time [Time] timestamp written to the updated finding
# @param corrupt_scan [ActiveRecord::Base] scan the new security finding is attached to
def corrupt_vulnerability_identifiers(vulnerability, new_time, corrupt_scan)
  vulnerability_finding = vulnerability_findings_table.find_by(vulnerability_id: vulnerability.id)
  old_uuid = vulnerability_finding.uuid

  corrupt_metadata = reorder_metadata_with_incorrect_primary_id(vulnerability_finding.raw_metadata)
  new_uuid = calculate_corrupt_uuid(corrupt_metadata, vulnerability_finding)

  update_vulnerability_with_corrupt_data(
    vulnerability, vulnerability_finding, new_uuid, corrupt_metadata, new_time, corrupt_scan
  )
  create_corrupt_security_finding(old_uuid, new_uuid, corrupt_scan)
end

# Looks up the identifier row matching the first identifier of the re-sorted metadata.
#
# @param corrupt_metadata [Hash] parsed raw metadata with re-sorted "identifiers"
# @return [ActiveRecord::Base] row from `vulnerability_identifiers_table`
# @raise [ActiveRecord::RecordNotFound] when no identifier has that fingerprint
def find_corrupt_primary_identifier(corrupt_metadata)
  corrupt_identifier = corrupt_metadata['identifiers'][0]
  binary_fingerprint = described_class::Migratable::Vulnerabilities::Identifier.sha1_fingerprint(corrupt_identifier)

  vulnerability_identifiers_table.find_by!(fingerprint: binary_fingerprint)
end

# Computes the v5 UUID built from the report type, the fingerprint of the first re-sorted
# identifier, the finding's location fingerprint and the project id.
#
# @param corrupt_metadata [Hash] parsed raw metadata with re-sorted "identifiers"
# @param vulnerability_finding [ActiveRecord::Base] row from `vulnerability_findings_table`
# @return [String] the UUID
def calculate_corrupt_uuid(corrupt_metadata, vulnerability_finding)
  corrupt_primary_identifier = find_corrupt_primary_identifier(corrupt_metadata)

  Gitlab::UUID.v5(
    [
      sast_report_type_string,
      corrupt_primary_identifier.fingerprint.unpack1('H*'),
      vulnerability_finding.location_fingerprint.unpack1('H*'),
      project.id
    ].join("-")
  )
end

# Writes the new UUID to the vulnerability read and the new UUID, metadata, pipeline and
# primary identifier to the finding.
#
# @param vulnerability [ActiveRecord::Base] row from `vulnerabilities_table`
# @param vulnerability_finding [ActiveRecord::Base] finding belonging to the vulnerability
# @param new_uuid [String] UUID to store
# @param corrupt_metadata [Hash] parsed raw metadata with re-sorted "identifiers"
# @param new_time [Time] value for the finding's `updated_at`
# @param corrupt_scan [ActiveRecord::Base] scan whose pipeline becomes the latest pipeline
def update_vulnerability_with_corrupt_data(
  vulnerability, vulnerability_finding, new_uuid, corrupt_metadata, new_time, corrupt_scan
)
  vulnerability_reads_table.where(vulnerability_id: vulnerability.id).update!(uuid: new_uuid)

  corrupt_primary_identifier = find_corrupt_primary_identifier(corrupt_metadata)

  vulnerability_finding.update!(
    uuid: new_uuid, raw_metadata: corrupt_metadata.to_json, metadata_version: '15.2.2',
    updated_at: new_time, latest_pipeline_id: corrupt_scan.pipeline_id,
    primary_identifier_id: corrupt_primary_identifier.id
  )
end

# Creates a security finding under `new_uuid`, copying the severity of the security
# finding stored under `old_uuid` and re-sorting its identifiers.
#
# @param old_uuid [String] UUID of the existing security finding
# @param new_uuid [String] UUID for the new security finding
# @param corrupt_scan [ActiveRecord::Base] scan the new security finding is attached to
# @return [ActiveRecord::Base] the created security finding row
def create_corrupt_security_finding(old_uuid, new_uuid, corrupt_scan)
  old_security_finding = security_findings_table.find_by(uuid: old_uuid)
  corrupt_finding_data = reorder_finding_data_with_incorrect_primary_id(old_security_finding.finding_data)

  security_findings_table.create!(
    uuid: new_uuid, scan_id: corrupt_scan.id, finding_data: corrupt_finding_data,
    scanner_id: vulnerability_scanner.id, severity: old_security_finding.severity
  )
end

# Returns a copy of `finding_data` whose "identifiers" are sorted by "external_id".
# The argument itself is left untouched so the record it was read from is not altered.
#
# @param finding_data [Hash] finding data with string keys and an "identifiers" array
# @return [Hash] new hash with the re-sorted identifiers
def reorder_finding_data_with_incorrect_primary_id(finding_data)
  sorted_identifiers = finding_data["identifiers"].sort_by { |identifier| identifier["external_id"] }

  finding_data.merge("identifiers" => sorted_identifiers)
end

# Parses `raw_metadata` and sorts its "identifiers" by "value".
#
# @param raw_metadata [String] JSON document of a vulnerability finding
# @return [Hash] parsed metadata with the re-sorted identifiers
def reorder_metadata_with_incorrect_primary_id(raw_metadata)
  metadata = Gitlab::Json.parse(raw_metadata)
  metadata["identifiers"] = metadata["identifiers"].sort_by { |identifier| identifier["value"] }
  metadata
end

# Creates the vulnerability read row mirroring a vulnerability and its finding.
#
# @param vulnerability [ActiveRecord::Base] row from `vulnerabilities_table`
# @param vulnerability_finding [ActiveRecord::Base] finding belonging to the vulnerability
# @return [ActiveRecord::Base] the created vulnerability read row
def create_vulnerability_read(vulnerability, vulnerability_finding)
  vulnerability_reads_table.create!(
    vulnerability_id: vulnerability.id,
    uuid: vulnerability_finding.uuid,
    project_id: vulnerability.project_id,
    scanner_id: vulnerability_finding.scanner_id,
    report_type: vulnerability.report_type,
    severity: vulnerability.severity,
    state: vulnerability.state,
    vulnerability_occurrence_id: vulnerability_finding.id
  )
end

# Finds another finding with the same severity, report type, location fingerprint, name,
# metadata version and project as the given one.
#
# @param vulnerability_finding [ActiveRecord::Base] finding to compare against
# @return [ActiveRecord::Base, nil] the first other matching finding, if any
def find_matching_finding(vulnerability_finding)
  vulnerability_findings_table.where(
    severity: vulnerability_finding.severity,
    report_type: vulnerability_finding.report_type,
    location_fingerprint: vulnerability_finding.location_fingerprint,
    name: vulnerability_finding.name,
    metadata_version: vulnerability_finding.metadata_version,
    project_id: vulnerability_finding.project_id
  ).where.not(id: vulnerability_finding.id).first
end

# Creates the security finding row for a vulnerability finding.
#
# @param vulnerability [ActiveRecord::Base] supplies the severity
# @param vulnerability_finding [ActiveRecord::Base] supplies the UUID and finding data
# @param vulnerability_scanner [ActiveRecord::Base] scanner the finding belongs to
# @param scan [ActiveRecord::Base] scan the finding belongs to
# @param identifiers [Array<Hash>] report identifiers embedded into the finding data
# @return [ActiveRecord::Base] the created security finding row
def create_security_finding(vulnerability, vulnerability_finding, vulnerability_scanner, scan, identifiers)
  security_findings_table.create!(
    uuid: vulnerability_finding.uuid,
    project_id: project.id,
    scanner_id: vulnerability_scanner.id,
    scan_id: scan.id,
    severity: vulnerability.severity,
    finding_data: finding_data_for(vulnerability_finding: vulnerability_finding, vulnerability_identifiers: identifiers)
  )
end

# Creates the vulnerability row for a finding.
#
# @param vulnerability_finding [ActiveRecord::Base] finding the vulnerability points at
# @param vulnerability_name [String] stored as the title
# @param vulnerability_severity [String] severity name; downcased before the enum lookup
# @return [ActiveRecord::Base] the created vulnerability row
def create_vulnerability(vulnerability_finding, vulnerability_name, vulnerability_severity)
  vulnerabilities_table.create!(
    project_id: project.id, author_id: user.id,
    created_at: current_time, updated_at: current_time, title: vulnerability_name,
    severity: described_class::Migratable::Enums::Vulnerability.severity_levels[vulnerability_severity.downcase],
    detected_at: current_time, finding_id: vulnerability_finding.id, report_type: sast_report_type_int
  )
end

# Ingests a security report fixture: creates a pipeline, build and SAST scan, then one
# finding, vulnerability, security finding and vulnerability read per reported entry.
#
# @param fixture_file [String] file name below ee/spec/fixtures/security_reports/master
# @return [Array<ActiveRecord::Base>] the created vulnerabilities, in report order
def create_vulnerabilities(fixture_file)
  pipeline = pipelines_table.create!(project_id: project.id, partition_id: 100)
  build = builds_table.create!(partition_id: pipeline.partition_id, project_id: project.id, commit_id: pipeline.id)

  scan = security_scans.create!(
    build_id: build.id,
    scan_type: described_class::Migratable::Enums::Security.scan_types[:sast],
    pipeline_id: pipeline.id,
    project_id: project.id
  )

  parsed_fixture = Gitlab::Json.parse(File.read("ee/spec/fixtures/security_reports/master/#{fixture_file}"))

  parsed_fixture['vulnerabilities'].map do |report_vulnerability|
    vulnerability_finding = create_vulnerability_finding(
      vulnerability: report_vulnerability, project: project, scanner: vulnerability_scanner,
      metadata_version: parsed_fixture['version'], pipeline: pipeline
    )

    vulnerability = create_vulnerability(
      vulnerability_finding, report_vulnerability['name'], report_vulnerability['severity']
    )

    create_security_finding(
      vulnerability, vulnerability_finding, vulnerability_scanner, scan, report_vulnerability['identifiers']
    )

    create_vulnerability_read(vulnerability, vulnerability_finding)

    vulnerability_finding.update!(vulnerability_id: vulnerability.id)

    mark_matching_finding_resolved_on_default_branch(vulnerability_finding)

    vulnerability
  end
end

# When another finding matches the given one (see `find_matching_finding`), flags that
# other finding's vulnerability read and vulnerability as resolved on the default branch.
#
# @param vulnerability_finding [ActiveRecord::Base] the newly created finding
def mark_matching_finding_resolved_on_default_branch(vulnerability_finding)
  matching_finding = find_matching_finding(vulnerability_finding)
  return unless matching_finding

  vulnerability_reads_table.find_by(vulnerability_id: matching_finding.vulnerability_id)
    .update!(resolved_on_default_branch: true)
  vulnerabilities_table.find_by(finding_id: matching_finding.id).update!(resolved_on_default_branch: true)
end

# Builds the `finding_data` hash stored on a security finding.
#
# @param vulnerability_finding [ActiveRecord::Base] supplies name, description, solution,
#   location and details
# @param vulnerability_identifiers [Array<Hash>] report identifiers ("type", "value", "name", "url")
# @return [Hash] finding data with symbol keys
def finding_data_for(vulnerability_finding:, vulnerability_identifiers:)
  vulnerability_identifiers_with_fingerprint_data = vulnerability_identifiers.map do |identifier|
    fingerprint_string = described_class::Migratable::Vulnerabilities::Identifier.sha1_fingerprint(identifier)

    {
      external_id: identifier['value'],
      external_type: identifier['type'],
      name: identifier['name'],
      url: identifier['url'],
      fingerprint: fingerprint_string.unpack1('H*')
    }
  end

  {
    name: vulnerability_finding.name,
    links: [],
    assets: [],
    raw_source_code_extract: nil,
    false_positive?: false,
    remediation_byte_offsets: [],
    evidence: nil,
    description: vulnerability_finding.description,
    solution: vulnerability_finding.solution,
    location: vulnerability_finding.location,
    identifiers: vulnerability_identifiers_with_fingerprint_data,
    details: vulnerability_finding.details
  }
end

# Finds or creates one identifier row per identifier listed on a reported vulnerability.
#
# @param vulnerability [Hash] one entry of the report's "vulnerabilities" array
# @param project [ActiveRecord::Base] project owning the identifiers
# @return [Array<ActiveRecord::Base>] identifier rows, in report order
def create_vulnerability_identifiers(vulnerability:, project:)
  vulnerability['identifiers'].map do |identifier|
    fingerprint_string = described_class::Migratable::Vulnerabilities::Identifier.sha1_fingerprint(identifier)

    # The block only runs when the row has to be created.
    vulnerability_identifiers_table.find_or_create_by!(
      project_id: project.id,
      fingerprint: fingerprint_string
    ) do |vi|
      vi.created_at = current_time
      vi.updated_at = current_time
      vi.external_type = identifier['type']
      vi.external_id = identifier['value']
      vi.name = identifier['name']
      vi.url = identifier['url']
    end
  end
end

# Converts a dismissal reason integer into its titleized name.
#
# @param dismissal_int [Integer] index into the list of dismissal reasons
# @return [String] titleized dismissal reason
# @raise [IndexError] when the integer does not map to a known reason
def dismissal_int_to_string(dismissal_int)
  %w[acceptable_risk false_positive mitigating_control used_in_tests not_applicable].fetch(dismissal_int).titleize
end

# Moves the given vulnerabilities to the resolved state.
#
# @param vulnerabilities [ActiveRecord::Relation] vulnerabilities to change
# @param comment [String] comment stored on the transition and in the note
def resolve_vulnerabilities(vulnerabilities:, comment:)
  change_vulnerabilities_state(
    vulnerabilities: vulnerabilities, to_state_string: resolved_state_string, comment: comment
  )
end

# Moves the given vulnerabilities to the confirmed state.
#
# @param vulnerabilities [ActiveRecord::Relation] vulnerabilities to change
# @param comment [String] comment stored on the transition and in the note
def confirm_vulnerabilities(vulnerabilities:, comment:)
  change_vulnerabilities_state(
    vulnerabilities: vulnerabilities, to_state_string: confirmed_state_string, comment: comment
  )
end

# Moves the given vulnerabilities to the dismissed state.
#
# @param vulnerabilities [ActiveRecord::Relation] vulnerabilities to change
# @param comment [String] comment stored on the transition and in the note
# @param dismissal_reason [Integer] dismissal reason stored on the transition and the reads
def dismiss_vulnerabilities(vulnerabilities:, comment:, dismissal_reason:)
  change_vulnerabilities_state(
    vulnerabilities: vulnerabilities, to_state_string: dismissed_state_string,
    comment: comment, dismissal_reason_int: dismissal_reason
  )
end

# Records a state change for each vulnerability (state transition, system note and system
# note metadata), then updates the vulnerabilities and the dismissal reason of their reads.
#
# @param vulnerabilities [ActiveRecord::Relation] vulnerabilities to change
# @param to_state_string [String] name of the target state
# @param comment [String] comment stored on the transition and in the note
# @param dismissal_reason_int [Integer, nil] dismissal reason, when dismissing
def change_vulnerabilities_state(vulnerabilities:, to_state_string:, comment:, dismissal_reason_int: nil)
  new_time = Time.current
  vulnerability_states = described_class::Migratable::Enums::Vulnerability.vulnerability_states
  to_state_int = vulnerability_states[to_state_string]

  # Transitions and notes are written first, while each row still carries its old state.
  vulnerabilities.each do |vulnerability|
    from_state_int = vulnerability.state
    from_state_string = vulnerability_states.key(from_state_int).titleize

    create_state_transition(vulnerability, from_state_int, to_state_int, comment, dismissal_reason_int, new_time)
    note = create_system_note(
      vulnerability, from_state_string, to_state_string, comment, dismissal_reason_int, new_time
    )
    create_system_note_metadata(note, to_state_string, new_time)
  end

  update_vulnerabilities(vulnerabilities, to_state_string, to_state_int, new_time)
  vulnerability_reads_table.where(vulnerability_id: vulnerabilities.map(&:id))
    .update!(dismissal_reason: dismissal_reason_int)
end

# Creates a state transition row authored by `user`.
#
# @param vulnerability [ActiveRecord::Base] vulnerability being changed
# @param from_state_int [Integer] previous state
# @param to_state_int [Integer] new state
# @param comment [String] transition comment
# @param dismissal_reason_int [Integer, nil] dismissal reason, when dismissing
# @param time [Time] value for `created_at` and `updated_at`
# @return [ActiveRecord::Base] the created state transition row
def create_state_transition(vulnerability, from_state_int, to_state_int, comment, dismissal_reason_int, time)
  vulnerability_state_transitions_table.create!(
    vulnerability_id: vulnerability.id,
    from_state: from_state_int,
    to_state: to_state_int,
    created_at: time,
    updated_at: time,
    author_id: user.id,
    comment: comment,
    project_id: project.id,
    dismissal_reason: dismissal_reason_int
  )
end

# Creates the system note describing a state change. When a dismissal reason is given,
# its titleized name is appended to the target state in the note text.
#
# @param vulnerability [ActiveRecord::Base] vulnerability the note is attached to
# @param from_state_string [String] previous state as shown in the note
# @param to_state_string [String] target state name; titleized for the note
# @param comment [String] comment quoted in the note
# @param dismissal_reason_int [Integer, nil] dismissal reason, when dismissing
# @param time [Time] value for `created_at` and `updated_at`
# @return [ActiveRecord::Base] the created note row
def create_system_note(vulnerability, from_state_string, to_state_string, comment, dismissal_reason_int, time)
  formatted_to_state = to_state_string.titleize
  formatted_to_state += ": #{dismissal_int_to_string(dismissal_reason_int)}" if dismissal_reason_int

  notes_table.create!(
    noteable_id: vulnerability.id,
    noteable_type: 'Vulnerability',
    author_id: user.id,
    created_at: time,
    updated_at: time,
    project_id: project.id,
    system: true,
    namespace_id: project.project_namespace_id,
    note: <<~NOTE.squish
      changed vulnerability status from #{from_state_string} to #{formatted_to_state}
      with the following comment: "#{comment}"
    NOTE
  )
end

# Creates the system note metadata row for a state-change note.
#
# @param note [ActiveRecord::Base] note the metadata belongs to
# @param to_state_string [String] target state; stored as "vulnerability_<state>"
# @param time [Time] value for `created_at` and `updated_at`
# @return [ActiveRecord::Base] the created system note metadata row
def create_system_note_metadata(note, to_state_string, time)
  system_note_metadata_table.create!(
    action: "vulnerability_#{to_state_string}",
    note_id: note.id,
    namespace_id: project.project_namespace_id,
    created_at: time,
    updated_at: time
  )
end

# Applies the new state and its state-specific attributes to all given vulnerabilities.
#
# @param vulnerabilities [ActiveRecord::Relation] vulnerabilities to update
# @param to_state_string [String] name of the target state
# @param to_state_int [Integer] integer value of the target state
# @param time [Time] timestamp for the state-specific `*_at` column
def update_vulnerabilities(vulnerabilities, to_state_string, to_state_int, time)
  vulnerability_attributes = build_vulnerability_attributes(to_state_string, to_state_int, time)
  vulnerabilities.update!(vulnerability_attributes)
end

# Builds the attributes written to a vulnerability when it changes state: always `state`,
# plus the matching `*_by_id` / `*_at` pair for confirmed, resolved and dismissed.
#
# @param to_state_string [String] name of the target state
# @param to_state_int [Integer] integer value of the target state
# @param time [Time] timestamp for the `*_at` attribute
# @return [Hash{Symbol => Object}] attributes to update
def build_vulnerability_attributes(to_state_string, to_state_int, time)
  attributes = { state: to_state_int }

  case to_state_string
  when confirmed_state_string
    attributes[:confirmed_by_id] = user.id
    attributes[:confirmed_at] = time
  when resolved_state_string
    attributes[:resolved_by_id] = user.id
    attributes[:resolved_at] = time
  when dismissed_state_string
    attributes[:dismissed_by_id] = user.id
    attributes[:dismissed_at] = time
  end

  attributes
end

# Creates a vulnerability finding for one reported vulnerability, together with its
# identifiers and the finding/identifier join rows.
#
# @param vulnerability [Hash] one entry of the report's "vulnerabilities" array
# @param project [ActiveRecord::Base] project owning the finding
# @param scanner [ActiveRecord::Base] scanner that produced the finding
# @param metadata_version [String] report schema version
# @param pipeline [ActiveRecord::Base] stored as both initial and latest pipeline
# @return [ActiveRecord::Base] the created finding row
def create_vulnerability_finding(vulnerability:, project:, scanner:, metadata_version:, pipeline:)
  vulnerability_identifiers = create_vulnerability_identifiers(vulnerability: vulnerability, project: project)

  # The primary identifier is the row matching the first identifier listed in the report.
  reported_primary = vulnerability['identifiers'][0]
  primary_identifier = vulnerability_identifiers.find do |vi|
    vi.external_type == reported_primary['type'] && vi.external_id == reported_primary['value']
  end

  fingerprint_data = [
    vulnerability.dig('location', 'file'),
    vulnerability.dig('location', 'start_line'),
    vulnerability.dig('location', 'end_line')
  ].join(":")

  location_fingerprint_hex = Digest::SHA1.hexdigest(fingerprint_data) # rubocop:disable Fips/SHA1 -- we must use SHA1, since this is how the fingerprint is stored in the DB
  location_fingerprint_binary = [location_fingerprint_hex].pack('H*')

  uuid = Gitlab::UUID.v5(
    [
      sast_report_type_string,
      primary_identifier.fingerprint.unpack1('H*'),
      location_fingerprint_hex,
      project.id
    ].join("-")
  )

  vulnerability_finding = vulnerability_findings_table.create!(
    location_fingerprint: location_fingerprint_binary,
    uuid: uuid,
    description: vulnerability['description'],
    location: vulnerability['location'],
    raw_metadata: vulnerability.to_json,
    created_at: current_time, updated_at: current_time,
    severity: described_class::Migratable::Enums::Vulnerability.severity_levels[vulnerability['severity'].downcase],
    report_type: sast_report_type_int,
    project_id: project.id,
    scanner_id: scanner.id,
    primary_identifier_id: primary_identifier.id,
    name: vulnerability['name'],
    metadata_version: metadata_version,
    initial_pipeline_id: pipeline.id,
    latest_pipeline_id: pipeline.id
  )

  vulnerability_identifiers.each do |vi|
    vulnerability_finding_identifiers_table.create!(
      created_at: current_time, updated_at: current_time,
      identifier_id: vi.id, project_id: project.id, occurrence_id: vulnerability_finding.id
    )
  end

  vulnerability_finding
end