From cd657439491608bf84b0c5885744725830b0f14c Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Tue, 3 Sep 2024 12:27:56 +0300 Subject: [PATCH 01/40] Reorganise data_object_class behavior in fabricator; add EPSS --- .../data_object_fabricator.rb | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/ee/app/services/package_metadata/data_object_fabricator.rb b/ee/app/services/package_metadata/data_object_fabricator.rb index 4cfcdb110d14c9..d1146bb0e573a6 100644 --- a/ee/app/services/package_metadata/data_object_fabricator.rb +++ b/ee/app/services/package_metadata/data_object_fabricator.rb @@ -28,14 +28,30 @@ def create_object(data) end def data_object_class - return AdvisoryDataObject if sync_config.advisories? + if sync_config.epss? + epss_data_object_class + elsif sync_config.advisories? + advisory_data_object_class + elsif sync_config.v2? + v2_license_data_object_class + else + v1_license_data_object_class + end + end - license_data_object_class + def epss_data_object_class + EpssDataObject end - def license_data_object_class - return CompressedPackageDataObject if sync_config.v2? + def advisory_data_object_class + AdvisoryDataObject + end + + def v2_license_data_object_class + CompressedPackageDataObject + end + def v1_license_data_object_class DataObject end end -- GitLab From ed2bbcac72682e9d736711b3bc07a5ee970e9838 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Wed, 4 Sep 2024 12:15:25 +0300 Subject: [PATCH 02/40] Implement object fabricator spec for epss --- .../sync/cve_enrichment/v2/epss.ndjson | 3 +++ .../data_object_fabricator_spec.rb | 17 +++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/epss.ndjson diff --git a/ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/epss.ndjson b/ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/epss.ndjson new file mode 100644 index 00000000000000..e8f3412b602941 --- /dev/null +++ b/ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/epss.ndjson @@ -0,0 +1,3 @@ +{ "cve_id": "CVE-2020-1234", "score": 0.5 } +{ "cve_id": "CVE-2021-12345", "score": 0.6 } +{ "cve_id": "CVE-CORRUPT-ID", "score": 10.06 } diff --git a/ee/spec/services/package_metadata/data_object_fabricator_spec.rb b/ee/spec/services/package_metadata/data_object_fabricator_spec.rb index c82b32f8f30170..4adc343a774171 100644 --- a/ee/spec/services/package_metadata/data_object_fabricator_spec.rb +++ b/ee/spec/services/package_metadata/data_object_fabricator_spec.rb @@ -102,5 +102,22 @@ it_behaves_like 'it handles errors' end + + context 'when cve enrichment' do + let(:sync_config) { build(:pm_sync_config, data_type: 'cve_enrichment') } + let(:io) { File.open(Rails.root.join('ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/epss.ndjson')) } + let(:data_file) { Gitlab::PackageMetadata::Connector::NdjsonDataFile.new(io, 0, 0) } + + subject(:data_objects) { described_class.new(data_file: data_file, sync_config: sync_config).to_a } + + it { + is_expected.to match_array([ + have_attributes(cve: 'CVE-2020-1234', score: '0.5'), + have_attributes(cve: 'CVE-2021-12345', score: '0.6') + ]) + } + + it_behaves_like 'it handles errors' + end end end -- GitLab From 0df24d813e686a2b827c1078d625bbea55022b0b Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Wed, 4 Sep 2024 12:16:04 +0300 Subject: [PATCH 03/40] Epss -> CveEnrichment --- .../package_metadata/data_object_fabricator.rb | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/ee/app/services/package_metadata/data_object_fabricator.rb b/ee/app/services/package_metadata/data_object_fabricator.rb index d1146bb0e573a6..c8e33ef402afaa 100644 --- a/ee/app/services/package_metadata/data_object_fabricator.rb +++ b/ee/app/services/package_metadata/data_object_fabricator.rb @@ -28,10 +28,10 @@ def create_object(data) end def data_object_class - if sync_config.epss? - epss_data_object_class + if sync_config.cve_enrichment? + CveEnrichmentDataObject elsif sync_config.advisories? - advisory_data_object_class + AdvisoryDataObject elsif sync_config.v2? v2_license_data_object_class else @@ -39,14 +39,6 @@ def data_object_class end end - def epss_data_object_class - EpssDataObject - end - - def advisory_data_object_class - AdvisoryDataObject - end - def v2_license_data_object_class CompressedPackageDataObject end -- GitLab From 00c5ecab2ca570eba2536fa69f54494503a05dbf Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Wed, 4 Sep 2024 15:05:08 +0300 Subject: [PATCH 04/40] Implement object fabricator spec for epss --- .rubocop_todo/gitlab/bounded_contexts.yml | 1 + .../data_objects/cve_enrichment.rb | 24 +++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 ee/app/services/package_metadata/data_objects/cve_enrichment.rb diff --git a/.rubocop_todo/gitlab/bounded_contexts.yml b/.rubocop_todo/gitlab/bounded_contexts.yml index 15eadfc4859215..d22132db25d80f 100644 --- a/.rubocop_todo/gitlab/bounded_contexts.yml +++ b/.rubocop_todo/gitlab/bounded_contexts.yml @@ -3435,6 +3435,7 @@ Gitlab/BoundedContexts: - 'ee/app/services/package_metadata/compressed_package_data_object.rb' - 'ee/app/services/package_metadata/data_object.rb' - 'ee/app/services/package_metadata/data_object_fabricator.rb' + - 'ee/app/services/package_metadata/data_objects/cve_enrichment.rb' - 'ee/app/services/package_metadata/ingestion/advisory/advisory_ingestion_task.rb' - 'ee/app/services/package_metadata/ingestion/advisory/affected_package_ingestion_task.rb' - 'ee/app/services/package_metadata/ingestion/advisory/ingestion_service.rb' diff --git a/ee/app/services/package_metadata/data_objects/cve_enrichment.rb b/ee/app/services/package_metadata/data_objects/cve_enrichment.rb new file mode 100644 index 00000000000000..58cc301730b67c --- /dev/null +++ b/ee/app/services/package_metadata/data_objects/cve_enrichment.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +module PackageMetadata + module DataObjects + class CveEnrichment + # TODO: We don't use purl_type argument. Figure out what to do with it + def self.create(data, _purl_type) + new(**data.transform_keys(&:to_sym)) + end + + attr_accessor :cve_id, :epss_score + + def initialize(cve_id:, epss_score:) + @cve_id = cve_id + @epss_score = epss_score + end + + def ==(other) + cve_id == other.cve_id && + epss_score == other.epss_score + end + end + end +end -- GitLab From 7e4b6cc33f960aa6792421385d28cf5c61f11658 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Wed, 4 Sep 2024 15:36:41 +0300 Subject: [PATCH 05/40] Rename expected test values to include epss_ --- .../package_metadata/sync/cve_enrichment/v2/epss.ndjson | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/epss.ndjson b/ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/epss.ndjson index e8f3412b602941..7859c468f3394f 100644 --- a/ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/epss.ndjson +++ b/ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/epss.ndjson @@ -1,3 +1,3 @@ -{ "cve_id": "CVE-2020-1234", "score": 0.5 } -{ "cve_id": "CVE-2021-12345", "score": 0.6 } -{ "cve_id": "CVE-CORRUPT-ID", "score": 10.06 } +{ "cve_id": "CVE-2020-1234", "epss_score": 0.5 } +{ "cve_id": "CVE-2021-12345", "epss_score": 0.6 } +{ "cve_id": "CVE-CORRUPT-ID", "epss_score": 10.06 } -- GitLab From 08ab881c1bf389a5f20f25393d30d3f1f864b024 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Wed, 4 Sep 2024 16:25:33 +0300 Subject: [PATCH 06/40] Add CVE enrichment data object factory --- .../cve_enrichment_data_objects.rb | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 ee/spec/factories/package_metadata/cve_enrichment_data_objects.rb diff --git a/ee/spec/factories/package_metadata/cve_enrichment_data_objects.rb b/ee/spec/factories/package_metadata/cve_enrichment_data_objects.rb new file mode 100644 index 00000000000000..4da985cfd8cd90 --- /dev/null +++ b/ee/spec/factories/package_metadata/cve_enrichment_data_objects.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +FactoryBot.define do + factory :pm_advisory_data_object, class: '::PackageMetadata::AdvisoryDataObject' do + cve_id { 'CVE-2020-1234' } + epss_score { 0.5 } + + initialize_with do + new(**attributes) + end + + skip_create + end +end -- GitLab From 79ed6b4b759402afccb6a1dc24d02acb2a0d2579 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Wed, 4 Sep 2024 16:44:54 +0300 Subject: [PATCH 07/40] Implement tests for cve enrichment ingestion --- .../cve_enrichment_data_objects.rb | 2 +- .../cve_enrichment_ingestion_task_spec.rb | 77 +++++++++++++++++++ 2 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb diff --git a/ee/spec/factories/package_metadata/cve_enrichment_data_objects.rb b/ee/spec/factories/package_metadata/cve_enrichment_data_objects.rb index 4da985cfd8cd90..aa03a5b87338e2 100644 --- a/ee/spec/factories/package_metadata/cve_enrichment_data_objects.rb +++ b/ee/spec/factories/package_metadata/cve_enrichment_data_objects.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true FactoryBot.define do - factory :pm_advisory_data_object, class: '::PackageMetadata::AdvisoryDataObject' do + factory :pm_cve_enrichment_data_object, class: '::PackageMetadata::DataObjects::CveEnrichment' do cve_id { 'CVE-2020-1234' } epss_score { 0.5 } diff --git a/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb b/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb new file mode 100644 index 00000000000000..852256be643770 --- /dev/null +++ b/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb @@ -0,0 +1,77 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe PackageMetadata::Ingestion::CveEnrichment::CveEnrichmentIngestionTask, feature_category: :software_composition_analysis do + describe '.execute' do + let(:cve_id) { 'CVE-2023-12345' } + let(:new_epss_score) { 0.75 } + let(:old_epss_score) { 0.5 } + + let!(:existing_cve_enrichment) do + create(:pm_epss, cve: cve_id, score: old_epss_score) + end + + let(:import_data) do + [ + build(:pm_cve_enrichment_data_object, cve_id: cve_id, epss_score: new_epss_score), + build(:pm_cve_enrichment_data_object) + ] + end + + subject(:execute) { described_class.execute(import_data) } + + context 'when CVE enrichments are valid' do + it 'adds all new CVE enrichments in import data' do + expect { execute }.to change { PackageMetadata::Epss.count }.from(1).to(2) + end + + it 'updates existing CVE enrichments' do + expect { execute } + .to change { existing_cve_enrichment.reload.score } + .from(old_epss_score) + .to(new_epss_score) + end + + it 'returns the CVE enrichment database id values as a map' do + actual_cve_enrichment_map = execute + expected_cve_enrichment_map = {} + PackageMetadata::Epss.all.find_each do |epss| + expected_cve_enrichment_map[epss.cve] = + Hashie::Mash.new({ + id: epss.id, + cve: epss.cve, + score: epss.score, + created_at: epss.created_at, + updated_at: epss.updated_at + }) + end + expect(actual_cve_enrichment_map).to eq(expected_cve_enrichment_map) + end + end + + context 'when CVE enrichments are invalid' do + let(:valid_cve_enrichment) { build(:pm_cve_enrichment_data_object) } + let(:invalid_cve_enrichment) { build(:pm_cve_enrichment_data_object, epss_score: 'invalid') } + let(:import_data) { [valid_cve_enrichment, invalid_cve_enrichment] } + + it 'creates only valid CVE enrichments' do + expect { execute }.to change { PackageMetadata::Epss.count }.by(1) + end + + it 'logs invalid CVE enrichments as an error' do + expect(Gitlab::ErrorTracking) + .to receive(:track_exception) + .with( + an_instance_of(described_class::Error), + hash_including( + cve: invalid_cve_enrichment.cve_id, + score: 'invalid', + errors: hash_including(:score) + ) + ) + execute + end + end + end +end -- GitLab From 095a7402e0fb005a417cfe1e2b94e5ba95fe9476 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Wed, 4 Sep 2024 16:50:58 +0300 Subject: [PATCH 08/40] Implement cve enrichment ingestion (it's not done yet) --- .rubocop_todo/gitlab/bounded_contexts.yml | 2 + .../cve_enrichment_ingestion_task.rb | 62 +++++++++++++++++++ .../cve_enrichment/ingestion_service.rb | 17 +++++ 3 files changed, 81 insertions(+) create mode 100644 ee/app/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task.rb create mode 100644 ee/app/services/package_metadata/ingestion/cve_enrichment/ingestion_service.rb diff --git a/.rubocop_todo/gitlab/bounded_contexts.yml b/.rubocop_todo/gitlab/bounded_contexts.yml index d22132db25d80f..d11fef16f28a17 100644 --- a/.rubocop_todo/gitlab/bounded_contexts.yml +++ b/.rubocop_todo/gitlab/bounded_contexts.yml @@ -3442,6 +3442,8 @@ Gitlab/BoundedContexts: - 'ee/app/services/package_metadata/ingestion/compressed_package/ingestion_service.rb' - 'ee/app/services/package_metadata/ingestion/compressed_package/license_ingestion_task.rb' - 'ee/app/services/package_metadata/ingestion/compressed_package/package_ingestion_task.rb' + - 'ee/app/services/package_metadata/ingestion/cve_enrichment/ingestion_service.rb' + - 'ee/app/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task.rb' - 'ee/app/services/package_metadata/ingestion/data_map.rb' - 'ee/app/services/package_metadata/ingestion/ingestion_service.rb' - 'ee/app/services/package_metadata/ingestion/tasks/base.rb' diff --git a/ee/app/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task.rb b/ee/app/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task.rb new file mode 100644 index 00000000000000..de1a69ca48dad4 --- /dev/null +++ b/ee/app/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: true + +module PackageMetadata + module Ingestion + module CveEnrichment + class CveEnrichmentIngestionTask + Error = Class.new(StandardError) + + def self.execute(import_data) + new(import_data).execute + end + + def initialize(import_data) + @import_data = import_data + end + + def execute + PackageMetadata::Epss.bulk_upsert!(valid_cve_enrichment_entries, unique_by: %w[cve], + returns: %w[id cve score created_at updated_at]) + end + + private + + attr_reader :import_data + + # validates the list of provided cve_enrichment models and returns + # only those which are valid and logs the invalid packages as an error + def valid_cve_enrichment_entries + cve_enrichment.map do |cve_enrichment_entry| + if cve_enrichment_entry.invalid? + Gitlab::ErrorTracking.track_exception( + Error.new( + "invalid CVE enrichment entry"), + cve: cve_enrichment.cve, + score: cve_enrichment.score, + errors: cve_enrichment.errors.to_hash + ) + next + end + + cve_enrichment_entry + end.reject(&:blank?) + end + + def cve_enrichment + import_data.map do |data_object| + PackageMetadata::Epss.new( + cve: data_object.cve_id, + score: data_object.epss_score, + created_at: now, + updated_at: now + ) + end + end + + def now + @now ||= Time.zone.now + end + end + end + end +end diff --git a/ee/app/services/package_metadata/ingestion/cve_enrichment/ingestion_service.rb b/ee/app/services/package_metadata/ingestion/cve_enrichment/ingestion_service.rb new file mode 100644 index 00000000000000..f8636636b063df --- /dev/null +++ b/ee/app/services/package_metadata/ingestion/cve_enrichment/ingestion_service.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module PackageMetadata + module Ingestion + module CveEnrichment + class IngestionService + def initialize(import_data) + @data_map = DataMap.new(import_data) + end + + def execute + CveEnrichmentIngestionTask.new(@import_data, @data_map).execute + end + end + end + end +end -- GitLab From 93425c84b8f060d33696873a835fc6c8740a6ee6 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Thu, 5 Sep 2024 11:33:25 +0300 Subject: [PATCH 09/40] Fix cve enrichment ingestion variable usage and tests --- .../cve_enrichment_ingestion_task.rb | 6 ++--- .../cve_enrichment_ingestion_task_spec.rb | 24 ++++--------------- 2 files changed, 7 insertions(+), 23 deletions(-) diff --git a/ee/app/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task.rb b/ee/app/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task.rb index de1a69ca48dad4..443751813e9403 100644 --- a/ee/app/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task.rb +++ b/ee/app/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task.rb @@ -31,9 +31,9 @@ def valid_cve_enrichment_entries Gitlab::ErrorTracking.track_exception( Error.new( "invalid CVE enrichment entry"), - cve: cve_enrichment.cve, - score: cve_enrichment.score, - errors: cve_enrichment.errors.to_hash + cve: cve_enrichment_entry.cve, + score: cve_enrichment_entry.score, + errors: cve_enrichment_entry.errors.to_hash ) next end diff --git a/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb b/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb index 852256be643770..ad21a015647961 100644 --- a/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb +++ b/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb @@ -32,27 +32,11 @@ .from(old_epss_score) .to(new_epss_score) end - - it 'returns the CVE enrichment database id values as a map' do - actual_cve_enrichment_map = execute - expected_cve_enrichment_map = {} - PackageMetadata::Epss.all.find_each do |epss| - expected_cve_enrichment_map[epss.cve] = - Hashie::Mash.new({ - id: epss.id, - cve: epss.cve, - score: epss.score, - created_at: epss.created_at, - updated_at: epss.updated_at - }) - end - expect(actual_cve_enrichment_map).to eq(expected_cve_enrichment_map) - end end context 'when CVE enrichments are invalid' do let(:valid_cve_enrichment) { build(:pm_cve_enrichment_data_object) } - let(:invalid_cve_enrichment) { build(:pm_cve_enrichment_data_object, epss_score: 'invalid') } + let(:invalid_cve_enrichment) { build(:pm_cve_enrichment_data_object, cve_id: 'invalid') } let(:import_data) { [valid_cve_enrichment, invalid_cve_enrichment] } it 'creates only valid CVE enrichments' do @@ -65,9 +49,9 @@ .with( an_instance_of(described_class::Error), hash_including( - cve: invalid_cve_enrichment.cve_id, - score: 'invalid', - errors: hash_including(:score) + cve: 'invalid', + score: invalid_cve_enrichment.epss_score, + errors: { cve: ["is invalid"] } ) ) execute -- GitLab From 21440fe4ffa8d41012feaaa97e588fc74b1cd9cd Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Thu, 5 Sep 2024 15:42:59 +0300 Subject: [PATCH 10/40] Add spec for cve enrichment type --- .../package_metadata/sync_service_spec.rb | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/ee/spec/services/package_metadata/sync_service_spec.rb b/ee/spec/services/package_metadata/sync_service_spec.rb index 8c880de4a6f671..890888ba89b969 100644 --- a/ee/spec/services/package_metadata/sync_service_spec.rb +++ b/ee/spec/services/package_metadata/sync_service_spec.rb @@ -71,12 +71,24 @@ sync_config.data_type = 'advisories' end - it 'calls v1 ingestion service to store data' do + it 'calls advisories ingestion service to store data' do execute expect(PackageMetadata::Ingestion::Advisory::IngestionService) .to have_received(:execute).with(data_objects).twice end end + + context 'if data_type is cve enrichment' do + before do + sync_config.data_type = 'cve_enrichment' + end + + it 'calls cve enrichment ingestion service to store data' do + execute + expect(PackageMetadata::Ingestion::CveEnrichment::IngestionService) + .to have_received(:execute).with(data_objects).twice + end + end end context 'when a slice has been ingested' do @@ -229,6 +241,12 @@ it_behaves_like 'it calls #execute for each enabled config' end + + context 'and the data_type is cve_enrichment' do + let(:data_type) { 'cve_enrichment' } + + it_behaves_like 'it calls #execute for each enabled config' + end end context 'when stop_signal.stop? is true' do -- GitLab From 512b02a8397b4781492cd10a144133db3489c960 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Thu, 5 Sep 2024 16:07:52 +0300 Subject: [PATCH 11/40] Initial addition of cve_enrichment to sync service --- .../package_metadata/sync_configuration.rb | 24 +++++++++++++++++++ .../services/package_metadata/sync_service.rb | 4 +++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/ee/app/models/package_metadata/sync_configuration.rb b/ee/app/models/package_metadata/sync_configuration.rb index 3103e4d2876103..2c39ad09fa844d 100644 --- a/ee/app/models/package_metadata/sync_configuration.rb +++ b/ee/app/models/package_metadata/sync_configuration.rb @@ -26,6 +26,8 @@ class SyncConfiguration def self.configs_for(data_type) case data_type + when 'cve_enrichment' + cve_enrichment_configs when 'advisories' advisory_configs when 'licenses' @@ -35,6 +37,14 @@ def self.configs_for(data_type) end end + def self.cve_enrichment_configs + storage_type, base_uri = Location.for_cve_enrichment + + permitted_purl_types.map do |_, _| + new('cve_enrichment', storage_type, base_uri, VERSION_FORMAT_V2, nil) + end + end + def self.advisory_configs storage_type, base_uri = Location.for_advisories @@ -77,6 +87,10 @@ def advisories? data_type == 'advisories' end + def cve_enrichment? + data_type == 'cve_enrichment' + end + def to_s "#{data_type}:#{storage_type}/#{base_uri}/#{version_format}/#{purl_type}" end @@ -89,6 +103,8 @@ class Location LICENSES_BUCKET = 'prod-export-license-bucket-1a6c642fc4de57d4' ADVISORIES_PATH = Rails.root.join('vendor/package_metadata/advisories').freeze ADVISORIES_BUCKET = 'prod-export-advisory-bucket-1a6c642fc4de57d4' + CVE_ENRICHMENT_PATH = Rails.root.join('vendor/package_metadata/cve_enrichment').freeze + CVE_ENRICHMENT_BUCKET = 'prod-export-cve-enrichment-bucket-1a6c642fc4de57d4' def self.for_licenses if File.exist?(LICENSES_PATH) @@ -107,6 +123,14 @@ def self.for_advisories [:gcp, ADVISORIES_BUCKET] end end + + def self.for_cve_enrichment + if File.exist?(CVE_ENRICHMENT_PATH) + [:offline, CVE_ENRICHMENT_PATH] + else + [:gcp, CVE_ENRICHMENT_BUCKET] + end + end end end end diff --git a/ee/app/services/package_metadata/sync_service.rb b/ee/app/services/package_metadata/sync_service.rb index 463464b94b6453..aeab6815bc0618 100644 --- a/ee/app/services/package_metadata/sync_service.rb +++ b/ee/app/services/package_metadata/sync_service.rb @@ -49,7 +49,9 @@ def execute attr_accessor :sync_config, :signal def ingest(data) - if sync_config.advisories? + if sync_config.cve_enrichment? + PackageMetadata::Ingestion::CveEnrichment::CveEnrichmentIngestionTask.execute(data) + elsif sync_config.advisories? PackageMetadata::Ingestion::Advisory::IngestionService.execute(data) elsif sync_config.v2? PackageMetadata::Ingestion::CompressedPackage::IngestionService.execute(data) -- GitLab From 31431612615e6d39a24e1e5352d1ea99fb1e984f Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Thu, 5 Sep 2024 16:25:14 +0300 Subject: [PATCH 12/40] Add cve_enrichment to package_metadata enum --- app/models/concerns/enums/package_metadata.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/models/concerns/enums/package_metadata.rb b/app/models/concerns/enums/package_metadata.rb index 385807cd7ed148..0954c555e56608 100644 --- a/app/models/concerns/enums/package_metadata.rb +++ b/app/models/concerns/enums/package_metadata.rb @@ -9,7 +9,8 @@ class PackageMetadata DATA_TYPES = { advisories: 1, - licenses: 2 + licenses: 2, + cve_enrichment: 3 }.with_indifferent_access.freeze VERSION_FORMATS = { -- GitLab From 77f2e76cbd202b5aaf4c551d903aa853463ab88c Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Sun, 8 Sep 2024 17:14:00 +0300 Subject: [PATCH 13/40] Correct CVE enrichment sync service behaviour --- ee/app/services/package_metadata/sync_service.rb | 2 +- ee/spec/services/package_metadata/sync_service_spec.rb | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ee/app/services/package_metadata/sync_service.rb b/ee/app/services/package_metadata/sync_service.rb index aeab6815bc0618..b4980de58bb6f0 100644 --- a/ee/app/services/package_metadata/sync_service.rb +++ b/ee/app/services/package_metadata/sync_service.rb @@ -50,7 +50,7 @@ def execute def ingest(data) if sync_config.cve_enrichment? - PackageMetadata::Ingestion::CveEnrichment::CveEnrichmentIngestionTask.execute(data) + PackageMetadata::Ingestion::CveEnrichment::IngestionService.execute(data) elsif sync_config.advisories? PackageMetadata::Ingestion::Advisory::IngestionService.execute(data) elsif sync_config.v2? diff --git a/ee/spec/services/package_metadata/sync_service_spec.rb b/ee/spec/services/package_metadata/sync_service_spec.rb index 890888ba89b969..718e999478df52 100644 --- a/ee/spec/services/package_metadata/sync_service_spec.rb +++ b/ee/spec/services/package_metadata/sync_service_spec.rb @@ -26,6 +26,7 @@ allow(PackageMetadata::Ingestion::IngestionService).to receive(:execute) allow(PackageMetadata::Ingestion::CompressedPackage::IngestionService).to receive(:execute) allow(PackageMetadata::Ingestion::Advisory::IngestionService).to receive(:execute) + allow(PackageMetadata::Ingestion::CveEnrichment::IngestionService).to receive(:execute) allow(service).to receive(:sleep) allow(Gitlab::AppJsonLogger).to receive(:debug) end -- GitLab From e3adcd71d9c03ef9b0c2c520d5d8b30966d88846 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Sun, 8 Sep 2024 17:34:29 +0300 Subject: [PATCH 14/40] Implement cve_enrichment_worker --- .rubocop_todo/gitlab/bounded_contexts.yml | 1 + .../cve_enrichment_sync_worker.rb | 41 +++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb diff --git a/.rubocop_todo/gitlab/bounded_contexts.yml b/.rubocop_todo/gitlab/bounded_contexts.yml index d11fef16f28a17..c70ea9bbf918ba 100644 --- a/.rubocop_todo/gitlab/bounded_contexts.yml +++ b/.rubocop_todo/gitlab/bounded_contexts.yml @@ -3634,6 +3634,7 @@ Gitlab/BoundedContexts: - 'ee/app/workers/new_epic_worker.rb' - 'ee/app/workers/okrs/checkin_reminder_emails_cron_worker.rb' - 'ee/app/workers/package_metadata/advisories_sync_worker.rb' + - 'ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb' - 'ee/app/workers/package_metadata/global_advisory_scan_worker.rb' - 'ee/app/workers/package_metadata/licenses_sync_worker.rb' - 'ee/app/workers/personal_access_tokens/groups/policy_worker.rb' diff --git a/ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb b/ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb new file mode 100644 index 00000000000000..e0c9fd0307fc27 --- /dev/null +++ b/ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +module PackageMetadata + class CveEnrichmentSyncWorker + include ApplicationWorker + include CronjobQueue # rubocop:disable Scalability/CronWorkerContext -- there is no relevant metadata to add to logs + include ExclusiveLeaseGuard + + LEASE_TIMEOUT = 5.minutes + + data_consistency :always # rubocop:disable SidekiqLoadBalancing/WorkerDataConsistency -- exclusively writes + feature_category :software_composition_analysis + urgency :low + + idempotent! + sidekiq_options retry: false + worker_has_external_dependencies! + + def perform + return unless should_run? + + try_obtain_lease do + SyncService.execute(data_type: 'cve_enrichment', lease: exclusive_lease) + end + end + + private + + def should_run? + # Should we use some other feature here? + return false unless ::License.feature_available?(:dependency_scanning) + return false if Rails.env.development? && ENV.fetch('PM_SYNC_IN_DEV', 'false') != 'true' + + true + end + + def lease_timeout + LEASE_TIMEOUT + end + end +end -- GitLab From c7d6072e9f5dbc7a8958e1d2655a200b40aaa3e7 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Sun, 8 Sep 2024 17:36:29 +0300 Subject: [PATCH 15/40] Generate queues yaml --- ee/app/workers/all_queues.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ee/app/workers/all_queues.yml b/ee/app/workers/all_queues.yml index 28d6d825338f6e..31d56fc9d6d517 100644 --- a/ee/app/workers/all_queues.yml +++ b/ee/app/workers/all_queues.yml @@ -498,6 +498,15 @@ :weight: 1 :idempotent: true :tags: [] +- :name: cronjob:package_metadata_cve_enrichment_sync + :worker_name: PackageMetadata::CveEnrichmentSyncWorker + :feature_category: :software_composition_analysis + :has_external_dependencies: true + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: true + :tags: [] - :name: cronjob:package_metadata_licenses_sync :worker_name: PackageMetadata::LicensesSyncWorker :feature_category: :software_composition_analysis -- GitLab From 315f7cea5778f08a8990fbde6a68fb3cd9d0d8bb Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Mon, 9 Sep 2024 13:01:19 +0300 Subject: [PATCH 16/40] Correct cve_enrichment data object behaviour and spec --- .../services/package_metadata/data_object_fabricator.rb | 2 +- .../package_metadata/sync/cve_enrichment/v2/epss.ndjson | 3 --- .../package_metadata/data_object_fabricator_spec.rb | 8 ++++---- 3 files changed, 5 insertions(+), 8 deletions(-) delete mode 100644 ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/epss.ndjson diff --git a/ee/app/services/package_metadata/data_object_fabricator.rb b/ee/app/services/package_metadata/data_object_fabricator.rb index c8e33ef402afaa..9e14061276c284 100644 --- a/ee/app/services/package_metadata/data_object_fabricator.rb +++ b/ee/app/services/package_metadata/data_object_fabricator.rb @@ -29,7 +29,7 @@ def create_object(data) def data_object_class if sync_config.cve_enrichment? - CveEnrichmentDataObject + DataObjects::CveEnrichment elsif sync_config.advisories? AdvisoryDataObject elsif sync_config.v2? diff --git a/ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/epss.ndjson b/ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/epss.ndjson deleted file mode 100644 index 7859c468f3394f..00000000000000 --- a/ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/epss.ndjson +++ /dev/null @@ -1,3 +0,0 @@ -{ "cve_id": "CVE-2020-1234", "epss_score": 0.5 } -{ "cve_id": "CVE-2021-12345", "epss_score": 0.6 } -{ "cve_id": "CVE-CORRUPT-ID", "epss_score": 10.06 } diff --git a/ee/spec/services/package_metadata/data_object_fabricator_spec.rb b/ee/spec/services/package_metadata/data_object_fabricator_spec.rb index 4adc343a774171..683da41c2a044a 100644 --- a/ee/spec/services/package_metadata/data_object_fabricator_spec.rb +++ b/ee/spec/services/package_metadata/data_object_fabricator_spec.rb @@ -104,16 +104,16 @@ end context 'when cve enrichment' do - let(:sync_config) { build(:pm_sync_config, data_type: 'cve_enrichment') } - let(:io) { File.open(Rails.root.join('ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/epss.ndjson')) } + let(:sync_config) { build(:pm_sync_config, data_type: 'cve_enrichment', version_format: 'v2') } + let(:io) { File.open(Rails.root.join('ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/data.ndjson')) } let(:data_file) { Gitlab::PackageMetadata::Connector::NdjsonDataFile.new(io, 0, 0) } subject(:data_objects) { described_class.new(data_file: data_file, sync_config: sync_config).to_a } it { is_expected.to match_array([ - have_attributes(cve: 'CVE-2020-1234', score: '0.5'), - have_attributes(cve: 'CVE-2021-12345', score: '0.6') + have_attributes(cve_id: 'CVE-2020-1234', epss_score: 0.5), + have_attributes(cve_id: 'CVE-2021-12345', epss_score: 0.6) ]) } -- GitLab From a4910335c9ca490fc23bc51cb1fe84ef29776690 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Mon, 9 Sep 2024 13:06:46 +0300 Subject: [PATCH 17/40] Run cve_enrichment only once, not for each purl_type --- ee/app/models/package_metadata/sync_configuration.rb | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ee/app/models/package_metadata/sync_configuration.rb b/ee/app/models/package_metadata/sync_configuration.rb index 2c39ad09fa844d..6c241066818973 100644 --- a/ee/app/models/package_metadata/sync_configuration.rb +++ b/ee/app/models/package_metadata/sync_configuration.rb @@ -39,10 +39,7 @@ def self.configs_for(data_type) def self.cve_enrichment_configs storage_type, base_uri = Location.for_cve_enrichment - - permitted_purl_types.map do |_, _| - new('cve_enrichment', storage_type, base_uri, VERSION_FORMAT_V2, nil) - end + new('cve_enrichment', storage_type, base_uri, VERSION_FORMAT_V2, nil) end def self.advisory_configs -- GitLab From 83efd3ad75eb3d3072e402e6371a413bde7d73bd Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Mon, 9 Sep 2024 16:29:53 +0300 Subject: [PATCH 18/40] Create and use epss_ingestion feature flag --- .../package_metadata/cve_enrichment_sync_worker.rb | 1 + ee/config/feature_flags/beta/epss_ingestion.yml | 9 +++++++++ 2 files changed, 10 insertions(+) create mode 100644 ee/config/feature_flags/beta/epss_ingestion.yml diff --git a/ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb b/ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb index e0c9fd0307fc27..133d87c8f1c91e 100644 --- a/ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb +++ b/ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb @@ -27,6 +27,7 @@ def perform private def should_run? + return false unless Feature.enabled?(:epss_ingestion, object.project) # Should we use some other feature here? return false unless ::License.feature_available?(:dependency_scanning) return false if Rails.env.development? && ENV.fetch('PM_SYNC_IN_DEV', 'false') != 'true' diff --git a/ee/config/feature_flags/beta/epss_ingestion.yml b/ee/config/feature_flags/beta/epss_ingestion.yml new file mode 100644 index 00000000000000..fcb45ead38c589 --- /dev/null +++ b/ee/config/feature_flags/beta/epss_ingestion.yml @@ -0,0 +1,9 @@ +--- +name: epss_ingestion +feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/470856 +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/164348 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/470977 +milestone: '17.4' +group: group::composition analysis +type: beta +default_enabled: false -- GitLab From 3e81ac227d31341e6fd64a1a223d70a699582140 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Mon, 9 Sep 2024 17:01:18 +0300 Subject: [PATCH 19/40] Add cve enrichment fixture data --- .../package_metadata/sync/cve_enrichment/v2/data.ndjson | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/data.ndjson diff --git a/ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/data.ndjson b/ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/data.ndjson new file mode 100644 index 00000000000000..040d50ffe3dbf3 --- /dev/null +++ b/ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/data.ndjson @@ -0,0 +1,3 @@ +{ "cve_id": "CVE-2020-1234", "epss_score": 0.5 } +{ "cve_id": "CVE-2021-12345", "epss_score": 0.6 } +{ "epss_score": 0.2 } -- GitLab From 892af6cc28c42844658b3d07c2ffd5104aee3f62 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Mon, 9 Sep 2024 17:51:06 +0300 Subject: [PATCH 20/40] Expect cve_enrichment sync service to use just nil purl_type --- ee/app/models/package_metadata/sync_configuration.rb | 2 +- ee/spec/services/package_metadata/sync_service_spec.rb | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/ee/app/models/package_metadata/sync_configuration.rb b/ee/app/models/package_metadata/sync_configuration.rb index 6c241066818973..5b12b293a577de 100644 --- a/ee/app/models/package_metadata/sync_configuration.rb +++ b/ee/app/models/package_metadata/sync_configuration.rb @@ -39,7 +39,7 @@ def self.configs_for(data_type) def self.cve_enrichment_configs storage_type, base_uri = Location.for_cve_enrichment - new('cve_enrichment', storage_type, base_uri, VERSION_FORMAT_V2, nil) + [new('cve_enrichment', storage_type, base_uri, VERSION_FORMAT_V2, nil)] end def self.advisory_configs diff --git a/ee/spec/services/package_metadata/sync_service_spec.rb b/ee/spec/services/package_metadata/sync_service_spec.rb index 718e999478df52..9b610f162f4cb4 100644 --- a/ee/spec/services/package_metadata/sync_service_spec.rb +++ b/ee/spec/services/package_metadata/sync_service_spec.rb @@ -245,8 +245,15 @@ context 'and the data_type is cve_enrichment' do let(:data_type) { 'cve_enrichment' } + let(:should_stop) { false } - it_behaves_like 'it calls #execute for each enabled config' + it 'calls #execute once' do + expect(observer).to receive(:execute).once + expect(described_class).to receive(:new) + .with(having_attributes(data_type: data_type, purl_type: nil), stop_signal) + .and_return(observer) + execute + end end end -- GitLab From a3a5001de12e51a9b4a24b850c8cc5562587a5cb Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Tue, 10 Sep 2024 11:51:58 +0300 Subject: [PATCH 21/40] Add cve enrichment sync worker to cronjobs --- config/initializers/1_settings.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index aa96951cdaaafd..e785190ed3f1e3 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -933,6 +933,9 @@ Settings.cron_jobs['observability_alert_query_worker'] ||= {} Settings.cron_jobs['observability_alert_query_worker']['cron'] ||= '* * * * *' Settings.cron_jobs['observability_alert_query_worker']['job_class'] = 'Observability::AlertQueryWorker' + Settings.cron_jobs['package_metadata_cve_enrichment_sync_worker'] ||= {} + Settings.cron_jobs['package_metadata_cve_enrichment_sync_worker']['cron'] ||= "*/5 * * * *" + Settings.cron_jobs['package_metadata_cve_enrichment_sync_worker']['job_class'] = 'PackageMetadata::CveEnrichmentSyncWorker' Gitlab.com do Settings.cron_jobs['disable_legacy_open_source_license_for_inactive_projects'] ||= {} -- GitLab From 07327e3d06de8efd4ad76367c40e85e82dbdf870 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Tue, 10 Sep 2024 15:43:47 +0300 Subject: [PATCH 22/40] Check epss ingestion feature on instance level --- ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb b/ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb index 133d87c8f1c91e..f985f576ff195e 100644 --- a/ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb +++ b/ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb @@ -27,7 +27,7 @@ def perform private def should_run? - return false unless Feature.enabled?(:epss_ingestion, object.project) + return false unless Feature.enabled?(:epss_ingestion, :instance) # Should we use some other feature here? return false unless ::License.feature_available?(:dependency_scanning) return false if Rails.env.development? && ENV.fetch('PM_SYNC_IN_DEV', 'false') != 'true' -- GitLab From c8abb2ff3251d71bcd53099225a09984a401ad48 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Wed, 11 Sep 2024 11:08:13 +0300 Subject: [PATCH 23/40] Add ingestion for EPSS data into GitLab DB Changelog: added EE: true --- .../cve_enrichment/ingestion_service.rb | 8 +++-- .../connector/base_connector.rb | 2 ++ .../connector/base_connector_spec.rb | 29 ++++++++++++++++++- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/ee/app/services/package_metadata/ingestion/cve_enrichment/ingestion_service.rb b/ee/app/services/package_metadata/ingestion/cve_enrichment/ingestion_service.rb index f8636636b063df..cb7232f964a183 100644 --- a/ee/app/services/package_metadata/ingestion/cve_enrichment/ingestion_service.rb +++ b/ee/app/services/package_metadata/ingestion/cve_enrichment/ingestion_service.rb @@ -4,12 +4,16 @@ module PackageMetadata module Ingestion module CveEnrichment class IngestionService + def self.execute(import_data) + new(import_data).execute + end + def initialize(import_data) - @data_map = DataMap.new(import_data) + @import_data = import_data end def execute - CveEnrichmentIngestionTask.new(@import_data, @data_map).execute + CveEnrichmentIngestionTask.new(@import_data).execute end end end diff --git a/ee/lib/gitlab/package_metadata/connector/base_connector.rb b/ee/lib/gitlab/package_metadata/connector/base_connector.rb index 0b91b28f0e56cf..0e3183c84e0736 100644 --- a/ee/lib/gitlab/package_metadata/connector/base_connector.rb +++ b/ee/lib/gitlab/package_metadata/connector/base_connector.rb @@ -28,6 +28,8 @@ def data_file_class end def file_prefix + return sync_config.version_format unless sync_config.purl_type + File.join(sync_config.version_format, registry_id) end diff --git a/ee/spec/lib/gitlab/package_metadata/connector/base_connector_spec.rb b/ee/spec/lib/gitlab/package_metadata/connector/base_connector_spec.rb index 0db253668b3433..048d966bb1a6ce 100644 --- a/ee/spec/lib/gitlab/package_metadata/connector/base_connector_spec.rb +++ b/ee/spec/lib/gitlab/package_metadata/connector/base_connector_spec.rb @@ -3,7 +3,7 @@ require 'spec_helper' RSpec.describe Gitlab::PackageMetadata::Connector::BaseConnector, feature_category: :software_composition_analysis do - let(:sync_config) { build(:pm_sync_config, version_format: version_format) } + let(:sync_config) { build(:pm_sync_config, version_format: version_format, purl_type: purl_type) } let(:connector) { described_class.new(sync_config) } describe '.data_file_class' do @@ -21,4 +21,31 @@ it { is_expected.to be(::Gitlab::PackageMetadata::Connector::CsvDataFile) } end end + + describe '#file_prefix' do + subject(:file_prefix) { connector.send(:file_prefix) } + + let_it_be(:version_format) { 'v2' } + + context 'when purl_type is nil' do + let(:purl_type) { nil } + + it 'returns just the version_format' do + expect(file_prefix).to eq(version_format) + end + end + + context 'when purl_type is present' do + let(:purl_type) { 'npm' } + let(:registry_id) { 'npm' } + + before do + allow(::PackageMetadata::SyncConfiguration).to receive(:registry_id).with(purl_type).and_return(registry_id) + end + + it 'returns the joined path' do + expect(file_prefix).to eq(File.join(version_format, registry_id)) + end + end + end end -- GitLab From 3fc8a986aedfab623d9d4a1001b2edc3b0237c09 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Wed, 11 Sep 2024 11:58:28 +0300 Subject: [PATCH 24/40] Add CVE enrichment ingestion task and service --- .rubocop_todo/gitlab/bounded_contexts.yml | 2 + .../cve_enrichment_ingestion_task.rb | 62 +++++++++++++++++++ .../cve_enrichment/ingestion_service.rb | 21 +++++++ .../cve_enrichment_ingestion_task_spec.rb | 61 ++++++++++++++++++ 4 files changed, 146 insertions(+) create mode 100644 ee/app/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task.rb create mode 100644 ee/app/services/package_metadata/ingestion/cve_enrichment/ingestion_service.rb create mode 100644 ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb diff --git a/.rubocop_todo/gitlab/bounded_contexts.yml b/.rubocop_todo/gitlab/bounded_contexts.yml index 548cd702c14548..a45ea5d28daf79 100644 --- a/.rubocop_todo/gitlab/bounded_contexts.yml +++ b/.rubocop_todo/gitlab/bounded_contexts.yml @@ -3439,6 +3439,8 @@ Gitlab/BoundedContexts: - 'ee/app/services/package_metadata/ingestion/compressed_package/ingestion_service.rb' - 'ee/app/services/package_metadata/ingestion/compressed_package/license_ingestion_task.rb' - 'ee/app/services/package_metadata/ingestion/compressed_package/package_ingestion_task.rb' + - 'ee/app/services/package_metadata/ingestion/cve_enrichment/ingestion_service.rb' + - 'ee/app/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task.rb' - 'ee/app/services/package_metadata/ingestion/data_map.rb' - 'ee/app/services/package_metadata/ingestion/ingestion_service.rb' - 'ee/app/services/package_metadata/ingestion/tasks/base.rb' diff --git a/ee/app/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task.rb b/ee/app/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task.rb new file mode 100644 index 00000000000000..443751813e9403 --- /dev/null +++ b/ee/app/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: true + +module PackageMetadata + module Ingestion + module CveEnrichment + class CveEnrichmentIngestionTask + Error = Class.new(StandardError) + + def self.execute(import_data) + new(import_data).execute + end + + def initialize(import_data) + @import_data = import_data + end + + def execute + PackageMetadata::Epss.bulk_upsert!(valid_cve_enrichment_entries, unique_by: %w[cve], + returns: %w[id cve score created_at updated_at]) + end + + private + + attr_reader :import_data + + # validates the list of provided cve_enrichment models and returns + # only those which are valid and logs the invalid packages as an error + def valid_cve_enrichment_entries + cve_enrichment.map do |cve_enrichment_entry| + if cve_enrichment_entry.invalid? + Gitlab::ErrorTracking.track_exception( + Error.new( + "invalid CVE enrichment entry"), + cve: cve_enrichment_entry.cve, + score: cve_enrichment_entry.score, + errors: cve_enrichment_entry.errors.to_hash + ) + next + end + + cve_enrichment_entry + end.reject(&:blank?) + end + + def cve_enrichment + import_data.map do |data_object| + PackageMetadata::Epss.new( + cve: data_object.cve_id, + score: data_object.epss_score, + created_at: now, + updated_at: now + ) + end + end + + def now + @now ||= Time.zone.now + end + end + end + end +end diff --git a/ee/app/services/package_metadata/ingestion/cve_enrichment/ingestion_service.rb b/ee/app/services/package_metadata/ingestion/cve_enrichment/ingestion_service.rb new file mode 100644 index 00000000000000..cb7232f964a183 --- /dev/null +++ b/ee/app/services/package_metadata/ingestion/cve_enrichment/ingestion_service.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +module PackageMetadata + module Ingestion + module CveEnrichment + class IngestionService + def self.execute(import_data) + new(import_data).execute + end + + def initialize(import_data) + @import_data = import_data + end + + def execute + CveEnrichmentIngestionTask.new(@import_data).execute + end + end + end + end +end diff --git a/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb b/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb new file mode 100644 index 00000000000000..ad21a015647961 --- /dev/null +++ b/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe PackageMetadata::Ingestion::CveEnrichment::CveEnrichmentIngestionTask, feature_category: :software_composition_analysis do + describe '.execute' do + let(:cve_id) { 'CVE-2023-12345' } + let(:new_epss_score) { 0.75 } + let(:old_epss_score) { 0.5 } + + let!(:existing_cve_enrichment) do + create(:pm_epss, cve: cve_id, score: old_epss_score) + end + + let(:import_data) do + [ + build(:pm_cve_enrichment_data_object, cve_id: cve_id, epss_score: new_epss_score), + build(:pm_cve_enrichment_data_object) + ] + end + + subject(:execute) { described_class.execute(import_data) } + + context 'when CVE enrichments are valid' do + it 'adds all new CVE enrichments in import data' do + expect { execute }.to change { PackageMetadata::Epss.count }.from(1).to(2) + end + + it 'updates existing CVE enrichments' do + expect { execute } + .to change { existing_cve_enrichment.reload.score } + .from(old_epss_score) + .to(new_epss_score) + end + end + + context 'when CVE enrichments are invalid' do + let(:valid_cve_enrichment) { build(:pm_cve_enrichment_data_object) } + let(:invalid_cve_enrichment) { build(:pm_cve_enrichment_data_object, cve_id: 'invalid') } + let(:import_data) { [valid_cve_enrichment, invalid_cve_enrichment] } + + it 'creates only valid CVE enrichments' do + expect { execute }.to change { PackageMetadata::Epss.count }.by(1) + end + + it 'logs invalid CVE enrichments as an error' do + expect(Gitlab::ErrorTracking) + .to receive(:track_exception) + .with( + an_instance_of(described_class::Error), + hash_including( + cve: 'invalid', + score: invalid_cve_enrichment.epss_score, + errors: { cve: ["is invalid"] } + ) + ) + execute + end + end + end +end -- GitLab From ba52a20db8d738ac290e3d205029fba6ebd7c8e3 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Wed, 11 Sep 2024 12:49:36 +0300 Subject: [PATCH 25/40] Add CVE enrichment factory --- .../cve_enrichment_data_objects.rb | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 ee/spec/factories/package_metadata/cve_enrichment_data_objects.rb diff --git a/ee/spec/factories/package_metadata/cve_enrichment_data_objects.rb b/ee/spec/factories/package_metadata/cve_enrichment_data_objects.rb new file mode 100644 index 00000000000000..aa03a5b87338e2 --- /dev/null +++ b/ee/spec/factories/package_metadata/cve_enrichment_data_objects.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +FactoryBot.define do + factory :pm_cve_enrichment_data_object, class: '::PackageMetadata::DataObjects::CveEnrichment' do + cve_id { 'CVE-2020-1234' } + epss_score { 0.5 } + + initialize_with do + new(**attributes) + end + + skip_create + end +end -- GitLab From e49fd1659240e3d429d80fe628b2c8d2bb8027a7 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Wed, 11 Sep 2024 12:54:23 +0300 Subject: [PATCH 26/40] Create cve_enrichment data object --- .rubocop_todo/gitlab/bounded_contexts.yml | 1 + .../data_objects/cve_enrichment.rb | 23 +++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 ee/app/services/package_metadata/data_objects/cve_enrichment.rb diff --git a/.rubocop_todo/gitlab/bounded_contexts.yml b/.rubocop_todo/gitlab/bounded_contexts.yml index a45ea5d28daf79..243c03ffbae082 100644 --- a/.rubocop_todo/gitlab/bounded_contexts.yml +++ b/.rubocop_todo/gitlab/bounded_contexts.yml @@ -3433,6 +3433,7 @@ Gitlab/BoundedContexts: - 'ee/app/services/package_metadata/compressed_package_data_object.rb' - 'ee/app/services/package_metadata/data_object.rb' - 'ee/app/services/package_metadata/data_object_fabricator.rb' + - 'ee/app/services/package_metadata/data_objects/cve_enrichment.rb' - 'ee/app/services/package_metadata/ingestion/advisory/advisory_ingestion_task.rb' - 'ee/app/services/package_metadata/ingestion/advisory/affected_package_ingestion_task.rb' - 'ee/app/services/package_metadata/ingestion/advisory/ingestion_service.rb' diff --git a/ee/app/services/package_metadata/data_objects/cve_enrichment.rb b/ee/app/services/package_metadata/data_objects/cve_enrichment.rb new file mode 100644 index 00000000000000..ce5c1bbe7ffdcd --- /dev/null +++ b/ee/app/services/package_metadata/data_objects/cve_enrichment.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +module PackageMetadata + module DataObjects + class CveEnrichment + def self.create(data, _purl_type) + new(**data.transform_keys(&:to_sym)) + end + + attr_accessor :cve_id, :epss_score + + def initialize(cve_id:, epss_score:) + @cve_id = cve_id + @epss_score = epss_score + end + + def ==(other) + cve_id == other.cve_id && + epss_score == other.epss_score + end + end + end +end -- GitLab From 334eea77c4971a006c29f46d814eabec88888cdd Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Wed, 11 Sep 2024 13:12:56 +0300 Subject: [PATCH 27/40] Add missing local var in test --- .../gitlab/package_metadata/connector/base_connector_spec.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ee/spec/lib/gitlab/package_metadata/connector/base_connector_spec.rb b/ee/spec/lib/gitlab/package_metadata/connector/base_connector_spec.rb index 048d966bb1a6ce..c1e82e83e1da69 100644 --- a/ee/spec/lib/gitlab/package_metadata/connector/base_connector_spec.rb +++ b/ee/spec/lib/gitlab/package_metadata/connector/base_connector_spec.rb @@ -9,6 +9,8 @@ describe '.data_file_class' do subject(:data_file_class) { connector.send(:data_file_class) } + let_it_be(:purl_type) { nil } + context 'when version_format v2' do let(:version_format) { 'v2' } -- GitLab From d9bee1ba84c7a7e5ec5454c83666eeb53874918b Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Wed, 11 Sep 2024 15:22:26 +0300 Subject: [PATCH 28/40] Don't use checkpoints when cve_enrichment --- .../services/package_metadata/sync_service.rb | 36 ++++++++++++------- .../package_metadata/sync_service_spec.rb | 14 ++++++++ 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/ee/app/services/package_metadata/sync_service.rb b/ee/app/services/package_metadata/sync_service.rb index b4980de58bb6f0..f448013bdb5119 100644 --- a/ee/app/services/package_metadata/sync_service.rb +++ b/ee/app/services/package_metadata/sync_service.rb @@ -26,25 +26,35 @@ def initialize(sync_config, signal) end def execute - connector.data_after(checkpoint).each do |file| - log_progress(file) + use_checkpoint = !sync_config.cve_enrichment? + checkpoint_value = use_checkpoint ? checkpoint : nil - DataObjectFabricator.new(data_file: file, sync_config: sync_config) - .each_slice(INGEST_SLICE_SIZE) do |data_objects| - ingest(data_objects) - throttle - end + connector.data_after(checkpoint_value).each do |file| + log_progress(file) + ingest_file(file) + update_checkpoint(file) if use_checkpoint + return log_stop_signal if signal.stop? + end + end - checkpoint.update(sequence: file.sequence, chunk: file.chunk) + private - if signal.stop? - return Gitlab::AppJsonLogger.debug(class: self.class.name, - message: "Stop signal after checkpointing") + def ingest_file(file) + DataObjectFabricator.new(data_file: file, sync_config: sync_config) + .each_slice(INGEST_SLICE_SIZE) do |data_objects| + ingest(data_objects) + throttle end - end end - private + def update_checkpoint(file) + checkpoint.update(sequence: file.sequence, chunk: file.chunk) + end + + def log_stop_signal + Gitlab::AppJsonLogger.debug(class: self.class.name, + message: "Stop signal after checkpointing") + end attr_accessor :sync_config, :signal diff --git a/ee/spec/services/package_metadata/sync_service_spec.rb b/ee/spec/services/package_metadata/sync_service_spec.rb index 9b610f162f4cb4..47e70e7f42b307 100644 --- a/ee/spec/services/package_metadata/sync_service_spec.rb +++ b/ee/spec/services/package_metadata/sync_service_spec.rb @@ -80,10 +80,24 @@ end context 'if data_type is cve enrichment' do + let(:checkpoint) do + create(:pm_checkpoint, purl_type: sync_config.purl_type, data_type: sync_config.data_type) + end + before do sync_config.data_type = 'cve_enrichment' end + it 'always calls data_after(nil) even if checkpoints exist' do + checkpoint + execute + expect(connector).to have_received(:data_after).with(nil) + end + + it 'does not update the checkpoint' do + expect { execute }.not_to change { checkpoint.reload.attributes } + end + it 'calls cve enrichment ingestion service to store data' do execute expect(PackageMetadata::Ingestion::CveEnrichment::IngestionService) -- GitLab From 994b8f0686848361732f57c1968f74431a9cf26e Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Wed, 11 Sep 2024 16:28:12 +0300 Subject: [PATCH 29/40] Update checkpoint spec according to changes --- ee/spec/models/package_metadata/checkpoint_spec.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ee/spec/models/package_metadata/checkpoint_spec.rb b/ee/spec/models/package_metadata/checkpoint_spec.rb index 5fab1cd8a7ce90..6de65c517af911 100644 --- a/ee/spec/models/package_metadata/checkpoint_spec.rb +++ b/ee/spec/models/package_metadata/checkpoint_spec.rb @@ -6,7 +6,8 @@ let(:data_types) do { advisories: 1, - licenses: 2 + licenses: 2, + cve_enrichment: 3 } end -- GitLab From 0f2512adc5020425b6bb797ef8ec8c3107469b97 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Mon, 16 Sep 2024 14:40:51 +0300 Subject: [PATCH 30/40] DataObjects -> DataObject --- .rubocop_todo/gitlab/bounded_contexts.yml | 2 +- .../{data_objects => data_object}/cve_enrichment.rb | 2 +- .../package_metadata/data_object_fabricator.rb | 12 +++++++++--- .../package_metadata/cve_enrichment_data_objects.rb | 2 +- .../cve_enrichment_ingestion_task_spec.rb | 11 +++++++++++ 5 files changed, 23 insertions(+), 6 deletions(-) rename ee/app/services/package_metadata/{data_objects => data_object}/cve_enrichment.rb (95%) diff --git a/.rubocop_todo/gitlab/bounded_contexts.yml b/.rubocop_todo/gitlab/bounded_contexts.yml index 243c03ffbae082..40991464c90a6c 100644 --- a/.rubocop_todo/gitlab/bounded_contexts.yml +++ b/.rubocop_todo/gitlab/bounded_contexts.yml @@ -3433,7 +3433,7 @@ Gitlab/BoundedContexts: - 'ee/app/services/package_metadata/compressed_package_data_object.rb' - 'ee/app/services/package_metadata/data_object.rb' - 'ee/app/services/package_metadata/data_object_fabricator.rb' - - 'ee/app/services/package_metadata/data_objects/cve_enrichment.rb' + - 'ee/app/services/package_metadata/data_object/cve_enrichment.rb' - 'ee/app/services/package_metadata/ingestion/advisory/advisory_ingestion_task.rb' - 'ee/app/services/package_metadata/ingestion/advisory/affected_package_ingestion_task.rb' - 'ee/app/services/package_metadata/ingestion/advisory/ingestion_service.rb' diff --git a/ee/app/services/package_metadata/data_objects/cve_enrichment.rb b/ee/app/services/package_metadata/data_object/cve_enrichment.rb similarity index 95% rename from ee/app/services/package_metadata/data_objects/cve_enrichment.rb rename to ee/app/services/package_metadata/data_object/cve_enrichment.rb index ce5c1bbe7ffdcd..6283bb9f2e5415 100644 --- a/ee/app/services/package_metadata/data_objects/cve_enrichment.rb +++ b/ee/app/services/package_metadata/data_object/cve_enrichment.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true module PackageMetadata - module DataObjects + module DataObject class CveEnrichment def self.create(data, _purl_type) new(**data.transform_keys(&:to_sym)) diff --git a/ee/app/services/package_metadata/data_object_fabricator.rb b/ee/app/services/package_metadata/data_object_fabricator.rb index 4cfcdb110d14c9..f5437735b8adcf 100644 --- a/ee/app/services/package_metadata/data_object_fabricator.rb +++ b/ee/app/services/package_metadata/data_object_fabricator.rb @@ -28,9 +28,15 @@ def create_object(data) end def data_object_class - return AdvisoryDataObject if sync_config.advisories? - - license_data_object_class + if sync_config.cve_enrichment? + DataObject::CveEnrichment + elsif sync_config.advisories? + AdvisoryDataObject + elsif sync_config.v2? + v2_license_data_object_class + else + v1_license_data_object_class + end end def license_data_object_class diff --git a/ee/spec/factories/package_metadata/cve_enrichment_data_objects.rb b/ee/spec/factories/package_metadata/cve_enrichment_data_objects.rb index aa03a5b87338e2..6fed3b0ca673b4 100644 --- a/ee/spec/factories/package_metadata/cve_enrichment_data_objects.rb +++ b/ee/spec/factories/package_metadata/cve_enrichment_data_objects.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true FactoryBot.define do - factory :pm_cve_enrichment_data_object, class: '::PackageMetadata::DataObjects::CveEnrichment' do + factory :pm_cve_enrichment_data_object, class: '::PackageMetadata::DataObject::CveEnrichment' do cve_id { 'CVE-2020-1234' } epss_score { 0.5 } diff --git a/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb b/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb index ad21a015647961..843b444de0bf33 100644 --- a/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb +++ b/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb @@ -32,6 +32,17 @@ .from(old_epss_score) .to(new_epss_score) end + + it 'correctly stores the data for new and updated CVE enrichments' do + result = execute + + updated_enrichment = result.find { |entry| entry.cve == cve_id } + new_enrichment = result.find { |entry| entry.cve != cve_id } + + expect(updated_enrichment.score).to eq(new_epss_score) + expect(new_enrichment.cve).to eq(import_data.last.cve_id) + expect(new_enrichment.score).to eq(import_data.last.epss_score) + end end context 'when CVE enrichments are invalid' do -- GitLab From 95f3b36c1bfd057557cea57a39f6277a083828a0 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Mon, 16 Sep 2024 14:51:50 +0300 Subject: [PATCH 31/40] Revert "DataObjects -> DataObject" This reverts commit 0f2512adc5020425b6bb797ef8ec8c3107469b97. --- .rubocop_todo/gitlab/bounded_contexts.yml | 2 +- .../package_metadata/data_object_fabricator.rb | 12 +++--------- .../{data_object => data_objects}/cve_enrichment.rb | 2 +- .../package_metadata/cve_enrichment_data_objects.rb | 2 +- .../cve_enrichment_ingestion_task_spec.rb | 11 ----------- 5 files changed, 6 insertions(+), 23 deletions(-) rename ee/app/services/package_metadata/{data_object => data_objects}/cve_enrichment.rb (95%) diff --git a/.rubocop_todo/gitlab/bounded_contexts.yml b/.rubocop_todo/gitlab/bounded_contexts.yml index 40991464c90a6c..243c03ffbae082 100644 --- a/.rubocop_todo/gitlab/bounded_contexts.yml +++ b/.rubocop_todo/gitlab/bounded_contexts.yml @@ -3433,7 +3433,7 @@ Gitlab/BoundedContexts: - 'ee/app/services/package_metadata/compressed_package_data_object.rb' - 'ee/app/services/package_metadata/data_object.rb' - 'ee/app/services/package_metadata/data_object_fabricator.rb' - - 'ee/app/services/package_metadata/data_object/cve_enrichment.rb' + - 'ee/app/services/package_metadata/data_objects/cve_enrichment.rb' - 'ee/app/services/package_metadata/ingestion/advisory/advisory_ingestion_task.rb' - 'ee/app/services/package_metadata/ingestion/advisory/affected_package_ingestion_task.rb' - 'ee/app/services/package_metadata/ingestion/advisory/ingestion_service.rb' diff --git a/ee/app/services/package_metadata/data_object_fabricator.rb b/ee/app/services/package_metadata/data_object_fabricator.rb index f5437735b8adcf..4cfcdb110d14c9 100644 --- a/ee/app/services/package_metadata/data_object_fabricator.rb +++ b/ee/app/services/package_metadata/data_object_fabricator.rb @@ -28,15 +28,9 @@ def create_object(data) end def data_object_class - if sync_config.cve_enrichment? - DataObject::CveEnrichment - elsif sync_config.advisories? - AdvisoryDataObject - elsif sync_config.v2? - v2_license_data_object_class - else - v1_license_data_object_class - end + return AdvisoryDataObject if sync_config.advisories? + + license_data_object_class end def license_data_object_class diff --git a/ee/app/services/package_metadata/data_object/cve_enrichment.rb b/ee/app/services/package_metadata/data_objects/cve_enrichment.rb similarity index 95% rename from ee/app/services/package_metadata/data_object/cve_enrichment.rb rename to ee/app/services/package_metadata/data_objects/cve_enrichment.rb index 6283bb9f2e5415..ce5c1bbe7ffdcd 100644 --- a/ee/app/services/package_metadata/data_object/cve_enrichment.rb +++ b/ee/app/services/package_metadata/data_objects/cve_enrichment.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true module PackageMetadata - module DataObject + module DataObjects class CveEnrichment def self.create(data, _purl_type) new(**data.transform_keys(&:to_sym)) diff --git a/ee/spec/factories/package_metadata/cve_enrichment_data_objects.rb b/ee/spec/factories/package_metadata/cve_enrichment_data_objects.rb index 6fed3b0ca673b4..aa03a5b87338e2 100644 --- a/ee/spec/factories/package_metadata/cve_enrichment_data_objects.rb +++ b/ee/spec/factories/package_metadata/cve_enrichment_data_objects.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true FactoryBot.define do - factory :pm_cve_enrichment_data_object, class: '::PackageMetadata::DataObject::CveEnrichment' do + factory :pm_cve_enrichment_data_object, class: '::PackageMetadata::DataObjects::CveEnrichment' do cve_id { 'CVE-2020-1234' } epss_score { 0.5 } diff --git a/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb b/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb index 843b444de0bf33..ad21a015647961 100644 --- a/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb +++ b/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb @@ -32,17 +32,6 @@ .from(old_epss_score) .to(new_epss_score) end - - it 'correctly stores the data for new and updated CVE enrichments' do - result = execute - - updated_enrichment = result.find { |entry| entry.cve == cve_id } - new_enrichment = result.find { |entry| entry.cve != cve_id } - - expect(updated_enrichment.score).to eq(new_epss_score) - expect(new_enrichment.cve).to eq(import_data.last.cve_id) - expect(new_enrichment.score).to eq(import_data.last.epss_score) - end end context 'when CVE enrichments are invalid' do -- GitLab From e0e92bafd421aa50ab8154b4d26acb3fd631f7b0 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Mon, 16 Sep 2024 15:42:56 +0300 Subject: [PATCH 32/40] Add a spec to validate data integrity --- .../cve_enrichment_ingestion_task_spec.rb | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb b/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb index ad21a015647961..bd6f117d178826 100644 --- a/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb +++ b/ee/spec/services/package_metadata/ingestion/cve_enrichment/cve_enrichment_ingestion_task_spec.rb @@ -32,6 +32,21 @@ .from(old_epss_score) .to(new_epss_score) end + + it 'correctly stores the data for new and updated CVE enrichments' do + result = execute + + expect(result).to contain_exactly( + a_collection_including( + cve_id, + new_epss_score + ), + a_collection_including( + import_data.last.cve_id, + import_data.last.epss_score + ) + ) + end end context 'when CVE enrichments are invalid' do -- GitLab From 9aa1aa6a4d875e760f5fcbdd0db2e2ec99ce43dd Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Tue, 17 Sep 2024 17:24:15 +0300 Subject: [PATCH 33/40] Writing tests for cve enrichment data object --- .../data_objects/cve_enrichment_spec.rb | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 ee/spec/services/package_metadata/data_objects/cve_enrichment_spec.rb diff --git a/ee/spec/services/package_metadata/data_objects/cve_enrichment_spec.rb b/ee/spec/services/package_metadata/data_objects/cve_enrichment_spec.rb new file mode 100644 index 00000000000000..c31e1eaec81a6a --- /dev/null +++ b/ee/spec/services/package_metadata/data_objects/cve_enrichment_spec.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe PackageMetadata::DataObjects::CveEnrichment, feature_category: :software_composition_analysis do + describe '.create' do + let(:data) do + { + "cve_id" => "CVE-2023-12345", + "epss_score" => 0.5 + } + end + + subject(:create) { described_class.create(data, nil) } + + it { is_expected.to be_kind_of(described_class) } + + it do + is_expected.to match(have_attributes( + cve_id: "CVE-2023-12345", + epss_score: 0.5 + )) + end + + context 'when an attribute is missing' do + using RSpec::Parameterized::TableSyntax + + subject(:create!) { described_class.create(data.except(attribute), nil) } + + where(:attribute, :required) do + :cve_id | true + :epss_score | true + end + + with_them do + specify do + required ? expect { create! }.to(raise_error(ArgumentError)) : expect { create! }.not_to(raise_error) + end + end + end + end + + describe '==' do + let(:obj) { described_class.new(cve_id: "CVE-2023-12345", epss_score: 0.85) } + + subject(:equality) { obj == other } + + context 'when all attributes are equal' do + let(:other) { obj } + + it { is_expected.to eq(true) } + end + + context 'when cve_id does not match' do + let(:other) { obj.dup.tap { |o| o.cve_id = "CVE-2023-54321" } } + + it { is_expected.to eq(false) } + end + + context 'when epss_score does not match' do + let(:other) { obj.dup.tap { |o| o.epss_score = 0.9 } } + + it { is_expected.to eq(false) } + end + end +end -- GitLab From f15ac253b759cf194017cc27b77e587a8d52de30 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Wed, 18 Sep 2024 12:06:14 +0300 Subject: [PATCH 34/40] Convert attribute to string in test --- .../package_metadata/data_objects/cve_enrichment_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/spec/services/package_metadata/data_objects/cve_enrichment_spec.rb b/ee/spec/services/package_metadata/data_objects/cve_enrichment_spec.rb index c31e1eaec81a6a..0baac728567f08 100644 --- a/ee/spec/services/package_metadata/data_objects/cve_enrichment_spec.rb +++ b/ee/spec/services/package_metadata/data_objects/cve_enrichment_spec.rb @@ -25,7 +25,7 @@ context 'when an attribute is missing' do using RSpec::Parameterized::TableSyntax - subject(:create!) { described_class.create(data.except(attribute), nil) } + subject(:create!) { described_class.create(data.except(attribute.to_s), nil) } where(:attribute, :required) do :cve_id | true -- GitLab From ce649095bdf69f04bbee963b14067b64e364a87d Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Wed, 18 Sep 2024 14:50:07 +0300 Subject: [PATCH 35/40] Add ingestion service spec --- .../cve_enrichment/ingestion_service_spec.rb | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 ee/spec/services/package_metadata/ingestion/cve_enrichment/ingestion_service_spec.rb diff --git a/ee/spec/services/package_metadata/ingestion/cve_enrichment/ingestion_service_spec.rb b/ee/spec/services/package_metadata/ingestion/cve_enrichment/ingestion_service_spec.rb new file mode 100644 index 00000000000000..57d980365a273f --- /dev/null +++ b/ee/spec/services/package_metadata/ingestion/cve_enrichment/ingestion_service_spec.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe PackageMetadata::Ingestion::CveEnrichment::IngestionService, feature_category: :software_composition_analysis do + describe '.execute' do + subject(:execute) { described_class.execute(import_data) } + + let(:import_data) { build_list(:pm_cve_enrichment_data_object, 5) } + + describe 'execution' do + context 'when no errors' do + it 'calls CveEnrichmentIngestionTask with import data' do + task_instance = instance_double(PackageMetadata::Ingestion::CveEnrichment::CveEnrichmentIngestionTask) + expect(PackageMetadata::Ingestion::CveEnrichment::CveEnrichmentIngestionTask) + .to receive(:new).with(import_data).and_return(task_instance) + expect(task_instance).to receive(:execute) + + execute + end + end + + context 'when error occurs' do + it 'raises the error' do + task_instance = instance_double(PackageMetadata::Ingestion::CveEnrichment::CveEnrichmentIngestionTask) + allow(PackageMetadata::Ingestion::CveEnrichment::CveEnrichmentIngestionTask) + .to receive(:new).with(import_data).and_return(task_instance) + allow(task_instance).to receive(:execute).and_raise(StandardError) + + expect { execute }.to raise_error(StandardError) + end + end + end + end +end -- GitLab From 5da718beb68839721853ffdfbd465261f324d193 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Thu, 19 Sep 2024 16:41:57 +0300 Subject: [PATCH 36/40] Use NullCheckpoint instead of complicating sync_service logic --- ee/app/models/package_metadata/checkpoint.rb | 8 ++++++ .../services/package_metadata/sync_service.rb | 27 +++++++------------ .../package_metadata/sync_service_spec.rb | 4 +-- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/ee/app/models/package_metadata/checkpoint.rb b/ee/app/models/package_metadata/checkpoint.rb index 9f22db8ae79892..299cf8a53a1f73 100644 --- a/ee/app/models/package_metadata/checkpoint.rb +++ b/ee/app/models/package_metadata/checkpoint.rb @@ -18,4 +18,12 @@ def self.with_path_components(data_type, version_format, purl_type) find_or_initialize_by(data_type: data_type, purl_type: purl_type, version_format: version_format) end end + + class NullCheckpoint + def update(*args); end + + def blank? + true + end + end end diff --git a/ee/app/services/package_metadata/sync_service.rb b/ee/app/services/package_metadata/sync_service.rb index f448013bdb5119..57ac043d1bf97c 100644 --- a/ee/app/services/package_metadata/sync_service.rb +++ b/ee/app/services/package_metadata/sync_service.rb @@ -26,13 +26,12 @@ def initialize(sync_config, signal) end def execute - use_checkpoint = !sync_config.cve_enrichment? - checkpoint_value = use_checkpoint ? checkpoint : nil + connector.data_after(checkpoint).each do |file| + Gitlab::AppJsonLogger.debug(class: self.class.name, message: "Evaluating data for #{sync_config}/#{file}") - connector.data_after(checkpoint_value).each do |file| - log_progress(file) ingest_file(file) - update_checkpoint(file) if use_checkpoint + checkpoint.update(sequence: file.sequence, chunk: file.chunk) + return log_stop_signal if signal.stop? end end @@ -47,10 +46,6 @@ def ingest_file(file) end end - def update_checkpoint(file) - checkpoint.update(sequence: file.sequence, chunk: file.chunk) - end - def log_stop_signal Gitlab::AppJsonLogger.debug(class: self.class.name, message: "Stop signal after checkpointing") @@ -71,8 +66,12 @@ def ingest(data) end def checkpoint - @checkpoint ||= PackageMetadata::Checkpoint - .with_path_components(sync_config.data_type, sync_config.version_format, sync_config.purl_type) + if sync_config.cve_enrichment? + @checkpoint ||= PackageMetadata::NullCheckpoint.new + else + @checkpoint ||= PackageMetadata::Checkpoint + .with_path_components(sync_config.data_type, sync_config.version_format, sync_config.purl_type) + end end def connector @@ -86,12 +85,6 @@ def connector end end - def log_progress(file) - Gitlab::AppJsonLogger - .debug(class: self.class.name, - message: "Evaluating data for #{sync_config}/#{file}") - end - def throttle return if ENV['PM_SYNC_IN_DEV'] == 'true' diff --git a/ee/spec/services/package_metadata/sync_service_spec.rb b/ee/spec/services/package_metadata/sync_service_spec.rb index 47e70e7f42b307..0fa4e009bfe98a 100644 --- a/ee/spec/services/package_metadata/sync_service_spec.rb +++ b/ee/spec/services/package_metadata/sync_service_spec.rb @@ -88,10 +88,10 @@ sync_config.data_type = 'cve_enrichment' end - it 'always calls data_after(nil) even if checkpoints exist' do + it 'always calls data_after(NullCheckpoint) even if checkpoints exist' do checkpoint execute - expect(connector).to have_received(:data_after).with(nil) + expect(connector).to have_received(:data_after).with(an_instance_of(PackageMetadata::NullCheckpoint)) end it 'does not update the checkpoint' do -- GitLab From e779c90e8c14b641e4c421d8262609d76868aee9 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Sun, 22 Sep 2024 14:29:52 +0300 Subject: [PATCH 37/40] Implement specs for cve enrichment sync worker --- .../cve_enrichment_sync_worker.rb | 1 - .../cve_enrichment_sync_worker_spec.rb | 86 +++++++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 ee/spec/workers/package_metadata/cve_enrichment_sync_worker_spec.rb diff --git a/ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb b/ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb index f985f576ff195e..0b01272a593ce2 100644 --- a/ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb +++ b/ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb @@ -28,7 +28,6 @@ def perform def should_run? return false unless Feature.enabled?(:epss_ingestion, :instance) - # Should we use some other feature here? return false unless ::License.feature_available?(:dependency_scanning) return false if Rails.env.development? && ENV.fetch('PM_SYNC_IN_DEV', 'false') != 'true' diff --git a/ee/spec/workers/package_metadata/cve_enrichment_sync_worker_spec.rb b/ee/spec/workers/package_metadata/cve_enrichment_sync_worker_spec.rb new file mode 100644 index 00000000000000..88e0fed4824cfb --- /dev/null +++ b/ee/spec/workers/package_metadata/cve_enrichment_sync_worker_spec.rb @@ -0,0 +1,86 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe PackageMetadata::CveEnrichmentSyncWorker, type: :worker, feature_category: :software_composition_analysis do + describe '#perform' do + let(:instance) { described_class.new } + let(:lease) { instance_double(Gitlab::ExclusiveLease) } + + subject(:perform!) { instance.perform } + + before do + allow(instance).to receive(:try_obtain_lease).and_yield + allow(Gitlab::ExclusiveLease).to receive(:new).and_return(lease) + end + + shared_examples_for 'it syncs' do + it 'calls sync service with the cve_enrichment data_type' do + expect(PackageMetadata::SyncService).to receive(:execute) + .with(data_type: 'cve_enrichment', lease: lease) + + perform! + end + end + + shared_examples_for 'it does not sync' do + it 'does not call sync service' do + expect(PackageMetadata::SyncService).not_to receive(:execute) + + perform! + end + end + + context 'when the epss_ingestion feature flag is disabled' do + before do + stub_feature_flags(epss_ingestion: false) + end + + it_behaves_like 'it does not sync' + end + + context 'when the dependency_scanning feature is disabled' do + before do + stub_licensed_features(dependency_scanning: false) + end + + it_behaves_like 'it does not sync' + end + + context 'when the dependency_scanning feature is enabled' do + before do + stub_licensed_features(dependency_scanning: true) + end + + context 'and rails is not development' do + before do + allow(Rails.env).to receive(:development?).and_return(false) + end + + it_behaves_like 'it syncs' + end + + context 'and rails is development' do + before do + allow(Rails.env).to receive(:development?).and_return(true) + end + + context 'and sync in dev env variable is true' do + before do + stub_env('PM_SYNC_IN_DEV', true) + end + + it_behaves_like 'it syncs' + end + + context 'and sync in dev env variable is false' do + before do + stub_env('PM_SYNC_IN_DEV', false) + end + + it_behaves_like 'it does not sync' + end + end + end + end +end -- GitLab From 38d5de87a85b03e09c379b564669cd2084c44ad9 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Mon, 23 Sep 2024 09:50:03 +0300 Subject: [PATCH 38/40] Add spec for NullCheckpoint --- .../package_metadata/checkpoint_spec.rb | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/ee/spec/models/package_metadata/checkpoint_spec.rb b/ee/spec/models/package_metadata/checkpoint_spec.rb index 6de65c517af911..941ad1867a84da 100644 --- a/ee/spec/models/package_metadata/checkpoint_spec.rb +++ b/ee/spec/models/package_metadata/checkpoint_spec.rb @@ -106,3 +106,27 @@ end end end + +RSpec.describe PackageMetadata::NullCheckpoint, type: :model, feature_category: :software_composition_analysis do + subject(:null_checkpoint) { described_class.new } + + describe '#update' do + it 'accepts any number of arguments without raising an error' do + # rubocop:disable Rails/SaveBang -- There is no `update!` method since NullCheckpoint isn't ActiveRecord + expect { null_checkpoint.update }.not_to raise_error + expect { null_checkpoint.update(sequence: 1, chunk: 2) }.not_to raise_error + # rubocop:enable Rails/SaveBang + end + + it 'returns nil' do + expect(null_checkpoint.update).to be_nil + expect(null_checkpoint.update(sequence: 1, chunk: 2)).to be_nil + end + end + + describe '#blank?' do + it 'always returns true' do + expect(null_checkpoint.blank?).to be true + end + end +end -- GitLab From e67a907ee0483d248ec52dc6c52c11cda00cf698 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Mon, 23 Sep 2024 09:53:26 +0300 Subject: [PATCH 39/40] Add spec for sync configuration --- .../sync_configuration_spec.rb | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ee/spec/models/package_metadata/sync_configuration_spec.rb b/ee/spec/models/package_metadata/sync_configuration_spec.rb index c70bc16e1cf899..3a34675871b197 100644 --- a/ee/spec/models/package_metadata/sync_configuration_spec.rb +++ b/ee/spec/models/package_metadata/sync_configuration_spec.rb @@ -111,6 +111,24 @@ it { is_expected.to match_array([expected_storage_type, expected_base_uri]) } end end + + describe '.for_cve_enrichment' do + subject { described_class.for_cve_enrichment } + + where(:filepath_exists, :expected_storage_type, :expected_base_uri) do + true | :offline | described_class::CVE_ENRICHMENT_PATH + false | :gcp | described_class::CVE_ENRICHMENT_BUCKET + end + + with_them do + before do + allow(File).to receive(:exist?).with(described_class::CVE_ENRICHMENT_PATH) + .and_return(filepath_exists) + end + + it { is_expected.to match_array([expected_storage_type, expected_base_uri]) } + end + end end describe '.registry' do -- GitLab From 7bb4f19819ae883bdf0cf1358accd99c759ef4c2 Mon Sep 17 00:00:00 2001 From: Yasha Rise Date: Mon, 23 Sep 2024 13:09:28 +0300 Subject: [PATCH 40/40] Update bucket name to match deployment --- ee/app/models/package_metadata/sync_configuration.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/app/models/package_metadata/sync_configuration.rb b/ee/app/models/package_metadata/sync_configuration.rb index 5b12b293a577de..1f6df2cc793f52 100644 --- a/ee/app/models/package_metadata/sync_configuration.rb +++ b/ee/app/models/package_metadata/sync_configuration.rb @@ -101,7 +101,7 @@ class Location ADVISORIES_PATH = Rails.root.join('vendor/package_metadata/advisories').freeze ADVISORIES_BUCKET = 'prod-export-advisory-bucket-1a6c642fc4de57d4' CVE_ENRICHMENT_PATH = Rails.root.join('vendor/package_metadata/cve_enrichment').freeze - CVE_ENRICHMENT_BUCKET = 'prod-export-cve-enrichment-bucket-1a6c642fc4de57d4' + CVE_ENRICHMENT_BUCKET = 'prod-export-cve-enrichments-bucket-1a6c642fc4de57d4' def self.for_licenses if File.exist?(LICENSES_PATH) -- GitLab