diff --git a/.rubocop_todo/gitlab/bounded_contexts.yml b/.rubocop_todo/gitlab/bounded_contexts.yml index 7559a7d2dc944b3e6bf8d8ae27a8f2c21fa54a1e..c618ffa31afbe327b679dd1b94c8ffb7404ca870 100644 --- a/.rubocop_todo/gitlab/bounded_contexts.yml +++ b/.rubocop_todo/gitlab/bounded_contexts.yml @@ -3631,6 +3631,7 @@ Gitlab/BoundedContexts: - 'ee/app/workers/new_epic_worker.rb' - 'ee/app/workers/okrs/checkin_reminder_emails_cron_worker.rb' - 'ee/app/workers/package_metadata/advisories_sync_worker.rb' + - 'ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb' - 'ee/app/workers/package_metadata/global_advisory_scan_worker.rb' - 'ee/app/workers/package_metadata/licenses_sync_worker.rb' - 'ee/app/workers/personal_access_tokens/groups/policy_worker.rb' diff --git a/app/models/concerns/enums/package_metadata.rb b/app/models/concerns/enums/package_metadata.rb index 385807cd7ed14875f7f0e7c6790c28332afa2cda..0954c555e56608dc90a3b0f86e9164db9554b5a8 100644 --- a/app/models/concerns/enums/package_metadata.rb +++ b/app/models/concerns/enums/package_metadata.rb @@ -9,7 +9,8 @@ class PackageMetadata DATA_TYPES = { advisories: 1, - licenses: 2 + licenses: 2, + cve_enrichment: 3 }.with_indifferent_access.freeze VERSION_FORMATS = { diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index d0bd1d0bc051d39392ed6e7752e7503adf8e466c..02faebbab0fd75e2827224bf7ea1b91808659eb4 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -942,6 +942,9 @@ Settings.cron_jobs['usage_events_dump_write_buffer_cron_worker'] ||= {} Settings.cron_jobs['usage_events_dump_write_buffer_cron_worker']['cron'] ||= "*/5 * * * *" Settings.cron_jobs['usage_events_dump_write_buffer_cron_worker']['job_class'] = 'UsageEvents::DumpWriteBufferCronWorker' + Settings.cron_jobs['package_metadata_cve_enrichment_sync_worker'] ||= {} + Settings.cron_jobs['package_metadata_cve_enrichment_sync_worker']['cron'] ||= "*/5 * * * *" + Settings.cron_jobs['package_metadata_cve_enrichment_sync_worker']['job_class'] = 'PackageMetadata::CveEnrichmentSyncWorker' Gitlab.com do Settings.cron_jobs['disable_legacy_open_source_license_for_inactive_projects'] ||= {} diff --git a/ee/app/models/package_metadata/checkpoint.rb b/ee/app/models/package_metadata/checkpoint.rb index 9f22db8ae798920323b86172c0865ff6bb8105ee..299cf8a53a1f734a7f214de20dde9481c0a8c057 100644 --- a/ee/app/models/package_metadata/checkpoint.rb +++ b/ee/app/models/package_metadata/checkpoint.rb @@ -18,4 +18,12 @@ def self.with_path_components(data_type, version_format, purl_type) find_or_initialize_by(data_type: data_type, purl_type: purl_type, version_format: version_format) end end + + class NullCheckpoint + def update(*args); end + + def blank? + true + end + end end diff --git a/ee/app/models/package_metadata/sync_configuration.rb b/ee/app/models/package_metadata/sync_configuration.rb index 3103e4d287610398448a42abf8d64c3742451a7a..1f6df2cc793f52ff49309037fed74797e6346da0 100644 --- a/ee/app/models/package_metadata/sync_configuration.rb +++ b/ee/app/models/package_metadata/sync_configuration.rb @@ -26,6 +26,8 @@ class SyncConfiguration def self.configs_for(data_type) case data_type + when 'cve_enrichment' + cve_enrichment_configs when 'advisories' advisory_configs when 'licenses' @@ -35,6 +37,11 @@ def self.configs_for(data_type) end end + def self.cve_enrichment_configs + storage_type, base_uri = Location.for_cve_enrichment + [new('cve_enrichment', storage_type, base_uri, VERSION_FORMAT_V2, nil)] + end + def self.advisory_configs storage_type, base_uri = Location.for_advisories @@ -77,6 +84,10 @@ def advisories? data_type == 'advisories' end + def cve_enrichment? + data_type == 'cve_enrichment' + end + def to_s "#{data_type}:#{storage_type}/#{base_uri}/#{version_format}/#{purl_type}" end @@ -89,6 +100,8 @@ class Location LICENSES_BUCKET = 'prod-export-license-bucket-1a6c642fc4de57d4' ADVISORIES_PATH = Rails.root.join('vendor/package_metadata/advisories').freeze ADVISORIES_BUCKET = 'prod-export-advisory-bucket-1a6c642fc4de57d4' + CVE_ENRICHMENT_PATH = Rails.root.join('vendor/package_metadata/cve_enrichment').freeze + CVE_ENRICHMENT_BUCKET = 'prod-export-cve-enrichments-bucket-1a6c642fc4de57d4' def self.for_licenses if File.exist?(LICENSES_PATH) @@ -107,6 +120,14 @@ def self.for_advisories [:gcp, ADVISORIES_BUCKET] end end + + def self.for_cve_enrichment + if File.exist?(CVE_ENRICHMENT_PATH) + [:offline, CVE_ENRICHMENT_PATH] + else + [:gcp, CVE_ENRICHMENT_BUCKET] + end + end end end end diff --git a/ee/app/services/package_metadata/data_object_fabricator.rb b/ee/app/services/package_metadata/data_object_fabricator.rb index 4cfcdb110d14c9b6d9834203df0aea40aa9315c3..9e14061276c28436c0049d611b6e871ed39b87d6 100644 --- a/ee/app/services/package_metadata/data_object_fabricator.rb +++ b/ee/app/services/package_metadata/data_object_fabricator.rb @@ -28,14 +28,22 @@ def create_object(data) end def data_object_class - return AdvisoryDataObject if sync_config.advisories? - - license_data_object_class + if sync_config.cve_enrichment? + DataObjects::CveEnrichment + elsif sync_config.advisories? + AdvisoryDataObject + elsif sync_config.v2? + v2_license_data_object_class + else + v1_license_data_object_class + end end - def license_data_object_class - return CompressedPackageDataObject if sync_config.v2? + def v2_license_data_object_class + CompressedPackageDataObject + end + def v1_license_data_object_class DataObject end end diff --git a/ee/app/services/package_metadata/sync_service.rb b/ee/app/services/package_metadata/sync_service.rb index 463464b94b64530d703a0af549cbfcd41780c5a8..57ac043d1bf97cf88844ab27c8a5765dfdcdfda2 100644 --- a/ee/app/services/package_metadata/sync_service.rb +++ b/ee/app/services/package_metadata/sync_service.rb @@ -27,29 +27,36 @@ def initialize(sync_config, signal) def execute connector.data_after(checkpoint).each do |file| - log_progress(file) - - DataObjectFabricator.new(data_file: file, sync_config: sync_config) - .each_slice(INGEST_SLICE_SIZE) do |data_objects| - ingest(data_objects) - throttle - end + Gitlab::AppJsonLogger.debug(class: self.class.name, message: "Evaluating data for #{sync_config}/#{file}") + ingest_file(file) checkpoint.update(sequence: file.sequence, chunk: file.chunk) - if signal.stop? - return Gitlab::AppJsonLogger.debug(class: self.class.name, - message: "Stop signal after checkpointing") - end + return log_stop_signal if signal.stop? end end private + def ingest_file(file) + DataObjectFabricator.new(data_file: file, sync_config: sync_config) + .each_slice(INGEST_SLICE_SIZE) do |data_objects| + ingest(data_objects) + throttle + end + end + + def log_stop_signal + Gitlab::AppJsonLogger.debug(class: self.class.name, + message: "Stop signal after checkpointing") + end + attr_accessor :sync_config, :signal def ingest(data) - if sync_config.advisories? + if sync_config.cve_enrichment? + PackageMetadata::Ingestion::CveEnrichment::IngestionService.execute(data) + elsif sync_config.advisories? PackageMetadata::Ingestion::Advisory::IngestionService.execute(data) elsif sync_config.v2? PackageMetadata::Ingestion::CompressedPackage::IngestionService.execute(data) @@ -59,8 +66,12 @@ def ingest(data) end def checkpoint - @checkpoint ||= PackageMetadata::Checkpoint - .with_path_components(sync_config.data_type, sync_config.version_format, sync_config.purl_type) + if sync_config.cve_enrichment? + @checkpoint ||= PackageMetadata::NullCheckpoint.new + else + @checkpoint ||= PackageMetadata::Checkpoint + .with_path_components(sync_config.data_type, sync_config.version_format, sync_config.purl_type) + end end def connector @@ -74,12 +85,6 @@ def connector end end - def log_progress(file) - Gitlab::AppJsonLogger - .debug(class: self.class.name, - message: "Evaluating data for #{sync_config}/#{file}") - end - def throttle return if ENV['PM_SYNC_IN_DEV'] == 'true' diff --git a/ee/app/workers/all_queues.yml b/ee/app/workers/all_queues.yml index 18fb66502833e4ce3dc6fa3b3d34bea61b412881..3b80cb2d767fbd22730fb64ac935c3c0c2fa9324 100644 --- a/ee/app/workers/all_queues.yml +++ b/ee/app/workers/all_queues.yml @@ -507,6 +507,15 @@ :weight: 1 :idempotent: true :tags: [] +- :name: cronjob:package_metadata_cve_enrichment_sync + :worker_name: PackageMetadata::CveEnrichmentSyncWorker + :feature_category: :software_composition_analysis + :has_external_dependencies: true + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: true + :tags: [] - :name: cronjob:package_metadata_licenses_sync :worker_name: PackageMetadata::LicensesSyncWorker :feature_category: :software_composition_analysis diff --git a/ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb b/ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb new file mode 100644 index 0000000000000000000000000000000000000000..0b01272a593ce29c9bd6d92fb304315a4fb100b0 --- /dev/null +++ b/ee/app/workers/package_metadata/cve_enrichment_sync_worker.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +module PackageMetadata + class CveEnrichmentSyncWorker + include ApplicationWorker + include CronjobQueue # rubocop:disable Scalability/CronWorkerContext -- there is no relevant metadata to add to logs + include ExclusiveLeaseGuard + + LEASE_TIMEOUT = 5.minutes + + data_consistency :always # rubocop:disable SidekiqLoadBalancing/WorkerDataConsistency -- exclusively writes + feature_category :software_composition_analysis + urgency :low + + idempotent! + sidekiq_options retry: false + worker_has_external_dependencies! + + def perform + return unless should_run? + + try_obtain_lease do + SyncService.execute(data_type: 'cve_enrichment', lease: exclusive_lease) + end + end + + private + + def should_run? + return false unless Feature.enabled?(:epss_ingestion, :instance) + return false unless ::License.feature_available?(:dependency_scanning) + return false if Rails.env.development? && ENV.fetch('PM_SYNC_IN_DEV', 'false') != 'true' + + true + end + + def lease_timeout + LEASE_TIMEOUT + end + end +end diff --git a/ee/config/feature_flags/beta/epss_ingestion.yml b/ee/config/feature_flags/beta/epss_ingestion.yml new file mode 100644 index 0000000000000000000000000000000000000000..fcb45ead38c58974352ca8bc9b3a3a45a38d2626 --- /dev/null +++ b/ee/config/feature_flags/beta/epss_ingestion.yml @@ -0,0 +1,9 @@ +--- +name: epss_ingestion +feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/470856 +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/164348 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/470977 +milestone: '17.4' +group: group::composition analysis +type: beta +default_enabled: false diff --git a/ee/lib/gitlab/package_metadata/connector/base_connector.rb b/ee/lib/gitlab/package_metadata/connector/base_connector.rb index 0b91b28f0e56cfa3ca59bcabd7187220e32010e8..0e3183c84e0736837e7c1d1387e0561722957dfd 100644 --- a/ee/lib/gitlab/package_metadata/connector/base_connector.rb +++ b/ee/lib/gitlab/package_metadata/connector/base_connector.rb @@ -28,6 +28,8 @@ def data_file_class end def file_prefix + return sync_config.version_format unless sync_config.purl_type + File.join(sync_config.version_format, registry_id) end diff --git a/ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/data.ndjson b/ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/data.ndjson new file mode 100644 index 0000000000000000000000000000000000000000..040d50ffe3dbf3067b6a3999de772834111c11b6 --- /dev/null +++ b/ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/data.ndjson @@ -0,0 +1,3 @@ +{ "cve_id": "CVE-2020-1234", "epss_score": 0.5 } +{ "cve_id": "CVE-2021-12345", "epss_score": 0.6 } +{ "epss_score": 0.2 } diff --git a/ee/spec/lib/gitlab/package_metadata/connector/base_connector_spec.rb b/ee/spec/lib/gitlab/package_metadata/connector/base_connector_spec.rb index 0db253668b34334bc313c4f518ee90917ad68b3b..c1e82e83e1da69bf4247a4a8a43de7f872db2b1b 100644 --- a/ee/spec/lib/gitlab/package_metadata/connector/base_connector_spec.rb +++ b/ee/spec/lib/gitlab/package_metadata/connector/base_connector_spec.rb @@ -3,12 +3,14 @@ require 'spec_helper' RSpec.describe Gitlab::PackageMetadata::Connector::BaseConnector, feature_category: :software_composition_analysis do - let(:sync_config) { build(:pm_sync_config, version_format: version_format) } + let(:sync_config) { build(:pm_sync_config, version_format: version_format, purl_type: purl_type) } let(:connector) { described_class.new(sync_config) } describe '.data_file_class' do subject(:data_file_class) { connector.send(:data_file_class) } + let_it_be(:purl_type) { nil } + context 'when version_format v2' do let(:version_format) { 'v2' } @@ -21,4 +23,31 @@ it { is_expected.to be(::Gitlab::PackageMetadata::Connector::CsvDataFile) } end end + + describe '#file_prefix' do + subject(:file_prefix) { connector.send(:file_prefix) } + + let_it_be(:version_format) { 'v2' } + + context 'when purl_type is nil' do + let(:purl_type) { nil } + + it 'returns just the version_format' do + expect(file_prefix).to eq(version_format) + end + end + + context 'when purl_type is present' do + let(:purl_type) { 'npm' } + let(:registry_id) { 'npm' } + + before do + allow(::PackageMetadata::SyncConfiguration).to receive(:registry_id).with(purl_type).and_return(registry_id) + end + + it 'returns the joined path' do + expect(file_prefix).to eq(File.join(version_format, registry_id)) + end + end + end end diff --git a/ee/spec/models/package_metadata/checkpoint_spec.rb b/ee/spec/models/package_metadata/checkpoint_spec.rb index 5fab1cd8a7ce90db5fc7f94eefcf8c4749b611e0..941ad1867a84daf36a1e8e77cb724db8df07ca01 100644 --- a/ee/spec/models/package_metadata/checkpoint_spec.rb +++ b/ee/spec/models/package_metadata/checkpoint_spec.rb @@ -6,7 +6,8 @@ let(:data_types) do { advisories: 1, - licenses: 2 + licenses: 2, + cve_enrichment: 3 } end @@ -105,3 +106,27 @@ end end end + +RSpec.describe PackageMetadata::NullCheckpoint, type: :model, feature_category: :software_composition_analysis do + subject(:null_checkpoint) { described_class.new } + + describe '#update' do + it 'accepts any number of arguments without raising an error' do + # rubocop:disable Rails/SaveBang -- There is no `update!` method since NullCheckpoint isn't ActiveRecord + expect { null_checkpoint.update }.not_to raise_error + expect { null_checkpoint.update(sequence: 1, chunk: 2) }.not_to raise_error + # rubocop:enable Rails/SaveBang + end + + it 'returns nil' do + expect(null_checkpoint.update).to be_nil + expect(null_checkpoint.update(sequence: 1, chunk: 2)).to be_nil + end + end + + describe '#blank?' do + it 'always returns true' do + expect(null_checkpoint.blank?).to be true + end + end +end diff --git a/ee/spec/models/package_metadata/sync_configuration_spec.rb b/ee/spec/models/package_metadata/sync_configuration_spec.rb index c70bc16e1cf8990c78e8ae1d959cd3efb6b093cc..3a34675871b1978d4a0a782a34508617b6e4e460 100644 --- a/ee/spec/models/package_metadata/sync_configuration_spec.rb +++ b/ee/spec/models/package_metadata/sync_configuration_spec.rb @@ -111,6 +111,24 @@ it { is_expected.to match_array([expected_storage_type, expected_base_uri]) } end end + + describe '.for_cve_enrichment' do + subject { described_class.for_cve_enrichment } + + where(:filepath_exists, :expected_storage_type, :expected_base_uri) do + true | :offline | described_class::CVE_ENRICHMENT_PATH + false | :gcp | described_class::CVE_ENRICHMENT_BUCKET + end + + with_them do + before do + allow(File).to receive(:exist?).with(described_class::CVE_ENRICHMENT_PATH) + .and_return(filepath_exists) + end + + it { is_expected.to match_array([expected_storage_type, expected_base_uri]) } + end + end end describe '.registry' do diff --git a/ee/spec/services/package_metadata/data_object_fabricator_spec.rb b/ee/spec/services/package_metadata/data_object_fabricator_spec.rb index c82b32f8f301703430b64f307702af814c134e9b..683da41c2a044a706c0414d2df1cfe20ba4bcd8f 100644 --- a/ee/spec/services/package_metadata/data_object_fabricator_spec.rb +++ b/ee/spec/services/package_metadata/data_object_fabricator_spec.rb @@ -102,5 +102,22 @@ it_behaves_like 'it handles errors' end + + context 'when cve enrichment' do + let(:sync_config) { build(:pm_sync_config, data_type: 'cve_enrichment', version_format: 'v2') } + let(:io) { File.open(Rails.root.join('ee/spec/fixtures/package_metadata/sync/cve_enrichment/v2/data.ndjson')) } + let(:data_file) { Gitlab::PackageMetadata::Connector::NdjsonDataFile.new(io, 0, 0) } + + subject(:data_objects) { described_class.new(data_file: data_file, sync_config: sync_config).to_a } + + it { + is_expected.to match_array([ + have_attributes(cve_id: 'CVE-2020-1234', epss_score: 0.5), + have_attributes(cve_id: 'CVE-2021-12345', epss_score: 0.6) + ]) + } + + it_behaves_like 'it handles errors' + end end end diff --git a/ee/spec/services/package_metadata/sync_service_spec.rb b/ee/spec/services/package_metadata/sync_service_spec.rb index 8c880de4a6f6719a6667af0d2e663331fa6a53aa..0fa4e009bfe98aad05d016aa915685a99cb290df 100644 --- a/ee/spec/services/package_metadata/sync_service_spec.rb +++ b/ee/spec/services/package_metadata/sync_service_spec.rb @@ -26,6 +26,7 @@ allow(PackageMetadata::Ingestion::IngestionService).to receive(:execute) allow(PackageMetadata::Ingestion::CompressedPackage::IngestionService).to receive(:execute) allow(PackageMetadata::Ingestion::Advisory::IngestionService).to receive(:execute) + allow(PackageMetadata::Ingestion::CveEnrichment::IngestionService).to receive(:execute) allow(service).to receive(:sleep) allow(Gitlab::AppJsonLogger).to receive(:debug) end @@ -71,12 +72,38 @@ sync_config.data_type = 'advisories' end - it 'calls v1 ingestion service to store data' do + it 'calls advisories ingestion service to store data' do execute expect(PackageMetadata::Ingestion::Advisory::IngestionService) .to have_received(:execute).with(data_objects).twice end end + + context 'if data_type is cve enrichment' do + let(:checkpoint) do + create(:pm_checkpoint, purl_type: sync_config.purl_type, data_type: sync_config.data_type) + end + + before do + sync_config.data_type = 'cve_enrichment' + end + + it 'always calls data_after(NullCheckpoint) even if checkpoints exist' do + checkpoint + execute + expect(connector).to have_received(:data_after).with(an_instance_of(PackageMetadata::NullCheckpoint)) + end + + it 'does not update the checkpoint' do + expect { execute }.not_to change { checkpoint.reload.attributes } + end + + it 'calls cve enrichment ingestion service to store data' do + execute + expect(PackageMetadata::Ingestion::CveEnrichment::IngestionService) + .to have_received(:execute).with(data_objects).twice + end + end end context 'when a slice has been ingested' do @@ -229,6 +256,19 @@ it_behaves_like 'it calls #execute for each enabled config' end + + context 'and the data_type is cve_enrichment' do + let(:data_type) { 'cve_enrichment' } + let(:should_stop) { false } + + it 'calls #execute once' do + expect(observer).to receive(:execute).once + expect(described_class).to receive(:new) + .with(having_attributes(data_type: data_type, purl_type: nil), stop_signal) + .and_return(observer) + execute + end + end end context 'when stop_signal.stop? is true' do diff --git a/ee/spec/workers/package_metadata/cve_enrichment_sync_worker_spec.rb b/ee/spec/workers/package_metadata/cve_enrichment_sync_worker_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..88e0fed4824cfb259cff3318859fb64034e21d08 --- /dev/null +++ b/ee/spec/workers/package_metadata/cve_enrichment_sync_worker_spec.rb @@ -0,0 +1,86 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe PackageMetadata::CveEnrichmentSyncWorker, type: :worker, feature_category: :software_composition_analysis do + describe '#perform' do + let(:instance) { described_class.new } + let(:lease) { instance_double(Gitlab::ExclusiveLease) } + + subject(:perform!) { instance.perform } + + before do + allow(instance).to receive(:try_obtain_lease).and_yield + allow(Gitlab::ExclusiveLease).to receive(:new).and_return(lease) + end + + shared_examples_for 'it syncs' do + it 'calls sync service with the cve_enrichment data_type' do + expect(PackageMetadata::SyncService).to receive(:execute) + .with(data_type: 'cve_enrichment', lease: lease) + + perform! + end + end + + shared_examples_for 'it does not sync' do + it 'does not call sync service' do + expect(PackageMetadata::SyncService).not_to receive(:execute) + + perform! + end + end + + context 'when the epss_ingestion feature flag is disabled' do + before do + stub_feature_flags(epss_ingestion: false) + end + + it_behaves_like 'it does not sync' + end + + context 'when the dependency_scanning feature is disabled' do + before do + stub_licensed_features(dependency_scanning: false) + end + + it_behaves_like 'it does not sync' + end + + context 'when the dependency_scanning feature is enabled' do + before do + stub_licensed_features(dependency_scanning: true) + end + + context 'and rails is not development' do + before do + allow(Rails.env).to receive(:development?).and_return(false) + end + + it_behaves_like 'it syncs' + end + + context 'and rails is development' do + before do + allow(Rails.env).to receive(:development?).and_return(true) + end + + context 'and sync in dev env variable is true' do + before do + stub_env('PM_SYNC_IN_DEV', true) + end + + it_behaves_like 'it syncs' + end + + context 'and sync in dev env variable is false' do + before do + stub_env('PM_SYNC_IN_DEV', false) + end + + it_behaves_like 'it does not sync' + end + end + end + end +end