diff --git a/.gitlab/issue_templates/Geo Replicate a new Git repository type.md b/.gitlab/issue_templates/Geo Replicate a new Git repository type.md index e858f80ffaa65809279b45fa0b9bc7c70aeda52e..dceb107dd7d4c69a2f8650502a38a8415bd351d6 100644 --- a/.gitlab/issue_templates/Geo Replicate a new Git repository type.md +++ b/.gitlab/issue_templates/Geo Replicate a new Git repository type.md @@ -673,6 +673,41 @@ The GraphQL API is used by `Admin > Geo > Replication Details` views, and is dir Individual Cool Widget replication and verification data should now be available via the GraphQL API. +#### Step 4. Handle batch destroy + +If batch destroy logic is implemented for a replicable, then that logic must be "replicated" by Geo secondaries. The easiest way to do this is use `Geo::BatchEventCreateWorker` to bulk insert a delete event for each replicable. + +For example, if `FastDestroyAll` is used, then you may be able to [use `begin_fast_destroy` and `finalize_fast_destroy` hooks, like we did for uploads](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/69763). + +Or if a special service is used to batch delete records and their associated data, then you probably need to [hook into that service, like we did for job artifacts](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/79530). + +As illustrated by the above two examples, batch destroy logic cannot be handled automatically by Geo secondaries without restricting the way other teams perform batch destroys. It is up to you to produce `Geo::BatchEventCreateWorker` attributes before the records are deleted, and then enqueue `Geo::BatchEventCreateWorker` after the records are deleted. + +- [ ] Ensure that any batch destroy of this replicable is replicated to secondary sites +- [ ] Regardless of implementation details, please verify in specs that when the parent object is removed, the new `Geo::Event` records are created: + +```ruby + describe '#destroy' do + subject { create(:cool_widget) } + + context 'when running in a Geo primary node' do + let_it_be(:primary) { create(:geo_node, :primary) } + let_it_be(:secondary) { create(:geo_node) } + + it 'logs an event to the Geo event log when bulk removal is used', :sidekiq_inline do + stub_current_geo_node(primary) + + expect { subject.project.destroy! }.to change(Geo::Event.where(replicable_name: :cool_widget, event_name: :deleted), :count).by(1) + + payload = Geo::Event.where(replicable_name: :cool_widget, event_name: :deleted).last.payload + + expect(payload['model_record_id']).to eq(subject.id) + expect(payload['blob_path']).to eq(subject.relative_path) + expect(payload['uploader_class']).to eq('CoolWidgetUploader') + end + end + end +``` ### Release Geo support of Cool Widgets - [ ] In the rollout issue you created when creating the feature flag, modify the Roll Out Steps: diff --git a/.gitlab/issue_templates/Geo Replicate a new blob type.md b/.gitlab/issue_templates/Geo Replicate a new blob type.md index 0cbfd79c95875218c59dfd9205cf6b650a25c6ef..cf214b212d8fd15d43b751e4c9d9383bd7d79e14 100644 --- a/.gitlab/issue_templates/Geo Replicate a new blob type.md +++ b/.gitlab/issue_templates/Geo Replicate a new blob type.md @@ -637,6 +637,42 @@ The GraphQL API is used by `Admin > Geo > Replication Details` views, and is dir Individual Cool Widget replication and verification data should now be available via the GraphQL API. +#### Step 4. Handle batch destroy + +If batch destroy logic is implemented for a replicable, then that logic must be "replicated" by Geo secondaries. The easiest way to do this is use `Geo::BatchEventCreateWorker` to bulk insert a delete event for each replicable. + +For example, if `FastDestroyAll` is used, then you may be able to [use `begin_fast_destroy` and `finalize_fast_destroy` hooks, like we did for uploads](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/69763). + +Or if a special service is used to batch delete records and their associated data, then you probably need to [hook into that service, like we did for job artifacts](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/79530). + +As illustrated by the above two examples, batch destroy logic cannot be handled automatically by Geo secondaries without restricting the way other teams perform batch destroys. It is up to you to produce `Geo::BatchEventCreateWorker` attributes before the records are deleted, and then enqueue `Geo::BatchEventCreateWorker` after the records are deleted. + +- [ ] Ensure that any batch destroy of this replicable is replicated to secondary sites +- [ ] Regardless of implementation details, please verify in specs that when the parent object is removed, the new `Geo::Event` records are created: + +```ruby + describe '#destroy' do + subject { create(:cool_widget) } + + context 'when running in a Geo primary node' do + let_it_be(:primary) { create(:geo_node, :primary) } + let_it_be(:secondary) { create(:geo_node) } + + it 'logs an event to the Geo event log when bulk removal is used', :sidekiq_inline do + stub_current_geo_node(primary) + + expect { subject.project.destroy! }.to change(Geo::Event.where(replicable_name: :cool_widget, event_name: :deleted), :count).by(1) + + payload = Geo::Event.where(replicable_name: :cool_widget, event_name: :deleted).last.payload + + expect(payload['model_record_id']).to eq(subject.id) + expect(payload['blob_path']).to eq(subject.relative_path) + expect(payload['uploader_class']).to eq('CoolWidgetUploader') + end + end + end +``` + ### Release Geo support of Cool Widgets - [ ] In the rollout issue you created when creating the feature flag, modify the Roll Out Steps: diff --git a/app/services/ci/job_artifacts/destroy_batch_service.rb b/app/services/ci/job_artifacts/destroy_batch_service.rb index 90d157373c3b6dd9ab48fbba087f6e13f1c8b800..8a44d2b77e45816748c4ad76e49e09a292fdf006 100644 --- a/app/services/ci/job_artifacts/destroy_batch_service.rb +++ b/app/services/ci/job_artifacts/destroy_batch_service.rb @@ -33,9 +33,11 @@ def execute(update_stats: true) destroy_related_records(@job_artifacts) - Ci::DeletedObject.transaction do - Ci::DeletedObject.bulk_import(@job_artifacts, @pick_up_at) - Ci::JobArtifact.id_in(@job_artifacts.map(&:id)).delete_all + destroy_around_hook(@job_artifacts) do + Ci::DeletedObject.transaction do + Ci::DeletedObject.bulk_import(@job_artifacts, @pick_up_at) + Ci::JobArtifact.id_in(@job_artifacts.map(&:id)).delete_all + end end after_batch_destroy_hook(@job_artifacts) @@ -51,6 +53,13 @@ def execute(update_stats: true) private + # Overriden in EE + # :nocov: + def destroy_around_hook(artifacts) + yield + end + # :nocov: + # Overriden in EE def destroy_related_records(artifacts); end diff --git a/ee/app/models/concerns/geo/blob_replicator_strategy.rb b/ee/app/models/concerns/geo/blob_replicator_strategy.rb index 31f76e3c8b3290e98466334cad9f641907d8d06f..b3c2ad144bccb2dce28e4150015de569fe4b7a73 100644 --- a/ee/app/models/concerns/geo/blob_replicator_strategy.rb +++ b/ee/app/models/concerns/geo/blob_replicator_strategy.rb @@ -24,6 +24,34 @@ def data_type def data_type_title _('File') end + + def bulk_create_delete_events_async(deleted_records) + return unless deleted_records.any? + raise 'This method can only be called for a child class of Gitlab::Geo::Replicator' if replicable_name.nil? + + deleted_record_details = [] + + events = deleted_records.map do |record| + deleted_record_details << [replicable_name, record[:model_record_id], record[:blob_path]] + + raise 'model_record_id can not be nil' if record[:model_record_id].nil? + + { + replicable_name: replicable_name, + event_name: 'deleted', + payload: { + model_record_id: record[:model_record_id], + blob_path: record[:blob_path], + uploader_class: record[:uploader_class] + }, + created_at: Time.current + } + end + + log_info('Bulk delete of: ', details: deleted_record_details) + + ::Geo::BatchEventCreateWorker.perform_async(events) + end end def handle_after_create_commit @@ -100,7 +128,7 @@ def file_exists? def deleted_params { - model_record_id: model_record.id, + model_record_id: model_record_id, uploader_class: carrierwave_uploader.class.to_s, blob_path: carrierwave_uploader.relative_path } diff --git a/ee/app/replicators/geo/upload_replicator.rb b/ee/app/replicators/geo/upload_replicator.rb index 50aee878bf632a917aaf4fba65e41cab4dffa3c2..bc1a29cefcb8af62cee7c90eea49c483f8f098e5 100644 --- a/ee/app/replicators/geo/upload_replicator.rb +++ b/ee/app/replicators/geo/upload_replicator.rb @@ -9,31 +9,6 @@ def self.model ::Upload end - def self.bulk_create_delete_events_async(deleted_uploads) - return if deleted_uploads.empty? - - deleted_upload_details = [] - - events = deleted_uploads.map do |upload| - deleted_upload_details << [upload[:model_record_id], upload[:blob_path]] - - { - replicable_name: 'upload', - event_name: 'deleted', - payload: { - model_record_id: upload[:model_record_id], - blob_path: upload[:blob_path], - uploader_class: upload[:uploader_class] - }, - created_at: Time.current - } - end - - log_info('Delete bulk of uploads: ', uploads: deleted_upload_details) - - ::Geo::BatchEventCreateWorker.perform_async(events) - end - override :verification_feature_flag_enabled? def self.verification_feature_flag_enabled? true diff --git a/ee/app/services/ee/ci/job_artifacts/destroy_batch_service.rb b/ee/app/services/ee/ci/job_artifacts/destroy_batch_service.rb index 852a8719a86a239a667fead77b7f856726809ff5..4593c4164cbd250186d9eaf8c1080a8b53086e96 100644 --- a/ee/app/services/ee/ci/job_artifacts/destroy_batch_service.rb +++ b/ee/app/services/ee/ci/job_artifacts/destroy_batch_service.rb @@ -8,6 +8,35 @@ module DestroyBatchService private + override :destroy_around_hook + def destroy_around_hook(artifacts) + return yield unless ::Geo::EventStore.can_create_event? + + ### Deprecated way of creating events. Should be removed by https://gitlab.com/gitlab-org/gitlab/-/issues/349056 + if ::Feature.disabled?(:geo_job_artifact_replication, default_enabled: :yaml) + yield + + ::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification.allow_cross_database_modification_within_transaction(url: 'https://gitlab.com/gitlab-org/gitlab/-/issues/351849') do + ::Geo::JobArtifactDeletedEventStore.bulk_create(artifacts) + end + + return + end + ### Deprecated + + artifact_params = artifacts.map do |artifact| + artifact.replicator.deleted_params + end + + yield + + Sidekiq::Worker.skipping_transaction_check do + ::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification.allow_cross_database_modification_within_transaction(url: 'https://gitlab.com/gitlab-org/gitlab/-/issues/351849') do + ::Geo::JobArtifactReplicator.bulk_create_delete_events_async(artifact_params) + end + end + end + override :after_batch_destroy_hook def after_batch_destroy_hook(artifacts) # This DestroyBatchService is used from different services. @@ -18,14 +47,6 @@ def after_batch_destroy_hook(artifacts) ::Ci::JobArtifactsDeletedEvent.new(data: { job_ids: artifacts.map(&:job_id) }) ) end - - insert_geo_event_records(artifacts) - end - - def insert_geo_event_records(artifacts) - ::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification.allow_cross_database_modification_within_transaction(url: 'https://gitlab.com/gitlab-org/gitlab/-/issues/351849') do - ::Geo::JobArtifactDeletedEventStore.bulk_create(artifacts) - end end end end diff --git a/ee/lib/gitlab/geo/replicator.rb b/ee/lib/gitlab/geo/replicator.rb index cf49229c5807a478cb97cf463fc9ab48285e5d70..4cc10ec4d242fd6ca82409c37a0f5427eec6c7e1 100644 --- a/ee/lib/gitlab/geo/replicator.rb +++ b/ee/lib/gitlab/geo/replicator.rb @@ -193,6 +193,12 @@ def self.verification_enabled? false end + # :nocov: + def self.bulk_create_delete_events_async(deleted_records) + raise NotImplementedError + end + # :nocov: + def self.bulk_create_events(events) ::Geo::EventLog.transaction do results = ::Geo::Event.insert_all!(events) diff --git a/ee/spec/models/ee/ci/job_artifact_spec.rb b/ee/spec/models/ee/ci/job_artifact_spec.rb index 9f0abdb3e5a878e688535a0ec33162ba17910019..7d82ddd0f1aa1ce97c1a9ba2f566ae0a1b74b1ea 100644 --- a/ee/spec/models/ee/ci/job_artifact_spec.rb +++ b/ee/spec/models/ee/ci/job_artifact_spec.rb @@ -25,12 +25,30 @@ stub_current_geo_node(primary) end - it 'creates a JobArtifactDeletedEvent' do - stub_feature_flags(geo_job_artifact_replication: false) + context 'when geo_job_artifact_replication is disabled' do + it 'creates a JobArtifactDeletedEvent' do + stub_feature_flags(geo_job_artifact_replication: false) - job_artifact = create(:ee_ci_job_artifact, :archive) + job_artifact = create(:ee_ci_job_artifact, :archive) - expect { job_artifact.destroy! }.to change { Geo::JobArtifactDeletedEvent.count }.by(1) + expect { job_artifact.destroy! }.to change { Geo::JobArtifactDeletedEvent.count }.by(1) + end + end + + context 'when pipeline is destroyed' do + it 'creates a Geo Event', :sidekiq_inline do + job_artifact = create(:ee_ci_job_artifact, :archive) + + expect do + job_artifact.job.pipeline.destroy! + end.to change(Geo::Event.where(replicable_name: :job_artifact, event_name: :deleted), :count).by(1) + + payload = Geo::Event.where(replicable_name: :job_artifact, event_name: :deleted).last.payload + + expect(payload['model_record_id']).to eq(job_artifact.id) + expect(payload['blob_path']).to eq(job_artifact.file.relative_path.to_s) + expect(payload['uploader_class']).to eq('JobArtifactUploader') + end end context 'JobArtifact destroy fails' do diff --git a/ee/spec/replicators/geo/upload_replicator_spec.rb b/ee/spec/replicators/geo/upload_replicator_spec.rb index e7ed3a6e97a10b3751f6049bcf18623b1da8f38f..54b77024d7dcc782a2adb4ef67f6336901d7d385 100644 --- a/ee/spec/replicators/geo/upload_replicator_spec.rb +++ b/ee/spec/replicators/geo/upload_replicator_spec.rb @@ -7,30 +7,4 @@ include_examples 'a blob replicator' include_examples 'a verifiable replicator' - - describe '.bulk_create_delete_events_async' do - let(:deleted_upload) do - { - model_record_id: 1, - blob_path: 'path', - uploader_class: 'UploaderClass' - } - end - - let(:deleted_uploads) { [deleted_upload] } - - it 'calls Geo::BatchEventCreateWorker and passes events array', :sidekiq_inline do - expect { described_class.bulk_create_delete_events_async(deleted_uploads) }.to change { ::Geo::Event.count }.from(0).to(1) - - created_event = ::Geo::Event.last - expect(created_event.replicable_name).to eq 'upload' - expect(created_event.event_name).to eq 'deleted' - expect(created_event.created_at).to be_present - expect(created_event.payload).to eq(deleted_upload.stringify_keys) - end - - it 'returns nil when empty array is passed' do - expect(described_class.bulk_create_delete_events_async([])).to be_nil - end - end end diff --git a/ee/spec/services/ee/ci/job_artifacts/destroy_batch_service_spec.rb b/ee/spec/services/ee/ci/job_artifacts/destroy_batch_service_spec.rb index c90aa6e5a722fb0d05e834d4020db644e81f4137..ec86256362aef0f80af134b0f6f09a692e0e6fa9 100644 --- a/ee/spec/services/ee/ci/job_artifacts/destroy_batch_service_spec.rb +++ b/ee/spec/services/ee/ci/job_artifacts/destroy_batch_service_spec.rb @@ -10,11 +10,15 @@ let(:service) { described_class.new(Ci::JobArtifact.all, pick_up_at: Time.current) } - let_it_be(:artifact) { create(:ci_job_artifact) } + let_it_be(:artifact) { create(:ci_job_artifact, :zip) } let_it_be(:security_scan) { create(:security_scan, build: artifact.job) } let_it_be(:security_finding) { create(:security_finding, scan: security_scan) } let_it_be(:event_data) { { job_ids: [artifact.job_id] } } + before do + stub_feature_flags(geo_job_artifact_replication: false) + end + it 'destroys all expired artifacts', :sidekiq_inline do expect { subject }.to change { Ci::JobArtifact.count }.by(-1) .and change { Security::Finding.count }.from(1).to(0) @@ -34,7 +38,21 @@ end it 'creates a JobArtifactDeletedEvent' do - expect { subject }.to change { Geo::JobArtifactDeletedEvent.count }.by(1) + expect { subject }.to change { Geo::JobArtifactDeletedEvent.count }.by(2) + end + + context 'with geo_job_artifact_replication flag enabled' do + before do + stub_feature_flags(geo_job_artifact_replication: true) + end + + it 'does not create a JobArtifactDeletedEvent' do + expect { subject }.to change { Geo::JobArtifactDeletedEvent.count }.by(0) + end + + it 'creates an Geo::EventLog', :sidekiq_inline do + expect { subject }.to change { ::Geo::Event.count }.by(2) + end end context 'JobArtifact batch destroy fails' do diff --git a/ee/spec/support/shared_examples/models/concerns/blob_replicator_strategy_shared_examples.rb b/ee/spec/support/shared_examples/models/concerns/blob_replicator_strategy_shared_examples.rb index 44196b6b53469ed656f2dad75043907b28dc01fb..8d6f1ab81781afdbaaf80faea5eab43bd1cc332a 100644 --- a/ee/spec/support/shared_examples/models/concerns/blob_replicator_strategy_shared_examples.rb +++ b/ee/spec/support/shared_examples/models/concerns/blob_replicator_strategy_shared_examples.rb @@ -256,4 +256,23 @@ it { is_expected.to be_falsey } end end + + describe '.bulk_create_delete_events_async' do + let(:uploads) { create_list(:upload, 2) } + let(:upload_deleted_details) { uploads.map { |upload| upload.replicator.deleted_params} } + let(:inherited_replicator_class) { ::Geo::UploadReplicator } + + it 'creates events' do + expect { inherited_replicator_class.bulk_create_delete_events_async(upload_deleted_details) } + .to change { ::Geo::Event.count }.from(0).to(2) + + expect(::Geo::EventLog.last.event).to be_present + end + + it 'raises error when model_record_id is nil' do + expect do + inherited_replicator_class.bulk_create_delete_events_async([{}]) + end.to raise_error(RuntimeError, /model_record_id can not be nil/) + end + end end