From 8433fe464fcac889de1193dc1d93de13410afc2b Mon Sep 17 00:00:00 2001 From: Valery Sizov Date: Tue, 5 Apr 2022 21:42:54 +0100 Subject: [PATCH 1/3] Prevent orphaned JobArtifacts in SSF Uses FastDestroyAll library to create a batch event Changelog: fixed --- .../Geo Replicate a new blob type.md | 43 +++++++++++++++++++ .../ci/job_artifacts/destroy_batch_service.rb | 13 ++++-- .../concerns/geo/blob_replicator_strategy.rb | 2 +- ee/app/replicators/geo/upload_replicator.rb | 25 ----------- .../ci/job_artifacts/destroy_batch_service.rb | 37 ++++++++++++---- ee/lib/gitlab/geo/replicator.rb | 28 ++++++++++++ ee/spec/lib/gitlab/geo/replicator_spec.rb | 19 ++++++++ ee/spec/models/ee/ci/job_artifact_spec.rb | 26 +++++++++-- .../replicators/geo/upload_replicator_spec.rb | 26 ----------- .../destroy_batch_service_spec.rb | 22 +++++++++- 10 files changed, 172 insertions(+), 69 deletions(-) diff --git a/.gitlab/issue_templates/Geo Replicate a new blob type.md b/.gitlab/issue_templates/Geo Replicate a new blob type.md index 0cbfd79c958752..15db7991f77a2b 100644 --- a/.gitlab/issue_templates/Geo Replicate a new blob type.md +++ b/.gitlab/issue_templates/Geo Replicate a new blob type.md @@ -637,6 +637,49 @@ The GraphQL API is used by `Admin > Geo > Replication Details` views, and is dir Individual Cool Widget replication and verification data should now be available via the GraphQL API. +#### Step 4. Implement batch destroy logic for fast destroy hooks + +We need to create a Geo delete event when the parent object of replicable records is removed. +We have a special tool for that, see `app/models/concerns/fast_destroy_all.rb`. +It's hard to give a common example of how it should be implemented for all the cases because sometimes `begin_fast_destroy` and `finalize_fast_destroy` +are already used for other purposes, but the general idea is that in we create event data before the delete transaction is commited +and then create actual events after the commit. In `begin_fast_destroy`: + +```ruby + events_details = cool_widgets.map{ |widget| widget.replicator.deleted_params } +``` + +In `finalize_fast_destroy`: + +```ruby + ::Geo::CoolWidgetReplicator.bulk_create_delete_events_async(events_details) +``` + +Please verify in specs that when the parent object is removed the new `Geo::Event` records are created: + +```ruby + describe '#destroy' do + subject { create(:cool_widget) } + + context 'when running in a Geo primary node' do + let_it_be(:primary) { create(:geo_node, :primary) } + let_it_be(:secondary) { create(:geo_node) } + + it 'logs an event to the Geo event log when bulk removal is used', :sidekiq_inline do + stub_current_geo_node(primary) + + expect { subject.project.destroy! }.to change(Geo::Event.where(replicable_name: :cool_widget, event_name: :deleted), :count).by(1) + + payload = Geo::Event.where(replicable_name: :cool_widget, event_name: :deleted).last.payload + + expect(payload['model_record_id']).to eq(subject.id) + expect(payload['blob_path']).to eq(subject.relative_path) + expect(payload['uploader_class']).to eq('CoolWidgetUploader') + end + end + end +``` + ### Release Geo support of Cool Widgets - [ ] In the rollout issue you created when creating the feature flag, modify the Roll Out Steps: diff --git a/app/services/ci/job_artifacts/destroy_batch_service.rb b/app/services/ci/job_artifacts/destroy_batch_service.rb index 90d157373c3b6d..5a4f81771beb65 100644 --- a/app/services/ci/job_artifacts/destroy_batch_service.rb +++ b/app/services/ci/job_artifacts/destroy_batch_service.rb @@ -33,9 +33,11 @@ def execute(update_stats: true) destroy_related_records(@job_artifacts) - Ci::DeletedObject.transaction do - Ci::DeletedObject.bulk_import(@job_artifacts, @pick_up_at) - Ci::JobArtifact.id_in(@job_artifacts.map(&:id)).delete_all + destroy_around_hook(@job_artifacts) do + Ci::DeletedObject.transaction do + Ci::DeletedObject.bulk_import(@job_artifacts, @pick_up_at) + Ci::JobArtifact.id_in(@job_artifacts.map(&:id)).delete_all + end end after_batch_destroy_hook(@job_artifacts) @@ -51,6 +53,11 @@ def execute(update_stats: true) private + # Overriden in EE + def destroy_around_hook(artifacts) + yield + end + # Overriden in EE def destroy_related_records(artifacts); end diff --git a/ee/app/models/concerns/geo/blob_replicator_strategy.rb b/ee/app/models/concerns/geo/blob_replicator_strategy.rb index 31f76e3c8b3290..2cf3d75b485952 100644 --- a/ee/app/models/concerns/geo/blob_replicator_strategy.rb +++ b/ee/app/models/concerns/geo/blob_replicator_strategy.rb @@ -100,7 +100,7 @@ def file_exists? def deleted_params { - model_record_id: model_record.id, + model_record_id: model_record_id, uploader_class: carrierwave_uploader.class.to_s, blob_path: carrierwave_uploader.relative_path } diff --git a/ee/app/replicators/geo/upload_replicator.rb b/ee/app/replicators/geo/upload_replicator.rb index 50aee878bf632a..bc1a29cefcb8af 100644 --- a/ee/app/replicators/geo/upload_replicator.rb +++ b/ee/app/replicators/geo/upload_replicator.rb @@ -9,31 +9,6 @@ def self.model ::Upload end - def self.bulk_create_delete_events_async(deleted_uploads) - return if deleted_uploads.empty? - - deleted_upload_details = [] - - events = deleted_uploads.map do |upload| - deleted_upload_details << [upload[:model_record_id], upload[:blob_path]] - - { - replicable_name: 'upload', - event_name: 'deleted', - payload: { - model_record_id: upload[:model_record_id], - blob_path: upload[:blob_path], - uploader_class: upload[:uploader_class] - }, - created_at: Time.current - } - end - - log_info('Delete bulk of uploads: ', uploads: deleted_upload_details) - - ::Geo::BatchEventCreateWorker.perform_async(events) - end - override :verification_feature_flag_enabled? def self.verification_feature_flag_enabled? true diff --git a/ee/app/services/ee/ci/job_artifacts/destroy_batch_service.rb b/ee/app/services/ee/ci/job_artifacts/destroy_batch_service.rb index 852a8719a86a23..9f16372cd1ddb6 100644 --- a/ee/app/services/ee/ci/job_artifacts/destroy_batch_service.rb +++ b/ee/app/services/ee/ci/job_artifacts/destroy_batch_service.rb @@ -8,6 +8,35 @@ module DestroyBatchService private + override :destroy_around_hook + def destroy_around_hook(artifacts) + return yield unless ::Geo::EventStore.can_create_event? + + ### Deprecated way of creating events. Should be removed by https://gitlab.com/gitlab-org/gitlab/-/issues/349056 + if ::Feature.disabled?(:geo_job_artifact_replication) + yield + + ::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification.allow_cross_database_modification_within_transaction(url: 'https://gitlab.com/gitlab-org/gitlab/-/issues/351849') do + ::Geo::JobArtifactDeletedEventStore.bulk_create(artifacts) + end + + return + end + ### Deprecated + + artifact_params = artifacts.map do |artifact| + artifact.replicator.deleted_params + end + + yield + + Sidekiq::Worker.skipping_transaction_check do + ::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification.allow_cross_database_modification_within_transaction(url: 'https://gitlab.com/gitlab-org/gitlab/-/issues/351849') do + ::Geo::JobArtifactReplicator.bulk_create_delete_events_async(artifact_params) + end + end + end + override :after_batch_destroy_hook def after_batch_destroy_hook(artifacts) # This DestroyBatchService is used from different services. @@ -18,14 +47,6 @@ def after_batch_destroy_hook(artifacts) ::Ci::JobArtifactsDeletedEvent.new(data: { job_ids: artifacts.map(&:job_id) }) ) end - - insert_geo_event_records(artifacts) - end - - def insert_geo_event_records(artifacts) - ::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification.allow_cross_database_modification_within_transaction(url: 'https://gitlab.com/gitlab-org/gitlab/-/issues/351849') do - ::Geo::JobArtifactDeletedEventStore.bulk_create(artifacts) - end end end end diff --git a/ee/lib/gitlab/geo/replicator.rb b/ee/lib/gitlab/geo/replicator.rb index cf49229c5807a4..4530c33e1a4439 100644 --- a/ee/lib/gitlab/geo/replicator.rb +++ b/ee/lib/gitlab/geo/replicator.rb @@ -193,6 +193,34 @@ def self.verification_enabled? false end + def self.bulk_create_delete_events_async(deleted_records) + return unless deleted_records.any? + raise 'This method can only be called for a child class of Gitlab::Geo::Replicator' if replicable_name.nil? + + deleted_record_details = [] + + events = deleted_records.map do |record| + deleted_record_details << [replicable_name, record[:model_record_id], record[:blob_path]] + + raise 'model_record_id can not be nil' if record[:model_record_id].nil? + + { + replicable_name: replicable_name, + event_name: 'deleted', + payload: { + model_record_id: record.fetch(:model_record_id), + blob_path: record[:blob_path], + uploader_class: record[:uploader_class] + }, + created_at: Time.current + } + end + + log_info('Bulk delete of: ', details: deleted_record_details) + + ::Geo::BatchEventCreateWorker.perform_async(events) + end + def self.bulk_create_events(events) ::Geo::EventLog.transaction do results = ::Geo::Event.insert_all!(events) diff --git a/ee/spec/lib/gitlab/geo/replicator_spec.rb b/ee/spec/lib/gitlab/geo/replicator_spec.rb index 5b9c1448bcc474..8c0d2651e5db80 100644 --- a/ee/spec/lib/gitlab/geo/replicator_spec.rb +++ b/ee/spec/lib/gitlab/geo/replicator_spec.rb @@ -154,6 +154,25 @@ end end + describe '.bulk_create_delete_events_async' do + let(:uploads) { create_list(:upload, 2) } + let(:upload_deleted_details) { uploads.map { |upload| upload.replicator.deleted_params} } + let(:inherited_replicator_class) { ::Geo::UploadReplicator } + + it 'creates events', :sidekiq_inline do + expect { inherited_replicator_class.bulk_create_delete_events_async(upload_deleted_details) } + .to change { ::Geo::Event.count }.from(0).to(2) + + expect(::Geo::EventLog.last.event).to be_present + end + + it 'raises error when model_record_id is nil' do + expect do + inherited_replicator_class.bulk_create_delete_events_async([{}]) + end.to raise_error(RuntimeError, /model_record_id can not be nil/) + end + end + describe '#initialize' do subject(:replicator) { Geo::DummyReplicator.new(**args) } diff --git a/ee/spec/models/ee/ci/job_artifact_spec.rb b/ee/spec/models/ee/ci/job_artifact_spec.rb index 9f0abdb3e5a878..7d82ddd0f1aa1c 100644 --- a/ee/spec/models/ee/ci/job_artifact_spec.rb +++ b/ee/spec/models/ee/ci/job_artifact_spec.rb @@ -25,12 +25,30 @@ stub_current_geo_node(primary) end - it 'creates a JobArtifactDeletedEvent' do - stub_feature_flags(geo_job_artifact_replication: false) + context 'when geo_job_artifact_replication is disabled' do + it 'creates a JobArtifactDeletedEvent' do + stub_feature_flags(geo_job_artifact_replication: false) - job_artifact = create(:ee_ci_job_artifact, :archive) + job_artifact = create(:ee_ci_job_artifact, :archive) - expect { job_artifact.destroy! }.to change { Geo::JobArtifactDeletedEvent.count }.by(1) + expect { job_artifact.destroy! }.to change { Geo::JobArtifactDeletedEvent.count }.by(1) + end + end + + context 'when pipeline is destroyed' do + it 'creates a Geo Event', :sidekiq_inline do + job_artifact = create(:ee_ci_job_artifact, :archive) + + expect do + job_artifact.job.pipeline.destroy! + end.to change(Geo::Event.where(replicable_name: :job_artifact, event_name: :deleted), :count).by(1) + + payload = Geo::Event.where(replicable_name: :job_artifact, event_name: :deleted).last.payload + + expect(payload['model_record_id']).to eq(job_artifact.id) + expect(payload['blob_path']).to eq(job_artifact.file.relative_path.to_s) + expect(payload['uploader_class']).to eq('JobArtifactUploader') + end end context 'JobArtifact destroy fails' do diff --git a/ee/spec/replicators/geo/upload_replicator_spec.rb b/ee/spec/replicators/geo/upload_replicator_spec.rb index e7ed3a6e97a10b..54b77024d7dcc7 100644 --- a/ee/spec/replicators/geo/upload_replicator_spec.rb +++ b/ee/spec/replicators/geo/upload_replicator_spec.rb @@ -7,30 +7,4 @@ include_examples 'a blob replicator' include_examples 'a verifiable replicator' - - describe '.bulk_create_delete_events_async' do - let(:deleted_upload) do - { - model_record_id: 1, - blob_path: 'path', - uploader_class: 'UploaderClass' - } - end - - let(:deleted_uploads) { [deleted_upload] } - - it 'calls Geo::BatchEventCreateWorker and passes events array', :sidekiq_inline do - expect { described_class.bulk_create_delete_events_async(deleted_uploads) }.to change { ::Geo::Event.count }.from(0).to(1) - - created_event = ::Geo::Event.last - expect(created_event.replicable_name).to eq 'upload' - expect(created_event.event_name).to eq 'deleted' - expect(created_event.created_at).to be_present - expect(created_event.payload).to eq(deleted_upload.stringify_keys) - end - - it 'returns nil when empty array is passed' do - expect(described_class.bulk_create_delete_events_async([])).to be_nil - end - end end diff --git a/ee/spec/services/ee/ci/job_artifacts/destroy_batch_service_spec.rb b/ee/spec/services/ee/ci/job_artifacts/destroy_batch_service_spec.rb index c90aa6e5a722fb..ec86256362aef0 100644 --- a/ee/spec/services/ee/ci/job_artifacts/destroy_batch_service_spec.rb +++ b/ee/spec/services/ee/ci/job_artifacts/destroy_batch_service_spec.rb @@ -10,11 +10,15 @@ let(:service) { described_class.new(Ci::JobArtifact.all, pick_up_at: Time.current) } - let_it_be(:artifact) { create(:ci_job_artifact) } + let_it_be(:artifact) { create(:ci_job_artifact, :zip) } let_it_be(:security_scan) { create(:security_scan, build: artifact.job) } let_it_be(:security_finding) { create(:security_finding, scan: security_scan) } let_it_be(:event_data) { { job_ids: [artifact.job_id] } } + before do + stub_feature_flags(geo_job_artifact_replication: false) + end + it 'destroys all expired artifacts', :sidekiq_inline do expect { subject }.to change { Ci::JobArtifact.count }.by(-1) .and change { Security::Finding.count }.from(1).to(0) @@ -34,7 +38,21 @@ end it 'creates a JobArtifactDeletedEvent' do - expect { subject }.to change { Geo::JobArtifactDeletedEvent.count }.by(1) + expect { subject }.to change { Geo::JobArtifactDeletedEvent.count }.by(2) + end + + context 'with geo_job_artifact_replication flag enabled' do + before do + stub_feature_flags(geo_job_artifact_replication: true) + end + + it 'does not create a JobArtifactDeletedEvent' do + expect { subject }.to change { Geo::JobArtifactDeletedEvent.count }.by(0) + end + + it 'creates an Geo::EventLog', :sidekiq_inline do + expect { subject }.to change { ::Geo::Event.count }.by(2) + end end context 'JobArtifact batch destroy fails' do -- GitLab From e1250603a7f7d1f94fc798caec1f08bbc8a69043 Mon Sep 17 00:00:00 2001 From: Michael Kozono Date: Thu, 14 Apr 2022 11:53:44 +0000 Subject: [PATCH 2/3] Apply changes to a guide --- ...Geo Replicate a new Git repository type.md | 35 +++++++++++++++++++ .../Geo Replicate a new blob type.md | 21 ++++------- .../ci/job_artifacts/destroy_batch_service.rb | 2 ++ ee/db/geo/structure.sql | 12 +++---- ee/lib/gitlab/geo/replicator.rb | 2 +- 5 files changed, 51 insertions(+), 21 deletions(-) diff --git a/.gitlab/issue_templates/Geo Replicate a new Git repository type.md b/.gitlab/issue_templates/Geo Replicate a new Git repository type.md index e858f80ffaa658..dceb107dd7d4c6 100644 --- a/.gitlab/issue_templates/Geo Replicate a new Git repository type.md +++ b/.gitlab/issue_templates/Geo Replicate a new Git repository type.md @@ -673,6 +673,41 @@ The GraphQL API is used by `Admin > Geo > Replication Details` views, and is dir Individual Cool Widget replication and verification data should now be available via the GraphQL API. +#### Step 4. Handle batch destroy + +If batch destroy logic is implemented for a replicable, then that logic must be "replicated" by Geo secondaries. The easiest way to do this is use `Geo::BatchEventCreateWorker` to bulk insert a delete event for each replicable. + +For example, if `FastDestroyAll` is used, then you may be able to [use `begin_fast_destroy` and `finalize_fast_destroy` hooks, like we did for uploads](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/69763). + +Or if a special service is used to batch delete records and their associated data, then you probably need to [hook into that service, like we did for job artifacts](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/79530). + +As illustrated by the above two examples, batch destroy logic cannot be handled automatically by Geo secondaries without restricting the way other teams perform batch destroys. It is up to you to produce `Geo::BatchEventCreateWorker` attributes before the records are deleted, and then enqueue `Geo::BatchEventCreateWorker` after the records are deleted. + +- [ ] Ensure that any batch destroy of this replicable is replicated to secondary sites +- [ ] Regardless of implementation details, please verify in specs that when the parent object is removed, the new `Geo::Event` records are created: + +```ruby + describe '#destroy' do + subject { create(:cool_widget) } + + context 'when running in a Geo primary node' do + let_it_be(:primary) { create(:geo_node, :primary) } + let_it_be(:secondary) { create(:geo_node) } + + it 'logs an event to the Geo event log when bulk removal is used', :sidekiq_inline do + stub_current_geo_node(primary) + + expect { subject.project.destroy! }.to change(Geo::Event.where(replicable_name: :cool_widget, event_name: :deleted), :count).by(1) + + payload = Geo::Event.where(replicable_name: :cool_widget, event_name: :deleted).last.payload + + expect(payload['model_record_id']).to eq(subject.id) + expect(payload['blob_path']).to eq(subject.relative_path) + expect(payload['uploader_class']).to eq('CoolWidgetUploader') + end + end + end +``` ### Release Geo support of Cool Widgets - [ ] In the rollout issue you created when creating the feature flag, modify the Roll Out Steps: diff --git a/.gitlab/issue_templates/Geo Replicate a new blob type.md b/.gitlab/issue_templates/Geo Replicate a new blob type.md index 15db7991f77a2b..cf214b212d8fd1 100644 --- a/.gitlab/issue_templates/Geo Replicate a new blob type.md +++ b/.gitlab/issue_templates/Geo Replicate a new blob type.md @@ -637,25 +637,18 @@ The GraphQL API is used by `Admin > Geo > Replication Details` views, and is dir Individual Cool Widget replication and verification data should now be available via the GraphQL API. -#### Step 4. Implement batch destroy logic for fast destroy hooks +#### Step 4. Handle batch destroy -We need to create a Geo delete event when the parent object of replicable records is removed. -We have a special tool for that, see `app/models/concerns/fast_destroy_all.rb`. -It's hard to give a common example of how it should be implemented for all the cases because sometimes `begin_fast_destroy` and `finalize_fast_destroy` -are already used for other purposes, but the general idea is that in we create event data before the delete transaction is commited -and then create actual events after the commit. In `begin_fast_destroy`: +If batch destroy logic is implemented for a replicable, then that logic must be "replicated" by Geo secondaries. The easiest way to do this is use `Geo::BatchEventCreateWorker` to bulk insert a delete event for each replicable. -```ruby - events_details = cool_widgets.map{ |widget| widget.replicator.deleted_params } -``` +For example, if `FastDestroyAll` is used, then you may be able to [use `begin_fast_destroy` and `finalize_fast_destroy` hooks, like we did for uploads](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/69763). -In `finalize_fast_destroy`: +Or if a special service is used to batch delete records and their associated data, then you probably need to [hook into that service, like we did for job artifacts](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/79530). -```ruby - ::Geo::CoolWidgetReplicator.bulk_create_delete_events_async(events_details) -``` +As illustrated by the above two examples, batch destroy logic cannot be handled automatically by Geo secondaries without restricting the way other teams perform batch destroys. It is up to you to produce `Geo::BatchEventCreateWorker` attributes before the records are deleted, and then enqueue `Geo::BatchEventCreateWorker` after the records are deleted. -Please verify in specs that when the parent object is removed the new `Geo::Event` records are created: +- [ ] Ensure that any batch destroy of this replicable is replicated to secondary sites +- [ ] Regardless of implementation details, please verify in specs that when the parent object is removed, the new `Geo::Event` records are created: ```ruby describe '#destroy' do diff --git a/app/services/ci/job_artifacts/destroy_batch_service.rb b/app/services/ci/job_artifacts/destroy_batch_service.rb index 5a4f81771beb65..8a44d2b77e4581 100644 --- a/app/services/ci/job_artifacts/destroy_batch_service.rb +++ b/app/services/ci/job_artifacts/destroy_batch_service.rb @@ -54,9 +54,11 @@ def execute(update_stats: true) private # Overriden in EE + # :nocov: def destroy_around_hook(artifacts) yield end + # :nocov: # Overriden in EE def destroy_related_records(artifacts); end diff --git a/ee/db/geo/structure.sql b/ee/db/geo/structure.sql index 12c522bc792114..2f282457a58808 100644 --- a/ee/db/geo/structure.sql +++ b/ee/db/geo/structure.sql @@ -628,18 +628,18 @@ CREATE UNIQUE INDEX index_terraform_state_version_registry_on_t_state_version_id CREATE UNIQUE INDEX index_tf_state_versions_registry_tf_state_versions_id_unique ON terraform_state_version_registry USING btree (terraform_state_version_id); -CREATE INDEX job_artifact_registry_failed_verification ON job_artifact_registry USING btree (verification_retry_at NULLS FIRST) WHERE ((state = 2) AND (verification_state = 3)); - -CREATE INDEX job_artifact_registry_needs_verification ON job_artifact_registry USING btree (verification_state) WHERE ((state = 2) AND (verification_state = ANY (ARRAY[0, 3]))); - -CREATE INDEX job_artifact_registry_pending_verification ON job_artifact_registry USING btree (verified_at NULLS FIRST) WHERE ((state = 2) AND (verification_state = 0)); - CREATE INDEX lfs_object_registry_failed_verification ON lfs_object_registry USING btree (verification_retry_at NULLS FIRST) WHERE ((state = 2) AND (verification_state = 3)); CREATE INDEX lfs_object_registry_needs_verification ON lfs_object_registry USING btree (verification_state) WHERE ((state = 2) AND (verification_state = ANY (ARRAY[0, 3]))); CREATE INDEX lfs_object_registry_pending_verification ON lfs_object_registry USING btree (verified_at NULLS FIRST) WHERE ((state = 2) AND (verification_state = 0)); +CREATE INDEX job_artifact_registry_failed_verification ON job_artifact_registry USING btree (verification_retry_at NULLS FIRST) WHERE ((state = 2) AND (verification_state = 3)); + +CREATE INDEX job_artifact_registry_needs_verification ON job_artifact_registry USING btree (verification_state) WHERE ((state = 2) AND (verification_state = ANY (ARRAY[0, 3]))); + +CREATE INDEX job_artifact_registry_pending_verification ON job_artifact_registry USING btree (verified_at NULLS FIRST) WHERE ((state = 2) AND (verification_state = 0)); + CREATE INDEX merge_request_diff_registry_failed_verification ON merge_request_diff_registry USING btree (verification_retry_at NULLS FIRST) WHERE ((state = 2) AND (verification_state = 3)); CREATE INDEX merge_request_diff_registry_needs_verification ON merge_request_diff_registry USING btree (verification_state) WHERE ((state = 2) AND (verification_state = ANY (ARRAY[0, 3]))); diff --git a/ee/lib/gitlab/geo/replicator.rb b/ee/lib/gitlab/geo/replicator.rb index 4530c33e1a4439..9d9f8a3fa5795e 100644 --- a/ee/lib/gitlab/geo/replicator.rb +++ b/ee/lib/gitlab/geo/replicator.rb @@ -208,7 +208,7 @@ def self.bulk_create_delete_events_async(deleted_records) replicable_name: replicable_name, event_name: 'deleted', payload: { - model_record_id: record.fetch(:model_record_id), + model_record_id: record[:model_record_id], blob_path: record[:blob_path], uploader_class: record[:uploader_class] }, -- GitLab From 8538a80796a26c9aa668a992735ab17da542659b Mon Sep 17 00:00:00 2001 From: Valery Sizov Date: Thu, 21 Apr 2022 11:25:56 +0100 Subject: [PATCH 3/3] Geo: Move event bulk creation to blob strategy --- .../concerns/geo/blob_replicator_strategy.rb | 28 +++++++++++++++++++ .../ci/job_artifacts/destroy_batch_service.rb | 2 +- ee/db/geo/structure.sql | 12 ++++---- ee/lib/gitlab/geo/replicator.rb | 28 ++----------------- ee/spec/lib/gitlab/geo/replicator_spec.rb | 19 ------------- ...lob_replicator_strategy_shared_examples.rb | 19 +++++++++++++ 6 files changed, 57 insertions(+), 51 deletions(-) diff --git a/ee/app/models/concerns/geo/blob_replicator_strategy.rb b/ee/app/models/concerns/geo/blob_replicator_strategy.rb index 2cf3d75b485952..b3c2ad144bccb2 100644 --- a/ee/app/models/concerns/geo/blob_replicator_strategy.rb +++ b/ee/app/models/concerns/geo/blob_replicator_strategy.rb @@ -24,6 +24,34 @@ def data_type def data_type_title _('File') end + + def bulk_create_delete_events_async(deleted_records) + return unless deleted_records.any? + raise 'This method can only be called for a child class of Gitlab::Geo::Replicator' if replicable_name.nil? + + deleted_record_details = [] + + events = deleted_records.map do |record| + deleted_record_details << [replicable_name, record[:model_record_id], record[:blob_path]] + + raise 'model_record_id can not be nil' if record[:model_record_id].nil? + + { + replicable_name: replicable_name, + event_name: 'deleted', + payload: { + model_record_id: record[:model_record_id], + blob_path: record[:blob_path], + uploader_class: record[:uploader_class] + }, + created_at: Time.current + } + end + + log_info('Bulk delete of: ', details: deleted_record_details) + + ::Geo::BatchEventCreateWorker.perform_async(events) + end end def handle_after_create_commit diff --git a/ee/app/services/ee/ci/job_artifacts/destroy_batch_service.rb b/ee/app/services/ee/ci/job_artifacts/destroy_batch_service.rb index 9f16372cd1ddb6..4593c4164cbd25 100644 --- a/ee/app/services/ee/ci/job_artifacts/destroy_batch_service.rb +++ b/ee/app/services/ee/ci/job_artifacts/destroy_batch_service.rb @@ -13,7 +13,7 @@ def destroy_around_hook(artifacts) return yield unless ::Geo::EventStore.can_create_event? ### Deprecated way of creating events. Should be removed by https://gitlab.com/gitlab-org/gitlab/-/issues/349056 - if ::Feature.disabled?(:geo_job_artifact_replication) + if ::Feature.disabled?(:geo_job_artifact_replication, default_enabled: :yaml) yield ::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification.allow_cross_database_modification_within_transaction(url: 'https://gitlab.com/gitlab-org/gitlab/-/issues/351849') do diff --git a/ee/db/geo/structure.sql b/ee/db/geo/structure.sql index 2f282457a58808..12c522bc792114 100644 --- a/ee/db/geo/structure.sql +++ b/ee/db/geo/structure.sql @@ -628,18 +628,18 @@ CREATE UNIQUE INDEX index_terraform_state_version_registry_on_t_state_version_id CREATE UNIQUE INDEX index_tf_state_versions_registry_tf_state_versions_id_unique ON terraform_state_version_registry USING btree (terraform_state_version_id); -CREATE INDEX lfs_object_registry_failed_verification ON lfs_object_registry USING btree (verification_retry_at NULLS FIRST) WHERE ((state = 2) AND (verification_state = 3)); - -CREATE INDEX lfs_object_registry_needs_verification ON lfs_object_registry USING btree (verification_state) WHERE ((state = 2) AND (verification_state = ANY (ARRAY[0, 3]))); - -CREATE INDEX lfs_object_registry_pending_verification ON lfs_object_registry USING btree (verified_at NULLS FIRST) WHERE ((state = 2) AND (verification_state = 0)); - CREATE INDEX job_artifact_registry_failed_verification ON job_artifact_registry USING btree (verification_retry_at NULLS FIRST) WHERE ((state = 2) AND (verification_state = 3)); CREATE INDEX job_artifact_registry_needs_verification ON job_artifact_registry USING btree (verification_state) WHERE ((state = 2) AND (verification_state = ANY (ARRAY[0, 3]))); CREATE INDEX job_artifact_registry_pending_verification ON job_artifact_registry USING btree (verified_at NULLS FIRST) WHERE ((state = 2) AND (verification_state = 0)); +CREATE INDEX lfs_object_registry_failed_verification ON lfs_object_registry USING btree (verification_retry_at NULLS FIRST) WHERE ((state = 2) AND (verification_state = 3)); + +CREATE INDEX lfs_object_registry_needs_verification ON lfs_object_registry USING btree (verification_state) WHERE ((state = 2) AND (verification_state = ANY (ARRAY[0, 3]))); + +CREATE INDEX lfs_object_registry_pending_verification ON lfs_object_registry USING btree (verified_at NULLS FIRST) WHERE ((state = 2) AND (verification_state = 0)); + CREATE INDEX merge_request_diff_registry_failed_verification ON merge_request_diff_registry USING btree (verification_retry_at NULLS FIRST) WHERE ((state = 2) AND (verification_state = 3)); CREATE INDEX merge_request_diff_registry_needs_verification ON merge_request_diff_registry USING btree (verification_state) WHERE ((state = 2) AND (verification_state = ANY (ARRAY[0, 3]))); diff --git a/ee/lib/gitlab/geo/replicator.rb b/ee/lib/gitlab/geo/replicator.rb index 9d9f8a3fa5795e..4cc10ec4d242fd 100644 --- a/ee/lib/gitlab/geo/replicator.rb +++ b/ee/lib/gitlab/geo/replicator.rb @@ -193,33 +193,11 @@ def self.verification_enabled? false end + # :nocov: def self.bulk_create_delete_events_async(deleted_records) - return unless deleted_records.any? - raise 'This method can only be called for a child class of Gitlab::Geo::Replicator' if replicable_name.nil? - - deleted_record_details = [] - - events = deleted_records.map do |record| - deleted_record_details << [replicable_name, record[:model_record_id], record[:blob_path]] - - raise 'model_record_id can not be nil' if record[:model_record_id].nil? - - { - replicable_name: replicable_name, - event_name: 'deleted', - payload: { - model_record_id: record[:model_record_id], - blob_path: record[:blob_path], - uploader_class: record[:uploader_class] - }, - created_at: Time.current - } - end - - log_info('Bulk delete of: ', details: deleted_record_details) - - ::Geo::BatchEventCreateWorker.perform_async(events) + raise NotImplementedError end + # :nocov: def self.bulk_create_events(events) ::Geo::EventLog.transaction do diff --git a/ee/spec/lib/gitlab/geo/replicator_spec.rb b/ee/spec/lib/gitlab/geo/replicator_spec.rb index 8c0d2651e5db80..5b9c1448bcc474 100644 --- a/ee/spec/lib/gitlab/geo/replicator_spec.rb +++ b/ee/spec/lib/gitlab/geo/replicator_spec.rb @@ -154,25 +154,6 @@ end end - describe '.bulk_create_delete_events_async' do - let(:uploads) { create_list(:upload, 2) } - let(:upload_deleted_details) { uploads.map { |upload| upload.replicator.deleted_params} } - let(:inherited_replicator_class) { ::Geo::UploadReplicator } - - it 'creates events', :sidekiq_inline do - expect { inherited_replicator_class.bulk_create_delete_events_async(upload_deleted_details) } - .to change { ::Geo::Event.count }.from(0).to(2) - - expect(::Geo::EventLog.last.event).to be_present - end - - it 'raises error when model_record_id is nil' do - expect do - inherited_replicator_class.bulk_create_delete_events_async([{}]) - end.to raise_error(RuntimeError, /model_record_id can not be nil/) - end - end - describe '#initialize' do subject(:replicator) { Geo::DummyReplicator.new(**args) } diff --git a/ee/spec/support/shared_examples/models/concerns/blob_replicator_strategy_shared_examples.rb b/ee/spec/support/shared_examples/models/concerns/blob_replicator_strategy_shared_examples.rb index 44196b6b53469e..8d6f1ab81781af 100644 --- a/ee/spec/support/shared_examples/models/concerns/blob_replicator_strategy_shared_examples.rb +++ b/ee/spec/support/shared_examples/models/concerns/blob_replicator_strategy_shared_examples.rb @@ -256,4 +256,23 @@ it { is_expected.to be_falsey } end end + + describe '.bulk_create_delete_events_async' do + let(:uploads) { create_list(:upload, 2) } + let(:upload_deleted_details) { uploads.map { |upload| upload.replicator.deleted_params} } + let(:inherited_replicator_class) { ::Geo::UploadReplicator } + + it 'creates events' do + expect { inherited_replicator_class.bulk_create_delete_events_async(upload_deleted_details) } + .to change { ::Geo::Event.count }.from(0).to(2) + + expect(::Geo::EventLog.last.event).to be_present + end + + it 'raises error when model_record_id is nil' do + expect do + inherited_replicator_class.bulk_create_delete_events_async([{}]) + end.to raise_error(RuntimeError, /model_record_id can not be nil/) + end + end end -- GitLab