From 276f62a2be1d2205256157040777eaa9f44a0d58 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Wed, 5 Jul 2023 16:41:48 +1000 Subject: [PATCH 01/38] WIP: Async batch processing of deleted git refs --- app/models/ci/persistent_ref.rb | 8 ++- app/models/ci/pipeline.rb | 4 +- app/models/merge_request.rb | 15 ++++-- app/models/project.rb | 2 + app/models/to_be_deleted_git_ref.rb | 38 ++++++++++++++ .../delete_gitaly_refs_in_batches.yml | 8 +++ db/docs/p_to_be_deleted_git_refs.yml | 11 ++++ ...431_create_table_to_be_deleted_git_refs.rb | 28 +++++++++++ ...onstraint_to_to_be_deleted_git_refs_ref.rb | 15 ++++++ ...042842_partition_to_be_deleted_git_refs.rb | 30 +++++++++++ ...ble_to_be_deleted_git_refs_to_partition.rb | 32 ++++++++++++ db/schema_migrations/20230704233431 | 1 + db/schema_migrations/20230704235236 | 1 + db/schema_migrations/20230705042842 | 1 + db/schema_migrations/20230705062237 | 1 + db/structure.sql | 50 +++++++++++++++++++ lib/gitlab/git/repository.rb | 10 ++++ 17 files changed, 250 insertions(+), 5 deletions(-) create mode 100644 app/models/to_be_deleted_git_ref.rb create mode 100644 config/feature_flags/development/delete_gitaly_refs_in_batches.yml create mode 100644 db/docs/p_to_be_deleted_git_refs.yml create mode 100644 db/migrate/20230704233431_create_table_to_be_deleted_git_refs.rb create mode 100644 db/migrate/20230704235236_add_constraint_to_to_be_deleted_git_refs_ref.rb create mode 100644 db/migrate/20230705042842_partition_to_be_deleted_git_refs.rb create mode 100644 db/migrate/20230705062237_convert_table_to_be_deleted_git_refs_to_partition.rb create mode 100644 db/schema_migrations/20230704233431 create mode 100644 db/schema_migrations/20230704235236 create mode 100644 db/schema_migrations/20230705042842 create mode 100644 db/schema_migrations/20230705062237 diff --git a/app/models/ci/persistent_ref.rb b/app/models/ci/persistent_ref.rb index f713d5952bcc54..57e2d943a4cbee 100644 --- a/app/models/ci/persistent_ref.rb +++ b/app/models/ci/persistent_ref.rb @@ -11,7 +11,7 @@ class PersistentRef delegate :project, :sha, to: :pipeline delegate :repository, to: :project - delegate :ref_exists?, :create_ref, :delete_refs, to: :repository + delegate :ref_exists?, :create_ref, :delete_refs, :async_delete_refs, to: :repository def exist? ref_exists?(path) @@ -42,6 +42,12 @@ def delete .track_exception(e, pipeline_id: pipeline.id) end + def async_delete + return unless should_delete? + + async_delete_refs(path) + end + def path "refs/#{Repository::REF_PIPELINES}/#{pipeline.id}" end diff --git a/app/models/ci/pipeline.rb b/app/models/ci/pipeline.rb index 97efccf7160cdc..30005fa28974f6 100644 --- a/app/models/ci/pipeline.rb +++ b/app/models/ci/pipeline.rb @@ -342,7 +342,9 @@ class Pipeline < Ci::ApplicationRecord # This needs to be kept in sync with `Ci::PipelineRef#should_delete?` after_transition any => ::Ci::Pipeline.stopped_statuses do |pipeline| pipeline.run_after_commit do - if Feature.enabled?(:pipeline_cleanup_ref_worker_async, pipeline.project) + if Feature.enabled?(:delete_gitaly_refs_in_batches, pipeline.project) + pipeline.persistent_ref.async_delete + elsif Feature.enabled?(:pipeline_cleanup_ref_worker_async, pipeline.project) ::Ci::PipelineCleanupRefWorker.perform_async(pipeline.id) else pipeline.persistent_ref.delete diff --git a/app/models/merge_request.rb b/app/models/merge_request.rb index 2773569161d9b5..37c18ac9f0a1f8 100644 --- a/app/models/merge_request.rb +++ b/app/models/merge_request.rb @@ -1537,20 +1537,29 @@ def train_ref_path end def schedule_cleanup_refs(only: :all) - if Feature.enabled?(:merge_request_cleanup_ref_worker_async, target_project) + if Feature.enabled?(:delete_gitaly_refs_in_batches, target_project) + async_cleanup_refs(only: only) + elsif Feature.enabled?(:merge_request_cleanup_ref_worker_async, target_project) MergeRequests::CleanupRefWorker.perform_async(id, only.to_s) else cleanup_refs(only: only) end end - def cleanup_refs(only: :all) + def refs_to_cleanup(only: :all) target_refs = [] target_refs << ref_path if %i[all head].include?(only) target_refs << merge_ref_path if %i[all merge].include?(only) target_refs << train_ref_path if %i[all train].include?(only) + target_refs + end + + def cleanup_refs(only: :all) + project.repository.delete_refs(*refs_to_cleanup(only: only)) + end - project.repository.delete_refs(*target_refs) + def async_cleanup_refs(only: :all) + project.repository.async_delete_refs(*refs_to_cleanup(only: only)) end def self.merge_request_ref?(ref) diff --git a/app/models/project.rb b/app/models/project.rb index d3642e7d7d032e..3acb554055bc91 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -456,6 +456,8 @@ def self.integration_association_name(name) has_one :ci_project_mirror, class_name: 'Ci::ProjectMirror' has_many :sync_events, class_name: 'Projects::SyncEvent' + has_many :to_be_deleted_git_refs, inverse_of: :project + has_one :build_artifacts_size_refresh, class_name: 'Projects::BuildArtifactsSizeRefresh' accepts_nested_attributes_for :variables, allow_destroy: true diff --git a/app/models/to_be_deleted_git_ref.rb b/app/models/to_be_deleted_git_ref.rb new file mode 100644 index 00000000000000..62da001fdee399 --- /dev/null +++ b/app/models/to_be_deleted_git_ref.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +class ToBeDeletedGitRef < ApplicationRecord + PARTITION_DURATION = 1.day + + include BulkInsertSafe + include PartitionedTable + + self.table_name = 'p_to_be_deleted_git_refs' + self.primary_key = :id + self.ignored_columns = %i[partition_id] + self.sequence_name = :to_be_deleted_git_refs_id_seq + + belongs_to :project, inverse_of: :to_be_deleted_git_refs + + scope :for_partition, -> (partition) { where(partition_id: partition) } + + partitioned_by :partition_id, strategy: :sliding_list, + next_partition_if: -> (active_partition) do + oldest_record_in_partition = ToBeDeletedGitRef + .select(:id, :created_at) + .for_partition(active_partition.value) + .order(:id) + .limit(1) + .take + + oldest_record_in_partition.present? && + oldest_record_in_partition.created_at < PARTITION_DURATION.ago + end, + detach_partition_if: -> (partition) do + !ToBeDeletedGitRef + .for_partition(partition.value) + .status_pending + .exists? + end + + enum status: { pending: 1, processed: 2 }, _prefix: :status +end diff --git a/config/feature_flags/development/delete_gitaly_refs_in_batches.yml b/config/feature_flags/development/delete_gitaly_refs_in_batches.yml new file mode 100644 index 00000000000000..8ff93e4ee1c274 --- /dev/null +++ b/config/feature_flags/development/delete_gitaly_refs_in_batches.yml @@ -0,0 +1,8 @@ +--- +name: delete_gitaly_refs_in_batches +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/125333 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/416969 +milestone: '16.2' +type: development +group: group::gitaly +default_enabled: false diff --git a/db/docs/p_to_be_deleted_git_refs.yml b/db/docs/p_to_be_deleted_git_refs.yml new file mode 100644 index 00000000000000..bdcaff7e88825f --- /dev/null +++ b/db/docs/p_to_be_deleted_git_refs.yml @@ -0,0 +1,11 @@ +--- +table_name: p_to_be_deleted_git_refs +classes: +- ToBeDeletedGitRef +feature_categories: +- gitaly +description: Acts as a queue for refs that need to be deleted in Gitaly. This allows + us to batch deletes rather than sending them one at a time. +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/125333 +milestone: '16.2' +gitlab_schema: gitlab_main diff --git a/db/migrate/20230704233431_create_table_to_be_deleted_git_refs.rb b/db/migrate/20230704233431_create_table_to_be_deleted_git_refs.rb new file mode 100644 index 00000000000000..76fdd79dee7ff2 --- /dev/null +++ b/db/migrate/20230704233431_create_table_to_be_deleted_git_refs.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +class CreateTableToBeDeletedGitRefs < Gitlab::Database::Migration[2.1] + enable_lock_retries! + + def up + create_table :to_be_deleted_git_refs, id: false do |t| + t.bigint :id, null: false + # TODO: Is it worth making this generic. Repository model uses `#container` so we could support (presumably) group + # wikis and so-on here. Does it matter? Or should we just build that later since our main focus on performance is + # for projects. + t.bigint :project, index: true, null: false # Do not bother with foreign key as it provides not benefit and has a performance cost. These get cleaned up over time anyway. + t.text :ref, null: false + t.bigint :partition_id, null: false, index: true, default: 1 + t.integer :status, null: false, index: true, default: 1, limit: 2 + + t.timestamps null: false + + t.index [:id, :partition_id], unique: true, name: :index_to_be_deleted_git_refs_on_id_and_partition_id + end + + execute "ALTER TABLE to_be_deleted_git_refs ADD CONSTRAINT to_be_deleted_git_refs_pkey PRIMARY KEY USING INDEX index_to_be_deleted_git_refs_on_id_and_partition_id" + end + + def down + drop_table :to_be_deleted_git_refs + end +end diff --git a/db/migrate/20230704235236_add_constraint_to_to_be_deleted_git_refs_ref.rb b/db/migrate/20230704235236_add_constraint_to_to_be_deleted_git_refs_ref.rb new file mode 100644 index 00000000000000..c410094361e718 --- /dev/null +++ b/db/migrate/20230704235236_add_constraint_to_to_be_deleted_git_refs_ref.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + + +class AddConstraintToToBeDeletedGitRefsRef < Gitlab::Database::Migration[2.1] + disable_ddl_transaction! + + def up + # TODO: Is there a reliable upper limit here? If something exceeds this upper limit we won't be able to delete it. Should we make it very large or just not have a limit? + add_text_limit :to_be_deleted_git_refs, :ref, 1024 + end + + def down + remove_text_limit :to_be_deleted_git_refs, :ref + end +end diff --git a/db/migrate/20230705042842_partition_to_be_deleted_git_refs.rb b/db/migrate/20230705042842_partition_to_be_deleted_git_refs.rb new file mode 100644 index 00000000000000..b73395638493ff --- /dev/null +++ b/db/migrate/20230705042842_partition_to_be_deleted_git_refs.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +class PartitionToBeDeletedGitRefs < Gitlab::Database::Migration[2.1] + include Gitlab::Database::PartitioningMigrationHelpers::TableManagementHelpers + + disable_ddl_transaction! + + TABLE_NAME = :to_be_deleted_git_refs + PARENT_TABLE_NAME = :p_to_be_deleted_git_refs + FIRST_PARTITION = 1 + PARTITION_COLUMN = :partition_id + + def up + prepare_constraint_for_list_partitioning( + table_name: TABLE_NAME, + partitioning_column: PARTITION_COLUMN, + parent_table_name: PARENT_TABLE_NAME, + initial_partitioning_value: FIRST_PARTITION + ) + end + + def down + revert_preparing_constraint_for_list_partitioning( + table_name: TABLE_NAME, + partitioning_column: PARTITION_COLUMN, + parent_table_name: PARENT_TABLE_NAME, + initial_partitioning_value: FIRST_PARTITION + ) + end +end diff --git a/db/migrate/20230705062237_convert_table_to_be_deleted_git_refs_to_partition.rb b/db/migrate/20230705062237_convert_table_to_be_deleted_git_refs_to_partition.rb new file mode 100644 index 00000000000000..3c27d057294cdc --- /dev/null +++ b/db/migrate/20230705062237_convert_table_to_be_deleted_git_refs_to_partition.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + + +class ConvertTableToBeDeletedGitRefsToPartition < Gitlab::Database::Migration[2.1] + include Gitlab::Database::PartitioningMigrationHelpers::TableManagementHelpers + + disable_ddl_transaction! + + TABLE_NAME = :to_be_deleted_git_refs + PARENT_TABLE_NAME = :p_to_be_deleted_git_refs + FIRST_PARTITION = 1 + PARTITION_COLUMN = :partition_id + + def up + convert_table_to_first_list_partition( + table_name: TABLE_NAME, + partitioning_column: PARTITION_COLUMN, + parent_table_name: PARENT_TABLE_NAME, + initial_partitioning_value: FIRST_PARTITION, + lock_tables: [TABLE_NAME] + ) + end + + def down + revert_converting_table_to_first_list_partition( + table_name: TABLE_NAME, + partitioning_column: PARTITION_COLUMN, + parent_table_name: PARENT_TABLE_NAME, + initial_partitioning_value: FIRST_PARTITION + ) + end +end diff --git a/db/schema_migrations/20230704233431 b/db/schema_migrations/20230704233431 new file mode 100644 index 00000000000000..7b0af2a2a3ffb6 --- /dev/null +++ b/db/schema_migrations/20230704233431 @@ -0,0 +1 @@ +d07a8cdd97cc221d64257fdcab34c3e44593f6c3a430523a2e73c96fa12c8ba1 \ No newline at end of file diff --git a/db/schema_migrations/20230704235236 b/db/schema_migrations/20230704235236 new file mode 100644 index 00000000000000..8908154d59c619 --- /dev/null +++ b/db/schema_migrations/20230704235236 @@ -0,0 +1 @@ +f9a1d78f52f5949acd102320590ca4270747492965e3e92cc00c27ad4a9e1677 \ No newline at end of file diff --git a/db/schema_migrations/20230705042842 b/db/schema_migrations/20230705042842 new file mode 100644 index 00000000000000..e7fdba22afc5b9 --- /dev/null +++ b/db/schema_migrations/20230705042842 @@ -0,0 +1 @@ +db9e41402001c9acf2285b5937e8ffd9c8020bebf48753e7cc7bf2ea0a3e3e55 \ No newline at end of file diff --git a/db/schema_migrations/20230705062237 b/db/schema_migrations/20230705062237 new file mode 100644 index 00000000000000..f61f048cfaadef --- /dev/null +++ b/db/schema_migrations/20230705062237 @@ -0,0 +1 @@ +19a6eb559d647964753203a5e3b41198fbb681c2b1148fe8312024e64134b04a \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index fc0b145ad3823f..52b12f8a5ba521 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -19446,6 +19446,18 @@ CREATE SEQUENCE p_ci_job_annotations_id_seq ALTER SEQUENCE p_ci_job_annotations_id_seq OWNED BY p_ci_job_annotations.id; +CREATE TABLE p_to_be_deleted_git_refs ( + id bigint NOT NULL, + project bigint NOT NULL, + ref text NOT NULL, + partition_id bigint DEFAULT 1 NOT NULL, + status smallint DEFAULT 1 NOT NULL, + created_at timestamp(6) without time zone NOT NULL, + updated_at timestamp(6) without time zone NOT NULL, + CONSTRAINT check_ca4b0f9d0f CHECK ((char_length(ref) <= 1024)) +) +PARTITION BY LIST (partition_id); + CREATE TABLE packages_build_infos ( id bigint NOT NULL, package_id integer NOT NULL, @@ -23503,6 +23515,18 @@ CREATE SEQUENCE timelogs_id_seq ALTER SEQUENCE timelogs_id_seq OWNED BY timelogs.id; +CREATE TABLE to_be_deleted_git_refs ( + id bigint NOT NULL, + project bigint NOT NULL, + ref text NOT NULL, + partition_id bigint DEFAULT 1 NOT NULL, + status smallint DEFAULT 1 NOT NULL, + created_at timestamp(6) without time zone NOT NULL, + updated_at timestamp(6) without time zone NOT NULL, + CONSTRAINT check_ca4b0f9d0f CHECK ((char_length(ref) <= 1024)) +); +ALTER TABLE ONLY p_to_be_deleted_git_refs ATTACH PARTITION to_be_deleted_git_refs FOR VALUES IN ('1'); + CREATE TABLE todos ( id integer NOT NULL, user_id integer NOT NULL, @@ -28051,6 +28075,9 @@ ALTER TABLE ONLY p_ci_job_annotations ALTER TABLE ONLY p_ci_runner_machine_builds ADD CONSTRAINT p_ci_runner_machine_builds_pkey PRIMARY KEY (build_id, partition_id); +ALTER TABLE ONLY p_to_be_deleted_git_refs + ADD CONSTRAINT p_to_be_deleted_git_refs_pkey PRIMARY KEY (id, partition_id); + ALTER TABLE ONLY packages_build_infos ADD CONSTRAINT packages_build_infos_pkey PRIMARY KEY (id); @@ -28612,6 +28639,9 @@ ALTER TABLE ONLY timelog_categories ALTER TABLE ONLY timelogs ADD CONSTRAINT timelogs_pkey PRIMARY KEY (id); +ALTER TABLE ONLY to_be_deleted_git_refs + ADD CONSTRAINT to_be_deleted_git_refs_pkey PRIMARY KEY (id, partition_id); + ALTER TABLE ONLY todos ADD CONSTRAINT todos_pkey PRIMARY KEY (id); @@ -33240,6 +33270,18 @@ CREATE INDEX index_timelogs_on_spent_at ON timelogs USING btree (spent_at) WHERE CREATE INDEX index_timelogs_on_user_id ON timelogs USING btree (user_id); +CREATE INDEX p_to_be_deleted_git_refs_partition_id_idx ON ONLY p_to_be_deleted_git_refs USING btree (partition_id); + +CREATE INDEX index_to_be_deleted_git_refs_on_partition_id ON to_be_deleted_git_refs USING btree (partition_id); + +CREATE INDEX p_to_be_deleted_git_refs_project_idx ON ONLY p_to_be_deleted_git_refs USING btree (project); + +CREATE INDEX index_to_be_deleted_git_refs_on_project ON to_be_deleted_git_refs USING btree (project); + +CREATE INDEX p_to_be_deleted_git_refs_status_idx ON ONLY p_to_be_deleted_git_refs USING btree (status); + +CREATE INDEX index_to_be_deleted_git_refs_on_status ON to_be_deleted_git_refs USING btree (status); + CREATE INDEX index_todos_on_author_id ON todos USING btree (author_id); CREATE INDEX index_todos_on_author_id_and_created_at ON todos USING btree (author_id, created_at); @@ -35350,8 +35392,16 @@ ALTER INDEX p_ci_builds_user_id_name_created_at_idx ATTACH PARTITION index_secur ALTER INDEX p_ci_builds_name_id_idx ATTACH PARTITION index_security_ci_builds_on_name_and_id_parser_features; +ALTER INDEX p_to_be_deleted_git_refs_partition_id_idx ATTACH PARTITION index_to_be_deleted_git_refs_on_partition_id; + +ALTER INDEX p_to_be_deleted_git_refs_project_idx ATTACH PARTITION index_to_be_deleted_git_refs_on_project; + +ALTER INDEX p_to_be_deleted_git_refs_status_idx ATTACH PARTITION index_to_be_deleted_git_refs_on_status; + ALTER INDEX p_ci_builds_scheduled_at_idx ATTACH PARTITION partial_index_ci_builds_on_scheduled_at_with_scheduled_jobs; +ALTER INDEX p_to_be_deleted_git_refs_pkey ATTACH PARTITION to_be_deleted_git_refs_pkey; + ALTER INDEX p_ci_builds_token_encrypted_partition_id_idx ATTACH PARTITION unique_ci_builds_token_encrypted_and_partition_id; CREATE TRIGGER chat_names_loose_fk_trigger AFTER DELETE ON chat_names REFERENCING OLD TABLE AS old_table FOR EACH STATEMENT EXECUTE FUNCTION insert_into_loose_foreign_keys_deleted_records(); diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index cb95af69b00d43..07f3d24aaec137 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -750,6 +750,16 @@ def delete_branch(branch_name) raise DeleteBranchError, e end + def async_delete_refs(*refs) + records = refs.map do |ref| + project = @gl_repository.project + raise "async_delete_refs only supports project repositories" unless project + ToBeDeledGitRef.new(project_id: project.id, ref: ref) + end + + ToBeDeletedGitRef.bulk_insert!(records) + end + def delete_refs(...) wrapped_gitaly_errors do gitaly_delete_refs(...) -- GitLab From abf47a9fefb0ba18ae88910db502e70fa4cb4ec5 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Wed, 5 Jul 2023 19:00:18 +1000 Subject: [PATCH 02/38] WIP --- .../20230704233431_create_table_to_be_deleted_git_refs.rb | 4 +++- db/structure.sql | 4 ++-- lib/gitlab/git/repository.rb | 8 +++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/db/migrate/20230704233431_create_table_to_be_deleted_git_refs.rb b/db/migrate/20230704233431_create_table_to_be_deleted_git_refs.rb index 76fdd79dee7ff2..c15bdd708b9a21 100644 --- a/db/migrate/20230704233431_create_table_to_be_deleted_git_refs.rb +++ b/db/migrate/20230704233431_create_table_to_be_deleted_git_refs.rb @@ -5,11 +5,13 @@ class CreateTableToBeDeletedGitRefs < Gitlab::Database::Migration[2.1] def up create_table :to_be_deleted_git_refs, id: false do |t| + # TODO: Need to set this to default to sequence value. Maybe just create t.primary_key and then do the swap in a + # later MR t.bigint :id, null: false # TODO: Is it worth making this generic. Repository model uses `#container` so we could support (presumably) group # wikis and so-on here. Does it matter? Or should we just build that later since our main focus on performance is # for projects. - t.bigint :project, index: true, null: false # Do not bother with foreign key as it provides not benefit and has a performance cost. These get cleaned up over time anyway. + t.bigint :project_id, index: true, null: false # Do not bother with foreign key as it provides not benefit and has a performance cost. These get cleaned up over time anyway. t.text :ref, null: false t.bigint :partition_id, null: false, index: true, default: 1 t.integer :status, null: false, index: true, default: 1, limit: 2 diff --git a/db/structure.sql b/db/structure.sql index 52b12f8a5ba521..5e6e94e4c62a81 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -23517,7 +23517,7 @@ ALTER SEQUENCE timelogs_id_seq OWNED BY timelogs.id; CREATE TABLE to_be_deleted_git_refs ( id bigint NOT NULL, - project bigint NOT NULL, + project_id bigint NOT NULL, ref text NOT NULL, partition_id bigint DEFAULT 1 NOT NULL, status smallint DEFAULT 1 NOT NULL, @@ -33276,7 +33276,7 @@ CREATE INDEX index_to_be_deleted_git_refs_on_partition_id ON to_be_deleted_git_r CREATE INDEX p_to_be_deleted_git_refs_project_idx ON ONLY p_to_be_deleted_git_refs USING btree (project); -CREATE INDEX index_to_be_deleted_git_refs_on_project ON to_be_deleted_git_refs USING btree (project); +CREATE INDEX index_to_be_deleted_git_refs_on_project ON to_be_deleted_git_refs USING btree (project_id); CREATE INDEX p_to_be_deleted_git_refs_status_idx ON ONLY p_to_be_deleted_git_refs USING btree (status); diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index 07f3d24aaec137..9a83fc4983e756 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -751,10 +751,12 @@ def delete_branch(branch_name) end def async_delete_refs(*refs) + # TODO: Should we just try and support all containers? It would probably require an extra column for + # container_type and then we'd need to group by container_id, container_type when batch deleting. + raise "async_delete_refs only supports project repositories" unless container.is_a?(Project) + records = refs.map do |ref| - project = @gl_repository.project - raise "async_delete_refs only supports project repositories" unless project - ToBeDeledGitRef.new(project_id: project.id, ref: ref) + ToBeDeletedGitRef.new(project_id: container.id, ref: ref) end ToBeDeletedGitRef.bulk_insert!(records) -- GitLab From 2638f098f985b001584eda797676c6bf4e5d2f16 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Thu, 6 Jul 2023 14:31:42 +1000 Subject: [PATCH 03/38] Fix migrations to create p_to_be_deleted_git_refs --- ...431_create_table_to_be_deleted_git_refs.rb | 26 +++++-- ...onstraint_to_to_be_deleted_git_refs_ref.rb | 4 +- ...042842_partition_to_be_deleted_git_refs.rb | 30 -------- ...ble_to_be_deleted_git_refs_to_partition.rb | 32 -------- db/schema_migrations/20230705042842 | 1 - db/schema_migrations/20230705062237 | 1 - db/structure.sql | 74 +++++++------------ 7 files changed, 48 insertions(+), 120 deletions(-) delete mode 100644 db/migrate/20230705042842_partition_to_be_deleted_git_refs.rb delete mode 100644 db/migrate/20230705062237_convert_table_to_be_deleted_git_refs_to_partition.rb delete mode 100644 db/schema_migrations/20230705042842 delete mode 100644 db/schema_migrations/20230705062237 diff --git a/db/migrate/20230704233431_create_table_to_be_deleted_git_refs.rb b/db/migrate/20230704233431_create_table_to_be_deleted_git_refs.rb index c15bdd708b9a21..fafe15669c6b28 100644 --- a/db/migrate/20230704233431_create_table_to_be_deleted_git_refs.rb +++ b/db/migrate/20230704233431_create_table_to_be_deleted_git_refs.rb @@ -4,10 +4,14 @@ class CreateTableToBeDeletedGitRefs < Gitlab::Database::Migration[2.1] enable_lock_retries! def up - create_table :to_be_deleted_git_refs, id: false do |t| - # TODO: Need to set this to default to sequence value. Maybe just create t.primary_key and then do the swap in a - # later MR - t.bigint :id, null: false + options = { + primary_key: [:id, :partition_id], + options: 'PARTITION BY LIST (partition_id)', + if_not_exists: true + } + + create_table(:p_to_be_deleted_git_refs, **options) do |t| + t.bigserial :id, null: false # TODO: Is it worth making this generic. Repository model uses `#container` so we could support (presumably) group # wikis and so-on here. Does it matter? Or should we just build that later since our main focus on performance is # for projects. @@ -17,14 +21,20 @@ def up t.integer :status, null: false, index: true, default: 1, limit: 2 t.timestamps null: false - - t.index [:id, :partition_id], unique: true, name: :index_to_be_deleted_git_refs_on_id_and_partition_id end - execute "ALTER TABLE to_be_deleted_git_refs ADD CONSTRAINT to_be_deleted_git_refs_pkey PRIMARY KEY USING INDEX index_to_be_deleted_git_refs_on_id_and_partition_id" + connection.execute(<<~SQL) + CREATE TABLE IF NOT EXISTS gitlab_partitions_dynamic.to_be_deleted_git_refs_1 + PARTITION OF p_to_be_deleted_git_refs + FOR VALUES IN (1); + SQL end def down - drop_table :to_be_deleted_git_refs + connection.execute(<<~SQL) + DROP TABLE IF EXISTS gitlab_partitions_dynamic.to_be_deleted_git_refs_1; + SQL + + drop_table :p_to_be_deleted_git_refs end end diff --git a/db/migrate/20230704235236_add_constraint_to_to_be_deleted_git_refs_ref.rb b/db/migrate/20230704235236_add_constraint_to_to_be_deleted_git_refs_ref.rb index c410094361e718..746e35037366ab 100644 --- a/db/migrate/20230704235236_add_constraint_to_to_be_deleted_git_refs_ref.rb +++ b/db/migrate/20230704235236_add_constraint_to_to_be_deleted_git_refs_ref.rb @@ -6,10 +6,10 @@ class AddConstraintToToBeDeletedGitRefsRef < Gitlab::Database::Migration[2.1] def up # TODO: Is there a reliable upper limit here? If something exceeds this upper limit we won't be able to delete it. Should we make it very large or just not have a limit? - add_text_limit :to_be_deleted_git_refs, :ref, 1024 + add_text_limit :p_to_be_deleted_git_refs, :ref, 1024 end def down - remove_text_limit :to_be_deleted_git_refs, :ref + remove_text_limit :p_to_be_deleted_git_refs, :ref end end diff --git a/db/migrate/20230705042842_partition_to_be_deleted_git_refs.rb b/db/migrate/20230705042842_partition_to_be_deleted_git_refs.rb deleted file mode 100644 index b73395638493ff..00000000000000 --- a/db/migrate/20230705042842_partition_to_be_deleted_git_refs.rb +++ /dev/null @@ -1,30 +0,0 @@ -# frozen_string_literal: true - -class PartitionToBeDeletedGitRefs < Gitlab::Database::Migration[2.1] - include Gitlab::Database::PartitioningMigrationHelpers::TableManagementHelpers - - disable_ddl_transaction! - - TABLE_NAME = :to_be_deleted_git_refs - PARENT_TABLE_NAME = :p_to_be_deleted_git_refs - FIRST_PARTITION = 1 - PARTITION_COLUMN = :partition_id - - def up - prepare_constraint_for_list_partitioning( - table_name: TABLE_NAME, - partitioning_column: PARTITION_COLUMN, - parent_table_name: PARENT_TABLE_NAME, - initial_partitioning_value: FIRST_PARTITION - ) - end - - def down - revert_preparing_constraint_for_list_partitioning( - table_name: TABLE_NAME, - partitioning_column: PARTITION_COLUMN, - parent_table_name: PARENT_TABLE_NAME, - initial_partitioning_value: FIRST_PARTITION - ) - end -end diff --git a/db/migrate/20230705062237_convert_table_to_be_deleted_git_refs_to_partition.rb b/db/migrate/20230705062237_convert_table_to_be_deleted_git_refs_to_partition.rb deleted file mode 100644 index 3c27d057294cdc..00000000000000 --- a/db/migrate/20230705062237_convert_table_to_be_deleted_git_refs_to_partition.rb +++ /dev/null @@ -1,32 +0,0 @@ -# frozen_string_literal: true - - -class ConvertTableToBeDeletedGitRefsToPartition < Gitlab::Database::Migration[2.1] - include Gitlab::Database::PartitioningMigrationHelpers::TableManagementHelpers - - disable_ddl_transaction! - - TABLE_NAME = :to_be_deleted_git_refs - PARENT_TABLE_NAME = :p_to_be_deleted_git_refs - FIRST_PARTITION = 1 - PARTITION_COLUMN = :partition_id - - def up - convert_table_to_first_list_partition( - table_name: TABLE_NAME, - partitioning_column: PARTITION_COLUMN, - parent_table_name: PARENT_TABLE_NAME, - initial_partitioning_value: FIRST_PARTITION, - lock_tables: [TABLE_NAME] - ) - end - - def down - revert_converting_table_to_first_list_partition( - table_name: TABLE_NAME, - partitioning_column: PARTITION_COLUMN, - parent_table_name: PARENT_TABLE_NAME, - initial_partitioning_value: FIRST_PARTITION - ) - end -end diff --git a/db/schema_migrations/20230705042842 b/db/schema_migrations/20230705042842 deleted file mode 100644 index e7fdba22afc5b9..00000000000000 --- a/db/schema_migrations/20230705042842 +++ /dev/null @@ -1 +0,0 @@ -db9e41402001c9acf2285b5937e8ffd9c8020bebf48753e7cc7bf2ea0a3e3e55 \ No newline at end of file diff --git a/db/schema_migrations/20230705062237 b/db/schema_migrations/20230705062237 deleted file mode 100644 index f61f048cfaadef..00000000000000 --- a/db/schema_migrations/20230705062237 +++ /dev/null @@ -1 +0,0 @@ -19a6eb559d647964753203a5e3b41198fbb681c2b1148fe8312024e64134b04a \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index 5e6e94e4c62a81..80b886f87d8514 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -554,6 +554,18 @@ CREATE TABLE security_findings ( ) PARTITION BY LIST (partition_number); +CREATE TABLE p_to_be_deleted_git_refs ( + id bigint NOT NULL, + project_id bigint NOT NULL, + ref text NOT NULL, + partition_id bigint DEFAULT 1 NOT NULL, + status smallint DEFAULT 1 NOT NULL, + created_at timestamp(6) without time zone NOT NULL, + updated_at timestamp(6) without time zone NOT NULL, + CONSTRAINT check_1310a6c4df CHECK ((char_length(ref) <= 1024)) +) +PARTITION BY LIST (partition_id); + CREATE TABLE value_stream_dashboard_counts ( id bigint NOT NULL, namespace_id bigint NOT NULL, @@ -19446,17 +19458,14 @@ CREATE SEQUENCE p_ci_job_annotations_id_seq ALTER SEQUENCE p_ci_job_annotations_id_seq OWNED BY p_ci_job_annotations.id; -CREATE TABLE p_to_be_deleted_git_refs ( - id bigint NOT NULL, - project bigint NOT NULL, - ref text NOT NULL, - partition_id bigint DEFAULT 1 NOT NULL, - status smallint DEFAULT 1 NOT NULL, - created_at timestamp(6) without time zone NOT NULL, - updated_at timestamp(6) without time zone NOT NULL, - CONSTRAINT check_ca4b0f9d0f CHECK ((char_length(ref) <= 1024)) -) -PARTITION BY LIST (partition_id); +CREATE SEQUENCE p_to_be_deleted_git_refs_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + +ALTER SEQUENCE p_to_be_deleted_git_refs_id_seq OWNED BY p_to_be_deleted_git_refs.id; CREATE TABLE packages_build_infos ( id bigint NOT NULL, @@ -23515,18 +23524,6 @@ CREATE SEQUENCE timelogs_id_seq ALTER SEQUENCE timelogs_id_seq OWNED BY timelogs.id; -CREATE TABLE to_be_deleted_git_refs ( - id bigint NOT NULL, - project_id bigint NOT NULL, - ref text NOT NULL, - partition_id bigint DEFAULT 1 NOT NULL, - status smallint DEFAULT 1 NOT NULL, - created_at timestamp(6) without time zone NOT NULL, - updated_at timestamp(6) without time zone NOT NULL, - CONSTRAINT check_ca4b0f9d0f CHECK ((char_length(ref) <= 1024)) -); -ALTER TABLE ONLY p_to_be_deleted_git_refs ATTACH PARTITION to_be_deleted_git_refs FOR VALUES IN ('1'); - CREATE TABLE todos ( id integer NOT NULL, user_id integer NOT NULL, @@ -25792,6 +25789,8 @@ ALTER TABLE ONLY p_ci_builds_metadata ALTER COLUMN id SET DEFAULT nextval('ci_bu ALTER TABLE ONLY p_ci_job_annotations ALTER COLUMN id SET DEFAULT nextval('p_ci_job_annotations_id_seq'::regclass); +ALTER TABLE ONLY p_to_be_deleted_git_refs ALTER COLUMN id SET DEFAULT nextval('p_to_be_deleted_git_refs_id_seq'::regclass); + ALTER TABLE ONLY packages_build_infos ALTER COLUMN id SET DEFAULT nextval('packages_build_infos_id_seq'::regclass); ALTER TABLE ONLY packages_composer_cache_files ALTER COLUMN id SET DEFAULT nextval('packages_composer_cache_files_id_seq'::regclass); @@ -28639,9 +28638,6 @@ ALTER TABLE ONLY timelog_categories ALTER TABLE ONLY timelogs ADD CONSTRAINT timelogs_pkey PRIMARY KEY (id); -ALTER TABLE ONLY to_be_deleted_git_refs - ADD CONSTRAINT to_be_deleted_git_refs_pkey PRIMARY KEY (id, partition_id); - ALTER TABLE ONLY todos ADD CONSTRAINT todos_pkey PRIMARY KEY (id); @@ -32364,6 +32360,12 @@ CREATE UNIQUE INDEX index_p_ci_job_annotations_on_partition_id_job_id_name ON ON CREATE INDEX index_p_ci_runner_machine_builds_on_runner_machine_id ON ONLY p_ci_runner_machine_builds USING btree (runner_machine_id); +CREATE INDEX index_p_to_be_deleted_git_refs_on_partition_id ON ONLY p_to_be_deleted_git_refs USING btree (partition_id); + +CREATE INDEX index_p_to_be_deleted_git_refs_on_project_id ON ONLY p_to_be_deleted_git_refs USING btree (project_id); + +CREATE INDEX index_p_to_be_deleted_git_refs_on_status ON ONLY p_to_be_deleted_git_refs USING btree (status); + CREATE INDEX index_packages_build_infos_on_pipeline_id ON packages_build_infos USING btree (pipeline_id); CREATE INDEX index_packages_build_infos_package_id_id ON packages_build_infos USING btree (package_id, id); @@ -33270,18 +33272,6 @@ CREATE INDEX index_timelogs_on_spent_at ON timelogs USING btree (spent_at) WHERE CREATE INDEX index_timelogs_on_user_id ON timelogs USING btree (user_id); -CREATE INDEX p_to_be_deleted_git_refs_partition_id_idx ON ONLY p_to_be_deleted_git_refs USING btree (partition_id); - -CREATE INDEX index_to_be_deleted_git_refs_on_partition_id ON to_be_deleted_git_refs USING btree (partition_id); - -CREATE INDEX p_to_be_deleted_git_refs_project_idx ON ONLY p_to_be_deleted_git_refs USING btree (project); - -CREATE INDEX index_to_be_deleted_git_refs_on_project ON to_be_deleted_git_refs USING btree (project_id); - -CREATE INDEX p_to_be_deleted_git_refs_status_idx ON ONLY p_to_be_deleted_git_refs USING btree (status); - -CREATE INDEX index_to_be_deleted_git_refs_on_status ON to_be_deleted_git_refs USING btree (status); - CREATE INDEX index_todos_on_author_id ON todos USING btree (author_id); CREATE INDEX index_todos_on_author_id_and_created_at ON todos USING btree (author_id, created_at); @@ -35392,16 +35382,8 @@ ALTER INDEX p_ci_builds_user_id_name_created_at_idx ATTACH PARTITION index_secur ALTER INDEX p_ci_builds_name_id_idx ATTACH PARTITION index_security_ci_builds_on_name_and_id_parser_features; -ALTER INDEX p_to_be_deleted_git_refs_partition_id_idx ATTACH PARTITION index_to_be_deleted_git_refs_on_partition_id; - -ALTER INDEX p_to_be_deleted_git_refs_project_idx ATTACH PARTITION index_to_be_deleted_git_refs_on_project; - -ALTER INDEX p_to_be_deleted_git_refs_status_idx ATTACH PARTITION index_to_be_deleted_git_refs_on_status; - ALTER INDEX p_ci_builds_scheduled_at_idx ATTACH PARTITION partial_index_ci_builds_on_scheduled_at_with_scheduled_jobs; -ALTER INDEX p_to_be_deleted_git_refs_pkey ATTACH PARTITION to_be_deleted_git_refs_pkey; - ALTER INDEX p_ci_builds_token_encrypted_partition_id_idx ATTACH PARTITION unique_ci_builds_token_encrypted_and_partition_id; CREATE TRIGGER chat_names_loose_fk_trigger AFTER DELETE ON chat_names REFERENCING OLD TABLE AS old_table FOR EACH STATEMENT EXECUTE FUNCTION insert_into_loose_foreign_keys_deleted_records(); -- GitLab From 5da1ffd3823036d7b44f8bcb6a626b0d376b01b3 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Thu, 6 Jul 2023 14:38:51 +1000 Subject: [PATCH 04/38] WIP: enqueueing is working --- lib/gitlab/git/repository.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index 9a83fc4983e756..bdd8a2b62e8e79 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -756,7 +756,7 @@ def async_delete_refs(*refs) raise "async_delete_refs only supports project repositories" unless container.is_a?(Project) records = refs.map do |ref| - ToBeDeletedGitRef.new(project_id: container.id, ref: ref) + ToBeDeletedGitRef.new(project_id: container.id, ref: ref, created_at: Time.current, updated_at: Time.current) end ToBeDeletedGitRef.bulk_insert!(records) -- GitLab From 6fd995edad6fff9a72a22ce657691d2bfd0ad180 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Thu, 6 Jul 2023 17:40:40 +1000 Subject: [PATCH 05/38] WIP: Basic scaffolding of workers and models --- app/models/to_be_deleted_git_ref.rb | 1 + ...deleted_git_ref_cleanup_project_service.rb | 18 ++++++ ...leted_git_ref_cleanup_scheduler_service.rb | 12 ++++ .../deleted_git_ref_cleanup_project_worker.rb | 11 ++++ ...eleted_git_ref_cleanup_scheduler_worker.rb | 12 ++++ config/initializers/1_settings.rb | 3 + config/initializers/postgres_partitioning.rb | 3 +- db/structure.sql | 36 +++++------ ...deleted_git_ref_cleanup_project_service.rb | 61 +++++++++++++++++++ ...leted_git_ref_cleanup_scheduler_service.rb | 30 +++++++++ .../deleted_git_ref_cleanup_project_worker.rb | 19 ++++++ ...d_git_ref_cleanup_scheduler_worker_spec.rb | 19 ++++++ 12 files changed, 206 insertions(+), 19 deletions(-) create mode 100644 app/services/deleted_git_ref_cleanup_project_service.rb create mode 100644 app/services/deleted_git_ref_cleanup_scheduler_service.rb create mode 100644 app/workers/deleted_git_ref_cleanup_project_worker.rb create mode 100644 app/workers/deleted_git_ref_cleanup_scheduler_worker.rb create mode 100644 spec/services/deleted_git_ref_cleanup_project_service.rb create mode 100644 spec/services/deleted_git_ref_cleanup_scheduler_service.rb create mode 100644 spec/workers/deleted_git_ref_cleanup_project_worker.rb create mode 100644 spec/workers/deleted_git_ref_cleanup_scheduler_worker_spec.rb diff --git a/app/models/to_be_deleted_git_ref.rb b/app/models/to_be_deleted_git_ref.rb index 62da001fdee399..0559b9aab1bd86 100644 --- a/app/models/to_be_deleted_git_ref.rb +++ b/app/models/to_be_deleted_git_ref.rb @@ -5,6 +5,7 @@ class ToBeDeletedGitRef < ApplicationRecord include BulkInsertSafe include PartitionedTable + include EachBatch self.table_name = 'p_to_be_deleted_git_refs' self.primary_key = :id diff --git a/app/services/deleted_git_ref_cleanup_project_service.rb b/app/services/deleted_git_ref_cleanup_project_service.rb new file mode 100644 index 00000000000000..8bfab7805d8d6c --- /dev/null +++ b/app/services/deleted_git_ref_cleanup_project_service.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +class DeletedGitRefCleanupProjectService + def initialize(project_id) + @project_id = project_id + end + + def execute + project = Project.find(@project_id) + ToBeDeletedGitRef.where(project_id: @project_id).each_batch do |batch| + batch.each do |to_be_deleted_git_ref| + ref = to_be_deleted_git_ref.ref + + project.repository.delete_refs(ref) + end + end + end +end diff --git a/app/services/deleted_git_ref_cleanup_scheduler_service.rb b/app/services/deleted_git_ref_cleanup_scheduler_service.rb new file mode 100644 index 00000000000000..6a529195d401a7 --- /dev/null +++ b/app/services/deleted_git_ref_cleanup_scheduler_service.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +class DeletedGitRefCleanupSchedulerService + def execute + ToBeDeletedGitRef.select(:project_id).distinct.each_batch do |records| + records.each do |record| + project_id = record.project_id + DeletedGitRefCleanupProjectWorker.perform_async(project_id) + end + end + end +end diff --git a/app/workers/deleted_git_ref_cleanup_project_worker.rb b/app/workers/deleted_git_ref_cleanup_project_worker.rb new file mode 100644 index 00000000000000..5bd633451f8d8f --- /dev/null +++ b/app/workers/deleted_git_ref_cleanup_project_worker.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +class DeletedGitRefCleanupProjectWorker + include ApplicationWorker + + feature_category :gitaly + + def perform(project_id) + DeletedGitRefCleanupProjectService.new(project_id).execute + end +end diff --git a/app/workers/deleted_git_ref_cleanup_scheduler_worker.rb b/app/workers/deleted_git_ref_cleanup_scheduler_worker.rb new file mode 100644 index 00000000000000..7e6fb49e7efe2b --- /dev/null +++ b/app/workers/deleted_git_ref_cleanup_scheduler_worker.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +class DeletedGitRefCleanupSchedulerWorker + include ApplicationWorker + include CronjobQueue + + feature_category :gitaly + + def perform + DeletedGitRefCleanupSchedulerService.new.execute + end +end diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index 50d26236a29e8b..63c1ea9f89dc76 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -661,6 +661,9 @@ Settings.cron_jobs['loose_foreign_keys_cleanup_worker'] ||= {} Settings.cron_jobs['loose_foreign_keys_cleanup_worker']['cron'] ||= '*/1 * * * *' Settings.cron_jobs['loose_foreign_keys_cleanup_worker']['job_class'] = 'LooseForeignKeys::CleanupWorker' +Settings.cron_jobs['deleted_git_ref_cleanup_scheduler_worker'] ||= {} +Settings.cron_jobs['deleted_git_ref_cleanup_scheduler_worker']['cron'] ||= '*/1 * * * *' +Settings.cron_jobs['deleted_git_ref_cleanup_scheduler_worker']['job_class'] = 'DeletedGitRefCleanupSchedulerWorker' Settings.cron_jobs['ci_runner_versions_reconciliation_worker'] ||= {} Settings.cron_jobs['ci_runner_versions_reconciliation_worker']['cron'] ||= '@daily' Settings.cron_jobs['ci_runner_versions_reconciliation_worker']['job_class'] = 'Ci::Runners::ReconcileExistingRunnerVersionsCronWorker' diff --git a/config/initializers/postgres_partitioning.rb b/config/initializers/postgres_partitioning.rb index 1fbe038f0bd964..2e41cc26296b66 100644 --- a/config/initializers/postgres_partitioning.rb +++ b/config/initializers/postgres_partitioning.rb @@ -7,7 +7,8 @@ LooseForeignKeys::DeletedRecord, Gitlab::Database::BackgroundMigration::BatchedJobTransitionLog, Ci::RunnerManagerBuild, - Ci::JobAnnotation + Ci::JobAnnotation, + ToBeDeletedGitRef ]) if Gitlab.ee? diff --git a/db/structure.sql b/db/structure.sql index 80b886f87d8514..0da92fab791b11 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -488,7 +488,7 @@ PARTITION BY RANGE (created_at); CREATE TABLE p_ci_job_annotations ( id bigint NOT NULL, - partition_id bigint NOT NULL, + partition_id bigint DEFAULT 100 NOT NULL, job_id bigint NOT NULL, name text NOT NULL, data jsonb DEFAULT '[]'::jsonb NOT NULL, @@ -498,7 +498,7 @@ CREATE TABLE p_ci_job_annotations ( PARTITION BY LIST (partition_id); CREATE TABLE p_ci_runner_machine_builds ( - partition_id bigint NOT NULL, + partition_id bigint DEFAULT 100 NOT NULL, build_id bigint NOT NULL, runner_machine_id bigint NOT NULL ) @@ -554,18 +554,6 @@ CREATE TABLE security_findings ( ) PARTITION BY LIST (partition_number); -CREATE TABLE p_to_be_deleted_git_refs ( - id bigint NOT NULL, - project_id bigint NOT NULL, - ref text NOT NULL, - partition_id bigint DEFAULT 1 NOT NULL, - status smallint DEFAULT 1 NOT NULL, - created_at timestamp(6) without time zone NOT NULL, - updated_at timestamp(6) without time zone NOT NULL, - CONSTRAINT check_1310a6c4df CHECK ((char_length(ref) <= 1024)) -) -PARTITION BY LIST (partition_id); - CREATE TABLE value_stream_dashboard_counts ( id bigint NOT NULL, namespace_id bigint NOT NULL, @@ -13175,7 +13163,7 @@ CREATE TABLE p_ci_builds ( scheduling_type smallint, id bigint NOT NULL, stage_id bigint, - partition_id bigint NOT NULL, + partition_id bigint DEFAULT 100 NOT NULL, CONSTRAINT check_1e2fbd1b39 CHECK ((lock_version IS NOT NULL)) ) PARTITION BY LIST (partition_id); @@ -13233,7 +13221,7 @@ CREATE TABLE ci_builds ( scheduling_type smallint, id bigint DEFAULT nextval('ci_builds_id_seq'::regclass) NOT NULL, stage_id bigint, - partition_id bigint NOT NULL, + partition_id bigint DEFAULT 100 NOT NULL, CONSTRAINT check_1e2fbd1b39 CHECK ((lock_version IS NOT NULL)) ); ALTER TABLE ONLY p_ci_builds ATTACH PARTITION ci_builds FOR VALUES IN ('100'); @@ -13253,7 +13241,7 @@ CREATE TABLE p_ci_builds_metadata ( id bigint NOT NULL, runtime_runner_features jsonb DEFAULT '{}'::jsonb NOT NULL, id_tokens jsonb DEFAULT '{}'::jsonb NOT NULL, - partition_id bigint NOT NULL, + partition_id bigint DEFAULT 100 NOT NULL, debug_trace_enabled boolean DEFAULT false NOT NULL ) PARTITION BY LIST (partition_id); @@ -13282,7 +13270,7 @@ CREATE TABLE ci_builds_metadata ( id bigint DEFAULT nextval('ci_builds_metadata_id_seq'::regclass) NOT NULL, runtime_runner_features jsonb DEFAULT '{}'::jsonb NOT NULL, id_tokens jsonb DEFAULT '{}'::jsonb NOT NULL, - partition_id bigint NOT NULL, + partition_id bigint DEFAULT 100 NOT NULL, debug_trace_enabled boolean DEFAULT false NOT NULL ); ALTER TABLE ONLY p_ci_builds_metadata ATTACH PARTITION ci_builds_metadata FOR VALUES IN ('100'); @@ -19458,6 +19446,18 @@ CREATE SEQUENCE p_ci_job_annotations_id_seq ALTER SEQUENCE p_ci_job_annotations_id_seq OWNED BY p_ci_job_annotations.id; +CREATE TABLE p_to_be_deleted_git_refs ( + id bigint NOT NULL, + project_id bigint NOT NULL, + ref text NOT NULL, + partition_id bigint DEFAULT 1 NOT NULL, + status smallint DEFAULT 1 NOT NULL, + created_at timestamp(6) without time zone NOT NULL, + updated_at timestamp(6) without time zone NOT NULL, + CONSTRAINT check_1310a6c4df CHECK ((char_length(ref) <= 1024)) +) +PARTITION BY LIST (partition_id); + CREATE SEQUENCE p_to_be_deleted_git_refs_id_seq START WITH 1 INCREMENT BY 1 diff --git a/spec/services/deleted_git_ref_cleanup_project_service.rb b/spec/services/deleted_git_ref_cleanup_project_service.rb new file mode 100644 index 00000000000000..0df3d7ed57c671 --- /dev/null +++ b/spec/services/deleted_git_ref_cleanup_project_service.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe DeletedGitRefCleanupProjectService do + let(:service) { described_class.new(project1.id) } + let_it_be(:project1) { create(:project, :repository) } + let_it_be(:project2) { create(:project, :repository) } + + describe '#execute' do + before do + project1.repository.create_ref('HEAD','refs/test/ref-to-not-be-deleted') + project1.repository.create_ref('HEAD', 'refs/test/some-ref') + project1.repository.create_ref('HEAD', 'refs/test/another-ref') + project2.repository.create_ref('HEAD', 'refs/test/also-a-ref') + + ToBeDeletedGitRef.create!(project_id: project1.id, ref: 'refs/test/some-ref') + ToBeDeletedGitRef.create!(project_id: project1.id, ref: 'refs/test/another-ref') + ToBeDeletedGitRef.create!(project_id: project2.id, ref: 'refs/test/also-a-ref') + end + + it 'deletes the named refs in batches for the given project only' do + expect(test_refs(project1)).to include( + 'refs/test/ref-to-not-be-deleted', + 'refs/test/some-ref', + 'refs/test/another-ref') + + service.execute + + expect(test_refs(project1)).to include( + 'refs/test/ref-to-not-be-deleted') + + expect(test_refs(project1)).not_to include( + 'refs/test/some-ref', + 'refs/test/another-ref') + + expect(test_refs(project2)).to include( + 'refs/test/also-a-ref') + end + + def test_refs(project) + project.repository.list_refs(['refs/test/']).map(&:name) + end + + it 'returns stats' do + # TODO: + end + + it 'acquires a lock for the given project_id to avoid running duplicate instances' do + # TODO: + end + + it 'does nothing when the project does not exist' do + # TODO: + end + + it 'stops after it reaches limit of deleted refs' do + # TODO: + end + end +end diff --git a/spec/services/deleted_git_ref_cleanup_scheduler_service.rb b/spec/services/deleted_git_ref_cleanup_scheduler_service.rb new file mode 100644 index 00000000000000..8cc45e7aafbba2 --- /dev/null +++ b/spec/services/deleted_git_ref_cleanup_scheduler_service.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe DeletedGitRefCleanupSchedulerService do + let(:service) { described_class.new } + + describe '#execute' do + before do + ToBeDeletedGitRef.create!(project_id: 123, ref: 'some-ref') + ToBeDeletedGitRef.create!(project_id: 123, ref: 'another-ref') + ToBeDeletedGitRef.create!(project_id: 456, ref: 'also-a-ref') + end + + it 'schedules DeletedGitRefCleanupProjectWorker for each project in ToBeDeletedGitRef' do + expect(DeletedGitRefCleanupProjectWorker).to receive(:perform_async).with(123) + expect(DeletedGitRefCleanupProjectWorker).to receive(:perform_async).with(456) + + service.execute + end + + it 'returns stats' do + # TODO: + end + + it 'acquires a lock to avoid running duplicate instances' do + # TODO: + end + end +end diff --git a/spec/workers/deleted_git_ref_cleanup_project_worker.rb b/spec/workers/deleted_git_ref_cleanup_project_worker.rb new file mode 100644 index 00000000000000..97332883f30cb5 --- /dev/null +++ b/spec/workers/deleted_git_ref_cleanup_project_worker.rb @@ -0,0 +1,19 @@ +require 'spec_helper' + +RSpec.describe DeletedGitRefCleanupProjectWorker do + let(:service) { instance_double(DeletedGitRefCleanupProjectService) } + let(:worker) { described_class.new } + + describe '#perform' do + it 'delegates to DeletedGitRefCleanupProjectService' do + expect(DeletedGitRefCleanupProjectService).to receive(:new).with(123).and_return(service) + expect(service).to receive(:execute) + + worker.perform(123) + end + + it 'logs stats' do + # TODO: log_extra_metadata_on_done(:stats, stats) + end + end +end diff --git a/spec/workers/deleted_git_ref_cleanup_scheduler_worker_spec.rb b/spec/workers/deleted_git_ref_cleanup_scheduler_worker_spec.rb new file mode 100644 index 00000000000000..0fce1e46e28d2f --- /dev/null +++ b/spec/workers/deleted_git_ref_cleanup_scheduler_worker_spec.rb @@ -0,0 +1,19 @@ +require 'spec_helper' + +RSpec.describe DeletedGitRefCleanupSchedulerWorker do + let(:service) { instance_double(DeletedGitRefCleanupSchedulerService) } + let(:worker) { described_class.new } + + describe '#perform' do + it 'delegates to DeletedGitRefCleanupSchedulerService' do + expect(DeletedGitRefCleanupSchedulerService).to receive(:new).and_return(service) + expect(service).to receive(:execute) + + worker.perform + end + + it 'logs stats' do + # TODO: log_extra_metadata_on_done(:stats, stats) + end + end +end -- GitLab From 62ef9ce8dde9dc7485765cb2f2445f68b04e5035 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Fri, 7 Jul 2023 10:27:51 +1000 Subject: [PATCH 06/38] WIP: Mark records processed --- app/models/to_be_deleted_git_ref.rb | 8 +++++ ...deleted_git_ref_cleanup_project_service.rb | 2 ++ ...leted_git_ref_cleanup_scheduler_service.rb | 3 +- ...deleted_git_ref_cleanup_project_service.rb | 33 +++++++++++-------- ...git_ref_cleanup_scheduler_service_spec.rb} | 10 +++--- 5 files changed, 37 insertions(+), 19 deletions(-) rename spec/services/{deleted_git_ref_cleanup_scheduler_service.rb => deleted_git_ref_cleanup_scheduler_service_spec.rb} (60%) diff --git a/app/models/to_be_deleted_git_ref.rb b/app/models/to_be_deleted_git_ref.rb index 0559b9aab1bd86..8b7ddcbdd2e650 100644 --- a/app/models/to_be_deleted_git_ref.rb +++ b/app/models/to_be_deleted_git_ref.rb @@ -36,4 +36,12 @@ class ToBeDeletedGitRef < ApplicationRecord end enum status: { pending: 1, processed: 2 }, _prefix: :status + + def self.mark_records_processed(records) + # TODO: Look at app/models/loose_foreign_keys/deleted_record.rb which is grouping by partition when doing this + # update. This is presumably because we've not loaded `partition_id` and therefore this will be updating by `id` + # which needs to search all partitions. If we assume that there is likely normally just 1 partition and sometimes 2 + # partitions this may be fine though. + records.update_all(status: :processed) + end end diff --git a/app/services/deleted_git_ref_cleanup_project_service.rb b/app/services/deleted_git_ref_cleanup_project_service.rb index 8bfab7805d8d6c..5ef77354abacdb 100644 --- a/app/services/deleted_git_ref_cleanup_project_service.rb +++ b/app/services/deleted_git_ref_cleanup_project_service.rb @@ -13,6 +13,8 @@ def execute project.repository.delete_refs(ref) end + + ToBeDeletedGitRef.mark_records_processed(batch) end end end diff --git a/app/services/deleted_git_ref_cleanup_scheduler_service.rb b/app/services/deleted_git_ref_cleanup_scheduler_service.rb index 6a529195d401a7..34fe3f14ceac8e 100644 --- a/app/services/deleted_git_ref_cleanup_scheduler_service.rb +++ b/app/services/deleted_git_ref_cleanup_scheduler_service.rb @@ -2,7 +2,8 @@ class DeletedGitRefCleanupSchedulerService def execute - ToBeDeletedGitRef.select(:project_id).distinct.each_batch do |records| + # TODO: What is an efficient way to get all unique project_id for pending refs from this table in batches? + ToBeDeletedGitRef.status_pending.select(:project_id).distinct.each_batch do |records| records.each do |record| project_id = record.project_id DeletedGitRefCleanupProjectWorker.perform_async(project_id) diff --git a/spec/services/deleted_git_ref_cleanup_project_service.rb b/spec/services/deleted_git_ref_cleanup_project_service.rb index 0df3d7ed57c671..f870e054bcac76 100644 --- a/spec/services/deleted_git_ref_cleanup_project_service.rb +++ b/spec/services/deleted_git_ref_cleanup_project_service.rb @@ -6,24 +6,23 @@ let(:service) { described_class.new(project1.id) } let_it_be(:project1) { create(:project, :repository) } let_it_be(:project2) { create(:project, :repository) } + let!(:project1_ref1) { ToBeDeletedGitRef.create!(project_id: project1.id, ref: 'refs/test/project1-ref1') } + let!(:project1_ref2) { ToBeDeletedGitRef.create!(project_id: project1.id, ref: 'refs/test/project1-ref2') } + let!(:project2_ref1) { ToBeDeletedGitRef.create!(project_id: project1.id, ref: 'refs/test/project2-ref1') } describe '#execute' do before do - project1.repository.create_ref('HEAD','refs/test/ref-to-not-be-deleted') - project1.repository.create_ref('HEAD', 'refs/test/some-ref') - project1.repository.create_ref('HEAD', 'refs/test/another-ref') - project2.repository.create_ref('HEAD', 'refs/test/also-a-ref') - - ToBeDeletedGitRef.create!(project_id: project1.id, ref: 'refs/test/some-ref') - ToBeDeletedGitRef.create!(project_id: project1.id, ref: 'refs/test/another-ref') - ToBeDeletedGitRef.create!(project_id: project2.id, ref: 'refs/test/also-a-ref') + project1.repository.create_ref('HEAD', 'refs/test/ref-to-not-be-deleted') + project1.repository.create_ref('HEAD', project1_ref1.ref) + project1.repository.create_ref('HEAD', project1_ref2.ref) + project2.repository.create_ref('HEAD', project2_ref1.ref) end it 'deletes the named refs in batches for the given project only' do expect(test_refs(project1)).to include( 'refs/test/ref-to-not-be-deleted', - 'refs/test/some-ref', - 'refs/test/another-ref') + 'refs/test/project1-ref1', + 'refs/test/project1-ref2') service.execute @@ -31,11 +30,17 @@ 'refs/test/ref-to-not-be-deleted') expect(test_refs(project1)).not_to include( - 'refs/test/some-ref', - 'refs/test/another-ref') + 'refs/test/project1-ref1', + 'refs/test/project1-ref2') + + expect(test_refs(project2)).to include('refs/test/project2-ref1') + end + + it 'marks the processed ToBeDeletedGitRef as processed' do + service.execute - expect(test_refs(project2)).to include( - 'refs/test/also-a-ref') + expect(ToBeDeletedGitRef.status_pending.map(&:ref)).to eq('test/refs/project2_ref1') + expect(ToBeDeletedGitRef.status_processed.map(&:ref)).to eq('test/refs/project1_ref1', 'test/refs/project1_ref2') end def test_refs(project) diff --git a/spec/services/deleted_git_ref_cleanup_scheduler_service.rb b/spec/services/deleted_git_ref_cleanup_scheduler_service_spec.rb similarity index 60% rename from spec/services/deleted_git_ref_cleanup_scheduler_service.rb rename to spec/services/deleted_git_ref_cleanup_scheduler_service_spec.rb index 8cc45e7aafbba2..c444211ee5497d 100644 --- a/spec/services/deleted_git_ref_cleanup_scheduler_service.rb +++ b/spec/services/deleted_git_ref_cleanup_scheduler_service_spec.rb @@ -7,14 +7,16 @@ describe '#execute' do before do - ToBeDeletedGitRef.create!(project_id: 123, ref: 'some-ref') - ToBeDeletedGitRef.create!(project_id: 123, ref: 'another-ref') - ToBeDeletedGitRef.create!(project_id: 456, ref: 'also-a-ref') + ToBeDeletedGitRef.create!(project_id: 123, ref: 'ref1') + ToBeDeletedGitRef.create!(project_id: 123, ref: 'ref2') + ToBeDeletedGitRef.create!(project_id: 456, ref: 'ref3') + ToBeDeletedGitRef.create!(project_id: 789, ref: 'ref4', status: :processed) end - it 'schedules DeletedGitRefCleanupProjectWorker for each project in ToBeDeletedGitRef' do + it 'schedules DeletedGitRefCleanupProjectWorker for each project in pending ToBeDeletedGitRef' do expect(DeletedGitRefCleanupProjectWorker).to receive(:perform_async).with(123) expect(DeletedGitRefCleanupProjectWorker).to receive(:perform_async).with(456) + expect(DeletedGitRefCleanupProjectWorker).not_to receive(:perform_async).with(789) service.execute end -- GitLab From 91db1987fd690aa6e1ee240fa3bf7dfa1d26283d Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Mon, 10 Jul 2023 15:25:22 +1000 Subject: [PATCH 07/38] Add feature_categorys to specs --- spec/services/deleted_git_ref_cleanup_project_service.rb | 2 +- spec/services/deleted_git_ref_cleanup_scheduler_service_spec.rb | 2 +- spec/workers/deleted_git_ref_cleanup_project_worker.rb | 2 +- spec/workers/deleted_git_ref_cleanup_scheduler_worker_spec.rb | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/spec/services/deleted_git_ref_cleanup_project_service.rb b/spec/services/deleted_git_ref_cleanup_project_service.rb index f870e054bcac76..1039ca1d5e033f 100644 --- a/spec/services/deleted_git_ref_cleanup_project_service.rb +++ b/spec/services/deleted_git_ref_cleanup_project_service.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe DeletedGitRefCleanupProjectService do +RSpec.describe DeletedGitRefCleanupProjectService, feature_category: :gitaly do let(:service) { described_class.new(project1.id) } let_it_be(:project1) { create(:project, :repository) } let_it_be(:project2) { create(:project, :repository) } diff --git a/spec/services/deleted_git_ref_cleanup_scheduler_service_spec.rb b/spec/services/deleted_git_ref_cleanup_scheduler_service_spec.rb index c444211ee5497d..542583ade496a6 100644 --- a/spec/services/deleted_git_ref_cleanup_scheduler_service_spec.rb +++ b/spec/services/deleted_git_ref_cleanup_scheduler_service_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe DeletedGitRefCleanupSchedulerService do +RSpec.describe DeletedGitRefCleanupSchedulerService, feature_category: :gitaly do let(:service) { described_class.new } describe '#execute' do diff --git a/spec/workers/deleted_git_ref_cleanup_project_worker.rb b/spec/workers/deleted_git_ref_cleanup_project_worker.rb index 97332883f30cb5..e18299641d1114 100644 --- a/spec/workers/deleted_git_ref_cleanup_project_worker.rb +++ b/spec/workers/deleted_git_ref_cleanup_project_worker.rb @@ -1,6 +1,6 @@ require 'spec_helper' -RSpec.describe DeletedGitRefCleanupProjectWorker do +RSpec.describe DeletedGitRefCleanupProjectWorker, feature_category: :gitaly do let(:service) { instance_double(DeletedGitRefCleanupProjectService) } let(:worker) { described_class.new } diff --git a/spec/workers/deleted_git_ref_cleanup_scheduler_worker_spec.rb b/spec/workers/deleted_git_ref_cleanup_scheduler_worker_spec.rb index 0fce1e46e28d2f..30b4ad07b6604c 100644 --- a/spec/workers/deleted_git_ref_cleanup_scheduler_worker_spec.rb +++ b/spec/workers/deleted_git_ref_cleanup_scheduler_worker_spec.rb @@ -1,6 +1,6 @@ require 'spec_helper' -RSpec.describe DeletedGitRefCleanupSchedulerWorker do +RSpec.describe DeletedGitRefCleanupSchedulerWorker, feature_category: :gitaly do let(:service) { instance_double(DeletedGitRefCleanupSchedulerService) } let(:worker) { described_class.new } -- GitLab From 8e2855434cb164ea67c75bdac3c944ca8e5fce8c Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Mon, 10 Jul 2023 15:58:47 +1000 Subject: [PATCH 08/38] Rename table to p_batched_git_updates_deletions add index --- app/models/to_be_deleted_git_ref.rb | 2 +- ...leted_git_ref_cleanup_scheduler_service.rb | 2 +- ...ml => p_batched_git_updates_deletions.yml} | 2 +- ...431_create_table_to_be_deleted_git_refs.rb | 7 +-- ...onstraint_to_to_be_deleted_git_refs_ref.rb | 4 +- db/structure.sql | 52 +++++++++---------- 6 files changed, 35 insertions(+), 34 deletions(-) rename db/docs/{p_to_be_deleted_git_refs.yml => p_batched_git_updates_deletions.yml} (88%) diff --git a/app/models/to_be_deleted_git_ref.rb b/app/models/to_be_deleted_git_ref.rb index 8b7ddcbdd2e650..925fd7d3eb96fc 100644 --- a/app/models/to_be_deleted_git_ref.rb +++ b/app/models/to_be_deleted_git_ref.rb @@ -7,7 +7,7 @@ class ToBeDeletedGitRef < ApplicationRecord include PartitionedTable include EachBatch - self.table_name = 'p_to_be_deleted_git_refs' + self.table_name = 'p_batched_git_updates_deletions' self.primary_key = :id self.ignored_columns = %i[partition_id] self.sequence_name = :to_be_deleted_git_refs_id_seq diff --git a/app/services/deleted_git_ref_cleanup_scheduler_service.rb b/app/services/deleted_git_ref_cleanup_scheduler_service.rb index 34fe3f14ceac8e..8bac854e98798c 100644 --- a/app/services/deleted_git_ref_cleanup_scheduler_service.rb +++ b/app/services/deleted_git_ref_cleanup_scheduler_service.rb @@ -3,7 +3,7 @@ class DeletedGitRefCleanupSchedulerService def execute # TODO: What is an efficient way to get all unique project_id for pending refs from this table in batches? - ToBeDeletedGitRef.status_pending.select(:project_id).distinct.each_batch do |records| + ToBeDeletedGitRef.status_pending.distinct_each_batch(column: :project_id, of: 1000) do |records| records.each do |record| project_id = record.project_id DeletedGitRefCleanupProjectWorker.perform_async(project_id) diff --git a/db/docs/p_to_be_deleted_git_refs.yml b/db/docs/p_batched_git_updates_deletions.yml similarity index 88% rename from db/docs/p_to_be_deleted_git_refs.yml rename to db/docs/p_batched_git_updates_deletions.yml index bdcaff7e88825f..87f5fecace5272 100644 --- a/db/docs/p_to_be_deleted_git_refs.yml +++ b/db/docs/p_batched_git_updates_deletions.yml @@ -1,5 +1,5 @@ --- -table_name: p_to_be_deleted_git_refs +table_name: p_batched_git_updates_deletions classes: - ToBeDeletedGitRef feature_categories: diff --git a/db/migrate/20230704233431_create_table_to_be_deleted_git_refs.rb b/db/migrate/20230704233431_create_table_to_be_deleted_git_refs.rb index fafe15669c6b28..3f418da3bdafc7 100644 --- a/db/migrate/20230704233431_create_table_to_be_deleted_git_refs.rb +++ b/db/migrate/20230704233431_create_table_to_be_deleted_git_refs.rb @@ -10,7 +10,7 @@ def up if_not_exists: true } - create_table(:p_to_be_deleted_git_refs, **options) do |t| + create_table(:p_batched_git_updates_deletions, **options) do |t| t.bigserial :id, null: false # TODO: Is it worth making this generic. Repository model uses `#container` so we could support (presumably) group # wikis and so-on here. Does it matter? Or should we just build that later since our main focus on performance is @@ -19,13 +19,14 @@ def up t.text :ref, null: false t.bigint :partition_id, null: false, index: true, default: 1 t.integer :status, null: false, index: true, default: 1, limit: 2 + t.index :project_id, where: 'status = 1' t.timestamps null: false end connection.execute(<<~SQL) CREATE TABLE IF NOT EXISTS gitlab_partitions_dynamic.to_be_deleted_git_refs_1 - PARTITION OF p_to_be_deleted_git_refs + PARTITION OF p_batched_git_updates_deletions FOR VALUES IN (1); SQL end @@ -35,6 +36,6 @@ def down DROP TABLE IF EXISTS gitlab_partitions_dynamic.to_be_deleted_git_refs_1; SQL - drop_table :p_to_be_deleted_git_refs + drop_table :p_batched_git_updates_deletions end end diff --git a/db/migrate/20230704235236_add_constraint_to_to_be_deleted_git_refs_ref.rb b/db/migrate/20230704235236_add_constraint_to_to_be_deleted_git_refs_ref.rb index 746e35037366ab..8db76a9a7bc8ca 100644 --- a/db/migrate/20230704235236_add_constraint_to_to_be_deleted_git_refs_ref.rb +++ b/db/migrate/20230704235236_add_constraint_to_to_be_deleted_git_refs_ref.rb @@ -6,10 +6,10 @@ class AddConstraintToToBeDeletedGitRefsRef < Gitlab::Database::Migration[2.1] def up # TODO: Is there a reliable upper limit here? If something exceeds this upper limit we won't be able to delete it. Should we make it very large or just not have a limit? - add_text_limit :p_to_be_deleted_git_refs, :ref, 1024 + add_text_limit :p_batched_git_updates_deletions, :ref, 1024 end def down - remove_text_limit :p_to_be_deleted_git_refs, :ref + remove_text_limit :p_batched_git_updates_deletions, :ref end end diff --git a/db/structure.sql b/db/structure.sql index 0da92fab791b11..00f0500fb59900 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -554,6 +554,18 @@ CREATE TABLE security_findings ( ) PARTITION BY LIST (partition_number); +CREATE TABLE p_batched_git_updates_deletions ( + id bigint NOT NULL, + project_id bigint NOT NULL, + ref text NOT NULL, + partition_id bigint DEFAULT 1 NOT NULL, + status smallint DEFAULT 1 NOT NULL, + created_at timestamp(6) without time zone NOT NULL, + updated_at timestamp(6) without time zone NOT NULL, + CONSTRAINT check_fb9a8cab2b CHECK ((char_length(ref) <= 1024)) +) +PARTITION BY LIST (partition_id); + CREATE TABLE value_stream_dashboard_counts ( id bigint NOT NULL, namespace_id bigint NOT NULL, @@ -19437,35 +19449,23 @@ CREATE SEQUENCE organizations_id_seq ALTER SEQUENCE organizations_id_seq OWNED BY organizations.id; -CREATE SEQUENCE p_ci_job_annotations_id_seq +CREATE SEQUENCE p_batched_git_updates_deletions_id_seq START WITH 1 INCREMENT BY 1 NO MINVALUE NO MAXVALUE CACHE 1; -ALTER SEQUENCE p_ci_job_annotations_id_seq OWNED BY p_ci_job_annotations.id; - -CREATE TABLE p_to_be_deleted_git_refs ( - id bigint NOT NULL, - project_id bigint NOT NULL, - ref text NOT NULL, - partition_id bigint DEFAULT 1 NOT NULL, - status smallint DEFAULT 1 NOT NULL, - created_at timestamp(6) without time zone NOT NULL, - updated_at timestamp(6) without time zone NOT NULL, - CONSTRAINT check_1310a6c4df CHECK ((char_length(ref) <= 1024)) -) -PARTITION BY LIST (partition_id); +ALTER SEQUENCE p_batched_git_updates_deletions_id_seq OWNED BY p_batched_git_updates_deletions.id; -CREATE SEQUENCE p_to_be_deleted_git_refs_id_seq +CREATE SEQUENCE p_ci_job_annotations_id_seq START WITH 1 INCREMENT BY 1 NO MINVALUE NO MAXVALUE CACHE 1; -ALTER SEQUENCE p_to_be_deleted_git_refs_id_seq OWNED BY p_to_be_deleted_git_refs.id; +ALTER SEQUENCE p_ci_job_annotations_id_seq OWNED BY p_ci_job_annotations.id; CREATE TABLE packages_build_infos ( id bigint NOT NULL, @@ -25783,14 +25783,14 @@ ALTER TABLE ONLY organization_users ALTER COLUMN id SET DEFAULT nextval('organiz ALTER TABLE ONLY organizations ALTER COLUMN id SET DEFAULT nextval('organizations_id_seq'::regclass); +ALTER TABLE ONLY p_batched_git_updates_deletions ALTER COLUMN id SET DEFAULT nextval('p_batched_git_updates_deletions_id_seq'::regclass); + ALTER TABLE ONLY p_ci_builds ALTER COLUMN id SET DEFAULT nextval('ci_builds_id_seq'::regclass); ALTER TABLE ONLY p_ci_builds_metadata ALTER COLUMN id SET DEFAULT nextval('ci_builds_metadata_id_seq'::regclass); ALTER TABLE ONLY p_ci_job_annotations ALTER COLUMN id SET DEFAULT nextval('p_ci_job_annotations_id_seq'::regclass); -ALTER TABLE ONLY p_to_be_deleted_git_refs ALTER COLUMN id SET DEFAULT nextval('p_to_be_deleted_git_refs_id_seq'::regclass); - ALTER TABLE ONLY packages_build_infos ALTER COLUMN id SET DEFAULT nextval('packages_build_infos_id_seq'::regclass); ALTER TABLE ONLY packages_composer_cache_files ALTER COLUMN id SET DEFAULT nextval('packages_composer_cache_files_id_seq'::regclass); @@ -28068,15 +28068,15 @@ ALTER TABLE ONLY organization_users ALTER TABLE ONLY organizations ADD CONSTRAINT organizations_pkey PRIMARY KEY (id); +ALTER TABLE ONLY p_batched_git_updates_deletions + ADD CONSTRAINT p_batched_git_updates_deletions_pkey PRIMARY KEY (id, partition_id); + ALTER TABLE ONLY p_ci_job_annotations ADD CONSTRAINT p_ci_job_annotations_pkey PRIMARY KEY (id, partition_id); ALTER TABLE ONLY p_ci_runner_machine_builds ADD CONSTRAINT p_ci_runner_machine_builds_pkey PRIMARY KEY (build_id, partition_id); -ALTER TABLE ONLY p_to_be_deleted_git_refs - ADD CONSTRAINT p_to_be_deleted_git_refs_pkey PRIMARY KEY (id, partition_id); - ALTER TABLE ONLY packages_build_infos ADD CONSTRAINT packages_build_infos_pkey PRIMARY KEY (id); @@ -32356,15 +32356,15 @@ CREATE INDEX index_organization_users_on_user_id ON organization_users USING btr CREATE UNIQUE INDEX index_organizations_on_unique_name_per_group ON customer_relations_organizations USING btree (group_id, lower(name), id); -CREATE UNIQUE INDEX index_p_ci_job_annotations_on_partition_id_job_id_name ON ONLY p_ci_job_annotations USING btree (partition_id, job_id, name); +CREATE INDEX index_p_batched_git_updates_deletions_on_partition_id ON ONLY p_batched_git_updates_deletions USING btree (partition_id); -CREATE INDEX index_p_ci_runner_machine_builds_on_runner_machine_id ON ONLY p_ci_runner_machine_builds USING btree (runner_machine_id); +CREATE INDEX index_p_batched_git_updates_deletions_on_project_id ON ONLY p_batched_git_updates_deletions USING btree (project_id); -CREATE INDEX index_p_to_be_deleted_git_refs_on_partition_id ON ONLY p_to_be_deleted_git_refs USING btree (partition_id); +CREATE INDEX index_p_batched_git_updates_deletions_on_status ON ONLY p_batched_git_updates_deletions USING btree (status); -CREATE INDEX index_p_to_be_deleted_git_refs_on_project_id ON ONLY p_to_be_deleted_git_refs USING btree (project_id); +CREATE UNIQUE INDEX index_p_ci_job_annotations_on_partition_id_job_id_name ON ONLY p_ci_job_annotations USING btree (partition_id, job_id, name); -CREATE INDEX index_p_to_be_deleted_git_refs_on_status ON ONLY p_to_be_deleted_git_refs USING btree (status); +CREATE INDEX index_p_ci_runner_machine_builds_on_runner_machine_id ON ONLY p_ci_runner_machine_builds USING btree (runner_machine_id); CREATE INDEX index_packages_build_infos_on_pipeline_id ON packages_build_infos USING btree (pipeline_id); -- GitLab From 01d5523ee475f0bfcc201c8e4eb7da411eed8b1a Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Mon, 10 Jul 2023 16:20:46 +1000 Subject: [PATCH 09/38] Namespace everything under BatchedGitUpdates:: --- .../to_be_deleted_git_ref.rb | 49 +++++++++++++++++++ app/models/to_be_deleted_git_ref.rb | 47 ------------------ ...deleted_git_ref_cleanup_project_service.rb | 22 +++++++++ ...leted_git_ref_cleanup_scheduler_service.rb | 14 ++++++ ...deleted_git_ref_cleanup_project_service.rb | 20 -------- ...leted_git_ref_cleanup_scheduler_service.rb | 13 ----- .../deleted_git_ref_cleanup_project_worker.rb | 13 +++++ ...eleted_git_ref_cleanup_scheduler_worker.rb | 14 ++++++ .../deleted_git_ref_cleanup_project_worker.rb | 11 ----- ...eleted_git_ref_cleanup_scheduler_worker.rb | 12 ----- config/initializers/postgres_partitioning.rb | 2 +- db/docs/p_batched_git_updates_deletions.yml | 2 +- ...onstraint_to_to_be_deleted_git_refs_ref.rb | 1 - lib/gitlab/git/repository.rb | 4 +- ...deleted_git_ref_cleanup_project_service.rb | 14 +++--- ..._git_ref_cleanup_scheduler_service_spec.rb | 32 ++++++++++++ ..._git_ref_cleanup_scheduler_service_spec.rb | 32 ------------ .../deleted_git_ref_cleanup_project_worker.rb | 2 +- ...d_git_ref_cleanup_scheduler_worker_spec.rb | 6 +-- 19 files changed, 159 insertions(+), 151 deletions(-) create mode 100644 app/models/batched_git_updates/to_be_deleted_git_ref.rb delete mode 100644 app/models/to_be_deleted_git_ref.rb create mode 100644 app/services/batched_git_updates/deleted_git_ref_cleanup_project_service.rb create mode 100644 app/services/batched_git_updates/deleted_git_ref_cleanup_scheduler_service.rb delete mode 100644 app/services/deleted_git_ref_cleanup_project_service.rb delete mode 100644 app/services/deleted_git_ref_cleanup_scheduler_service.rb create mode 100644 app/workers/batched_git_updates/deleted_git_ref_cleanup_project_worker.rb create mode 100644 app/workers/batched_git_updates/deleted_git_ref_cleanup_scheduler_worker.rb delete mode 100644 app/workers/deleted_git_ref_cleanup_project_worker.rb delete mode 100644 app/workers/deleted_git_ref_cleanup_scheduler_worker.rb rename spec/services/{ => batched_git_updates}/deleted_git_ref_cleanup_project_service.rb (65%) create mode 100644 spec/services/batched_git_updates/deleted_git_ref_cleanup_scheduler_service_spec.rb delete mode 100644 spec/services/deleted_git_ref_cleanup_scheduler_service_spec.rb rename spec/workers/{ => batched_git_updates}/deleted_git_ref_cleanup_project_worker.rb (83%) rename spec/workers/{ => batched_git_updates}/deleted_git_ref_cleanup_scheduler_worker_spec.rb (51%) diff --git a/app/models/batched_git_updates/to_be_deleted_git_ref.rb b/app/models/batched_git_updates/to_be_deleted_git_ref.rb new file mode 100644 index 00000000000000..b6eb97bfbc5d5b --- /dev/null +++ b/app/models/batched_git_updates/to_be_deleted_git_ref.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +module BatchedGitUpdates + class ToBeDeletedGitRef < ApplicationRecord + PARTITION_DURATION = 1.day + + include BulkInsertSafe + include PartitionedTable + include EachBatch + + self.table_name = 'p_batched_git_updates_deletions' + self.primary_key = :id + self.ignored_columns = %i[partition_id] + self.sequence_name = :to_be_deleted_git_refs_id_seq + + belongs_to :project, inverse_of: :to_be_deleted_git_refs + + scope :for_partition, -> (partition) { where(partition_id: partition) } + + partitioned_by :partition_id, strategy: :sliding_list, + next_partition_if: -> (active_partition) do + oldest_record_in_partition = ToBeDeletedGitRef + .select(:id, :created_at) + .for_partition(active_partition.value) + .order(:id) + .limit(1) + .take + + oldest_record_in_partition.present? && + oldest_record_in_partition.created_at < PARTITION_DURATION.ago + end, + detach_partition_if: -> (partition) do + !ToBeDeletedGitRef + .for_partition(partition.value) + .status_pending + .exists? + end + + enum status: { pending: 1, processed: 2 }, _prefix: :status + + def self.mark_records_processed(records) + # TODO: Look at app/models/loose_foreign_keys/deleted_record.rb which is grouping by partition when doing this + # update. This is presumably because we've not loaded `partition_id` and therefore this will be updating by `id` + # which needs to search all partitions. If we assume that there is likely normally just 1 partition and sometimes 2 + # partitions this may be fine though. + records.update_all(status: :processed) + end + end +end diff --git a/app/models/to_be_deleted_git_ref.rb b/app/models/to_be_deleted_git_ref.rb deleted file mode 100644 index 925fd7d3eb96fc..00000000000000 --- a/app/models/to_be_deleted_git_ref.rb +++ /dev/null @@ -1,47 +0,0 @@ -# frozen_string_literal: true - -class ToBeDeletedGitRef < ApplicationRecord - PARTITION_DURATION = 1.day - - include BulkInsertSafe - include PartitionedTable - include EachBatch - - self.table_name = 'p_batched_git_updates_deletions' - self.primary_key = :id - self.ignored_columns = %i[partition_id] - self.sequence_name = :to_be_deleted_git_refs_id_seq - - belongs_to :project, inverse_of: :to_be_deleted_git_refs - - scope :for_partition, -> (partition) { where(partition_id: partition) } - - partitioned_by :partition_id, strategy: :sliding_list, - next_partition_if: -> (active_partition) do - oldest_record_in_partition = ToBeDeletedGitRef - .select(:id, :created_at) - .for_partition(active_partition.value) - .order(:id) - .limit(1) - .take - - oldest_record_in_partition.present? && - oldest_record_in_partition.created_at < PARTITION_DURATION.ago - end, - detach_partition_if: -> (partition) do - !ToBeDeletedGitRef - .for_partition(partition.value) - .status_pending - .exists? - end - - enum status: { pending: 1, processed: 2 }, _prefix: :status - - def self.mark_records_processed(records) - # TODO: Look at app/models/loose_foreign_keys/deleted_record.rb which is grouping by partition when doing this - # update. This is presumably because we've not loaded `partition_id` and therefore this will be updating by `id` - # which needs to search all partitions. If we assume that there is likely normally just 1 partition and sometimes 2 - # partitions this may be fine though. - records.update_all(status: :processed) - end -end diff --git a/app/services/batched_git_updates/deleted_git_ref_cleanup_project_service.rb b/app/services/batched_git_updates/deleted_git_ref_cleanup_project_service.rb new file mode 100644 index 00000000000000..ef60a993e6a655 --- /dev/null +++ b/app/services/batched_git_updates/deleted_git_ref_cleanup_project_service.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +module BatchedGitUpdates + class DeletedGitRefCleanupProjectService + def initialize(project_id) + @project_id = project_id + end + + def execute + project = Project.find(@project_id) + ToBeDeletedGitRef.where(project_id: @project_id).each_batch do |batch| + batch.each do |to_be_deleted_git_ref| + ref = to_be_deleted_git_ref.ref + + project.repository.delete_refs(ref) + end + + ToBeDeletedGitRef.mark_records_processed(batch) + end + end + end +end diff --git a/app/services/batched_git_updates/deleted_git_ref_cleanup_scheduler_service.rb b/app/services/batched_git_updates/deleted_git_ref_cleanup_scheduler_service.rb new file mode 100644 index 00000000000000..4a80d6c3fd0628 --- /dev/null +++ b/app/services/batched_git_updates/deleted_git_ref_cleanup_scheduler_service.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module BatchedGitUpdates + class DeletedGitRefCleanupSchedulerService + def execute + ToBeDeletedGitRef.status_pending.distinct_each_batch(column: :project_id, of: 1000) do |records| + records.each do |record| + project_id = record.project_id + DeletedGitRefCleanupProjectWorker.perform_async(project_id) + end + end + end + end +end diff --git a/app/services/deleted_git_ref_cleanup_project_service.rb b/app/services/deleted_git_ref_cleanup_project_service.rb deleted file mode 100644 index 5ef77354abacdb..00000000000000 --- a/app/services/deleted_git_ref_cleanup_project_service.rb +++ /dev/null @@ -1,20 +0,0 @@ -# frozen_string_literal: true - -class DeletedGitRefCleanupProjectService - def initialize(project_id) - @project_id = project_id - end - - def execute - project = Project.find(@project_id) - ToBeDeletedGitRef.where(project_id: @project_id).each_batch do |batch| - batch.each do |to_be_deleted_git_ref| - ref = to_be_deleted_git_ref.ref - - project.repository.delete_refs(ref) - end - - ToBeDeletedGitRef.mark_records_processed(batch) - end - end -end diff --git a/app/services/deleted_git_ref_cleanup_scheduler_service.rb b/app/services/deleted_git_ref_cleanup_scheduler_service.rb deleted file mode 100644 index 8bac854e98798c..00000000000000 --- a/app/services/deleted_git_ref_cleanup_scheduler_service.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -class DeletedGitRefCleanupSchedulerService - def execute - # TODO: What is an efficient way to get all unique project_id for pending refs from this table in batches? - ToBeDeletedGitRef.status_pending.distinct_each_batch(column: :project_id, of: 1000) do |records| - records.each do |record| - project_id = record.project_id - DeletedGitRefCleanupProjectWorker.perform_async(project_id) - end - end - end -end diff --git a/app/workers/batched_git_updates/deleted_git_ref_cleanup_project_worker.rb b/app/workers/batched_git_updates/deleted_git_ref_cleanup_project_worker.rb new file mode 100644 index 00000000000000..77b8f6da5d9c68 --- /dev/null +++ b/app/workers/batched_git_updates/deleted_git_ref_cleanup_project_worker.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +module BatchedGitUpdates + class DeletedGitRefCleanupProjectWorker + include ApplicationWorker + + feature_category :gitaly + + def perform(project_id) + DeletedGitRefCleanupProjectService.new(project_id).execute + end + end +end diff --git a/app/workers/batched_git_updates/deleted_git_ref_cleanup_scheduler_worker.rb b/app/workers/batched_git_updates/deleted_git_ref_cleanup_scheduler_worker.rb new file mode 100644 index 00000000000000..a4e1c8ddae210f --- /dev/null +++ b/app/workers/batched_git_updates/deleted_git_ref_cleanup_scheduler_worker.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module BatchedGitUpdates + class DeletedGitRefCleanupSchedulerWorker + include ApplicationWorker + include CronjobQueue + + feature_category :gitaly + + def perform + DeletedGitRefCleanupSchedulerService.new.execute + end + end +end diff --git a/app/workers/deleted_git_ref_cleanup_project_worker.rb b/app/workers/deleted_git_ref_cleanup_project_worker.rb deleted file mode 100644 index 5bd633451f8d8f..00000000000000 --- a/app/workers/deleted_git_ref_cleanup_project_worker.rb +++ /dev/null @@ -1,11 +0,0 @@ -# frozen_string_literal: true - -class DeletedGitRefCleanupProjectWorker - include ApplicationWorker - - feature_category :gitaly - - def perform(project_id) - DeletedGitRefCleanupProjectService.new(project_id).execute - end -end diff --git a/app/workers/deleted_git_ref_cleanup_scheduler_worker.rb b/app/workers/deleted_git_ref_cleanup_scheduler_worker.rb deleted file mode 100644 index 7e6fb49e7efe2b..00000000000000 --- a/app/workers/deleted_git_ref_cleanup_scheduler_worker.rb +++ /dev/null @@ -1,12 +0,0 @@ -# frozen_string_literal: true - -class DeletedGitRefCleanupSchedulerWorker - include ApplicationWorker - include CronjobQueue - - feature_category :gitaly - - def perform - DeletedGitRefCleanupSchedulerService.new.execute - end -end diff --git a/config/initializers/postgres_partitioning.rb b/config/initializers/postgres_partitioning.rb index 2e41cc26296b66..1d4c82b7c7e34b 100644 --- a/config/initializers/postgres_partitioning.rb +++ b/config/initializers/postgres_partitioning.rb @@ -8,7 +8,7 @@ Gitlab::Database::BackgroundMigration::BatchedJobTransitionLog, Ci::RunnerManagerBuild, Ci::JobAnnotation, - ToBeDeletedGitRef + BatchedGitUpdates::ToBeDeletedGitRef ]) if Gitlab.ee? diff --git a/db/docs/p_batched_git_updates_deletions.yml b/db/docs/p_batched_git_updates_deletions.yml index 87f5fecace5272..f94a042716e86b 100644 --- a/db/docs/p_batched_git_updates_deletions.yml +++ b/db/docs/p_batched_git_updates_deletions.yml @@ -1,7 +1,7 @@ --- table_name: p_batched_git_updates_deletions classes: -- ToBeDeletedGitRef +- BatchedGitUpdates::ToBeDeletedGitRef feature_categories: - gitaly description: Acts as a queue for refs that need to be deleted in Gitaly. This allows diff --git a/db/migrate/20230704235236_add_constraint_to_to_be_deleted_git_refs_ref.rb b/db/migrate/20230704235236_add_constraint_to_to_be_deleted_git_refs_ref.rb index 8db76a9a7bc8ca..0841e1a2bf4af7 100644 --- a/db/migrate/20230704235236_add_constraint_to_to_be_deleted_git_refs_ref.rb +++ b/db/migrate/20230704235236_add_constraint_to_to_be_deleted_git_refs_ref.rb @@ -1,6 +1,5 @@ # frozen_string_literal: true - class AddConstraintToToBeDeletedGitRefsRef < Gitlab::Database::Migration[2.1] disable_ddl_transaction! diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index bdd8a2b62e8e79..d6b1d947cb05de 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -756,10 +756,10 @@ def async_delete_refs(*refs) raise "async_delete_refs only supports project repositories" unless container.is_a?(Project) records = refs.map do |ref| - ToBeDeletedGitRef.new(project_id: container.id, ref: ref, created_at: Time.current, updated_at: Time.current) + BatchedGitUpdates::ToBeDeletedGitRef.new(project_id: container.id, ref: ref, created_at: Time.current, updated_at: Time.current) end - ToBeDeletedGitRef.bulk_insert!(records) + BatchedGitUpdates::ToBeDeletedGitRef.bulk_insert!(records) end def delete_refs(...) diff --git a/spec/services/deleted_git_ref_cleanup_project_service.rb b/spec/services/batched_git_updates/deleted_git_ref_cleanup_project_service.rb similarity index 65% rename from spec/services/deleted_git_ref_cleanup_project_service.rb rename to spec/services/batched_git_updates/deleted_git_ref_cleanup_project_service.rb index 1039ca1d5e033f..dc4f1ede0ada43 100644 --- a/spec/services/deleted_git_ref_cleanup_project_service.rb +++ b/spec/services/batched_git_updates/deleted_git_ref_cleanup_project_service.rb @@ -2,13 +2,13 @@ require 'spec_helper' -RSpec.describe DeletedGitRefCleanupProjectService, feature_category: :gitaly do +RSpec.describe BatchedGitUpdates::DeletedGitRefCleanupProjectService, feature_category: :gitaly do let(:service) { described_class.new(project1.id) } let_it_be(:project1) { create(:project, :repository) } let_it_be(:project2) { create(:project, :repository) } - let!(:project1_ref1) { ToBeDeletedGitRef.create!(project_id: project1.id, ref: 'refs/test/project1-ref1') } - let!(:project1_ref2) { ToBeDeletedGitRef.create!(project_id: project1.id, ref: 'refs/test/project1-ref2') } - let!(:project2_ref1) { ToBeDeletedGitRef.create!(project_id: project1.id, ref: 'refs/test/project2-ref1') } + let!(:project1_ref1) { BatchedGitUpdates::ToBeDeletedGitRef.create!(project_id: project1.id, ref: 'refs/test/project1-ref1') } + let!(:project1_ref2) { BatchedGitUpdates::ToBeDeletedGitRef.create!(project_id: project1.id, ref: 'refs/test/project1-ref2') } + let!(:project2_ref1) { BatchedGitUpdates::ToBeDeletedGitRef.create!(project_id: project1.id, ref: 'refs/test/project2-ref1') } describe '#execute' do before do @@ -36,11 +36,11 @@ expect(test_refs(project2)).to include('refs/test/project2-ref1') end - it 'marks the processed ToBeDeletedGitRef as processed' do + it 'marks the processed BatchedGitUpdates::ToBeDeletedGitRef as processed' do service.execute - expect(ToBeDeletedGitRef.status_pending.map(&:ref)).to eq('test/refs/project2_ref1') - expect(ToBeDeletedGitRef.status_processed.map(&:ref)).to eq('test/refs/project1_ref1', 'test/refs/project1_ref2') + expect(BatchedGitUpdates::ToBeDeletedGitRef.status_pending.map(&:ref)).to eq('test/refs/project2_ref1') + expect(BatchedGitUpdates::ToBeDeletedGitRef.status_processed.map(&:ref)).to eq('test/refs/project1_ref1', 'test/refs/project1_ref2') end def test_refs(project) diff --git a/spec/services/batched_git_updates/deleted_git_ref_cleanup_scheduler_service_spec.rb b/spec/services/batched_git_updates/deleted_git_ref_cleanup_scheduler_service_spec.rb new file mode 100644 index 00000000000000..86ff037c8d15a4 --- /dev/null +++ b/spec/services/batched_git_updates/deleted_git_ref_cleanup_scheduler_service_spec.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe BatchedGitUpdates::DeletedGitRefCleanupSchedulerService, feature_category: :gitaly do + let(:service) { described_class.new } + + describe '#execute' do + before do + BatchedGitUpdates::ToBeDeletedGitRef.create!(project_id: 123, ref: 'ref1') + BatchedGitUpdates::ToBeDeletedGitRef.create!(project_id: 123, ref: 'ref2') + BatchedGitUpdates::ToBeDeletedGitRef.create!(project_id: 456, ref: 'ref3') + BatchedGitUpdates::ToBeDeletedGitRef.create!(project_id: 789, ref: 'ref4', status: :processed) + end + + it 'schedules DeletedGitRefCleanupProjectWorker for each project in pending BatchedGitUpdates::ToBeDeletedGitRef' do + expect(BatchedGitUpdates::DeletedGitRefCleanupProjectWorker).to receive(:perform_async).with(123) + expect(BatchedGitUpdates::DeletedGitRefCleanupProjectWorker).to receive(:perform_async).with(456) + expect(BatchedGitUpdates::DeletedGitRefCleanupProjectWorker).not_to receive(:perform_async).with(789) + + service.execute + end + + it 'returns stats' do + # TODO: + end + + it 'acquires a lock to avoid running duplicate instances' do + # TODO: + end + end +end diff --git a/spec/services/deleted_git_ref_cleanup_scheduler_service_spec.rb b/spec/services/deleted_git_ref_cleanup_scheduler_service_spec.rb deleted file mode 100644 index 542583ade496a6..00000000000000 --- a/spec/services/deleted_git_ref_cleanup_scheduler_service_spec.rb +++ /dev/null @@ -1,32 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe DeletedGitRefCleanupSchedulerService, feature_category: :gitaly do - let(:service) { described_class.new } - - describe '#execute' do - before do - ToBeDeletedGitRef.create!(project_id: 123, ref: 'ref1') - ToBeDeletedGitRef.create!(project_id: 123, ref: 'ref2') - ToBeDeletedGitRef.create!(project_id: 456, ref: 'ref3') - ToBeDeletedGitRef.create!(project_id: 789, ref: 'ref4', status: :processed) - end - - it 'schedules DeletedGitRefCleanupProjectWorker for each project in pending ToBeDeletedGitRef' do - expect(DeletedGitRefCleanupProjectWorker).to receive(:perform_async).with(123) - expect(DeletedGitRefCleanupProjectWorker).to receive(:perform_async).with(456) - expect(DeletedGitRefCleanupProjectWorker).not_to receive(:perform_async).with(789) - - service.execute - end - - it 'returns stats' do - # TODO: - end - - it 'acquires a lock to avoid running duplicate instances' do - # TODO: - end - end -end diff --git a/spec/workers/deleted_git_ref_cleanup_project_worker.rb b/spec/workers/batched_git_updates/deleted_git_ref_cleanup_project_worker.rb similarity index 83% rename from spec/workers/deleted_git_ref_cleanup_project_worker.rb rename to spec/workers/batched_git_updates/deleted_git_ref_cleanup_project_worker.rb index e18299641d1114..cdd3b72f490de2 100644 --- a/spec/workers/deleted_git_ref_cleanup_project_worker.rb +++ b/spec/workers/batched_git_updates/deleted_git_ref_cleanup_project_worker.rb @@ -1,6 +1,6 @@ require 'spec_helper' -RSpec.describe DeletedGitRefCleanupProjectWorker, feature_category: :gitaly do +RSpec.describe BatchedGitUpdates::DeletedGitRefCleanupProjectWorker, feature_category: :gitaly do let(:service) { instance_double(DeletedGitRefCleanupProjectService) } let(:worker) { described_class.new } diff --git a/spec/workers/deleted_git_ref_cleanup_scheduler_worker_spec.rb b/spec/workers/batched_git_updates/deleted_git_ref_cleanup_scheduler_worker_spec.rb similarity index 51% rename from spec/workers/deleted_git_ref_cleanup_scheduler_worker_spec.rb rename to spec/workers/batched_git_updates/deleted_git_ref_cleanup_scheduler_worker_spec.rb index 30b4ad07b6604c..04d2130a9542d4 100644 --- a/spec/workers/deleted_git_ref_cleanup_scheduler_worker_spec.rb +++ b/spec/workers/batched_git_updates/deleted_git_ref_cleanup_scheduler_worker_spec.rb @@ -1,12 +1,12 @@ require 'spec_helper' -RSpec.describe DeletedGitRefCleanupSchedulerWorker, feature_category: :gitaly do - let(:service) { instance_double(DeletedGitRefCleanupSchedulerService) } +RSpec.describe BatchedGitUpdates::DeletedGitRefCleanupSchedulerWorker, feature_category: :gitaly do + let(:service) { instance_double(BatchedGitUpdates::DeletedGitRefCleanupSchedulerService) } let(:worker) { described_class.new } describe '#perform' do it 'delegates to DeletedGitRefCleanupSchedulerService' do - expect(DeletedGitRefCleanupSchedulerService).to receive(:new).and_return(service) + expect(BatchedGitUpdates::DeletedGitRefCleanupSchedulerService).to receive(:new).and_return(service) expect(service).to receive(:execute) worker.perform -- GitLab From 12ffd4f2661e4d12f70a32c9ffdf80eee125d42f Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Mon, 10 Jul 2023 16:33:03 +1000 Subject: [PATCH 10/38] Namespace under BatchedGitRefUpdates and model Deletion --- .../deletion.rb} | 10 ++--- app/models/project.rb | 2 +- ...deleted_git_ref_cleanup_project_service.rb | 6 +-- ...leted_git_ref_cleanup_scheduler_service.rb | 4 +- .../deleted_git_ref_cleanup_project_worker.rb | 2 +- ...eleted_git_ref_cleanup_scheduler_worker.rb | 2 +- config/initializers/postgres_partitioning.rb | 2 +- ...> p_batched_git_ref_updates_deletions.yml} | 4 +- ...able_batched_git_ref_updates_deletions.rb} | 12 +++--- ...t_to_batched_git_ref_updates_deletions.rb} | 6 +-- db/structure.sql | 40 +++++++++---------- lib/gitlab/git/repository.rb | 4 +- ...deleted_git_ref_cleanup_project_service.rb | 14 +++---- ..._git_ref_cleanup_scheduler_service_spec.rb | 32 +++++++++++++++ ..._git_ref_cleanup_scheduler_service_spec.rb | 32 --------------- .../deleted_git_ref_cleanup_project_worker.rb | 2 +- ...d_git_ref_cleanup_scheduler_worker_spec.rb | 6 +-- 17 files changed, 90 insertions(+), 90 deletions(-) rename app/models/{batched_git_updates/to_be_deleted_git_ref.rb => batched_git_ref_updates/deletion.rb} (87%) rename app/services/{batched_git_updates => batched_git_ref_updates}/deleted_git_ref_cleanup_project_service.rb (70%) rename app/services/{batched_git_updates => batched_git_ref_updates}/deleted_git_ref_cleanup_scheduler_service.rb (68%) rename app/workers/{batched_git_updates => batched_git_ref_updates}/deleted_git_ref_cleanup_project_worker.rb (89%) rename app/workers/{batched_git_updates => batched_git_ref_updates}/deleted_git_ref_cleanup_scheduler_worker.rb (89%) rename db/docs/{p_batched_git_updates_deletions.yml => p_batched_git_ref_updates_deletions.yml} (79%) rename db/migrate/{20230704233431_create_table_to_be_deleted_git_refs.rb => 20230704233431_create_table_batched_git_ref_updates_deletions.rb} (71%) rename db/migrate/{20230704235236_add_constraint_to_to_be_deleted_git_refs_ref.rb => 20230704235236_add_constraint_to_batched_git_ref_updates_deletions.rb} (57%) rename spec/services/{batched_git_updates => batched_git_ref_updates}/deleted_git_ref_cleanup_project_service.rb (65%) create mode 100644 spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service_spec.rb delete mode 100644 spec/services/batched_git_updates/deleted_git_ref_cleanup_scheduler_service_spec.rb rename spec/workers/{batched_git_updates => batched_git_ref_updates}/deleted_git_ref_cleanup_project_worker.rb (82%) rename spec/workers/{batched_git_updates => batched_git_ref_updates}/deleted_git_ref_cleanup_scheduler_worker_spec.rb (50%) diff --git a/app/models/batched_git_updates/to_be_deleted_git_ref.rb b/app/models/batched_git_ref_updates/deletion.rb similarity index 87% rename from app/models/batched_git_updates/to_be_deleted_git_ref.rb rename to app/models/batched_git_ref_updates/deletion.rb index b6eb97bfbc5d5b..3f9800d15019ca 100644 --- a/app/models/batched_git_updates/to_be_deleted_git_ref.rb +++ b/app/models/batched_git_ref_updates/deletion.rb @@ -1,14 +1,14 @@ # frozen_string_literal: true -module BatchedGitUpdates - class ToBeDeletedGitRef < ApplicationRecord +module BatchedGitRefUpdates + class Deletion < ApplicationRecord PARTITION_DURATION = 1.day include BulkInsertSafe include PartitionedTable include EachBatch - self.table_name = 'p_batched_git_updates_deletions' + self.table_name = 'p_batched_git_ref_updates_deletions' self.primary_key = :id self.ignored_columns = %i[partition_id] self.sequence_name = :to_be_deleted_git_refs_id_seq @@ -19,7 +19,7 @@ class ToBeDeletedGitRef < ApplicationRecord partitioned_by :partition_id, strategy: :sliding_list, next_partition_if: -> (active_partition) do - oldest_record_in_partition = ToBeDeletedGitRef + oldest_record_in_partition = Deletion .select(:id, :created_at) .for_partition(active_partition.value) .order(:id) @@ -30,7 +30,7 @@ class ToBeDeletedGitRef < ApplicationRecord oldest_record_in_partition.created_at < PARTITION_DURATION.ago end, detach_partition_if: -> (partition) do - !ToBeDeletedGitRef + !Deletion .for_partition(partition.value) .status_pending .exists? diff --git a/app/models/project.rb b/app/models/project.rb index 3acb554055bc91..d618e7c825fb23 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -456,7 +456,7 @@ def self.integration_association_name(name) has_one :ci_project_mirror, class_name: 'Ci::ProjectMirror' has_many :sync_events, class_name: 'Projects::SyncEvent' - has_many :to_be_deleted_git_refs, inverse_of: :project + has_many :batched_git_ref_updates_deletions, inverse_of: :project has_one :build_artifacts_size_refresh, class_name: 'Projects::BuildArtifactsSizeRefresh' diff --git a/app/services/batched_git_updates/deleted_git_ref_cleanup_project_service.rb b/app/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service.rb similarity index 70% rename from app/services/batched_git_updates/deleted_git_ref_cleanup_project_service.rb rename to app/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service.rb index ef60a993e6a655..f86f0e510831d0 100644 --- a/app/services/batched_git_updates/deleted_git_ref_cleanup_project_service.rb +++ b/app/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -module BatchedGitUpdates +module BatchedGitRefUpdates class DeletedGitRefCleanupProjectService def initialize(project_id) @project_id = project_id @@ -8,14 +8,14 @@ def initialize(project_id) def execute project = Project.find(@project_id) - ToBeDeletedGitRef.where(project_id: @project_id).each_batch do |batch| + Deletion.where(project_id: @project_id).each_batch do |batch| batch.each do |to_be_deleted_git_ref| ref = to_be_deleted_git_ref.ref project.repository.delete_refs(ref) end - ToBeDeletedGitRef.mark_records_processed(batch) + Deletion.mark_records_processed(batch) end end end diff --git a/app/services/batched_git_updates/deleted_git_ref_cleanup_scheduler_service.rb b/app/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service.rb similarity index 68% rename from app/services/batched_git_updates/deleted_git_ref_cleanup_scheduler_service.rb rename to app/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service.rb index 4a80d6c3fd0628..11d880c9f3068f 100644 --- a/app/services/batched_git_updates/deleted_git_ref_cleanup_scheduler_service.rb +++ b/app/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service.rb @@ -1,9 +1,9 @@ # frozen_string_literal: true -module BatchedGitUpdates +module BatchedGitRefUpdates class DeletedGitRefCleanupSchedulerService def execute - ToBeDeletedGitRef.status_pending.distinct_each_batch(column: :project_id, of: 1000) do |records| + Deletion.status_pending.distinct_each_batch(column: :project_id, of: 1000) do |records| records.each do |record| project_id = record.project_id DeletedGitRefCleanupProjectWorker.perform_async(project_id) diff --git a/app/workers/batched_git_updates/deleted_git_ref_cleanup_project_worker.rb b/app/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker.rb similarity index 89% rename from app/workers/batched_git_updates/deleted_git_ref_cleanup_project_worker.rb rename to app/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker.rb index 77b8f6da5d9c68..539c39cf3d8712 100644 --- a/app/workers/batched_git_updates/deleted_git_ref_cleanup_project_worker.rb +++ b/app/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -module BatchedGitUpdates +module BatchedGitRefUpdates class DeletedGitRefCleanupProjectWorker include ApplicationWorker diff --git a/app/workers/batched_git_updates/deleted_git_ref_cleanup_scheduler_worker.rb b/app/workers/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_worker.rb similarity index 89% rename from app/workers/batched_git_updates/deleted_git_ref_cleanup_scheduler_worker.rb rename to app/workers/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_worker.rb index a4e1c8ddae210f..637e404f56c622 100644 --- a/app/workers/batched_git_updates/deleted_git_ref_cleanup_scheduler_worker.rb +++ b/app/workers/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_worker.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -module BatchedGitUpdates +module BatchedGitRefUpdates class DeletedGitRefCleanupSchedulerWorker include ApplicationWorker include CronjobQueue diff --git a/config/initializers/postgres_partitioning.rb b/config/initializers/postgres_partitioning.rb index 1d4c82b7c7e34b..bfd737baec984a 100644 --- a/config/initializers/postgres_partitioning.rb +++ b/config/initializers/postgres_partitioning.rb @@ -8,7 +8,7 @@ Gitlab::Database::BackgroundMigration::BatchedJobTransitionLog, Ci::RunnerManagerBuild, Ci::JobAnnotation, - BatchedGitUpdates::ToBeDeletedGitRef + BatchedGitRefUpdates::Deletion ]) if Gitlab.ee? diff --git a/db/docs/p_batched_git_updates_deletions.yml b/db/docs/p_batched_git_ref_updates_deletions.yml similarity index 79% rename from db/docs/p_batched_git_updates_deletions.yml rename to db/docs/p_batched_git_ref_updates_deletions.yml index f94a042716e86b..9e8c1b258069d1 100644 --- a/db/docs/p_batched_git_updates_deletions.yml +++ b/db/docs/p_batched_git_ref_updates_deletions.yml @@ -1,7 +1,7 @@ --- -table_name: p_batched_git_updates_deletions +table_name: p_batched_git_ref_updates_deletions classes: -- BatchedGitUpdates::ToBeDeletedGitRef +- BatchedGitRefUpdates::Deletion feature_categories: - gitaly description: Acts as a queue for refs that need to be deleted in Gitaly. This allows diff --git a/db/migrate/20230704233431_create_table_to_be_deleted_git_refs.rb b/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb similarity index 71% rename from db/migrate/20230704233431_create_table_to_be_deleted_git_refs.rb rename to db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb index 3f418da3bdafc7..e372460eb13ffd 100644 --- a/db/migrate/20230704233431_create_table_to_be_deleted_git_refs.rb +++ b/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -class CreateTableToBeDeletedGitRefs < Gitlab::Database::Migration[2.1] +class CreateTableDeletions < Gitlab::Database::Migration[2.1] enable_lock_retries! def up @@ -10,7 +10,7 @@ def up if_not_exists: true } - create_table(:p_batched_git_updates_deletions, **options) do |t| + create_table(:p_batched_git_ref_updates_deletions, **options) do |t| t.bigserial :id, null: false # TODO: Is it worth making this generic. Repository model uses `#container` so we could support (presumably) group # wikis and so-on here. Does it matter? Or should we just build that later since our main focus on performance is @@ -25,17 +25,17 @@ def up end connection.execute(<<~SQL) - CREATE TABLE IF NOT EXISTS gitlab_partitions_dynamic.to_be_deleted_git_refs_1 - PARTITION OF p_batched_git_updates_deletions + CREATE TABLE IF NOT EXISTS gitlab_partitions_dynamic.batched_git_ref_updates_deletions_1 + PARTITION OF p_batched_git_ref_updates_deletions FOR VALUES IN (1); SQL end def down connection.execute(<<~SQL) - DROP TABLE IF EXISTS gitlab_partitions_dynamic.to_be_deleted_git_refs_1; + DROP TABLE IF EXISTS gitlab_partitions_dynamic.batched_git_ref_updates_deletions_1; SQL - drop_table :p_batched_git_updates_deletions + drop_table :p_batched_git_ref_updates_deletions end end diff --git a/db/migrate/20230704235236_add_constraint_to_to_be_deleted_git_refs_ref.rb b/db/migrate/20230704235236_add_constraint_to_batched_git_ref_updates_deletions.rb similarity index 57% rename from db/migrate/20230704235236_add_constraint_to_to_be_deleted_git_refs_ref.rb rename to db/migrate/20230704235236_add_constraint_to_batched_git_ref_updates_deletions.rb index 0841e1a2bf4af7..dd8bd9c8b80a3c 100644 --- a/db/migrate/20230704235236_add_constraint_to_to_be_deleted_git_refs_ref.rb +++ b/db/migrate/20230704235236_add_constraint_to_batched_git_ref_updates_deletions.rb @@ -1,14 +1,14 @@ # frozen_string_literal: true -class AddConstraintToToBeDeletedGitRefsRef < Gitlab::Database::Migration[2.1] +class AddConstraintToDeletionsRef < Gitlab::Database::Migration[2.1] disable_ddl_transaction! def up # TODO: Is there a reliable upper limit here? If something exceeds this upper limit we won't be able to delete it. Should we make it very large or just not have a limit? - add_text_limit :p_batched_git_updates_deletions, :ref, 1024 + add_text_limit :p_batched_git_ref_updates_deletions, :ref, 1024 end def down - remove_text_limit :p_batched_git_updates_deletions, :ref + remove_text_limit :p_batched_git_ref_updates_deletions, :ref end end diff --git a/db/structure.sql b/db/structure.sql index 00f0500fb59900..439ddaa709fbb2 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -554,18 +554,6 @@ CREATE TABLE security_findings ( ) PARTITION BY LIST (partition_number); -CREATE TABLE p_batched_git_updates_deletions ( - id bigint NOT NULL, - project_id bigint NOT NULL, - ref text NOT NULL, - partition_id bigint DEFAULT 1 NOT NULL, - status smallint DEFAULT 1 NOT NULL, - created_at timestamp(6) without time zone NOT NULL, - updated_at timestamp(6) without time zone NOT NULL, - CONSTRAINT check_fb9a8cab2b CHECK ((char_length(ref) <= 1024)) -) -PARTITION BY LIST (partition_id); - CREATE TABLE value_stream_dashboard_counts ( id bigint NOT NULL, namespace_id bigint NOT NULL, @@ -19449,14 +19437,26 @@ CREATE SEQUENCE organizations_id_seq ALTER SEQUENCE organizations_id_seq OWNED BY organizations.id; -CREATE SEQUENCE p_batched_git_updates_deletions_id_seq +CREATE TABLE p_batched_git_ref_updates_deletions ( + id bigint NOT NULL, + project_id bigint NOT NULL, + ref text NOT NULL, + partition_id bigint DEFAULT 1 NOT NULL, + status smallint DEFAULT 1 NOT NULL, + created_at timestamp(6) without time zone NOT NULL, + updated_at timestamp(6) without time zone NOT NULL, + CONSTRAINT check_fb9a8cab2b CHECK ((char_length(ref) <= 1024)) +) +PARTITION BY LIST (partition_id); + +CREATE SEQUENCE p_batched_git_ref_updates_deletions_id_seq START WITH 1 INCREMENT BY 1 NO MINVALUE NO MAXVALUE CACHE 1; -ALTER SEQUENCE p_batched_git_updates_deletions_id_seq OWNED BY p_batched_git_updates_deletions.id; +ALTER SEQUENCE p_batched_git_ref_updates_deletions_id_seq OWNED BY p_batched_git_ref_updates_deletions.id; CREATE SEQUENCE p_ci_job_annotations_id_seq START WITH 1 @@ -25783,7 +25783,7 @@ ALTER TABLE ONLY organization_users ALTER COLUMN id SET DEFAULT nextval('organiz ALTER TABLE ONLY organizations ALTER COLUMN id SET DEFAULT nextval('organizations_id_seq'::regclass); -ALTER TABLE ONLY p_batched_git_updates_deletions ALTER COLUMN id SET DEFAULT nextval('p_batched_git_updates_deletions_id_seq'::regclass); +ALTER TABLE ONLY p_batched_git_ref_updates_deletions ALTER COLUMN id SET DEFAULT nextval('p_batched_git_ref_updates_deletions_id_seq'::regclass); ALTER TABLE ONLY p_ci_builds ALTER COLUMN id SET DEFAULT nextval('ci_builds_id_seq'::regclass); @@ -28068,8 +28068,8 @@ ALTER TABLE ONLY organization_users ALTER TABLE ONLY organizations ADD CONSTRAINT organizations_pkey PRIMARY KEY (id); -ALTER TABLE ONLY p_batched_git_updates_deletions - ADD CONSTRAINT p_batched_git_updates_deletions_pkey PRIMARY KEY (id, partition_id); +ALTER TABLE ONLY p_batched_git_ref_updates_deletions + ADD CONSTRAINT p_batched_git_ref_updates_deletions_pkey PRIMARY KEY (id, partition_id); ALTER TABLE ONLY p_ci_job_annotations ADD CONSTRAINT p_ci_job_annotations_pkey PRIMARY KEY (id, partition_id); @@ -32356,11 +32356,11 @@ CREATE INDEX index_organization_users_on_user_id ON organization_users USING btr CREATE UNIQUE INDEX index_organizations_on_unique_name_per_group ON customer_relations_organizations USING btree (group_id, lower(name), id); -CREATE INDEX index_p_batched_git_updates_deletions_on_partition_id ON ONLY p_batched_git_updates_deletions USING btree (partition_id); +CREATE INDEX index_p_batched_git_ref_updates_deletions_on_partition_id ON ONLY p_batched_git_ref_updates_deletions USING btree (partition_id); -CREATE INDEX index_p_batched_git_updates_deletions_on_project_id ON ONLY p_batched_git_updates_deletions USING btree (project_id); +CREATE INDEX index_p_batched_git_ref_updates_deletions_on_project_id ON ONLY p_batched_git_ref_updates_deletions USING btree (project_id); -CREATE INDEX index_p_batched_git_updates_deletions_on_status ON ONLY p_batched_git_updates_deletions USING btree (status); +CREATE INDEX index_p_batched_git_ref_updates_deletions_on_status ON ONLY p_batched_git_ref_updates_deletions USING btree (status); CREATE UNIQUE INDEX index_p_ci_job_annotations_on_partition_id_job_id_name ON ONLY p_ci_job_annotations USING btree (partition_id, job_id, name); diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index d6b1d947cb05de..7c802112558cfb 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -756,10 +756,10 @@ def async_delete_refs(*refs) raise "async_delete_refs only supports project repositories" unless container.is_a?(Project) records = refs.map do |ref| - BatchedGitUpdates::ToBeDeletedGitRef.new(project_id: container.id, ref: ref, created_at: Time.current, updated_at: Time.current) + BatchedGitRefUpdates::Deletion.new(project_id: container.id, ref: ref, created_at: Time.current, updated_at: Time.current) end - BatchedGitUpdates::ToBeDeletedGitRef.bulk_insert!(records) + BatchedGitRefUpdates::Deletion.bulk_insert!(records) end def delete_refs(...) diff --git a/spec/services/batched_git_updates/deleted_git_ref_cleanup_project_service.rb b/spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service.rb similarity index 65% rename from spec/services/batched_git_updates/deleted_git_ref_cleanup_project_service.rb rename to spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service.rb index dc4f1ede0ada43..2b43d540c5a047 100644 --- a/spec/services/batched_git_updates/deleted_git_ref_cleanup_project_service.rb +++ b/spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service.rb @@ -2,13 +2,13 @@ require 'spec_helper' -RSpec.describe BatchedGitUpdates::DeletedGitRefCleanupProjectService, feature_category: :gitaly do +RSpec.describe BatchedGitRefUpdates::DeletedGitRefCleanupProjectService, feature_category: :gitaly do let(:service) { described_class.new(project1.id) } let_it_be(:project1) { create(:project, :repository) } let_it_be(:project2) { create(:project, :repository) } - let!(:project1_ref1) { BatchedGitUpdates::ToBeDeletedGitRef.create!(project_id: project1.id, ref: 'refs/test/project1-ref1') } - let!(:project1_ref2) { BatchedGitUpdates::ToBeDeletedGitRef.create!(project_id: project1.id, ref: 'refs/test/project1-ref2') } - let!(:project2_ref1) { BatchedGitUpdates::ToBeDeletedGitRef.create!(project_id: project1.id, ref: 'refs/test/project2-ref1') } + let!(:project1_ref1) { BatchedGitRefUpdates::Deletion.create!(project_id: project1.id, ref: 'refs/test/project1-ref1') } + let!(:project1_ref2) { BatchedGitRefUpdates::Deletion.create!(project_id: project1.id, ref: 'refs/test/project1-ref2') } + let!(:project2_ref1) { BatchedGitRefUpdates::Deletion.create!(project_id: project1.id, ref: 'refs/test/project2-ref1') } describe '#execute' do before do @@ -36,11 +36,11 @@ expect(test_refs(project2)).to include('refs/test/project2-ref1') end - it 'marks the processed BatchedGitUpdates::ToBeDeletedGitRef as processed' do + it 'marks the processed BatchedGitRefUpdates::Deletion as processed' do service.execute - expect(BatchedGitUpdates::ToBeDeletedGitRef.status_pending.map(&:ref)).to eq('test/refs/project2_ref1') - expect(BatchedGitUpdates::ToBeDeletedGitRef.status_processed.map(&:ref)).to eq('test/refs/project1_ref1', 'test/refs/project1_ref2') + expect(BatchedGitRefUpdates::Deletion.status_pending.map(&:ref)).to eq('test/refs/project2_ref1') + expect(BatchedGitRefUpdates::Deletion.status_processed.map(&:ref)).to eq('test/refs/project1_ref1', 'test/refs/project1_ref2') end def test_refs(project) diff --git a/spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service_spec.rb b/spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service_spec.rb new file mode 100644 index 00000000000000..49cc577ba4f440 --- /dev/null +++ b/spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service_spec.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe BatchedGitRefUpdates::DeletedGitRefCleanupSchedulerService, feature_category: :gitaly do + let(:service) { described_class.new } + + describe '#execute' do + before do + BatchedGitRefUpdates::Deletion.create!(project_id: 123, ref: 'ref1') + BatchedGitRefUpdates::Deletion.create!(project_id: 123, ref: 'ref2') + BatchedGitRefUpdates::Deletion.create!(project_id: 456, ref: 'ref3') + BatchedGitRefUpdates::Deletion.create!(project_id: 789, ref: 'ref4', status: :processed) + end + + it 'schedules DeletedGitRefCleanupProjectWorker for each project in pending BatchedGitRefUpdates::Deletion' do + expect(BatchedGitRefUpdates::DeletedGitRefCleanupProjectWorker).to receive(:perform_async).with(123) + expect(BatchedGitRefUpdates::DeletedGitRefCleanupProjectWorker).to receive(:perform_async).with(456) + expect(BatchedGitRefUpdates::DeletedGitRefCleanupProjectWorker).not_to receive(:perform_async).with(789) + + service.execute + end + + it 'returns stats' do + # TODO: + end + + it 'acquires a lock to avoid running duplicate instances' do + # TODO: + end + end +end diff --git a/spec/services/batched_git_updates/deleted_git_ref_cleanup_scheduler_service_spec.rb b/spec/services/batched_git_updates/deleted_git_ref_cleanup_scheduler_service_spec.rb deleted file mode 100644 index 86ff037c8d15a4..00000000000000 --- a/spec/services/batched_git_updates/deleted_git_ref_cleanup_scheduler_service_spec.rb +++ /dev/null @@ -1,32 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe BatchedGitUpdates::DeletedGitRefCleanupSchedulerService, feature_category: :gitaly do - let(:service) { described_class.new } - - describe '#execute' do - before do - BatchedGitUpdates::ToBeDeletedGitRef.create!(project_id: 123, ref: 'ref1') - BatchedGitUpdates::ToBeDeletedGitRef.create!(project_id: 123, ref: 'ref2') - BatchedGitUpdates::ToBeDeletedGitRef.create!(project_id: 456, ref: 'ref3') - BatchedGitUpdates::ToBeDeletedGitRef.create!(project_id: 789, ref: 'ref4', status: :processed) - end - - it 'schedules DeletedGitRefCleanupProjectWorker for each project in pending BatchedGitUpdates::ToBeDeletedGitRef' do - expect(BatchedGitUpdates::DeletedGitRefCleanupProjectWorker).to receive(:perform_async).with(123) - expect(BatchedGitUpdates::DeletedGitRefCleanupProjectWorker).to receive(:perform_async).with(456) - expect(BatchedGitUpdates::DeletedGitRefCleanupProjectWorker).not_to receive(:perform_async).with(789) - - service.execute - end - - it 'returns stats' do - # TODO: - end - - it 'acquires a lock to avoid running duplicate instances' do - # TODO: - end - end -end diff --git a/spec/workers/batched_git_updates/deleted_git_ref_cleanup_project_worker.rb b/spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker.rb similarity index 82% rename from spec/workers/batched_git_updates/deleted_git_ref_cleanup_project_worker.rb rename to spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker.rb index cdd3b72f490de2..b1772951e74597 100644 --- a/spec/workers/batched_git_updates/deleted_git_ref_cleanup_project_worker.rb +++ b/spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker.rb @@ -1,6 +1,6 @@ require 'spec_helper' -RSpec.describe BatchedGitUpdates::DeletedGitRefCleanupProjectWorker, feature_category: :gitaly do +RSpec.describe BatchedGitRefUpdates::DeletedGitRefCleanupProjectWorker, feature_category: :gitaly do let(:service) { instance_double(DeletedGitRefCleanupProjectService) } let(:worker) { described_class.new } diff --git a/spec/workers/batched_git_updates/deleted_git_ref_cleanup_scheduler_worker_spec.rb b/spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_worker_spec.rb similarity index 50% rename from spec/workers/batched_git_updates/deleted_git_ref_cleanup_scheduler_worker_spec.rb rename to spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_worker_spec.rb index 04d2130a9542d4..5f8d8128328433 100644 --- a/spec/workers/batched_git_updates/deleted_git_ref_cleanup_scheduler_worker_spec.rb +++ b/spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_worker_spec.rb @@ -1,12 +1,12 @@ require 'spec_helper' -RSpec.describe BatchedGitUpdates::DeletedGitRefCleanupSchedulerWorker, feature_category: :gitaly do - let(:service) { instance_double(BatchedGitUpdates::DeletedGitRefCleanupSchedulerService) } +RSpec.describe BatchedGitRefUpdates::DeletedGitRefCleanupSchedulerWorker, feature_category: :gitaly do + let(:service) { instance_double(BatchedGitRefUpdates::DeletedGitRefCleanupSchedulerService) } let(:worker) { described_class.new } describe '#perform' do it 'delegates to DeletedGitRefCleanupSchedulerService' do - expect(BatchedGitUpdates::DeletedGitRefCleanupSchedulerService).to receive(:new).and_return(service) + expect(BatchedGitRefUpdates::DeletedGitRefCleanupSchedulerService).to receive(:new).and_return(service) expect(service).to receive(:execute) worker.perform -- GitLab From a437cd7b355513542d3cc9623476dc378b629861 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Tue, 11 Jul 2023 14:04:58 +1000 Subject: [PATCH 11/38] Fix up DeletedGitRefCleanupProjectService and spec --- .../batched_git_ref_updates/deletion.rb | 28 +++++---- ...deleted_git_ref_cleanup_project_service.rb | 27 ++++++--- ...leted_git_ref_cleanup_scheduler_service.rb | 1 + ...d_git_ref_cleanup_project_service_spec.rb} | 58 ++++++++++++++----- 4 files changed, 82 insertions(+), 32 deletions(-) rename spec/services/batched_git_ref_updates/{deleted_git_ref_cleanup_project_service.rb => deleted_git_ref_cleanup_project_service_spec.rb} (53%) diff --git a/app/models/batched_git_ref_updates/deletion.rb b/app/models/batched_git_ref_updates/deletion.rb index 3f9800d15019ca..f61ff961d4287e 100644 --- a/app/models/batched_git_ref_updates/deletion.rb +++ b/app/models/batched_git_ref_updates/deletion.rb @@ -4,21 +4,25 @@ module BatchedGitRefUpdates class Deletion < ApplicationRecord PARTITION_DURATION = 1.day + include IgnorableColumns include BulkInsertSafe include PartitionedTable include EachBatch self.table_name = 'p_batched_git_ref_updates_deletions' self.primary_key = :id - self.ignored_columns = %i[partition_id] self.sequence_name = :to_be_deleted_git_refs_id_seq + ignore_column :partition_id, remove_with: '16.4', remove_after: '2023-09-22' + belongs_to :project, inverse_of: :to_be_deleted_git_refs - scope :for_partition, -> (partition) { where(partition_id: partition) } + scope :for_partition, ->(partition) { where(partition_id: partition) } + scope :pluck_ref, -> { pluck(:ref) } + scope :for_project, ->(project_id) { where(project_id: project_id) } partitioned_by :partition_id, strategy: :sliding_list, - next_partition_if: -> (active_partition) do + next_partition_if: ->(active_partition) do oldest_record_in_partition = Deletion .select(:id, :created_at) .for_partition(active_partition.value) @@ -29,21 +33,21 @@ class Deletion < ApplicationRecord oldest_record_in_partition.present? && oldest_record_in_partition.created_at < PARTITION_DURATION.ago end, - detach_partition_if: -> (partition) do + detach_partition_if: ->(partition) do !Deletion .for_partition(partition.value) .status_pending .exists? end - enum status: { pending: 1, processed: 2 }, _prefix: :status + enum status: { pending: 1, processed: 2 }, _prefix: :status - def self.mark_records_processed(records) - # TODO: Look at app/models/loose_foreign_keys/deleted_record.rb which is grouping by partition when doing this - # update. This is presumably because we've not loaded `partition_id` and therefore this will be updating by `id` - # which needs to search all partitions. If we assume that there is likely normally just 1 partition and sometimes 2 - # partitions this may be fine though. - records.update_all(status: :processed) - end + # TODO: Look at app/models/loose_foreign_keys/deleted_record.rb which is grouping by partition when doing this + # update. This is presumably because we've not loaded `partition_id` and therefore this will be updating by `id` + # which needs to search all partitions. If we assume that there is likely normally just 1 partition and sometimes + # 2 partitions this may be fine though. + def self.mark_records_processed(records) + records.update_all(status: :processed) + end end end diff --git a/app/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service.rb b/app/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service.rb index f86f0e510831d0..51e863bddcc0fa 100644 --- a/app/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service.rb +++ b/app/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service.rb @@ -2,21 +2,34 @@ module BatchedGitRefUpdates class DeletedGitRefCleanupProjectService + include ::Gitlab::ExclusiveLeaseHelpers + + LOCK_TIMEOUT = 10.minutes + BATCH_SIZE = 100 # TODO: Pick a number that will finish in 30s + MAX_DELETES = 10_000 # TODO: Pick a number that will finish in 30s + def initialize(project_id) @project_id = project_id end def execute - project = Project.find(@project_id) - Deletion.where(project_id: @project_id).each_batch do |batch| - batch.each do |to_be_deleted_git_ref| - ref = to_be_deleted_git_ref.ref + total_deletes = 0 - project.repository.delete_refs(ref) - end + in_lock("#{self.class}/#{@project_id}", retries: 0, ttl: LOCK_TIMEOUT) do + project = Project.find_by_id(@project_id) + break unless project - Deletion.mark_records_processed(batch) + Deletion.status_pending.for_project(@project_id).each_batch(of: BATCH_SIZE) do |batch| + refs = batch.pluck_ref + project.repository.delete_refs(*refs) + total_deletes += refs.count + Deletion.mark_records_processed(batch) + + break if total_deletes >= MAX_DELETES + end end + + { total_deletes: total_deletes } end end end diff --git a/app/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service.rb b/app/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service.rb index 11d880c9f3068f..e111982de8e4b5 100644 --- a/app/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service.rb +++ b/app/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service.rb @@ -4,6 +4,7 @@ module BatchedGitRefUpdates class DeletedGitRefCleanupSchedulerService def execute Deletion.status_pending.distinct_each_batch(column: :project_id, of: 1000) do |records| + # TODO: bulk_perform_async_with_context records.each do |record| project_id = record.project_id DeletedGitRefCleanupProjectWorker.perform_async(project_id) diff --git a/spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service.rb b/spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service_spec.rb similarity index 53% rename from spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service.rb rename to spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service_spec.rb index 2b43d540c5a047..5a555201845197 100644 --- a/spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service.rb +++ b/spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service_spec.rb @@ -6,27 +6,44 @@ let(:service) { described_class.new(project1.id) } let_it_be(:project1) { create(:project, :repository) } let_it_be(:project2) { create(:project, :repository) } - let!(:project1_ref1) { BatchedGitRefUpdates::Deletion.create!(project_id: project1.id, ref: 'refs/test/project1-ref1') } - let!(:project1_ref2) { BatchedGitRefUpdates::Deletion.create!(project_id: project1.id, ref: 'refs/test/project1-ref2') } - let!(:project2_ref1) { BatchedGitRefUpdates::Deletion.create!(project_id: project1.id, ref: 'refs/test/project2-ref1') } + let!(:project1_ref1) do + BatchedGitRefUpdates::Deletion.create!(project_id: project1.id, ref: 'refs/test/project1-ref1') + end + + let!(:project1_ref2) do + BatchedGitRefUpdates::Deletion.create!(project_id: project1.id, ref: 'refs/test/project1-ref2') + end + + let!(:project1_ref3) do + BatchedGitRefUpdates::Deletion.create!(project_id: project1.id, ref: 'refs/test/already-processed', + status: :processed) + end + + let!(:project2_ref1) do + BatchedGitRefUpdates::Deletion.create!(project_id: project2.id, ref: 'refs/test/project2-ref1') + end describe '#execute' do before do project1.repository.create_ref('HEAD', 'refs/test/ref-to-not-be-deleted') project1.repository.create_ref('HEAD', project1_ref1.ref) project1.repository.create_ref('HEAD', project1_ref2.ref) + project1.repository.create_ref('HEAD', 'refs/test/already-processed') project2.repository.create_ref('HEAD', project2_ref1.ref) end it 'deletes the named refs in batches for the given project only' do expect(test_refs(project1)).to include( 'refs/test/ref-to-not-be-deleted', + 'refs/test/already-processed', + 'refs/test/project1-ref1', 'refs/test/project1-ref1', 'refs/test/project1-ref2') service.execute expect(test_refs(project1)).to include( + 'refs/test/already-processed', 'refs/test/ref-to-not-be-deleted') expect(test_refs(project1)).not_to include( @@ -39,28 +56,43 @@ it 'marks the processed BatchedGitRefUpdates::Deletion as processed' do service.execute - expect(BatchedGitRefUpdates::Deletion.status_pending.map(&:ref)).to eq('test/refs/project2_ref1') - expect(BatchedGitRefUpdates::Deletion.status_processed.map(&:ref)).to eq('test/refs/project1_ref1', 'test/refs/project1_ref2') - end - - def test_refs(project) - project.repository.list_refs(['refs/test/']).map(&:name) + expect(BatchedGitRefUpdates::Deletion.status_pending.map(&:ref)).to contain_exactly('refs/test/project2-ref1') + expect(BatchedGitRefUpdates::Deletion.status_processed.map(&:ref)).to contain_exactly( + 'refs/test/project1-ref1', + 'refs/test/project1-ref2', + 'refs/test/already-processed') end it 'returns stats' do - # TODO: + result = service.execute + + expect(result[:total_deletes]).to eq(2) end it 'acquires a lock for the given project_id to avoid running duplicate instances' do - # TODO: + expect(service).to receive(:in_lock) # Mock and don't yield + .with("#{described_class}/#{project1.id}", retries: 0, ttl: described_class::LOCK_TIMEOUT) + + expect { service.execute }.not_to change { BatchedGitRefUpdates::Deletion.status_pending.count } end it 'does nothing when the project does not exist' do - # TODO: + result = described_class.new(non_existing_record_id).execute + + expect(result[:total_deletes]).to eq(0) end it 'stops after it reaches limit of deleted refs' do - # TODO: + stub_const("#{described_class}::BATCH_SIZE", 1) + stub_const("#{described_class}::MAX_DELETES", 1) + + result = service.execute + + expect(result[:total_deletes]).to eq(1) + end + + def test_refs(project) + project.repository.list_refs(['refs/test/']).map(&:name) end end end -- GitLab From 23ff2857a919bf8b5918f2d40c7dc5935ed5a1c7 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Tue, 11 Jul 2023 15:08:05 +1000 Subject: [PATCH 12/38] Fix DeletedGitRefCleanupProjectWorker and spec --- .../deleted_git_ref_cleanup_project_worker.rb | 7 ++++- .../deleted_git_ref_cleanup_project_worker.rb | 19 ------------ ...ted_git_ref_cleanup_project_worker_spec.rb | 29 +++++++++++++++++++ 3 files changed, 35 insertions(+), 20 deletions(-) delete mode 100644 spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker.rb create mode 100644 spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker_spec.rb diff --git a/app/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker.rb b/app/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker.rb index 539c39cf3d8712..afa374750af85f 100644 --- a/app/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker.rb +++ b/app/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker.rb @@ -4,10 +4,15 @@ module BatchedGitRefUpdates class DeletedGitRefCleanupProjectWorker include ApplicationWorker + idempotent! + data_consistency :delayed + feature_category :gitaly def perform(project_id) - DeletedGitRefCleanupProjectService.new(project_id).execute + stats = DeletedGitRefCleanupProjectService.new(project_id).execute + + log_extra_metadata_on_done(:stats, stats) end end end diff --git a/spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker.rb b/spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker.rb deleted file mode 100644 index b1772951e74597..00000000000000 --- a/spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker.rb +++ /dev/null @@ -1,19 +0,0 @@ -require 'spec_helper' - -RSpec.describe BatchedGitRefUpdates::DeletedGitRefCleanupProjectWorker, feature_category: :gitaly do - let(:service) { instance_double(DeletedGitRefCleanupProjectService) } - let(:worker) { described_class.new } - - describe '#perform' do - it 'delegates to DeletedGitRefCleanupProjectService' do - expect(DeletedGitRefCleanupProjectService).to receive(:new).with(123).and_return(service) - expect(service).to receive(:execute) - - worker.perform(123) - end - - it 'logs stats' do - # TODO: log_extra_metadata_on_done(:stats, stats) - end - end -end diff --git a/spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker_spec.rb b/spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker_spec.rb new file mode 100644 index 00000000000000..f2474e22071cae --- /dev/null +++ b/spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker_spec.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe BatchedGitRefUpdates::DeletedGitRefCleanupProjectWorker, feature_category: :gitaly do + let(:stats) { { total_deletes: 456 } } + let(:service) { instance_double(BatchedGitRefUpdates::DeletedGitRefCleanupProjectService, execute: stats) } + let(:worker) { described_class.new } + + describe '#perform' do + before do + allow(BatchedGitRefUpdates::DeletedGitRefCleanupProjectService).to receive(:new).with(123).and_return(service) + end + + it 'delegates to DeletedGitRefCleanupProjectService' do + expect(service).to receive(:execute) + + worker.perform(123) + end + + it 'logs stats' do + worker.perform(123) + + expect(worker.logging_extras).to eq({ + "extra.batched_git_ref_updates_deleted_git_ref_cleanup_project_worker.stats" => { total_deletes: 456 } + }) + end + end +end -- GitLab From d3400975f8acf629733072812e6c4f4021b10509 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Wed, 12 Jul 2023 14:10:33 +1000 Subject: [PATCH 13/38] Fix DeletedGitRefCleanupSchedulerWorker and spec --- .../deleted_git_ref_cleanup_scheduler_worker.rb | 10 ++++++++-- ...eted_git_ref_cleanup_scheduler_worker_spec.rb | 16 +++++++++++++--- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/app/workers/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_worker.rb b/app/workers/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_worker.rb index 637e404f56c622..1771d198e5213e 100644 --- a/app/workers/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_worker.rb +++ b/app/workers/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_worker.rb @@ -3,12 +3,18 @@ module BatchedGitRefUpdates class DeletedGitRefCleanupSchedulerWorker include ApplicationWorker - include CronjobQueue + # Ignore RuboCop as the context is added in the service + include CronjobQueue # rubocop:disable Scalability/CronWorkerContext + + idempotent! + data_consistency :sticky feature_category :gitaly def perform - DeletedGitRefCleanupSchedulerService.new.execute + stats = DeletedGitRefCleanupSchedulerService.new.execute + + log_extra_metadata_on_done(:stats, stats) end end end diff --git a/spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_worker_spec.rb b/spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_worker_spec.rb index 5f8d8128328433..fca715363bc35e 100644 --- a/spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_worker_spec.rb +++ b/spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_worker_spec.rb @@ -1,19 +1,29 @@ +# frozen_string_literal: true + require 'spec_helper' RSpec.describe BatchedGitRefUpdates::DeletedGitRefCleanupSchedulerWorker, feature_category: :gitaly do - let(:service) { instance_double(BatchedGitRefUpdates::DeletedGitRefCleanupSchedulerService) } + let(:stats) { { total_projects: 456 } } + let(:service) { instance_double(BatchedGitRefUpdates::DeletedGitRefCleanupSchedulerService, execute: stats) } let(:worker) { described_class.new } describe '#perform' do + before do + allow(BatchedGitRefUpdates::DeletedGitRefCleanupSchedulerService).to receive(:new).and_return(service) + end + it 'delegates to DeletedGitRefCleanupSchedulerService' do - expect(BatchedGitRefUpdates::DeletedGitRefCleanupSchedulerService).to receive(:new).and_return(service) expect(service).to receive(:execute) worker.perform end it 'logs stats' do - # TODO: log_extra_metadata_on_done(:stats, stats) + worker.perform + + expect(worker.logging_extras).to eq({ + "extra.batched_git_ref_updates_deleted_git_ref_cleanup_scheduler_worker.stats" => { total_projects: 456 } + }) end end end -- GitLab From 32806afe136d6c18a7b3c809458943de55a1bd56 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Wed, 12 Jul 2023 15:08:59 +1000 Subject: [PATCH 14/38] Fix DeletedGitRefCleanupSchedulerService and spec --- ...leted_git_ref_cleanup_scheduler_service.rb | 24 +++++++++++--- ..._git_ref_cleanup_scheduler_service_spec.rb | 33 ++++++++++++++++--- 2 files changed, 47 insertions(+), 10 deletions(-) diff --git a/app/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service.rb b/app/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service.rb index e111982de8e4b5..3e0ae38b2cbdf3 100644 --- a/app/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service.rb +++ b/app/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service.rb @@ -2,14 +2,28 @@ module BatchedGitRefUpdates class DeletedGitRefCleanupSchedulerService + include Gitlab::ExclusiveLeaseHelpers + MAX_PROJECTS = 10_000 + BATCH_SIZE = 100 + LOCK_TIMEOUT = 10.minutes + def execute - Deletion.status_pending.distinct_each_batch(column: :project_id, of: 1000) do |records| - # TODO: bulk_perform_async_with_context - records.each do |record| - project_id = record.project_id - DeletedGitRefCleanupProjectWorker.perform_async(project_id) + total_projects = 0 + + in_lock(self.class.name, retries: 0, ttl: LOCK_TIMEOUT) do + Deletion.status_pending.distinct_each_batch(column: :project_id, of: BATCH_SIZE) do |deletions| + DeletedGitRefCleanupProjectWorker.bulk_perform_async_with_contexts( + deletions, + arguments_proc: ->(deletion) { deletion.project_id }, + context_proc: ->(_) { {} } # No project context because loading the project is wasteful + ) + + total_projects += deletions.count + break if total_projects >= MAX_PROJECTS end end + + { total_projects: total_projects } end end end diff --git a/spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service_spec.rb b/spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service_spec.rb index 49cc577ba4f440..da17ee01307b3a 100644 --- a/spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service_spec.rb +++ b/spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service_spec.rb @@ -14,19 +14,42 @@ end it 'schedules DeletedGitRefCleanupProjectWorker for each project in pending BatchedGitRefUpdates::Deletion' do - expect(BatchedGitRefUpdates::DeletedGitRefCleanupProjectWorker).to receive(:perform_async).with(123) - expect(BatchedGitRefUpdates::DeletedGitRefCleanupProjectWorker).to receive(:perform_async).with(456) - expect(BatchedGitRefUpdates::DeletedGitRefCleanupProjectWorker).not_to receive(:perform_async).with(789) + project_ids = [] + expect(BatchedGitRefUpdates::DeletedGitRefCleanupProjectWorker) + .to receive(:bulk_perform_async_with_contexts) do |deletions, arguments_proc:, context_proc:| # rubocop:disable Lint/UnusedBlockArgument + project_ids += deletions.map(&arguments_proc) + end service.execute + + expect(project_ids).to contain_exactly(123, 456) end it 'returns stats' do - # TODO: + stats = service.execute + + expect(stats).to eq({ + total_projects: 2 + }) end it 'acquires a lock to avoid running duplicate instances' do - # TODO: + expect(service).to receive(:in_lock) # Mock and don't yield + .with(described_class.name, retries: 0, ttl: described_class::LOCK_TIMEOUT) + expect(BatchedGitRefUpdates::DeletedGitRefCleanupProjectWorker).not_to receive(:bulk_perform_async_with_contexts) + + service.execute + end + + it 'limits to MAX_PROJECTS before it stops' do + stub_const("#{described_class}::BATCH_SIZE", 1) + stub_const("#{described_class}::MAX_PROJECTS", 1) + + stats = service.execute + + expect(stats).to eq({ + total_projects: 1 + }) end end end -- GitLab From 9e3c0334423f49ddc37fe8d30b61576671f1d0d2 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Wed, 12 Jul 2023 15:46:18 +1000 Subject: [PATCH 15/38] Specs for Deletion --- .../batched_git_ref_updates/deletion.rb | 4 - .../batched_git_ref_updates/deletion_spec.rb | 125 ++++++++++++++++++ 2 files changed, 125 insertions(+), 4 deletions(-) create mode 100644 spec/models/batched_git_ref_updates/deletion_spec.rb diff --git a/app/models/batched_git_ref_updates/deletion.rb b/app/models/batched_git_ref_updates/deletion.rb index f61ff961d4287e..83d1637f4cefd8 100644 --- a/app/models/batched_git_ref_updates/deletion.rb +++ b/app/models/batched_git_ref_updates/deletion.rb @@ -42,10 +42,6 @@ class Deletion < ApplicationRecord enum status: { pending: 1, processed: 2 }, _prefix: :status - # TODO: Look at app/models/loose_foreign_keys/deleted_record.rb which is grouping by partition when doing this - # update. This is presumably because we've not loaded `partition_id` and therefore this will be updating by `id` - # which needs to search all partitions. If we assume that there is likely normally just 1 partition and sometimes - # 2 partitions this may be fine though. def self.mark_records_processed(records) records.update_all(status: :processed) end diff --git a/spec/models/batched_git_ref_updates/deletion_spec.rb b/spec/models/batched_git_ref_updates/deletion_spec.rb new file mode 100644 index 00000000000000..8a1a31d9d53598 --- /dev/null +++ b/spec/models/batched_git_ref_updates/deletion_spec.rb @@ -0,0 +1,125 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe BatchedGitRefUpdates::Deletion, feature_category: :gitaly do + describe '.mark_records_processed' do + let_it_be(:deletion_1) { described_class.create!(project_id: 5, ref: 'refs/test/1') } + let_it_be(:deletion_2) { described_class.create!(project_id: 1, ref: 'refs/test/2') } + let_it_be(:deletion_3) { described_class.create!(project_id: 3, ref: 'refs/test/3') } + let_it_be(:deletion_4) { described_class.create!(project_id: 1, ref: 'refs/test/4') } + let_it_be(:deletion_5) { described_class.create!(project_id: 4, ref: 'refs/test/5', status: :processed) } + + it 'updates all records' do + expect(described_class.status_pending.count).to eq(4) + expect(described_class.status_processed.count).to eq(1) + + deletions = described_class.for_project(1) + described_class.mark_records_processed(deletions) + + deletions.each do |deletion| + expect(deletion.reload.status).to eq("processed") + end + + expect(described_class.status_pending.count).to eq(2) + expect(described_class.status_processed.count).to eq(3) + end + end + + describe 'sliding_list partitioning' do + let(:partition_manager) { Gitlab::Database::Partitioning::PartitionManager.new(described_class) } + + describe 'next_partition_if callback' do + let(:active_partition) { described_class.partitioning_strategy.active_partition } + + subject(:value) { described_class.partitioning_strategy.next_partition_if.call(active_partition) } + + context 'when the partition is empty' do + it { is_expected.to eq(false) } + end + + context 'when the partition has records' do + before do + described_class.create!(project_id: 1, ref: 'refs/test/1', status: :processed) + described_class.create!(project_id: 2, ref: 'refs/test/2', status: :pending) + end + + it { is_expected.to eq(false) } + end + + context 'when the first record of the partition is older than PARTITION_DURATION' do + before do + described_class.create!( + project_id: 1, + ref: 'refs/test/1', + created_at: (described_class::PARTITION_DURATION + 1.day).ago) + + described_class.create!(project_id: 2, ref: 'refs/test/2') + end + + it { is_expected.to eq(true) } + end + end + + describe 'detach_partition_if callback' do + let(:active_partition) { described_class.partitioning_strategy.active_partition } + + subject(:value) { described_class.partitioning_strategy.detach_partition_if.call(active_partition) } + + context 'when the partition contains unprocessed records' do + before do + described_class.create!(project_id: 1, ref: 'refs/test/1') + described_class.create!(project_id: 2, ref: 'refs/test/2', status: :processed) + end + + it { is_expected.to eq(false) } + end + + context 'when the partition contains only processed records' do + before do + described_class.create!(project_id: 1, ref: 'refs/test/1', status: :processed) + described_class.create!(project_id: 2, ref: 'refs/test/2', status: :processed) + end + + it { is_expected.to eq(true) } + end + end + + describe 'the behavior of the strategy' do + it 'moves records to new partitions as time passes', :freeze_time do + # We start with partition 1 + expect(described_class.partitioning_strategy.current_partitions.map(&:value)).to eq([1]) + + # it's not a day old yet so no new partitions are created + partition_manager.sync_partitions + + expect(described_class.partitioning_strategy.current_partitions.map(&:value)).to eq([1]) + + # add one record so the next partition will be created + described_class.create!(project_id: 1, ref: 'refs/test/1') + + # after traveling forward a day + travel(described_class::PARTITION_DURATION + 1.second) + + # a new partition is created + partition_manager.sync_partitions + + expect(described_class.partitioning_strategy.current_partitions.map(&:value)).to eq([1, 2]) + + # and we can insert to the new partition + described_class.create!(project_id: 2, ref: 'refs/test/2') + + # after processing old records + described_class.mark_records_processed(described_class.for_partition(1)) + + partition_manager.sync_partitions + + # the old one is removed + expect(described_class.partitioning_strategy.current_partitions.map(&:value)).to eq([2]) + + # and we only have the newly created partition left. + expect(described_class.count).to eq(1) + end + end + end +end -- GitLab From c23b60b6d06dab9f7ad84cc4b48f9e894fc4985d Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Wed, 12 Jul 2023 16:56:12 +1000 Subject: [PATCH 16/38] Specs for pipeline and merge_request async deletes --- app/models/ci/pipeline.rb | 2 +- app/models/merge_request.rb | 2 +- ..._request_delete_gitaly_refs_in_batches.yml | 8 ++++ ...ipeline_delete_gitaly_refs_in_batches.yml} | 2 +- spec/models/ci/pipeline_spec.rb | 46 ++++++++++++------- spec/models/merge_request_spec.rb | 31 +++++++++---- 6 files changed, 64 insertions(+), 27 deletions(-) create mode 100644 config/feature_flags/development/merge_request_delete_gitaly_refs_in_batches.yml rename config/feature_flags/development/{delete_gitaly_refs_in_batches.yml => pipeline_delete_gitaly_refs_in_batches.yml} (83%) diff --git a/app/models/ci/pipeline.rb b/app/models/ci/pipeline.rb index 30005fa28974f6..e2f017b1d14afb 100644 --- a/app/models/ci/pipeline.rb +++ b/app/models/ci/pipeline.rb @@ -342,7 +342,7 @@ class Pipeline < Ci::ApplicationRecord # This needs to be kept in sync with `Ci::PipelineRef#should_delete?` after_transition any => ::Ci::Pipeline.stopped_statuses do |pipeline| pipeline.run_after_commit do - if Feature.enabled?(:delete_gitaly_refs_in_batches, pipeline.project) + if Feature.enabled?(:pipeline_delete_gitaly_refs_in_batches, pipeline.project) pipeline.persistent_ref.async_delete elsif Feature.enabled?(:pipeline_cleanup_ref_worker_async, pipeline.project) ::Ci::PipelineCleanupRefWorker.perform_async(pipeline.id) diff --git a/app/models/merge_request.rb b/app/models/merge_request.rb index 37c18ac9f0a1f8..cc5152df12fc86 100644 --- a/app/models/merge_request.rb +++ b/app/models/merge_request.rb @@ -1537,7 +1537,7 @@ def train_ref_path end def schedule_cleanup_refs(only: :all) - if Feature.enabled?(:delete_gitaly_refs_in_batches, target_project) + if Feature.enabled?(:merge_request_delete_gitaly_refs_in_batches, target_project) async_cleanup_refs(only: only) elsif Feature.enabled?(:merge_request_cleanup_ref_worker_async, target_project) MergeRequests::CleanupRefWorker.perform_async(id, only.to_s) diff --git a/config/feature_flags/development/merge_request_delete_gitaly_refs_in_batches.yml b/config/feature_flags/development/merge_request_delete_gitaly_refs_in_batches.yml new file mode 100644 index 00000000000000..dee7916df601ca --- /dev/null +++ b/config/feature_flags/development/merge_request_delete_gitaly_refs_in_batches.yml @@ -0,0 +1,8 @@ +--- +name: merge_request_delete_gitaly_refs_in_batches +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/125333 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/416969 +milestone: '16.2' +type: development +group: group::gitaly +default_enabled: false diff --git a/config/feature_flags/development/delete_gitaly_refs_in_batches.yml b/config/feature_flags/development/pipeline_delete_gitaly_refs_in_batches.yml similarity index 83% rename from config/feature_flags/development/delete_gitaly_refs_in_batches.yml rename to config/feature_flags/development/pipeline_delete_gitaly_refs_in_batches.yml index 8ff93e4ee1c274..bd2b2ca8411945 100644 --- a/config/feature_flags/development/delete_gitaly_refs_in_batches.yml +++ b/config/feature_flags/development/pipeline_delete_gitaly_refs_in_batches.yml @@ -1,5 +1,5 @@ --- -name: delete_gitaly_refs_in_batches +name: pipeline_delete_gitaly_refs_in_batches introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/125333 rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/416969 milestone: '16.2' diff --git a/spec/models/ci/pipeline_spec.rb b/spec/models/ci/pipeline_spec.rb index ae3725a0b08196..1e3ce8bc656046 100644 --- a/spec/models/ci/pipeline_spec.rb +++ b/spec/models/ci/pipeline_spec.rb @@ -1328,33 +1328,47 @@ def create_build(name, status) %w[succeed! drop! cancel! skip! block! delay!].each do |action| context "when the pipeline received #{action} event" do - it 'deletes a persistent ref asynchronously', :sidekiq_inline do - expect(pipeline.persistent_ref).not_to receive(:delete_refs) - - expect(Ci::PipelineCleanupRefWorker).to receive(:perform_async) - .with(pipeline.id).and_call_original - - expect_next_instance_of(Ci::PersistentRef) do |persistent_ref| - expect(persistent_ref).to receive(:delete_refs) - .with("refs/#{Repository::REF_PIPELINES}/#{pipeline.id}").once - end + it 'deletes a persistent ref asynchronously' do + expect(pipeline.persistent_ref).to receive(:async_delete) + expect(pipeline.persistent_ref).not_to receive(:delete) pipeline.public_send(action) end - context 'when pipeline_cleanup_ref_worker_async is disabled' do + context 'when pipeline_delete_gitaly_refs_in_batches is disabled' do before do - stub_feature_flags(pipeline_cleanup_ref_worker_async: false) + stub_feature_flags(pipeline_delete_gitaly_refs_in_batches: false) end - it 'deletes a persistent ref synchronously' do - expect(Ci::PipelineCleanupRefWorker).not_to receive(:perform_async).with(pipeline.id) + it 'deletes a persistent ref asynchronously via ::Ci::PipelineCleanupRefWorker', :sidekiq_inline do + expect(pipeline.persistent_ref).not_to receive(:delete_refs) - expect(pipeline.persistent_ref).to receive(:delete_refs).once - .with("refs/#{Repository::REF_PIPELINES}/#{pipeline.id}") + expect(Ci::PipelineCleanupRefWorker).to receive(:perform_async) + .with(pipeline.id).and_call_original + + expect_next_instance_of(Ci::PersistentRef) do |persistent_ref| + expect(persistent_ref).to receive(:delete_refs) + .with("refs/#{Repository::REF_PIPELINES}/#{pipeline.id}").once + end pipeline.public_send(action) end + + context 'when pipeline_cleanup_ref_worker_async is disabled' do + before do + stub_feature_flags(pipeline_delete_gitaly_refs_in_batches: false) + stub_feature_flags(pipeline_cleanup_ref_worker_async: false) + end + + it 'deletes a persistent ref synchronously' do + expect(Ci::PipelineCleanupRefWorker).not_to receive(:perform_async).with(pipeline.id) + + expect(pipeline.persistent_ref).to receive(:delete_refs).once + .with("refs/#{Repository::REF_PIPELINES}/#{pipeline.id}") + + pipeline.public_send(action) + end + end end end end diff --git a/spec/models/merge_request_spec.rb b/spec/models/merge_request_spec.rb index bf71d289105356..1dfd14c0993f9b 100644 --- a/spec/models/merge_request_spec.rb +++ b/spec/models/merge_request_spec.rb @@ -5124,24 +5124,39 @@ def transition! let(:merge_request) { build(:merge_request, source_project: create(:project, :repository)) } - it 'does schedule MergeRequests::CleanupRefWorker' do - expect(MergeRequests::CleanupRefWorker).to receive(:perform_async).with(merge_request.id, 'train') + it 'deletes refs asynchronously' do + expect(merge_request.target_project.repository) + .to receive(:async_delete_refs) + .with(merge_request.train_ref_path) subject end - context 'when merge_request_cleanup_ref_worker_async is disabled' do + context 'when merge_request_delete_gitaly_refs_in_batches is disabled' do before do - stub_feature_flags(merge_request_cleanup_ref_worker_async: false) + stub_feature_flags(merge_request_delete_gitaly_refs_in_batches: false) end - it 'deletes all refs from the target project' do - expect(merge_request.target_project.repository) - .to receive(:delete_refs) - .with(merge_request.train_ref_path) + it 'does schedule MergeRequests::CleanupRefWorker' do + expect(MergeRequests::CleanupRefWorker).to receive(:perform_async).with(merge_request.id, 'train') subject end + + context 'when merge_request_cleanup_ref_worker_async is disabled' do + before do + stub_feature_flags(merge_request_delete_gitaly_refs_in_batches: false) + stub_feature_flags(merge_request_cleanup_ref_worker_async: false) + end + + it 'deletes all refs from the target project' do + expect(merge_request.target_project.repository) + .to receive(:delete_refs) + .with(merge_request.train_ref_path) + + subject + end + end end end -- GitLab From 0c2ef55b74b02b04e87e3c535d05573c341e1956 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Thu, 13 Jul 2023 12:05:08 +1000 Subject: [PATCH 17/38] Rename CleanupSchedulerService and ProjectCleanupWorker --- ...ervice.rb => cleanup_scheduler_service.rb} | 4 +-- ..._service.rb => project_cleanup_service.rb} | 2 +- ..._worker.rb => cleanup_scheduler_worker.rb} | 4 +-- ...ct_worker.rb => project_cleanup_worker.rb} | 4 +-- config/initializers/1_settings.rb | 6 ++-- ...c.rb => cleanup_scheduler_service_spec.rb} | 8 ++--- ...pec.rb => project_cleanup_service_spec.rb} | 2 +- .../cleanup_scheduler_worker_spec.rb | 29 +++++++++++++++++++ ...ted_git_ref_cleanup_project_worker_spec.rb | 29 ------------------- ...d_git_ref_cleanup_scheduler_worker_spec.rb | 29 ------------------- .../project_cleanup_worker_spec.rb | 29 +++++++++++++++++++ 11 files changed, 73 insertions(+), 73 deletions(-) rename app/services/batched_git_ref_updates/{deleted_git_ref_cleanup_scheduler_service.rb => cleanup_scheduler_service.rb} (86%) rename app/services/batched_git_ref_updates/{deleted_git_ref_cleanup_project_service.rb => project_cleanup_service.rb} (95%) rename app/workers/batched_git_ref_updates/{deleted_git_ref_cleanup_scheduler_worker.rb => cleanup_scheduler_worker.rb} (78%) rename app/workers/batched_git_ref_updates/{deleted_git_ref_cleanup_project_worker.rb => project_cleanup_worker.rb} (69%) rename spec/services/batched_git_ref_updates/{deleted_git_ref_cleanup_scheduler_service_spec.rb => cleanup_scheduler_service_spec.rb} (77%) rename spec/services/batched_git_ref_updates/{deleted_git_ref_cleanup_project_service_spec.rb => project_cleanup_service_spec.rb} (96%) create mode 100644 spec/workers/batched_git_ref_updates/cleanup_scheduler_worker_spec.rb delete mode 100644 spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker_spec.rb delete mode 100644 spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_worker_spec.rb create mode 100644 spec/workers/batched_git_ref_updates/project_cleanup_worker_spec.rb diff --git a/app/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service.rb b/app/services/batched_git_ref_updates/cleanup_scheduler_service.rb similarity index 86% rename from app/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service.rb rename to app/services/batched_git_ref_updates/cleanup_scheduler_service.rb index 3e0ae38b2cbdf3..cf547c0e6b5162 100644 --- a/app/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service.rb +++ b/app/services/batched_git_ref_updates/cleanup_scheduler_service.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true module BatchedGitRefUpdates - class DeletedGitRefCleanupSchedulerService + class CleanupSchedulerService include Gitlab::ExclusiveLeaseHelpers MAX_PROJECTS = 10_000 BATCH_SIZE = 100 @@ -12,7 +12,7 @@ def execute in_lock(self.class.name, retries: 0, ttl: LOCK_TIMEOUT) do Deletion.status_pending.distinct_each_batch(column: :project_id, of: BATCH_SIZE) do |deletions| - DeletedGitRefCleanupProjectWorker.bulk_perform_async_with_contexts( + ProjectCleanupWorker.bulk_perform_async_with_contexts( deletions, arguments_proc: ->(deletion) { deletion.project_id }, context_proc: ->(_) { {} } # No project context because loading the project is wasteful diff --git a/app/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service.rb b/app/services/batched_git_ref_updates/project_cleanup_service.rb similarity index 95% rename from app/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service.rb rename to app/services/batched_git_ref_updates/project_cleanup_service.rb index 51e863bddcc0fa..2ea4a8131e4c28 100644 --- a/app/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service.rb +++ b/app/services/batched_git_ref_updates/project_cleanup_service.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true module BatchedGitRefUpdates - class DeletedGitRefCleanupProjectService + class ProjectCleanupService include ::Gitlab::ExclusiveLeaseHelpers LOCK_TIMEOUT = 10.minutes diff --git a/app/workers/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_worker.rb b/app/workers/batched_git_ref_updates/cleanup_scheduler_worker.rb similarity index 78% rename from app/workers/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_worker.rb rename to app/workers/batched_git_ref_updates/cleanup_scheduler_worker.rb index 1771d198e5213e..9c50e319be04c1 100644 --- a/app/workers/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_worker.rb +++ b/app/workers/batched_git_ref_updates/cleanup_scheduler_worker.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true module BatchedGitRefUpdates - class DeletedGitRefCleanupSchedulerWorker + class CleanupSchedulerWorker include ApplicationWorker # Ignore RuboCop as the context is added in the service include CronjobQueue # rubocop:disable Scalability/CronWorkerContext @@ -12,7 +12,7 @@ class DeletedGitRefCleanupSchedulerWorker feature_category :gitaly def perform - stats = DeletedGitRefCleanupSchedulerService.new.execute + stats = CleanupSchedulerService.new.execute log_extra_metadata_on_done(:stats, stats) end diff --git a/app/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker.rb b/app/workers/batched_git_ref_updates/project_cleanup_worker.rb similarity index 69% rename from app/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker.rb rename to app/workers/batched_git_ref_updates/project_cleanup_worker.rb index afa374750af85f..b2b1df29430ea2 100644 --- a/app/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker.rb +++ b/app/workers/batched_git_ref_updates/project_cleanup_worker.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true module BatchedGitRefUpdates - class DeletedGitRefCleanupProjectWorker + class ProjectCleanupWorker include ApplicationWorker idempotent! @@ -10,7 +10,7 @@ class DeletedGitRefCleanupProjectWorker feature_category :gitaly def perform(project_id) - stats = DeletedGitRefCleanupProjectService.new(project_id).execute + stats = ProjectCleanupService.new(project_id).execute log_extra_metadata_on_done(:stats, stats) end diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index 63c1ea9f89dc76..c12bbb334edeb1 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -661,9 +661,9 @@ Settings.cron_jobs['loose_foreign_keys_cleanup_worker'] ||= {} Settings.cron_jobs['loose_foreign_keys_cleanup_worker']['cron'] ||= '*/1 * * * *' Settings.cron_jobs['loose_foreign_keys_cleanup_worker']['job_class'] = 'LooseForeignKeys::CleanupWorker' -Settings.cron_jobs['deleted_git_ref_cleanup_scheduler_worker'] ||= {} -Settings.cron_jobs['deleted_git_ref_cleanup_scheduler_worker']['cron'] ||= '*/1 * * * *' -Settings.cron_jobs['deleted_git_ref_cleanup_scheduler_worker']['job_class'] = 'DeletedGitRefCleanupSchedulerWorker' +Settings.cron_jobs['batched_git_ref_updates_cleanup_scheduler_worker'] ||= {} +Settings.cron_jobs['batched_git_ref_updates_cleanup_scheduler_worker']['cron'] ||= '*/1 * * * *' +Settings.cron_jobs['batched_git_ref_updates_cleanup_scheduler_worker']['job_class'] = 'BatchedGitRefUpdates::CleanupSchedulerWorker' Settings.cron_jobs['ci_runner_versions_reconciliation_worker'] ||= {} Settings.cron_jobs['ci_runner_versions_reconciliation_worker']['cron'] ||= '@daily' Settings.cron_jobs['ci_runner_versions_reconciliation_worker']['job_class'] = 'Ci::Runners::ReconcileExistingRunnerVersionsCronWorker' diff --git a/spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service_spec.rb b/spec/services/batched_git_ref_updates/cleanup_scheduler_service_spec.rb similarity index 77% rename from spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service_spec.rb rename to spec/services/batched_git_ref_updates/cleanup_scheduler_service_spec.rb index da17ee01307b3a..50081a5e9e78dd 100644 --- a/spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_service_spec.rb +++ b/spec/services/batched_git_ref_updates/cleanup_scheduler_service_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe BatchedGitRefUpdates::DeletedGitRefCleanupSchedulerService, feature_category: :gitaly do +RSpec.describe BatchedGitRefUpdates::CleanupSchedulerService, feature_category: :gitaly do let(:service) { described_class.new } describe '#execute' do @@ -13,9 +13,9 @@ BatchedGitRefUpdates::Deletion.create!(project_id: 789, ref: 'ref4', status: :processed) end - it 'schedules DeletedGitRefCleanupProjectWorker for each project in pending BatchedGitRefUpdates::Deletion' do + it 'schedules ProjectCleanupWorker for each project in pending BatchedGitRefUpdates::Deletion' do project_ids = [] - expect(BatchedGitRefUpdates::DeletedGitRefCleanupProjectWorker) + expect(BatchedGitRefUpdates::ProjectCleanupWorker) .to receive(:bulk_perform_async_with_contexts) do |deletions, arguments_proc:, context_proc:| # rubocop:disable Lint/UnusedBlockArgument project_ids += deletions.map(&arguments_proc) end @@ -36,7 +36,7 @@ it 'acquires a lock to avoid running duplicate instances' do expect(service).to receive(:in_lock) # Mock and don't yield .with(described_class.name, retries: 0, ttl: described_class::LOCK_TIMEOUT) - expect(BatchedGitRefUpdates::DeletedGitRefCleanupProjectWorker).not_to receive(:bulk_perform_async_with_contexts) + expect(BatchedGitRefUpdates::ProjectCleanupWorker).not_to receive(:bulk_perform_async_with_contexts) service.execute end diff --git a/spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service_spec.rb b/spec/services/batched_git_ref_updates/project_cleanup_service_spec.rb similarity index 96% rename from spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service_spec.rb rename to spec/services/batched_git_ref_updates/project_cleanup_service_spec.rb index 5a555201845197..e86e0fde765fe7 100644 --- a/spec/services/batched_git_ref_updates/deleted_git_ref_cleanup_project_service_spec.rb +++ b/spec/services/batched_git_ref_updates/project_cleanup_service_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe BatchedGitRefUpdates::DeletedGitRefCleanupProjectService, feature_category: :gitaly do +RSpec.describe BatchedGitRefUpdates::ProjectCleanupService, feature_category: :gitaly do let(:service) { described_class.new(project1.id) } let_it_be(:project1) { create(:project, :repository) } let_it_be(:project2) { create(:project, :repository) } diff --git a/spec/workers/batched_git_ref_updates/cleanup_scheduler_worker_spec.rb b/spec/workers/batched_git_ref_updates/cleanup_scheduler_worker_spec.rb new file mode 100644 index 00000000000000..507c2535fcab16 --- /dev/null +++ b/spec/workers/batched_git_ref_updates/cleanup_scheduler_worker_spec.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe BatchedGitRefUpdates::CleanupSchedulerWorker, feature_category: :gitaly do + let(:stats) { { total_projects: 456 } } + let(:service) { instance_double(BatchedGitRefUpdates::CleanupSchedulerService, execute: stats) } + let(:worker) { described_class.new } + + describe '#perform' do + before do + allow(BatchedGitRefUpdates::CleanupSchedulerService).to receive(:new).and_return(service) + end + + it 'delegates to CleanupSchedulerService' do + expect(service).to receive(:execute) + + worker.perform + end + + it 'logs stats' do + worker.perform + + expect(worker.logging_extras).to eq({ + "extra.batched_git_ref_updates_cleanup_scheduler_worker.stats" => { total_projects: 456 } + }) + end + end +end diff --git a/spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker_spec.rb b/spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker_spec.rb deleted file mode 100644 index f2474e22071cae..00000000000000 --- a/spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_project_worker_spec.rb +++ /dev/null @@ -1,29 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe BatchedGitRefUpdates::DeletedGitRefCleanupProjectWorker, feature_category: :gitaly do - let(:stats) { { total_deletes: 456 } } - let(:service) { instance_double(BatchedGitRefUpdates::DeletedGitRefCleanupProjectService, execute: stats) } - let(:worker) { described_class.new } - - describe '#perform' do - before do - allow(BatchedGitRefUpdates::DeletedGitRefCleanupProjectService).to receive(:new).with(123).and_return(service) - end - - it 'delegates to DeletedGitRefCleanupProjectService' do - expect(service).to receive(:execute) - - worker.perform(123) - end - - it 'logs stats' do - worker.perform(123) - - expect(worker.logging_extras).to eq({ - "extra.batched_git_ref_updates_deleted_git_ref_cleanup_project_worker.stats" => { total_deletes: 456 } - }) - end - end -end diff --git a/spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_worker_spec.rb b/spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_worker_spec.rb deleted file mode 100644 index fca715363bc35e..00000000000000 --- a/spec/workers/batched_git_ref_updates/deleted_git_ref_cleanup_scheduler_worker_spec.rb +++ /dev/null @@ -1,29 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe BatchedGitRefUpdates::DeletedGitRefCleanupSchedulerWorker, feature_category: :gitaly do - let(:stats) { { total_projects: 456 } } - let(:service) { instance_double(BatchedGitRefUpdates::DeletedGitRefCleanupSchedulerService, execute: stats) } - let(:worker) { described_class.new } - - describe '#perform' do - before do - allow(BatchedGitRefUpdates::DeletedGitRefCleanupSchedulerService).to receive(:new).and_return(service) - end - - it 'delegates to DeletedGitRefCleanupSchedulerService' do - expect(service).to receive(:execute) - - worker.perform - end - - it 'logs stats' do - worker.perform - - expect(worker.logging_extras).to eq({ - "extra.batched_git_ref_updates_deleted_git_ref_cleanup_scheduler_worker.stats" => { total_projects: 456 } - }) - end - end -end diff --git a/spec/workers/batched_git_ref_updates/project_cleanup_worker_spec.rb b/spec/workers/batched_git_ref_updates/project_cleanup_worker_spec.rb new file mode 100644 index 00000000000000..e7db60f33beea3 --- /dev/null +++ b/spec/workers/batched_git_ref_updates/project_cleanup_worker_spec.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe BatchedGitRefUpdates::ProjectCleanupWorker, feature_category: :gitaly do + let(:stats) { { total_deletes: 456 } } + let(:service) { instance_double(BatchedGitRefUpdates::ProjectCleanupService, execute: stats) } + let(:worker) { described_class.new } + + describe '#perform' do + before do + allow(BatchedGitRefUpdates::ProjectCleanupService).to receive(:new).with(123).and_return(service) + end + + it 'delegates to ProjectCleanupService' do + expect(service).to receive(:execute) + + worker.perform(123) + end + + it 'logs stats' do + worker.perform(123) + + expect(worker.logging_extras).to eq({ + "extra.batched_git_ref_updates_project_cleanup_worker.stats" => { total_deletes: 456 } + }) + end + end +end -- GitLab From 0ef9bcc1126e23218c0917c02bfce1ec2d061b9a Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Thu, 13 Jul 2023 12:08:08 +1000 Subject: [PATCH 18/38] Generate sidekiq queues yml --- app/workers/all_queues.yml | 18 ++++++++++++++++++ config/sidekiq_queues.yml | 2 ++ 2 files changed, 20 insertions(+) diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index 4f2d101239b415..847c11266d0bce 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -219,6 +219,15 @@ :weight: 1 :idempotent: true :tags: [] +- :name: cronjob:batched_git_ref_updates_cleanup_scheduler + :worker_name: BatchedGitRefUpdates::CleanupSchedulerWorker + :feature_category: :gitaly + :has_external_dependencies: false + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: true + :tags: [] - :name: cronjob:bulk_imports_stuck_import :worker_name: BulkImports::StuckImportWorker :feature_category: :importers @@ -2298,6 +2307,15 @@ :weight: 1 :idempotent: false :tags: [] +- :name: batched_git_ref_updates_project_cleanup + :worker_name: BatchedGitRefUpdates::ProjectCleanupWorker + :feature_category: :gitaly + :has_external_dependencies: false + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: true + :tags: [] - :name: bitbucket_server_import_advance_stage :worker_name: Gitlab::BitbucketServerImport::AdvanceStageWorker :feature_category: :importers diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml index 0205d24d48bd20..665f2bb7419525 100644 --- a/config/sidekiq_queues.yml +++ b/config/sidekiq_queues.yml @@ -77,6 +77,8 @@ - 1 - - batched_background_migrations - 1 +- - batched_git_ref_updates_project_cleanup + - 1 - - bitbucket_server_import_advance_stage - 1 - - bitbucket_server_import_import_lfs_object -- GitLab From 72bf96913d1c253918fe658e60c0cce2a8719afc Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Thu, 13 Jul 2023 13:21:06 +1000 Subject: [PATCH 19/38] Revert db/structure.sql --- db/structure.sql | 44 ++++++-------------------------------------- 1 file changed, 6 insertions(+), 38 deletions(-) diff --git a/db/structure.sql b/db/structure.sql index 439ddaa709fbb2..fc0b145ad3823f 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -488,7 +488,7 @@ PARTITION BY RANGE (created_at); CREATE TABLE p_ci_job_annotations ( id bigint NOT NULL, - partition_id bigint DEFAULT 100 NOT NULL, + partition_id bigint NOT NULL, job_id bigint NOT NULL, name text NOT NULL, data jsonb DEFAULT '[]'::jsonb NOT NULL, @@ -498,7 +498,7 @@ CREATE TABLE p_ci_job_annotations ( PARTITION BY LIST (partition_id); CREATE TABLE p_ci_runner_machine_builds ( - partition_id bigint DEFAULT 100 NOT NULL, + partition_id bigint NOT NULL, build_id bigint NOT NULL, runner_machine_id bigint NOT NULL ) @@ -13163,7 +13163,7 @@ CREATE TABLE p_ci_builds ( scheduling_type smallint, id bigint NOT NULL, stage_id bigint, - partition_id bigint DEFAULT 100 NOT NULL, + partition_id bigint NOT NULL, CONSTRAINT check_1e2fbd1b39 CHECK ((lock_version IS NOT NULL)) ) PARTITION BY LIST (partition_id); @@ -13221,7 +13221,7 @@ CREATE TABLE ci_builds ( scheduling_type smallint, id bigint DEFAULT nextval('ci_builds_id_seq'::regclass) NOT NULL, stage_id bigint, - partition_id bigint DEFAULT 100 NOT NULL, + partition_id bigint NOT NULL, CONSTRAINT check_1e2fbd1b39 CHECK ((lock_version IS NOT NULL)) ); ALTER TABLE ONLY p_ci_builds ATTACH PARTITION ci_builds FOR VALUES IN ('100'); @@ -13241,7 +13241,7 @@ CREATE TABLE p_ci_builds_metadata ( id bigint NOT NULL, runtime_runner_features jsonb DEFAULT '{}'::jsonb NOT NULL, id_tokens jsonb DEFAULT '{}'::jsonb NOT NULL, - partition_id bigint DEFAULT 100 NOT NULL, + partition_id bigint NOT NULL, debug_trace_enabled boolean DEFAULT false NOT NULL ) PARTITION BY LIST (partition_id); @@ -13270,7 +13270,7 @@ CREATE TABLE ci_builds_metadata ( id bigint DEFAULT nextval('ci_builds_metadata_id_seq'::regclass) NOT NULL, runtime_runner_features jsonb DEFAULT '{}'::jsonb NOT NULL, id_tokens jsonb DEFAULT '{}'::jsonb NOT NULL, - partition_id bigint DEFAULT 100 NOT NULL, + partition_id bigint NOT NULL, debug_trace_enabled boolean DEFAULT false NOT NULL ); ALTER TABLE ONLY p_ci_builds_metadata ATTACH PARTITION ci_builds_metadata FOR VALUES IN ('100'); @@ -19437,27 +19437,6 @@ CREATE SEQUENCE organizations_id_seq ALTER SEQUENCE organizations_id_seq OWNED BY organizations.id; -CREATE TABLE p_batched_git_ref_updates_deletions ( - id bigint NOT NULL, - project_id bigint NOT NULL, - ref text NOT NULL, - partition_id bigint DEFAULT 1 NOT NULL, - status smallint DEFAULT 1 NOT NULL, - created_at timestamp(6) without time zone NOT NULL, - updated_at timestamp(6) without time zone NOT NULL, - CONSTRAINT check_fb9a8cab2b CHECK ((char_length(ref) <= 1024)) -) -PARTITION BY LIST (partition_id); - -CREATE SEQUENCE p_batched_git_ref_updates_deletions_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - -ALTER SEQUENCE p_batched_git_ref_updates_deletions_id_seq OWNED BY p_batched_git_ref_updates_deletions.id; - CREATE SEQUENCE p_ci_job_annotations_id_seq START WITH 1 INCREMENT BY 1 @@ -25783,8 +25762,6 @@ ALTER TABLE ONLY organization_users ALTER COLUMN id SET DEFAULT nextval('organiz ALTER TABLE ONLY organizations ALTER COLUMN id SET DEFAULT nextval('organizations_id_seq'::regclass); -ALTER TABLE ONLY p_batched_git_ref_updates_deletions ALTER COLUMN id SET DEFAULT nextval('p_batched_git_ref_updates_deletions_id_seq'::regclass); - ALTER TABLE ONLY p_ci_builds ALTER COLUMN id SET DEFAULT nextval('ci_builds_id_seq'::regclass); ALTER TABLE ONLY p_ci_builds_metadata ALTER COLUMN id SET DEFAULT nextval('ci_builds_metadata_id_seq'::regclass); @@ -28068,9 +28045,6 @@ ALTER TABLE ONLY organization_users ALTER TABLE ONLY organizations ADD CONSTRAINT organizations_pkey PRIMARY KEY (id); -ALTER TABLE ONLY p_batched_git_ref_updates_deletions - ADD CONSTRAINT p_batched_git_ref_updates_deletions_pkey PRIMARY KEY (id, partition_id); - ALTER TABLE ONLY p_ci_job_annotations ADD CONSTRAINT p_ci_job_annotations_pkey PRIMARY KEY (id, partition_id); @@ -32356,12 +32330,6 @@ CREATE INDEX index_organization_users_on_user_id ON organization_users USING btr CREATE UNIQUE INDEX index_organizations_on_unique_name_per_group ON customer_relations_organizations USING btree (group_id, lower(name), id); -CREATE INDEX index_p_batched_git_ref_updates_deletions_on_partition_id ON ONLY p_batched_git_ref_updates_deletions USING btree (partition_id); - -CREATE INDEX index_p_batched_git_ref_updates_deletions_on_project_id ON ONLY p_batched_git_ref_updates_deletions USING btree (project_id); - -CREATE INDEX index_p_batched_git_ref_updates_deletions_on_status ON ONLY p_batched_git_ref_updates_deletions USING btree (status); - CREATE UNIQUE INDEX index_p_ci_job_annotations_on_partition_id_job_id_name ON ONLY p_ci_job_annotations USING btree (partition_id, job_id, name); CREATE INDEX index_p_ci_runner_machine_builds_on_runner_machine_id ON ONLY p_ci_runner_machine_builds USING btree (runner_machine_id); -- GitLab From b8a1f6bc34199443ac96794770a7186814afbafd Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Thu, 13 Jul 2023 13:30:33 +1000 Subject: [PATCH 20/38] Fix migrations --- ...table_batched_git_ref_updates_deletions.rb | 2 +- ...nt_to_batched_git_ref_updates_deletions.rb | 2 +- db/structure.sql | 32 +++++++++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb b/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb index e372460eb13ffd..26ebfbfaa402c0 100644 --- a/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb +++ b/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -class CreateTableDeletions < Gitlab::Database::Migration[2.1] +class CreateTableBatchedGitRefUpdatesDeletions < Gitlab::Database::Migration[2.1] enable_lock_retries! def up diff --git a/db/migrate/20230704235236_add_constraint_to_batched_git_ref_updates_deletions.rb b/db/migrate/20230704235236_add_constraint_to_batched_git_ref_updates_deletions.rb index dd8bd9c8b80a3c..49c82cfdc39227 100644 --- a/db/migrate/20230704235236_add_constraint_to_batched_git_ref_updates_deletions.rb +++ b/db/migrate/20230704235236_add_constraint_to_batched_git_ref_updates_deletions.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -class AddConstraintToDeletionsRef < Gitlab::Database::Migration[2.1] +class AddConstraintToBatchedGitRefUpdatesDeletions < Gitlab::Database::Migration[2.1] disable_ddl_transaction! def up diff --git a/db/structure.sql b/db/structure.sql index fc0b145ad3823f..604297e2199a40 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -486,6 +486,18 @@ CREATE TABLE batched_background_migration_job_transition_logs ( ) PARTITION BY RANGE (created_at); +CREATE TABLE p_batched_git_ref_updates_deletions ( + id bigint NOT NULL, + project_id bigint NOT NULL, + ref text NOT NULL, + partition_id bigint DEFAULT 1 NOT NULL, + status smallint DEFAULT 1 NOT NULL, + created_at timestamp(6) without time zone NOT NULL, + updated_at timestamp(6) without time zone NOT NULL, + CONSTRAINT check_f322d53b92 CHECK ((char_length(ref) <= 1024)) +) +PARTITION BY LIST (partition_id); + CREATE TABLE p_ci_job_annotations ( id bigint NOT NULL, partition_id bigint NOT NULL, @@ -19437,6 +19449,15 @@ CREATE SEQUENCE organizations_id_seq ALTER SEQUENCE organizations_id_seq OWNED BY organizations.id; +CREATE SEQUENCE p_batched_git_ref_updates_deletions_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + +ALTER SEQUENCE p_batched_git_ref_updates_deletions_id_seq OWNED BY p_batched_git_ref_updates_deletions.id; + CREATE SEQUENCE p_ci_job_annotations_id_seq START WITH 1 INCREMENT BY 1 @@ -25762,6 +25783,8 @@ ALTER TABLE ONLY organization_users ALTER COLUMN id SET DEFAULT nextval('organiz ALTER TABLE ONLY organizations ALTER COLUMN id SET DEFAULT nextval('organizations_id_seq'::regclass); +ALTER TABLE ONLY p_batched_git_ref_updates_deletions ALTER COLUMN id SET DEFAULT nextval('p_batched_git_ref_updates_deletions_id_seq'::regclass); + ALTER TABLE ONLY p_ci_builds ALTER COLUMN id SET DEFAULT nextval('ci_builds_id_seq'::regclass); ALTER TABLE ONLY p_ci_builds_metadata ALTER COLUMN id SET DEFAULT nextval('ci_builds_metadata_id_seq'::regclass); @@ -28045,6 +28068,9 @@ ALTER TABLE ONLY organization_users ALTER TABLE ONLY organizations ADD CONSTRAINT organizations_pkey PRIMARY KEY (id); +ALTER TABLE ONLY p_batched_git_ref_updates_deletions + ADD CONSTRAINT p_batched_git_ref_updates_deletions_pkey PRIMARY KEY (id, partition_id); + ALTER TABLE ONLY p_ci_job_annotations ADD CONSTRAINT p_ci_job_annotations_pkey PRIMARY KEY (id, partition_id); @@ -32330,6 +32356,12 @@ CREATE INDEX index_organization_users_on_user_id ON organization_users USING btr CREATE UNIQUE INDEX index_organizations_on_unique_name_per_group ON customer_relations_organizations USING btree (group_id, lower(name), id); +CREATE INDEX index_p_batched_git_ref_updates_deletions_on_partition_id ON ONLY p_batched_git_ref_updates_deletions USING btree (partition_id); + +CREATE INDEX index_p_batched_git_ref_updates_deletions_on_project_id ON ONLY p_batched_git_ref_updates_deletions USING btree (project_id); + +CREATE INDEX index_p_batched_git_ref_updates_deletions_on_status ON ONLY p_batched_git_ref_updates_deletions USING btree (status); + CREATE UNIQUE INDEX index_p_ci_job_annotations_on_partition_id_job_id_name ON ONLY p_ci_job_annotations USING btree (partition_id, job_id, name); CREATE INDEX index_p_ci_runner_machine_builds_on_runner_machine_id ON ONLY p_ci_runner_machine_builds USING btree (runner_machine_id); -- GitLab From c58cc3dbb845baf9916a2c8d5548191b53b81f51 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Thu, 13 Jul 2023 14:53:24 +1000 Subject: [PATCH 21/38] Fix rubocop issues in migrations Changelog: changed --- ...ate_table_batched_git_ref_updates_deletions.rb | 15 +++++++++------ ...traint_to_batched_git_ref_updates_deletions.rb | 1 - db/structure.sql | 6 ++++-- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb b/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb index 26ebfbfaa402c0..ab66ee6a1a0ed8 100644 --- a/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb +++ b/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb @@ -3,6 +3,8 @@ class CreateTableBatchedGitRefUpdatesDeletions < Gitlab::Database::Migration[2.1] enable_lock_retries! + # rubocop:disable Migration/AddLimitToTextColumns + # limit is added in 20230704235236_add_constraint_to_batched_git_ref_updates_deletions.rb def up options = { primary_key: [:id, :partition_id], @@ -12,16 +14,16 @@ def up create_table(:p_batched_git_ref_updates_deletions, **options) do |t| t.bigserial :id, null: false - # TODO: Is it worth making this generic. Repository model uses `#container` so we could support (presumably) group - # wikis and so-on here. Does it matter? Or should we just build that later since our main focus on performance is - # for projects. - t.bigint :project_id, index: true, null: false # Do not bother with foreign key as it provides not benefit and has a performance cost. These get cleaned up over time anyway. + # Do not bother with foreign key as it provides not benefit and has a performance cost. These get cleaned up over + # time anyway. + t.bigint :project_id, index: true, null: false t.text :ref, null: false t.bigint :partition_id, null: false, index: true, default: 1 t.integer :status, null: false, index: true, default: 1, limit: 2 - t.index :project_id, where: 'status = 1' + t.index :project_id, where: 'status = 1', + name: :idx_git_ref_deletions_on_project_id_where_pending - t.timestamps null: false + t.timestamps_with_timezone null: false end connection.execute(<<~SQL) @@ -30,6 +32,7 @@ def up FOR VALUES IN (1); SQL end + # rubocop:enable Migration/AddLimitToTextColumns def down connection.execute(<<~SQL) diff --git a/db/migrate/20230704235236_add_constraint_to_batched_git_ref_updates_deletions.rb b/db/migrate/20230704235236_add_constraint_to_batched_git_ref_updates_deletions.rb index 49c82cfdc39227..6aa9b6b40d6a4c 100644 --- a/db/migrate/20230704235236_add_constraint_to_batched_git_ref_updates_deletions.rb +++ b/db/migrate/20230704235236_add_constraint_to_batched_git_ref_updates_deletions.rb @@ -4,7 +4,6 @@ class AddConstraintToBatchedGitRefUpdatesDeletions < Gitlab::Database::Migration disable_ddl_transaction! def up - # TODO: Is there a reliable upper limit here? If something exceeds this upper limit we won't be able to delete it. Should we make it very large or just not have a limit? add_text_limit :p_batched_git_ref_updates_deletions, :ref, 1024 end diff --git a/db/structure.sql b/db/structure.sql index 604297e2199a40..54abe1980b87be 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -492,8 +492,8 @@ CREATE TABLE p_batched_git_ref_updates_deletions ( ref text NOT NULL, partition_id bigint DEFAULT 1 NOT NULL, status smallint DEFAULT 1 NOT NULL, - created_at timestamp(6) without time zone NOT NULL, - updated_at timestamp(6) without time zone NOT NULL, + created_at timestamp with time zone NOT NULL, + updated_at timestamp with time zone NOT NULL, CONSTRAINT check_f322d53b92 CHECK ((char_length(ref) <= 1024)) ) PARTITION BY LIST (partition_id); @@ -30048,6 +30048,8 @@ CREATE UNIQUE INDEX idx_environment_merge_requests_unique_index ON deployment_me CREATE UNIQUE INDEX idx_external_audit_event_destination_id_key_uniq ON audit_events_streaming_headers USING btree (key, external_audit_event_destination_id); +CREATE INDEX idx_git_ref_deletions_on_project_id_where_pending ON ONLY p_batched_git_ref_updates_deletions USING btree (project_id) WHERE (status = 1); + CREATE INDEX idx_headers_instance_external_audit_event_destination_id ON instance_audit_events_streaming_headers USING btree (instance_external_audit_event_destination_id); CREATE INDEX idx_installable_conan_pkgs_on_project_id_id ON packages_packages USING btree (project_id, id) WHERE ((package_type = 3) AND (status = ANY (ARRAY[0, 1]))); -- GitLab From 1e66989c6e90fa78bb89b0fd1756c3e07043c194 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Thu, 13 Jul 2023 15:34:27 +1000 Subject: [PATCH 22/38] More RSpec fixes --- app/models/project.rb | 2 -- spec/db/schema_spec.rb | 1 + spec/models/ci/persistent_ref_spec.rb | 28 ++++++++++++++----- .../cleanup_refs_service_spec.rb | 1 + 4 files changed, 23 insertions(+), 9 deletions(-) diff --git a/app/models/project.rb b/app/models/project.rb index d618e7c825fb23..d3642e7d7d032e 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -456,8 +456,6 @@ def self.integration_association_name(name) has_one :ci_project_mirror, class_name: 'Ci::ProjectMirror' has_many :sync_events, class_name: 'Projects::SyncEvent' - has_many :batched_git_ref_updates_deletions, inverse_of: :project - has_one :build_artifacts_size_refresh, class_name: 'Projects::BuildArtifactsSizeRefresh' accepts_nested_attributes_for :variables, allow_destroy: true diff --git a/spec/db/schema_spec.rb b/spec/db/schema_spec.rb index c22292cb82c6c4..a0f59a659bd09b 100644 --- a/spec/db/schema_spec.rb +++ b/spec/db/schema_spec.rb @@ -88,6 +88,7 @@ oauth_access_tokens: %w[resource_owner_id application_id], oauth_applications: %w[owner_id], p_ci_builds: %w[project_id runner_id user_id erased_by_id trigger_request_id partition_id], + p_batched_git_ref_updates_deletions: %w[project_id partition_id], product_analytics_events_experimental: %w[event_id txn_id user_id], project_build_artifacts_size_refreshes: %w[last_job_artifact_id], project_data_transfers: %w[project_id namespace_id], diff --git a/spec/models/ci/persistent_ref_spec.rb b/spec/models/ci/persistent_ref_spec.rb index ecaa8f59ecfd55..ed4ea02d8ba4cd 100644 --- a/spec/models/ci/persistent_ref_spec.rb +++ b/spec/models/ci/persistent_ref_spec.rb @@ -3,26 +3,40 @@ require 'spec_helper' RSpec.describe Ci::PersistentRef do - it 'cleans up persistent refs after pipeline finished', :sidekiq_inline do + it 'cleans up persistent refs async after pipeline finished' do pipeline = create(:ci_pipeline, :running) - expect(Ci::PipelineCleanupRefWorker).to receive(:perform_async).with(pipeline.id) - - pipeline.succeed! + expect { pipeline.succeed! } + .to change { ::BatchedGitRefUpdates::Deletion.count } + .by(1) end - context 'when pipeline_cleanup_ref_worker_async is disabled' do + context 'when pipeline_delete_gitaly_refs_in_batches is disabled' do before do - stub_feature_flags(pipeline_cleanup_ref_worker_async: false) + stub_feature_flags(pipeline_delete_gitaly_refs_in_batches: false) end it 'cleans up persistent refs after pipeline finished' do pipeline = create(:ci_pipeline, :running) - expect(pipeline.persistent_ref).to receive(:delete).once + expect(Ci::PipelineCleanupRefWorker).to receive(:perform_async).with(pipeline.id) pipeline.succeed! end + + context 'when pipeline_cleanup_ref_worker_async is disabled' do + before do + stub_feature_flags(pipeline_cleanup_ref_worker_async: false) + end + + it 'cleans up persistent refs after pipeline finished' do + pipeline = create(:ci_pipeline, :running) + + expect(pipeline.persistent_ref).to receive(:delete).once + + pipeline.succeed! + end + end end describe '#exist?' do diff --git a/spec/services/merge_requests/cleanup_refs_service_spec.rb b/spec/services/merge_requests/cleanup_refs_service_spec.rb index efb6265e3d8dbe..c818c60ad5fb50 100644 --- a/spec/services/merge_requests/cleanup_refs_service_spec.rb +++ b/spec/services/merge_requests/cleanup_refs_service_spec.rb @@ -18,6 +18,7 @@ describe '#execute' do before do + stub_feature_flags(merge_request_delete_gitaly_refs_in_batches: false) stub_feature_flags(merge_request_cleanup_ref_worker_async: false) # Need to re-enable this as it's being stubbed in spec_helper for -- GitLab From f7cdb0e468bf71a82f58e4460749d4758f5ecce5 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Fri, 14 Jul 2023 08:09:38 +1000 Subject: [PATCH 23/38] Fix migration partition name --- ...table_batched_git_ref_updates_deletions.rb | 4 ++-- db/structure.sql | 24 +++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb b/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb index ab66ee6a1a0ed8..953bd3189c34e4 100644 --- a/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb +++ b/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb @@ -27,7 +27,7 @@ def up end connection.execute(<<~SQL) - CREATE TABLE IF NOT EXISTS gitlab_partitions_dynamic.batched_git_ref_updates_deletions_1 + CREATE TABLE IF NOT EXISTS gitlab_partitions_dynamic.p_batched_git_ref_updates_deletions_1 PARTITION OF p_batched_git_ref_updates_deletions FOR VALUES IN (1); SQL @@ -36,7 +36,7 @@ def up def down connection.execute(<<~SQL) - DROP TABLE IF EXISTS gitlab_partitions_dynamic.batched_git_ref_updates_deletions_1; + DROP TABLE IF EXISTS gitlab_partitions_dynamic.p_batched_git_ref_updates_deletions_1; SQL drop_table :p_batched_git_ref_updates_deletions diff --git a/db/structure.sql b/db/structure.sql index 54abe1980b87be..dc6337305efe2e 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -486,18 +486,6 @@ CREATE TABLE batched_background_migration_job_transition_logs ( ) PARTITION BY RANGE (created_at); -CREATE TABLE p_batched_git_ref_updates_deletions ( - id bigint NOT NULL, - project_id bigint NOT NULL, - ref text NOT NULL, - partition_id bigint DEFAULT 1 NOT NULL, - status smallint DEFAULT 1 NOT NULL, - created_at timestamp with time zone NOT NULL, - updated_at timestamp with time zone NOT NULL, - CONSTRAINT check_f322d53b92 CHECK ((char_length(ref) <= 1024)) -) -PARTITION BY LIST (partition_id); - CREATE TABLE p_ci_job_annotations ( id bigint NOT NULL, partition_id bigint NOT NULL, @@ -549,6 +537,18 @@ CREATE TABLE loose_foreign_keys_deleted_records ( ) PARTITION BY LIST (partition); +CREATE TABLE p_batched_git_ref_updates_deletions ( + id bigint NOT NULL, + project_id bigint NOT NULL, + ref text NOT NULL, + partition_id bigint DEFAULT 1 NOT NULL, + status smallint DEFAULT 1 NOT NULL, + created_at timestamp with time zone NOT NULL, + updated_at timestamp with time zone NOT NULL, + CONSTRAINT check_f322d53b92 CHECK ((char_length(ref) <= 1024)) +) +PARTITION BY LIST (partition_id); + CREATE TABLE security_findings ( id bigint NOT NULL, scan_id bigint NOT NULL, -- GitLab From 8edd7f35d4f816cb2d54de2412b4118c3c6ab287 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Mon, 17 Jul 2023 09:59:28 +1000 Subject: [PATCH 24/38] Mark processed per partition --- .../batched_git_ref_updates/deletion.rb | 20 +++++++++++++++---- .../project_cleanup_service.rb | 4 ++-- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/app/models/batched_git_ref_updates/deletion.rb b/app/models/batched_git_ref_updates/deletion.rb index 83d1637f4cefd8..f2059a37517421 100644 --- a/app/models/batched_git_ref_updates/deletion.rb +++ b/app/models/batched_git_ref_updates/deletion.rb @@ -13,13 +13,13 @@ class Deletion < ApplicationRecord self.primary_key = :id self.sequence_name = :to_be_deleted_git_refs_id_seq - ignore_column :partition_id, remove_with: '16.4', remove_after: '2023-09-22' - belongs_to :project, inverse_of: :to_be_deleted_git_refs + attr_readonly :partition_id + scope :for_partition, ->(partition) { where(partition_id: partition) } - scope :pluck_ref, -> { pluck(:ref) } scope :for_project, ->(project_id) { where(project_id: project_id) } + scope :select_ref_and_identity, -> { select(:ref, :id, :partition_id) } partitioned_by :partition_id, strategy: :sliding_list, next_partition_if: ->(active_partition) do @@ -43,7 +43,19 @@ class Deletion < ApplicationRecord enum status: { pending: 1, processed: 2 }, _prefix: :status def self.mark_records_processed(records) - records.update_all(status: :processed) + update_by_partition(records) do |partitioned_scope| + partitioned_scope.update_all(status: :processed) + end + end + + def self.update_by_partition(records) + records.group_by(&:partition_id).each do |partition, records_within_partition| + partitioned_scope = status_pending + .for_partition(partition) + .where(id: records_within_partition.map(&:id)) + + yield(partitioned_scope) + end end end end diff --git a/app/services/batched_git_ref_updates/project_cleanup_service.rb b/app/services/batched_git_ref_updates/project_cleanup_service.rb index 2ea4a8131e4c28..37617edb8004dc 100644 --- a/app/services/batched_git_ref_updates/project_cleanup_service.rb +++ b/app/services/batched_git_ref_updates/project_cleanup_service.rb @@ -19,8 +19,8 @@ def execute project = Project.find_by_id(@project_id) break unless project - Deletion.status_pending.for_project(@project_id).each_batch(of: BATCH_SIZE) do |batch| - refs = batch.pluck_ref + Deletion.status_pending.for_project(@project_id).select_ref_and_identity.each_batch(of: BATCH_SIZE) do |batch| + refs = batch.map(&:ref) project.repository.delete_refs(*refs) total_deletes += refs.count Deletion.mark_records_processed(batch) -- GitLab From b303987c634df4934419537b2620da07435025da Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Mon, 17 Jul 2023 10:00:42 +1000 Subject: [PATCH 25/38] Remove TODO only for projects now --- lib/gitlab/git/repository.rb | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index 7c802112558cfb..4d7bd549dc844a 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -751,8 +751,6 @@ def delete_branch(branch_name) end def async_delete_refs(*refs) - # TODO: Should we just try and support all containers? It would probably require an extra column for - # container_type and then we'd need to group by container_id, container_type when batch deleting. raise "async_delete_refs only supports project repositories" unless container.is_a?(Project) records = refs.map do |ref| -- GitLab From 4fea031d3f21d54466cb7b123cc8a88ca38193c0 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Mon, 17 Jul 2023 16:04:29 +1000 Subject: [PATCH 26/38] Fix index on project_id, id --- ...04233431_create_table_batched_git_ref_updates_deletions.rb | 4 ++-- db/structure.sql | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb b/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb index 953bd3189c34e4..f000626765f2a6 100644 --- a/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb +++ b/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb @@ -20,8 +20,8 @@ def up t.text :ref, null: false t.bigint :partition_id, null: false, index: true, default: 1 t.integer :status, null: false, index: true, default: 1, limit: 2 - t.index :project_id, where: 'status = 1', - name: :idx_git_ref_deletions_on_project_id_where_pending + t.index [:project_id, :id], where: 'status = 1', + name: :idx_deletions_on_project_id_and_id_where_pending t.timestamps_with_timezone null: false end diff --git a/db/structure.sql b/db/structure.sql index dc6337305efe2e..b24042a3dde450 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -30030,6 +30030,8 @@ CREATE INDEX idx_container_exp_policies_on_project_id_next_run_at_enabled ON con CREATE INDEX idx_container_repos_on_exp_cleanup_status_project_id_start_date ON container_repositories USING btree (expiration_policy_cleanup_status, project_id, expiration_policy_started_at); +CREATE INDEX idx_deletions_on_project_id_and_id_where_pending ON ONLY p_batched_git_ref_updates_deletions USING btree (project_id, id) WHERE (status = 1); + CREATE INDEX idx_deployment_clusters_on_cluster_id_and_kubernetes_namespace ON deployment_clusters USING btree (cluster_id, kubernetes_namespace); CREATE INDEX idx_devops_adoption_segments_namespace_end_time ON analytics_devops_adoption_snapshots USING btree (namespace_id, end_time); @@ -30048,8 +30050,6 @@ CREATE UNIQUE INDEX idx_environment_merge_requests_unique_index ON deployment_me CREATE UNIQUE INDEX idx_external_audit_event_destination_id_key_uniq ON audit_events_streaming_headers USING btree (key, external_audit_event_destination_id); -CREATE INDEX idx_git_ref_deletions_on_project_id_where_pending ON ONLY p_batched_git_ref_updates_deletions USING btree (project_id) WHERE (status = 1); - CREATE INDEX idx_headers_instance_external_audit_event_destination_id ON instance_audit_events_streaming_headers USING btree (instance_external_audit_event_destination_id); CREATE INDEX idx_installable_conan_pkgs_on_project_id_id ON packages_packages USING btree (project_id, id) WHERE ((package_type = 3) AND (status = ANY (ARRAY[0, 1]))); -- GitLab From 992de77484df38d6f44c16c4645db99d01bacec7 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Mon, 17 Jul 2023 16:05:03 +1000 Subject: [PATCH 27/38] Bigger PG batch same Gitaly batch --- .../project_cleanup_service.rb | 13 +++++++++---- .../project_cleanup_service_spec.rb | 2 +- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/app/services/batched_git_ref_updates/project_cleanup_service.rb b/app/services/batched_git_ref_updates/project_cleanup_service.rb index 37617edb8004dc..055739c8003189 100644 --- a/app/services/batched_git_ref_updates/project_cleanup_service.rb +++ b/app/services/batched_git_ref_updates/project_cleanup_service.rb @@ -5,8 +5,9 @@ class ProjectCleanupService include ::Gitlab::ExclusiveLeaseHelpers LOCK_TIMEOUT = 10.minutes - BATCH_SIZE = 100 # TODO: Pick a number that will finish in 30s - MAX_DELETES = 10_000 # TODO: Pick a number that will finish in 30s + GITALY_BATCH_SIZE = 100 + QUERY_BATCH_SIZE = 1000 + MAX_DELETES = 10_000 def initialize(project_id) @project_id = project_id @@ -19,9 +20,13 @@ def execute project = Project.find_by_id(@project_id) break unless project - Deletion.status_pending.for_project(@project_id).select_ref_and_identity.each_batch(of: BATCH_SIZE) do |batch| + Deletion.status_pending.for_project(@project_id).select_ref_and_identity.each_batch(of: QUERY_BATCH_SIZE) do |batch| refs = batch.map(&:ref) - project.repository.delete_refs(*refs) + + refs.each_slice(GITALY_BATCH_SIZE) do |refs_to_delete| + project.repository.delete_refs(*refs_to_delete) + end + total_deletes += refs.count Deletion.mark_records_processed(batch) diff --git a/spec/services/batched_git_ref_updates/project_cleanup_service_spec.rb b/spec/services/batched_git_ref_updates/project_cleanup_service_spec.rb index e86e0fde765fe7..dcdfdfade3cf96 100644 --- a/spec/services/batched_git_ref_updates/project_cleanup_service_spec.rb +++ b/spec/services/batched_git_ref_updates/project_cleanup_service_spec.rb @@ -83,7 +83,7 @@ end it 'stops after it reaches limit of deleted refs' do - stub_const("#{described_class}::BATCH_SIZE", 1) + stub_const("#{described_class}::QUERY_BATCH_SIZE", 1) stub_const("#{described_class}::MAX_DELETES", 1) result = service.execute -- GitLab From a17c1c39756e8b8a0f438a6570121308f66a346b Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Mon, 17 Jul 2023 16:25:08 +1000 Subject: [PATCH 28/38] Rubocop fix --- .../batched_git_ref_updates/project_cleanup_service.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/app/services/batched_git_ref_updates/project_cleanup_service.rb b/app/services/batched_git_ref_updates/project_cleanup_service.rb index 055739c8003189..f9518cad975eff 100644 --- a/app/services/batched_git_ref_updates/project_cleanup_service.rb +++ b/app/services/batched_git_ref_updates/project_cleanup_service.rb @@ -20,7 +20,11 @@ def execute project = Project.find_by_id(@project_id) break unless project - Deletion.status_pending.for_project(@project_id).select_ref_and_identity.each_batch(of: QUERY_BATCH_SIZE) do |batch| + Deletion + .status_pending + .for_project(@project_id) + .select_ref_and_identity + .each_batch(of: QUERY_BATCH_SIZE) do |batch| refs = batch.map(&:ref) refs.each_slice(GITALY_BATCH_SIZE) do |refs_to_delete| -- GitLab From 17b4db4761ce38575c5e0604bf685c1324716dfb Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Mon, 17 Jul 2023 16:57:10 +1000 Subject: [PATCH 29/38] Remove unused indexes --- ...233431_create_table_batched_git_ref_updates_deletions.rb | 6 +++--- db/structure.sql | 6 ------ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb b/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb index f000626765f2a6..91450822ac8171 100644 --- a/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb +++ b/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb @@ -16,10 +16,10 @@ def up t.bigserial :id, null: false # Do not bother with foreign key as it provides not benefit and has a performance cost. These get cleaned up over # time anyway. - t.bigint :project_id, index: true, null: false + t.bigint :project_id, null: false t.text :ref, null: false - t.bigint :partition_id, null: false, index: true, default: 1 - t.integer :status, null: false, index: true, default: 1, limit: 2 + t.bigint :partition_id, null: false, default: 1 + t.integer :status, null: false, default: 1, limit: 2 t.index [:project_id, :id], where: 'status = 1', name: :idx_deletions_on_project_id_and_id_where_pending diff --git a/db/structure.sql b/db/structure.sql index b24042a3dde450..7825b395050669 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -32358,12 +32358,6 @@ CREATE INDEX index_organization_users_on_user_id ON organization_users USING btr CREATE UNIQUE INDEX index_organizations_on_unique_name_per_group ON customer_relations_organizations USING btree (group_id, lower(name), id); -CREATE INDEX index_p_batched_git_ref_updates_deletions_on_partition_id ON ONLY p_batched_git_ref_updates_deletions USING btree (partition_id); - -CREATE INDEX index_p_batched_git_ref_updates_deletions_on_project_id ON ONLY p_batched_git_ref_updates_deletions USING btree (project_id); - -CREATE INDEX index_p_batched_git_ref_updates_deletions_on_status ON ONLY p_batched_git_ref_updates_deletions USING btree (status); - CREATE UNIQUE INDEX index_p_ci_job_annotations_on_partition_id_job_id_name ON ONLY p_ci_job_annotations USING btree (partition_id, job_id, name); CREATE INDEX index_p_ci_runner_machine_builds_on_runner_machine_id ON ONLY p_ci_runner_machine_builds USING btree (runner_machine_id); -- GitLab From 4a1a87b2b3e5f68d7c935cfc98c41b88f1d68901 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Tue, 18 Jul 2023 09:40:39 +1000 Subject: [PATCH 30/38] Replace eq with contain_exactly --- spec/models/batched_git_ref_updates/deletion_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/models/batched_git_ref_updates/deletion_spec.rb b/spec/models/batched_git_ref_updates/deletion_spec.rb index 8a1a31d9d53598..c9697cd1e6ea34 100644 --- a/spec/models/batched_git_ref_updates/deletion_spec.rb +++ b/spec/models/batched_git_ref_updates/deletion_spec.rb @@ -104,7 +104,7 @@ # a new partition is created partition_manager.sync_partitions - expect(described_class.partitioning_strategy.current_partitions.map(&:value)).to eq([1, 2]) + expect(described_class.partitioning_strategy.current_partitions.map(&:value)).to contain_exactly(1, 2) # and we can insert to the new partition described_class.create!(project_id: 2, ref: 'refs/test/2') -- GitLab From c3a2491825370624163dfaa25463fb34310f4392 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Tue, 18 Jul 2023 03:40:15 +0000 Subject: [PATCH 31/38] Test to see if let fixes CI --- spec/models/ci/pipeline_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/models/ci/pipeline_spec.rb b/spec/models/ci/pipeline_spec.rb index 1e3ce8bc656046..dd0c1234c1ecd9 100644 --- a/spec/models/ci/pipeline_spec.rb +++ b/spec/models/ci/pipeline_spec.rb @@ -1319,7 +1319,7 @@ def create_build(name, status) end describe 'state machine' do - let_it_be_with_reload(:pipeline) { create(:ci_empty_pipeline, :created) } + let(:pipeline) { create(:ci_empty_pipeline, :created) } let(:current) { Time.current.change(usec: 0) } let(:build) { create_build('build1', queued_at: 0) } -- GitLab From e1f2117f32a2de2f584c8e85c2ca3c2994509832 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Tue, 18 Jul 2023 05:01:09 +0000 Subject: [PATCH 32/38] Revert to `let_it_be_with_reload` --- spec/models/ci/pipeline_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/models/ci/pipeline_spec.rb b/spec/models/ci/pipeline_spec.rb index dd0c1234c1ecd9..1e3ce8bc656046 100644 --- a/spec/models/ci/pipeline_spec.rb +++ b/spec/models/ci/pipeline_spec.rb @@ -1319,7 +1319,7 @@ def create_build(name, status) end describe 'state machine' do - let(:pipeline) { create(:ci_empty_pipeline, :created) } + let_it_be_with_reload(:pipeline) { create(:ci_empty_pipeline, :created) } let(:current) { Time.current.change(usec: 0) } let(:build) { create_build('build1', queued_at: 0) } -- GitLab From b848787538fb49b05d8ca5f605fbd22d79684dc7 Mon Sep 17 00:00:00 2001 From: Eulyeon Ko <5961404-euko@users.noreply.gitlab.com> Date: Thu, 20 Jul 2023 07:14:03 +0000 Subject: [PATCH 33/38] Improvements suggested from DB review --- app/models/batched_git_ref_updates/deletion.rb | 2 ++ .../development/merge_request_delete_gitaly_refs_in_batches.yml | 2 +- .../development/pipeline_delete_gitaly_refs_in_batches.yml | 2 +- db/docs/p_batched_git_ref_updates_deletions.yml | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/app/models/batched_git_ref_updates/deletion.rb b/app/models/batched_git_ref_updates/deletion.rb index f2059a37517421..2bbe1c3d8dae01 100644 --- a/app/models/batched_git_ref_updates/deletion.rb +++ b/app/models/batched_git_ref_updates/deletion.rb @@ -57,5 +57,7 @@ def self.update_by_partition(records) yield(partitioned_scope) end end + + private_class_method :update_by_partition end end diff --git a/config/feature_flags/development/merge_request_delete_gitaly_refs_in_batches.yml b/config/feature_flags/development/merge_request_delete_gitaly_refs_in_batches.yml index dee7916df601ca..0f0b81dbbd281b 100644 --- a/config/feature_flags/development/merge_request_delete_gitaly_refs_in_batches.yml +++ b/config/feature_flags/development/merge_request_delete_gitaly_refs_in_batches.yml @@ -2,7 +2,7 @@ name: merge_request_delete_gitaly_refs_in_batches introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/125333 rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/416969 -milestone: '16.2' +milestone: '16.3' type: development group: group::gitaly default_enabled: false diff --git a/config/feature_flags/development/pipeline_delete_gitaly_refs_in_batches.yml b/config/feature_flags/development/pipeline_delete_gitaly_refs_in_batches.yml index bd2b2ca8411945..66e23fa424a339 100644 --- a/config/feature_flags/development/pipeline_delete_gitaly_refs_in_batches.yml +++ b/config/feature_flags/development/pipeline_delete_gitaly_refs_in_batches.yml @@ -2,7 +2,7 @@ name: pipeline_delete_gitaly_refs_in_batches introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/125333 rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/416969 -milestone: '16.2' +milestone: '16.3' type: development group: group::gitaly default_enabled: false diff --git a/db/docs/p_batched_git_ref_updates_deletions.yml b/db/docs/p_batched_git_ref_updates_deletions.yml index 9e8c1b258069d1..a09672f38dac7c 100644 --- a/db/docs/p_batched_git_ref_updates_deletions.yml +++ b/db/docs/p_batched_git_ref_updates_deletions.yml @@ -7,5 +7,5 @@ feature_categories: description: Acts as a queue for refs that need to be deleted in Gitaly. This allows us to batch deletes rather than sending them one at a time. introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/125333 -milestone: '16.2' +milestone: '16.3' gitlab_schema: gitlab_main -- GitLab From e599be335e676e66fa49e0f7cec38aedd2908fb9 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Thu, 20 Jul 2023 17:37:35 +1000 Subject: [PATCH 34/38] Switch to ignore_column --- app/models/batched_git_ref_updates/deletion.rb | 10 ++++++++-- spec/models/batched_git_ref_updates/deletion_spec.rb | 4 ++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/app/models/batched_git_ref_updates/deletion.rb b/app/models/batched_git_ref_updates/deletion.rb index 2bbe1c3d8dae01..7765fde0afecb6 100644 --- a/app/models/batched_git_ref_updates/deletion.rb +++ b/app/models/batched_git_ref_updates/deletion.rb @@ -13,13 +13,17 @@ class Deletion < ApplicationRecord self.primary_key = :id self.sequence_name = :to_be_deleted_git_refs_id_seq + # This column must be ignored otherwise Rails will cache the default value and `bulk_insert!` will start saving + # incorrect partition_id. + ignore_column :parition_id, remove_with: '3000.0', remove_after: '3000-01-01' + belongs_to :project, inverse_of: :to_be_deleted_git_refs attr_readonly :partition_id scope :for_partition, ->(partition) { where(partition_id: partition) } scope :for_project, ->(project_id) { where(project_id: project_id) } - scope :select_ref_and_identity, -> { select(:ref, :id, :partition_id) } + scope :select_ref_and_identity, -> { select(:ref, :id, arel_table[:partition_id].as('partition')) } partitioned_by :partition_id, strategy: :sliding_list, next_partition_if: ->(active_partition) do @@ -48,8 +52,10 @@ def self.mark_records_processed(records) end end + # Your scope must select_ref_and_identity before calling this method as it relies on partition being explicitly + # selected def self.update_by_partition(records) - records.group_by(&:partition_id).each do |partition, records_within_partition| + records.group_by(&:partition).each do |partition, records_within_partition| partitioned_scope = status_pending .for_partition(partition) .where(id: records_within_partition.map(&:id)) diff --git a/spec/models/batched_git_ref_updates/deletion_spec.rb b/spec/models/batched_git_ref_updates/deletion_spec.rb index c9697cd1e6ea34..1679e8977b3184 100644 --- a/spec/models/batched_git_ref_updates/deletion_spec.rb +++ b/spec/models/batched_git_ref_updates/deletion_spec.rb @@ -14,7 +14,7 @@ expect(described_class.status_pending.count).to eq(4) expect(described_class.status_processed.count).to eq(1) - deletions = described_class.for_project(1) + deletions = described_class.for_project(1).select_ref_and_identity described_class.mark_records_processed(deletions) deletions.each do |deletion| @@ -110,7 +110,7 @@ described_class.create!(project_id: 2, ref: 'refs/test/2') # after processing old records - described_class.mark_records_processed(described_class.for_partition(1)) + described_class.mark_records_processed(described_class.for_partition(1).select_ref_and_identity) partition_manager.sync_partitions -- GitLab From e8cf29a851571b69632d55094c726abe2dcb9d64 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Thu, 20 Jul 2023 17:42:23 +1000 Subject: [PATCH 35/38] Reorder migrations --- ...reate_table_batched_git_ref_updates_deletions.rb | 6 +++--- ...nstraint_to_batched_git_ref_updates_deletions.rb | 13 ------------- db/schema_migrations/20230704235236 | 1 - db/structure.sql | 4 ++-- 4 files changed, 5 insertions(+), 19 deletions(-) delete mode 100644 db/migrate/20230704235236_add_constraint_to_batched_git_ref_updates_deletions.rb delete mode 100644 db/schema_migrations/20230704235236 diff --git a/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb b/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb index 91450822ac8171..83aaba55bf6ec6 100644 --- a/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb +++ b/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb @@ -17,13 +17,13 @@ def up # Do not bother with foreign key as it provides not benefit and has a performance cost. These get cleaned up over # time anyway. t.bigint :project_id, null: false - t.text :ref, null: false t.bigint :partition_id, null: false, default: 1 + t.timestamps_with_timezone null: false t.integer :status, null: false, default: 1, limit: 2 + t.text :ref, null: false, limit: 1024 + t.index [:project_id, :id], where: 'status = 1', name: :idx_deletions_on_project_id_and_id_where_pending - - t.timestamps_with_timezone null: false end connection.execute(<<~SQL) diff --git a/db/migrate/20230704235236_add_constraint_to_batched_git_ref_updates_deletions.rb b/db/migrate/20230704235236_add_constraint_to_batched_git_ref_updates_deletions.rb deleted file mode 100644 index 6aa9b6b40d6a4c..00000000000000 --- a/db/migrate/20230704235236_add_constraint_to_batched_git_ref_updates_deletions.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -class AddConstraintToBatchedGitRefUpdatesDeletions < Gitlab::Database::Migration[2.1] - disable_ddl_transaction! - - def up - add_text_limit :p_batched_git_ref_updates_deletions, :ref, 1024 - end - - def down - remove_text_limit :p_batched_git_ref_updates_deletions, :ref - end -end diff --git a/db/schema_migrations/20230704235236 b/db/schema_migrations/20230704235236 deleted file mode 100644 index 8908154d59c619..00000000000000 --- a/db/schema_migrations/20230704235236 +++ /dev/null @@ -1 +0,0 @@ -f9a1d78f52f5949acd102320590ca4270747492965e3e92cc00c27ad4a9e1677 \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index 7825b395050669..d83d51960eb3d3 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -540,11 +540,11 @@ PARTITION BY LIST (partition); CREATE TABLE p_batched_git_ref_updates_deletions ( id bigint NOT NULL, project_id bigint NOT NULL, - ref text NOT NULL, partition_id bigint DEFAULT 1 NOT NULL, - status smallint DEFAULT 1 NOT NULL, created_at timestamp with time zone NOT NULL, updated_at timestamp with time zone NOT NULL, + status smallint DEFAULT 1 NOT NULL, + ref text NOT NULL, CONSTRAINT check_f322d53b92 CHECK ((char_length(ref) <= 1024)) ) PARTITION BY LIST (partition_id); -- GitLab From b98966c8542e791259b4fc785bd90887d3c9e232 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Fri, 21 Jul 2023 15:52:47 +1000 Subject: [PATCH 36/38] Fix ignore_column :partition_id --- app/models/batched_git_ref_updates/deletion.rb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/models/batched_git_ref_updates/deletion.rb b/app/models/batched_git_ref_updates/deletion.rb index 7765fde0afecb6..61bba8aeba9346 100644 --- a/app/models/batched_git_ref_updates/deletion.rb +++ b/app/models/batched_git_ref_updates/deletion.rb @@ -15,12 +15,10 @@ class Deletion < ApplicationRecord # This column must be ignored otherwise Rails will cache the default value and `bulk_insert!` will start saving # incorrect partition_id. - ignore_column :parition_id, remove_with: '3000.0', remove_after: '3000-01-01' + ignore_column :partition_id, remove_with: '3000.0', remove_after: '3000-01-01' belongs_to :project, inverse_of: :to_be_deleted_git_refs - attr_readonly :partition_id - scope :for_partition, ->(partition) { where(partition_id: partition) } scope :for_project, ->(project_id) { where(project_id: project_id) } scope :select_ref_and_identity, -> { select(:ref, :id, arel_table[:partition_id].as('partition')) } -- GitLab From b701fff8de4c546ea423ad598b737f923cfb9ad3 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Fri, 21 Jul 2023 15:55:21 +1000 Subject: [PATCH 37/38] Tidy up Deletion migration --- ...33431_create_table_batched_git_ref_updates_deletions.rb | 7 ------- 1 file changed, 7 deletions(-) diff --git a/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb b/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb index 83aaba55bf6ec6..4df413a4a5e560 100644 --- a/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb +++ b/db/migrate/20230704233431_create_table_batched_git_ref_updates_deletions.rb @@ -3,8 +3,6 @@ class CreateTableBatchedGitRefUpdatesDeletions < Gitlab::Database::Migration[2.1] enable_lock_retries! - # rubocop:disable Migration/AddLimitToTextColumns - # limit is added in 20230704235236_add_constraint_to_batched_git_ref_updates_deletions.rb def up options = { primary_key: [:id, :partition_id], @@ -32,13 +30,8 @@ def up FOR VALUES IN (1); SQL end - # rubocop:enable Migration/AddLimitToTextColumns def down - connection.execute(<<~SQL) - DROP TABLE IF EXISTS gitlab_partitions_dynamic.p_batched_git_ref_updates_deletions_1; - SQL - drop_table :p_batched_git_ref_updates_deletions end end -- GitLab From bd00afc18c8fbb2c319082483e47572c9068ca8c Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Mon, 24 Jul 2023 09:03:14 +1000 Subject: [PATCH 38/38] Add specs for idempotent workers --- .../batched_git_ref_updates/cleanup_scheduler_worker_spec.rb | 2 ++ .../batched_git_ref_updates/project_cleanup_worker_spec.rb | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/spec/workers/batched_git_ref_updates/cleanup_scheduler_worker_spec.rb b/spec/workers/batched_git_ref_updates/cleanup_scheduler_worker_spec.rb index 507c2535fcab16..a52043993b79ae 100644 --- a/spec/workers/batched_git_ref_updates/cleanup_scheduler_worker_spec.rb +++ b/spec/workers/batched_git_ref_updates/cleanup_scheduler_worker_spec.rb @@ -26,4 +26,6 @@ }) end end + + it_behaves_like 'an idempotent worker' end diff --git a/spec/workers/batched_git_ref_updates/project_cleanup_worker_spec.rb b/spec/workers/batched_git_ref_updates/project_cleanup_worker_spec.rb index e7db60f33beea3..5442b9dd051942 100644 --- a/spec/workers/batched_git_ref_updates/project_cleanup_worker_spec.rb +++ b/spec/workers/batched_git_ref_updates/project_cleanup_worker_spec.rb @@ -26,4 +26,8 @@ }) end end + + it_behaves_like 'an idempotent worker' do + let(:job_args) { [123] } + end end -- GitLab