From b06303b7d04a58655f86762951d1e068fad584b6 Mon Sep 17 00:00:00 2001
From: Serena Fang
Date: Tue, 3 May 2022 19:10:22 -0500
Subject: [PATCH 1/2] Add backfill namespace details migrations

Changelog: added
---
 ...1508_backfill_project_namespace_details.rb | 27 +++++++++++++++++++
 ...220316022508_backfill_namespace_details.rb | 27 +++++++++++++++++++
 .../backfill_namespace_details.rb             | 20 ++++++++++++++
 .../backfill_project_namespace_details.rb     | 18 +++++++++++++
 .../backfill_namespace_details_spec.rb        | 25 +++++++++++++++++
 ...backfill_project_namespace_details_spec.rb | 25 +++++++++++++++++
 6 files changed, 142 insertions(+)
 create mode 100644 db/migrate/20220406011508_backfill_project_namespace_details.rb
 create mode 100644 db/post_migrate/20220316022508_backfill_namespace_details.rb
 create mode 100644 lib/gitlab/background_migration/backfill_namespace_details.rb
 create mode 100644 lib/gitlab/background_migration/backfill_project_namespace_details.rb
 create mode 100644 spec/lib/gitlab/background_migration/backfill_namespace_details_spec.rb
 create mode 100644 spec/lib/gitlab/background_migration/backfill_project_namespace_details_spec.rb

diff --git a/db/migrate/20220406011508_backfill_project_namespace_details.rb b/db/migrate/20220406011508_backfill_project_namespace_details.rb
new file mode 100644
index 00000000000000..979fa0265bd484
--- /dev/null
+++ b/db/migrate/20220406011508_backfill_project_namespace_details.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+class BackfillProjectNamespaceDetails < Gitlab::Database::Migration[1.0]
+  DOWNTIME = false
+
+  MIGRATION = 'BackfillProjectNamespaceDetails'
+  DELAY_INTERVAL = 2.minutes
+  BATCH_SIZE = 10_000
+
+  disable_ddl_transaction!
+
+  class Namespace < ActiveRecord::Base
+    include EachBatch
+
+    self.table_name = 'projects'
+  end
+
+  def up
+    say "Scheduling `#{MIGRATION}` jobs"
+
+    queue_background_migration_jobs_by_range_at_intervals(Project, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE)
+  end
+
+  def down
+    # NOOP
+  end
+end
diff --git a/db/post_migrate/20220316022508_backfill_namespace_details.rb b/db/post_migrate/20220316022508_backfill_namespace_details.rb
new file mode 100644
index 00000000000000..7b4d497f7c709e
--- /dev/null
+++ b/db/post_migrate/20220316022508_backfill_namespace_details.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+class BackfillNamespaceDetails < Gitlab::Database::Migration[1.0]
+  DOWNTIME = false
+
+  MIGRATION = 'BackfillNamespaceDetails'
+  DELAY_INTERVAL = 2.minutes
+  BATCH_SIZE = 10_000
+
+  disable_ddl_transaction!
+
+  class Namespace < ActiveRecord::Base
+    include EachBatch
+
+    self.table_name = 'namespaces'
+  end
+
+  def up
+    say "Scheduling `#{MIGRATION}` jobs"
+
+    queue_background_migration_jobs_by_range_at_intervals(Namespace, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE)
+  end
+
+  def down
+    # NOOP
+  end
+end
diff --git a/lib/gitlab/background_migration/backfill_namespace_details.rb b/lib/gitlab/background_migration/backfill_namespace_details.rb
new file mode 100644
index 00000000000000..0bc7cdad0db2c5
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_namespace_details.rb
@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module BackgroundMigration
+    # Backfill namespace_details for a range of namespaces
+    class BackfillNamespaceDetails
+      def perform(start_id, end_id)
+        ActiveRecord::Base.connection.execute <<~SQL
+          INSERT INTO namespace_details (namespace_id, created_at, updated_at, description, description_html, cached_markdown_version)
+            SELECT namespaces.id, now(), now(), namespaces.description, namespaces.description_html, namespaces.cached_markdown_version
+            FROM namespaces
+            WHERE namespaces.id BETWEEN #{start_id} AND #{end_id}
+            AND namespaces.type <> 'Project'
+          ON CONFLICT (namespace_id) DO NOTHING;
+
+        SQL
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/background_migration/backfill_project_namespace_details.rb b/lib/gitlab/background_migration/backfill_project_namespace_details.rb
new file mode 100644
index 00000000000000..6242a968773358
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_project_namespace_details.rb
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module BackgroundMigration
+    # Backfill project_namespace namespace_details for a range of project namespaces
+    class BackfillProjectNamespaceDetails
+      def perform(start_id, end_id)
+        ActiveRecord::Base.connection.execute <<~SQL
+          INSERT INTO namespace_details (namespace_id, created_at, updated_at, description, description_html, cached_markdown_version)
+            SELECT projects.project_namespace_id, now(), now(), projects.description, projects.description_html, projects.cached_markdown_version
+            FROM projects
+            WHERE projects.project_namespace_id BETWEEN #{start_id} AND #{end_id}
+          ON CONFLICT (namespace_id) DO NOTHING;
+        SQL
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/background_migration/backfill_namespace_details_spec.rb b/spec/lib/gitlab/background_migration/backfill_namespace_details_spec.rb
new file mode 100644
index 00000000000000..cacb3509d9aa73
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/backfill_namespace_details_spec.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+require_migration!
+
+RSpec.describe Gitlab::BackgroundMigration::BackfillNamespaceDetails do
+  let(:namespaces) { table(:namespaces) }
+  let(:namespace_details) { table(:namespace_details) }
+  let(:namespace) { table(:namespaces).create!(name: 'user', path: 'user') }
+
+  subject { described_class.new }
+
+  describe '#perform' do
+    it 'creates details for all namespaces in range' do
+      namespaces.create!(id: 5, name: 'test1', path: 'test1')
+      namespaces.create!(id: 6, name: 'test2', path: 'test2', type: 'Project')
+      namespaces.create!(id: 7, name: 'test3', path: 'test3')
+      namespaces.create!(id: 8, name: 'test4', path: 'test4')
+
+      subject.perform(5, 7)
+
+      expect(namespace_details.pluck(:namespace_id)).to contain_exactly(5, 7)
+    end
+  end
+end
diff --git a/spec/lib/gitlab/background_migration/backfill_project_namespace_details_spec.rb b/spec/lib/gitlab/background_migration/backfill_project_namespace_details_spec.rb
new file mode 100644
index 00000000000000..1256a3738a16d8
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/backfill_project_namespace_details_spec.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+require_migration!
+
+# rubocop:disable RSpec/FactoriesInMigrationSpecs
+RSpec.describe Gitlab::BackgroundMigration::BackfillProjectNamespaceDetails do
+  let(:namespace_details) { table(:namespace_details) }
+  let(:namespaces) { table(:namespaces) }
+
+  subject { described_class.new }
+
+  describe '#perform' do
+    let(:project_namespace1) { create(:project_namespace) }
+    let(:project_namespace2) { create(:project_namespace) }
+    let(:project_namespace3) { create(:project_namespace) }
+
+    it 'creates details for all project namespaces in range' do
+      subject.perform(project_namespace1.id, project_namespace2.id)
+
+      expect(namespace_details.pluck(:namespace_id) & namespaces.all.where(type: "Project").pluck(:id))
+        .to contain_exactly(project_namespace1.id, project_namespace2.id)
+    end
+  end
+end
-- 
GitLab


From 5fcbc54e6f9908fe96817bc331facbdd888e3aba Mon Sep 17 00:00:00 2001
From: Serena Fang
Date: Wed, 7 Sep 2022 00:48:52 -0500
Subject: [PATCH 2/2] Use queue batched background migration

Queue both backfills with queue_batched_background_migration and convert
the job classes to BatchedMigrationJob subclasses, which is the job shape
the batched migration framework runs. The project job now selects rows by
projects.id, the column its migration batches over, instead of applying
the batch bounds to projects.project_namespace_id.

The specs build their records with the table helper instead of factories
and instantiate the jobs the way the batched framework does.
---
Reviewer note: blob `index` lines are omitted for the two job classes and
the two specs, whose contents changed in this revision of the patch.
Regenerate the series with `git format-patch` after applying.

 ...220316022508_backfill_namespace_details.rb | 27 ++++++++++++++
 ...1508_backfill_project_namespace_details.rb | 30 +++++++--------
 ...220316022508_backfill_namespace_details.rb | 27 --------------
 db/schema_migrations/20220316022508           |  1 +
 db/schema_migrations/20220406011508           |  1 +
 .../backfill_namespace_details.rb             | 26 +++++++++----
 .../backfill_project_namespace_details.rb     | 27 ++++++++++----
 .../backfill_namespace_details_spec.rb        | 15 ++++++--
 ...backfill_project_namespace_details_spec.rb | 35 ++++++++++++++----
 9 files changed, 123 insertions(+), 66 deletions(-)
 create mode 100644 db/migrate/20220316022508_backfill_namespace_details.rb
 delete mode 100644 db/post_migrate/20220316022508_backfill_namespace_details.rb
 create mode 100644 db/schema_migrations/20220316022508
 create mode 100644 db/schema_migrations/20220406011508

diff --git a/db/migrate/20220316022508_backfill_namespace_details.rb b/db/migrate/20220316022508_backfill_namespace_details.rb
new file mode 100644
index 00000000000000..eda9b135d44b74
--- /dev/null
+++ b/db/migrate/20220316022508_backfill_namespace_details.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+class BackfillNamespaceDetails < Gitlab::Database::Migration[1.0]
+  disable_ddl_transaction!
+
+  MIGRATION = 'BackfillNamespaceDetails'
+  INTERVAL = 2.minutes
+  BATCH_SIZE = 1_000
+  MAX_BATCH_SIZE = 2_000
+  SUB_BATCH_SIZE = 200
+
+  def up
+    queue_batched_background_migration(
+      MIGRATION,
+      :namespaces,
+      :id,
+      job_interval: INTERVAL,
+      batch_size: BATCH_SIZE,
+      max_batch_size: MAX_BATCH_SIZE,
+      sub_batch_size: SUB_BATCH_SIZE
+    )
+  end
+
+  def down
+    delete_batched_background_migration(MIGRATION, :namespaces, :id, [])
+  end
+end
diff --git a/db/migrate/20220406011508_backfill_project_namespace_details.rb b/db/migrate/20220406011508_backfill_project_namespace_details.rb
index 979fa0265bd484..a2501d40b5d2b7 100644
--- a/db/migrate/20220406011508_backfill_project_namespace_details.rb
+++ b/db/migrate/20220406011508_backfill_project_namespace_details.rb
@@ -1,27 +1,27 @@
 # frozen_string_literal: true
 
 class BackfillProjectNamespaceDetails < Gitlab::Database::Migration[1.0]
-  DOWNTIME = false
-
-  MIGRATION = 'BackfillProjectNamespaceDetails'
-  DELAY_INTERVAL = 2.minutes
-  BATCH_SIZE = 10_000
-
   disable_ddl_transaction!
 
-  class Namespace < ActiveRecord::Base
-    include EachBatch
-
-    self.table_name = 'projects'
-  end
+  MIGRATION = 'BackfillProjectNamespaceDetails'
+  INTERVAL = 2.minutes
+  BATCH_SIZE = 1_000
+  MAX_BATCH_SIZE = 2_000
+  SUB_BATCH_SIZE = 200
 
   def up
-    say "Scheduling `#{MIGRATION}` jobs"
-
-    queue_background_migration_jobs_by_range_at_intervals(Project, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE)
+    queue_batched_background_migration(
+      MIGRATION,
+      :projects,
+      :id,
+      job_interval: INTERVAL,
+      batch_size: BATCH_SIZE,
+      max_batch_size: MAX_BATCH_SIZE,
+      sub_batch_size: SUB_BATCH_SIZE
+    )
   end
 
   def down
-    # NOOP
+    delete_batched_background_migration(MIGRATION, :projects, :id, [])
   end
 end
diff --git a/db/post_migrate/20220316022508_backfill_namespace_details.rb b/db/post_migrate/20220316022508_backfill_namespace_details.rb
deleted file mode 100644
index 7b4d497f7c709e..00000000000000
--- a/db/post_migrate/20220316022508_backfill_namespace_details.rb
+++ /dev/null
@@ -1,27 +0,0 @@
-# frozen_string_literal: true
-
-class BackfillNamespaceDetails < Gitlab::Database::Migration[1.0]
-  DOWNTIME = false
-
-  MIGRATION = 'BackfillNamespaceDetails'
-  DELAY_INTERVAL = 2.minutes
-  BATCH_SIZE = 10_000
-
-  disable_ddl_transaction!
-
-  class Namespace < ActiveRecord::Base
-    include EachBatch
-
-    self.table_name = 'namespaces'
-  end
-
-  def up
-    say "Scheduling `#{MIGRATION}` jobs"
-
-    queue_background_migration_jobs_by_range_at_intervals(Namespace, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE)
-  end
-
-  def down
-    # NOOP
-  end
-end
diff --git a/db/schema_migrations/20220316022508 b/db/schema_migrations/20220316022508
new file mode 100644
index 00000000000000..06734edec8d1f0
--- /dev/null
+++ b/db/schema_migrations/20220316022508
@@ -0,0 +1 @@
+0f4487bcba4e60c442e477e05037d05d4cb36fe5cc88445d4d2021e14a193fc5
\ No newline at end of file
diff --git a/db/schema_migrations/20220406011508 b/db/schema_migrations/20220406011508
new file mode 100644
index 00000000000000..e35d7fee09edf7
--- /dev/null
+++ b/db/schema_migrations/20220406011508
@@ -0,0 +1 @@
+a3c1a618e4b7e16b425fb5624db3f78fe053a72ff5273956d88320fbb3f3ecb3
\ No newline at end of file
diff --git a/lib/gitlab/background_migration/backfill_namespace_details.rb b/lib/gitlab/background_migration/backfill_namespace_details.rb
--- a/lib/gitlab/background_migration/backfill_namespace_details.rb
+++ b/lib/gitlab/background_migration/backfill_namespace_details.rb
@@ -3,16 +3,28 @@
 module Gitlab
   module BackgroundMigration
     # Backfill namespace_details for a range of namespaces
-    class BackfillNamespaceDetails
-      def perform(start_id, end_id)
-        ActiveRecord::Base.connection.execute <<~SQL
-          INSERT INTO namespace_details (namespace_id, created_at, updated_at, description, description_html, cached_markdown_version)
-            SELECT namespaces.id, now(), now(), namespaces.description, namespaces.description_html, namespaces.cached_markdown_version
+    #
+    # Runs as a batched background migration over `namespaces.id`: every
+    # sub-batch copies the description columns of non-project namespaces
+    # into `namespace_details`, skipping rows that already exist.
+    class BackfillNamespaceDetails < ::Gitlab::BackgroundMigration::BatchedMigrationJob
+      def perform
+        each_sub_batch(operation_name: :backfill_namespace_details) do |sub_batch|
+          upsert_namespace_details(sub_batch)
+        end
+      end
+
+      private
+
+      # Inserts one `namespace_details` row per namespace in +relation+.
+      def upsert_namespace_details(relation)
+        connection.execute <<~SQL
+          INSERT INTO namespace_details (description, description_html, cached_markdown_version, created_at, updated_at, namespace_id)
+            SELECT namespaces.description, namespaces.description_html, namespaces.cached_markdown_version, now(), now(), namespaces.id
             FROM namespaces
-            WHERE namespaces.id BETWEEN #{start_id} AND #{end_id}
+            WHERE namespaces.id IN (#{relation.select(:id).to_sql})
             AND namespaces.type <> 'Project'
           ON CONFLICT (namespace_id) DO NOTHING;
-
         SQL
       end
     end
diff --git a/lib/gitlab/background_migration/backfill_project_namespace_details.rb b/lib/gitlab/background_migration/backfill_project_namespace_details.rb
--- a/lib/gitlab/background_migration/backfill_project_namespace_details.rb
+++ b/lib/gitlab/background_migration/backfill_project_namespace_details.rb
@@ -3,13 +3,28 @@
 module Gitlab
   module BackgroundMigration
     # Backfill project_namespace namespace_details for a range of project namespaces
-    class BackfillProjectNamespaceDetails
-      def perform(start_id, end_id)
-        ActiveRecord::Base.connection.execute <<~SQL
-          INSERT INTO namespace_details (namespace_id, created_at, updated_at, description, description_html, cached_markdown_version)
-            SELECT projects.project_namespace_id, now(), now(), projects.description, projects.description_html, projects.cached_markdown_version
+    #
+    # Runs as a batched background migration over `projects.id`: every
+    # sub-batch copies the description columns of its projects into the
+    # `namespace_details` row of each project's namespace, skipping rows
+    # that already exist.
+    class BackfillProjectNamespaceDetails < ::Gitlab::BackgroundMigration::BatchedMigrationJob
+      def perform
+        each_sub_batch(operation_name: :backfill_project_namespace_details) do |sub_batch|
+          upsert_project_namespace_details(sub_batch)
+        end
+      end
+
+      private
+
+      # Inserts one `namespace_details` row per project in +relation+.
+      def upsert_project_namespace_details(relation)
+        connection.execute <<~SQL
+          INSERT INTO namespace_details (description, description_html, cached_markdown_version, created_at, updated_at, namespace_id)
+            SELECT projects.description, projects.description_html, projects.cached_markdown_version, now(), now(), projects.project_namespace_id
             FROM projects
-            WHERE projects.project_namespace_id BETWEEN #{start_id} AND #{end_id}
+            WHERE projects.id IN (#{relation.select(:id).to_sql})
+            AND projects.project_namespace_id IS NOT NULL
           ON CONFLICT (namespace_id) DO NOTHING;
         SQL
       end
diff --git a/spec/lib/gitlab/background_migration/backfill_namespace_details_spec.rb b/spec/lib/gitlab/background_migration/backfill_namespace_details_spec.rb
--- a/spec/lib/gitlab/background_migration/backfill_namespace_details_spec.rb
+++ b/spec/lib/gitlab/background_migration/backfill_namespace_details_spec.rb
@@ -6,9 +6,18 @@
 RSpec.describe Gitlab::BackgroundMigration::BackfillNamespaceDetails do
   let(:namespaces) { table(:namespaces) }
   let(:namespace_details) { table(:namespace_details) }
-  let(:namespace) { table(:namespaces).create!(name: 'user', path: 'user') }
 
-  subject { described_class.new }
+  subject(:migration) do
+    described_class.new(
+      start_id: 5,
+      end_id: 7,
+      batch_table: :namespaces,
+      batch_column: :id,
+      sub_batch_size: 2,
+      pause_ms: 0,
+      connection: ActiveRecord::Base.connection
+    )
+  end
 
   describe '#perform' do
     it 'creates details for all namespaces in range' do
@@ -17,7 +26,7 @@
       namespaces.create!(id: 7, name: 'test3', path: 'test3')
       namespaces.create!(id: 8, name: 'test4', path: 'test4')
 
-      subject.perform(5, 7)
+      migration.perform
 
       expect(namespace_details.pluck(:namespace_id)).to contain_exactly(5, 7)
     end
diff --git a/spec/lib/gitlab/background_migration/backfill_project_namespace_details_spec.rb b/spec/lib/gitlab/background_migration/backfill_project_namespace_details_spec.rb
--- a/spec/lib/gitlab/background_migration/backfill_project_namespace_details_spec.rb
+++ b/spec/lib/gitlab/background_migration/backfill_project_namespace_details_spec.rb
@@ -3,23 +3,42 @@
 require 'spec_helper'
 require_migration!
 
-# rubocop:disable RSpec/FactoriesInMigrationSpecs
 RSpec.describe Gitlab::BackgroundMigration::BackfillProjectNamespaceDetails do
   let(:namespace_details) { table(:namespace_details) }
   let(:namespaces) { table(:namespaces) }
+  let(:projects) { table(:projects) }
 
-  subject { described_class.new }
+  let(:group) { namespaces.create!(name: 'group', path: 'group', type: 'Group') }
+
+  # Creates a project together with its project namespace, bypassing the application models.
+  def create_project(name)
+    project_namespace = namespaces.create!(name: name, path: name, type: 'Project', parent_id: group.id)
+
+    projects.create!(name: name, path: name, namespace_id: group.id, project_namespace_id: project_namespace.id)
+  end
 
   describe '#perform' do
-    let(:project_namespace1) { create(:project_namespace) }
-    let(:project_namespace2) { create(:project_namespace) }
-    let(:project_namespace3) { create(:project_namespace) }
+    let!(:project1) { create_project('project1') }
+    let!(:project2) { create_project('project2') }
+    let!(:project3) { create_project('project3') }
+
+    subject(:migration) do
+      described_class.new(
+        start_id: project1.id,
+        end_id: project2.id,
+        batch_table: :projects,
+        batch_column: :id,
+        sub_batch_size: 2,
+        pause_ms: 0,
+        connection: ActiveRecord::Base.connection
+      )
+    end
 
     it 'creates details for all project namespaces in range' do
-      subject.perform(project_namespace1.id, project_namespace2.id)
+      migration.perform
 
-      expect(namespace_details.pluck(:namespace_id) & namespaces.all.where(type: "Project").pluck(:id))
-        .to contain_exactly(project_namespace1.id, project_namespace2.id)
+      expect(namespace_details.pluck(:namespace_id))
+        .to contain_exactly(project1.project_namespace_id, project2.project_namespace_id)
     end
   end
 end
-- 
GitLab