diff --git a/db/docs/batched_background_migrations/backfill_missing_namespace_id_on_notes.yml b/db/docs/batched_background_migrations/backfill_missing_namespace_id_on_notes.yml new file mode 100644 index 0000000000000000000000000000000000000000..c15162fbf4a839b4587a03161f0c9b5862a28321 --- /dev/null +++ b/db/docs/batched_background_migrations/backfill_missing_namespace_id_on_notes.yml @@ -0,0 +1,9 @@ +--- +migration_job_name: BackfillMissingNamespaceIdOnNotes +description: Backfills missing namespace_id values to support sharding +feature_category: code_review_workflow +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/163687 +milestone: '17.9' +queued_migration_version: 20240822220027 +finalize_after: "2025-03-31" +finalized_by: # version of the migration that finalized this BBM diff --git a/db/post_migrate/20240822220027_queue_backfill_missing_namespace_id_on_notes.rb b/db/post_migrate/20240822220027_queue_backfill_missing_namespace_id_on_notes.rb new file mode 100644 index 0000000000000000000000000000000000000000..c83f1a97a46b16fa3c0d9917de46587cc6995a6d --- /dev/null +++ b/db/post_migrate/20240822220027_queue_backfill_missing_namespace_id_on_notes.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +# See https://docs.gitlab.com/ee/development/database/batched_background_migrations.html +# for more information on when/how to queue batched background migrations + +# Update below commented lines with appropriate values. + +class QueueBackfillMissingNamespaceIdOnNotes < Gitlab::Database::Migration[2.2] + milestone '17.5' + + restrict_gitlab_migration gitlab_schema: :gitlab_main + + MIGRATION = "BackfillMissingNamespaceIdOnNotes" + DELAY_INTERVAL = 2.minutes + BATCH_SIZE = 1000 + SUB_BATCH_SIZE = 100 + GITLAB_OPTIMIZED_BATCH_SIZE = 75_000 + GITLAB_OPTIMIZED_SUB_BATCH_SIZE = 250 + + def up + queue_batched_background_migration( + MIGRATION, + :notes, + :id, + job_interval: DELAY_INTERVAL, + **batch_sizes + ) + end + + def down + delete_batched_background_migration(MIGRATION, :notes, :id, []) + end + + private + + def batch_sizes + if Gitlab.com_except_jh? + { + batch_size: GITLAB_OPTIMIZED_BATCH_SIZE, + sub_batch_size: GITLAB_OPTIMIZED_SUB_BATCH_SIZE + } + else + { + batch_size: BATCH_SIZE, + sub_batch_size: SUB_BATCH_SIZE + } + end + end +end diff --git a/db/schema_migrations/20240822220027 b/db/schema_migrations/20240822220027 new file mode 100644 index 0000000000000000000000000000000000000000..d078b0a58951681848ceda6b6e1e517637cd6a0d --- /dev/null +++ b/db/schema_migrations/20240822220027 @@ -0,0 +1 @@ +dff289034006ffbf12d45275cf91d3dc3843c80894cea79738ffec3149edbf76 \ No newline at end of file diff --git a/lib/gitlab/background_migration/backfill_missing_namespace_id_on_notes.rb b/lib/gitlab/background_migration/backfill_missing_namespace_id_on_notes.rb new file mode 100644 index 0000000000000000000000000000000000000000..58e29346a3a8d7ec8fefb0a0ad4d2c401ec11a7c --- /dev/null +++ b/lib/gitlab/background_migration/backfill_missing_namespace_id_on_notes.rb @@ -0,0 +1,93 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + class BackfillMissingNamespaceIdOnNotes < BatchedMigrationJob + operation_name :backfill_missing_namespace_id_on_notes + feature_category :code_review_workflow + + def perform + each_sub_batch do |sub_batch| + Gitlab::Database.allow_cross_joins_across_databases( + url: 'https://gitlab.com/gitlab-org/gitlab/-/merge_requests/163687' + ) do + connection.execute(build_query(sub_batch)) + end + end + end + + private + + # rubocop:disable Layout/LineLength -- SQL! + # rubocop:disable Metrics/MethodLength -- I do what I want + def build_query(scope) + records_query = scope.where(namespace_id: nil).select(" + id, + ( + coalesce( + (case + when exists (select 1 from projects where id = notes.project_id) then (select namespace_id from projects where id = notes.project_id) + when noteable_type = 'AlertManagement::Alert' then (select namespace_id from projects where id = (select project_id from alert_management_alerts where noteable_id = notes.id limit 1) limit 1) + when noteable_type = 'MergeRequest' then (select namespace_id from projects where id = (select project_id from merge_requests where noteable_id = notes.id limit 1) limit 1) + when noteable_type = 'Vulnerability' then (select namespace_id from projects where id = (select project_id from vulnerabilities where noteable_id = notes.id limit 1) limit 1) + -- These 2 need to pull namespace_id from the noteable + when noteable_type = 'DesignManagement::Design' then (select namespace_id from design_management_designs where id = notes.noteable_id limit 1) + when noteable_type = 'Issue' then (select namespace_id from issues where id = notes.noteable_id limit 1) + -- Epics pull in group_id + when noteable_type = 'Epic' then (select group_id from epics where id = notes.noteable_id limit 1) + -- Snippets pull from author + when noteable_type = 'Snippet' then (select id from namespaces where owner_id = (select author_id from notes where id = notes.id limit 1) limit 1) + -- Commits pull namespace_id from the project of the note + when noteable_type = 'Commit' then (select namespace_id from projects where id = notes.project_id limit 1) + else + -1 + end + ), -1)) as namespace_id_to_set + ") + + <<~SQL + with records AS ( + #{records_query.to_sql} + ), updated_rows as ( + -- updating records with the located namespace_id_to_set value + update notes set namespace_id = namespace_id_to_set from records where records.id=notes.id and namespace_id_to_set <> -1 + ), deleted_rows as ( + -- deleting the records where we couldn't find the namespace id + delete from notes where id IN (select id from records where namespace_id_to_set = -1) + ) + select 1 + SQL + end + # rubocop:enable Layout/LineLength + # rubocop:enable Metrics/MethodLength + + def backfillable?(note) + note.noteable_type.present? + end + + def extract_namespace_id(note) + # Attempt to find namespace_id from the project first. + # + if note.project_id + project = Project.find_by_id(note.project_id) + + return project.namespace_id if project + end + + # We have to load the noteable here because we don't have access to the + # usual ActiveRecord relationships to do it for us. + # + noteable = note.noteable_type.constantize.find(note.noteable_id) + + case note.noteable_type + when "AlertManagement::Alert", "Commit", "MergeRequest", "Vulnerability" + noteable.project.namespace_id + when "DesignManagement::Design", "Epic", "Issue" + noteable.namespace_id + when "Snippet" + noteable.author.namespace_id + end + end + end + end +end diff --git a/spec/lib/gitlab/background_migration/backfill_missing_namespace_id_on_notes_spec.rb b/spec/lib/gitlab/background_migration/backfill_missing_namespace_id_on_notes_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..11f0aa31197fa77dc07b1d8caee3fbdf5f730e37 --- /dev/null +++ b/spec/lib/gitlab/background_migration/backfill_missing_namespace_id_on_notes_spec.rb @@ -0,0 +1,219 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BackgroundMigration::BackfillMissingNamespaceIdOnNotes, feature_category: :code_review_workflow do + let(:namespaces_table) { table(:namespaces) } + let(:notes_table) { table(:notes) } + let(:projects_table) { table(:projects) } + let(:snippets_table) { table(:snippets) } + let(:users_table) { table(:users) } + let(:epics_table) { table(:epics) } + let(:issues_table) { table(:issues) } + let(:work_item_types_table) { table(:work_item_types) } + let(:organizations_table) { table(:organizations) } + + let!(:organization) { organizations_table.create!(name: 'organization', path: 'organization') } + + let(:namespace_1) do + namespaces_table.create!( + name: 'namespace', + path: 'namespace-path-1', + organization_id: organization.id + ) + end + + let(:project_namespace_2) do + namespaces_table.create!( + name: 'namespace', + path: 'namespace-path-2', + type: 'Project', + organization_id: organization.id + ) + end + + let!(:project_1) do + projects_table + .create!( + name: 'project1', + path: 'path1', + namespace_id: namespace_1.id, + project_namespace_id: project_namespace_2.id, + visibility_level: 0, + organization_id: organization.id + ) + end + + let!(:user_1) { users_table.create!(name: 'bob', email: 'bob@example.com', projects_limit: 1) } + + context "when namespace_id is derived from note.project_id" do + let(:alert_management_alert_note) do + notes_table.create!(project_id: project_1.id, noteable_type: "AlertManagement::Alert") + end + + let(:commit_note) { notes_table.create!(project_id: project_1.id, noteable_type: "Commit") } + let(:merge_request_note) { notes_table.create!(project_id: project_1.id, noteable_type: "MergeRequest") } + let(:vulnerability_note) { notes_table.create!(project_id: project_1.id, noteable_type: "Vulnerability") } + let(:design_note) { notes_table.create!(project_id: project_1.id, noteable_type: "Design") } + let(:work_item_note) { notes_table.create!(project_id: project_1.id, noteable_type: "WorkItem") } + let(:issue_note) { notes_table.create!(project_id: project_1.id, noteable_type: "Issue") } + + it "updates the namespace_id" do + [ + alert_management_alert_note, + commit_note, + merge_request_note, + vulnerability_note, + design_note, + work_item_note, + issue_note + ].each do |test_note| + expect(test_note.project_id).not_to be_nil + + test_note.update_columns(namespace_id: nil) + test_note.reload + + expect(test_note.namespace_id).to be_nil + + described_class.new( + start_id: test_note.id, + end_id: test_note.id, + batch_table: :notes, + batch_column: :id, + sub_batch_size: 1, + pause_ms: 0, + connection: ActiveRecord::Base.connection + ).perform + + test_note.reload + + expect(test_note.namespace_id).not_to be_nil + expect(test_note.namespace_id).to eq(Project.find(test_note.project_id).namespace_id) + end + end + end + + context "when namespace_id is derived from noteable.author.namespace_id" do + let!(:snippet) do + snippets_table.create!( + author_id: user_1.id, + project_id: project_1.id + ) + end + + let(:personal_snippet_note) do + notes_table.create!(author_id: user_1.id, noteable_type: "Snippet", noteable_id: snippet.id) + end + + let(:project_snippet_note) do + notes_table.create!(author_id: user_1.id, noteable_type: "Snippet", noteable_id: snippet.id) + end + + let!(:user_namespace) do + namespaces_table.create!( + name: 'namespace', + path: 'user-namespace-path', + type: 'User', + owner_id: user_1.id, + organization_id: organization.id + ) + end + + it "updates the namespace_id" do + [project_snippet_note, personal_snippet_note].each do |test_note| + test_note.update_columns(namespace_id: nil) + test_note.reload + + expect(test_note.namespace_id).to be_nil + + described_class.new( + start_id: test_note.id, + end_id: test_note.id, + batch_table: :notes, + batch_column: :id, + sub_batch_size: 1, + pause_ms: 0, + connection: ActiveRecord::Base.connection + ).perform + + test_note.reload + + expect(test_note.namespace_id).not_to be_nil + expect(test_note.namespace_id).to eq(user_namespace.id) + end + end + end + + context "when namespace_id is derived from noteable.id" do + let!(:group_namespace) do + namespaces_table.create!( + name: 'namespace', + path: 'group-namespace-path', + type: 'Group', + owner_id: user_1.id, + organization_id: organization.id + ) + end + + let!(:work_items_type) do + work_item_types_table.create!( + id: Random.random_number(4000), + name: 'User Story', + correct_id: Random.random_number(4000) + ) + end + + let!(:issue) do + issues_table.create!( + title: "Example Epic", + author_id: user_1.id, + namespace_id: group_namespace.id, + correct_work_item_type_id: work_items_type.correct_id, + work_item_type_id: work_items_type.id + ) + end + + let!(:epic) do + epics_table.create!( + title: "Example Epic", + group_id: group_namespace.id, + author_id: user_1.id, + iid: Random.random_number(4000), + title_html: "Example", + issue_id: issue.id + ) + end + + let(:epic_note) do + notes_table.create!( + namespace_id: group_namespace.id, + noteable_type: "Epic", + noteable_id: epic.id + ) + end + + it "updates the namespace_id" do + [epic_note].each do |test_note| + test_note.update_columns(namespace_id: nil) + test_note.reload + + expect(test_note.namespace_id).to be_nil + + described_class.new( + start_id: test_note.id, + end_id: test_note.id, + batch_table: :notes, + batch_column: :id, + sub_batch_size: 1, + pause_ms: 0, + connection: ActiveRecord::Base.connection + ).perform + + test_note.reload + + expect(test_note.namespace_id).not_to be_nil + expect(test_note.namespace_id).to eq(group_namespace.id) + end + end + end +end diff --git a/spec/migrations/20240822220027_queue_backfill_missing_namespace_id_on_notes_spec.rb b/spec/migrations/20240822220027_queue_backfill_missing_namespace_id_on_notes_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..01c61445fe0ca4da704a73fd56aa2b15804ef079 --- /dev/null +++ b/spec/migrations/20240822220027_queue_backfill_missing_namespace_id_on_notes_spec.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +require 'spec_helper' +require_migration! + +RSpec.describe QueueBackfillMissingNamespaceIdOnNotes, feature_category: :code_review_workflow do + let!(:batched_migration) { described_class::MIGRATION } + + it 'schedules a new batched migration' do + reversible_migration do |migration| + migration.before -> { + expect(batched_migration).not_to have_scheduled_batched_migration + } + + migration.after -> { + expect(batched_migration).to have_scheduled_batched_migration( + table_name: :notes, + column_name: :id, + interval: described_class::DELAY_INTERVAL, + batch_size: described_class::BATCH_SIZE, + sub_batch_size: described_class::SUB_BATCH_SIZE + ) + } + end + end +end