diff --git a/db/docs/batched_background_migrations/backfill_organization_id_on_fork_networks.yml b/db/docs/batched_background_migrations/backfill_organization_id_on_fork_networks.yml
new file mode 100644
index 0000000000000000000000000000000000000000..99325aee188f7d10f5fa06b2a0f99d60ebf252ea
--- /dev/null
+++ b/db/docs/batched_background_migrations/backfill_organization_id_on_fork_networks.yml
@@ -0,0 +1,8 @@
+---
+migration_job_name: BackfillOrganizationIdOnForkNetworks
+description: Backfills the `organization_id` column on `fork_networks` from the `root_project_id` project, falling back to the project of a fork network member and then to the default organization
+feature_category: source_code_management
+introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/184051
+milestone: '17.11'
+queued_migration_version: 20250310143706
+finalized_by: # version of the migration that finalized this BBM
diff --git a/db/post_migrate/20250310143706_queue_backfill_organization_id_on_fork_networks.rb b/db/post_migrate/20250310143706_queue_backfill_organization_id_on_fork_networks.rb
new file mode 100644
index 0000000000000000000000000000000000000000..ebf4bf07619097948f8d17ef24dfe679045ffa29
--- /dev/null
+++ b/db/post_migrate/20250310143706_queue_backfill_organization_id_on_fork_networks.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+class QueueBackfillOrganizationIdOnForkNetworks < Gitlab::Database::Migration[2.2]
+  milestone '17.11'
+
+  restrict_gitlab_migration gitlab_schema: :gitlab_main
+
+  MIGRATION = "BackfillOrganizationIdOnForkNetworks"
+  BATCH_SIZE = 1000
+  SUB_BATCH_SIZE = 100
+
+  def up
+    queue_batched_background_migration(
+      MIGRATION,
+      :fork_networks,
+      :id,
+      batch_size: BATCH_SIZE,
+      sub_batch_size: SUB_BATCH_SIZE
+    )
+  end
+
+  def down
+    delete_batched_background_migration(MIGRATION, :fork_networks, :id, [])
+  end
+end
diff --git a/db/schema_migrations/20250310143706 b/db/schema_migrations/20250310143706
new file mode 100644
index 0000000000000000000000000000000000000000..1b3b59998dd1c9f20b948d2fb79d6cd38790dc1a
--- /dev/null
+++ b/db/schema_migrations/20250310143706
@@ -0,0 +1 @@
+12fd84bbccb017e4a9d723d64f271d2061fddd799c9dee634ab40fb64c536582
\ No newline at end of file
diff --git a/lib/gitlab/background_migration/backfill_organization_id_on_fork_networks.rb b/lib/gitlab/background_migration/backfill_organization_id_on_fork_networks.rb
new file mode 100644
index 0000000000000000000000000000000000000000..e4b2920ad54bc257c55631a24c4cbe340e76f1a0
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_organization_id_on_fork_networks.rb
@@ -0,0 +1,101 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module BackgroundMigration
+    # Backfills `fork_networks.organization_id` for rows where it is still NULL.
+    #
+    # Each sub-batch is processed in three passes:
+    #
+    # 1. copy the organization of the root project (`root_project_id`),
+    # 2. for networks without a root project, copy the organization of a
+    #    project that is a member of the network,
+    # 3. assign a default organization to any row both passes left NULL.
+    class BackfillOrganizationIdOnForkNetworks < BatchedMigrationJob
+      operation_name :backfill_organization_id_on_fork_networks
+      feature_category :source_code_management
+      scope_to ->(relation) { relation.where(organization_id: nil) }
+
+      # Migration-local model backed by the `organizations` table.
+      module Organizations
+        class Organization < ::ApplicationRecord
+          self.table_name = 'organizations'
+        end
+      end
+
+      def perform
+        fallback_organization_id = find_default_organization_id
+
+        each_sub_batch do |sub_batch|
+          ids = sub_batch.pluck(:id)
+          next if ids.empty?
+
+          ids_list = ids.join(',')
+
+          connection.execute(update_records_with_root_project_id(ids_list))
+          connection.execute(update_records_without_root_project_id(ids_list))
+
+          # With no organization at all there is nothing to fall back to;
+          # interpolating nil below would produce invalid SQL.
+          next unless fallback_organization_id
+
+          connection.execute(update_missed_records(ids_list, fallback_organization_id))
+        end
+      end
+
+      private
+
+      # Returns the id used by the last pass: organization 1 when it exists
+      # (default organization id is currently 1 in the model layer), otherwise
+      # the lowest organization id, or nil when the table is empty.
+      def find_default_organization_id
+        Organizations::Organization.where(id: 1).pick(:id) ||
+          Organizations::Organization.order(:id).pick(:id)
+      end
+
+      # Pass 1: copy the organization of the root project.
+      def update_records_with_root_project_id(ids_list)
+        <<~SQL.squish
+          UPDATE fork_networks
+          SET organization_id = projects.organization_id
+          FROM projects
+          WHERE fork_networks.id IN (#{ids_list})
+          AND fork_networks.root_project_id = projects.id
+          AND fork_networks.organization_id IS NULL
+        SQL
+      end
+
+      # Pass 2: networks without a root project take the organization of a
+      # member project. DISTINCT ON keeps exactly one row per fork network
+      # (the member project with the lowest id), because UPDATE ... FROM picks
+      # an unpredictable row when several rows join to the same target.
+      def update_records_without_root_project_id(ids_list)
+        <<~SQL.squish
+          UPDATE fork_networks
+          SET organization_id = map.organization_id
+          FROM (
+            SELECT DISTINCT ON (fork_networks.id) fork_networks.id, projects.organization_id
+            FROM fork_networks
+            JOIN fork_network_members ON fork_network_members.fork_network_id = fork_networks.id
+            JOIN projects ON projects.id = fork_network_members.project_id
+            WHERE fork_networks.root_project_id IS NULL
+            AND fork_networks.id IN (#{ids_list})
+            AND projects.organization_id IS NOT NULL
+            ORDER BY fork_networks.id, projects.id
+          ) map
+          WHERE map.id = fork_networks.id
+          AND fork_networks.organization_id IS NULL
+        SQL
+      end
+
+      # Pass 3: safety net in case the previous queries missed any record.
+      def update_missed_records(ids_list, default_organization_id)
+        <<~SQL.squish
+          UPDATE fork_networks
+          SET organization_id = #{default_organization_id}
+          WHERE fork_networks.id IN (#{ids_list})
+          AND fork_networks.organization_id IS NULL
+        SQL
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/background_migration/backfill_organization_id_on_fork_networks_spec.rb b/spec/lib/gitlab/background_migration/backfill_organization_id_on_fork_networks_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..6391413324f671c3ebc614203ac6cee004c839d8
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/backfill_organization_id_on_fork_networks_spec.rb
@@ -0,0 +1,109 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::BackgroundMigration::BackfillOrganizationIdOnForkNetworks, feature_category: :source_code_management do
+  let(:fork_networks_table) { table(:fork_networks) }
+  let(:fork_network_members_table) { table(:fork_network_members) }
+  let(:namespaces_table) { table(:namespaces) }
+  let(:organizations_table) { table(:organizations) }
+  let(:projects_table) { table(:projects) }
+
+  let!(:default_organization) { organizations_table.create!(name: "Organization", path: "organization") }
+  let(:namespace) { namespaces_table.create!(name: 'Test', path: 'test', organization_id: default_organization.id) }
+
+  let!(:another_organization) { organizations_table.create!(name: "Another", path: "another") }
+  let(:another_namespace) do
+    namespaces_table.create!(name: 'Test 2', path: 'test-2', organization_id: another_organization.id)
+  end
+
+  let(:project) do
+    projects_table.create!(
+      name: 'project',
+      path: 'project',
+      namespace_id: another_namespace.id,
+      project_namespace_id: another_namespace.id,
+      organization_id: another_organization.id
+    )
+  end
+
+  let(:args) do
+    min, max = fork_networks_table.pick('MIN(id)', 'MAX(id)')
+
+    {
+      start_id: min,
+      end_id: max,
+      batch_table: 'fork_networks',
+      batch_column: 'id',
+      sub_batch_size: 1,
+      pause_ms: 0,
+      connection: ApplicationRecord.connection
+    }
+  end
+
+  subject(:perform_migration) { described_class.new(**args).perform }
+
+  context 'when root project exists' do
+    let(:fork_network) { fork_networks_table.create!(root_project_id: project.id) }
+
+    it 'updates the organization_id with the organization_id of the root project' do
+      expect { perform_migration }.to change {
+        fork_network.reload.organization_id
+      }.from(nil).to(project.organization_id)
+    end
+  end
+
+  context 'when root project is deleted' do
+    let(:fork_network) { fork_networks_table.create!(root_project_id: nil) }
+
+    context 'and a fork_network_member exists' do
+      before do
+        forked_namespace = namespaces_table.create!(name: 'Test 3', path: 'test-3',
+          organization_id: another_organization.id)
+
+        forked_project = projects_table.create!(
+          name: 'forked project',
+          path: 'forked-project',
+          namespace_id: forked_namespace.id,
+          project_namespace_id: forked_namespace.id,
+          organization_id: another_organization.id
+        )
+
+        fork_network_members_table.create!(fork_network_id: fork_network.id,
+          forked_from_project_id: project.id, project_id: forked_project.id)
+      end
+
+      it 'updates the organization_id via the fork_network_member' do
+        expect { perform_migration }.to change {
+          fork_network.reload.organization_id
+        }.from(nil).to(project.organization_id)
+      end
+    end
+
+    context 'and no fork_network_member exists' do
+      context 'when organization with ID 1 exists' do
+        let!(:org_id_1) { organizations_table.create!(id: 1, name: "Primary Org", path: "primary-org") }
+
+        it 'uses organization with ID 1 as the default' do
+          expect { perform_migration }.to change {
+            fork_network.reload.organization_id
+          }.from(nil).to(org_id_1.id)
+        end
+      end
+
+      context 'when organization with ID 1 does not exist' do
+        before do
+          organizations_table.where(id: 1).delete_all
+        end
+
+        it 'falls back to the first organization' do
+          expect(Organizations::Organization.first.id).to eq(default_organization.id)
+
+          expect { perform_migration }.to change {
+            fork_network.reload.organization_id
+          }.from(nil).to(default_organization.id)
+        end
+      end
+    end
+  end
+end
diff --git a/spec/migrations/20250310143706_queue_backfill_organization_id_on_fork_networks_spec.rb b/spec/migrations/20250310143706_queue_backfill_organization_id_on_fork_networks_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..37497945ffc748c1dc4eec5978436c99ad334c9f
--- /dev/null
+++ b/spec/migrations/20250310143706_queue_backfill_organization_id_on_fork_networks_spec.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+require_migration!
+
+RSpec.describe QueueBackfillOrganizationIdOnForkNetworks, migration: :gitlab_main, feature_category: :source_code_management do
+  let!(:batched_migration) { described_class::MIGRATION }
+
+  it 'schedules a new batched migration' do
+    reversible_migration do |migration|
+      migration.before -> {
+        expect(batched_migration).not_to have_scheduled_batched_migration
+      }
+
+      migration.after -> {
+        expect(batched_migration).to have_scheduled_batched_migration(
+          gitlab_schema: :gitlab_main,
+          table_name: :fork_networks,
+          column_name: :id,
+          batch_size: described_class::BATCH_SIZE,
+          sub_batch_size: described_class::SUB_BATCH_SIZE
+        )
+      }
+    end
+  end
+end