diff --git a/db/docs/batched_background_migrations/backfill_partitioned_uploads.yml b/db/docs/batched_background_migrations/backfill_partitioned_uploads.yml index 1e1e619c25bc963ebedcf378278aa5039343aa9e..225908f0438cd52d65750df39e1cd6759d9205d6 100644 --- a/db/docs/batched_background_migrations/backfill_partitioned_uploads.yml +++ b/db/docs/batched_background_migrations/backfill_partitioned_uploads.yml @@ -4,5 +4,5 @@ description: Back-fill partitoned uploads_9ba88c4165 table and populate sharding feature_category: database introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/181349 milestone: '18.2' -queued_migration_version: 20250609164216 -finalized_by: 20250717035404 +queued_migration_version: 20251201121648 +finalized_by: diff --git a/db/post_migrate/20250609164216_queue_backfill_partitioned_uploads.rb b/db/post_migrate/20250609164216_queue_backfill_partitioned_uploads.rb index ab0affe6c6526c90d0c17147810e103c14a4b845..d71548bd2558ae24f31a9caf34b7ee46c8b94957 100644 --- a/db/post_migrate/20250609164216_queue_backfill_partitioned_uploads.rb +++ b/db/post_migrate/20250609164216_queue_backfill_partitioned_uploads.rb @@ -11,17 +11,12 @@ class QueueBackfillPartitionedUploads < Gitlab::Database::Migration[2.3] SUB_BATCH_SIZE = 300 def up - queue_batched_background_migration( - MIGRATION, - :uploads, - :id, - job_interval: DELAY_INTERVAL, - batch_size: BATCH_SIZE, - sub_batch_size: SUB_BATCH_SIZE - ) + # no-op: + # This migration is a no-op because the original migration was re-enqueued with a new version. + # The new migration is 20251201121648_queue_re_enqueue_backfill_partitioned_uploads.rb end def down - delete_batched_background_migration(MIGRATION, :uploads, :id, []) + # no-op end end diff --git a/db/post_migrate/20250717035404_finalize_partitioned_uploads_backfill.rb b/db/post_migrate/20250717035404_finalize_partitioned_uploads_backfill.rb index 8a4d4371102648c3f74f36d71b7366c93eb688cd..07fddb2ae646a4adcd28ea05285802bd54cec334 100644 --- a/db/post_migrate/20250717035404_finalize_partitioned_uploads_backfill.rb +++ b/db/post_migrate/20250717035404_finalize_partitioned_uploads_backfill.rb @@ -8,13 +8,9 @@ class FinalizePartitionedUploadsBackfill < Gitlab::Database::Migration[2.3] restrict_gitlab_migration gitlab_schema: :gitlab_main def up - ensure_batched_background_migration_is_finished( - job_class_name: 'BackfillPartitionedUploads', - table_name: :uploads, - column_name: :id, - job_arguments: [], - finalize: true - ) + # no-op: + # This migration is a no-op because the original migration was re-enqueued with a new version. + # The new migration is 20251201121648_queue_re_enqueue_backfill_partitioned_uploads.rb end def down; end diff --git a/db/post_migrate/20251201121648_queue_re_enqueue_backfill_partitioned_uploads.rb b/db/post_migrate/20251201121648_queue_re_enqueue_backfill_partitioned_uploads.rb new file mode 100644 index 0000000000000000000000000000000000000000..2f18004f4de049003e05f3c0e3ac5d0294cb50a1 --- /dev/null +++ b/db/post_migrate/20251201121648_queue_re_enqueue_backfill_partitioned_uploads.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +class QueueReEnqueueBackfillPartitionedUploads < Gitlab::Database::Migration[2.3] + milestone '18.8' + + restrict_gitlab_migration gitlab_schema: :gitlab_main_org + + MIGRATION = "BackfillPartitionedUploads" + DELAY_INTERVAL = 2.minutes + BATCH_SIZE = 7000 + SUB_BATCH_SIZE = 300 + + def up + delete_batched_background_migration(MIGRATION, :uploads, :id, []) + + queue_batched_background_migration( + MIGRATION, + :uploads, + :id, + job_interval: DELAY_INTERVAL, + batch_size: BATCH_SIZE, + sub_batch_size: SUB_BATCH_SIZE + ) + end + + def down + delete_batched_background_migration(MIGRATION, :uploads, :id, []) + end +end diff --git a/db/schema_migrations/20251201121648 b/db/schema_migrations/20251201121648 new file mode 100644 index 0000000000000000000000000000000000000000..4e5c76284277dee467faa1fef795101beb5ac0a8 --- /dev/null +++ b/db/schema_migrations/20251201121648 @@ -0,0 +1 @@ +a8ce96b5ba09fa3f3aedf0f10724cba6963f978c09f26fafb9841ae1ca844a14 \ No newline at end of file diff --git a/lib/gitlab/background_migration/backfill_partitioned_uploads.rb b/lib/gitlab/background_migration/backfill_partitioned_uploads.rb index 2ecc8b0bd377b97eda700246fcf3fcf7842d09ad..039804fe08293c6018662c5e9ec099b76fc4f873 100644 --- a/lib/gitlab/background_migration/backfill_partitioned_uploads.rb +++ b/lib/gitlab/background_migration/backfill_partitioned_uploads.rb @@ -2,7 +2,7 @@ module Gitlab module BackgroundMigration - class BackfillPartitionedUploads < BatchedMigrationJob + class BackfillPartitionedUploads < BatchedMigrationJob # rubocop:disable Metrics/ClassLength -- contains table mapping feature_category :database operation_name :backfill @@ -12,30 +12,36 @@ class Upload < ApplicationRecord def perform each_sub_batch do |sub_batch| - tables_and_models.each do |model_table, model_name, sources, targets, join_key, db_name| - process_upload_type(sub_batch, model_table, model_name, sources, targets, join_key, db_name) + tables_and_models.each do |model_table, model_name, sources, targets, join_key, db_name, db_schema| + process_upload_type(sub_batch, model_table, model_name, sources, targets, join_key, db_name, db_schema) end end end private - def sharding_key_columns - %w[organization_id namespace_id project_id] + def sharding_key_columns(db_schema) + # NOTE: `uploaded_by_user_id` is also a sharding key column for tables that have the + # gitlab_main_user schema. We want to copy over the column (instead of nullifying it) + # since `uploaded_by_user_id` is an integral part of uploads (to know who an upload was made by). + if db_schema == :gitlab_main_user + %w[uploaded_by_user_id] + else + %w[organization_id namespace_id project_id] + end end - def columns - Upload.column_names - sharding_key_columns + def columns(db_schema) + Upload.column_names - sharding_key_columns(db_schema) end def tables_and_models - # model_table, model_name, sources, targets, join_key, db_name - [ - ['abuse_reports', 'AbuseReport', %w[]], + [ # model_table, model_name, sources, targets, join_key, db_name, db_schema + ['abuse_reports', 'AbuseReport', %w[organization_id]], ['achievements', 'Achievements::Achievement', %w[namespace_id]], ['ai_vectorizable_files', 'Ai::VectorizableFile', %w[project_id]], ['alert_management_alert_metric_images', 'AlertManagement::MetricImage', %w[project_id]], - ['appearances', 'Appearance', %w[]], + ['appearances', 'Appearance', %w[]], # cell-local table ['bulk_import_export_uploads', 'BulkImports::ExportUpload', %w[group_id project_id], %w[namespace_id project_id]], ['design_management_designs_versions', 'DesignManagement::Action', %w[namespace_id]], @@ -47,8 +53,9 @@ def tables_and_models ['topics', 'Projects::Topic', %w[organization_id]], ['projects', 'Project', %w[id], %w[project_id]], ['snippets', 'Snippet', %w[organization_id]], - ['user_permission_export_uploads', 'UserPermissionExportUpload', %w[]], - ['users', 'User', %w[]], + ['user_permission_export_uploads', 'UserPermissionExportUpload', %w[user_id], %w[uploaded_by_user_id], nil, + nil, :gitlab_main_user], + ['users', 'User', %w[organization_id]], # Sec tables ['dependency_list_exports', 'Dependencies::DependencyListExport', %w[organization_id group_id project_id], %w[organization_id namespace_id project_id], nil, :sec], @@ -73,19 +80,20 @@ def tables_and_models # targets - sharding key columns to back-fill # join_key - column to join with the model table, defaults to id # db_name - database the model table belongs to - def process_upload_type(sub_batch, model_table, model_name, sources, targets, join_key, db_name) + # db_schema - database schema the table belongs to + def process_upload_type(sub_batch, model_table, model_name, sources, targets, join_key, db_name, db_schema) relation = sub_batch.select(:id, :model_type).limit(sub_batch_size) targets ||= sources join_key ||= 'id' # Columns that will be reset (nullified) as they are not used for sharding keys - reset_columns = sharding_key_columns - targets + reset_columns = sharding_key_columns(db_schema) - targets # All columns to back-fill - target_columns = (columns + targets + reset_columns).join(', ') + target_columns = (columns(db_schema) + targets + reset_columns).join(', ') # All columns to source from - source_columns = source_columns_sql(sources, reset_columns) + source_columns = source_columns_sql(sources, reset_columns, db_schema) # For existing records update only sharding key columns (if any) on_conflict = if targets.any? - "UPDATE SET #{sharding_key_columns.map { |c| "#{c} = EXCLUDED.#{c}" }.join(', ')}" + "UPDATE SET #{sharding_key_columns(db_schema).map { |c| "#{c} = EXCLUDED.#{c}" }.join(', ')}" else "NOTHING" end @@ -97,7 +105,7 @@ def process_upload_type(sub_batch, model_table, model_name, sources, targets, jo sec_cte = sec_model_values_cte(sub_batch, model_name, join_key, sources, model_table) return unless sec_cte - source_columns = source_columns_sql(sources, reset_columns, nullif: true) + source_columns = source_columns_sql(sources, reset_columns, db_schema, nullif: true) model_values_cte = sec_cte end @@ -117,9 +125,9 @@ def process_upload_type(sub_batch, model_table, model_name, sources, targets, jo connection.execute(upsert) end - def source_columns_sql(sources, reset_columns, nullif: false) + def source_columns_sql(sources, reset_columns, db_schema, nullif: false) ( - columns.map { |c| "uploads.#{c}" } + + columns(db_schema).map { |c| "uploads.#{c}" } + # Convert -1 back to NULL using NULLIF sources.map { |c| nullif ? "NULLIF(model.#{c}, -1)" : "model.#{c}" } + reset_columns.map { 'NULL' } @@ -142,6 +150,6 @@ def sec_model_values_cte(sub_batch, model_name, join_key, sources, model_table) ", #{model_table}(#{columns.join(', ')}) AS (VALUES #{values})" end - end + end # rubocop:enable Metrics/ClassLength end end diff --git a/spec/lib/gitlab/background_migration/backfill_partitioned_uploads_spec.rb b/spec/lib/gitlab/background_migration/backfill_partitioned_uploads_spec.rb index 3da66f0cd1c0cbb6e12779010b5f0e3c99cf0051..396e21bd015178c128049ba2017ec95eeb1a3733 100644 --- a/spec/lib/gitlab/background_migration/backfill_partitioned_uploads_spec.rb +++ b/spec/lib/gitlab/background_migration/backfill_partitioned_uploads_spec.rb @@ -14,31 +14,31 @@ let(:projects) { table(:projects) } let(:users) { table(:users) } let(:abuse_reports) { table(:abuse_reports) } - let(:achievements) { table(:achievements) } + let(:achievements_achievements) { table(:achievements) } let(:ai_vectorizable_files) { table(:ai_vectorizable_files) } let(:alert_management_alerts) { table(:alert_management_alerts) } - let(:alert_management_alert_metric_images) { table(:alert_management_alert_metric_images) } + let(:alert_management_metric_images) { table(:alert_management_alert_metric_images) } let(:appearances) { table(:appearances) } - let(:dependency_list_exports) { table(:dependency_list_exports, database: :sec) } + let(:dependencies_dependency_list_exports) { table(:dependency_list_exports, database: :sec) } let(:import_export_uploads) { table(:import_export_uploads) } let(:issuable_metric_images) { table(:issuable_metric_images) } - let(:organization_details) { table(:organization_details) } - let(:topics) { table(:topics) } + let(:organizations_organization_details) { table(:organization_details) } + let(:projects_topics) { table(:topics) } let(:snippets) { table(:snippets) } let(:user_permission_export_uploads) { table(:user_permission_export_uploads) } - let(:dependency_list_export_parts) { table(:dependency_list_export_parts, database: :sec) } - let(:vulnerability_exports) { table(:vulnerability_exports, database: :sec) } - let(:vulnerability_export_parts) { table(:vulnerability_export_parts, database: :sec) } - let(:vulnerability_remediations) { table(:vulnerability_remediations, database: :sec) } - let(:vulnerability_archive_exports) { table(:vulnerability_archive_exports, database: :sec) } + let(:dependencies_dependency_list_export_parts) { table(:dependency_list_export_parts, database: :sec) } + let(:vulnerabilities_exports) { table(:vulnerability_exports, database: :sec) } + let(:vulnerabilities_export_parts) { table(:vulnerability_export_parts, database: :sec) } + let(:vulnerabilities_remediations) { table(:vulnerability_remediations, database: :sec) } + let(:vulnerabilities_archive_exports) { table(:vulnerability_archive_exports, database: :sec) } let(:project_export_jobs) { table(:project_export_jobs) } let(:project_relation_exports) { table(:project_relation_exports) } - let(:project_relation_export_uploads) { table(:project_relation_export_uploads) } + let(:projects_import_export_relation_export_uploads) { table(:project_relation_export_uploads) } let(:design_management_designs) { table(:design_management_designs) } let(:design_management_versions) { table(:design_management_versions) } - let(:design_management_designs_versions) { table(:design_management_designs_versions) } + let(:design_management_actions) { table(:design_management_designs_versions) } let(:bulk_import_exports) { table(:bulk_import_exports) } - let(:bulk_import_export_uploads) { table(:bulk_import_export_uploads) } + let(:bulk_imports_export_uploads) { table(:bulk_import_export_uploads) } let(:connection) { ApplicationRecord.connection } @@ -289,134 +289,153 @@ ).perform end.not_to raise_error - expect(find_partitioned_upload(abuse_report_upload_to_be_synced.id)).to be_truthy - expect(find_partitioned_upload(abuse_report_upload_synced.id)).to be_truthy + verify_backfilled_values(abuse_report_upload_to_be_synced) + verify_backfilled_values(abuse_report_upload_synced) expect(find_partitioned_upload(abuse_report_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(appearance_upload_to_be_synced.id)).to be_truthy - expect(find_partitioned_upload(appearance_upload_synced.id)).to be_truthy + verify_backfilled_values(appearance_upload_to_be_synced) + verify_backfilled_values(appearance_upload_synced) expect(find_partitioned_upload(appearance_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(achievement_upload_to_be_synced.id).namespace_id) - .to eq(achievement_1.namespace_id) - expect(find_partitioned_upload(achievement_upload_synced.id).namespace_id) - .to eq(achievement_2.namespace_id) + verify_backfilled_values(achievement_upload_to_be_synced) + verify_backfilled_values(achievement_upload_synced) expect(find_partitioned_upload(achievement_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(ai_vectorizable_file_upload_to_be_synced.id).project_id) - .to eq(ai_vectorizable_file_1.project_id) - expect(find_partitioned_upload(ai_vectorizable_file_upload_synced.id).project_id) - .to eq(ai_vectorizable_file_2.project_id) + verify_backfilled_values(ai_vectorizable_file_upload_to_be_synced) + verify_backfilled_values(ai_vectorizable_file_upload_synced) expect(find_partitioned_upload(ai_vectorizable_file_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(alert_metric_image_upload_to_be_synced.id).project_id) - .to eq(alert_metric_image_1.project_id) - expect(find_partitioned_upload(alert_metric_image_upload_synced.id).project_id) - .to eq(alert_metric_image_2.project_id) + verify_backfilled_values(alert_metric_image_upload_to_be_synced) + verify_backfilled_values(alert_metric_image_upload_synced) expect(find_partitioned_upload(alert_metric_image_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(dependency_list_export_upload_to_be_synced.id).namespace_id) - .to eq(dependency_list_export_1.group_id) - expect(find_partitioned_upload(dependency_list_export_upload_synced.id).namespace_id) - .to eq(dependency_list_export_2.group_id) + verify_backfilled_values(dependency_list_export_upload_to_be_synced) + verify_backfilled_values(dependency_list_export_upload_synced) expect(find_partitioned_upload(dependency_list_export_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(dependency_list_export_part_upload_to_be_synced.id).organization_id) - .to eq(dependency_list_export_part_1.organization_id) - expect(find_partitioned_upload(dependency_list_export_part_upload_synced.id).organization_id) - .to eq(dependency_list_export_part_2.organization_id) + verify_backfilled_values(dependency_list_export_part_upload_to_be_synced) + verify_backfilled_values(dependency_list_export_part_upload_synced) expect(find_partitioned_upload(dependency_list_export_part_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(import_export_upload_upload_to_be_synced.id).namespace_id) - .to eq(import_export_upload_1.group_id) - expect(find_partitioned_upload(import_export_upload_upload_synced.id).namespace_id) - .to eq(import_export_upload_2.group_id) + verify_backfilled_values(import_export_upload_upload_to_be_synced) + verify_backfilled_values(import_export_upload_upload_synced) expect(find_partitioned_upload(import_export_upload_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(issuable_metric_image_upload_to_be_synced.id).namespace_id) - .to eq(issuable_metric_image_1.namespace_id) - expect(find_partitioned_upload(issuable_metric_image_upload_synced.id).namespace_id) - .to eq(issuable_metric_image_2.namespace_id) + verify_backfilled_values(issuable_metric_image_upload_to_be_synced) + verify_backfilled_values(issuable_metric_image_upload_synced) expect(find_partitioned_upload(issuable_metric_image_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(organization_detail_upload_to_be_synced.id).organization_id) - .to eq(organization_detail_1.organization_id) - expect(find_partitioned_upload(organization_detail_upload_synced.id).organization_id) - .to eq(organization_detail_2.organization_id) + verify_backfilled_values(organization_detail_upload_to_be_synced) + verify_backfilled_values(organization_detail_upload_synced) expect(find_partitioned_upload(organization_detail_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(topic_upload_to_be_synced.id).organization_id).to eq(topic_1.organization_id) - expect(find_partitioned_upload(topic_upload_synced.id).organization_id).to eq(topic_2.organization_id) + verify_backfilled_values(topic_upload_to_be_synced) + verify_backfilled_values(topic_upload_synced) expect(find_partitioned_upload(topic_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(snippet_upload_to_be_synced.id).organization_id).to eq(snippet_1.organization_id) - expect(find_partitioned_upload(snippet_upload_synced.id).organization_id).to eq(snippet_2.organization_id) + verify_backfilled_values(snippet_upload_to_be_synced) + verify_backfilled_values(snippet_upload_synced) expect(find_partitioned_upload(snippet_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(user_permission_export_upload_upload_to_be_synced.id)).to be_truthy - expect(find_partitioned_upload(user_permission_export_upload_upload_synced.id)).to be_truthy + verify_backfilled_values(user_permission_export_upload_upload_to_be_synced) + verify_backfilled_values(user_permission_export_upload_upload_synced) expect(find_partitioned_upload(user_permission_export_upload_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(user_upload_to_be_synced.id)).to be_truthy - expect(find_partitioned_upload(user_upload_synced.id)).to be_truthy + verify_backfilled_values(user_upload_to_be_synced) + verify_backfilled_values(user_upload_synced) expect(find_partitioned_upload(user_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(vulnerability_export_upload_to_be_synced.id).organization_id) - .to eq(vulnerability_export_1.organization_id) - expect(find_partitioned_upload(vulnerability_export_upload_synced.id).organization_id) - .to eq(vulnerability_export_2.organization_id) + verify_backfilled_values(vulnerability_export_upload_to_be_synced) + verify_backfilled_values(vulnerability_export_upload_synced) expect(find_partitioned_upload(vulnerability_export_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(vulnerability_export_part_upload_to_be_synced.id).organization_id) - .to eq(vulnerability_export_part_1.organization_id) - expect(find_partitioned_upload(vulnerability_export_part_upload_synced.id).organization_id) - .to eq(vulnerability_export_part_2.organization_id) + verify_backfilled_values(vulnerability_export_part_upload_to_be_synced) + verify_backfilled_values(vulnerability_export_part_upload_synced) expect(find_partitioned_upload(vulnerability_export_part_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(vulnerability_remediation_upload_to_be_synced.id).project_id) - .to eq(vulnerability_remediation_1.project_id) - expect(find_partitioned_upload(vulnerability_remediation_upload_synced.id).project_id) - .to eq(vulnerability_remediation_2.project_id) + verify_backfilled_values(vulnerability_remediation_upload_to_be_synced) + verify_backfilled_values(vulnerability_remediation_upload_synced) expect(find_partitioned_upload(vulnerability_remediation_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(vulnerability_archive_export_upload_to_be_synced.id).project_id) - .to eq(vulnerability_archive_export_1.project_id) - expect(find_partitioned_upload(vulnerability_archive_export_upload_synced.id).project_id) - .to eq(vulnerability_archive_export_2.project_id) + verify_backfilled_values(vulnerability_archive_export_upload_to_be_synced) + verify_backfilled_values(vulnerability_archive_export_upload_synced) expect(find_partitioned_upload(vulnerability_archive_export_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(project_upload_to_be_synced.id).project_id).to eq(project_1.id) - expect(find_partitioned_upload(project_upload_synced.id).project_id).to eq(project_2.id) - expect(find_partitioned_upload(project_upload_to_be_synced.id).namespace_id).to be_nil - expect(find_partitioned_upload(project_upload_synced.id).namespace_id).to be_nil + verify_backfilled_values_projects(project_upload_to_be_synced) + verify_backfilled_values_projects(project_upload_synced) expect(find_partitioned_upload(project_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(namespace_upload_to_be_synced.id).namespace_id).to eq(namespace_1.id) - expect(find_partitioned_upload(namespace_upload_synced.id).namespace_id).to eq(namespace_2.id) - expect(find_partitioned_upload(namespace_upload_to_be_synced.id).organization_id).to be_nil - expect(find_partitioned_upload(namespace_upload_synced.id).organization_id).to be_nil + verify_backfilled_values_namespaces(namespace_upload_to_be_synced) + verify_backfilled_values_namespaces(namespace_upload_synced) expect(find_partitioned_upload(namespace_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(project_relation_export_upload_upload_to_be_synced.id).project_id) - .to eq(project_relation_export_upload_1.project_id) - expect(find_partitioned_upload(project_relation_export_upload_upload_synced.id).project_id) - .to eq(project_relation_export_upload_2.project_id) + verify_backfilled_values(project_relation_export_upload_upload_to_be_synced) + verify_backfilled_values(project_relation_export_upload_upload_synced) expect(find_partitioned_upload(project_relation_export_upload_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(designs_version_upload_to_be_synced.id).namespace_id) - .to eq(designs_version_1.namespace_id) - expect(find_partitioned_upload(designs_version_upload_synced.id).namespace_id) - .to eq(designs_version_2.namespace_id) + verify_backfilled_values(designs_version_upload_to_be_synced) + verify_backfilled_values(designs_version_upload_synced) expect(find_partitioned_upload(designs_version_upload_to_be_removed.id)).not_to be_truthy - expect(find_partitioned_upload(bulk_import_export_upload_upload_to_be_synced.id).namespace_id) - .to eq(bulk_import_export_upload_1.group_id) - expect(find_partitioned_upload(bulk_import_export_upload_upload_synced.id).namespace_id) - .to eq(bulk_import_export_upload_2.group_id) + verify_backfilled_values(bulk_import_export_upload_upload_to_be_synced) + verify_backfilled_values(bulk_import_export_upload_upload_synced) expect(find_partitioned_upload(bulk_import_export_upload_upload_to_be_removed.id)).not_to be_truthy end end + def verify_backfilled_values(original_record) + original_record.reload + backfilled_record = find_partitioned_upload(original_record.id) + expect(backfilled_record).to be_truthy + + records_table = original_record.model_type.pluralize.underscore.tr("/", "_") + parent_record = begin + send(records_table).find_by_id(original_record.model_id) + rescue NoMethodError => _e + # NOTE: some tables don't use the column 'id' as a primary key + send(records_table).find(original_record.model_id) + end + + if parent_record.respond_to?(:organization_id) + expect(backfilled_record.organization_id).to eq(parent_record.organization_id) + end + + expect(backfilled_record.namespace_id).to eq(parent_record.namespace_id) if parent_record.respond_to?(:namespace_id) + + expect(backfilled_record.namespace_id).to eq(parent_record.group_id) if parent_record.respond_to?(:group_id) + + expect(backfilled_record.project_id).to eq(parent_record.project_id) if parent_record.respond_to?(:project_id) + + expect(original_record.uploaded_by_user_id).not_to be_nil + expect(original_record.uploaded_by_user_id).to eq(backfilled_record.uploaded_by_user_id) + end + + def verify_backfilled_values_projects(original_record) + original_record.reload + backfilled_record = find_partitioned_upload(original_record.id) + expect(backfilled_record).to be_truthy + + project = projects.find_by_id(original_record.model_id) + expect(backfilled_record.organization_id).to be_nil + expect(backfilled_record.namespace_id).to be_nil + expect(backfilled_record.project_id).to eq(project.id) + expect(original_record.uploaded_by_user_id).not_to be_nil + expect(original_record.uploaded_by_user_id).to eq(backfilled_record.uploaded_by_user_id) + end + + def verify_backfilled_values_namespaces(original_record) + original_record.reload + backfilled_record = find_partitioned_upload(original_record.id) + expect(backfilled_record).to be_truthy + + namespace = namespaces.find_by_id(original_record.model_id) + expect(backfilled_record.organization_id).to be_nil + expect(backfilled_record.namespace_id).to eq(namespace.id) + expect(backfilled_record.project_id).to be_nil + expect(original_record.uploaded_by_user_id).not_to be_nil + expect(original_record.uploaded_by_user_id).to eq(backfilled_record.uploaded_by_user_id) + end + def find_partitioned_upload(id) partitioned_uploads_table.find_by_id(id) end @@ -452,21 +471,28 @@ def create_user(organization_id:) organization_id: organization_id) end - def create_upload(model_type, model, delete_model: false) + # NOTE: provide `uploads_sharding_key` when the uploads partition does not have a trigger to set + # the sharding key column from the parent table (eg. when the parent table belongs to the + # gitlab_sec schema but the uploads partition belongs in the gitlab_main_org schema) + def create_upload(model_type, model, delete_model: false, user_id: nil, uploads_sharding_key: {}) + user_id ||= create_user(organization_id: create_organization.id).id model_id = Array.wrap(model.id).first uploads_table.create!(model_type: model_type, model_id: model_id, size: 42, path: '/some/path', - uploader: 'FileUploader', created_at: Time.current).tap do + uploader: 'FileUploader', created_at: Time.current, uploaded_by_user_id: user_id, + **uploads_sharding_key).tap do model.delete if delete_model end end def create_achievement namespace = create_namespace - achievements.create!(namespace_id: namespace.id, name: SecureRandom.base64) + achievements_achievements.create!(namespace_id: namespace.id, name: SecureRandom.base64) end def create_abuse_report_upload(delete_model: false) - model = abuse_reports.create! + organization = create_organization + reporter = create_user(organization_id: organization.id) + model = abuse_reports.create!(reporter_id: reporter.id) create_upload('AbuseReport', model, delete_model: delete_model) end @@ -497,7 +523,7 @@ def create_alert def create_alert_metric_image alert = create_alert - alert_management_alert_metric_images.create!(alert_id: alert.id, project_id: alert.project_id, file: 'alert file') + alert_management_metric_images.create!(alert_id: alert.id, project_id: alert.project_id, file: 'alert file') end def create_alert_metric_image_upload(model: nil, delete_model: false) @@ -507,24 +533,26 @@ def create_alert_metric_image_upload(model: nil, delete_model: false) def create_dependency_list_export group = create_namespace - dependency_list_exports.create!(group_id: group.id) + dependencies_dependency_list_exports.create!(group_id: group.id) end def create_dependency_list_export_upload(model: nil, delete_model: false) model ||= create_dependency_list_export - create_upload('Dependencies::DependencyListExport', model, delete_model: delete_model) + create_upload('Dependencies::DependencyListExport', model, delete_model: delete_model, + uploads_sharding_key: { namespace_id: model.group_id }) end def create_dependency_list_export_part organization = create_organization dependency_list_export = create_dependency_list_export - dependency_list_export_parts.create!(organization_id: organization.id, + dependencies_dependency_list_export_parts.create!(organization_id: organization.id, dependency_list_export_id: dependency_list_export.id, start_id: 1, end_id: 9) end def create_dependency_list_export_part_upload(model: nil, delete_model: false) model ||= create_dependency_list_export_part - create_upload('Dependencies::DependencyListExport::Part', model, delete_model: delete_model) + create_upload('Dependencies::DependencyListExport::Part', model, delete_model: delete_model, + uploads_sharding_key: { organization_id: model.organization_id }) end def create_import_export_upload @@ -549,13 +577,13 @@ def create_issuable_metric_image end def create_issuable_metric_image_upload(model: nil, delete_model: false) - model ||= create_import_export_upload + model ||= create_issuable_metric_image create_upload('IssuableMetricImage', model, delete_model: delete_model) end def create_organization_detail organization = create_organization - organization_details.create!(organization_id: organization.id) + organizations_organization_details.create!(organization_id: organization.id) end def create_organization_detail_upload(model: nil, delete_model: false) @@ -565,7 +593,7 @@ def create_organization_detail_upload(model: nil, delete_model: false) def create_topic organization = create_organization - topics.create!(organization_id: organization.id, name: SecureRandom.base64) + projects_topics.create!(organization_id: organization.id, name: SecureRandom.base64) end def create_topic_upload(model: nil, delete_model: false) @@ -586,13 +614,14 @@ def create_snippet_upload(model: nil, delete_model: false) def create_user_permission_export_upload organization = create_organization - user = create_user(organization_id: organization.id) - user_permission_export_uploads.create!(user_id: user.id) + user_id = create_user(organization_id: organization.id).id + user_permission_export_uploads.create!(user_id: user_id) end def create_user_permission_export_upload_upload(model: nil, delete_model: false) model ||= create_user_permission_export_upload - create_upload('UserPermissionExportUpload', model, delete_model: delete_model) + create_upload('UserPermissionExportUpload', model, delete_model: delete_model, + user_id: model.user_id) end def create_user_upload(model: nil, delete_model: false) @@ -606,46 +635,50 @@ def create_user_upload(model: nil, delete_model: false) def create_vulnerability_export organization = create_organization user = create_user(organization_id: organization.id) - vulnerability_exports.create!(organization_id: organization.id, author_id: user.id, status: 'open') + vulnerabilities_exports.create!(organization_id: organization.id, author_id: user.id, status: 'open') end def create_vulnerability_export_upload(model: nil, delete_model: false) model ||= create_vulnerability_export - create_upload('Vulnerabilities::Export', model, delete_model: delete_model) + create_upload('Vulnerabilities::Export', model, delete_model: delete_model, + uploads_sharding_key: { organization_id: model.organization_id }) end def create_vulnerability_export_part organization = create_organization vulnerability_export = create_vulnerability_export - vulnerability_export_parts.create!(organization_id: organization.id, + vulnerabilities_export_parts.create!(organization_id: organization.id, vulnerability_export_id: vulnerability_export.id, start_id: 1, end_id: 100) end def create_vulnerability_export_part_upload(model: nil, delete_model: false) model ||= create_vulnerability_export_part - create_upload('Vulnerabilities::Export::Part', model, delete_model: delete_model) + create_upload('Vulnerabilities::Export::Part', model, delete_model: delete_model, + uploads_sharding_key: { organization_id: model.organization_id }) end def create_vulnerability_remediation project = create_project - vulnerability_remediations.create!(project_id: project.id, summary: 'summary', file: 'some_file', checksum: '123') + vulnerabilities_remediations.create!(project_id: project.id, summary: 'summary', file: 'some_file', checksum: '123') end def create_vulnerability_remediation_upload(model: nil, delete_model: false) model ||= create_vulnerability_remediation - create_upload('Vulnerabilities::Remediation', model, delete_model: delete_model) + create_upload('Vulnerabilities::Remediation', model, delete_model: delete_model, + uploads_sharding_key: { project_id: model.project_id }) end def create_vulnerability_archive_export project = create_project user = create_user(organization_id: project.organization_id) - vulnerability_archive_exports.create!(project_id: project.id, author_id: user.id, + vulnerabilities_archive_exports.create!(project_id: project.id, author_id: user.id, date_range: Time.current.yesterday..Time.current) end def create_vulnerability_archive_export_upload(model: nil, delete_model: false) model ||= create_vulnerability_archive_export - create_upload('Vulnerabilities::ArchiveExport', model, delete_model: delete_model) + create_upload('Vulnerabilities::ArchiveExport', model, delete_model: delete_model, + uploads_sharding_key: { project_id: model.project_id }) end def create_project_upload(model: nil, delete_model: false) @@ -667,7 +700,7 @@ def create_project_relation_export_upload project_export_job = project_export_jobs.create!(project_id: project.id, jid: SecureRandom.base64) project_relation_export = project_relation_exports.create!(project_id: project.id, project_export_job_id: project_export_job.id, relation: 'rel') - project_relation_export_uploads.create!(project_id: project.id, + projects_import_export_relation_export_uploads.create!(project_id: project.id, project_relation_export_id: project_relation_export.id, export_file: 'export.file') end @@ -683,7 +716,7 @@ def create_designs_version filename: 'file.name', iid: 1) version = design_management_versions.create!(namespace_id: namespace.id, sha: SecureRandom.base64) - design_management_designs_versions.create!(namespace_id: namespace.id, design_id: design.id, version_id: version.id) + design_management_actions.create!(namespace_id: namespace.id, design_id: design.id, version_id: version.id) end def create_designs_version_upload(model: nil, delete_model: false) @@ -698,7 +731,7 @@ def create_bulk_import_export def create_bulk_import_export_upload export = create_bulk_import_export - bulk_import_export_uploads.create!(export_id: export.id, group_id: export.group_id) + bulk_imports_export_uploads.create!(export_id: export.id, group_id: export.group_id) end def create_bulk_import_export_upload_upload(model: nil, delete_model: false) diff --git a/spec/migrations/20251201121648_queue_re_enqueue_backfill_partitioned_uploads_spec.rb b/spec/migrations/20251201121648_queue_re_enqueue_backfill_partitioned_uploads_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..8016af95faab6e42ea2d5604466c93864f7623e6 --- /dev/null +++ b/spec/migrations/20251201121648_queue_re_enqueue_backfill_partitioned_uploads_spec.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require 'spec_helper' +require_migration! + +RSpec.describe QueueReEnqueueBackfillPartitionedUploads, migration: :gitlab_main_org, feature_category: :database do + let!(:batched_migration) { described_class::MIGRATION } + + it 'schedules a new batched migration' do + reversible_migration do |migration| + migration.before -> { + expect(batched_migration).not_to have_scheduled_batched_migration + } + + migration.after -> { + expect(batched_migration).to have_scheduled_batched_migration( + gitlab_schema: :gitlab_main_org, + table_name: :uploads, + column_name: :id, + interval: described_class::DELAY_INTERVAL, + batch_size: described_class::BATCH_SIZE, + sub_batch_size: described_class::SUB_BATCH_SIZE + ) + } + end + end +end diff --git a/spec/migrations/queue_backfill_partitioned_uploads_spec.rb b/spec/migrations/queue_backfill_partitioned_uploads_spec.rb index 5b916d76a7422abb2ab3d77ecaac04fe9dedb630..65d9bd1532042860dea113a56ebf130130839b07 100644 --- a/spec/migrations/queue_backfill_partitioned_uploads_spec.rb +++ b/spec/migrations/queue_backfill_partitioned_uploads_spec.rb @@ -13,13 +13,7 @@ } migration.after -> { - expect(batched_migration).to have_scheduled_batched_migration( - table_name: :uploads, - column_name: :id, - interval: described_class::DELAY_INTERVAL, - batch_size: described_class::BATCH_SIZE, - sub_batch_size: described_class::SUB_BATCH_SIZE - ) + expect(batched_migration).not_to have_scheduled_batched_migration } end end