From 9fce3b72fa6313cdfae2f44f2133075ea6bad909 Mon Sep 17 00:00:00 2001 From: Tomasz Skorupa Date: Tue, 28 Oct 2025 14:54:42 -0400 Subject: [PATCH 1/3] Shard abuse_uploads Addresses https://gitlab.com/gitlab-org/gitlab/-/issues/571831 && https://gitlab.com/gitlab-org/gitlab/-/issues/398199#proposal Changelog: other --- db/docs/abuse_report_events.yml | 1 - db/docs/abuse_reports.yml | 12 +--- ...d_sharding_key_trigger_to_abuse_reports.rb | 31 ++++++++++ ...traint_to_abuse_reports_on_sharding_key.rb | 18 ++++++ ...548_backfill_abuse_reports_sharding_key.rb | 22 +++++++ ..._not_null_sharding_key_on_abuse_reports.rb | 11 ++++ db/schema_migrations/20251028184316 | 1 + db/schema_migrations/20251028185952 | 1 + db/schema_migrations/20251028190548 | 1 + db/schema_migrations/20251028192326 | 1 + db/structure.sql | 19 ++++++ ...ackfill_abuse_reports_sharding_key_spec.rb | 61 +++++++++++++++++++ 12 files changed, 168 insertions(+), 11 deletions(-) create mode 100644 db/migrate/20251028184316_add_sharding_key_trigger_to_abuse_reports.rb create mode 100644 db/migrate/20251028185952_add_not_null_not_valid_constraint_to_abuse_reports_on_sharding_key.rb create mode 100644 db/post_migrate/20251028190548_backfill_abuse_reports_sharding_key.rb create mode 100644 db/post_migrate/20251028192326_validate_not_null_sharding_key_on_abuse_reports.rb create mode 100644 db/schema_migrations/20251028184316 create mode 100644 db/schema_migrations/20251028185952 create mode 100644 db/schema_migrations/20251028190548 create mode 100644 db/schema_migrations/20251028192326 create mode 100644 spec/migrations/backfill_abuse_reports_sharding_key_spec.rb diff --git a/db/docs/abuse_report_events.yml b/db/docs/abuse_report_events.yml index 1cedab587d952d..980325b5f727e2 100644 --- a/db/docs/abuse_report_events.yml +++ b/db/docs/abuse_report_events.yml @@ -18,5 +18,4 @@ desired_sharding_key: table_primary_key: id sharding_key: organization_id belongs_to: abuse_report - awaiting_backfill_on_parent: true table_size: small diff --git a/db/docs/abuse_reports.yml b/db/docs/abuse_reports.yml index 487010ae68626d..3750ab921e2d01 100644 --- a/db/docs/abuse_reports.yml +++ b/db/docs/abuse_reports.yml @@ -8,14 +8,6 @@ description: Stores abuse reports from other users. introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/commit/cba7f20dc8614d12e3eeda6e14f454aeb22b9b54 milestone: '7.13' gitlab_schema: gitlab_main_org -desired_sharding_key: - organization_id: - references: organizations - backfill_via: - parent: - foreign_key: reporter_id - table: users - table_primary_key: id - sharding_key: organization_id - belongs_to: reporter +sharding_key: + organization_id: organizations table_size: small diff --git a/db/migrate/20251028184316_add_sharding_key_trigger_to_abuse_reports.rb b/db/migrate/20251028184316_add_sharding_key_trigger_to_abuse_reports.rb new file mode 100644 index 00000000000000..13063835c576a5 --- /dev/null +++ b/db/migrate/20251028184316_add_sharding_key_trigger_to_abuse_reports.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +class AddShardingKeyTriggerToAbuseReports < Gitlab::Database::Migration[2.3] + milestone '18.6' + + TABLE_NAME = :abuse_reports + SHARDING_KEY = :organization_id + PARENT_TABLE = :users + PARENT_SHARDING_KEY = :organization_id + FOREIGN_KEY = :reporter_id + + def up + install_sharding_key_assignment_trigger( + table: TABLE_NAME, + sharding_key: SHARDING_KEY, + parent_table: PARENT_TABLE, + parent_sharding_key: PARENT_SHARDING_KEY, + foreign_key: FOREIGN_KEY + ) + end + + def down + remove_sharding_key_assignment_trigger( + table: TABLE_NAME, + sharding_key: SHARDING_KEY, + parent_table: PARENT_TABLE, + parent_sharding_key: PARENT_SHARDING_KEY, + foreign_key: FOREIGN_KEY + ) + end +end diff --git a/db/migrate/20251028185952_add_not_null_not_valid_constraint_to_abuse_reports_on_sharding_key.rb b/db/migrate/20251028185952_add_not_null_not_valid_constraint_to_abuse_reports_on_sharding_key.rb new file mode 100644 index 00000000000000..24322fbf85f9e7 --- /dev/null +++ b/db/migrate/20251028185952_add_not_null_not_valid_constraint_to_abuse_reports_on_sharding_key.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +class AddNotNullNotValidConstraintToAbuseReportsOnShardingKey < Gitlab::Database::Migration[2.3] + disable_ddl_transaction! + milestone '18.6' + + def up + add_not_null_constraint( + :abuse_reports, + :organization_id, + validate: false + ) + end + + def down + remove_not_null_constraint(:abuse_reports, :organization_id) + end +end diff --git a/db/post_migrate/20251028190548_backfill_abuse_reports_sharding_key.rb b/db/post_migrate/20251028190548_backfill_abuse_reports_sharding_key.rb new file mode 100644 index 00000000000000..cbe2c5e7223084 --- /dev/null +++ b/db/post_migrate/20251028190548_backfill_abuse_reports_sharding_key.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +class BackfillAbuseReportsShardingKey < Gitlab::Database::Migration[2.3] + restrict_gitlab_migration gitlab_schema: :gitlab_main_org + milestone '18.6' + + # To keep transactions as short as possible, + # see https://docs.gitlab.com/development/migration_style_guide/#heavy-operations-in-a-single-transaction + disable_ddl_transaction! + + def up + define_batchable_model(:abuse_reports).each_batch(of: 500) do |batch| + # NOTE: I want to trigger trigger_f7464057d53e() for all rows + # where the sharding key is NULL. + batch + .where(organization_id: nil) + .update_all('updated_at = updated_at') + end + end + + def down; end +end diff --git a/db/post_migrate/20251028192326_validate_not_null_sharding_key_on_abuse_reports.rb b/db/post_migrate/20251028192326_validate_not_null_sharding_key_on_abuse_reports.rb new file mode 100644 index 00000000000000..7b1f05c8a3f221 --- /dev/null +++ b/db/post_migrate/20251028192326_validate_not_null_sharding_key_on_abuse_reports.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +class ValidateNotNullShardingKeyOnAbuseReports < Gitlab::Database::Migration[2.3] + milestone '18.6' + + def up + validate_not_null_constraint :abuse_reports, :organization_id + end + + def down; end +end diff --git a/db/schema_migrations/20251028184316 b/db/schema_migrations/20251028184316 new file mode 100644 index 00000000000000..2405c99a47e00a --- /dev/null +++ b/db/schema_migrations/20251028184316 @@ -0,0 +1 @@ +59293334202bd4e98cd47ba941e73c7a91b848e91069b76ce485173ee9e8e2c1 \ No newline at end of file diff --git a/db/schema_migrations/20251028185952 b/db/schema_migrations/20251028185952 new file mode 100644 index 00000000000000..00c39fa84cef06 --- /dev/null +++ b/db/schema_migrations/20251028185952 @@ -0,0 +1 @@ +88435e02275755185537ce312afbf80497d986168d8d1a8cde4cb0a557817c30 \ No newline at end of file diff --git a/db/schema_migrations/20251028190548 b/db/schema_migrations/20251028190548 new file mode 100644 index 00000000000000..1924ab5d978cad --- /dev/null +++ b/db/schema_migrations/20251028190548 @@ -0,0 +1 @@ +e301df5ac5161dfd920e600372ff213075db435b4c412f626e180d0ca342cc5e \ No newline at end of file diff --git a/db/schema_migrations/20251028192326 b/db/schema_migrations/20251028192326 new file mode 100644 index 00000000000000..d55d16185fdd86 --- /dev/null +++ b/db/schema_migrations/20251028192326 @@ -0,0 +1 @@ +153e439fee33da30fc4b465b2261faf0c1f853416f24a54bf4249a662e29392d \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index 01ab3ad768975e..02e941dfe66321 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -4527,6 +4527,22 @@ RETURN NEW; END $$; +CREATE FUNCTION trigger_f7464057d53e() RETURNS trigger + LANGUAGE plpgsql + AS $$ +BEGIN +IF NEW."organization_id" IS NULL THEN + SELECT "organization_id" + INTO NEW."organization_id" + FROM "users" + WHERE "users"."id" = NEW."reporter_id"; +END IF; + +RETURN NEW; + +END +$$; + CREATE FUNCTION trigger_fac444e0cae6() RETURNS trigger LANGUAGE plpgsql AS $$ @@ -9908,6 +9924,7 @@ CREATE TABLE abuse_reports ( user_id_convert_to_bigint bigint, organization_id bigint, CONSTRAINT abuse_reports_links_to_spam_length_check CHECK ((cardinality(links_to_spam) <= 20)), + CONSTRAINT check_1e642c5f94 CHECK ((organization_id IS NOT NULL)), CONSTRAINT check_4b0a5120e0 CHECK ((char_length(screenshot) <= 255)), CONSTRAINT check_ab1260fa6c CHECK ((char_length(reported_from_url) <= 512)), CONSTRAINT check_f3c0947a2d CHECK ((char_length(mitigation_steps) <= 1000)), @@ -47905,6 +47922,8 @@ CREATE TRIGGER trigger_f6c61cdddf31 BEFORE INSERT OR UPDATE ON ml_model_metadata CREATE TRIGGER trigger_f6f59d8216b3 BEFORE INSERT OR UPDATE ON protected_environment_deploy_access_levels FOR EACH ROW EXECUTE FUNCTION trigger_f6f59d8216b3(); +CREATE TRIGGER trigger_f7464057d53e BEFORE INSERT OR UPDATE ON abuse_reports FOR EACH ROW EXECUTE FUNCTION trigger_f7464057d53e(); + CREATE TRIGGER trigger_fac444e0cae6 BEFORE INSERT OR UPDATE ON design_management_designs_versions FOR EACH ROW EXECUTE FUNCTION trigger_fac444e0cae6(); CREATE TRIGGER trigger_fbd42ed69453 BEFORE INSERT OR UPDATE ON external_status_checks_protected_branches FOR EACH ROW EXECUTE FUNCTION trigger_fbd42ed69453(); diff --git a/spec/migrations/backfill_abuse_reports_sharding_key_spec.rb b/spec/migrations/backfill_abuse_reports_sharding_key_spec.rb new file mode 100644 index 00000000000000..971400c361092f --- /dev/null +++ b/spec/migrations/backfill_abuse_reports_sharding_key_spec.rb @@ -0,0 +1,61 @@ + +require 'spec_helper' + +require_migration! + +RSpec.describe BackfillAbuseReportsShardingKey, feature_category: :instance_resiliency do + let(:connection) { ApplicationRecord.connection } + let(:abuse_reports) { table(:abuse_reports) } + + let!(:organization) { create(:organization) } + let!(:reporter) { create(:user, organization: organization) } + let!(:user1) { create(:user, organization: organization) } + let!(:user2) { create(:user, organization: organization) } + + before do + report = create(:abuse_report, user: user1, reporter: reporter, organization: organization) + expect(report.reload.organization_id).not_to be_nil + + report_without_org = create(:abuse_report, user: user2, reporter: reporter, organization: organization) + drop_constraint_and_trigger + report_without_org.update_columns(organization_id: nil) + expect(report_without_org.reload.organization_id).to be_nil + end + + it 'backfills abuse reports with nil organization_id' do + expect { migrate! }.to change { abuse_reports.where(organization_id: nil).count }.by(-1) + + expect(abuse_reports.find_by(user_id: user1.id, reporter_id: reporter.id).organization_id) + .to eq(organization.id) + expect(abuse_reports.find_by(user_id: user2.id, reporter_id: reporter.id).organization_id) + .to eq(organization.id) + end + + after do + recreate_constraint_and_trigger + end + + private + + def drop_constraint_and_trigger + connection.execute( + <<~SQL + DROP TRIGGER IF EXISTS trigger_f7464057d53e ON abuse_reports; + + ALTER TABLE abuse_reports DROP CONSTRAINT IF EXISTS check_1e642c5f94; + SQL + ) + end + + def recreate_constraint_and_trigger + connection.execute( + <<~SQL + ALTER TABLE abuse_reports + ADD CONSTRAINT check_1e642c5f94 CHECK ((organization_id IS NOT NULL)); + + CREATE TRIGGER trigger_f7464057d53e BEFORE INSERT OR UPDATE + ON abuse_reports FOR EACH ROW EXECUTE FUNCTION trigger_f7464057d53e(); + SQL + ) + end +end -- GitLab From 5699cc83368fe3284e16e40e87423c9244827930 Mon Sep 17 00:00:00 2001 From: Tomasz Skorupa Date: Tue, 4 Nov 2025 12:22:15 -0500 Subject: [PATCH 2/3] fixup! Shard abuse_uploads --- .../backfill_abuse_reports_sharding_key_spec.rb | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/spec/migrations/backfill_abuse_reports_sharding_key_spec.rb b/spec/migrations/backfill_abuse_reports_sharding_key_spec.rb index 971400c361092f..d3c5c64a8a2fa8 100644 --- a/spec/migrations/backfill_abuse_reports_sharding_key_spec.rb +++ b/spec/migrations/backfill_abuse_reports_sharding_key_spec.rb @@ -19,11 +19,13 @@ report_without_org = create(:abuse_report, user: user2, reporter: reporter, organization: organization) drop_constraint_and_trigger report_without_org.update_columns(organization_id: nil) + recreate_trigger expect(report_without_org.reload.organization_id).to be_nil end it 'backfills abuse reports with nil organization_id' do expect { migrate! }.to change { abuse_reports.where(organization_id: nil).count }.by(-1) + recreate_constraint expect(abuse_reports.find_by(user_id: user1.id, reporter_id: reporter.id).organization_id) .to eq(organization.id) @@ -31,10 +33,6 @@ .to eq(organization.id) end - after do - recreate_constraint_and_trigger - end - private def drop_constraint_and_trigger @@ -47,12 +45,18 @@ def drop_constraint_and_trigger ) end - def recreate_constraint_and_trigger + def recreate_constraint connection.execute( <<~SQL ALTER TABLE abuse_reports ADD CONSTRAINT check_1e642c5f94 CHECK ((organization_id IS NOT NULL)); + SQL + ) + end + def recreate_trigger + connection.execute( + <<~SQL CREATE TRIGGER trigger_f7464057d53e BEFORE INSERT OR UPDATE ON abuse_reports FOR EACH ROW EXECUTE FUNCTION trigger_f7464057d53e(); SQL -- GitLab From ea5c64bc3bc6e741f13c9dcc489d748bd82d9369 Mon Sep 17 00:00:00 2001 From: Tomasz Skorupa Date: Tue, 4 Nov 2025 13:49:58 -0500 Subject: [PATCH 3/3] fixup! fixup! Shard abuse_uploads --- ...ackfill_abuse_reports_sharding_key_spec.rb | 42 +++++++++++++++---- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/spec/migrations/backfill_abuse_reports_sharding_key_spec.rb b/spec/migrations/backfill_abuse_reports_sharding_key_spec.rb index d3c5c64a8a2fa8..bf79d3f995d7d8 100644 --- a/spec/migrations/backfill_abuse_reports_sharding_key_spec.rb +++ b/spec/migrations/backfill_abuse_reports_sharding_key_spec.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require 'spec_helper' @@ -6,26 +7,40 @@ RSpec.describe BackfillAbuseReportsShardingKey, feature_category: :instance_resiliency do let(:connection) { ApplicationRecord.connection } let(:abuse_reports) { table(:abuse_reports) } + let(:organizations) { table(:organizations) } + let(:users) { table(:users) } - let!(:organization) { create(:organization) } - let!(:reporter) { create(:user, organization: organization) } - let!(:user1) { create(:user, organization: organization) } - let!(:user2) { create(:user, organization: organization) } + let!(:organization) { organizations.create!(name: 'org', path: 'org') } + let!(:reporter) { create_user('reporter') } + let!(:user1) { create_user('user1') } + let!(:user2) { create_user('user2') } before do - report = create(:abuse_report, user: user1, reporter: reporter, organization: organization) - expect(report.reload.organization_id).not_to be_nil + abuse_reports.create!( + user_id: user1.id, + reporter_id: reporter.id, + organization_id: organization.id + ) - report_without_org = create(:abuse_report, user: user2, reporter: reporter, organization: organization) + report_without_org = abuse_reports.create!( + user_id: user2.id, + reporter_id: reporter.id, + organization_id: organization.id + ) drop_constraint_and_trigger report_without_org.update_columns(organization_id: nil) recreate_trigger - expect(report_without_org.reload.organization_id).to be_nil + end + + after do + recreate_constraint end it 'backfills abuse reports with nil organization_id' do + expect(abuse_reports.where(organization_id: nil).count).to eq(1) + expect(abuse_reports.where.not(organization_id: nil).count).to eq(1) + expect { migrate! }.to change { abuse_reports.where(organization_id: nil).count }.by(-1) - recreate_constraint expect(abuse_reports.find_by(user_id: user1.id, reporter_id: reporter.id).organization_id) .to eq(organization.id) @@ -35,6 +50,15 @@ private + def create_user(username) + users.create!( + email: "#{username}@example.com", + username: username, + organization_id: organization.id, + projects_limit: 10 + ) + end + def drop_constraint_and_trigger connection.execute( <<~SQL -- GitLab