From d97c83397e27f5b92e261f994fa460a6e3af84f8 Mon Sep 17 00:00:00 2001 From: mo khan Date: Wed, 12 Nov 2025 09:41:50 -0700 Subject: [PATCH 1/2] Shard spam_logs table --- db/docs/spam_logs.yml | 12 +-- ...7_add_sharding_key_trigger_to_spam_logs.rb | 31 ++++++ ...ot_null_to_spam_logs_on_organization_id.rb | 14 +++ ...3549_backfill_spam_logs_organization_id.rb | 26 +++++ ...8_validate_organization_id_on_spam_logs.rb | 11 +++ db/schema_migrations/20251112163007 | 1 + db/schema_migrations/20251112163329 | 1 + db/schema_migrations/20251112163549 | 1 + db/schema_migrations/20251112165028 | 1 + db/structure.sql | 21 +++- ...backfill_spam_logs_organization_id_spec.rb | 96 +++++++++++++++++++ 11 files changed, 204 insertions(+), 11 deletions(-) create mode 100644 db/migrate/20251112163007_add_sharding_key_trigger_to_spam_logs.rb create mode 100644 db/migrate/20251112163329_add_not_null_to_spam_logs_on_organization_id.rb create mode 100644 db/post_migrate/20251112163549_backfill_spam_logs_organization_id.rb create mode 100644 db/post_migrate/20251112165028_validate_organization_id_on_spam_logs.rb create mode 100644 db/schema_migrations/20251112163007 create mode 100644 db/schema_migrations/20251112163329 create mode 100644 db/schema_migrations/20251112163549 create mode 100644 db/schema_migrations/20251112165028 create mode 100644 spec/migrations/backfill_spam_logs_organization_id_spec.rb diff --git a/db/docs/spam_logs.yml b/db/docs/spam_logs.yml index 6738fe6f613bd6..a09f678dc6eb9f 100644 --- a/db/docs/spam_logs.yml +++ b/db/docs/spam_logs.yml @@ -8,14 +8,6 @@ description: Logs users flagged by the Akismet anti-spam integration. introduced_by_url: https://gitlab.com/gitlab-org/gitlab-foss/-/merge_requests/2266 milestone: '8.5' gitlab_schema: gitlab_main_org -desired_sharding_key: - organization_id: - references: organizations - backfill_via: - parent: - foreign_key: user_id - table: users - table_primary_key: id - sharding_key: organization_id - belongs_to: user +sharding_key: + organization_id: organizations table_size: small diff --git a/db/migrate/20251112163007_add_sharding_key_trigger_to_spam_logs.rb b/db/migrate/20251112163007_add_sharding_key_trigger_to_spam_logs.rb new file mode 100644 index 00000000000000..cc39bfc484cbdf --- /dev/null +++ b/db/migrate/20251112163007_add_sharding_key_trigger_to_spam_logs.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +class AddShardingKeyTriggerToSpamLogs < Gitlab::Database::Migration[2.3] + milestone '18.6' + + TABLE_NAME = :spam_logs + SHARDING_KEY = :organization_id + PARENT_TABLE = :users + PARENT_SHARDING_KEY = :organization_id + FOREIGN_KEY = :user_id + + def up + install_sharding_key_assignment_trigger( + table: TABLE_NAME, + sharding_key: SHARDING_KEY, + parent_table: PARENT_TABLE, + parent_sharding_key: PARENT_SHARDING_KEY, + foreign_key: FOREIGN_KEY + ) + end + + def down + remove_sharding_key_assignment_trigger( + table: TABLE_NAME, + sharding_key: SHARDING_KEY, + parent_table: PARENT_TABLE, + parent_sharding_key: PARENT_SHARDING_KEY, + foreign_key: FOREIGN_KEY + ) + end +end diff --git a/db/migrate/20251112163329_add_not_null_to_spam_logs_on_organization_id.rb b/db/migrate/20251112163329_add_not_null_to_spam_logs_on_organization_id.rb new file mode 100644 index 00000000000000..9854ad4d49f02b --- /dev/null +++ b/db/migrate/20251112163329_add_not_null_to_spam_logs_on_organization_id.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +class AddNotNullToSpamLogsOnOrganizationId < Gitlab::Database::Migration[2.3] + disable_ddl_transaction! + milestone '18.6' + + def up + add_not_null_constraint :spam_logs, :organization_id, validate: false + end + + def down + remove_not_null_constraint :spam_logs, :organization_id + end +end diff --git a/db/post_migrate/20251112163549_backfill_spam_logs_organization_id.rb b/db/post_migrate/20251112163549_backfill_spam_logs_organization_id.rb new file mode 100644 index 00000000000000..1a14934e5457b7 --- /dev/null +++ b/db/post_migrate/20251112163549_backfill_spam_logs_organization_id.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +class BackfillSpamLogsOrganizationId < Gitlab::Database::Migration[2.3] + restrict_gitlab_migration gitlab_schema: :gitlab_main_org + disable_ddl_transaction! + milestone '18.6' + + DEFAULT_ORGANIZATION_ID = 1 + + def up + define_batchable_model(:spam_logs).each_batch(of: 500) do |batch| + # Use default organization for rows that have a NULL user_id + batch + .where(organization_id: nil) + .where(user_id: nil) + .update_all(organization_id: DEFAULT_ORGANIZATION_ID) + + # Trigger trigger_0e7036da74c7() to backfill organization_id + batch + .where(organization_id: nil) + .update_all('updated_at = updated_at') + end + end + + def down; end +end diff --git a/db/post_migrate/20251112165028_validate_organization_id_on_spam_logs.rb b/db/post_migrate/20251112165028_validate_organization_id_on_spam_logs.rb new file mode 100644 index 00000000000000..c392aeaa31368b --- /dev/null +++ b/db/post_migrate/20251112165028_validate_organization_id_on_spam_logs.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +class ValidateOrganizationIdOnSpamLogs < Gitlab::Database::Migration[2.3] + milestone '18.6' + + def up + validate_not_null_constraint :spam_logs, :organization_id + end + + def down; end +end diff --git a/db/schema_migrations/20251112163007 b/db/schema_migrations/20251112163007 new file mode 100644 index 00000000000000..d1845a8e72b0de --- /dev/null +++ b/db/schema_migrations/20251112163007 @@ -0,0 +1 @@ +45a5009e100358df8bc505591389b8953005b5381122b740ae845276c11c5581 \ No newline at end of file diff --git a/db/schema_migrations/20251112163329 b/db/schema_migrations/20251112163329 new file mode 100644 index 00000000000000..4d4cf26b0b1f8a --- /dev/null +++ b/db/schema_migrations/20251112163329 @@ -0,0 +1 @@ +4256db6d66962af085f88429ada25183e9a985c2293c5358eb8ea87d2a355930 \ No newline at end of file diff --git a/db/schema_migrations/20251112163549 b/db/schema_migrations/20251112163549 new file mode 100644 index 00000000000000..d77a5a66cdc8b8 --- /dev/null +++ b/db/schema_migrations/20251112163549 @@ -0,0 +1 @@ +c55f110ca4e415443725be4913265571f1c8328ecfcc56536bceb7cd11d35377 \ No newline at end of file diff --git a/db/schema_migrations/20251112165028 b/db/schema_migrations/20251112165028 new file mode 100644 index 00000000000000..59de3efa6f0c1c --- /dev/null +++ b/db/schema_migrations/20251112165028 @@ -0,0 +1 @@ +ce534f52d3a7682295c3aaec93a7256b959eb73d58eb2f60d0561586c190466a \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index e95a3aaa2961d9..49e10c44fa568c 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -1696,6 +1696,22 @@ RETURN NEW; END $$; +CREATE FUNCTION trigger_0e7036da74c7() RETURNS trigger + LANGUAGE plpgsql + AS $$ +BEGIN +IF NEW."organization_id" IS NULL THEN + SELECT "organization_id" + INTO NEW."organization_id" + FROM "users" + WHERE "users"."id" = NEW."user_id"; +END IF; + +RETURN NEW; + +END +$$; + CREATE FUNCTION trigger_0f38e5af9adf() RETURNS trigger LANGUAGE plpgsql AS $$ @@ -26693,7 +26709,8 @@ CREATE TABLE spam_logs ( submitted_as_ham boolean DEFAULT false NOT NULL, recaptcha_verified boolean DEFAULT false NOT NULL, target_id bigint, - organization_id bigint + organization_id bigint, + CONSTRAINT check_0c0873a24a CHECK ((organization_id IS NOT NULL)) ); CREATE SEQUENCE spam_logs_id_seq @@ -47605,6 +47622,8 @@ CREATE TRIGGER trigger_0ddb594934c9 BEFORE INSERT OR UPDATE ON incident_manageme CREATE TRIGGER trigger_0e13f214e504 BEFORE INSERT OR UPDATE ON merge_request_assignment_events FOR EACH ROW EXECUTE FUNCTION trigger_0e13f214e504(); +CREATE TRIGGER trigger_0e7036da74c7 BEFORE INSERT OR UPDATE ON spam_logs FOR EACH ROW EXECUTE FUNCTION trigger_0e7036da74c7(); + CREATE TRIGGER trigger_0f38e5af9adf BEFORE INSERT OR UPDATE ON ml_candidate_params FOR EACH ROW EXECUTE FUNCTION trigger_0f38e5af9adf(); CREATE TRIGGER trigger_13d4aa8fe3dd BEFORE INSERT OR UPDATE ON draft_notes FOR EACH ROW EXECUTE FUNCTION trigger_13d4aa8fe3dd(); diff --git a/spec/migrations/backfill_spam_logs_organization_id_spec.rb b/spec/migrations/backfill_spam_logs_organization_id_spec.rb new file mode 100644 index 00000000000000..b8a02bec4f4268 --- /dev/null +++ b/spec/migrations/backfill_spam_logs_organization_id_spec.rb @@ -0,0 +1,96 @@ +# frozen_string_literal: true + +require 'spec_helper' + +require_migration! + +RSpec.describe BackfillSpamLogsOrganizationId, feature_category: :instance_resiliency do + let(:connection) { ApplicationRecord.connection } + let(:spam_logs) { table(:spam_logs) } + let(:organizations) { table(:organizations) } + let(:users) { table(:users) } + let(:trigger_name) { 'trigger_0e7036da74c7' } + let(:constraint_name) { 'check_0c0873a24a' } + + let!(:default_organization) { organizations.create!(id: 1, name: 'default', path: 'default') } + let!(:organization) { organizations.create!(name: 'org', path: 'org') } + let!(:user1) { create_user('user1') } + let!(:user2) { create_user('user2') } + + let!(:spam_log_with_org) { create_spam_log(user1, organization) } + + let!(:spam_log_without_org) do + drop_constraint_and_trigger + create_spam_log(user2, nil) + end + + let!(:spam_log_without_org_or_user) do + drop_constraint_and_trigger + create_spam_log(nil, nil) + end + + before do + recreate_trigger + end + + after do + recreate_constraint + end + + it 'backfills with nil organization_id' do + expect(spam_logs.where(organization_id: nil).count).to eq(2) + expect(spam_logs.where.not(organization_id: nil).count).to eq(1) + + expect { migrate! }.to change { spam_logs.where(organization_id: nil).count }.by(-2) + + expect(spam_logs.find_by(id: spam_log_with_org.id).organization_id).to eq(organization.id) + expect(spam_logs.find_by(id: spam_log_without_org.id).organization_id).to eq(organization.id) + expect(spam_logs.find_by(id: spam_log_without_org_or_user.id).organization_id).to eq( + Organizations::Organization::DEFAULT_ORGANIZATION_ID + ) + end + + private + + def create_user(username) + users.create!( + email: "#{username}@example.com", + username: username, + organization_id: organization.id, + projects_limit: 10 + ) + end + + def create_spam_log(user, organization) + spam_logs.create!( + user_id: user&.id, + organization_id: organization&.id + ) + end + + def drop_constraint_and_trigger + connection.execute( + <<~SQL + DROP TRIGGER IF EXISTS #{trigger_name} ON spam_logs; + + ALTER TABLE spam_logs DROP CONSTRAINT IF EXISTS #{constraint_name}; + SQL + ) + end + + def recreate_constraint + connection.execute( + <<~SQL + ALTER TABLE spam_logs ADD CONSTRAINT #{constraint_name} CHECK ((organization_id IS NOT NULL)); + SQL + ) + end + + def recreate_trigger + connection.execute( + <<~SQL + CREATE TRIGGER #{trigger_name} BEFORE INSERT OR UPDATE ON spam_logs FOR EACH ROW EXECUTE FUNCTION #{trigger_name}(); + SQL + ) + end +end -- GitLab From 3151636dd235a407cb3a52c67ec9c537e671f769 Mon Sep 17 00:00:00 2001 From: mo khan Date: Wed, 12 Nov 2025 14:41:29 -0700 Subject: [PATCH 2/2] Include records with a user_id --- .../20251112163549_backfill_spam_logs_organization_id.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/db/post_migrate/20251112163549_backfill_spam_logs_organization_id.rb b/db/post_migrate/20251112163549_backfill_spam_logs_organization_id.rb index 1a14934e5457b7..87539adcd6d56d 100644 --- a/db/post_migrate/20251112163549_backfill_spam_logs_organization_id.rb +++ b/db/post_migrate/20251112163549_backfill_spam_logs_organization_id.rb @@ -17,6 +17,7 @@ def up # Trigger trigger_0e7036da74c7() to backfill organization_id batch + .where.not(user_id: nil) .where(organization_id: nil) .update_all('updated_at = updated_at') end -- GitLab