diff --git a/app/models/concerns/loose_foreign_key.rb b/app/models/concerns/loose_foreign_key.rb index 8bbd6980d4dac99c75818bccffa397098740b450..102292672b39973a2cd4e6c2b1995c1a85764643 100644 --- a/app/models/concerns/loose_foreign_key.rb +++ b/app/models/concerns/loose_foreign_key.rb @@ -7,20 +7,18 @@ module LooseForeignKey # Loose foreign keys allow delayed processing of associated database records # with similar guarantees than a database foreign key. # - # TODO: finalize this later once the async job is in place - # # Prerequisites: # # To start using the concern, you'll need to install a database trigger to the parent # table in a standard DB migration (not post-migration). # - # > add_loose_foreign_key_support(:projects, :gitlab_main) + # > track_record_deletions(:projects) # # Usage: # # > class Ci::Build < ApplicationRecord # > - # > loose_foreign_key :security_scans, :build_id, on_delete: :async_delete, gitlab_schema: :gitlab_main + # > loose_foreign_key :security_scans, :build_id, on_delete: :async_delete # > # > # associations can be still defined, the dependent options is no longer necessary: # > has_many :security_scans, class_name: 'Security::Scan' @@ -32,14 +30,6 @@ module LooseForeignKey # - :async_delete - deletes the children rows via an asynchronous process. # - :async_nullify - sets the foreign key column to null via an asynchronous process. # - # Options for gitlab_schema: - # - # - :gitlab_ci - # - :gitlab_main - # - # The value can be determined by calling `Model.gitlab_schema` where the Model represents - # the model for the child table. - # # How it works: # # When adding loose foreign key support to the table, a DELETE trigger is installed @@ -69,23 +59,17 @@ def loose_foreign_key(to_table, column, options) end on_delete_options = %i[async_delete async_nullify] - gitlab_schema_options = %i(gitlab_main gitlab_ci) unless on_delete_options.include?(symbolized_options[:on_delete]&.to_sym) raise "Invalid on_delete option given: #{symbolized_options[:on_delete]}. Valid options: #{on_delete_options.join(', ')}" end - unless gitlab_schema_options.include?(symbolized_options[:gitlab_schema]&.to_sym) - raise "Invalid gitlab_schema option given: #{symbolized_options[:gitlab_schema]}. Valid options: #{gitlab_schema_options.join(', ')}" - end - definition = ActiveRecord::ConnectionAdapters::ForeignKeyDefinition.new( table_name.to_s, to_table.to_s, { column: column.to_s, - on_delete: symbolized_options[:on_delete].to_sym, - gitlab_schema: symbolized_options[:gitlab_schema].to_sym + on_delete: symbolized_options[:on_delete].to_sym } ) diff --git a/app/models/loose_foreign_keys/deleted_record.rb b/app/models/loose_foreign_keys/deleted_record.rb index ca5a2800a03876e19ecb9d1bb5bf9a923e20ee13..01d6d068ca4ffacb6db0be340b585dd59ba6378d 100644 --- a/app/models/loose_foreign_keys/deleted_record.rb +++ b/app/models/loose_foreign_keys/deleted_record.rb @@ -1,5 +1,32 @@ # frozen_string_literal: true class LooseForeignKeys::DeletedRecord < ApplicationRecord - extend SuppressCompositePrimaryKeyWarning + self.primary_key = :id + + scope :for_table, -> (table) { where(fully_qualified_table_name: table) } + scope :ordered_by_id, -> { order(:id, :primary_key_value) } + # This needs to be parameterized once we start adding partitions + scope :for_partition, -> { where(partition: 1) } + + enum status: { pending: 1, processed: 2 }, _prefix: :status + + def self.load_batch_for_table(table, batch_size) + for_table(table) + .for_partition + .status_pending + .ordered_by_id + .limit(batch_size) + .to_a + end + + def self.mark_records_processed_for_table_between(table, from_record, to_record) + from = from_record.id + to = to_record.id + + for_table(table) + .for_partition + .status_pending + .where(id: from..to) + .update_all(status: :processed) + end end diff --git a/app/models/loose_foreign_keys/modification_tracker.rb b/app/models/loose_foreign_keys/modification_tracker.rb new file mode 100644 index 0000000000000000000000000000000000000000..12f3156b98cc7b319e5fe8f8baa67ba4be76cc80 --- /dev/null +++ b/app/models/loose_foreign_keys/modification_tracker.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +module LooseForeignKeys + class ModificationTracker + MAX_DELETES = 100_000 + MAX_UPDATES = 50_000 + MAX_RUNTIME = 3.minutes + + delegate :monotonic_time, to: :'Gitlab::Metrics::System' + + def initialize + @delete_count_by_table = Hash.new { |h, k| h[k] = 0 } + @update_count_by_table = Hash.new { |h, k| h[k] = 0 } + @start_time = monotonic_time + end + + def add_deletions(table, count) + @delete_count_by_table[table] += count + end + + def add_updates(table, count) + @update_count_by_table[table] += count + end + + def over_limit? + @delete_count_by_table.values.sum >= MAX_DELETES || + @update_count_by_table.values.sum >= MAX_UPDATES || + monotonic_time - @start_time >= MAX_RUNTIME + end + + def stats + { + over_limit: over_limit?, + delete_count_by_table: @delete_count_by_table, + update_count_by_table: @update_count_by_table, + delete_count: @delete_count_by_table.values.sum, + update_count: @update_count_by_table.values.sum + } + end + end +end diff --git a/app/services/loose_foreign_keys/batch_cleaner_service.rb b/app/services/loose_foreign_keys/batch_cleaner_service.rb new file mode 100644 index 0000000000000000000000000000000000000000..a623524b2b19f0d2ef4506fb6f132007c716777a --- /dev/null +++ b/app/services/loose_foreign_keys/batch_cleaner_service.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +module LooseForeignKeys + class BatchCleanerService + def initialize(parent_klass:, deleted_parent_records:, modification_tracker: LooseForeignKeys::ModificationTracker.new, models_by_table_name:) + @parent_klass = parent_klass + @deleted_parent_records = deleted_parent_records + @modification_tracker = modification_tracker + @models_by_table_name = models_by_table_name + end + + def execute + parent_klass.loose_foreign_key_definitions.each do |foreign_key_definition| + run_cleaner_service(foreign_key_definition, with_skip_locked: true) + break if modification_tracker.over_limit? + + run_cleaner_service(foreign_key_definition, with_skip_locked: false) + break if modification_tracker.over_limit? + end + + return if modification_tracker.over_limit? + + # At this point, all associations are cleaned up, we can update the status of the parent records + LooseForeignKeys::DeletedRecord + .mark_records_processed_for_table_between(deleted_parent_records.first.fully_qualified_table_name, deleted_parent_records.first, deleted_parent_records.last) + end + + private + + attr_reader :parent_klass, :deleted_parent_records, :modification_tracker, :models_by_table_name + + def record_result(cleaner, result) + if cleaner.async_delete? + modification_tracker.add_deletions(result[:table], result[:affected_rows]) + elsif cleaner.async_nullify? + modification_tracker.add_updates(result[:table], result[:affected_rows]) + end + end + + def run_cleaner_service(foreign_key_definition, with_skip_locked:) + cleaner = CleanerService.new( + model: models_by_table_name.fetch(foreign_key_definition.to_table), + foreign_key_definition: foreign_key_definition, + deleted_parent_records: deleted_parent_records, + with_skip_locked: with_skip_locked + ) + + loop do + result = cleaner.execute + record_result(cleaner, result) + + break if modification_tracker.over_limit? || result[:affected_rows] == 0 + end + end + end +end diff --git a/app/services/loose_foreign_keys/cleaner_service.rb b/app/services/loose_foreign_keys/cleaner_service.rb new file mode 100644 index 0000000000000000000000000000000000000000..8fe053e2edf903f5d348598e61ece0143178f36f --- /dev/null +++ b/app/services/loose_foreign_keys/cleaner_service.rb @@ -0,0 +1,99 @@ +# frozen_string_literal: true + +module LooseForeignKeys + # rubocop: disable CodeReuse/ActiveRecord + class CleanerService + DELETE_LIMIT = 1000 + UPDATE_LIMIT = 500 + + delegate :connection, to: :model + + def initialize(model:, foreign_key_definition:, deleted_parent_records:, with_skip_locked: false) + @model = model + @foreign_key_definition = foreign_key_definition + @deleted_parent_records = deleted_parent_records + @with_skip_locked = with_skip_locked + end + + def execute + result = connection.execute(build_query) + + { affected_rows: result.cmd_tuples, table: foreign_key_definition.to_table } + end + + def async_delete? + foreign_key_definition.on_delete == :async_delete + end + + def async_nullify? + foreign_key_definition.on_delete == :async_nullify + end + + private + + attr_reader :model, :foreign_key_definition, :deleted_parent_records, :with_skip_locked + + def build_query + query = if async_delete? + delete_query + elsif async_nullify? + update_query + else + raise "Invalid on_delete argument: #{foreign_key_definition.on_delete}" + end + + unless query.include?(%{"#{foreign_key_definition.column}" IN (}) + raise("FATAL: foreign key condition is missing from the generated query: #{query}") + end + + query + end + + def arel_table + @arel_table ||= model.arel_table + end + + def primary_keys + @primary_keys ||= connection.primary_keys(model.table_name).map { |key| arel_table[key] } + end + + def quoted_table_name + @quoted_table_name ||= Arel.sql(connection.quote_table_name(model.table_name)) + end + + def delete_query + query = Arel::DeleteManager.new + query.from(quoted_table_name) + + add_in_query_with_limit(query, DELETE_LIMIT) + end + + def update_query + query = Arel::UpdateManager.new + query.table(quoted_table_name) + query.set([[arel_table[foreign_key_definition.column], nil]]) + + add_in_query_with_limit(query, UPDATE_LIMIT) + end + + # IN query with one or composite primary key + # WHERE (primary_key1, primary_key2) IN (subselect) + def add_in_query_with_limit(query, limit) + columns = Arel::Nodes::Grouping.new(primary_keys) + query.where(columns.in(in_query_with_limit(limit))).to_sql + end + + # Builds the following sub-query + # SELECT primary_keys FROM table WHERE foreign_key IN (1, 2, 3) LIMIT N + def in_query_with_limit(limit) + in_query = Arel::SelectManager.new + in_query.from(quoted_table_name) + in_query.where(arel_table[foreign_key_definition.column].in(deleted_parent_records.map(&:primary_key_value))) + in_query.projections = primary_keys + in_query.take(limit) + in_query.lock(Arel.sql('FOR UPDATE SKIP LOCKED')) if with_skip_locked + in_query + end + end + # rubocop: enable CodeReuse/ActiveRecord +end diff --git a/app/services/loose_foreign_keys/process_deleted_records_service.rb b/app/services/loose_foreign_keys/process_deleted_records_service.rb new file mode 100644 index 0000000000000000000000000000000000000000..735fc8a2415f2d4ee81175899cf9014752908986 --- /dev/null +++ b/app/services/loose_foreign_keys/process_deleted_records_service.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +module LooseForeignKeys + class ProcessDeletedRecordsService + BATCH_SIZE = 1000 + + def initialize(connection:) + @connection = connection + end + + def execute + modification_tracker = ModificationTracker.new + + tracked_tables.cycle do |table| + records = load_batch_for_table(table) + + if records.empty? + tracked_tables.delete(table) + next + end + + break if modification_tracker.over_limit? + + model = find_parent_model!(table) + + LooseForeignKeys::BatchCleanerService + .new(parent_klass: model, + deleted_parent_records: records, + modification_tracker: modification_tracker, + models_by_table_name: models_by_table_name) + .execute + + break if modification_tracker.over_limit? + end + + modification_tracker.stats + end + + private + + attr_reader :connection + + def load_batch_for_table(table) + fully_qualified_table_name = "#{current_schema}.#{table}" + LooseForeignKeys::DeletedRecord.load_batch_for_table(fully_qualified_table_name, BATCH_SIZE) + end + + def find_parent_model!(table) + models_by_table_name.fetch(table) + end + + def current_schema + @current_schema = connection.current_schema + end + + def tracked_tables + @tracked_tables ||= models_by_table_name + .select { |table_name, model| model.respond_to?(:loose_foreign_key_definitions) } + .keys + end + + def models_by_table_name + @models_by_table_name ||= begin + all_models + .select(&:base_class?) + .index_by(&:table_name) + end + end + + def all_models + ApplicationRecord.descendants + end + end +end diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index 2bb961bb8c65717a081996d6591882a97d339ebd..2f33d154d99f568f3c3f39de41881db75088e752 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -363,6 +363,15 @@ :weight: 1 :idempotent: :tags: [] +- :name: cronjob:loose_foreign_keys_cleanup + :worker_name: LooseForeignKeys::CleanupWorker + :feature_category: :sharding + :has_external_dependencies: + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: true + :tags: [] - :name: cronjob:member_invitation_reminder_emails :worker_name: MemberInvitationReminderEmailsWorker :feature_category: :subgroups diff --git a/app/workers/loose_foreign_keys/cleanup_worker.rb b/app/workers/loose_foreign_keys/cleanup_worker.rb new file mode 100644 index 0000000000000000000000000000000000000000..4df2d26ed16fe0f7971cef7b3efba99a7bddc18a --- /dev/null +++ b/app/workers/loose_foreign_keys/cleanup_worker.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +module LooseForeignKeys + class CleanupWorker + include ApplicationWorker + include Gitlab::ExclusiveLeaseHelpers + include CronjobQueue # rubocop: disable Scalability/CronWorkerContext + + feature_category :sharding + data_consistency :always + idempotent! + + def perform + return if Feature.disabled?(:loose_foreign_key_cleanup, default_enabled: :yaml) + + in_lock(self.class.name.underscore, ttl: 1.hour, retries: 0) do + # TODO: Iterate over the connections + # https://gitlab.com/gitlab-org/gitlab/-/issues/341513 + stats = ProcessDeletedRecordsService.new(connection: ApplicationRecord.connection).execute + log_extra_metadata_on_done(:stats, stats) + end + end + end +end diff --git a/config/feature_flags/development/loose_foreign_key_cleanup.yml b/config/feature_flags/development/loose_foreign_key_cleanup.yml new file mode 100644 index 0000000000000000000000000000000000000000..a5ed9192a190596a33a0e9eee57dd3cbe4cc91fb --- /dev/null +++ b/config/feature_flags/development/loose_foreign_key_cleanup.yml @@ -0,0 +1,8 @@ +--- +name: loose_foreign_key_cleanup +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/69165 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/343545 +milestone: '14.4' +type: development +group: group::sharding +default_enabled: false diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index 0e4e6f5cc844f9bd53b435890c2fa0321f4d929c..b871aa92c7f383e1da2acf4c26f7baaf1bd72ebe 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -714,6 +714,9 @@ Settings.cron_jobs['app_sec_dast_profile_schedule_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['app_sec_dast_profile_schedule_worker']['cron'] ||= '7-59/15 * * * *' Settings.cron_jobs['app_sec_dast_profile_schedule_worker']['job_class'] = 'AppSec::Dast::ProfileScheduleWorker' + Settings.cron_jobs['loose_foreign_keys_cleanup_worker'] ||= Settingslogic.new({}) + Settings.cron_jobs['loose_foreign_keys_cleanup_worker']['cron'] ||= '*/5 * * * *' + Settings.cron_jobs['loose_foreign_keys_cleanup_worker']['job_class'] = 'LooseForeignKeys::CleanupWorker' end # diff --git a/db/post_migrate/20211011104843_add_new_loose_fk_index.rb b/db/post_migrate/20211011104843_add_new_loose_fk_index.rb new file mode 100644 index 0000000000000000000000000000000000000000..710d0917d7fbff1a761791e898ebc0da5e0b4b00 --- /dev/null +++ b/db/post_migrate/20211011104843_add_new_loose_fk_index.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +class AddNewLooseFkIndex < Gitlab::Database::Migration[1.0] + include Gitlab::Database::PartitioningMigrationHelpers + + disable_ddl_transaction! + + INDEX_NAME = 'index_loose_foreign_keys_deleted_records_for_loading_records' + + def up + add_concurrent_partitioned_index :loose_foreign_keys_deleted_records, + %I[fully_qualified_table_name id primary_key_value partition], + where: 'status = 1', + name: INDEX_NAME + end + + def down + remove_concurrent_partitioned_index_by_name :loose_foreign_keys_deleted_records, INDEX_NAME + end +end diff --git a/db/schema_migrations/20211011104843 b/db/schema_migrations/20211011104843 new file mode 100644 index 0000000000000000000000000000000000000000..78789b94ece570d113dec1469c0d4648532e94f2 --- /dev/null +++ b/db/schema_migrations/20211011104843 @@ -0,0 +1 @@ +e2812344b16cd51c544235bae8a365713ab7e34652c2c05511a7fe9d84c05fb1 \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index fbadeae0377fbec2114f4bd43d6f522813e9f9a2..045f800f8f763a09adb7540130f7715475ca70e4 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -23947,6 +23947,10 @@ ALTER TABLE ONLY zentao_tracker_data ALTER TABLE ONLY zoom_meetings ADD CONSTRAINT zoom_meetings_pkey PRIMARY KEY (id); +CREATE INDEX index_loose_foreign_keys_deleted_records_for_loading_records ON ONLY loose_foreign_keys_deleted_records USING btree (fully_qualified_table_name, id, primary_key_value, partition) WHERE (status = 1); + +CREATE INDEX index_8be8640437 ON gitlab_partitions_static.loose_foreign_keys_deleted_records_1 USING btree (fully_qualified_table_name, id, primary_key_value, partition) WHERE (status = 1); + CREATE INDEX index_product_analytics_events_experimental_project_and_time ON ONLY product_analytics_events_experimental USING btree (project_id, collector_tstamp); CREATE INDEX product_analytics_events_expe_project_id_collector_tstamp_idx10 ON gitlab_partitions_static.product_analytics_events_experimental_10 USING btree (project_id, collector_tstamp); @@ -27249,6 +27253,8 @@ ALTER INDEX analytics_cycle_analytics_merge_request_stage_events_pkey ATTACH PAR ALTER INDEX analytics_cycle_analytics_merge_request_stage_events_pkey ATTACH PARTITION gitlab_partitions_static.analytics_cycle_analytics_merge_request_stage_events_31_pkey; +ALTER INDEX index_loose_foreign_keys_deleted_records_for_loading_records ATTACH PARTITION gitlab_partitions_static.index_8be8640437; + ALTER INDEX loose_foreign_keys_deleted_records_pkey ATTACH PARTITION gitlab_partitions_static.loose_foreign_keys_deleted_records_1_pkey; ALTER INDEX index_product_analytics_events_experimental_project_and_time ATTACH PARTITION gitlab_partitions_static.product_analytics_events_expe_project_id_collector_tstamp_idx10; diff --git a/spec/models/concerns/loose_foreign_key_spec.rb b/spec/models/concerns/loose_foreign_key_spec.rb index ce5e33261a9449ab4152617b93726e4e2013a177..42da69eb75eadbb719fd829351f661695a81e878 100644 --- a/spec/models/concerns/loose_foreign_key_spec.rb +++ b/spec/models/concerns/loose_foreign_key_spec.rb @@ -9,8 +9,8 @@ self.table_name = 'projects' - loose_foreign_key :issues, :project_id, on_delete: :async_delete, gitlab_schema: :gitlab_main - loose_foreign_key 'merge_requests', 'project_id', 'on_delete' => 'async_nullify', 'gitlab_schema' => :gitlab_main + loose_foreign_key :issues, :project_id, on_delete: :async_delete + loose_foreign_key 'merge_requests', 'project_id', 'on_delete' => 'async_nullify' end end @@ -28,7 +28,6 @@ expect(definition.to_table).to eq('merge_requests') expect(definition.column).to eq('project_id') expect(definition.on_delete).to eq(:async_nullify) - expect(definition.options[:gitlab_schema]).to eq(:gitlab_main) end context 'validation' do @@ -39,9 +38,9 @@ self.table_name = 'projects' - loose_foreign_key :issues, :project_id, on_delete: :async_delete, gitlab_schema: :gitlab_main - loose_foreign_key :merge_requests, :project_id, on_delete: :async_nullify, gitlab_schema: :gitlab_main - loose_foreign_key :merge_requests, :project_id, on_delete: :destroy, gitlab_schema: :gitlab_main + loose_foreign_key :issues, :project_id, on_delete: :async_delete + loose_foreign_key :merge_requests, :project_id, on_delete: :async_nullify + loose_foreign_key :merge_requests, :project_id, on_delete: :destroy end end @@ -50,28 +49,12 @@ end end - context 'gitlab_schema validation' do - let(:invalid_class) do - Class.new(ApplicationRecord) do - include LooseForeignKey - - self.table_name = 'projects' - - loose_foreign_key :merge_requests, :project_id, on_delete: :async_nullify, gitlab_schema: :unknown - end - end - - it 'raises error when invalid `gitlab_schema` option was given' do - expect { invalid_class }.to raise_error /Invalid gitlab_schema option given: unknown/ - end - end - context 'inheritance validation' do let(:inherited_project_class) do Class.new(Project) do include LooseForeignKey - loose_foreign_key :issues, :project_id, on_delete: :async_delete, gitlab_schema: :gitlab_main + loose_foreign_key :issues, :project_id, on_delete: :async_delete end end diff --git a/spec/models/loose_foreign_keys/deleted_record_spec.rb b/spec/models/loose_foreign_keys/deleted_record_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..8494cd91c6efb009da771cab282956fbe3ceb4b7 --- /dev/null +++ b/spec/models/loose_foreign_keys/deleted_record_spec.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe LooseForeignKeys::DeletedRecord, type: :model do + let_it_be(:table) { 'public.projects' } + + let_it_be(:deleted_record_1) { described_class.create!(partition: 1, fully_qualified_table_name: table, primary_key_value: 5) } + let_it_be(:deleted_record_2) { described_class.create!(partition: 1, fully_qualified_table_name: table, primary_key_value: 1) } + let_it_be(:deleted_record_3) { described_class.create!(partition: 1, fully_qualified_table_name: 'public.other_table', primary_key_value: 3) } + let_it_be(:deleted_record_4) { described_class.create!(partition: 1, fully_qualified_table_name: table, primary_key_value: 1) } # duplicate + + describe '.load_batch_for_table' do + it 'loads records and orders them by creation date' do + records = described_class.load_batch_for_table(table, 10) + + expect(records).to eq([deleted_record_1, deleted_record_2, deleted_record_4]) + end + + it 'supports configurable batch size' do + records = described_class.load_batch_for_table(table, 2) + + expect(records).to eq([deleted_record_1, deleted_record_2]) + end + end + + describe '.mark_records_processed_for_table_between' do + it 'marks processed exactly one record' do + described_class.mark_records_processed_for_table_between(table, deleted_record_2, deleted_record_2) + + expect(described_class.status_pending.count).to eq(3) + expect(described_class.status_processed.count).to eq(1) + + processed_record = described_class.status_processed.first + expect(processed_record).to eq(deleted_record_2) + end + + it 'deletes two records' do + described_class.mark_records_processed_for_table_between(table, deleted_record_2, deleted_record_4) + + expect(described_class.status_pending.count).to eq(2) + expect(described_class.status_processed.count).to eq(2) + end + + it 'deletes all records' do + described_class.mark_records_processed_for_table_between(table, deleted_record_1, deleted_record_4) + + expect(described_class.status_pending.count).to eq(1) + expect(described_class.status_processed.count).to eq(3) + end + end +end diff --git a/spec/models/loose_foreign_keys/modification_tracker_spec.rb b/spec/models/loose_foreign_keys/modification_tracker_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..13e5f920d05d46629456d694c29d41b9b834fe08 --- /dev/null +++ b/spec/models/loose_foreign_keys/modification_tracker_spec.rb @@ -0,0 +1,73 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe LooseForeignKeys::ModificationTracker do + subject(:tracker) { described_class.new } + + describe '#over_limit?' do + it 'is true when deletion MAX_DELETES is exceeded' do + stub_const('LooseForeignKeys::ModificationTracker::MAX_DELETES', 5) + + tracker.add_deletions('issues', 10) + expect(tracker).to be_over_limit + end + + it 'is false when MAX_DELETES is not exceeded' do + tracker.add_deletions('issues', 3) + + expect(tracker).not_to be_over_limit + end + + it 'is true when deletion MAX_UPDATES is exceeded' do + stub_const('LooseForeignKeys::ModificationTracker::MAX_UPDATES', 5) + + tracker.add_updates('issues', 3) + tracker.add_updates('issues', 4) + + expect(tracker).to be_over_limit + end + + it 'is false when MAX_UPDATES is not exceeded' do + tracker.add_updates('projects', 3) + + expect(tracker).not_to be_over_limit + end + + it 'is true when max runtime is exceeded' do + monotonic_time_before = 1 # this will be the start time + monotonic_time_after = described_class::MAX_RUNTIME.to_i + 1 # this will be returned when over_limit? is called + + allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(monotonic_time_before, monotonic_time_after) + + tracker + + expect(tracker).to be_over_limit + end + + it 'is false when max runtime is not exceeded' do + expect(tracker).not_to be_over_limit + end + end + + describe '#stats' do + it 'exposes stats' do + freeze_time do + tracker + tracker.add_deletions('issues', 5) + tracker.add_deletions('issues', 2) + tracker.add_deletions('projects', 2) + + tracker.add_updates('projects', 3) + + expect(tracker.stats).to eq({ + over_limit: false, + delete_count_by_table: { 'issues' => 7, 'projects' => 2 }, + update_count_by_table: { 'projects' => 3 }, + delete_count: 9, + update_count: 3 + }) + end + end + end +end diff --git a/spec/services/loose_foreign_keys/batch_cleaner_service_spec.rb b/spec/services/loose_foreign_keys/batch_cleaner_service_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..dcef419cc9a9f0438d03dcd8aaf68de255d4d4f6 --- /dev/null +++ b/spec/services/loose_foreign_keys/batch_cleaner_service_spec.rb @@ -0,0 +1,110 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe LooseForeignKeys::BatchCleanerService do + include MigrationsHelpers + + def create_table_structure + migration = ActiveRecord::Migration.new.extend(Gitlab::Database::MigrationHelpers::LooseForeignKeyHelpers) + + migration.create_table :loose_fk_parent_table + + migration.create_table :loose_fk_child_table_1 do |t| + t.bigint :parent_id + end + + migration.create_table :loose_fk_child_table_2 do |t| + t.bigint :parent_id_with_different_column + end + + migration.track_record_deletions(:loose_fk_parent_table) + end + + let(:parent_model) do + Class.new(ApplicationRecord) do + self.table_name = 'loose_fk_parent_table' + + include LooseForeignKey + + loose_foreign_key :loose_fk_child_table_1, :parent_id, on_delete: :async_delete + loose_foreign_key :loose_fk_child_table_2, :parent_id_with_different_column, on_delete: :async_nullify + end + end + + let(:child_model_1) do + Class.new(ApplicationRecord) do + self.table_name = 'loose_fk_child_table_1' + end + end + + let(:child_model_2) do + Class.new(ApplicationRecord) do + self.table_name = 'loose_fk_child_table_2' + end + end + + let(:loose_fk_child_table_1) { table(:loose_fk_child_table_1) } + let(:loose_fk_child_table_2) { table(:loose_fk_child_table_2) } + let(:parent_record_1) { parent_model.create! } + let(:other_parent_record) { parent_model.create! } + + before(:all) do + create_table_structure + end + + before do + parent_record_1 + + loose_fk_child_table_1.create!(parent_id: parent_record_1.id) + loose_fk_child_table_1.create!(parent_id: parent_record_1.id) + + # these will not be deleted + loose_fk_child_table_1.create!(parent_id: other_parent_record.id) + loose_fk_child_table_1.create!(parent_id: other_parent_record.id) + + loose_fk_child_table_2.create!(parent_id_with_different_column: parent_record_1.id) + loose_fk_child_table_2.create!(parent_id_with_different_column: parent_record_1.id) + + # these will not be deleted + loose_fk_child_table_2.create!(parent_id_with_different_column: other_parent_record.id) + loose_fk_child_table_2.create!(parent_id_with_different_column: other_parent_record.id) + end + + after(:all) do + migration = ActiveRecord::Migration.new + migration.drop_table :loose_fk_parent_table + migration.drop_table :loose_fk_child_table_1 + migration.drop_table :loose_fk_child_table_2 + end + + context 'when parent records are deleted' do + before do + parent_record_1.delete + + expect(loose_fk_child_table_1.count).to eq(4) + expect(loose_fk_child_table_2.count).to eq(4) + + described_class.new(parent_klass: parent_model, + deleted_parent_records: LooseForeignKeys::DeletedRecord.status_pending.all, + models_by_table_name: { + 'loose_fk_child_table_1' => child_model_1, + 'loose_fk_child_table_2' => child_model_2 + }).execute + end + + it 'cleans up the child records' do + expect(loose_fk_child_table_1.where(parent_id: parent_record_1.id)).to be_empty + expect(loose_fk_child_table_2.where(parent_id_with_different_column: nil).count).to eq(2) + end + + it 'cleans up the parent DeletedRecord' do + expect(LooseForeignKeys::DeletedRecord.status_pending.count).to eq(0) + end + + it 'does not delete unrelated records' do + expect(loose_fk_child_table_1.where(parent_id: other_parent_record.id).count).to eq(2) + expect(loose_fk_child_table_2.where(parent_id_with_different_column: other_parent_record.id).count).to eq(2) + end + end +end diff --git a/spec/services/loose_foreign_keys/cleaner_service_spec.rb b/spec/services/loose_foreign_keys/cleaner_service_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..6f37ac49435b33e25cd125d26a6c33f334c149bd --- /dev/null +++ b/spec/services/loose_foreign_keys/cleaner_service_spec.rb @@ -0,0 +1,147 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe LooseForeignKeys::CleanerService do + let(:schema) { ApplicationRecord.connection.current_schema } + let(:deleted_records) do + [ + LooseForeignKeys::DeletedRecord.new(fully_qualified_table_name: "#{schema}.projects", primary_key_value: non_existing_record_id), + LooseForeignKeys::DeletedRecord.new(fully_qualified_table_name: "#{schema}.projects", primary_key_value: non_existing_record_id) + ] + end + + let(:loose_fk_definition) do + ActiveRecord::ConnectionAdapters::ForeignKeyDefinition.new( + 'projects', + 'issues', + { + column: 'project_id', + on_delete: :async_nullify + } + ) + end + + subject(:cleaner_service) do + described_class.new( + model: Issue, + foreign_key_definition: loose_fk_definition, + deleted_parent_records: deleted_records + ) + end + + context 'when invalid foreign key definition is passed' do + context 'when invalid on_delete argument was given' do + before do + loose_fk_definition.options[:on_delete] = :invalid + end + + it 'raises KeyError' do + expect { cleaner_service.execute }.to raise_error(StandardError, /Invalid on_delete argument/) + end + end + end + + describe 'query generation' do + context 'when single primary key is used' do + let(:issue) { create(:issue) } + + let(:deleted_records) do + [ + LooseForeignKeys::DeletedRecord.new(fully_qualified_table_name: "#{schema}.projects", primary_key_value: issue.project_id) + ] + end + + it 'generates an IN query for nullifying the rows' do + expected_query = %{UPDATE "issues" SET "project_id" = NULL WHERE ("issues"."id") IN (SELECT "issues"."id" FROM "issues" WHERE "issues"."project_id" IN (#{issue.project_id}) LIMIT 500)} + expect(ApplicationRecord.connection).to receive(:execute).with(expected_query).and_call_original + + cleaner_service.execute + + issue.reload + expect(issue.project_id).to be_nil + end + + it 'generates an IN query for deleting the rows' do + loose_fk_definition.options[:on_delete] = :async_delete + + expected_query = %{DELETE FROM "issues" WHERE ("issues"."id") IN (SELECT "issues"."id" FROM "issues" WHERE "issues"."project_id" IN (#{issue.project_id}) LIMIT 1000)} + expect(ApplicationRecord.connection).to receive(:execute).with(expected_query).and_call_original + + cleaner_service.execute + + expect(Issue.exists?(id: issue.id)).to eq(false) + end + end + + context 'when composite primary key is used' do + let!(:user) { create(:user) } + let!(:project) { create(:project) } + + let(:loose_fk_definition) do + ActiveRecord::ConnectionAdapters::ForeignKeyDefinition.new( + 'users', + 'project_authorizations', + { + column: 'user_id', + on_delete: :async_delete + } + ) + end + + let(:deleted_records) do + [ + LooseForeignKeys::DeletedRecord.new(fully_qualified_table_name: "#{schema}.users", primary_key_value: user.id) + ] + end + + subject(:cleaner_service) do + described_class.new( + model: ProjectAuthorization, + foreign_key_definition: loose_fk_definition, + deleted_parent_records: deleted_records + ) + end + + before do + project.add_developer(user) + end + + it 'generates an IN query for deleting the rows' do + expected_query = %{DELETE FROM "project_authorizations" WHERE ("project_authorizations"."user_id", "project_authorizations"."project_id", "project_authorizations"."access_level") IN (SELECT "project_authorizations"."user_id", "project_authorizations"."project_id", "project_authorizations"."access_level" FROM "project_authorizations" WHERE "project_authorizations"."user_id" IN (#{user.id}) LIMIT 1000)} + expect(ApplicationRecord.connection).to receive(:execute).with(expected_query).and_call_original + + cleaner_service.execute + + expect(ProjectAuthorization.exists?(user_id: user.id)).to eq(false) + end + + context 'when the query generation is incorrect (paranoid check)' do + it 'raises error if the foreign key condition is missing' do + expect_next_instance_of(LooseForeignKeys::CleanerService) do |instance| + expect(instance).to receive(:delete_query).and_return('wrong query') + end + + expect { cleaner_service.execute }.to raise_error /FATAL: foreign key condition is missing from the generated query/ + end + end + end + + context 'when with_skip_locked parameter is true' do + subject(:cleaner_service) do + described_class.new( + model: Issue, + foreign_key_definition: loose_fk_definition, + deleted_parent_records: deleted_records, + with_skip_locked: true + ) + end + + it 'generates a query with the SKIP LOCKED clause' do + expect(ApplicationRecord.connection).to receive(:execute).with(/FOR UPDATE SKIP LOCKED/).and_call_original + + cleaner_service.execute + end + end + end +end diff --git a/spec/workers/loose_foreign_keys/cleanup_worker_spec.rb b/spec/workers/loose_foreign_keys/cleanup_worker_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..574534a5d4719b5cae868ebf84501f915410a970 --- /dev/null +++ b/spec/workers/loose_foreign_keys/cleanup_worker_spec.rb @@ -0,0 +1,153 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe LooseForeignKeys::CleanupWorker do + include MigrationsHelpers + + def create_table_structure + migration = ActiveRecord::Migration.new.extend(Gitlab::Database::MigrationHelpers::LooseForeignKeyHelpers) + + migration.create_table :loose_fk_parent_table_1 + migration.create_table :loose_fk_parent_table_2 + + migration.create_table :loose_fk_child_table_1_1 do |t| + t.bigint :parent_id + end + + migration.create_table :loose_fk_child_table_1_2 do |t| + t.bigint :parent_id_with_different_column + end + + migration.create_table :loose_fk_child_table_2_1 do |t| + t.bigint :parent_id + end + + migration.track_record_deletions(:loose_fk_parent_table_1) + migration.track_record_deletions(:loose_fk_parent_table_2) + end + + let!(:parent_model_1) do + Class.new(ApplicationRecord) do + self.table_name = 'loose_fk_parent_table_1' + + include LooseForeignKey + + loose_foreign_key :loose_fk_child_table_1_1, :parent_id, on_delete: :async_delete + loose_foreign_key :loose_fk_child_table_1_2, :parent_id_with_different_column, on_delete: :async_nullify + end + end + + let!(:parent_model_2) do + Class.new(ApplicationRecord) do + self.table_name = 'loose_fk_parent_table_2' + + include LooseForeignKey + + loose_foreign_key :loose_fk_child_table_2_1, :parent_id, on_delete: :async_delete + end + end + + let!(:child_model_1) do + Class.new(ApplicationRecord) do + self.table_name = 'loose_fk_child_table_1_1' + end + end + + let!(:child_model_2) do + Class.new(ApplicationRecord) do + self.table_name = 'loose_fk_child_table_1_2' + end + end + + let!(:child_model_3) do + Class.new(ApplicationRecord) do + self.table_name = 'loose_fk_child_table_2_1' + end + end + + let(:loose_fk_parent_table_1) { table(:loose_fk_parent_table_1) } + let(:loose_fk_parent_table_2) { table(:loose_fk_parent_table_2) } + let(:loose_fk_child_table_1_1) { table(:loose_fk_child_table_1_1) } + let(:loose_fk_child_table_1_2) { table(:loose_fk_child_table_1_2) } + let(:loose_fk_child_table_2_1) { table(:loose_fk_child_table_2_1) } + + before(:all) do + create_table_structure + end + + after(:all) do + migration = ActiveRecord::Migration.new + + migration.drop_table :loose_fk_parent_table_1 + migration.drop_table :loose_fk_parent_table_2 + migration.drop_table :loose_fk_child_table_1_1 + migration.drop_table :loose_fk_child_table_1_2 + migration.drop_table :loose_fk_child_table_2_1 + end + + before do + parent_record_1 = loose_fk_parent_table_1.create! + loose_fk_child_table_1_1.create!(parent_id: parent_record_1.id) + loose_fk_child_table_1_2.create!(parent_id_with_different_column: parent_record_1.id) + + parent_record_2 = loose_fk_parent_table_1.create! + 2.times { loose_fk_child_table_1_1.create!(parent_id: parent_record_2.id) } + 3.times { loose_fk_child_table_1_2.create!(parent_id_with_different_column: parent_record_2.id) } + + parent_record_3 = loose_fk_parent_table_2.create! + 5.times { loose_fk_child_table_2_1.create!(parent_id: parent_record_3.id) } + + parent_model_1.delete_all + parent_model_2.delete_all + end + + it 'cleans up all rows' do + described_class.new.perform + + expect(loose_fk_child_table_1_1.count).to eq(0) + expect(loose_fk_child_table_1_2.where(parent_id_with_different_column: nil).count).to eq(4) + expect(loose_fk_child_table_2_1.count).to eq(0) + end + + context 'when deleting in batches' do + before do + stub_const('LooseForeignKeys::CleanupWorker::BATCH_SIZE', 2) + end + + it 'cleans up all rows' do + expect(LooseForeignKeys::BatchCleanerService).to receive(:new).exactly(:twice).and_call_original + + described_class.new.perform + + expect(loose_fk_child_table_1_1.count).to eq(0) + expect(loose_fk_child_table_1_2.where(parent_id_with_different_column: nil).count).to eq(4) + expect(loose_fk_child_table_2_1.count).to eq(0) + end + end + + context 'when the deleted rows count limit have been reached' do + def count_deletable_rows + loose_fk_child_table_1_1.count + loose_fk_child_table_2_1.count + end + + before do + stub_const('LooseForeignKeys::ModificationTracker::MAX_DELETES', 2) + stub_const('LooseForeignKeys::CleanerService::DELETE_LIMIT', 1) + end + + it 'cleans up 2 rows' do + expect { described_class.new.perform }.to change { count_deletable_rows }.by(-2) + end + end + + context 'when the loose_foreign_key_cleanup feature flag is off' do + before do + stub_feature_flags(loose_foreign_key_cleanup: false) + end + + it 'does nothing' do + expect { described_class.new.perform }.not_to change { LooseForeignKeys::DeletedRecord.status_processed.count } + end + end +end