# frozen_string_literal: true

module Database
  # Compares the selected columns of two ActiveRecord models' tables, for the
  # purpose of checking the consistency of mirrored tables. For example
  # Namespace and Ci::NamespaceMirror.
  #
  # It compares up to 25 batches (1000 records / batch), or up to 30 seconds
  # for all the batches in total.
  #
  # It saves the cursor (the start_id of the next run) in Redis. If no cursor is
  # stored in Redis, for example on the first run, a random start_id is chosen.
  #
  # Example:
  #  service = Database::ConsistencyCheckService.new(
  #    source_model: Namespace,
  #    target_model: Ci::NamespaceMirror,
  #    source_columns: %w[id traversal_ids],
  #    target_columns: %w[namespace_id traversal_ids],
  #  )
  #  result = service.execute
  class ConsistencyCheckService
    CURSOR_REDIS_KEY_TTL = 7.days
    EMPTY_RESULT = { matches: 0, mismatches: 0, batches: 0, mismatches_details: [] }.freeze

    # The first entry of each column list is used as that table's sort column.
    def initialize(source_model:, target_model:, source_columns:, target_columns:)
      @source_model = source_model
      @target_model = target_model
      @source_columns = source_columns
      @target_columns = target_columns
      @source_sort_column = source_columns.first
      @target_sort_column = target_columns.first
    end

    # Runs one consistency-check pass starting at the saved (or a random) cursor
    # and stores the cursor for the next pass in Redis.
    #
    # Returns EMPTY_RESULT when the source table has no records. Otherwise the
    # result is a hash that has the following fields:
    #  - batches: Number of batches checked
    #  - matches: The number of matched records
    #  - mismatches: The number of mismatched records
    #  - mismatches_details: It's an array that contains details about the mismatched records.
    #     each record in this array is a hash of format {id: ID, source_table: [...], target_table: [...]}
    #     Each record represents the attributes of the records in the two tables.
    #  - start_id: The start id cursor of the current batch. nil means no records.
    #  - next_start_id: The ID that can be used for the next batch iteration check. nil means no records
    def execute
      start_id = next_start_id

      return EMPTY_RESULT if start_id.nil?

      result = consistency_checker.execute(start_id: start_id)
      result[:start_id] = start_id

      save_next_start_id(result[:next_start_id])

      result
    end

    private

    attr_reader :source_model, :target_model, :source_columns, :target_columns, :source_sort_column, :target_sort_column

    def consistency_checker
      @consistency_checker ||= Gitlab::Database::ConsistencyChecker.new(
        source_model: source_model,
        target_model: target_model,
        source_columns: source_columns,
        target_columns: target_columns
      )
    end

    # nil when the source table is empty, otherwise the saved cursor or, when
    # none is saved, a random id.
    def next_start_id
      return if min_id.nil?

      fetch_next_start_id || random_start_id
    end

    # rubocop: disable CodeReuse/ActiveRecord
    def min_id
      @min_id ||= source_model.minimum(source_sort_column)
    end

    # Must be the maximum of the sort column: random_start_id relies on
    # min_id..max_id spanning the whole table.
    def max_id
      @max_id ||= source_model.maximum(source_sort_column)
    end
    # rubocop: enable CodeReuse/ActiveRecord

    def fetch_next_start_id
      Gitlab::Redis::SharedState.with { |redis| redis.get(cursor_redis_shared_state_key)&.to_i }
    end

    # This returns some random start_id, so that we don't always start checking
    # from the start of the table, in case we lose the cursor in Redis.
    # The upper bound leaves room for one batch before the end of the table, but
    # never drops below min_id.
    def random_start_id
      range_start = min_id
      range_end = [min_id, max_id - Gitlab::Database::ConsistencyChecker::BATCH_SIZE].max
      rand(range_start..range_end)
    end

    def save_next_start_id(start_id)
      Gitlab::Redis::SharedState.with do |redis|
        redis.set(cursor_redis_shared_state_key, start_id, ex: CURSOR_REDIS_KEY_TTL)
      end
    end

    # One cursor per (source table, target table) pair.
    def cursor_redis_shared_state_key
      "consistency_check_cursor:#{source_model.table_name}:#{target_model.table_name}"
    end
  end
end
# frozen_string_literal: true

module Database
  # Worker on the cronjob queue that runs one ConsistencyCheckService pass
  # comparing Namespace with Ci::NamespaceMirror.
  class CiNamespaceMirrorsConsistencyCheckWorker
    include ApplicationWorker
    include CronjobQueue # rubocop: disable Scalability/CronWorkerContext

    sidekiq_options retry: false
    feature_category :sharding
    data_consistency :sticky
    idempotent!

    version 1

    # No-op while the :ci_namespace_mirrors_consistency_check feature flag is
    # disabled; otherwise runs the check and hands its result hash to
    # log_extra_metadata_on_done under the :results key.
    def perform
      return if Feature.disabled?(:ci_namespace_mirrors_consistency_check, default_enabled: :yaml)

      log_extra_metadata_on_done(:results, consistency_check_service.execute)
    end

    private

    # Builds a fresh service for every run; the id columns are listed first
    # because the service sorts on the first column of each list.
    def consistency_check_service
      ConsistencyCheckService.new(
        source_model: Namespace,
        target_model: Ci::NamespaceMirror,
        source_columns: %w[id traversal_ids],
        target_columns: %w[namespace_id traversal_ids]
      )
    end
  end
end
# frozen_string_literal: true

module Database
  # Worker on the cronjob queue that runs one ConsistencyCheckService pass
  # comparing Project with Ci::ProjectMirror.
  class CiProjectMirrorsConsistencyCheckWorker
    include ApplicationWorker
    include CronjobQueue # rubocop: disable Scalability/CronWorkerContext

    sidekiq_options retry: false
    feature_category :sharding
    data_consistency :sticky
    idempotent!

    version 1

    # No-op while the :ci_project_mirrors_consistency_check feature flag is
    # disabled; otherwise runs the check and hands its result hash to
    # log_extra_metadata_on_done under the :results key.
    def perform
      return if Feature.disabled?(:ci_project_mirrors_consistency_check, default_enabled: :yaml)

      log_extra_metadata_on_done(:results, consistency_check_service.execute)
    end

    private

    # Builds a fresh service for every run; the id columns are listed first
    # because the service sorts on the first column of each list.
    def consistency_check_service
      ConsistencyCheckService.new(
        source_model: Project,
        target_model: Ci::ProjectMirror,
        source_columns: %w[id namespace_id],
        target_columns: %w[project_id namespace_id]
      )
    end
  end
end
# frozen_string_literal: true

module Gitlab
  module Database
    # Compares the selected columns of a source model's table with the selected
    # columns of a target model's table, in batches of ids. The first entry of
    # each column list is the sort/id column used to pair the rows.
    class ConsistencyChecker
      BATCH_SIZE = 1000
      MAX_BATCHES = 25
      MAX_RUNTIME = 30.seconds # must be less than the scheduling frequency of the ConsistencyCheck jobs

      delegate :monotonic_time, to: :'Gitlab::Metrics::System'

      def initialize(source_model:, target_model:, source_columns:, target_columns:)
        @source_model = source_model
        @target_model = target_model
        @source_columns = source_columns
        @target_columns = target_columns
        @source_sort_column = source_columns.first
        @target_sort_column = target_columns.first
        @result = empty_result
      end

      # Checks up to MAX_BATCHES batches of BATCH_SIZE ids, starting at
      # start_id, and stops early once MAX_RUNTIME has elapsed or the last id
      # of the source table has been passed.
      #
      # Returns a hash with :matches, :mismatches, :batches,
      # :mismatches_details and :next_start_id. :next_start_id is nil when the
      # source table is empty, and wraps around to the smallest id once the
      # end of the table has been passed.
      #
      # rubocop:disable Metrics/AbcSize
      def execute(start_id:)
        # Every run reports only its own numbers, even when the same checker
        # instance is executed several times.
        @result = empty_result
        current_start_id = start_id

        return build_result(next_start_id: nil) if max_id.nil?
        return build_result(next_start_id: min_id) if current_start_id > max_id

        @start_time = monotonic_time

        MAX_BATCHES.times do
          # The time limit is only consulted while there are ids left to check.
          break if current_start_id > max_id || over_time_limit?

          ids_range = current_start_id...(current_start_id + BATCH_SIZE)
          # rubocop: disable CodeReuse/ActiveRecord
          source_data = source_model.where(source_sort_column => ids_range)
                          .order(source_sort_column => :asc).pluck(*source_columns)
          target_data = target_model.where(target_sort_column => ids_range)
                          .order(target_sort_column => :asc).pluck(*target_columns)
          # rubocop: enable CodeReuse/ActiveRecord

          current_start_id += BATCH_SIZE
          result[:matches] += append_mismatches_details(source_data, target_data)
          result[:batches] += 1
        end

        result[:mismatches] = result[:mismatches_details].length
        metrics_counter.increment({ source_table: source_model.table_name, result: "match" }, result[:matches])
        metrics_counter.increment({ source_table: source_model.table_name, result: "mismatch" }, result[:mismatches])

        build_result(next_start_id: current_start_id > max_id ? min_id : current_start_id)
      end
      # rubocop:enable Metrics/AbcSize

      private

      attr_reader :source_model, :target_model, :source_columns, :target_columns,
        :source_sort_column, :target_sort_column, :start_time, :result

      # A fresh, mutable set of counters.
      def empty_result
        { matches: 0, mismatches: 0, batches: 0, mismatches_details: [] }
      end

      def build_result(next_start_id:)
        { next_start_id: next_start_id }.merge(result)
      end

      def over_time_limit?
        (monotonic_time - start_time) >= MAX_RUNTIME
      end

      # This is where comparing the items happens, and where the diff log is built.
      # It returns the number of matching elements.
      # Neither source_data nor target_data is modified.
      def append_mismatches_details(source_data, target_data)
        # Map the sort key of every differing row to the rest of its values
        # source - target
        source_diff_hash = index_rows_by_sort_key(source_data - target_data)
        # target - source
        target_diff_hash = index_rows_by_sort_key(target_data - source_data)

        matches = source_data.length - source_diff_hash.length

        # Items that exist in the first table + Different items
        source_diff_hash.each do |id, values|
          result[:mismatches_details] << {
            id: id,
            source_table: values,
            target_table: target_diff_hash[id]
          }
        end

        # Only the items that exist in the target table
        target_diff_hash.each do |id, values|
          next if source_diff_hash.key?(id) # It's already added

          result[:mismatches_details] << {
            id: id,
            source_table: nil,
            target_table: values
          }
        end

        matches
      end

      # Turns rows of [sort_key, *values] into { sort_key => values } without
      # touching the rows themselves.
      def index_rows_by_sort_key(rows)
        rows.to_h { |row| [row.first, row.drop(1)] }
      end

      # rubocop: disable CodeReuse/ActiveRecord
      def min_id
        @min_id ||= source_model.minimum(source_sort_column)
      end

      def max_id
        @max_id ||= source_model.maximum(source_sort_column)
      end
      # rubocop: enable CodeReuse/ActiveRecord

      def metrics_counter
        @metrics_counter ||= Gitlab::Metrics.counter(
          :consistency_checks,
          "Consistency Check Results"
        )
      end
    end
  end
end
# frozen_string_literal: true

require 'spec_helper'

# Specs for the batch-wise table comparison. BATCH_SIZE and MAX_BATCHES are
# stubbed to 10 and 4 so that 50 namespaces are enough to hit both limits.
RSpec.describe Gitlab::Database::ConsistencyChecker do
  let(:batch_size) { 10 }
  let(:max_batches) { 4 }
  let(:max_runtime) { described_class::MAX_RUNTIME }
  let(:metrics_counter) { Gitlab::Metrics.registry.get(:consistency_checks) }

  subject(:consistency_checker) do
    described_class.new(
      source_model: Namespace,
      target_model: Ci::NamespaceMirror,
      source_columns: %w[id traversal_ids],
      target_columns: %w[namespace_id traversal_ids]
    )
  end

  before do
    stub_const("#{described_class.name}::BATCH_SIZE", batch_size)
    stub_const("#{described_class.name}::MAX_BATCHES", max_batches)
    redis_shared_state_cleanup! # For Prometheus Counters
  end

  after do
    Gitlab::Metrics.reset_registry!
  end

  describe '#over_time_limit?' do
    before do
      allow(consistency_checker).to receive(:start_time).and_return(0)
    end

    it 'returns true only if the running time has exceeded MAX_RUNTIME' do
      # Successive calls to monotonic_time return 0, MAX_RUNTIME - 1, MAX_RUNTIME + 1
      allow(consistency_checker).to receive(:monotonic_time).and_return(0, max_runtime - 1, max_runtime + 1)
      expect(consistency_checker.monotonic_time).to eq(0)
      expect(consistency_checker.send(:over_time_limit?)).to eq(false)
      expect(consistency_checker.send(:over_time_limit?)).to eq(true)
    end
  end

  describe '#execute' do
    context 'when empty tables' do
      it 'returns an empty response' do
        expected_result = { matches: 0, mismatches: 0, batches: 0, mismatches_details: [], next_start_id: nil }
        expect(consistency_checker.execute(start_id: 1)).to eq(expected_result)
      end
    end

    context 'when the tables contain matching items' do
      before do
        create_list(:namespace, 50) # This will also create Ci::NamespaceMirror objects
      end

      it 'does not process more than MAX_BATCHES' do
        max_batches = 3
        stub_const("#{described_class.name}::MAX_BATCHES", max_batches)
        result = consistency_checker.execute(start_id: Namespace.minimum(:id))
        expect(result[:batches]).to eq(max_batches)
        expect(result[:matches]).to eq(max_batches * batch_size)
      end

      it 'doesn not exceed the MAX_RUNTIME' do
        # The first value becomes the start time, the second lets one batch
        # through, the third is past the limit.
        allow(consistency_checker).to receive(:monotonic_time).and_return(0, max_runtime - 1, max_runtime + 1)
        result = consistency_checker.execute(start_id: Namespace.minimum(:id))
        expect(result[:batches]).to eq(1)
        expect(result[:matches]).to eq(1 * batch_size)
      end

      it 'returns the correct number of matches and batches checked' do
        expected_result = {
          next_start_id: Namespace.minimum(:id) + described_class::MAX_BATCHES * described_class::BATCH_SIZE,
          batches: max_batches,
          matches: max_batches * batch_size,
          mismatches: 0,
          mismatches_details: []
        }
        expect(consistency_checker.execute(start_id: Namespace.minimum(:id))).to eq(expected_result)
      end

      it 'returns the min_id as the next_start_id if the check reaches the last element' do
        expect(Gitlab::Metrics).to receive(:counter).at_most(:once)
          .with(:consistency_checks, "Consistency Check Results")
          .and_call_original

        # Starting from the 5th last element
        start_id = Namespace.all.order(id: :desc).limit(5).pluck(:id).last
        expected_result = {
          next_start_id: Namespace.first.id,
          batches: 1,
          matches: 5,
          mismatches: 0,
          mismatches_details: []
        }
        expect(consistency_checker.execute(start_id: start_id)).to eq(expected_result)

        expect(metrics_counter.get(source_table: "namespaces", result: "mismatch")).to eq(0)
        expect(metrics_counter.get(source_table: "namespaces", result: "match")).to eq(5)
      end
    end

    context 'when some items are missing from the first table' do
      # The second namespace by id; it is deleted below while its mirror row stays.
      let(:missing_namespace) { Namespace.all.order(:id).limit(2).last }

      before do
        create_list(:namespace, 50) # This will also create Ci::NamespaceMirror objects
        missing_namespace.delete
      end

      it 'reports the missing elements' do
        expected_result = {
          next_start_id: Namespace.first.id + described_class::MAX_BATCHES * described_class::BATCH_SIZE,
          batches: max_batches,
          matches: 39,
          mismatches: 1,
          mismatches_details: [{
            id: missing_namespace.id,
            source_table: nil,
            target_table: [missing_namespace.traversal_ids]
          }]
        }
        expect(consistency_checker.execute(start_id: Namespace.first.id)).to eq(expected_result)

        expect(metrics_counter.get(source_table: "namespaces", result: "mismatch")).to eq(1)
        expect(metrics_counter.get(source_table: "namespaces", result: "match")).to eq(39)
      end
    end

    context 'when some items are missing from the second table' do
      # The second mirror row by id; it is deleted below while its namespace stays.
      let(:missing_ci_namespace_mirror) { Ci::NamespaceMirror.all.order(:id).limit(2).last }

      before do
        create_list(:namespace, 50) # This will also create Ci::NamespaceMirror objects
        missing_ci_namespace_mirror.delete
      end

      it 'reports the missing elements' do
        expected_result = {
          next_start_id: Namespace.first.id + described_class::MAX_BATCHES * described_class::BATCH_SIZE,
          batches: 4,
          matches: 39,
          mismatches: 1,
          mismatches_details: [{
            id: missing_ci_namespace_mirror.namespace_id,
            source_table: [missing_ci_namespace_mirror.traversal_ids],
            target_table: nil
          }]
        }
        expect(consistency_checker.execute(start_id: Namespace.first.id)).to eq(expected_result)

        expect(metrics_counter.get(source_table: "namespaces", result: "mismatch")).to eq(1)
        expect(metrics_counter.get(source_table: "namespaces", result: "match")).to eq(39)
      end
    end

    context 'when elements are different between the two tables' do
      # Three random namespaces taken from the id range the run will cover.
      let(:different_namespaces) { Namespace.order(:id).limit(max_batches * batch_size).sample(3).sort_by(&:id) }

      before do
        create_list(:namespace, 50) # This will also create Ci::NamespaceMirror objects

        different_namespaces.each do |namespace|
          namespace.update_attribute(:traversal_ids, [])
        end
      end

      it 'reports the difference between the two tables' do
        expected_result = {
          next_start_id: Namespace.first.id + described_class::MAX_BATCHES * described_class::BATCH_SIZE,
          batches: 4,
          matches: 37,
          mismatches: 3,
          mismatches_details: different_namespaces.map do |namespace|
            {
              id: namespace.id,
              source_table: [[]],
              target_table: [[namespace.id]] # old traversal_ids of the namespace
            }
          end
        }
        expect(consistency_checker.execute(start_id: Namespace.first.id)).to eq(expected_result)

        expect(metrics_counter.get(source_table: "namespaces", result: "mismatch")).to eq(3)
        expect(metrics_counter.get(source_table: "namespaces", result: "match")).to eq(37)
      end
    end
  end
end
# frozen_string_literal: true

require 'spec_helper'

# Specs for the cursor handling around Gitlab::Database::ConsistencyChecker.
# With BATCH_SIZE 5 and MAX_BATCHES 2 every run covers 10 ids.
RSpec.describe Database::ConsistencyCheckService do
  let(:batch_size) { 5 }
  let(:max_batches) { 2 }

  before do
    stub_const("Gitlab::Database::ConsistencyChecker::BATCH_SIZE", batch_size)
    stub_const("Gitlab::Database::ConsistencyChecker::MAX_BATCHES", max_batches)
  end

  after do
    redis_shared_state_cleanup!
  end

  subject(:consistency_check_service) do
    described_class.new(
      source_model: Namespace,
      target_model: Ci::NamespaceMirror,
      source_columns: %w[id traversal_ids],
      target_columns: %w[namespace_id traversal_ids]
    )
  end

  describe '#random_start_id' do
    let(:batch_size) { 5 }

    before do
      create_list(:namespace, 50) # This will also create Ci::NamespaceMirror objects
    end

    it 'generates a random start_id within the records ids' do
      10.times do
        start_id = subject.send(:random_start_id)
        expect(start_id).to be_between(Namespace.first.id, Namespace.last.id).inclusive
      end
    end
  end

  describe '#execute' do
    let(:empty_results) do
      { batches: 0, matches: 0, mismatches: 0, mismatches_details: [] }
    end

    context 'when empty tables' do
      it 'returns results with zero counters' do
        result = consistency_check_service.execute

        expect(result).to eq(empty_results)
      end

      it 'does not call the ConsistencyCheckService' do
        # The collaborator that must not be built here is the ConsistencyChecker.
        expect(Gitlab::Database::ConsistencyChecker).not_to receive(:new)
        consistency_check_service.execute
      end
    end

    context 'no cursor has been saved before' do
      let(:selected_start_id) { Namespace.order(:id).limit(5).pluck(:id).last }
      let(:expected_next_start_id) { selected_start_id + batch_size * max_batches }

      before do
        create_list(:namespace, 50) # This will also create Ci::NamespaceMirror objects
        # Pin the otherwise random starting point.
        expect(consistency_check_service).to receive(:random_start_id).and_return(selected_start_id)
      end

      it 'picks a random start_id' do
        expected_result = {
          batches: 2,
          matches: 10,
          mismatches: 0,
          mismatches_details: [],
          start_id: selected_start_id,
          next_start_id: expected_next_start_id
        }
        expect(consistency_check_service.execute).to eq(expected_result)
      end

      it 'calls the ConsistencyCheckService with the expected parameters' do
        allow_next_instance_of(Gitlab::Database::ConsistencyChecker) do |instance|
          expect(instance).to receive(:execute).with(start_id: selected_start_id).and_return({
            batches: 2,
            next_start_id: expected_next_start_id,
            matches: 10,
            mismatches: 0,
            mismatches_details: []
          })
        end

        expect(Gitlab::Database::ConsistencyChecker).to receive(:new).with(
          source_model: Namespace,
          target_model: Ci::NamespaceMirror,
          source_columns: %w[id traversal_ids],
          target_columns: %w[namespace_id traversal_ids]
        ).and_call_original

        # Same as the stubbed checker result, plus the start_id the service adds.
        expected_result = {
          batches: 2,
          start_id: selected_start_id,
          next_start_id: expected_next_start_id,
          matches: 10,
          mismatches: 0,
          mismatches_details: []
        }
        expect(consistency_check_service.execute).to eq(expected_result)
      end

      it 'saves the next_start_id in Redis for he next iteration' do
        expect(consistency_check_service).to receive(:save_next_start_id).with(expected_next_start_id).and_call_original
        consistency_check_service.execute
      end
    end

    context 'cursor saved in Redis and moving' do
      let(:first_namespace_id) { Namespace.order(:id).first.id }
      let(:second_namespace_id) { Namespace.order(:id).second.id }

      before do
        create_list(:namespace, 30) # This will also create Ci::NamespaceMirror objects
      end

      it "keeps moving the cursor with each call to the service" do
        # Only the first run may fall back to the random start; later runs read the saved cursor.
        expect(consistency_check_service).to receive(:random_start_id).at_most(:once).and_return(first_namespace_id)

        allow_next_instance_of(Gitlab::Database::ConsistencyChecker) do |instance|
          expect(instance).to receive(:execute).ordered.with(start_id: first_namespace_id).and_call_original
          expect(instance).to receive(:execute).ordered.with(start_id: first_namespace_id + 10).and_call_original
          expect(instance).to receive(:execute).ordered.with(start_id: first_namespace_id + 20).and_call_original
          # Gets back to the start of the table
          expect(instance).to receive(:execute).ordered.with(start_id: first_namespace_id).and_call_original
        end

        4.times do
          consistency_check_service.execute
        end
      end

      it "keeps moving the cursor from any start point" do
        expect(consistency_check_service).to receive(:random_start_id).at_most(:once).and_return(second_namespace_id)

        allow_next_instance_of(Gitlab::Database::ConsistencyChecker) do |instance|
          expect(instance).to receive(:execute).ordered.with(start_id: second_namespace_id).and_call_original
          expect(instance).to receive(:execute).ordered.with(start_id: second_namespace_id + 10).and_call_original
        end

        2.times do
          consistency_check_service.execute
        end
      end
    end
  end
end
# frozen_string_literal: true

require 'spec_helper'

# Specs for the worker that checks Namespace against Ci::NamespaceMirror.
RSpec.describe Database::CiNamespaceMirrorsConsistencyCheckWorker do
  let(:worker) { described_class.new }

  describe '#perform' do
    context 'feature flag is disabled' do
      before do
        stub_feature_flags(ci_namespace_mirrors_consistency_check: false)
      end

      it 'does not perform the consistency check on namespaces' do
        expect(Database::ConsistencyCheckService).not_to receive(:new)
        expect(worker).not_to receive(:log_extra_metadata_on_done)
        worker.perform
      end
    end

    context 'feature flag is enabled' do
      before do
        stub_feature_flags(ci_namespace_mirrors_consistency_check: true)
      end

      it 'executes the consistency check on namespaces' do
        expect(Database::ConsistencyCheckService).to receive(:new).and_call_original
        # No namespaces are created in this example, so the zero-counter result is logged.
        expected_result = { batches: 0, matches: 0, mismatches: 0, mismatches_details: [] }
        expect(worker).to receive(:log_extra_metadata_on_done).with(:results, expected_result)
        worker.perform
      end
    end

    context 'logs should contain the detailed mismatches' do
      let(:first_namespace) { Namespace.all.order(:id).limit(1).first }
      # The second namespace by id; it is deleted below while its mirror row stays.
      let(:missing_namespace) { Namespace.all.order(:id).limit(2).last }

      before do
        redis_shared_state_cleanup!
        stub_feature_flags(ci_namespace_mirrors_consistency_check: true)
        create_list(:namespace, 10) # This will also create Ci::NamespaceMirror objects
        missing_namespace.delete

        # Pin the otherwise random starting point to the first namespace.
        allow_next_instance_of(Database::ConsistencyCheckService) do |instance|
          allow(instance).to receive(:random_start_id).and_return(Namespace.first.id)
        end
      end

      it 'reports the differences to the logs' do
        expected_result = {
          batches: 1,
          matches: 9,
          mismatches: 1,
          mismatches_details: [{
            id: missing_namespace.id,
            source_table: nil,
            target_table: [missing_namespace.traversal_ids]
          }],
          start_id: first_namespace.id,
          next_start_id: first_namespace.id # The batch size > number of namespaces
        }
        expect(worker).to receive(:log_extra_metadata_on_done).with(:results, expected_result)
        worker.perform
      end
    end
  end
end
# frozen_string_literal: true

require 'spec_helper'

# Specs for the worker that checks Project against Ci::ProjectMirror.
RSpec.describe Database::CiProjectMirrorsConsistencyCheckWorker do
  let(:worker) { described_class.new }

  describe '#perform' do
    context 'feature flag is disabled' do
      before do
        stub_feature_flags(ci_project_mirrors_consistency_check: false)
      end

      it 'does not perform the consistency check on projects' do
        expect(Database::ConsistencyCheckService).not_to receive(:new)
        expect(worker).not_to receive(:log_extra_metadata_on_done)
        worker.perform
      end
    end

    context 'feature flag is enabled' do
      before do
        stub_feature_flags(ci_project_mirrors_consistency_check: true)
      end

      it 'executes the consistency check on projects' do
        expect(Database::ConsistencyCheckService).to receive(:new).and_call_original
        # No projects are created in this example, so the zero-counter result is logged.
        expected_result = { batches: 0, matches: 0, mismatches: 0, mismatches_details: [] }
        expect(worker).to receive(:log_extra_metadata_on_done).with(:results, expected_result)
        worker.perform
      end
    end

    context 'logs should contain the detailed mismatches' do
      let(:first_project) { Project.all.order(:id).limit(1).first }
      # The second project by id; it is deleted below while its mirror row stays.
      let(:missing_project) { Project.all.order(:id).limit(2).last }

      before do
        redis_shared_state_cleanup!
        stub_feature_flags(ci_project_mirrors_consistency_check: true)
        create_list(:project, 10) # This will also create the matching Ci::ProjectMirror objects
        missing_project.delete

        # Pin the otherwise random starting point to the first project.
        allow_next_instance_of(Database::ConsistencyCheckService) do |instance|
          allow(instance).to receive(:random_start_id).and_return(Project.first.id)
        end
      end

      it 'reports the differences to the logs' do
        expected_result = {
          batches: 1,
          matches: 9,
          mismatches: 1,
          mismatches_details: [{
            id: missing_project.id,
            source_table: nil,
            target_table: [missing_project.namespace_id]
          }],
          start_id: first_project.id,
          next_start_id: first_project.id # The batch size > number of projects
        }
        expect(worker).to receive(:log_extra_metadata_on_done).with(:results, expected_result)
        worker.perform
      end
    end
  end
end