diff --git a/ee/lib/gitlab/geo/geo_tasks.rb b/ee/lib/gitlab/geo/geo_tasks.rb
index 3e6cd99d87264ce5142ec3a91c694dd456cea08f..31fc8fddb8c902a95508d4b2478b7a2ff269380e 100644
--- a/ee/lib/gitlab/geo/geo_tasks.rb
+++ b/ee/lib/gitlab/geo/geo_tasks.rb
@@ -53,6 +53,135 @@ def update_primary_geo_node_url
           exit 1
         end
       end
+
+      # Turns on GitLab Maintenance Mode through the application settings.
+      #
+      # When the MAINTENANCE_MESSAGE environment variable is present, it is
+      # stored as the maintenance mode message in the same update.
+      def enable_maintenance_mode
+        maintenance_mode_message = ENV['MAINTENANCE_MESSAGE']
+
+        $stdout.puts 'Enabling GitLab Maintenance Mode'
+        update_attrs = { maintenance_mode: true }
+        update_attrs[:maintenance_mode_message] = maintenance_mode_message if maintenance_mode_message.present?
+        ::Gitlab::CurrentSettings.update!(update_attrs)
+      end
+
+      # Disables every Sidekiq cron job, re-enables the ones listed in the
+      # Geo CronManager constants below, then polls once per second until no
+      # queue whose name lacks 'geo' has jobs left.
+      def wait_for_empty_non_geo_queues
+        $stdout.puts 'Sidekiq Queues: Disabling all non-Geo queues'
+        Sidekiq::Cron::Job.all.each(&:disable!)
+
+        # Do not enable `geo_sidekiq_cron_config_worker`, due to https://gitlab.com/gitlab-org/gitlab/-/issues/37135
+        geo_wanted_jobs = ::Gitlab::Geo::CronManager::COMMON_GEO_JOBS + ::Gitlab::Geo::CronManager::COMMON_GEO_AND_NON_GEO_JOBS + ::Gitlab::Geo::CronManager::PRIMARY_GEO_JOBS
+        # `find` may return nil for a job that is not registered; skip it instead of raising NoMethodError.
+        geo_wanted_jobs.each { |name| Sidekiq::Cron::Job.find(name)&.enable! }
+
+        $stdout.puts "Sidekiq Queues: Waiting for all non-Geo queues to be empty"
+        sleep(1) until Sidekiq::Queue.all.none? { |queue| !queue.name.include?('geo') && queue.size > 0 }
+        $stdout.puts "Sidekiq Queues: Non-Geo queues empty".color(:green)
+      end
+
+      # Polls once per second until no queue whose name contains 'geo' has jobs left.
+      def wait_empty_geo_queues
+        $stdout.puts "Sidekiq Queues: Waiting for all Geo queues to be empty"
+        sleep(1) until Sidekiq::Queue.all.none? { |queue| queue.name.include?('geo') && queue.size > 0 }
+        $stdout.puts "Sidekiq Queues: Geo queues empty".color(:green)
+      end
+
+      # Blocks until the node status reports a database replication lag of 0 seconds.
+      #
+      # The status is re-read after every pause; otherwise the loop would keep
+      # testing the value captured before the call.
+      # NOTE(review): assumes a status object does not refresh itself - confirm.
+      def wait_for_database_replication(current_node_status)
+        $stdout.puts "Database replication: Waiting for replication lag == 0"
+        until current_node_status.db_replication_lag_seconds == 0
+          sleep(1)
+          current_node_status = GeoNodeStatus.current_node_status
+        end
+        $stdout.puts "Database replication: Caught up".color(:green)
+      end
+
+      # Blocks until last_event_id and cursor_last_event_id of the node status match,
+      # printing both values and re-reading the status on every pass.
+      def wait_for_geo_log_cursor(current_node_status)
+        $stdout.puts "Geo log cursor: Wait for events being equal on primary and secondary"
+        # last_event_id => Primary, cursor_last_event_id => Secondary
+        until current_node_status.last_event_id == current_node_status.cursor_last_event_id
+          $stdout.puts "Geo log cursor: #{current_node_status.last_event_id} != #{current_node_status.cursor_last_event_id}"
+          sleep(1)
+          current_node_status = GeoNodeStatus.current_node_status
+        end
+        $stdout.puts "Geo log cursor: Caught up".color(:green)
+      end
+
+      # Prints per-replicator progress once per second until GeoNodeStatusCheck
+      # reports replication and verification as complete.
+      #
+      # Sync and verification rows are decided independently, so verification
+      # progress is still shown for a replicator that is already fully synced.
+      def wait_for_data_replication_and_verification(current_node_status, geo_node)
+        $stdout.puts "Data replication/verification: Wait for all data to be replication and verified"
+        loop do
+          status_check = Gitlab::Geo::GeoNodeStatusCheck.new(current_node_status, geo_node)
+          break if status_check.replication_verification_complete?
+
+          Gitlab::Geo.enabled_replicator_classes.each do |replicator_class|
+            next unless current_node_status.count_for(replicator_class) > 0
+
+            # Sync State
+            synced_percentage = current_node_status.synced_in_percentage_for(replicator_class)
+            if synced_percentage < 100
+              # `send` is presumably needed because print_counts_row is private - confirm.
+              status_check.send(
+                :print_counts_row,
+                description: replicator_class.replicable_title_plural,
+                failed: replicator_class.failed_count,
+                succeeded: replicator_class.synced_count,
+                total: replicator_class.registry_count,
+                percentage: synced_percentage
+              )
+            end
+
+            # Verification State
+            next unless replicator_class.verification_enabled?
+
+            verified_percentage = current_node_status.verified_in_percentage_for(replicator_class)
+            next unless verified_percentage < 100
+
+            status_check.send(
+              :print_counts_row,
+              description: "#{replicator_class.replicable_title_plural} Verified",
+              failed: replicator_class.verification_failed_count,
+              succeeded: replicator_class.verified_count,
+              total: replicator_class.registry_count,
+              percentage: verified_percentage
+            )
+          end
+
+          sleep(1)
+
+          # Update status
+          current_node_status = GeoNodeStatus.current_node_status
+        end
+        $stdout.puts "Data replication/verification: All data successfully replicated and verified".color(:green)
+      end
+
+      # Runs the waits in order: Geo queues drained, database replication
+      # caught up, Geo log cursor caught up, then data replicated and
+      # verified. Each step starts from a freshly read node status.
+      def wait_until_replicated_and_verified
+        wait_empty_geo_queues
+
+        wait_for_database_replication(GeoNodeStatus.current_node_status)
+        wait_for_geo_log_cursor(GeoNodeStatus.current_node_status)
+
+        current_node_status = GeoNodeStatus.current_node_status
+        wait_for_data_replication_and_verification(current_node_status, current_node_status.geo_node)
+      end
     end
   end
 end
diff --git a/ee/lib/tasks/gitlab/geo.rake b/ee/lib/tasks/gitlab/geo.rake
index 7f45219f233f795f4653d699be86d024aacb6828..cf2262160bdb7ab2612e8e752b60c36e5bcfc034 100644
--- a/ee/lib/tasks/gitlab/geo.rake
+++ b/ee/lib/tasks/gitlab/geo.rake
@@ -60,5 +60,20 @@ namespace :gitlab do
           " gitlab-rake gitlab:geo:check".color(:red)
       end
     end
+
+    desc 'Gitlab | Geo | Prevent updates to primary site'
+    task prevent_updates_to_primary_site: :environment do
+      abort 'This command is only available on a primary node' unless ::Gitlab::Geo.primary?
+
+      Gitlab::Geo::GeoTasks.enable_maintenance_mode
+      Gitlab::Geo::GeoTasks.wait_for_empty_non_geo_queues
+    end
+
+    desc 'Gitlab | Geo | Wait until replicated and verified'
+    task wait_until_replicated_and_verified: :environment do
+      abort 'This command is only available on a secondary node' unless ::Gitlab::Geo.secondary?
+
+      Gitlab::Geo::GeoTasks.wait_until_replicated_and_verified
+    end
   end
 end