diff --git a/config/initializers/health_check.rb b/config/initializers/health_check.rb index c8e2a9c8e9d1dab01b0470bb02a46788d4a10aff..b445cb7752942461ea4c0f160e044cdb970749e7 100644 --- a/config/initializers/health_check.rb +++ b/config/initializers/health_check.rb @@ -3,6 +3,6 @@ config.full_checks = %w(database migrations cache) config.add_custom_check('geo') do - Gitlab::Geo::HealthCheck.perform_checks + Gitlab::Geo::HealthCheck.new.perform_checks end end diff --git a/ee/app/models/geo_node_status.rb b/ee/app/models/geo_node_status.rb index c6528bade4748499ff4bae03dd726b03ddb4b882..3a991d2e9e7cc2a1095ee62a9e440532a2c826ee 100644 --- a/ee/app/models/geo_node_status.rb +++ b/ee/app/models/geo_node_status.rb @@ -209,7 +209,7 @@ def load_primary_data def load_secondary_data if Gitlab::Geo.secondary? - self.db_replication_lag_seconds = Gitlab::Geo::HealthCheck.db_replication_lag_seconds + self.db_replication_lag_seconds = Gitlab::Geo::HealthCheck.new.db_replication_lag_seconds self.cursor_last_event_id = current_cursor_last_event_id self.cursor_last_event_date = Geo::EventLog.find_by(id: self.cursor_last_event_id)&.created_at self.repositories_synced_count = projects_finder.count_synced_repositories diff --git a/ee/lib/gitlab/geo/health_check.rb b/ee/lib/gitlab/geo/health_check.rb index 00543c97b5a2e12654b75b8ae1690d06f579eed3..6ff59e77dfcd1a3ec75cad712671ceef344f7ab9 100644 --- a/ee/lib/gitlab/geo/health_check.rb +++ b/ee/lib/gitlab/geo/health_check.rb @@ -3,34 +3,21 @@ module Gitlab module Geo class HealthCheck - def self.perform_checks + include Gitlab::Utils::StrongMemoize + + def perform_checks raise NotImplementedError.new('Geo is only compatible with PostgreSQL') unless Gitlab::Database.postgresql? return '' unless Gitlab::Geo.secondary? - return 'The Geo database configuration file is missing.' unless Gitlab::Geo.geo_database_configured? - return 'The Geo node has a database that is not configured for streaming replication with the primary node.' unless self.database_secondary? - return 'The Geo node does not appear to be replicating the database from the primary node.' if Gitlab::Database.pg_stat_wal_receiver_supported? && !self.streaming_active? - - database_version = self.get_database_version.to_i - migration_version = self.get_migration_version.to_i - - if database_version != migration_version - return "Current Geo database version (#{database_version}) does not match latest migration (#{migration_version}).\n"\ - 'You may have to run `gitlab-rake geo:db:migrate` as root on the secondary.' - end - - return 'The Geo database is not configured to use Foreign Data Wrapper.' unless Gitlab::Geo::Fdw.enabled? + return 'Geo database configuration file is missing.' unless Gitlab::Geo.geo_database_configured? + return 'Geo node has a database that is not configured for streaming replication with the primary node.' unless Gitlab::Database.db_read_only? + return 'Geo node does not appear to be replicating the database from the primary node.' if Gitlab::Database.pg_stat_wal_receiver_supported? && !streaming_active? + return "Current Geo database version (#{database_version}) does not match latest migration (#{migration_version}).\nYou may have to run `gitlab-rake geo:db:migrate` as root on the secondary." unless database_migration_version_match? + return 'Geo database is not configured to use Foreign Data Wrapper.' unless Gitlab::Geo::Fdw.enabled? unless Gitlab::Geo::Fdw.foreign_tables_up_to_date? - output = "The Geo database has an outdated FDW remote schema." - - foreign_schema_tables_count = Gitlab::Geo::Fdw.foreign_schema_tables_count - gitlab_schema_tables_count = Gitlab::Geo::Fdw.gitlab_schema_tables_count - - unless gitlab_schema_tables_count == foreign_schema_tables_count - output = "#{output} It contains #{foreign_schema_tables_count} of #{gitlab_schema_tables_count} expected tables." - end - + output = "Geo database has an outdated FDW remote schema." + output = "#{output} It contains #{foreign_schema_tables_count} of #{gitlab_schema_tables_count} expected tables." unless schema_tables_match? return output end @@ -39,74 +26,90 @@ def self.perform_checks e.message end - def self.db_migrate_path + def db_replication_lag_seconds + # Obtain the replication lag in seconds + ActiveRecord::Base.connection + .execute(db_replication_lag_seconds_query) + .first + .fetch('replication_lag').to_i + end + + private + + def db_replication_lag_seconds_query + <<-SQL.squish + SELECT CASE + WHEN #{Gitlab::Database.pg_last_wal_receive_lsn}() = #{Gitlab::Database.pg_last_wal_replay_lsn}() + THEN 0 + ELSE + EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp())::INTEGER + END + AS replication_lag + SQL + end + + def db_migrate_path # Lazy initialisation so Rails.root will be defined @db_migrate_path ||= File.join(Rails.root, 'ee', 'db', 'geo', 'migrate') end - def self.db_post_migrate_path + def db_post_migrate_path # Lazy initialisation so Rails.root will be defined @db_post_migrate_path ||= File.join(Rails.root, 'ee', 'db', 'geo', 'post_migrate') end - def self.get_database_version - if defined?(ActiveRecord) - connection = ::Geo::BaseRegistry.connection - schema_migrations_table_name = ActiveRecord::Base.schema_migrations_table_name - - if connection.data_source_exists?(schema_migrations_table_name) - connection.execute("SELECT MAX(version) AS version FROM #{schema_migrations_table_name}") - .first - .fetch('version') + def database_version + strong_memoize(:database_version) do + if defined?(ActiveRecord) + connection = ::Geo::BaseRegistry.connection + schema_migrations_table_name = ActiveRecord::Base.schema_migrations_table_name + + if connection.data_source_exists?(schema_migrations_table_name) + connection.execute("SELECT MAX(version) AS version FROM #{schema_migrations_table_name}") + .first + .fetch('version') + end end end end - def self.get_migration_version - latest_migration = nil + def migration_version + strong_memoize(:migration_version) do + latest_migration = nil - Dir[File.join(self.db_migrate_path, "[0-9]*_*.rb"), File.join(self.db_post_migrate_path, "[0-9]*_*.rb")].each do |f| - timestamp = f.scan(/0*([0-9]+)_[_.a-zA-Z0-9]*.rb/).first.first rescue -1 + Dir[File.join(db_migrate_path, "[0-9]*_*.rb"), File.join(db_post_migrate_path, "[0-9]*_*.rb")].each do |f| + timestamp = f.scan(/0*([0-9]+)_[_.a-zA-Z0-9]*.rb/).first.first rescue -1 - if latest_migration.nil? || timestamp.to_i > latest_migration.to_i - latest_migration = timestamp + if latest_migration.nil? || timestamp.to_i > latest_migration.to_i + latest_migration = timestamp + end end + + latest_migration end + end - latest_migration + def database_migration_version_match? + database_version.to_i == migration_version.to_i end - def self.database_secondary? - Gitlab::Database.db_read_only? + def gitlab_schema_tables_count + @gitlab_schema_tables_count ||= Gitlab::Geo::Fdw.gitlab_schema_tables_count end - def self.db_replication_lag_seconds - # Obtain the replication lag in seconds - lag = - ActiveRecord::Base.connection.execute(<<-SQL.squish) - SELECT CASE - WHEN #{Gitlab::Database.pg_last_wal_receive_lsn}() = #{Gitlab::Database.pg_last_wal_receive_lsn}() - THEN 0 - ELSE - EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp())::INTEGER - END - AS replication_lag - SQL - .first - .fetch('replication_lag') + def foreign_schema_tables_count + @foreign_schema_tables_count ||= Gitlab::Geo::Fdw.foreign_schema_tables_count + end - lag.present? ? lag.to_i : lag + def schema_tables_match? + gitlab_schema_tables_count == foreign_schema_tables_count end - def self.streaming_active? + def streaming_active? # Get a streaming status # This only works for Postgresql 9.6 and greater - pid = - ActiveRecord::Base.connection.select_values(' - SELECT pid FROM pg_stat_wal_receiver;') - .first - - pid.to_i > 0 + ActiveRecord::Base.connection + .select_values('SELECT pid FROM pg_stat_wal_receiver').first.to_i > 0 end end end diff --git a/ee/lib/tasks/geo.rake b/ee/lib/tasks/geo.rake index aa9e1191b9e5c93f079618520a5301602fd1c41c..3894b44be7bda1068df9845e60a4f111f9958d66 100644 --- a/ee/lib/tasks/geo.rake +++ b/ee/lib/tasks/geo.rake @@ -297,7 +297,7 @@ namespace :geo do puts geo_node.namespaces.any? ? 'Selective' : 'Full' print 'Database replication lag: '.rjust(COLUMN_WIDTH) - puts "#{Gitlab::Geo::HealthCheck.db_replication_lag_seconds} seconds" + puts "#{Gitlab::Geo::HealthCheck.new.db_replication_lag_seconds} seconds" print 'Last event ID seen from primary: '.rjust(COLUMN_WIDTH) last_event = Geo::EventLog.last diff --git a/ee/spec/lib/gitlab/geo/health_check_spec.rb b/ee/spec/lib/gitlab/geo/health_check_spec.rb index 2640ac903f0d5c463a8a01c70da8202f0d70142f..951643a49f42164098c50fd2c51c7f513d308555 100644 --- a/ee/spec/lib/gitlab/geo/health_check_spec.rb +++ b/ee/spec/lib/gitlab/geo/health_check_spec.rb @@ -1,125 +1,157 @@ require 'spec_helper' describe Gitlab::Geo::HealthCheck, :geo do - set(:secondary) { create(:geo_node) } - - subject { described_class } - - before do - allow(Gitlab::Geo).to receive(:current_node).and_return(secondary) - end - - describe '.perform_checks' do - it 'returns a string if database is not fully migrated' do - allow(described_class).to receive(:geo_database_configured?).and_return(true) - allow(described_class).to receive(:database_secondary?).and_return(true) - allow(described_class).to receive(:get_database_version).and_return('20170101') - allow(described_class).to receive(:get_migration_version).and_return('20170201') - allow(described_class).to receive(:streaming_active?).and_return(true) - - message = subject.perform_checks - - expect(message).to include('Current Geo database version (20170101) does not match latest migration (20170201)') - expect(message).to include('gitlab-rake geo:db:migrate') - end + include ::EE::GeoHelpers - it 'returns an empty string when not running on a secondary node' do - allow(Gitlab::Geo).to receive(:secondary?) { false } - - expect(subject.perform_checks).to be_blank - end - - it 'returns an error when database is not configured for streaming replication' do - allow(Gitlab::Geo).to receive(:configured?) { true } - allow(Gitlab::Database).to receive(:postgresql?) { true } - allow(described_class).to receive(:database_secondary?) { false } - - expect(subject.perform_checks).not_to be_blank - end - - it 'returns an error when streaming replication is not working' do - allow(Gitlab::Geo).to receive(:configured?) { true } - allow(Gitlab::Database).to receive(:postgresql?) { true } - allow(described_class).to receive(:database_secondary?) { false } - - expect(subject.perform_checks).to include('not configured for streaming replication') - end - - it 'returns an error when configuration file is missing for tracking DB' do - allow(Rails.configuration).to receive(:respond_to?).with(:geo_database) { false } - - expect(subject.perform_checks).to include('database configuration file is missing') - end - - it 'returns an error when Geo database version does not match the latest migration version' do - allow(described_class).to receive(:database_secondary?).and_return(true) - allow(subject).to receive(:get_database_version) { 1 } - allow(described_class).to receive(:streaming_active?).and_return(true) + set(:secondary) { create(:geo_node) } - expect(subject.perform_checks).to match(/Current Geo database version \([0-9]+\) does not match latest migration \([0-9]+\)/) + describe '#perform_checks' do + before do + stub_current_geo_node(secondary) end - it 'returns an error when latest migration version does not match the Geo database version' do - allow(described_class).to receive(:database_secondary?).and_return(true) - allow(subject).to receive(:get_migration_version) { 1 } - allow(described_class).to receive(:streaming_active?).and_return(true) + context 'when an exception is raised' do + it 'catches the exception nicely and returns the message' do + allow(Gitlab::Database).to receive(:postgresql?).and_raise('Uh oh') - expect(subject.perform_checks).to match(/Current Geo database version \([0-9]+\) does not match latest migration \([0-9]+\)/) + expect(subject.perform_checks).to eq('Uh oh') + end end - it 'returns an error when streaming is not active and Postgresql supports pg_stat_wal_receiver' do - allow(Gitlab::Database).to receive(:pg_stat_wal_receiver_supported?).and_return(true) - allow(described_class).to receive(:database_secondary?).and_return(true) - allow(described_class).to receive(:streaming_active?).and_return(false) + context 'without PostgreSQL' do + it 'raises an error' do + allow(Gitlab::Database).to receive(:postgresql?) { false } - expect(subject.perform_checks).to match(/The Geo node does not appear to be replicating the database from the primary node/) + expect { subject.perform_checks }.to raise_error(NotImplementedError) + end end - it 'returns an error when streaming is not active and Postgresql does not support pg_stat_wal_receiver' do - allow(Gitlab::Database).to receive(:pg_stat_wal_receiver_supported?).and_return(false) - allow(described_class).to receive(:database_secondary?).and_return(true) - allow(described_class).to receive(:streaming_active?).and_return(false) - - expect(subject.perform_checks).to be_empty + context 'with PostgreSQL' do + before do + allow(Gitlab::Database).to receive(:postgresql?) { true } + end + + context 'on the primary node' do + it 'returns an empty string' do + allow(Gitlab::Geo).to receive(:secondary?) { false } + + expect(subject.perform_checks).to be_blank + end + end + + context 'on the secondary node' do + let(:geo_database_configured) { true } + let(:db_read_only) { true } + + before do + allow(Gitlab::Geo).to receive(:secondary?) { true } + allow(Gitlab::Geo).to receive(:geo_database_configured?) { geo_database_configured } + allow(Gitlab::Database).to receive(:db_read_only?) { db_read_only } + end + + context 'when the Geo tracking DB is not configured' do + let(:geo_database_configured) { false } + + it 'returns an error' do + expect(subject.perform_checks).to include('Geo database configuration file is missing') + end + end + + context 'when the database is writable' do + let(:db_read_only) { false } + + it 'returns an error' do + expect(subject.perform_checks).to include('Geo node has a database that is not configured for streaming replication with the primary node.') + end + end + + context 'streaming replication' do + before do + allow(Gitlab::Database).to receive(:pg_stat_wal_receiver_supported?).and_return(true) + end + + context 'that is supported but not working' do + it 'returns an error' do + allow(ActiveRecord::Base).to receive_message_chain('connection.select_values').with(no_args).with('SELECT pid FROM pg_stat_wal_receiver').and_return([]) + + expect(subject.perform_checks).to match(/Geo node does not appear to be replicating the database from the primary node/) + end + end + + context 'that is supported and working' do + before do + allow(ActiveRecord::Base).to receive_message_chain('connection.select_values').with(no_args).with('SELECT pid FROM pg_stat_wal_receiver').and_return(['123']) + allow(Gitlab::Geo::Fdw).to receive(:enabled?) { true } + allow(Gitlab::Geo::Fdw).to receive(:foreign_tables_up_to_date?) { true } + end + + it 'returns an error if database is not fully migrated' do + allow(subject).to receive(:database_version).and_return('20170101') + allow(subject).to receive(:migration_version).and_return('20170201') + + message = subject.perform_checks + + expect(message).to include('Geo database version (20170101) does not match latest migration (20170201)') + expect(message).to include('gitlab-rake geo:db:migrate') + end + + it 'returns an error when FDW is disabled' do + allow(Gitlab::Geo::Fdw).to receive(:enabled?) { false } + + expect(subject.perform_checks).to match(/Geo database is not configured to use Foreign Data Wrapper/) + end + + context 'when foreign tables are not up-to-date' do + before do + allow(Gitlab::Geo::Fdw).to receive(:foreign_tables_up_to_date?) { false } + end + + it 'returns an error when FDW remote table is not in sync but has same amount of tables' do + allow(Gitlab::Geo::Fdw).to receive(:foreign_schema_tables_count) { 1 } + allow(Gitlab::Geo::Fdw).to receive(:gitlab_schema_tables_count) { 1 } + + expect(subject.perform_checks).to match(/Geo database has an outdated FDW remote schema\./) + end + + it 'returns an error when FDW remote table is not in sync and has same different amount of tables' do + allow(Gitlab::Geo::Fdw).to receive(:foreign_schema_tables_count) { 1 } + allow(Gitlab::Geo::Fdw).to receive(:gitlab_schema_tables_count) { 2 } + + expect(subject.perform_checks).to match(/Geo database has an outdated FDW remote schema\. It contains [0-9]+ of [0-9]+ expected tables/) + end + end + + it 'finally returns an empty string when everything is healthy' do + expect(subject.perform_checks).to be_blank + end + end + end + end end + end - it 'returns an error when FDW is disabled' do - allow(described_class).to receive(:database_secondary?) { true } - allow(described_class).to receive(:streaming_active?) { true } - - allow(Gitlab::Geo::Fdw).to receive(:enabled?) { false } - - expect(subject.perform_checks).to match(/The Geo database is not configured to use Foreign Data Wrapper/) - end - - it 'returns an error when FDW remote table is not in sync but has same amount of tables' do - allow(described_class).to receive(:database_secondary?) { true } - allow(described_class).to receive(:streaming_active?) { true } - - allow(Gitlab::Geo::Fdw).to receive(:foreign_tables_up_to_date?) { false } - allow(Gitlab::Geo::Fdw).to receive(:foreign_schema_tables_count) { 1 } - allow(Gitlab::Geo::Fdw).to receive(:gitlab_schema_tables_count) { 1 } - - expect(subject.perform_checks).to match(/The Geo database has an outdated FDW remote schema\./) + describe '#db_replication_lag_seconds' do + before do + query = 'SELECT CASE WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0 ELSE EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp())::INTEGER END AS replication_lag' + allow(Gitlab::Database).to receive(:pg_last_wal_receive_lsn).and_return('pg_last_wal_receive_lsn') + allow(Gitlab::Database).to receive(:pg_last_wal_replay_lsn).and_return('pg_last_wal_replay_lsn') + allow(ActiveRecord::Base).to receive_message_chain('connection.execute').with(query).and_return([{ 'replication_lag' => lag_in_seconds }]) end - it 'returns an error when FDW remote table is not in sync and has same different amount of tables' do - allow(described_class).to receive(:database_secondary?) { true } - allow(described_class).to receive(:streaming_active?) { true } - - allow(Gitlab::Geo::Fdw).to receive(:foreign_tables_up_to_date?) { false } - allow(Gitlab::Geo::Fdw).to receive(:foreign_schema_tables_count) { 1 } - allow(Gitlab::Geo::Fdw).to receive(:gitlab_schema_tables_count) { 2 } + context 'when there is no lag' do + let(:lag_in_seconds) { nil } - expect(subject.perform_checks).to match(/The Geo database has an outdated FDW remote schema\. It contains [0-9]+ of [0-9]+ expected tables/) + it 'returns 0 seconds' do + expect(subject.db_replication_lag_seconds).to eq(0) + end end - end - describe 'MySQL checks' do - it 'raises an error' do - allow(Gitlab::Database).to receive(:postgresql?) { false } + context 'when there is lag' do + let(:lag_in_seconds) { 7 } - expect { subject.perform_checks }.to raise_error(NotImplementedError) + it 'returns the number of seconds' do + expect(subject.db_replication_lag_seconds).to eq(7) + end end end end diff --git a/ee/spec/models/geo_node_status_spec.rb b/ee/spec/models/geo_node_status_spec.rb index 128463cadba2844c3a9ab0f5339d76366d746a8d..a10b4415427d35a1f367adcdbf9fa7b3f3516e79 100644 --- a/ee/spec/models/geo_node_status_spec.rb +++ b/ee/spec/models/geo_node_status_spec.rb @@ -228,14 +228,14 @@ describe '#db_replication_lag_seconds' do it 'returns the set replication lag if secondary' do allow(Gitlab::Geo).to receive(:secondary?).and_return(true) - allow(Gitlab::Geo::HealthCheck).to receive(:db_replication_lag_seconds).and_return(1000) + geo_health_check = double('Gitlab::Geo::HealthCheck', perform_checks: '', db_replication_lag_seconds: 1000) + allow(Gitlab::Geo::HealthCheck).to receive(:new).and_return(geo_health_check) expect(subject.db_replication_lag_seconds).to eq(1000) end it "doesn't attempt to set replication lag if primary" do stub_current_geo_node(primary) - expect(Gitlab::Geo::HealthCheck).not_to receive(:db_replication_lag_seconds) expect(subject.db_replication_lag_seconds).to eq(nil) end