diff --git a/lib/gitlab/database/collation_checker.rb b/lib/gitlab/database/collation_checker.rb new file mode 100644 index 0000000000000000000000000000000000000000..537a7f15659486c4a91b1c661e21ca5a474027ef --- /dev/null +++ b/lib/gitlab/database/collation_checker.rb @@ -0,0 +1,206 @@ +# frozen_string_literal: true + +module Gitlab + module Database + class CollationChecker + COLLATION_VERSION_MISMATCH_QUERY = <<~SQL + SELECT + collname AS collation_name, + collprovider AS provider, + collversion AS stored_version, + pg_collation_actual_version(oid) AS actual_version, + collversion <> pg_collation_actual_version(oid) AS has_mismatch + FROM + pg_collation + WHERE + collprovider = 'c' + AND collversion IS NOT NULL + AND pg_collation_actual_version(oid) IS NOT NULL + AND collversion <> pg_collation_actual_version(oid); + SQL + + def self.run(database_name: nil, logger: Gitlab::AppLogger) + Gitlab::Database::EachDatabase.each_connection(only: database_name) do |connection, database| + new(connection, database, logger).run + end + end + + attr_reader :connection, :database_name, :logger + + def initialize(connection, database_name, logger) + @connection = connection + @database_name = database_name + @logger = logger + end + + def run + result = { mismatches_found: false, affected_indexes: [] } + + logger.info("Checking for PostgreSQL collation mismatches on #{database_name} database...") + + mismatched = mismatched_collations + + if mismatched.empty? + logger.info("No collation mismatches detected on #{database_name}.") + return result + end + + result[:mismatches_found] = true + + logger.warn("⚠️ COLLATION MISMATCHES DETECTED on #{database_name} database!") + logger.warn("#{mismatched.count} collation(s) have version mismatches:") + + mismatched.each do |row| + logger.warn( + " - #{row['collation_name']}: stored=#{row['stored_version']}, actual=#{row['actual_version']}" + ) + end + + affected_indexes = find_affected_indexes(mismatched) + + if affected_indexes.empty? + logger.info("No indexes appear to be affected by the collation mismatches.") + return result + end + + result[:affected_indexes] = affected_indexes + + logger.warn("Affected indexes that need to be rebuilt:") + affected_indexes.each do |row| + logger.warn(" - #{row['index_name']} (#{row['index_type']}) on table #{row['table_name']}") + logger.warn(" • Affected columns: #{row['affected_columns']}") + logger.warn(" • Type: #{unique?(row) ? 'UNIQUE' : 'NON-UNIQUE'}") + end + + # Provide remediation guidance + provide_remediation_guidance(affected_indexes) + + result + end + + private + + # Helper method to check if an index is unique, handling both string and boolean values + def unique?(index) + unique = index['is_unique'] + unique == 't' || unique == true || unique == 'true' + end + + def mismatched_collations + connection.select_all(COLLATION_VERSION_MISMATCH_QUERY).to_a + rescue ActiveRecord::StatementInvalid => e + logger.error("Error checking collation mismatches: #{e.message}") + [] + end + + def find_affected_indexes(mismatched_collations) + return [] if mismatched_collations.empty? + + collation_names = mismatched_collations.map { |row| connection.quote(row['collation_name']) }.join(',') + + # Using a more comprehensive query based on PostgreSQL wiki + # Link: https://wiki.postgresql.org/wiki/Locale_data_changes#What_indexes_are_affected + query = <<~SQL + SELECT DISTINCT + indrelid::regclass::text AS table_name, + indexrelid::regclass::text AS index_name, + string_agg(a.attname, ', ' ORDER BY s.attnum) AS affected_columns, + am.amname AS index_type, + s.indisunique AS is_unique + FROM + (SELECT + indexrelid, + indrelid, + indcollation[j] coll, + indkey[j] attnum, + indisunique + FROM + pg_index i, + generate_subscripts(indcollation, 1) g(j) + ) s + JOIN + pg_collation c ON coll=c.oid + JOIN + pg_class idx ON idx.oid = s.indexrelid + JOIN + pg_am am ON idx.relam = am.oid + JOIN + pg_attribute a ON a.attrelid = s.indrelid AND a.attnum = s.attnum + WHERE + c.collname IN (#{collation_names}) + GROUP BY + s.indexrelid, s.indrelid, s.indisunique, index_name, table_name, am.amname + ORDER BY + table_name, + index_name; + SQL + + connection.select_all(query).to_a + rescue ActiveRecord::StatementInvalid => e + logger.error("Error finding affected indexes: #{e.message}") + [] + end + + def provide_remediation_guidance(affected_indexes) + log_remediation_header + log_duplicate_entry_checks(affected_indexes) + log_index_rebuild_commands(affected_indexes) + log_collation_refresh_commands + log_conclusion + end + + def log_remediation_header + logger.warn("\nREMEDIATION STEPS:") + logger.warn("1. Put GitLab into maintenance mode") + logger.warn("2. Run the following SQL commands:") + end + + def log_duplicate_entry_checks(affected_indexes) + # Use the unique? helper method for consistency + unique_indexes = affected_indexes.select { |idx| unique?(idx) } + return unless unique_indexes.any? + + logger.warn("\n# Step 1: Check for duplicate entries in unique indexes") + unique_indexes.each do |idx| + logger.warn("-- Check for duplicates in #{idx['table_name']} (unique index: #{idx['index_name']})") + columns = idx['affected_columns'].split(', ') + cols_str = columns.join(', ') + + logger.warn( + "SELECT #{cols_str}, COUNT(*), ARRAY_AGG(id) " \ + "FROM #{idx['table_name']} " \ + "GROUP BY #{cols_str} HAVING COUNT(*) > 1 LIMIT 1;" + ) + end + + logger.warn("\n# If duplicates exist, you may need to use gitlab:db:deduplicate_tags or similar tasks") + logger.warn("# to fix duplicate entries before rebuilding unique indexes.") + end + + def log_index_rebuild_commands(affected_indexes) + return unless affected_indexes.any? + + logger.warn("\n# Step 2: Rebuild affected indexes") + logger.warn("# Option A: Rebuild individual indexes with minimal downtime:") + affected_indexes.each do |row| + logger.warn("REINDEX INDEX #{row['index_name']} CONCURRENTLY;") + end + + logger.warn("\n# Option B: Alternatively, rebuild all indexes at once (requires downtime):") + logger.warn("REINDEX DATABASE #{database_name};") + end + + def log_collation_refresh_commands + # Customer reported this command as working: https://gitlab.com/groups/gitlab-org/-/epics/8573#note_2513370623 + logger.warn("\n# Step 3: Refresh collation versions") + logger.warn("ALTER DATABASE #{database_name} REFRESH COLLATION VERSION;") + logger.warn("-- This updates all collation versions in the database to match the current OS") + end + + def log_conclusion + logger.warn("\n3. Take GitLab out of maintenance mode") + logger.warn("\nFor more information, see: https://docs.gitlab.com/administration/postgresql/upgrading_os/") + end + end + end +end diff --git a/lib/tasks/gitlab/db.rake b/lib/tasks/gitlab/db.rake index 995944b865a7e1ffd7569291cb3c6bc12150a945..ab52d6494cd64711d015f4b1b30ec10095c249a5 100644 --- a/lib/tasks/gitlab/db.rake +++ b/lib/tasks/gitlab/db.rake @@ -598,6 +598,20 @@ namespace :gitlab do end end + desc 'GitLab | DB | Check for PostgreSQL collation mismatches and list affected indexes' + task collation_checker: :environment do + Gitlab::Database::CollationChecker.run(logger: Logger.new($stdout)) + end + + namespace :collation_checker do + each_database(databases) do |database_name| + desc "GitLab | DB | Check for PostgreSQL collation mismatches on the #{database_name} database" + task database_name => :environment do + Gitlab::Database::CollationChecker.run(database_name: database_name, logger: Logger.new($stdout)) + end + end + end + namespace :dictionary do desc 'Generate database docs yaml' task generate: :environment do diff --git a/spec/lib/gitlab/database/collation_checker_spec.rb b/spec/lib/gitlab/database/collation_checker_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..4550784503f3bfa5098f2bcc79e3cab582fb6bd9 --- /dev/null +++ b/spec/lib/gitlab/database/collation_checker_spec.rb @@ -0,0 +1,333 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::CollationChecker, feature_category: :database do + describe '.run' do + let(:connection) { instance_double(ActiveRecord::ConnectionAdapters::PostgreSQLAdapter) } + let(:database_name) { 'main' } + let(:logger) { instance_double(Gitlab::AppLogger, info: nil, warn: nil, error: nil) } + + it 'instantiates the class and calls run' do + instance = instance_double(described_class) + + expect(Gitlab::Database::EachDatabase).to receive(:each_connection) + .with(only: database_name) + .and_yield(connection, database_name) + + expect(described_class).to receive(:new) + .with(connection, database_name, logger) + .and_return(instance) + expect(instance).to receive(:run) + + described_class.run(database_name: database_name, logger: logger) + end + end + + describe '#run' do + # Mock-based tests for edge cases and error handling + context 'with mocked database connection' do + let(:connection) { instance_double(ActiveRecord::ConnectionAdapters::PostgreSQLAdapter) } + let(:database_name) { 'main' } + let(:logger) { instance_double(Gitlab::AppLogger, info: nil, warn: nil, error: nil) } + let(:checker) { described_class.new(connection, database_name, logger) } + + context 'when no collation mismatches are found' do + let(:empty_results) { instance_double(ActiveRecord::Result, to_a: []) } + + before do + allow(connection).to receive(:select_all) + .with(described_class::COLLATION_VERSION_MISMATCH_QUERY) + .and_return(empty_results) + end + + it 'logs a success message and returns no mismatches' do + expect(logger).to receive(:info).with("Checking for PostgreSQL collation mismatches on main database...") + expect(logger).to receive(:info).with("No collation mismatches detected on main.") + + result = checker.run + + expect(result).to eq({ mismatches_found: false, affected_indexes: [] }) + end + end + + context 'when collation mismatches exist but no indexes are affected' do + let(:mismatches) do + instance_double( + ActiveRecord::Result, + to_a: [{ 'collation_name' => 'en_US.utf8', 'stored_version' => '1.2.3', 'actual_version' => '1.2.4' }] + ) + end + + let(:empty_affected) { instance_double(ActiveRecord::Result, to_a: []) } + + before do + allow(connection).to receive(:quote) + .with('en_US.utf8') + .and_return("'en_US.utf8'") + + allow(connection).to receive(:select_all) + .with(described_class::COLLATION_VERSION_MISMATCH_QUERY) + .and_return(mismatches) + + allow(connection).to receive(:select_all) + .with(/SELECT DISTINCT.*FROM.*pg_collation.*WHERE.*collname IN \('en_US.utf8'\)/m) + .and_return(empty_affected) + end + + it 'logs warnings about mismatches but reports no affected indexes' do + expect(logger).to receive(:info).with("Checking for PostgreSQL collation mismatches on main database...") + expect(logger).to receive(:warn).with("⚠️ COLLATION MISMATCHES DETECTED on main database!") + expect(logger).to receive(:warn).with("1 collation(s) have version mismatches:") + expect(logger).to receive(:warn).with(" - en_US.utf8: stored=1.2.3, actual=1.2.4") + expect(logger).to receive(:info).with("No indexes appear to be affected by the collation mismatches.") + + result = checker.run + + expect(result).to eq({ mismatches_found: true, affected_indexes: [] }) + end + end + + context 'when collation mismatches exist and indexes are affected (mock version)' do + let(:mismatches) do + instance_double( + ActiveRecord::Result, + to_a: [{ 'collation_name' => 'en_US.utf8', 'stored_version' => '1.2.3', 'actual_version' => '1.2.4' }] + ) + end + + let(:affected_indexes) do + instance_double( + ActiveRecord::Result, + to_a: [ + { + 'table_name' => 'projects', + 'index_name' => 'index_projects_on_name', + 'affected_columns' => 'name', + 'index_type' => 'btree', + 'is_unique' => 't' + }, + { + 'table_name' => 'users', + 'index_name' => 'index_users_on_username', + 'affected_columns' => 'username', + 'index_type' => 'btree', + 'is_unique' => 'f' + } + ] + ) + end + + before do + allow(connection).to receive(:select_all) + .with(described_class::COLLATION_VERSION_MISMATCH_QUERY) + .and_return(mismatches) + + allow(connection).to receive(:quote) + .with('en_US.utf8') + .and_return("'en_US.utf8'") + + allow(connection).to receive(:select_all) + .with(/SELECT DISTINCT.*FROM.*pg_collation.*WHERE.*collname IN \('en_US.utf8'\)/m) + .and_return(affected_indexes) + end + + it 'logs warnings and provides remediation guidance' do + # Test basic detection + expect(logger).to receive(:info).with("Checking for PostgreSQL collation mismatches on main database...") + expect(logger).to receive(:warn).with("⚠️ COLLATION MISMATCHES DETECTED on main database!") + expect(logger).to receive(:warn).with("1 collation(s) have version mismatches:") + expect(logger).to receive(:warn).with(" - en_US.utf8: stored=1.2.3, actual=1.2.4") + + # Test affected indexes are listed + expect(logger).to receive(:warn).with("Affected indexes that need to be rebuilt:") + expect(logger).to receive(:warn).with(" - index_projects_on_name (btree) on table projects") + expect(logger).to receive(:warn).with(" • Affected columns: name") + expect(logger).to receive(:warn).with(" • Type: UNIQUE") + expect(logger).to receive(:warn).with(" - index_users_on_username (btree) on table users") + expect(logger).to receive(:warn).with(" • Affected columns: username") + expect(logger).to receive(:warn).with(" • Type: NON-UNIQUE") + + # Test remediation header + expect(logger).to receive(:warn).with("\nREMEDIATION STEPS:") + expect(logger).to receive(:warn).with("1. Put GitLab into maintenance mode") + expect(logger).to receive(:warn).with("2. Run the following SQL commands:") + + # Test duplicate entry checks + expect(logger).to receive(:warn).with("\n# Step 1: Check for duplicate entries in unique indexes") + expect(logger).to receive(:warn).with( + "-- Check for duplicates in projects (unique index: index_projects_on_name)" + ) + expect(logger).to receive(:warn).with( + /SELECT name, COUNT\(\*\), ARRAY_AGG\(id\) FROM projects GROUP BY name HAVING COUNT\(\*\) > 1 LIMIT 1;/ + ) + expect(logger).to receive(:warn).with(/\n# If duplicates exist/) + + # Test index rebuild commands + expect(logger).to receive(:warn).with("\n# Step 2: Rebuild affected indexes") + expect(logger).to receive(:warn).with("# Option A: Rebuild individual indexes with minimal downtime:") + expect(logger).to receive(:warn).with("REINDEX INDEX index_projects_on_name CONCURRENTLY;") + expect(logger).to receive(:warn).with("REINDEX INDEX index_users_on_username CONCURRENTLY;") + expect(logger).to receive(:warn).with( + "\n# Option B: Alternatively, rebuild all indexes at once (requires downtime):" + ) + expect(logger).to receive(:warn).with("REINDEX DATABASE main;") + + # Test collation refresh commands + expect(logger).to receive(:warn).with("\n# Step 3: Refresh collation versions") + expect(logger).to receive(:warn).with("ALTER DATABASE main REFRESH COLLATION VERSION;") + expect(logger).to receive(:warn).with( + "-- This updates all collation versions in the database to match the current OS" + ) + + # Test conclusion + expect(logger).to receive(:warn).with("\n3. Take GitLab out of maintenance mode") + expect(logger).to receive(:warn).with("\nFor more information, see: https://docs.gitlab.com/administration/postgresql/upgrading_os/") + + result = checker.run + + expect(result).to include(mismatches_found: true) + expect(result[:affected_indexes]).to eq(affected_indexes.to_a) + end + end + + context 'when there is an error checking for mismatches' do + before do + allow(connection).to receive(:select_all) + .with(described_class::COLLATION_VERSION_MISMATCH_QUERY) + .and_raise(ActiveRecord::StatementInvalid, 'test error') + end + + it 'logs the error and returns no mismatches' do + expect(logger).to receive(:info).with("Checking for PostgreSQL collation mismatches on main database...") + expect(logger).to receive(:error).with("Error checking collation mismatches: test error") + + result = checker.run + + expect(result).to eq({ mismatches_found: false, affected_indexes: [] }) + end + end + + context 'when there is an error finding affected indexes' do + let(:mismatches) do + instance_double( + ActiveRecord::Result, + to_a: [{ 'collation_name' => 'en_US.utf8', 'stored_version' => '1.2.3', 'actual_version' => '1.2.4' }] + ) + end + + before do + allow(connection).to receive(:select_all) + .with(described_class::COLLATION_VERSION_MISMATCH_QUERY) + .and_return(mismatches) + + allow(connection).to receive(:quote) + .with('en_US.utf8') + .and_return("'en_US.utf8'") + + allow(connection).to receive(:select_all) + .with(/SELECT DISTINCT.*FROM.*pg_collation.*WHERE.*collname IN \('en_US.utf8'\)/m) + .and_raise(ActiveRecord::StatementInvalid, 'test error') + end + + it 'logs the error and returns only mismatches' do + expect(logger).to receive(:info).with("Checking for PostgreSQL collation mismatches on main database...") + expect(logger).to receive(:warn).with("⚠️ COLLATION MISMATCHES DETECTED on main database!") + expect(logger).to receive(:warn).with("1 collation(s) have version mismatches:") + expect(logger).to receive(:warn).with(" - en_US.utf8: stored=1.2.3, actual=1.2.4") + expect(logger).to receive(:error).with("Error finding affected indexes: test error") + + result = checker.run + + expect(result).to include(mismatches_found: true) + expect(result[:affected_indexes]).to eq([]) + end + end + end + + # Real database test for the happy path + context 'with real database connection' do + let(:connection) { ActiveRecord::Base.connection } + let(:database_name) { connection.current_database } + let(:logger) { instance_double(Logger, info: nil, warn: nil, error: nil) } + let(:checker) { described_class.new(connection, database_name, logger) } + + let(:table_name) { '_test_c_collation_table' } + let(:index_name) { '_test_c_collation_index' } + let(:c_collation) { 'C' } # Use standard C collation which should be available + + # Find the real OID of the C collation for our test + let!(:c_collation_info) do + connection.select_all( + "SELECT oid FROM pg_collation WHERE collname = '#{c_collation}' AND collprovider = 'c' LIMIT 1" + ).first + end + + let!(:c_collation_oid) { c_collation_info&.[]('oid') } + + before do + skip 'C collation not found in database' unless c_collation_info + + # Create test table with a column using C collation + connection.execute(<<~SQL) + CREATE TABLE #{table_name} ( + id serial PRIMARY KEY, + test_col varchar(255) COLLATE "#{c_collation}" NOT NULL + ); + SQL + + # Create an index on the collated column + connection.execute(<<~SQL) + CREATE INDEX #{index_name} ON #{table_name} (test_col); + SQL + + # Insert test data + connection.execute(<<~SQL) + INSERT INTO #{table_name} (test_col) VALUES ('value1'); + SQL + end + + after do + connection.execute("DROP TABLE IF EXISTS #{table_name} CASCADE;") + end + + it 'detects C collation mismatch and finds affected index' do + allow(checker).to receive(:mismatched_collations) do + # Create a modified query to simulate actual version being different + modified_query = described_class::COLLATION_VERSION_MISMATCH_QUERY + .gsub('collversion', "'123.456'") + .gsub('pg_collation_actual_version(oid)', "'987.654.321'") + + connection.select_all(modified_query).to_a + end + + # Run the checker with our mocked version mismatch + result = checker.run + + # Verify we found mismatches + expect(result[:mismatches_found]).to be true + + # Verify we found affected indexes + expect(result[:affected_indexes]).not_to be_empty + + # Verify we found our test table index + test_indexes = result[:affected_indexes].select { |idx| idx['table_name'] == table_name } + + expect(test_indexes).not_to be_empty, "Expected to find test table index but found none" + expect(test_indexes.first['index_name']).to eq(index_name), "Expected to find our specific test index" + + # Verify remediation SQL includes our test index + rebuild_commands = [] + allow(logger).to receive(:warn) do |message| + rebuild_commands << message if message.include?('REINDEX INDEX') + end + + # Run again to capture remediation SQL + checker.run + + # Verify rebuild command for our test index + expect(rebuild_commands.any? { |cmd| cmd.include?(index_name) }).to be true + end + end + end +end diff --git a/spec/tasks/gitlab/db_rake_spec.rb b/spec/tasks/gitlab/db_rake_spec.rb index 6328ad74e3a74eb28f70240684320d0a52433b24..2810c39ad4307486ea4b77204e5e3b55a0edaf35 100644 --- a/spec/tasks/gitlab/db_rake_spec.rb +++ b/spec/tasks/gitlab/db_rake_spec.rb @@ -575,6 +575,72 @@ end end + describe 'collation_checker' do + context 'with a single database' do + before do + skip_if_multiple_databases_are_setup + end + + it 'calls Gitlab::Database::CollationChecker with correct arguments' do + logger_double = instance_double(Logger, level: nil, info: nil, warn: nil, error: nil) + allow(Logger).to receive(:new).with($stdout).and_return(logger_double) + + expect(Gitlab::Database::CollationChecker).to receive(:run) + .with(logger: logger_double) + + run_rake_task('gitlab:db:collation_checker') + end + end + + context 'with multiple databases' do + let(:logger_double) { instance_double(Logger, level: nil, info: nil, warn: nil, error: nil) } + + before do + skip_if_multiple_databases_not_setup(:ci) + + allow(Logger).to receive(:new).with($stdout).and_return(logger_double) + end + + it 'calls Gitlab::Database::CollationChecker with correct arguments' do + expect(Gitlab::Database::CollationChecker).to receive(:run) + .with(logger: logger_double) + + run_rake_task('gitlab:db:collation_checker') + end + + context 'when the single database task is used' do + before do + skip_if_shared_database(:ci) + end + + it 'calls Gitlab::Database::CollationChecker with the main database' do + expect(Gitlab::Database::CollationChecker).to receive(:run) + .with(database_name: 'main', logger: logger_double) + + run_rake_task('gitlab:db:collation_checker:main') + end + + it 'calls Gitlab::Database::CollationChecker with the ci database' do + expect(Gitlab::Database::CollationChecker).to receive(:run) + .with(database_name: 'ci', logger: logger_double) + + run_rake_task('gitlab:db:collation_checker:ci') + end + end + + context 'with geo configured' do + before do + skip_unless_geo_configured + end + + it 'does not create a task for the geo database' do + expect { run_rake_task('gitlab:db:collation_checker:geo') } + .to raise_error(/Don't know how to build task 'gitlab:db:collation_checker:geo'/) + end + end + end + end + describe 'dictionary generate' do let(:db_config) { instance_double(ActiveRecord::DatabaseConfigurations::HashConfig, name: 'fake_db') }