diff --git a/config/feature_flags/development/detect_cross_database_modification.yml b/config/feature_flags/development/detect_cross_database_modification.yml new file mode 100644 index 0000000000000000000000000000000000000000..7f74e1362918a191dae58864ee91dbeec686dfa7 --- /dev/null +++ b/config/feature_flags/development/detect_cross_database_modification.yml @@ -0,0 +1,8 @@ +--- +name: detect_cross_database_modification +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/73316 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/344620 +milestone: '14.5' +type: development +group: group::sharding +default_enabled: false diff --git a/config/initializers/database_query_analyzers.rb b/config/initializers/database_query_analyzers.rb index a2db2d2a2b8c2043a54f8653a44ff933a56e6df0..2f1854e77227d8e8329bd9a8d460d3a5448c19f2 100644 --- a/config/initializers/database_query_analyzers.rb +++ b/config/initializers/database_query_analyzers.rb @@ -4,6 +4,7 @@ if Gitlab.dev_or_test_env? || Gitlab::Utils.to_boolean(ENV['GITLAB_ENABLE_QUERY_ANALYZERS'], default: false) Gitlab::Database::QueryAnalyzer.instance.hook! Gitlab::Database::QueryAnalyzer.instance.all_analyzers.append(::Gitlab::Database::QueryAnalyzers::GitlabSchemasMetrics) + Gitlab::Database::QueryAnalyzer.instance.all_analyzers.append(::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification) Gitlab::Application.configure do |config| config.middleware.use(Gitlab::Middleware::QueryAnalyzer) diff --git a/config/initializers/detect_cross_database_modification.rb b/config/initializers/detect_cross_database_modification.rb new file mode 100644 index 0000000000000000000000000000000000000000..deaf8356017efb662a57a076b66d0071c3007b89 --- /dev/null +++ b/config/initializers/detect_cross_database_modification.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +# Do not use middleware in tests since we already wrap all tests with +# `PreventCrossDatabaseModification` logic . Also the environment variable +# offers a quick way to disable this check if it is causing problems +unless Rails.env.test? || ENV['DISABLE_CROSS_DATABASE_MODIFICATION_DETECTION'] + require_dependency 'gitlab/middleware/detect_cross_database_modification' + + Gitlab::Application.configure do |config| + config.middleware.use(Gitlab::Middleware::DetectCrossDatabaseModification) + end +end diff --git a/lib/gitlab/database.rb b/lib/gitlab/database.rb index c83436f24b4e7518c110cd1306132cd3cf3a3205..9c74e5d2ca811184797565592106bb7ba1bffd88 100644 --- a/lib/gitlab/database.rb +++ b/lib/gitlab/database.rb @@ -168,18 +168,6 @@ def self.allow_cross_joins_across_databases(url:) yield end - # This method will allow cross database modifications within the block - # Example: - # - # allow_cross_database_modification_within_transaction(url: 'url-to-an-issue') do - # create(:build) # inserts ci_build and project record in one transaction - # end - def self.allow_cross_database_modification_within_transaction(url:) - # this method will be overridden in: - # spec/support/database/cross_database_modification_check.rb - yield - end - def self.add_post_migrate_path_to_rails(force: false) return if ENV['SKIP_POST_DEPLOYMENT_MIGRATIONS'] && !force diff --git a/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb b/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb new file mode 100644 index 0000000000000000000000000000000000000000..62c5e913a17c77398099416d6f9b671fc4a1ee66 --- /dev/null +++ b/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb @@ -0,0 +1,148 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module QueryAnalyzers + class PreventCrossDatabaseModification < Database::QueryAnalyzers::Base + CrossDatabaseModificationAcrossUnsupportedTablesError = Class.new(StandardError) + + # This method will allow cross database modifications within the block + # Example: + # + # allow_cross_database_modification_within_transaction(url: 'url-to-an-issue') do + # create(:build) # inserts ci_build and project record in one transaction + # end + def self.allow_cross_database_modification_within_transaction(url:) + cross_database_context = self.cross_database_context + return yield unless cross_database_context && cross_database_context[:enabled] + + transaction_tracker_enabled_was = cross_database_context[:enabled] + cross_database_context[:enabled] = false + + yield + ensure + cross_database_context[:enabled] = transaction_tracker_enabled_was if cross_database_context + end + + def self.with_cross_database_modification_prevented(log_only: false) + reset_cross_database_context! + cross_database_context.merge!(enabled: true, log_only: log_only) + + yield if block_given? + ensure + cleanup_with_cross_database_modification_prevented if block_given? + end + + def self.cleanup_with_cross_database_modification_prevented + if cross_database_context + cross_database_context[:enabled] = false + end + end + + def self.cross_database_context + Thread.current[:transaction_tracker] + end + + def self.reset_cross_database_context! + Thread.current[:transaction_tracker] = initial_data + end + + def self.initial_data + { + enabled: false, + transaction_depth_by_db: Hash.new { |h, k| h[k] = 0 }, + modified_tables_by_db: Hash.new { |h, k| h[k] = Set.new }, + log_only: false + } + end + + def self.enabled? + true + end + + # rubocop:disable Metrics/AbcSize + def self.analyze(parsed) + return false unless cross_database_context + return false unless cross_database_context[:enabled] + + connection = parsed.connection + return false if connection.pool.instance_of?(ActiveRecord::ConnectionAdapters::NullPool) + + return if in_factory_bot_create? + + database = connection.pool.db_config.name + + sql = parsed.sql + + # We ignore BEGIN in tests as this is the outer transaction for + # DatabaseCleaner + if sql.start_with?('SAVEPOINT') || (!Rails.env.test? && sql.start_with?('BEGIN')) + cross_database_context[:transaction_depth_by_db][database] += 1 + + return + elsif sql.start_with?('RELEASE SAVEPOINT', 'ROLLBACK TO SAVEPOINT') || (!Rails.env.test? && sql.start_with?('ROLLBACK', 'COMMIT')) + cross_database_context[:transaction_depth_by_db][database] -= 1 + if cross_database_context[:transaction_depth_by_db][database] <= 0 + cross_database_context[:modified_tables_by_db][database].clear + end + + return + end + + return if cross_database_context[:transaction_depth_by_db].values.all?(&:zero?) + + # PgQuery might fail in some cases due to limited nesting: + # https://github.com/pganalyze/pg_query/issues/209 + tables = sql.downcase.include?(' for update') ? parsed.pg.tables : parsed.pg.dml_tables + + # We have some code where plans and gitlab_subscriptions are lazily + # created and this causes lots of spec failures + # https://gitlab.com/gitlab-org/gitlab/-/issues/343394 + tables -= %w[plans gitlab_subscriptions] + + return if tables.empty? + + # All migrations will write to schema_migrations in the same transaction. + # It's safe to ignore this since schema_migrations exists in all + # databases + return if tables == ['schema_migrations'] + + cross_database_context[:modified_tables_by_db][database].merge(tables) + all_tables = cross_database_context[:modified_tables_by_db].values.map(&:to_a).flatten + schemas = ::Gitlab::Database::GitlabSchema.table_schemas(all_tables) + + if schemas.many? + message = "Cross-database data modification of '#{schemas.to_a.join(", ")}' were detected within " \ + "a transaction modifying the '#{all_tables.to_a.join(", ")}' tables." \ + "Please refer to https://docs.gitlab.com/ee/development/database/multiple_databases.html#removing-cross-database-transactions for details on how to resolve this exception." + + if schemas.any? { |s| s.to_s.start_with?("undefined") } + message += " The gitlab_schema was undefined for one or more of the tables in this transaction. Any new tables must be added to lib/gitlab/database/gitlab_schemas.yml ." + end + + begin + raise CrossDatabaseModificationAcrossUnsupportedTablesError, message + rescue CrossDatabaseModificationAcrossUnsupportedTablesError => e + ::Gitlab::ErrorTracking.track_exception(e, { gitlab_schemas: schemas, tables: all_tables, query: parsed.sql }) + raise unless cross_database_context[:log_only] + end + end + rescue StandardError => e + # Extra safety net to ensure we never raise in production + # if something goes wrong in this logic + Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e) + end + # rubocop:enable Metrics/AbcSize + + # We ignore execution in the #create method from FactoryBot + # because it is not representative of real code we run in + # production. There are far too many false positives caused + # by instantiating objects in different `gitlab_schema` in a + # FactoryBot `create`. + def self.in_factory_bot_create? + Rails.env.test? && caller_locations.any? { |l| l.path.end_with?('lib/factory_bot/evaluation.rb') && l.label == 'create' } + end + end + end + end +end diff --git a/lib/gitlab/middleware/detect_cross_database_modification.rb b/lib/gitlab/middleware/detect_cross_database_modification.rb new file mode 100644 index 0000000000000000000000000000000000000000..c577f188e2e3b3b6289a09663bb9cb4fe74c741a --- /dev/null +++ b/lib/gitlab/middleware/detect_cross_database_modification.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +module Gitlab + module Middleware + class DetectCrossDatabaseModification + def initialize(app) + @app = app + end + + def call(env) + if Feature.enabled?(:detect_cross_database_modification, default_enabled: :yaml) + ::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification.with_cross_database_modification_prevented(log_only: true) do + @app.call(env) + end + else + @app.call(env) + end + end + end + end +end diff --git a/lib/gitlab/sidekiq_middleware.rb b/lib/gitlab/sidekiq_middleware.rb index 69802fd6217e42683a62894088a524abdfafb291..d72631113a30e8d8384aa0a9433815e862535d6e 100644 --- a/lib/gitlab/sidekiq_middleware.rb +++ b/lib/gitlab/sidekiq_middleware.rb @@ -41,6 +41,12 @@ def self.server_configurator(metrics: true, arguments_logger: true, memory_kille # so we can compare the latest WAL location against replica chain.add ::Gitlab::SidekiqMiddleware::DuplicateJobs::Server chain.add ::Gitlab::Database::LoadBalancing::SidekiqServerMiddleware + + # Do not use middleware in tests since we already wrap all tests with + # `PreventCrossDatabaseModification` logic . Also the environment + # variable offers a quick way to disable this check if it is causing + # problems + chain.add ::Gitlab::SidekiqMiddleware::DetectCrossDatabaseModification unless Rails.env.test? || ENV['DISABLE_CROSS_DATABASE_MODIFICATION_DETECTION'] end end diff --git a/lib/gitlab/sidekiq_middleware/detect_cross_database_modification.rb b/lib/gitlab/sidekiq_middleware/detect_cross_database_modification.rb new file mode 100644 index 0000000000000000000000000000000000000000..4fb6c5053541eabe972d54b7e7d2bd51362eb47e --- /dev/null +++ b/lib/gitlab/sidekiq_middleware/detect_cross_database_modification.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module Gitlab + module SidekiqMiddleware + class DetectCrossDatabaseModification + def call(worker, job, queue) + if Feature.enabled?(:detect_cross_database_modification, default_enabled: :yaml) + ::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification.with_cross_database_modification_prevented(log_only: true) do + yield + end + else + yield + end + end + end + end +end diff --git a/spec/support_specs/database/prevent_cross_database_modification_spec.rb b/spec/lib/gitlab/database/query_analyzers/prevent_cross_database_modification_spec.rb similarity index 91% rename from spec/support_specs/database/prevent_cross_database_modification_spec.rb rename to spec/lib/gitlab/database/query_analyzers/prevent_cross_database_modification_spec.rb index 365f1f32858df56f9bf856d8b6dc94168b947d1b..bb8451b64c342e5dfec97464bc008948b4aa1f6e 100644 --- a/spec/support_specs/database/prevent_cross_database_modification_spec.rb +++ b/spec/lib/gitlab/database/query_analyzers/prevent_cross_database_modification_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe 'Database::PreventCrossDatabaseModification' do +RSpec.describe Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification do let_it_be(:pipeline, refind: true) { create(:ci_pipeline) } let_it_be(:project, refind: true) { create(:project) } @@ -122,10 +122,10 @@ def run_queries include_examples 'successful examples' end - describe '#allow_cross_database_modification_within_transaction' do + describe '.allow_cross_database_modification_within_transaction' do it 'skips raising error' do expect do - Gitlab::Database.allow_cross_database_modification_within_transaction(url: 'gitlab-issue') do + described_class.allow_cross_database_modification_within_transaction(url: 'gitlab-issue') do Project.transaction do pipeline.touch project.touch @@ -136,7 +136,7 @@ def run_queries it 'skips raising error on factory creation' do expect do - Gitlab::Database.allow_cross_database_modification_within_transaction(url: 'gitlab-issue') do + described_class.allow_cross_database_modification_within_transaction(url: 'gitlab-issue') do ApplicationRecord.transaction do create(:ci_pipeline) end diff --git a/spec/lib/gitlab/middleware/detect_cross_database_modification_spec.rb b/spec/lib/gitlab/middleware/detect_cross_database_modification_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..42ba9ed2a0cdc6c1b5abd3c0a91255ea21245ac2 --- /dev/null +++ b/spec/lib/gitlab/middleware/detect_cross_database_modification_spec.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Middleware::DetectCrossDatabaseModification do + describe '#call' do + let(:app) { double(:app) } + let(:middleware) { described_class.new(app) } + let(:env) { {} } + + subject { middleware.call(env) } + + context 'when there is a cross modification' do + before do + allow(app).to receive(:call) do + Project.transaction do + Project.where(id: -1).update_all(id: -1) + ::Ci::Pipeline.where(id: -1).update_all(id: -1) + end + end + end + + it 'detects cross modifications and logs them' do + expect(::Gitlab::ErrorTracking).to receive(:track_exception) + + subject + end + + context 'when the detect_cross_database_modification is disabled' do + before do + stub_feature_flags(detect_cross_database_modification: false) + end + + it 'does not detect cross modifications' do + expect(::Gitlab::ErrorTracking).not_to receive(:track_exception) + + subject + end + end + end + + context 'when there is no cross modification' do + before do + allow(app).to receive(:call) do + Project.transaction do + Project.where(id: -1).update_all(id: -1) + Namespace.where(id: -1).update_all(id: -1) + end + end + end + + it 'does not log anything' do + expect(::Gitlab::ErrorTracking).not_to receive(:track_exception) + + subject + end + end + end +end diff --git a/spec/lib/gitlab/sidekiq_middleware/detect_cross_database_modification_spec.rb b/spec/lib/gitlab/sidekiq_middleware/detect_cross_database_modification_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..eb20e6c8cd9dc6fb1ec1331def3f1691a46ca84a --- /dev/null +++ b/spec/lib/gitlab/sidekiq_middleware/detect_cross_database_modification_spec.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::SidekiqMiddleware::DetectCrossDatabaseModification do + describe '#call' do + let(:worker) { double(:worker) } + let(:job) { { 'jid' => 'job123' } } + let(:queue) { 'some-queue' } + let(:middleware) { described_class.new } + + def do_queries + end + + subject { middleware.call(worker, job, queue) { do_queries } } + + context 'when there is a cross modification' do + def do_queries + Project.transaction do + Project.where(id: -1).update_all(id: -1) + ::Ci::Pipeline.where(id: -1).update_all(id: -1) + end + end + + it 'detects cross modifications and logs them' do + expect(::Gitlab::ErrorTracking).to receive(:track_exception) + + subject + end + + context 'when the detect_cross_database_modification is disabled' do + before do + stub_feature_flags(detect_cross_database_modification: false) + end + + it 'does not detect cross modifications' do + expect(::Gitlab::ErrorTracking).not_to receive(:track_exception) + + subject + end + end + end + + context 'when there is no cross modification' do + def do_queries + Project.transaction do + Project.where(id: -1).update_all(id: -1) + Namespace.where(id: -1).update_all(id: -1) + end + end + + it 'does not log anything' do + expect(::Gitlab::ErrorTracking).not_to receive(:track_exception) + + subject + end + end + end +end diff --git a/spec/support/database/cross-database-modification-allowlist.yml b/spec/support/database/cross-database-modification-allowlist.yml index e0166f2ae3f7e5c46f90350031e5aa1627c94391..cd5bd8029ae80df1ed4d170b82244592cef7a10c 100644 --- a/spec/support/database/cross-database-modification-allowlist.yml +++ b/spec/support/database/cross-database-modification-allowlist.yml @@ -50,6 +50,8 @@ - "./spec/lib/gitlab/email/handler/create_merge_request_handler_spec.rb" - "./spec/lib/gitlab/email/handler/create_note_handler_spec.rb" - "./spec/lib/gitlab/email/handler/create_note_on_issuable_handler_spec.rb" +- "./spec/lib/gitlab/middleware/detect_cross_database_modification_spec.rb" +- "./spec/lib/gitlab/sidekiq_middleware/detect_cross_database_modification_spec.rb" - "./spec/lib/peek/views/active_record_spec.rb" - "./spec/models/ci/build_need_spec.rb" - "./spec/models/ci/build_trace_chunk_spec.rb" diff --git a/spec/support/database/prevent_cross_database_modification.rb b/spec/support/database/prevent_cross_database_modification.rb index 09a87f92072751b2e31eab1779595cfd393c6948..b31c05b4823771c05e885c7bd7c34ef45f66fddb 100644 --- a/spec/support/database/prevent_cross_database_modification.rb +++ b/spec/support/database/prevent_cross_database_modification.rb @@ -1,138 +1,19 @@ # frozen_string_literal: true -module Database - module PreventCrossDatabaseModification - CrossDatabaseModificationAcrossUnsupportedTablesError = Class.new(StandardError) - - module GitlabDatabaseMixin - def allow_cross_database_modification_within_transaction(url:) - cross_database_context = Database::PreventCrossDatabaseModification.cross_database_context - return yield unless cross_database_context && cross_database_context[:enabled] - - transaction_tracker_enabled_was = cross_database_context[:enabled] - cross_database_context[:enabled] = false - - yield - ensure - cross_database_context[:enabled] = transaction_tracker_enabled_was if cross_database_context - end - end - - module SpecHelpers - def with_cross_database_modification_prevented - subscriber = ActiveSupport::Notifications.subscribe('sql.active_record') do |name, start, finish, id, payload| - PreventCrossDatabaseModification.prevent_cross_database_modification!(payload[:connection], payload[:sql]) - end - - PreventCrossDatabaseModification.reset_cross_database_context! - PreventCrossDatabaseModification.cross_database_context.merge!(enabled: true, subscriber: subscriber) - - yield if block_given? - ensure - cleanup_with_cross_database_modification_prevented if block_given? - end - - def cleanup_with_cross_database_modification_prevented - if PreventCrossDatabaseModification.cross_database_context - ActiveSupport::Notifications.unsubscribe(PreventCrossDatabaseModification.cross_database_context[:subscriber]) - PreventCrossDatabaseModification.cross_database_context[:enabled] = false - end - end - end - - def self.cross_database_context - Thread.current[:transaction_tracker] - end - - def self.reset_cross_database_context! - Thread.current[:transaction_tracker] = initial_data - end - - def self.initial_data - { - enabled: false, - transaction_depth_by_db: Hash.new { |h, k| h[k] = 0 }, - modified_tables_by_db: Hash.new { |h, k| h[k] = Set.new } - } - end - - def self.prevent_cross_database_modification!(connection, sql) - return unless cross_database_context - return unless cross_database_context[:enabled] - - return if connection.pool.instance_of?(ActiveRecord::ConnectionAdapters::NullPool) - return if in_factory_bot_create? - - database = connection.pool.db_config.name - - if sql.start_with?('SAVEPOINT') - cross_database_context[:transaction_depth_by_db][database] += 1 - - return - elsif sql.start_with?('RELEASE SAVEPOINT', 'ROLLBACK TO SAVEPOINT') - cross_database_context[:transaction_depth_by_db][database] -= 1 - if cross_database_context[:transaction_depth_by_db][database] <= 0 - cross_database_context[:modified_tables_by_db][database].clear - end - - return - end - - return if cross_database_context[:transaction_depth_by_db].values.all?(&:zero?) - - # PgQuery might fail in some cases due to limited nesting: - # https://github.com/pganalyze/pg_query/issues/209 - parsed_query = PgQuery.parse(sql) - tables = sql.downcase.include?(' for update') ? parsed_query.tables : parsed_query.dml_tables - - # We have some code where plans and gitlab_subscriptions are lazily - # created and this causes lots of spec failures - # https://gitlab.com/gitlab-org/gitlab/-/issues/343394 - tables -= %w[plans gitlab_subscriptions] - - return if tables.empty? - - # All migrations will write to schema_migrations in the same transaction. - # It's safe to ignore this since schema_migrations exists in all - # databases - return if tables == ['schema_migrations'] - - cross_database_context[:modified_tables_by_db][database].merge(tables) - - all_tables = cross_database_context[:modified_tables_by_db].values.map(&:to_a).flatten - schemas = ::Gitlab::Database::GitlabSchema.table_schemas(all_tables) - - if schemas.many? - message = "Cross-database data modification of '#{schemas.to_a.join(", ")}' were detected within " \ - "a transaction modifying the '#{all_tables.to_a.join(", ")}' tables." \ - "Please refer to https://docs.gitlab.com/ee/development/database/multiple_databases.html#removing-cross-database-transactions for details on how to resolve this exception." - - if schemas.any? { |s| s.to_s.start_with?("undefined") } - message += " The gitlab_schema was undefined for one or more of the tables in this transaction. Any new tables must be added to spec/support/database/gitlab_schemas.yml ." - end - - raise Database::PreventCrossDatabaseModification::CrossDatabaseModificationAcrossUnsupportedTablesError, message - end - end +module PreventCrossDatabaseModificationSpecHelpers + def with_cross_database_modification_prevented(...) + ::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification.with_cross_database_modification_prevented(...) + end - # We ignore execution in the #create method from FactoryBot - # because it is not representative of real code we run in - # production. There are far too many false positives caused - # by instantiating objects in different `gitlab_schema` in a - # FactoryBot `create`. - def self.in_factory_bot_create? - caller_locations.any? { |l| l.path.end_with?('lib/factory_bot/evaluation.rb') && l.label == 'create' } - end + def cleanup_with_cross_database_modification_prevented + ::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification.cleanup_with_cross_database_modification_prevented end end -Gitlab::Database.singleton_class.prepend( - Database::PreventCrossDatabaseModification::GitlabDatabaseMixin) - CROSS_DB_MODIFICATION_ALLOW_LIST = Set.new(YAML.load_file(File.join(__dir__, 'cross-database-modification-allowlist.yml'))).freeze RSpec.configure do |config| - config.include(::Database::PreventCrossDatabaseModification::SpecHelpers) + config.include(PreventCrossDatabaseModificationSpecHelpers) # Using before and after blocks because the around block causes problems with the let_it_be # record creations. It makes an extra savepoint which breaks the transaction count logic.