From 7cfbf29862a931bf13908e689a7025a6fe208358 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Thu, 28 Oct 2021 05:46:49 +0000 Subject: [PATCH 1/3] Detect and log cross-database modifications in production This adds a Rack middleware and Sidekiq middleware for detecting "Cross-database modification" violations. These occur when you query 2 different databases in the context of a single transaction. Previously we only wrapped our RSpec code with this detection logic but we found that it was often causing lots of false positives in RSpec (in test-only code) which made it hard to sort through the real issues. In addition we want extra validation by running this in production and therefore detecting code that our specs may be missing. The middlewares use a feature flag `detect_cross_database_modification` which we intend to enable for a small percentage of time. The middlewares can also be disabled by setting the env var `DISABLE_CROSS_DATABASE_MODIFICATION_DETECTION=true` and restarting the application. This environment variable is an extra safety net if the middlewares are causing problems. --- .../detect_cross_database_modification.yml | 8 + .../detect_cross_database_modification.rb | 12 ++ lib/gitlab/database.rb | 12 -- .../prevent_cross_database_modification.rb | 143 ++++++++++++++++++ .../detect_cross_database_modification.rb | 21 +++ lib/gitlab/sidekiq_middleware.rb | 6 + .../detect_cross_database_modification.rb | 17 +++ ...revent_cross_database_modification_spec.rb | 8 +- ...detect_cross_database_modification_spec.rb | 59 ++++++++ ...detect_cross_database_modification_spec.rb | 59 ++++++++ .../cross-database-modification-allowlist.yml | 2 + .../prevent_cross_database_modification.rb | 133 +--------------- 12 files changed, 338 insertions(+), 142 deletions(-) create mode 100644 config/feature_flags/development/detect_cross_database_modification.yml create mode 100644 config/initializers/detect_cross_database_modification.rb create mode 100644 lib/gitlab/database/prevent_cross_database_modification.rb create mode 100644 lib/gitlab/middleware/detect_cross_database_modification.rb create mode 100644 lib/gitlab/sidekiq_middleware/detect_cross_database_modification.rb rename spec/{support_specs => lib/gitlab}/database/prevent_cross_database_modification_spec.rb (90%) create mode 100644 spec/lib/gitlab/middleware/detect_cross_database_modification_spec.rb create mode 100644 spec/lib/gitlab/sidekiq_middleware/detect_cross_database_modification_spec.rb diff --git a/config/feature_flags/development/detect_cross_database_modification.yml b/config/feature_flags/development/detect_cross_database_modification.yml new file mode 100644 index 00000000000000..7f74e1362918a1 --- /dev/null +++ b/config/feature_flags/development/detect_cross_database_modification.yml @@ -0,0 +1,8 @@ +--- +name: detect_cross_database_modification +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/73316 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/344620 +milestone: '14.5' +type: development +group: group::sharding +default_enabled: false diff --git a/config/initializers/detect_cross_database_modification.rb b/config/initializers/detect_cross_database_modification.rb new file mode 100644 index 00000000000000..deaf8356017efb --- /dev/null +++ b/config/initializers/detect_cross_database_modification.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +# Do not use middleware in tests since we already wrap all tests with +# `PreventCrossDatabaseModification` logic . Also the environment variable +# offers a quick way to disable this check if it is causing problems +unless Rails.env.test? || ENV['DISABLE_CROSS_DATABASE_MODIFICATION_DETECTION'] + require_dependency 'gitlab/middleware/detect_cross_database_modification' + + Gitlab::Application.configure do |config| + config.middleware.use(Gitlab::Middleware::DetectCrossDatabaseModification) + end +end diff --git a/lib/gitlab/database.rb b/lib/gitlab/database.rb index c83436f24b4e75..9c74e5d2ca8111 100644 --- a/lib/gitlab/database.rb +++ b/lib/gitlab/database.rb @@ -168,18 +168,6 @@ def self.allow_cross_joins_across_databases(url:) yield end - # This method will allow cross database modifications within the block - # Example: - # - # allow_cross_database_modification_within_transaction(url: 'url-to-an-issue') do - # create(:build) # inserts ci_build and project record in one transaction - # end - def self.allow_cross_database_modification_within_transaction(url:) - # this method will be overridden in: - # spec/support/database/cross_database_modification_check.rb - yield - end - def self.add_post_migrate_path_to_rails(force: false) return if ENV['SKIP_POST_DEPLOYMENT_MIGRATIONS'] && !force diff --git a/lib/gitlab/database/prevent_cross_database_modification.rb b/lib/gitlab/database/prevent_cross_database_modification.rb new file mode 100644 index 00000000000000..d681a86b3a0f2c --- /dev/null +++ b/lib/gitlab/database/prevent_cross_database_modification.rb @@ -0,0 +1,143 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module PreventCrossDatabaseModification + CrossDatabaseModificationAcrossUnsupportedTablesError = Class.new(StandardError) + + # This method will allow cross database modifications within the block + # Example: + # + # allow_cross_database_modification_within_transaction(url: 'url-to-an-issue') do + # create(:build) # inserts ci_build and project record in one transaction + # end + def self.allow_cross_database_modification_within_transaction(url:) + cross_database_context = Database::PreventCrossDatabaseModification.cross_database_context + return yield unless cross_database_context && cross_database_context[:enabled] + + transaction_tracker_enabled_was = cross_database_context[:enabled] + cross_database_context[:enabled] = false + + yield + ensure + cross_database_context[:enabled] = transaction_tracker_enabled_was if cross_database_context + end + + def self.with_cross_database_modification_prevented(log_only: false) + subscriber = ActiveSupport::Notifications.subscribe('sql.active_record') do |name, start, finish, id, payload| + prevent_cross_database_modification!(payload[:connection], payload[:sql], log_only: log_only) + end + + reset_cross_database_context! + cross_database_context.merge!(enabled: true, subscriber: subscriber) + + yield if block_given? + ensure + cleanup_with_cross_database_modification_prevented if block_given? + end + + def self.cleanup_with_cross_database_modification_prevented + if cross_database_context + ActiveSupport::Notifications.unsubscribe(PreventCrossDatabaseModification.cross_database_context[:subscriber]) + cross_database_context[:enabled] = false + end + end + + def self.cross_database_context + Thread.current[:transaction_tracker] + end + + def self.reset_cross_database_context! + Thread.current[:transaction_tracker] = initial_data + end + + def self.initial_data + { + enabled: false, + transaction_depth_by_db: Hash.new { |h, k| h[k] = 0 }, + modified_tables_by_db: Hash.new { |h, k| h[k] = Set.new } + } + end + + # rubocop:disable Metrics/AbcSize + def self.prevent_cross_database_modification!(connection, sql, log_only: false) + return unless cross_database_context + return unless cross_database_context[:enabled] + + return if connection.pool.instance_of?(ActiveRecord::ConnectionAdapters::NullPool) + return if in_factory_bot_create? + + database = connection.pool.db_config.name + + # We ignore BEGIN in tests as this is the outer transaction for + # DatabaseCleaner + if sql.start_with?('SAVEPOINT') || (!Rails.env.test? && sql.start_with?('BEGIN')) + cross_database_context[:transaction_depth_by_db][database] += 1 + + return + elsif sql.start_with?('RELEASE SAVEPOINT', 'ROLLBACK TO SAVEPOINT') || (!Rails.env.test? && sql.start_with?('ROLLBACK', 'COMMIT')) + cross_database_context[:transaction_depth_by_db][database] -= 1 + if cross_database_context[:transaction_depth_by_db][database] <= 0 + cross_database_context[:modified_tables_by_db][database].clear + end + + return + end + + return if cross_database_context[:transaction_depth_by_db].values.all?(&:zero?) + + # PgQuery might fail in some cases due to limited nesting: + # https://github.com/pganalyze/pg_query/issues/209 + parsed_query = PgQuery.parse(sql) + tables = sql.downcase.include?(' for update') ? parsed_query.tables : parsed_query.dml_tables + + # We have some code where plans and gitlab_subscriptions are lazily + # created and this causes lots of spec failures + # https://gitlab.com/gitlab-org/gitlab/-/issues/343394 + tables -= %w[plans gitlab_subscriptions] + + return if tables.empty? + + # All migrations will write to schema_migrations in the same transaction. + # It's safe to ignore this since schema_migrations exists in all + # databases + return if tables == ['schema_migrations'] + + cross_database_context[:modified_tables_by_db][database].merge(tables) + all_tables = cross_database_context[:modified_tables_by_db].values.map(&:to_a).flatten + schemas = ::Gitlab::Database::GitlabSchema.table_schemas(all_tables) + + if schemas.many? + message = "Cross-database data modification of '#{schemas.to_a.join(", ")}' were detected within " \ + "a transaction modifying the '#{all_tables.to_a.join(", ")}' tables." \ + "Please refer to https://docs.gitlab.com/ee/development/database/multiple_databases.html#removing-cross-database-transactions for details on how to resolve this exception." + + if schemas.any? { |s| s.to_s.start_with?("undefined") } + message += " The gitlab_schema was undefined for one or more of the tables in this transaction. Any new tables must be added to lib/gitlab/database/gitlab_schemas.yml ." + end + + begin + raise Database::PreventCrossDatabaseModification::CrossDatabaseModificationAcrossUnsupportedTablesError, message + rescue Database::PreventCrossDatabaseModification::CrossDatabaseModificationAcrossUnsupportedTablesError => e + ::Gitlab::ErrorTracking.track_exception(e, { gitlab_schemas: schemas, tables: all_tables, query: PgQuery.normalize(sql) }) + raise unless log_only + end + end + rescue StandardError => e + # Extra safety net to ensure we never raise in production + # if something goes wrong in this logic + Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e) + end + # rubocop:enable Metrics/AbcSize + + # We ignore execution in the #create method from FactoryBot + # because it is not representative of real code we run in + # production. There are far too many false positives caused + # by instantiating objects in different `gitlab_schema` in a + # FactoryBot `create`. + def self.in_factory_bot_create? + caller_locations.any? { |l| l.path.end_with?('lib/factory_bot/evaluation.rb') && l.label == 'create' } + end + end + end +end diff --git a/lib/gitlab/middleware/detect_cross_database_modification.rb b/lib/gitlab/middleware/detect_cross_database_modification.rb new file mode 100644 index 00000000000000..9a04afdf3c6f4f --- /dev/null +++ b/lib/gitlab/middleware/detect_cross_database_modification.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +module Gitlab + module Middleware + class DetectCrossDatabaseModification + def initialize(app) + @app = app + end + + def call(env) + if Feature.enabled?(:detect_cross_database_modification, default_enabled: :yaml) + ::Gitlab::Database::PreventCrossDatabaseModification.with_cross_database_modification_prevented(log_only: true) do + @app.call(env) + end + else + @app.call(env) + end + end + end + end +end diff --git a/lib/gitlab/sidekiq_middleware.rb b/lib/gitlab/sidekiq_middleware.rb index 69802fd6217e42..d72631113a30e8 100644 --- a/lib/gitlab/sidekiq_middleware.rb +++ b/lib/gitlab/sidekiq_middleware.rb @@ -41,6 +41,12 @@ def self.server_configurator(metrics: true, arguments_logger: true, memory_kille # so we can compare the latest WAL location against replica chain.add ::Gitlab::SidekiqMiddleware::DuplicateJobs::Server chain.add ::Gitlab::Database::LoadBalancing::SidekiqServerMiddleware + + # Do not use middleware in tests since we already wrap all tests with + # `PreventCrossDatabaseModification` logic . Also the environment + # variable offers a quick way to disable this check if it is causing + # problems + chain.add ::Gitlab::SidekiqMiddleware::DetectCrossDatabaseModification unless Rails.env.test? || ENV['DISABLE_CROSS_DATABASE_MODIFICATION_DETECTION'] end end diff --git a/lib/gitlab/sidekiq_middleware/detect_cross_database_modification.rb b/lib/gitlab/sidekiq_middleware/detect_cross_database_modification.rb new file mode 100644 index 00000000000000..a3a52eca35d923 --- /dev/null +++ b/lib/gitlab/sidekiq_middleware/detect_cross_database_modification.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module Gitlab + module SidekiqMiddleware + class DetectCrossDatabaseModification + def call(worker, job, queue) + if Feature.enabled?(:detect_cross_database_modification, default_enabled: :yaml) + ::Gitlab::Database::PreventCrossDatabaseModification.with_cross_database_modification_prevented(log_only: true) do + yield + end + else + yield + end + end + end + end +end diff --git a/spec/support_specs/database/prevent_cross_database_modification_spec.rb b/spec/lib/gitlab/database/prevent_cross_database_modification_spec.rb similarity index 90% rename from spec/support_specs/database/prevent_cross_database_modification_spec.rb rename to spec/lib/gitlab/database/prevent_cross_database_modification_spec.rb index 365f1f32858df5..11d5e33d1a06bc 100644 --- a/spec/support_specs/database/prevent_cross_database_modification_spec.rb +++ b/spec/lib/gitlab/database/prevent_cross_database_modification_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe 'Database::PreventCrossDatabaseModification' do +RSpec.describe Gitlab::Database::PreventCrossDatabaseModification do let_it_be(:pipeline, refind: true) { create(:ci_pipeline) } let_it_be(:project, refind: true) { create(:project) } @@ -122,10 +122,10 @@ def run_queries include_examples 'successful examples' end - describe '#allow_cross_database_modification_within_transaction' do + describe '.allow_cross_database_modification_within_transaction' do it 'skips raising error' do expect do - Gitlab::Database.allow_cross_database_modification_within_transaction(url: 'gitlab-issue') do + ::Gitlab::Database::PreventCrossDatabaseModification.allow_cross_database_modification_within_transaction(url: 'gitlab-issue') do Project.transaction do pipeline.touch project.touch @@ -136,7 +136,7 @@ def run_queries it 'skips raising error on factory creation' do expect do - Gitlab::Database.allow_cross_database_modification_within_transaction(url: 'gitlab-issue') do + ::Gitlab::Database::PreventCrossDatabaseModification.allow_cross_database_modification_within_transaction(url: 'gitlab-issue') do ApplicationRecord.transaction do create(:ci_pipeline) end diff --git a/spec/lib/gitlab/middleware/detect_cross_database_modification_spec.rb b/spec/lib/gitlab/middleware/detect_cross_database_modification_spec.rb new file mode 100644 index 00000000000000..42ba9ed2a0cdc6 --- /dev/null +++ b/spec/lib/gitlab/middleware/detect_cross_database_modification_spec.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Middleware::DetectCrossDatabaseModification do + describe '#call' do + let(:app) { double(:app) } + let(:middleware) { described_class.new(app) } + let(:env) { {} } + + subject { middleware.call(env) } + + context 'when there is a cross modification' do + before do + allow(app).to receive(:call) do + Project.transaction do + Project.where(id: -1).update_all(id: -1) + ::Ci::Pipeline.where(id: -1).update_all(id: -1) + end + end + end + + it 'detects cross modifications and logs them' do + expect(::Gitlab::ErrorTracking).to receive(:track_exception) + + subject + end + + context 'when the detect_cross_database_modification is disabled' do + before do + stub_feature_flags(detect_cross_database_modification: false) + end + + it 'does not detect cross modifications' do + expect(::Gitlab::ErrorTracking).not_to receive(:track_exception) + + subject + end + end + end + + context 'when there is no cross modification' do + before do + allow(app).to receive(:call) do + Project.transaction do + Project.where(id: -1).update_all(id: -1) + Namespace.where(id: -1).update_all(id: -1) + end + end + end + + it 'does not log anything' do + expect(::Gitlab::ErrorTracking).not_to receive(:track_exception) + + subject + end + end + end +end diff --git a/spec/lib/gitlab/sidekiq_middleware/detect_cross_database_modification_spec.rb b/spec/lib/gitlab/sidekiq_middleware/detect_cross_database_modification_spec.rb new file mode 100644 index 00000000000000..eb20e6c8cd9dc6 --- /dev/null +++ b/spec/lib/gitlab/sidekiq_middleware/detect_cross_database_modification_spec.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::SidekiqMiddleware::DetectCrossDatabaseModification do + describe '#call' do + let(:worker) { double(:worker) } + let(:job) { { 'jid' => 'job123' } } + let(:queue) { 'some-queue' } + let(:middleware) { described_class.new } + + def do_queries + end + + subject { middleware.call(worker, job, queue) { do_queries } } + + context 'when there is a cross modification' do + def do_queries + Project.transaction do + Project.where(id: -1).update_all(id: -1) + ::Ci::Pipeline.where(id: -1).update_all(id: -1) + end + end + + it 'detects cross modifications and logs them' do + expect(::Gitlab::ErrorTracking).to receive(:track_exception) + + subject + end + + context 'when the detect_cross_database_modification is disabled' do + before do + stub_feature_flags(detect_cross_database_modification: false) + end + + it 'does not detect cross modifications' do + expect(::Gitlab::ErrorTracking).not_to receive(:track_exception) + + subject + end + end + end + + context 'when there is no cross modification' do + def do_queries + Project.transaction do + Project.where(id: -1).update_all(id: -1) + Namespace.where(id: -1).update_all(id: -1) + end + end + + it 'does not log anything' do + expect(::Gitlab::ErrorTracking).not_to receive(:track_exception) + + subject + end + end + end +end diff --git a/spec/support/database/cross-database-modification-allowlist.yml b/spec/support/database/cross-database-modification-allowlist.yml index e0166f2ae3f7e5..cd5bd8029ae80d 100644 --- a/spec/support/database/cross-database-modification-allowlist.yml +++ b/spec/support/database/cross-database-modification-allowlist.yml @@ -50,6 +50,8 @@ - "./spec/lib/gitlab/email/handler/create_merge_request_handler_spec.rb" - "./spec/lib/gitlab/email/handler/create_note_handler_spec.rb" - "./spec/lib/gitlab/email/handler/create_note_on_issuable_handler_spec.rb" +- "./spec/lib/gitlab/middleware/detect_cross_database_modification_spec.rb" +- "./spec/lib/gitlab/sidekiq_middleware/detect_cross_database_modification_spec.rb" - "./spec/lib/peek/views/active_record_spec.rb" - "./spec/models/ci/build_need_spec.rb" - "./spec/models/ci/build_trace_chunk_spec.rb" diff --git a/spec/support/database/prevent_cross_database_modification.rb b/spec/support/database/prevent_cross_database_modification.rb index 09a87f92072751..33644913af4a25 100644 --- a/spec/support/database/prevent_cross_database_modification.rb +++ b/spec/support/database/prevent_cross_database_modification.rb @@ -1,138 +1,19 @@ # frozen_string_literal: true -module Database - module PreventCrossDatabaseModification - CrossDatabaseModificationAcrossUnsupportedTablesError = Class.new(StandardError) - - module GitlabDatabaseMixin - def allow_cross_database_modification_within_transaction(url:) - cross_database_context = Database::PreventCrossDatabaseModification.cross_database_context - return yield unless cross_database_context && cross_database_context[:enabled] - - transaction_tracker_enabled_was = cross_database_context[:enabled] - cross_database_context[:enabled] = false - - yield - ensure - cross_database_context[:enabled] = transaction_tracker_enabled_was if cross_database_context - end - end - - module SpecHelpers - def with_cross_database_modification_prevented - subscriber = ActiveSupport::Notifications.subscribe('sql.active_record') do |name, start, finish, id, payload| - PreventCrossDatabaseModification.prevent_cross_database_modification!(payload[:connection], payload[:sql]) - end - - PreventCrossDatabaseModification.reset_cross_database_context! - PreventCrossDatabaseModification.cross_database_context.merge!(enabled: true, subscriber: subscriber) - - yield if block_given? - ensure - cleanup_with_cross_database_modification_prevented if block_given? - end - - def cleanup_with_cross_database_modification_prevented - if PreventCrossDatabaseModification.cross_database_context - ActiveSupport::Notifications.unsubscribe(PreventCrossDatabaseModification.cross_database_context[:subscriber]) - PreventCrossDatabaseModification.cross_database_context[:enabled] = false - end - end - end - - def self.cross_database_context - Thread.current[:transaction_tracker] - end - - def self.reset_cross_database_context! - Thread.current[:transaction_tracker] = initial_data - end - - def self.initial_data - { - enabled: false, - transaction_depth_by_db: Hash.new { |h, k| h[k] = 0 }, - modified_tables_by_db: Hash.new { |h, k| h[k] = Set.new } - } - end - - def self.prevent_cross_database_modification!(connection, sql) - return unless cross_database_context - return unless cross_database_context[:enabled] - - return if connection.pool.instance_of?(ActiveRecord::ConnectionAdapters::NullPool) - return if in_factory_bot_create? - - database = connection.pool.db_config.name - - if sql.start_with?('SAVEPOINT') - cross_database_context[:transaction_depth_by_db][database] += 1 - - return - elsif sql.start_with?('RELEASE SAVEPOINT', 'ROLLBACK TO SAVEPOINT') - cross_database_context[:transaction_depth_by_db][database] -= 1 - if cross_database_context[:transaction_depth_by_db][database] <= 0 - cross_database_context[:modified_tables_by_db][database].clear - end - - return - end - - return if cross_database_context[:transaction_depth_by_db].values.all?(&:zero?) - - # PgQuery might fail in some cases due to limited nesting: - # https://github.com/pganalyze/pg_query/issues/209 - parsed_query = PgQuery.parse(sql) - tables = sql.downcase.include?(' for update') ? parsed_query.tables : parsed_query.dml_tables - - # We have some code where plans and gitlab_subscriptions are lazily - # created and this causes lots of spec failures - # https://gitlab.com/gitlab-org/gitlab/-/issues/343394 - tables -= %w[plans gitlab_subscriptions] - - return if tables.empty? - - # All migrations will write to schema_migrations in the same transaction. - # It's safe to ignore this since schema_migrations exists in all - # databases - return if tables == ['schema_migrations'] - - cross_database_context[:modified_tables_by_db][database].merge(tables) - - all_tables = cross_database_context[:modified_tables_by_db].values.map(&:to_a).flatten - schemas = ::Gitlab::Database::GitlabSchema.table_schemas(all_tables) - - if schemas.many? - message = "Cross-database data modification of '#{schemas.to_a.join(", ")}' were detected within " \ - "a transaction modifying the '#{all_tables.to_a.join(", ")}' tables." \ - "Please refer to https://docs.gitlab.com/ee/development/database/multiple_databases.html#removing-cross-database-transactions for details on how to resolve this exception." - - if schemas.any? { |s| s.to_s.start_with?("undefined") } - message += " The gitlab_schema was undefined for one or more of the tables in this transaction. Any new tables must be added to spec/support/database/gitlab_schemas.yml ." - end - - raise Database::PreventCrossDatabaseModification::CrossDatabaseModificationAcrossUnsupportedTablesError, message - end - end +module PreventCrossDatabaseModificationSpecHelpers + def with_cross_database_modification_prevented(...) + ::Gitlab::Database::PreventCrossDatabaseModification.with_cross_database_modification_prevented(...) + end - # We ignore execution in the #create method from FactoryBot - # because it is not representative of real code we run in - # production. There are far too many false positives caused - # by instantiating objects in different `gitlab_schema` in a - # FactoryBot `create`. - def self.in_factory_bot_create? - caller_locations.any? { |l| l.path.end_with?('lib/factory_bot/evaluation.rb') && l.label == 'create' } - end + def cleanup_with_cross_database_modification_prevented + ::Gitlab::Database::PreventCrossDatabaseModification.cleanup_with_cross_database_modification_prevented end end -Gitlab::Database.singleton_class.prepend( - Database::PreventCrossDatabaseModification::GitlabDatabaseMixin) - CROSS_DB_MODIFICATION_ALLOW_LIST = Set.new(YAML.load_file(File.join(__dir__, 'cross-database-modification-allowlist.yml'))).freeze RSpec.configure do |config| - config.include(::Database::PreventCrossDatabaseModification::SpecHelpers) + config.include(PreventCrossDatabaseModificationSpecHelpers) # Using before and after blocks because the around block causes problems with the let_it_be # record creations. It makes an extra savepoint which breaks the transaction count logic. -- GitLab From 74b92e453354734fe34fd0a7ed7dbdd4be77dde8 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Tue, 9 Nov 2021 06:40:28 +0000 Subject: [PATCH 2/3] Refactor prevent_cross_database_modification to be analyzer --- .../initializers/database_query_analyzers.rb | 1 + .../prevent_cross_database_modification.rb | 35 ++++++++++--------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/config/initializers/database_query_analyzers.rb b/config/initializers/database_query_analyzers.rb index a2db2d2a2b8c20..259a3a4f92d7d6 100644 --- a/config/initializers/database_query_analyzers.rb +++ b/config/initializers/database_query_analyzers.rb @@ -4,6 +4,7 @@ if Gitlab.dev_or_test_env? || Gitlab::Utils.to_boolean(ENV['GITLAB_ENABLE_QUERY_ANALYZERS'], default: false) Gitlab::Database::QueryAnalyzer.instance.hook! Gitlab::Database::QueryAnalyzer.instance.all_analyzers.append(::Gitlab::Database::QueryAnalyzers::GitlabSchemasMetrics) + Gitlab::Database::QueryAnalyzer.instance.all_analyzers.append(::Gitlab::Database::PreventCrossDatabaseModification) Gitlab::Application.configure do |config| config.middleware.use(Gitlab::Middleware::QueryAnalyzer) diff --git a/lib/gitlab/database/prevent_cross_database_modification.rb b/lib/gitlab/database/prevent_cross_database_modification.rb index d681a86b3a0f2c..cb56c13a886344 100644 --- a/lib/gitlab/database/prevent_cross_database_modification.rb +++ b/lib/gitlab/database/prevent_cross_database_modification.rb @@ -2,7 +2,7 @@ module Gitlab module Database - module PreventCrossDatabaseModification + class PreventCrossDatabaseModification < Database::QueryAnalyzer::Base CrossDatabaseModificationAcrossUnsupportedTablesError = Class.new(StandardError) # This method will allow cross database modifications within the block @@ -24,12 +24,8 @@ def self.allow_cross_database_modification_within_transaction(url:) end def self.with_cross_database_modification_prevented(log_only: false) - subscriber = ActiveSupport::Notifications.subscribe('sql.active_record') do |name, start, finish, id, payload| - prevent_cross_database_modification!(payload[:connection], payload[:sql], log_only: log_only) - end - reset_cross_database_context! - cross_database_context.merge!(enabled: true, subscriber: subscriber) + cross_database_context.merge!(enabled: true, log_only: log_only) yield if block_given? ensure @@ -38,7 +34,6 @@ def self.with_cross_database_modification_prevented(log_only: false) def self.cleanup_with_cross_database_modification_prevented if cross_database_context - ActiveSupport::Notifications.unsubscribe(PreventCrossDatabaseModification.cross_database_context[:subscriber]) cross_database_context[:enabled] = false end end @@ -55,20 +50,29 @@ def self.initial_data { enabled: false, transaction_depth_by_db: Hash.new { |h, k| h[k] = 0 }, - modified_tables_by_db: Hash.new { |h, k| h[k] = Set.new } + modified_tables_by_db: Hash.new { |h, k| h[k] = Set.new }, + log_only: false } end + def self.enabled? + true + end + # rubocop:disable Metrics/AbcSize - def self.prevent_cross_database_modification!(connection, sql, log_only: false) - return unless cross_database_context - return unless cross_database_context[:enabled] + def self.analyze(parsed) + return false unless cross_database_context + return false unless cross_database_context[:enabled] + + connection = parsed.connection + return false if connection.pool.instance_of?(ActiveRecord::ConnectionAdapters::NullPool) - return if connection.pool.instance_of?(ActiveRecord::ConnectionAdapters::NullPool) return if in_factory_bot_create? database = connection.pool.db_config.name + sql = parsed.sql + # We ignore BEGIN in tests as this is the outer transaction for # DatabaseCleaner if sql.start_with?('SAVEPOINT') || (!Rails.env.test? && sql.start_with?('BEGIN')) @@ -88,8 +92,7 @@ def self.prevent_cross_database_modification!(connection, sql, log_only: false) # PgQuery might fail in some cases due to limited nesting: # https://github.com/pganalyze/pg_query/issues/209 - parsed_query = PgQuery.parse(sql) - tables = sql.downcase.include?(' for update') ? parsed_query.tables : parsed_query.dml_tables + tables = sql.downcase.include?(' for update') ? parsed.pg.tables : parsed.pg.dml_tables # We have some code where plans and gitlab_subscriptions are lazily # created and this causes lots of spec failures @@ -120,7 +123,7 @@ def self.prevent_cross_database_modification!(connection, sql, log_only: false) raise Database::PreventCrossDatabaseModification::CrossDatabaseModificationAcrossUnsupportedTablesError, message rescue Database::PreventCrossDatabaseModification::CrossDatabaseModificationAcrossUnsupportedTablesError => e ::Gitlab::ErrorTracking.track_exception(e, { gitlab_schemas: schemas, tables: all_tables, query: PgQuery.normalize(sql) }) - raise unless log_only + raise unless cross_database_context[:log_only] end end rescue StandardError => e @@ -136,7 +139,7 @@ def self.prevent_cross_database_modification!(connection, sql, log_only: false) # by instantiating objects in different `gitlab_schema` in a # FactoryBot `create`. def self.in_factory_bot_create? - caller_locations.any? { |l| l.path.end_with?('lib/factory_bot/evaluation.rb') && l.label == 'create' } + Rails.env.test? && caller_locations.any? { |l| l.path.end_with?('lib/factory_bot/evaluation.rb') && l.label == 'create' } end end end -- GitLab From 139e5e1f1e875d7583c864c284f06ae95531126e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Trzci=C5=84ski?= Date: Wed, 10 Nov 2021 10:54:36 +0100 Subject: [PATCH 3/3] Move `PreventCrossDatabaseModification` into `QueryAnalyzers` This adapts the `PreventCrossDatabaseModification` to latest `QueryAnalyzers` structure. It still does not fix all structural optimisations that can be done. --- .../initializers/database_query_analyzers.rb | 2 +- .../prevent_cross_database_modification.rb | 146 ----------------- .../prevent_cross_database_modification.rb | 148 ++++++++++++++++++ .../detect_cross_database_modification.rb | 2 +- .../detect_cross_database_modification.rb | 2 +- ...revent_cross_database_modification_spec.rb | 6 +- .../prevent_cross_database_modification.rb | 4 +- 7 files changed, 156 insertions(+), 154 deletions(-) delete mode 100644 lib/gitlab/database/prevent_cross_database_modification.rb create mode 100644 lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb rename spec/lib/gitlab/database/{ => query_analyzers}/prevent_cross_database_modification_spec.rb (92%) diff --git a/config/initializers/database_query_analyzers.rb b/config/initializers/database_query_analyzers.rb index 259a3a4f92d7d6..2f1854e77227d8 100644 --- a/config/initializers/database_query_analyzers.rb +++ b/config/initializers/database_query_analyzers.rb @@ -4,7 +4,7 @@ if Gitlab.dev_or_test_env? || Gitlab::Utils.to_boolean(ENV['GITLAB_ENABLE_QUERY_ANALYZERS'], default: false) Gitlab::Database::QueryAnalyzer.instance.hook! Gitlab::Database::QueryAnalyzer.instance.all_analyzers.append(::Gitlab::Database::QueryAnalyzers::GitlabSchemasMetrics) - Gitlab::Database::QueryAnalyzer.instance.all_analyzers.append(::Gitlab::Database::PreventCrossDatabaseModification) + Gitlab::Database::QueryAnalyzer.instance.all_analyzers.append(::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification) Gitlab::Application.configure do |config| config.middleware.use(Gitlab::Middleware::QueryAnalyzer) diff --git a/lib/gitlab/database/prevent_cross_database_modification.rb b/lib/gitlab/database/prevent_cross_database_modification.rb deleted file mode 100644 index cb56c13a886344..00000000000000 --- a/lib/gitlab/database/prevent_cross_database_modification.rb +++ /dev/null @@ -1,146 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Database - class PreventCrossDatabaseModification < Database::QueryAnalyzer::Base - CrossDatabaseModificationAcrossUnsupportedTablesError = Class.new(StandardError) - - # This method will allow cross database modifications within the block - # Example: - # - # allow_cross_database_modification_within_transaction(url: 'url-to-an-issue') do - # create(:build) # inserts ci_build and project record in one transaction - # end - def self.allow_cross_database_modification_within_transaction(url:) - cross_database_context = Database::PreventCrossDatabaseModification.cross_database_context - return yield unless cross_database_context && cross_database_context[:enabled] - - transaction_tracker_enabled_was = cross_database_context[:enabled] - cross_database_context[:enabled] = false - - yield - ensure - cross_database_context[:enabled] = transaction_tracker_enabled_was if cross_database_context - end - - def self.with_cross_database_modification_prevented(log_only: false) - reset_cross_database_context! - cross_database_context.merge!(enabled: true, log_only: log_only) - - yield if block_given? - ensure - cleanup_with_cross_database_modification_prevented if block_given? - end - - def self.cleanup_with_cross_database_modification_prevented - if cross_database_context - cross_database_context[:enabled] = false - end - end - - def self.cross_database_context - Thread.current[:transaction_tracker] - end - - def self.reset_cross_database_context! - Thread.current[:transaction_tracker] = initial_data - end - - def self.initial_data - { - enabled: false, - transaction_depth_by_db: Hash.new { |h, k| h[k] = 0 }, - modified_tables_by_db: Hash.new { |h, k| h[k] = Set.new }, - log_only: false - } - end - - def self.enabled? - true - end - - # rubocop:disable Metrics/AbcSize - def self.analyze(parsed) - return false unless cross_database_context - return false unless cross_database_context[:enabled] - - connection = parsed.connection - return false if connection.pool.instance_of?(ActiveRecord::ConnectionAdapters::NullPool) - - return if in_factory_bot_create? - - database = connection.pool.db_config.name - - sql = parsed.sql - - # We ignore BEGIN in tests as this is the outer transaction for - # DatabaseCleaner - if sql.start_with?('SAVEPOINT') || (!Rails.env.test? && sql.start_with?('BEGIN')) - cross_database_context[:transaction_depth_by_db][database] += 1 - - return - elsif sql.start_with?('RELEASE SAVEPOINT', 'ROLLBACK TO SAVEPOINT') || (!Rails.env.test? && sql.start_with?('ROLLBACK', 'COMMIT')) - cross_database_context[:transaction_depth_by_db][database] -= 1 - if cross_database_context[:transaction_depth_by_db][database] <= 0 - cross_database_context[:modified_tables_by_db][database].clear - end - - return - end - - return if cross_database_context[:transaction_depth_by_db].values.all?(&:zero?) - - # PgQuery might fail in some cases due to limited nesting: - # https://github.com/pganalyze/pg_query/issues/209 - tables = sql.downcase.include?(' for update') ? parsed.pg.tables : parsed.pg.dml_tables - - # We have some code where plans and gitlab_subscriptions are lazily - # created and this causes lots of spec failures - # https://gitlab.com/gitlab-org/gitlab/-/issues/343394 - tables -= %w[plans gitlab_subscriptions] - - return if tables.empty? - - # All migrations will write to schema_migrations in the same transaction. - # It's safe to ignore this since schema_migrations exists in all - # databases - return if tables == ['schema_migrations'] - - cross_database_context[:modified_tables_by_db][database].merge(tables) - all_tables = cross_database_context[:modified_tables_by_db].values.map(&:to_a).flatten - schemas = ::Gitlab::Database::GitlabSchema.table_schemas(all_tables) - - if schemas.many? - message = "Cross-database data modification of '#{schemas.to_a.join(", ")}' were detected within " \ - "a transaction modifying the '#{all_tables.to_a.join(", ")}' tables." \ - "Please refer to https://docs.gitlab.com/ee/development/database/multiple_databases.html#removing-cross-database-transactions for details on how to resolve this exception." - - if schemas.any? { |s| s.to_s.start_with?("undefined") } - message += " The gitlab_schema was undefined for one or more of the tables in this transaction. Any new tables must be added to lib/gitlab/database/gitlab_schemas.yml ." - end - - begin - raise Database::PreventCrossDatabaseModification::CrossDatabaseModificationAcrossUnsupportedTablesError, message - rescue Database::PreventCrossDatabaseModification::CrossDatabaseModificationAcrossUnsupportedTablesError => e - ::Gitlab::ErrorTracking.track_exception(e, { gitlab_schemas: schemas, tables: all_tables, query: PgQuery.normalize(sql) }) - raise unless cross_database_context[:log_only] - end - end - rescue StandardError => e - # Extra safety net to ensure we never raise in production - # if something goes wrong in this logic - Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e) - end - # rubocop:enable Metrics/AbcSize - - # We ignore execution in the #create method from FactoryBot - # because it is not representative of real code we run in - # production. There are far too many false positives caused - # by instantiating objects in different `gitlab_schema` in a - # FactoryBot `create`. - def self.in_factory_bot_create? - Rails.env.test? && caller_locations.any? { |l| l.path.end_with?('lib/factory_bot/evaluation.rb') && l.label == 'create' } - end - end - end -end diff --git a/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb b/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb new file mode 100644 index 00000000000000..62c5e913a17c77 --- /dev/null +++ b/lib/gitlab/database/query_analyzers/prevent_cross_database_modification.rb @@ -0,0 +1,148 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module QueryAnalyzers + class PreventCrossDatabaseModification < Database::QueryAnalyzers::Base + CrossDatabaseModificationAcrossUnsupportedTablesError = Class.new(StandardError) + + # This method will allow cross database modifications within the block + # Example: + # + # allow_cross_database_modification_within_transaction(url: 'url-to-an-issue') do + # create(:build) # inserts ci_build and project record in one transaction + # end + def self.allow_cross_database_modification_within_transaction(url:) + cross_database_context = self.cross_database_context + return yield unless cross_database_context && cross_database_context[:enabled] + + transaction_tracker_enabled_was = cross_database_context[:enabled] + cross_database_context[:enabled] = false + + yield + ensure + cross_database_context[:enabled] = transaction_tracker_enabled_was if cross_database_context + end + + def self.with_cross_database_modification_prevented(log_only: false) + reset_cross_database_context! + cross_database_context.merge!(enabled: true, log_only: log_only) + + yield if block_given? + ensure + cleanup_with_cross_database_modification_prevented if block_given? + end + + def self.cleanup_with_cross_database_modification_prevented + if cross_database_context + cross_database_context[:enabled] = false + end + end + + def self.cross_database_context + Thread.current[:transaction_tracker] + end + + def self.reset_cross_database_context! + Thread.current[:transaction_tracker] = initial_data + end + + def self.initial_data + { + enabled: false, + transaction_depth_by_db: Hash.new { |h, k| h[k] = 0 }, + modified_tables_by_db: Hash.new { |h, k| h[k] = Set.new }, + log_only: false + } + end + + def self.enabled? + true + end + + # rubocop:disable Metrics/AbcSize + def self.analyze(parsed) + return false unless cross_database_context + return false unless cross_database_context[:enabled] + + connection = parsed.connection + return false if connection.pool.instance_of?(ActiveRecord::ConnectionAdapters::NullPool) + + return if in_factory_bot_create? + + database = connection.pool.db_config.name + + sql = parsed.sql + + # We ignore BEGIN in tests as this is the outer transaction for + # DatabaseCleaner + if sql.start_with?('SAVEPOINT') || (!Rails.env.test? && sql.start_with?('BEGIN')) + cross_database_context[:transaction_depth_by_db][database] += 1 + + return + elsif sql.start_with?('RELEASE SAVEPOINT', 'ROLLBACK TO SAVEPOINT') || (!Rails.env.test? && sql.start_with?('ROLLBACK', 'COMMIT')) + cross_database_context[:transaction_depth_by_db][database] -= 1 + if cross_database_context[:transaction_depth_by_db][database] <= 0 + cross_database_context[:modified_tables_by_db][database].clear + end + + return + end + + return if cross_database_context[:transaction_depth_by_db].values.all?(&:zero?) + + # PgQuery might fail in some cases due to limited nesting: + # https://github.com/pganalyze/pg_query/issues/209 + tables = sql.downcase.include?(' for update') ? parsed.pg.tables : parsed.pg.dml_tables + + # We have some code where plans and gitlab_subscriptions are lazily + # created and this causes lots of spec failures + # https://gitlab.com/gitlab-org/gitlab/-/issues/343394 + tables -= %w[plans gitlab_subscriptions] + + return if tables.empty? + + # All migrations will write to schema_migrations in the same transaction. + # It's safe to ignore this since schema_migrations exists in all + # databases + return if tables == ['schema_migrations'] + + cross_database_context[:modified_tables_by_db][database].merge(tables) + all_tables = cross_database_context[:modified_tables_by_db].values.map(&:to_a).flatten + schemas = ::Gitlab::Database::GitlabSchema.table_schemas(all_tables) + + if schemas.many? + message = "Cross-database data modification of '#{schemas.to_a.join(", ")}' were detected within " \ + "a transaction modifying the '#{all_tables.to_a.join(", ")}' tables." \ + "Please refer to https://docs.gitlab.com/ee/development/database/multiple_databases.html#removing-cross-database-transactions for details on how to resolve this exception." + + if schemas.any? { |s| s.to_s.start_with?("undefined") } + message += " The gitlab_schema was undefined for one or more of the tables in this transaction. Any new tables must be added to lib/gitlab/database/gitlab_schemas.yml ." + end + + begin + raise CrossDatabaseModificationAcrossUnsupportedTablesError, message + rescue CrossDatabaseModificationAcrossUnsupportedTablesError => e + ::Gitlab::ErrorTracking.track_exception(e, { gitlab_schemas: schemas, tables: all_tables, query: parsed.sql }) + raise unless cross_database_context[:log_only] + end + end + rescue StandardError => e + # Extra safety net to ensure we never raise in production + # if something goes wrong in this logic + Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e) + end + # rubocop:enable Metrics/AbcSize + + # We ignore execution in the #create method from FactoryBot + # because it is not representative of real code we run in + # production. There are far too many false positives caused + # by instantiating objects in different `gitlab_schema` in a + # FactoryBot `create`. + def self.in_factory_bot_create? + Rails.env.test? && caller_locations.any? { |l| l.path.end_with?('lib/factory_bot/evaluation.rb') && l.label == 'create' } + end + end + end + end +end diff --git a/lib/gitlab/middleware/detect_cross_database_modification.rb b/lib/gitlab/middleware/detect_cross_database_modification.rb index 9a04afdf3c6f4f..c577f188e2e3b3 100644 --- a/lib/gitlab/middleware/detect_cross_database_modification.rb +++ b/lib/gitlab/middleware/detect_cross_database_modification.rb @@ -9,7 +9,7 @@ def initialize(app) def call(env) if Feature.enabled?(:detect_cross_database_modification, default_enabled: :yaml) - ::Gitlab::Database::PreventCrossDatabaseModification.with_cross_database_modification_prevented(log_only: true) do + ::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification.with_cross_database_modification_prevented(log_only: true) do @app.call(env) end else diff --git a/lib/gitlab/sidekiq_middleware/detect_cross_database_modification.rb b/lib/gitlab/sidekiq_middleware/detect_cross_database_modification.rb index a3a52eca35d923..4fb6c5053541ea 100644 --- a/lib/gitlab/sidekiq_middleware/detect_cross_database_modification.rb +++ b/lib/gitlab/sidekiq_middleware/detect_cross_database_modification.rb @@ -5,7 +5,7 @@ module SidekiqMiddleware class DetectCrossDatabaseModification def call(worker, job, queue) if Feature.enabled?(:detect_cross_database_modification, default_enabled: :yaml) - ::Gitlab::Database::PreventCrossDatabaseModification.with_cross_database_modification_prevented(log_only: true) do + ::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification.with_cross_database_modification_prevented(log_only: true) do yield end else diff --git a/spec/lib/gitlab/database/prevent_cross_database_modification_spec.rb b/spec/lib/gitlab/database/query_analyzers/prevent_cross_database_modification_spec.rb similarity index 92% rename from spec/lib/gitlab/database/prevent_cross_database_modification_spec.rb rename to spec/lib/gitlab/database/query_analyzers/prevent_cross_database_modification_spec.rb index 11d5e33d1a06bc..bb8451b64c342e 100644 --- a/spec/lib/gitlab/database/prevent_cross_database_modification_spec.rb +++ b/spec/lib/gitlab/database/query_analyzers/prevent_cross_database_modification_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe Gitlab::Database::PreventCrossDatabaseModification do +RSpec.describe Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification do let_it_be(:pipeline, refind: true) { create(:ci_pipeline) } let_it_be(:project, refind: true) { create(:project) } @@ -125,7 +125,7 @@ def run_queries describe '.allow_cross_database_modification_within_transaction' do it 'skips raising error' do expect do - ::Gitlab::Database::PreventCrossDatabaseModification.allow_cross_database_modification_within_transaction(url: 'gitlab-issue') do + described_class.allow_cross_database_modification_within_transaction(url: 'gitlab-issue') do Project.transaction do pipeline.touch project.touch @@ -136,7 +136,7 @@ def run_queries it 'skips raising error on factory creation' do expect do - ::Gitlab::Database::PreventCrossDatabaseModification.allow_cross_database_modification_within_transaction(url: 'gitlab-issue') do + described_class.allow_cross_database_modification_within_transaction(url: 'gitlab-issue') do ApplicationRecord.transaction do create(:ci_pipeline) end diff --git a/spec/support/database/prevent_cross_database_modification.rb b/spec/support/database/prevent_cross_database_modification.rb index 33644913af4a25..b31c05b4823771 100644 --- a/spec/support/database/prevent_cross_database_modification.rb +++ b/spec/support/database/prevent_cross_database_modification.rb @@ -2,11 +2,11 @@ module PreventCrossDatabaseModificationSpecHelpers def with_cross_database_modification_prevented(...) - ::Gitlab::Database::PreventCrossDatabaseModification.with_cross_database_modification_prevented(...) + ::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification.with_cross_database_modification_prevented(...) end def cleanup_with_cross_database_modification_prevented - ::Gitlab::Database::PreventCrossDatabaseModification.cleanup_with_cross_database_modification_prevented + ::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification.cleanup_with_cross_database_modification_prevented end end -- GitLab