From d09f8d12d401723d3d14ede8e99a741874ea4787 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Trzci=C5=84ski?= Date: Tue, 9 Nov 2021 12:35:59 +0100 Subject: [PATCH 1/5] Ensure that QueryAnalyzer handles recursive calls It is possible that Query Analyzer to unintentionally trigger SQL query. To prevent Stack Too Deep discover recursive calls and prevent them. --- lib/gitlab/database/query_analyzer.rb | 17 ++++++++++++++++- spec/lib/gitlab/database/query_analyzer_spec.rb | 9 +++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/lib/gitlab/database/query_analyzer.rb b/lib/gitlab/database/query_analyzer.rb index 6cbeefa0aae596..5fe9d005a86744 100644 --- a/lib/gitlab/database/query_analyzer.rb +++ b/lib/gitlab/database/query_analyzer.rb @@ -15,7 +15,11 @@ class QueryAnalyzer def hook! @subscriber = ActiveSupport::Notifications.subscribe('sql.active_record') do |event| - process_sql(event.payload[:sql], event.payload[:connection]) + # In some cases analyzer code might trigger another SQL call + # to avoid stack too deep this detects recursive call of subscriber + with_ignored_recursive_calls do + process_sql(event.payload[:sql], event.payload[:connection]) + end end end @@ -57,6 +61,17 @@ def parse(sql, connection) nil end + + def with_ignored_recursive_calls + return if Thread.current[:query_analyzer_recursive] + + begin + Thread.current[:query_analyzer_recursive] = true + yield + ensure + Thread.current[:query_analyzer_recursive] = nil + end + end end end end diff --git a/spec/lib/gitlab/database/query_analyzer_spec.rb b/spec/lib/gitlab/database/query_analyzer_spec.rb index f0a3021927ba7b..2d9906bd3b0dd0 100644 --- a/spec/lib/gitlab/database/query_analyzer_spec.rb +++ b/spec/lib/gitlab/database/query_analyzer_spec.rb @@ -19,6 +19,15 @@ Project.connection.execute("SELECT 1 FROM projects") end + + it 'does prevent recursive execution' do + expect(analyzer).to receive(:enabled?).and_return(true) + expect(analyzer).to receive(:analyze) do + Project.connection.execute("SELECT 1 FROM projects") + end + + Project.connection.execute("SELECT 1 FROM projects") + end end describe '#process_sql' do -- GitLab From 99cc0e4058101e11bcfd78072daf7543145df817 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Trzci=C5=84ski?= Date: Tue, 9 Nov 2021 12:30:53 +0100 Subject: [PATCH 2/5] Make QueryAnalyzers to hold a state and hook to middleware One of the problems of QueryAnalyzers is that their state is evaluated each time. This is problematic for various reasons as we need to re-evaluate feature flags, or other dynamic conditions. This makes QueryAnalyzers behavior sticky. The enabled flag is checked exactly once for each request/worker/spec. If it resolves to true, the query analyzer will be enabled for the whole execution of a job. This makes also QueryAnalyzer to hold a context that is set/reset on boundaries of job execution. --- .../initializers/database_query_analyzers.rb | 5 ++ lib/gitlab/database/query_analyzer.rb | 62 ++++++++++++++---- lib/gitlab/database/query_analyzers/base.rb | 14 ++++- lib/gitlab/middleware/query_analyzer.rb | 15 +++++ lib/gitlab/sidekiq_middleware.rb | 1 + .../sidekiq_middleware/query_analyzer.rb | 11 ++++ .../gitlab/database/query_analyzer_spec.rb | 63 +++++++++++++++++-- spec/support/database/query_analyzer.rb | 14 +++++ 8 files changed, 168 insertions(+), 17 deletions(-) create mode 100644 lib/gitlab/middleware/query_analyzer.rb create mode 100644 lib/gitlab/sidekiq_middleware/query_analyzer.rb create mode 100644 spec/support/database/query_analyzer.rb diff --git a/config/initializers/database_query_analyzers.rb b/config/initializers/database_query_analyzers.rb index b2ddb6bccd1bb1..98b40e62a495c8 100644 --- a/config/initializers/database_query_analyzers.rb +++ b/config/initializers/database_query_analyzers.rb @@ -3,4 +3,9 @@ # Currently we register validator only for `dev` or `test` environment if Gitlab.dev_or_test_env? || Gitlab::Utils.to_boolean('GITLAB_ENABLE_QUERY_ANALYZERS', default: false) Gitlab::Database::QueryAnalyzer.instance.hook! + + Gitlab::Application.configure do |config| + # ApolloUploadServer::Middleware expects to find uploaded files ready to use + config.middleware.use(Gitlab::Middleware::QueryAnalyzer) + end end diff --git a/lib/gitlab/database/query_analyzer.rb b/lib/gitlab/database/query_analyzer.rb index 5fe9d005a86744..12a78acca087f5 100644 --- a/lib/gitlab/database/query_analyzer.rb +++ b/lib/gitlab/database/query_analyzer.rb @@ -4,15 +4,22 @@ module Gitlab module Database # The purpose of this class is to implement a various query analyzers based on `pg_query` # And process them all via `Gitlab::Database::QueryAnalyzers::*` + # + # Sometimes this might cause errors in specs. + # This is best to be disable with `describe '...', query_analyzers: false do` class QueryAnalyzer include ::Singleton - ANALYZERS = [].freeze - Parsed = Struct.new( :sql, :connection, :pg ) + attr_reader :all_analyzers + + def initialize + @all_analyzers = [] + end + def hook! @subscriber = ActiveSupport::Notifications.subscribe('sql.active_record') do |event| # In some cases analyzer code might trigger another SQL call @@ -23,30 +30,63 @@ def hook! end end - private + def within + return yield if enabled_analyzers + + begin! + + begin + yield + ensure + end! + end + end def process_sql(sql, connection) - analyzers = enabled_analyzers(connection) - return unless analyzers.any? + analyzers = enabled_analyzers + return unless analyzers&.any? parsed = parse(sql, connection) return unless parsed analyzers.each do |analyzer| analyzer.analyze(parsed) - rescue => e # rubocop:disable Style/RescueStandardError + rescue StandardError => e # We catch all standard errors to prevent validation errors to introduce fatal errors in production Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e) end end - def enabled_analyzers(connection) - ANALYZERS.select do |analyzer| - analyzer.enabled?(connection) - rescue StandardError => e # rubocop:disable Style/RescueStandardError - # We catch all standard errors to prevent validation errors to introduce fatal errors in production + private + + # Enable query analyzers + def begin! + analyzers = all_analyzers.select do |analyzer| + if analyzer.enabled? + analyzer.begin! + + true + end + rescue StandardError => e + Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e) + end + + Thread.current[:query_analyzer_enabled_analyzers] = analyzers + end + + # Disable enabled query analyzers + def end! + enabled_analyzers.select do |analyzer| + analyzer.end! + rescue StandardError => e Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e) end + + Thread.current[:query_analyzer_enabled_analyzers] = nil + end + + def enabled_analyzers + Thread.current[:query_analyzer_enabled_analyzers] end def parse(sql, connection) diff --git a/lib/gitlab/database/query_analyzers/base.rb b/lib/gitlab/database/query_analyzers/base.rb index e1be889c803ea6..d3a20523e74174 100644 --- a/lib/gitlab/database/query_analyzers/base.rb +++ b/lib/gitlab/database/query_analyzers/base.rb @@ -4,7 +4,19 @@ module Gitlab module Database module QueryAnalyzers class Base - def self.enabled?(connection) + def self.begin! + Thread.current[self.class.name] = {} + end + + def self.end! + Thread.current[self.class.name] = nil + end + + def self.context + Thread.current[self.class.name] + end + + def self.enabled? raise NotImplementedError end diff --git a/lib/gitlab/middleware/query_analyzer.rb b/lib/gitlab/middleware/query_analyzer.rb new file mode 100644 index 00000000000000..8d63c644a6985c --- /dev/null +++ b/lib/gitlab/middleware/query_analyzer.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +module Gitlab + module Middleware + class QueryAnalyzer + def initialize(app) + @app = app + end + + def call(env) + ::Gitlab::Database::QueryAnalyzer.instance.within { @app.call(env) } + end + end + end +end diff --git a/lib/gitlab/sidekiq_middleware.rb b/lib/gitlab/sidekiq_middleware.rb index c97b1632bf8073..fd3a5f715e8a5e 100644 --- a/lib/gitlab/sidekiq_middleware.rb +++ b/lib/gitlab/sidekiq_middleware.rb @@ -33,6 +33,7 @@ def self.server_configurator(metrics: true, arguments_logger: true, memory_kille chain.add ::Gitlab::SidekiqMiddleware::BatchLoader chain.add ::Gitlab::SidekiqMiddleware::InstrumentationLogger chain.add ::Gitlab::SidekiqMiddleware::AdminMode::Server + chain.add ::Gitlab::SidekiqMiddleware::QueryAnalyzer chain.add ::Gitlab::SidekiqVersioning::Middleware chain.add ::Gitlab::SidekiqStatus::ServerMiddleware chain.add ::Gitlab::SidekiqMiddleware::WorkerContext::Server diff --git a/lib/gitlab/sidekiq_middleware/query_analyzer.rb b/lib/gitlab/sidekiq_middleware/query_analyzer.rb new file mode 100644 index 00000000000000..4478fcd359456a --- /dev/null +++ b/lib/gitlab/sidekiq_middleware/query_analyzer.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +module Gitlab + module SidekiqMiddleware + class QueryAnalyzer + def call(worker, job, queue) + ::Gitlab::Database::QueryAnalyzer.instance.within { yield } + end + end + end +end diff --git a/spec/lib/gitlab/database/query_analyzer_spec.rb b/spec/lib/gitlab/database/query_analyzer_spec.rb index 2d9906bd3b0dd0..559b66194b0159 100644 --- a/spec/lib/gitlab/database/query_analyzer_spec.rb +++ b/spec/lib/gitlab/database/query_analyzer_spec.rb @@ -2,11 +2,16 @@ require 'spec_helper' -RSpec.describe Gitlab::Database::QueryAnalyzer do +RSpec.describe Gitlab::Database::QueryAnalyzer, query_analyzers: false do let(:analyzer) { double(:query_analyzer) } + let(:disabled_analyzer) { double(:disabled_query_analyzer) } before do - stub_const('Gitlab::Database::QueryAnalyzer::ANALYZERS', [analyzer]) + allow(described_class.instance).to receive(:all_analyzers).and_return([analyzer, disabled_analyzer]) + allow(analyzer).to receive(:enabled?).and_return(true) + allow(analyzer).to receive(:begin!) + allow(analyzer).to receive(:end!) + allow(disabled_analyzer).to receive(:enabled?).and_return(false) end context 'the hook is enabled by default in specs' do @@ -17,7 +22,9 @@ expect(parsed.pg.tables).to eq(%w[projects]) end - Project.connection.execute("SELECT 1 FROM projects") + described_class.instance.within do + Project.connection.execute("SELECT 1 FROM projects") + end end it 'does prevent recursive execution' do @@ -26,7 +33,46 @@ Project.connection.execute("SELECT 1 FROM projects") end - Project.connection.execute("SELECT 1 FROM projects") + described_class.instance.within do + Project.connection.execute("SELECT 1 FROM projects") + end + end + end + + describe '#within' do + context 'when it is already initialized' do + around do |example| + described_class.instance.within do + example.run + end + end + + it 'does not evaluate enabled? again do yield block' do + expect(analyzer).not_to receive(:enabled?) + + expect { |b| described_class.instance.within(&b) }.to yield_control + end + end + + context 'when initializer is enabled' do + before do + expect(analyzer).to receive(:enabled?).and_return(true) + end + + it 'calls begin! and end!' do + expect(analyzer).to receive(:begin!) + expect(analyzer).to receive(:end!) + + expect { |b| described_class.instance.within(&b) }.to yield_control + end + + it 'when begin! raises the end! is not called' do + expect(analyzer).to receive(:begin!).and_raise('exception') + expect(analyzer).not_to receive(:end!) + expect(Gitlab::ErrorTracking).to receive(:track_and_raise_for_dev_exception) + + expect { |b| described_class.instance.within(&b) }.to yield_control + end end end @@ -72,9 +118,16 @@ expect { process_sql("SELECT 1 FROM projects") }.not_to raise_error end + it 'does call analyze only on enabled initializers' do + expect(analyzer).to receive(:analyze) + expect(disabled_analyzer).not_to receive(:analyze) + + expect { process_sql("SELECT 1 FROM projects") }.not_to raise_error + end + def process_sql(sql) ApplicationRecord.load_balancer.read_write do |connection| - described_class.instance.send(:process_sql, sql, connection) + described_class.instance.process_sql(sql, connection) end end end diff --git a/spec/support/database/query_analyzer.rb b/spec/support/database/query_analyzer.rb new file mode 100644 index 00000000000000..85fa55f81efd1e --- /dev/null +++ b/spec/support/database/query_analyzer.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +# With the usage of `describe '...', query_analyzers: false` +# can be disabled selectively + +RSpec.configure do |config| + config.around do |example| + if example.metadata.fetch(:query_analyzers, true) + ::Gitlab::Database::QueryAnalyzer.instance.within { example.run } + else + example.run + end + end +end -- GitLab From 717a101022eb9e2cbb9e2cbb43ff71275d95ade0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Trzci=C5=84ski?= Date: Tue, 9 Nov 2021 18:32:06 +0100 Subject: [PATCH 3/5] Fix after code review Fix `#process_sql` and invalid comment. --- config/initializers/database_query_analyzers.rb | 1 - spec/lib/gitlab/database/query_analyzer_spec.rb | 6 ++++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/config/initializers/database_query_analyzers.rb b/config/initializers/database_query_analyzers.rb index 98b40e62a495c8..7b570fcfb29da5 100644 --- a/config/initializers/database_query_analyzers.rb +++ b/config/initializers/database_query_analyzers.rb @@ -5,7 +5,6 @@ Gitlab::Database::QueryAnalyzer.instance.hook! Gitlab::Application.configure do |config| - # ApolloUploadServer::Middleware expects to find uploaded files ready to use config.middleware.use(Gitlab::Middleware::QueryAnalyzer) end end diff --git a/spec/lib/gitlab/database/query_analyzer_spec.rb b/spec/lib/gitlab/database/query_analyzer_spec.rb index 559b66194b0159..16b73b8123fe5c 100644 --- a/spec/lib/gitlab/database/query_analyzer_spec.rb +++ b/spec/lib/gitlab/database/query_analyzer_spec.rb @@ -126,8 +126,10 @@ end def process_sql(sql) - ApplicationRecord.load_balancer.read_write do |connection| - described_class.instance.process_sql(sql, connection) + described_class.instance.within do + ApplicationRecord.load_balancer.read_write do |connection| + described_class.instance.process_sql(sql, connection) + end end end end -- GitLab From e52bf66b4bdace1a72edf957eaa6cf187862613c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Trzci=C5=84ski?= Date: Tue, 9 Nov 2021 23:45:05 +0100 Subject: [PATCH 4/5] Improve readibility of `already_within?` The prior `return if enabled_analyzers` was a pretty criptic way to indicate nested calls. This makes this concept clear with reasons. --- lib/gitlab/database/query_analyzer.rb | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/lib/gitlab/database/query_analyzer.rb b/lib/gitlab/database/query_analyzer.rb index 12a78acca087f5..53de6697ca6103 100644 --- a/lib/gitlab/database/query_analyzer.rb +++ b/lib/gitlab/database/query_analyzer.rb @@ -31,7 +31,10 @@ def hook! end def within - return yield if enabled_analyzers + # Due to singleton nature of analyzers + # only an outer invocation of the `.within` + # is allowed to initialize them + return yield if already_within? begin! @@ -42,6 +45,11 @@ def within end end + def already_within? + # If analyzers are set they are already configured + !enabled_analyzers.nil? + end + def process_sql(sql, connection) analyzers = enabled_analyzers return unless analyzers&.any? @@ -69,6 +77,8 @@ def begin! end rescue StandardError => e Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e) + + false end Thread.current[:query_analyzer_enabled_analyzers] = analyzers -- GitLab From bfa6a5fdbb7cac16bcfd9a9ba2330c1a344cac30 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Wed, 10 Nov 2021 08:26:55 +0000 Subject: [PATCH 5/5] Check FF when adding `SidekiqMiddleware::QueryAnalyzer` --- lib/gitlab/sidekiq_middleware.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/gitlab/sidekiq_middleware.rb b/lib/gitlab/sidekiq_middleware.rb index fd3a5f715e8a5e..69802fd6217e42 100644 --- a/lib/gitlab/sidekiq_middleware.rb +++ b/lib/gitlab/sidekiq_middleware.rb @@ -33,7 +33,7 @@ def self.server_configurator(metrics: true, arguments_logger: true, memory_kille chain.add ::Gitlab::SidekiqMiddleware::BatchLoader chain.add ::Gitlab::SidekiqMiddleware::InstrumentationLogger chain.add ::Gitlab::SidekiqMiddleware::AdminMode::Server - chain.add ::Gitlab::SidekiqMiddleware::QueryAnalyzer + chain.add ::Gitlab::SidekiqMiddleware::QueryAnalyzer if Gitlab.dev_or_test_env? || Gitlab::Utils.to_boolean(ENV['GITLAB_ENABLE_QUERY_ANALYZERS'], default: false) chain.add ::Gitlab::SidekiqVersioning::Middleware chain.add ::Gitlab::SidekiqStatus::ServerMiddleware chain.add ::Gitlab::SidekiqMiddleware::WorkerContext::Server -- GitLab