From 71dfd19330f2d292e47343db1fc4a8f7582c803f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Trzci=C5=84ski?= Date: Fri, 5 Nov 2021 14:44:23 +0100 Subject: [PATCH 1/2] Add `QueryAnalyzers::GitlabSchemasMetrics` to observe used schemas This analyzers output is Prometheus metrics observing connections vs gitlab schemas of executed queries. It will present well in a single metric how many connections do cross-join or are misplaced once additional databases are configured. --- .../query_analyzer_gitlab_schema_metrics.yml | 8 ++ .../initializers/database_query_analyzers.rb | 1 + .../query_analyzers/gitlab_schemas_metrics.rb | 45 +++++++++++ .../gitlab_schemas_metrics_spec.rb | 75 +++++++++++++++++++ spec/support/database/multiple_databases.rb | 25 +++++++ 5 files changed, 154 insertions(+) create mode 100644 config/feature_flags/development/query_analyzer_gitlab_schema_metrics.yml create mode 100644 lib/gitlab/database/query_analyzers/gitlab_schemas_metrics.rb create mode 100644 spec/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics_spec.rb diff --git a/config/feature_flags/development/query_analyzer_gitlab_schema_metrics.yml b/config/feature_flags/development/query_analyzer_gitlab_schema_metrics.yml new file mode 100644 index 00000000000000..b784105368ca8e --- /dev/null +++ b/config/feature_flags/development/query_analyzer_gitlab_schema_metrics.yml @@ -0,0 +1,8 @@ +--- +name: query_analyzer_gitlab_schema_metrics +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/73839 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/345034 +milestone: '14.5' +type: development +group: group::sharding +default_enabled: false diff --git a/config/initializers/database_query_analyzers.rb b/config/initializers/database_query_analyzers.rb index 7b570fcfb29da5..83c62a99eea8d3 100644 --- a/config/initializers/database_query_analyzers.rb +++ b/config/initializers/database_query_analyzers.rb @@ -3,6 +3,7 @@ # Currently we register validator only for `dev` or `test` environment if Gitlab.dev_or_test_env? || Gitlab::Utils.to_boolean('GITLAB_ENABLE_QUERY_ANALYZERS', default: false) Gitlab::Database::QueryAnalyzer.instance.hook! + Gitlab::Database::QueryAnalyzer.instance.all_analyzers.append(::Gitlab::Database::QueryAnalyzers::GitlabSchemasMetrics) Gitlab::Application.configure do |config| config.middleware.use(Gitlab::Middleware::QueryAnalyzer) diff --git a/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics.rb b/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics.rb new file mode 100644 index 00000000000000..4e745e783556e3 --- /dev/null +++ b/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module QueryAnalyzers + # The purpose of this analyzer is to observe via prometheus metrics + # all unique schemas observed on a given connection + # + # This effectively allows to do sample 1% or 0.01% of queries hitting + # system and observe if on a given connection we observe queries that + # are misaligned (`ci_replica` sees queries doing accessing only `gitlab_main`) + # + class GitlabSchemasMetrics < Base + class << self + def enabled?(_connection) + Feature.enabled?(:query_analyzer_gitlab_schema_metrics) + end + + def analyze(parsed) + db_config_name = ::Gitlab::Database.db_config_name(parsed.connection) + return unless db_config_name + + gitlab_schemas = ::Gitlab::Database::GitlabSchema.table_schemas(parsed.pg.tables) + return if gitlab_schemas.empty? + + # to reduce amount of labels sort schemas used + gitlab_schemas = gitlab_schemas.to_a.sort.join(",") + + schemas_metrics.increment({ + gitlab_schemas: gitlab_schemas, + db_config_name: db_config_name + }) + end + + def schemas_metrics + @schemas_metrics ||= ::Gitlab::Metrics.counter( + :gitlab_database_decomposition_gitlab_schemas_used, + 'The number of observed schemas dependent on connection' + ) + end + end + end + end + end +end diff --git a/spec/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics_spec.rb b/spec/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics_spec.rb new file mode 100644 index 00000000000000..069bb11a9b649b --- /dev/null +++ b/spec/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics_spec.rb @@ -0,0 +1,75 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::QueryAnalyzers::GitlabSchemasMetrics do + let(:analyzer) { described_class } + + before do + stub_const('Gitlab::Database::QueryAnalyzer::ANALYZERS', [analyzer]) + end + + it 'does not increment metrics if feature flag is disabled' do + stub_feature_flags(query_analyzer_gitlab_schema_metrics: false) + + expect(analyzer).not_to receive(:analyze) + + process_sql(ActiveRecord::Base, "SELECT 1 FROM projects") + end + + context 'properly observes all queries', :mocked_ci_connection do + using RSpec::Parameterized::TableSyntax + + where do + { + "for simple query observes schema correctly" => { + model: ApplicationRecord, + sql: "SELECT 1 FROM projects", + expectations: { + gitlab_schemas: "gitlab_main", + db_config_name: "main" + } + }, + "for query accessing gitlab_ci and gitlab_main" => { + model: ApplicationRecord, + sql: "SELECT 1 FROM projects LEFT JOIN ci_builds ON ci_builds.project_id=projects.id", + expectations: { + gitlab_schemas: "gitlab_ci,gitlab_main", + db_config_name: "main" + } + }, + "for query accessing gitlab_ci and gitlab_main the gitlab_schemas is always ordered" => { + model: ApplicationRecord, + sql: "SELECT 1 FROM ci_builds LEFT JOIN projects ON ci_builds.project_id=projects.id", + expectations: { + gitlab_schemas: "gitlab_ci,gitlab_main", + db_config_name: "main" + } + }, + "for query accessing CI database" => { + model: Ci::ApplicationRecord, + sql: "SELECT 1 FROM ci_builds", + expectations: { + gitlab_schemas: "gitlab_ci", + db_config_name: "ci" + } + } + } + end + + with_them do + it do + expect(described_class.schemas_metrics).to receive(:increment) + .with(expectations).and_call_original + + process_sql(model, sql) + end + end + end + + def process_sql(model, sql) + model.connection.load_balancer.read_write do |connection| + Gitlab::Database::QueryAnalyzer.new.process_sql(sql, connection) + end + end +end diff --git a/spec/support/database/multiple_databases.rb b/spec/support/database/multiple_databases.rb index 5e1ae60536fd43..29cfa0b515d114 100644 --- a/spec/support/database/multiple_databases.rb +++ b/spec/support/database/multiple_databases.rb @@ -6,6 +6,18 @@ def skip_if_multiple_databases_not_setup skip 'Skipping because multiple databases not set up' unless Gitlab::Database.has_config?(:ci) end + def reconfigure_db_connection(name: nil, config_hash: {}, model: ActiveRecord::Base, config_model: nil) + db_config = (config_model || model).connection_db_config + + new_db_config = ActiveRecord::DatabaseConfigurations::HashConfig.new( + db_config.env_name, + name ? name.to_s : db_config.name, + db_config.configuration_hash.merge(config_hash) + ) + + model.establish_connection(new_db_config) + end + # The usage of this method switches temporarily used `connection_handler` # allowing full manipulation of ActiveRecord::Base connections without # having side effects like: @@ -56,6 +68,19 @@ def establish_connection(*args) example.run end end + + config.around(:each, :mocked_ci_connection) do |example| + with_reestablished_active_record_base(reconnect: true) do + reconfigure_db_connection( + name: :ci, + model: Ci::ApplicationRecord, + config_model: ActiveRecord::Base + ) + + example.run + end + + end end ActiveRecord::Base.singleton_class.prepend(::Database::ActiveRecordBaseEstablishConnection) # rubocop:disable Database/MultipleDatabases -- GitLab From 5b6853f9e6bf3cf79ef1be46231c679e81740c3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Trzci=C5=84ski?= Date: Fri, 5 Nov 2021 18:38:29 +0100 Subject: [PATCH 2/2] Skip LB and retrieve connection of model Since LB is initialised globally in a non-decomposed run we will receive connection of `ActiveRecord::Base`. Ensure that `GitlabSchemasMetrics` can use FF --- ee/spec/lib/ee/feature_spec.rb | 2 +- .../query_analyzers/gitlab_schemas_metrics.rb | 5 +++-- .../query_analyzers/gitlab_schemas_metrics_spec.rb | 13 +++++++++---- spec/support/database/multiple_databases.rb | 1 - 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/ee/spec/lib/ee/feature_spec.rb b/ee/spec/lib/ee/feature_spec.rb index 9b8c37377edc4b..606d86691086af 100644 --- a/ee/spec/lib/ee/feature_spec.rb +++ b/ee/spec/lib/ee/feature_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe Feature, stub_feature_flags: false do +RSpec.describe Feature, stub_feature_flags: false, query_analyzers: false do include EE::GeoHelpers describe '.enable' do diff --git a/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics.rb b/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics.rb index 4e745e783556e3..06e2b114c91bf8 100644 --- a/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics.rb +++ b/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics.rb @@ -12,8 +12,9 @@ module QueryAnalyzers # class GitlabSchemasMetrics < Base class << self - def enabled?(_connection) - Feature.enabled?(:query_analyzer_gitlab_schema_metrics) + def enabled? + ::Feature::FlipperFeature.table_exists? && + Feature.enabled?(:query_analyzer_gitlab_schema_metrics) end def analyze(parsed) diff --git a/spec/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics_spec.rb b/spec/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics_spec.rb index 069bb11a9b649b..ab5f05e3ec4f61 100644 --- a/spec/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics_spec.rb +++ b/spec/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics_spec.rb @@ -2,11 +2,11 @@ require 'spec_helper' -RSpec.describe Gitlab::Database::QueryAnalyzers::GitlabSchemasMetrics do +RSpec.describe Gitlab::Database::QueryAnalyzers::GitlabSchemasMetrics, query_analyzers: false do let(:analyzer) { described_class } before do - stub_const('Gitlab::Database::QueryAnalyzer::ANALYZERS', [analyzer]) + allow(Gitlab::Database::QueryAnalyzer.instance).to receive(:all_analyzers).and_return([analyzer]) end it 'does not increment metrics if feature flag is disabled' do @@ -58,6 +58,10 @@ end with_them do + around do |example| + Gitlab::Database::QueryAnalyzer.instance.within { example.run } + end + it do expect(described_class.schemas_metrics).to receive(:increment) .with(expectations).and_call_original @@ -68,8 +72,9 @@ end def process_sql(model, sql) - model.connection.load_balancer.read_write do |connection| - Gitlab::Database::QueryAnalyzer.new.process_sql(sql, connection) + Gitlab::Database::QueryAnalyzer.instance.within do + # Skip load balancer and retrieve connection assigned to model + Gitlab::Database::QueryAnalyzer.instance.process_sql(sql, model.retrieve_connection) end end end diff --git a/spec/support/database/multiple_databases.rb b/spec/support/database/multiple_databases.rb index 29cfa0b515d114..bae7c726628ccc 100644 --- a/spec/support/database/multiple_databases.rb +++ b/spec/support/database/multiple_databases.rb @@ -79,7 +79,6 @@ def establish_connection(*args) example.run end - end end -- GitLab