diff --git a/config/feature_flags/development/query_analyzer_gitlab_schema_metrics.yml b/config/feature_flags/development/query_analyzer_gitlab_schema_metrics.yml new file mode 100644 index 0000000000000000000000000000000000000000..b784105368ca8ecfb4fe63fdeb4dc07d0340c8cc --- /dev/null +++ b/config/feature_flags/development/query_analyzer_gitlab_schema_metrics.yml @@ -0,0 +1,8 @@ +--- +name: query_analyzer_gitlab_schema_metrics +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/73839 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/345034 +milestone: '14.5' +type: development +group: group::sharding +default_enabled: false diff --git a/config/initializers/database_query_analyzers.rb b/config/initializers/database_query_analyzers.rb index 7b570fcfb29da5ad35d918de0b7ccd4cb63b7971..83c62a99eea8d3930bda21aed0306b4bbf6c070d 100644 --- a/config/initializers/database_query_analyzers.rb +++ b/config/initializers/database_query_analyzers.rb @@ -3,6 +3,7 @@ # Currently we register validator only for `dev` or `test` environment if Gitlab.dev_or_test_env? || Gitlab::Utils.to_boolean('GITLAB_ENABLE_QUERY_ANALYZERS', default: false) Gitlab::Database::QueryAnalyzer.instance.hook! + Gitlab::Database::QueryAnalyzer.instance.all_analyzers.append(::Gitlab::Database::QueryAnalyzers::GitlabSchemasMetrics) Gitlab::Application.configure do |config| config.middleware.use(Gitlab::Middleware::QueryAnalyzer) diff --git a/ee/spec/lib/ee/feature_spec.rb b/ee/spec/lib/ee/feature_spec.rb index 9b8c37377edc4b27b20f4bec640cc8c4c15b0ff1..606d86691086af58ff3779e86878cde10cb304c7 100644 --- a/ee/spec/lib/ee/feature_spec.rb +++ b/ee/spec/lib/ee/feature_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe Feature, stub_feature_flags: false do +RSpec.describe Feature, stub_feature_flags: false, query_analyzers: false do include EE::GeoHelpers describe '.enable' do diff --git a/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics.rb b/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics.rb new file mode 100644 index 0000000000000000000000000000000000000000..06e2b114c91bf896d695b96500e4b31f89b5de40 --- /dev/null +++ b/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module QueryAnalyzers + # The purpose of this analyzer is to observe via prometheus metrics + # all unique schemas observed on a given connection + # + # This effectively allows to do sample 1% or 0.01% of queries hitting + # system and observe if on a given connection we observe queries that + # are misaligned (`ci_replica` sees queries doing accessing only `gitlab_main`) + # + class GitlabSchemasMetrics < Base + class << self + def enabled? + ::Feature::FlipperFeature.table_exists? && + Feature.enabled?(:query_analyzer_gitlab_schema_metrics) + end + + def analyze(parsed) + db_config_name = ::Gitlab::Database.db_config_name(parsed.connection) + return unless db_config_name + + gitlab_schemas = ::Gitlab::Database::GitlabSchema.table_schemas(parsed.pg.tables) + return if gitlab_schemas.empty? + + # to reduce amount of labels sort schemas used + gitlab_schemas = gitlab_schemas.to_a.sort.join(",") + + schemas_metrics.increment({ + gitlab_schemas: gitlab_schemas, + db_config_name: db_config_name + }) + end + + def schemas_metrics + @schemas_metrics ||= ::Gitlab::Metrics.counter( + :gitlab_database_decomposition_gitlab_schemas_used, + 'The number of observed schemas dependent on connection' + ) + end + end + end + end + end +end diff --git a/spec/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics_spec.rb b/spec/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..ab5f05e3ec4f61878007589752ea2d77c34f0075 --- /dev/null +++ b/spec/lib/gitlab/database/query_analyzers/gitlab_schemas_metrics_spec.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::QueryAnalyzers::GitlabSchemasMetrics, query_analyzers: false do + let(:analyzer) { described_class } + + before do + allow(Gitlab::Database::QueryAnalyzer.instance).to receive(:all_analyzers).and_return([analyzer]) + end + + it 'does not increment metrics if feature flag is disabled' do + stub_feature_flags(query_analyzer_gitlab_schema_metrics: false) + + expect(analyzer).not_to receive(:analyze) + + process_sql(ActiveRecord::Base, "SELECT 1 FROM projects") + end + + context 'properly observes all queries', :mocked_ci_connection do + using RSpec::Parameterized::TableSyntax + + where do + { + "for simple query observes schema correctly" => { + model: ApplicationRecord, + sql: "SELECT 1 FROM projects", + expectations: { + gitlab_schemas: "gitlab_main", + db_config_name: "main" + } + }, + "for query accessing gitlab_ci and gitlab_main" => { + model: ApplicationRecord, + sql: "SELECT 1 FROM projects LEFT JOIN ci_builds ON ci_builds.project_id=projects.id", + expectations: { + gitlab_schemas: "gitlab_ci,gitlab_main", + db_config_name: "main" + } + }, + "for query accessing gitlab_ci and gitlab_main the gitlab_schemas is always ordered" => { + model: ApplicationRecord, + sql: "SELECT 1 FROM ci_builds LEFT JOIN projects ON ci_builds.project_id=projects.id", + expectations: { + gitlab_schemas: "gitlab_ci,gitlab_main", + db_config_name: "main" + } + }, + "for query accessing CI database" => { + model: Ci::ApplicationRecord, + sql: "SELECT 1 FROM ci_builds", + expectations: { + gitlab_schemas: "gitlab_ci", + db_config_name: "ci" + } + } + } + end + + with_them do + around do |example| + Gitlab::Database::QueryAnalyzer.instance.within { example.run } + end + + it do + expect(described_class.schemas_metrics).to receive(:increment) + .with(expectations).and_call_original + + process_sql(model, sql) + end + end + end + + def process_sql(model, sql) + Gitlab::Database::QueryAnalyzer.instance.within do + # Skip load balancer and retrieve connection assigned to model + Gitlab::Database::QueryAnalyzer.instance.process_sql(sql, model.retrieve_connection) + end + end +end diff --git a/spec/support/database/multiple_databases.rb b/spec/support/database/multiple_databases.rb index 5e1ae60536fd43c40d77dc4e459f0855193860bc..bae7c726628ccc9793ecebc05fcf496f09cbc418 100644 --- a/spec/support/database/multiple_databases.rb +++ b/spec/support/database/multiple_databases.rb @@ -6,6 +6,18 @@ def skip_if_multiple_databases_not_setup skip 'Skipping because multiple databases not set up' unless Gitlab::Database.has_config?(:ci) end + def reconfigure_db_connection(name: nil, config_hash: {}, model: ActiveRecord::Base, config_model: nil) + db_config = (config_model || model).connection_db_config + + new_db_config = ActiveRecord::DatabaseConfigurations::HashConfig.new( + db_config.env_name, + name ? name.to_s : db_config.name, + db_config.configuration_hash.merge(config_hash) + ) + + model.establish_connection(new_db_config) + end + # The usage of this method switches temporarily used `connection_handler` # allowing full manipulation of ActiveRecord::Base connections without # having side effects like: @@ -56,6 +68,18 @@ def establish_connection(*args) example.run end end + + config.around(:each, :mocked_ci_connection) do |example| + with_reestablished_active_record_base(reconnect: true) do + reconfigure_db_connection( + name: :ci, + model: Ci::ApplicationRecord, + config_model: ActiveRecord::Base + ) + + example.run + end + end end ActiveRecord::Base.singleton_class.prepend(::Database::ActiveRecordBaseEstablishConnection) # rubocop:disable Database/MultipleDatabases