From 73923deea7d29ee1565a4c026329c68533daf81a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Trzci=C5=84ski?= Date: Tue, 9 Nov 2021 12:35:59 +0100 Subject: [PATCH 1/2] Ensure that QueryAnalyzer handles recursive calls It is possible that Query Analyzer to unintentionally trigger SQL query. To prevent Stack Too Deep discover recursive calls and prevent them. --- spec/lib/gitlab/database/query_analyzer_spec.rb | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/spec/lib/gitlab/database/query_analyzer_spec.rb b/spec/lib/gitlab/database/query_analyzer_spec.rb index 16b73b8123fe5c..633ee654792a09 100644 --- a/spec/lib/gitlab/database/query_analyzer_spec.rb +++ b/spec/lib/gitlab/database/query_analyzer_spec.rb @@ -74,6 +74,15 @@ expect { |b| described_class.instance.within(&b) }.to yield_control end end + + it 'does prevent recursive execution' do + expect(analyzer).to receive(:enabled?).and_return(true) + expect(analyzer).to receive(:analyze) do + Project.connection.execute("SELECT 1 FROM projects") + end + + Project.connection.execute("SELECT 1 FROM projects") + end end describe '#process_sql' do -- GitLab From 56f07934ec61d57ec9f8555c94834e82c874403a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Trzci=C5=84ski?= Date: Tue, 9 Nov 2021 13:33:32 +0100 Subject: [PATCH 2/2] Use `gitlab_schema` to parse each query and track failures This uses `pg_query` on each executed SQL query to ensure that it adheres to allowed schema access for a given connection. Allowing to model any allowed access pattern depending on connection being used. Ex.: - allow read-only access for CI-only tables - allow read-only access for any tables - allow read-write access for CI-only tables This skips DDL changes as we assume that we require them to always be replied and allowed in all contexes. --- config/database.yml.decomposed-postgresql | 6 + .../initializers/database_query_analyzers.rb | 1 + lib/gitlab/database/gitlab_schema.rb | 3 + .../gitlab_schema_validator.rb | 93 +++++++++ .../gitlab_schema_validator_spec.rb | 189 ++++++++++++++++++ ...revent_cross_database_modification_spec.rb | 2 +- 6 files changed, 293 insertions(+), 1 deletion(-) create mode 100644 lib/gitlab/database/query_analyzers/gitlab_schema_validator.rb create mode 100644 spec/lib/gitlab/database/query_analyzers/gitlab_schema_validator_spec.rb diff --git a/config/database.yml.decomposed-postgresql b/config/database.yml.decomposed-postgresql index 729d84470773df..47b68e33c4c8d9 100644 --- a/config/database.yml.decomposed-postgresql +++ b/config/database.yml.decomposed-postgresql @@ -33,6 +33,9 @@ test: &test password: host: localhost prepared_statements: false + # The primary connection is shared today between `main` and `ci` + gitlab_primary_schemas: [gitlab_main, gitlab_ci, gitlab_shared] + gitlab_replica_schemas: [gitlab_main, gitlab_shared] variables: statement_timeout: 15s ci: @@ -43,5 +46,8 @@ test: &test password: host: localhost prepared_statements: false + # The primary connection is shared today between `main` and `ci` + gitlab_primary_schemas: [gitlab_main, gitlab_ci, gitlab_shared] + gitlab_replica_schemas: [gitlab_ci, gitlab_shared] variables: statement_timeout: 15s diff --git a/config/initializers/database_query_analyzers.rb b/config/initializers/database_query_analyzers.rb index a2db2d2a2b8c20..250ccc3f0840ee 100644 --- a/config/initializers/database_query_analyzers.rb +++ b/config/initializers/database_query_analyzers.rb @@ -4,6 +4,7 @@ if Gitlab.dev_or_test_env? || Gitlab::Utils.to_boolean(ENV['GITLAB_ENABLE_QUERY_ANALYZERS'], default: false) Gitlab::Database::QueryAnalyzer.instance.hook! Gitlab::Database::QueryAnalyzer.instance.all_analyzers.append(::Gitlab::Database::QueryAnalyzers::GitlabSchemasMetrics) + Gitlab::Database::QueryAnalyzer.instance.all_analyzers.append(::Gitlab::Database::QueryAnalyzers::GitlabSchemaValidator) Gitlab::Application.configure do |config| config.middleware.use(Gitlab::Middleware::QueryAnalyzer) diff --git a/lib/gitlab/database/gitlab_schema.rb b/lib/gitlab/database/gitlab_schema.rb index 14807494a79e1f..1731730bf8e468 100644 --- a/lib/gitlab/database/gitlab_schema.rb +++ b/lib/gitlab/database/gitlab_schema.rb @@ -19,6 +19,7 @@ module GitlabSchema # main tables 'alerts_service_data' => :gitlab_main, 'analytics_devops_adoption_segment_selections' => :gitlab_main, + 'analytics_instance_statistics_measurements' => :gitlab_main, 'analytics_repository_file_commits' => :gitlab_main, 'analytics_repository_file_edits' => :gitlab_main, 'analytics_repository_files' => :gitlab_main, @@ -43,6 +44,8 @@ module GitlabSchema 'ci_daily_report_results' => :gitlab_ci, 'ci_test_cases' => :gitlab_ci, 'ci_test_case_failures' => :gitlab_ci, + 'dep_ci_build_trace_sections' => :gitlab_ci, + 'dep_ci_build_trace_section_names' => :gitlab_ci, # leftovers from early implementation of partitioning 'audit_events_part_5fc467ac26' => :gitlab_main, diff --git a/lib/gitlab/database/query_analyzers/gitlab_schema_validator.rb b/lib/gitlab/database/query_analyzers/gitlab_schema_validator.rb new file mode 100644 index 00000000000000..9285e271c7b2e7 --- /dev/null +++ b/lib/gitlab/database/query_analyzers/gitlab_schema_validator.rb @@ -0,0 +1,93 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module QueryAnalyzers + # The purpose of this class is to implement a validator + # of a virtual schema concept as defined in config/initializers/00_active_record_gitlab_schema.rb + # + # This Validator hooks into each executed query and tracks what "schemas" are being modified + # to during development/test. Later this will be tracked in production. + # + # This works only if explicitly enable via `config/database.yml`. Each connection + # if contains `gitlab_primary_schemas = []` or `gitlab_replica_schemas = []` + # defines a list of allowed schemas to be accessed on a given connection + # + # gitlab_primary_schemas - defines a list of schemas allowed to be used on primary connection + # gitlab_replica_schemas - defines a list of schemas allowed to be used on replica connection + # + # - in all cases DDL changes are allowed + # + # This to some extent works very similar to spec/support/database/prevent_cross_joins.rb + # and likely suprseedes it in the future. + # + # Currently only 3 schemas are known: + # - gitlab_main: all inheriting from `ApplicationRecord` with data in `main:` + # - gitlab_ci: all inheriting from `Ci::ApplicationRecord` with data in `ci:` + # - gitlab_shared: all inheriting from `SharedModel` with data in both `main:` and `ci:` + # + class GitlabSchemaValidator < Base + OutOfGitlabSchemaAccessError = Class.new(StandardError) + + def self.with_disabled_validator + return yield if context[:disabled] + + begin + context[:disabled] = true + yield + ensure + context[:disabled] = nil + end + end + + # Enable this analyzer only in dev/test environment + def self.enabled? + Gitlab.dev_or_test_env? + end + + def self.analyze(parsed) + return false if context[:disabled] + + connection_name, allowed_schemas = allowed_schemas_from_connection(parsed.connection) + return unless allowed_schemas + + validate_schemas!( + connection_name, + parsed.sql, + allowed_schemas, + parsed.pg.select_tables + parsed.pg.dml_tables + ) + rescue => e # rubocop:disable Style/RescueStandardError + # We catch all standard errors to prevent validation errors to introduce fatal errors in production + Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e) + end + + def self.allowed_schemas_from_connection(connection) + config = ::Gitlab::Database.db_config_for_connection(connection) + return unless config + + config_hash = config.configuration_hash + + case Gitlab::Database::LoadBalancing.db_role_for_connection(connection) + when Gitlab::Database::LoadBalancing::ROLE_REPLICA + [config.name, config_hash[:gitlab_replica_schemas]] + else + [config.name, config_hash[:gitlab_primary_schemas]] + end + end + + def self.validate_schemas!(db_config_name, sql, allowed_schemas, used_tables) + allowed_schemas = Array(allowed_schemas) # convert string to array + used_schemas = ::Gitlab::Database::GitlabSchema.table_schemas(used_tables) + disallowed_schemas = used_schemas - allowed_schemas.map(&:to_sym) + + if disallowed_schemas.any? + raise OutOfGitlabSchemaAccessError, \ + "The `#{sql}` uses `#{used_schemas}` which is outside "\ + "of allowed `gitlab_schemas` (#{allowed_schemas}) for a given connection (#{db_config_name})" + end + end + end + end + end +end diff --git a/spec/lib/gitlab/database/query_analyzers/gitlab_schema_validator_spec.rb b/spec/lib/gitlab/database/query_analyzers/gitlab_schema_validator_spec.rb new file mode 100644 index 00000000000000..2a7085b5539d57 --- /dev/null +++ b/spec/lib/gitlab/database/query_analyzers/gitlab_schema_validator_spec.rb @@ -0,0 +1,189 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::QueryAnalyzers::GitlabSchemaValidator, :reestablished_active_record_base do + using RSpec::Parameterized::TableSyntax + + let(:analyzer) { described_class } + + let(:load_balancer_configuration) do + Gitlab::Database::LoadBalancing::Configuration.new( + ActiveRecord::Base, [ActiveRecord::Base.connection_db_config.host]) + end + + let(:load_balancer) do + Gitlab::Database::LoadBalancing::LoadBalancer.new(load_balancer_configuration) + end + + around do |example| + Gitlab::Database::QueryAnalyzer.instance.within do + # This method is alternative to allow-cross joins that works on a connection + # configuration-level and can be used in production + Gitlab::Database.allow_cross_joins_across_databases(url: nil) do + example.run + end + end + end + + before do + allow(Gitlab::Database::QueryAnalyzer.instance).to receive(:all_analyzers).and_return(analyzer) + + ActiveRecord::Base.establish_connection( + ActiveRecord::Base.connection_db_config.configuration_hash.merge( + gitlab_primary_schemas: gitlab_primary_schemas, + gitlab_replica_schemas: gitlab_replica_schemas + ) + ) + end + + context 'when executing various SQL queries' do + where do + { + # testing access to gitlab_main + "when doing DDL changes on write connection" => { + operation: :read_write, + sql: "ALTER TABLE projects ADD COLUMN __test_column BIGINT", + raises_exception: false + }, + "when reading projects from primary" => { + operation: :read_write, + sql: "SELECT * FROM projects", + raises_exception: false + }, + "when reading projects from replica" => { + operation: :read, + sql: "SELECT * FROM projects", + raises_exception: false + }, + "when updating projects" => { + operation: :read_write, + sql: "UPDATE projects SET description='project'", + raises_exception: false + }, + "when adding column on ci_instance_variables" => { + operation: :read_write, + sql: "UPDATE projects SET description='project'", + raises_exception: false + }, + "when reading projects and instance variables from primary" => { + operation: :read_write, + sql: "SELECT * FROM projects WHERE EXISTS (SELECT 1 FROM ci_instance_variables)", + raises_exception: true + }, + "when allowed reading projects and instance variables from primary" => { + operation: :read_write, + sql: "SELECT * FROM projects WHERE EXISTS (SELECT 1 FROM ci_instance_variables)", + overwrite_gitlab_primary_schemas: %i[gitlab_main gitlab_ci gitlab_shared], + raises_exception: false + }, + "when reading projects and instance variables from replica" => { + operation: :read, + sql: "SELECT * FROM projects WHERE EXISTS (SELECT 1 FROM ci_instance_variables)", + raises_exception: true + }, + "when allowed reading projects and instance variables from replica" => { + operation: :read, + sql: "SELECT * FROM projects WHERE EXISTS (SELECT 1 FROM ci_instance_variables)", + overwrite_gitlab_replica_schemas: %i[gitlab_main gitlab_ci gitlab_shared], + raises_exception: false + }, + "when updating instance variables" => { + operation: :read_write, + sql: "UPDATE ci_instance_variables SET key='variable'", + raises_exception: true + }, + "when allowed updating instance variables" => { + operation: :read_write, + sql: "UPDATE ci_instance_variables SET key='variable'", + overwrite_gitlab_primary_schemas: %i[gitlab_main gitlab_ci gitlab_shared], + raises_exception: false + } + } + end + + with_them do + let(:gitlab_primary_schemas) { overwrite_gitlab_primary_schemas || %i[gitlab_main gitlab_shared] } + let(:gitlab_replica_schemas) { overwrite_gitlab_replica_schemas || %i[gitlab_main gitlab_shared] } + + it do + if raises_exception + expect { execute(sql) }.to raise_error /of allowed `gitlab_schemas`/ + else + expect { execute(sql) }.not_to raise_error + end + end + + def execute(sql) + load_balancer.public_send(operation) do |connection| + Gitlab::Database::QueryAnalyzer.instance.process_sql(sql, connection) + end + end + end + end + + context 'when running migration on gitlab_main' do + let(:gitlab_primary_schemas) { %i[gitlab_main gitlab_shared] } + let(:gitlab_replica_schemas) { %i[gitlab_main gitlab_shared] } + + context 'when doing changes in gitlab_main' do + context 'modifying DDL' do + let(:migration) do + Class.new(Gitlab::Database::Migration[1.0]) do + def change + add_column(:projects, :__test_column2, :bigint) + end + end + end + + it 'does modify DDL' do + expect { migration.new.up }.not_to raise_error + end + end + + context 'modifying data' do + let(:migration) do + Class.new(Gitlab::Database::Migration[1.0]) do + def up + execute("UPDATE projects SET description='aa'") + end + end + end + + it 'does change DML' do + expect { migration.new.up }.not_to raise_error + end + end + end + + context 'when doing changes in gitlab_ci' do + context 'modifying DDL' do + let(:migration) do + Class.new(Gitlab::Database::Migration[1.0]) do + def change + add_column(:ci_instance_variables, :__test_column2, :bigint) + end + end + end + + it 'does modify DDL' do + expect { migration.new.up }.not_to raise_error + end + end + + context 'modifying data' do + let(:migration) do + Class.new(Gitlab::Database::Migration[1.0]) do + def up + execute("UPDATE ci_instance_variables SET key='aaaa'") + end + end + end + + it 'does change DML' do + expect { migration.new.up }.to raise_error /of allowed `gitlab_schemas`/ + end + end + end + end +end diff --git a/spec/support_specs/database/prevent_cross_database_modification_spec.rb b/spec/support_specs/database/prevent_cross_database_modification_spec.rb index 365f1f32858df5..2b86d4a868e689 100644 --- a/spec/support_specs/database/prevent_cross_database_modification_spec.rb +++ b/spec/support_specs/database/prevent_cross_database_modification_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe 'Database::PreventCrossDatabaseModification' do +RSpec.describe 'Database::PreventCrossDatabaseModification', query_analyzers: false do let_it_be(:pipeline, refind: true) { create(:ci_pipeline) } let_it_be(:project, refind: true) { create(:project) } -- GitLab