From e5e89c0c544369f874ee3670764715065dae604e Mon Sep 17 00:00:00 2001 From: Adam Hegyi Date: Thu, 17 Aug 2023 07:12:38 +0200 Subject: [PATCH] Support redacting values from CH statements This change allows redacting placeholders from raw SQL statements for the ClickHouse database. --- config/initializers/click_house.rb | 3 +- .../clickhouse/clickhouse_within_gitlab.md | 198 ++++++++++++++++++ doc/development/database/index.md | 1 + .../lib/click_house/client.rb | 12 +- .../click_house/client/bind_index_manager.rb | 17 ++ .../lib/click_house/client/database.rb | 3 +- .../lib/click_house/client/query.rb | 68 ++++++ .../lib/click_house/client/query_like.rb | 19 ++ .../client}/bind_index_manager_spec.rb | 2 +- .../spec/click_house/client/database_spec.rb | 1 - .../click_house/client/query_like_spec.rb | 15 ++ .../spec/click_house/client/query_spec.rb | 125 +++++++++++ .../spec/click_house/client_spec.rb | 2 +- lib/click_house/bind_index_manager.rb | 15 -- lib/click_house/query_builder.rb | 6 +- lib/click_house/redactor.rb | 3 +- lib/peek/views/click_house.rb | 2 +- spec/lib/click_house/query_builder_spec.rb | 26 ++- .../database/click_house_client_spec.rb | 14 +- 19 files changed, 501 insertions(+), 31 deletions(-) create mode 100644 doc/development/database/clickhouse/clickhouse_within_gitlab.md create mode 100644 gems/click_house-client/lib/click_house/client/bind_index_manager.rb create mode 100644 gems/click_house-client/lib/click_house/client/query.rb create mode 100644 gems/click_house-client/lib/click_house/client/query_like.rb rename {spec/lib/click_house => gems/click_house-client/spec/click_house/client}/bind_index_manager_spec.rb (91%) create mode 100644 gems/click_house-client/spec/click_house/client/query_like_spec.rb create mode 100644 gems/click_house-client/spec/click_house/client/query_spec.rb delete mode 100644 lib/click_house/bind_index_manager.rb diff --git a/config/initializers/click_house.rb b/config/initializers/click_house.rb index 481942d775ecb6..bd063f1558e92c 100644 --- a/config/initializers/click_house.rb +++ b/config/initializers/click_house.rb @@ -20,8 +20,9 @@ config.json_parser = Gitlab::Json config.http_post_proc = ->(url, headers, body) do options = { + multipart: true, headers: headers, - body: ActiveSupport::Gzip.compress(body), + body: body, allow_local_requests: Rails.env.development? || Rails.env.test? } diff --git a/doc/development/database/clickhouse/clickhouse_within_gitlab.md b/doc/development/database/clickhouse/clickhouse_within_gitlab.md new file mode 100644 index 00000000000000..597b1732abb893 --- /dev/null +++ b/doc/development/database/clickhouse/clickhouse_within_gitlab.md @@ -0,0 +1,198 @@ +--- +stage: Data Stores +group: Database +info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/product/ux/technical-writing/#assignments +--- + +# ClickHouse within GitLab + +This document gives a high-level overview of how to develop features using ClickHouse in the GitLab Rails application. + +NOTE: +Most of the tooling and APIs are considered unstable. + +## GDK setup + +For instructions on how to set up a ClickHouse server locally, see the [ClickHouse installation documentation](https://clickhouse.com/docs/en/install). + +### Configure your Rails application + +1. Copy the example file and configure the credentials: + + ```shell + cp config/click_house.yml.example + config/click_house.yml + ``` + +1. Create the database using the `clickhouse-client` CLI tool: + + ```shell + clickhouse-client --password + ``` + + ```sql + create database gitlab_clickhouse_development; + ``` + +### Validate your setup + +Run the Rails console and invoke a simple query: + +```ruby +ClickHouse::Client.select('SELECT 1', :main) +# => [{"1"=>1}] +``` + +## Database schema and migrations + +For the ClickHouse database there are no established schema migration procedures yet. We have very basic tooling to build up the database schema in the test environment from scratch using timestamp-prefixed SQL files. + +You can create a table by placing a new SQL file in the `db/click_house/main` folder: + +```sql +// 20230811124511_create_issues.sql +CREATE TABLE issues +( + id UInt64 DEFAULT 0, + title String DEFAULT '' +) +ENGINE = MergeTree +PRIMARY KEY (id) +``` + +When you're working locally in your development environment, you can create or re-create your table schema by executing the respective `CREATE TABLE` statement. Alternatively, you can use the following snippet in the Rails console: + +```ruby +require_relative 'spec/support/database/click_house/hooks.rb' + +# Drops and re-creates all tables +ClickHouseTestRunner.new.ensure_schema +``` + +## Writing database queries + +For the ClickHouse database we don't use ORM (Object Relational Mapping). The main reason is that the GitLab application has many customizations for the `ActiveRecord` PostgresSQL adapter and the application generally assumes that all databases are using `PostgreSQL`. Since ClickHouse-related features are still in a very early stage of development, we decided to implement a simple HTTP client to avoid hard to discover bugs and long debugging time when dealing with multiple `ActiveRecord` adapters. + +Additionally, ClickHouse might not be used the same way as other adapters for `ActiveRecord`. The access patterns differ from traditional transactional databases, in that ClickHouse: + +- Uses nested aggregation `SELECT` queries with `GROUP BY` clauses. +- Doesn't use single `INSERT` statements. Data is inserted in batches via background jobs. +- Has different consistency characteristics, no transactions. +- Has very little database-level validations. + +Database queries are written and executed with the help of the `ClickHouse::Client` gem. + +A simple query from the `events` table: + +```ruby +rows = ClickHouse::Client.select('SELECT * FROM events', :main) +``` + +When working with queries with placeholders you can use the `ClickHouse::Query` object where you need to specify the placeholder name and its data type. The actual variable replacement, quoting and escaping will be done by the ClickHouse server. + +```ruby +raw_query = 'SELECT * FROM events WHERE id > {min_id:UInt64}' +placeholders = { min_id: Integer(100) } +query = ClickHouse::Client::Query.new(raw_query: raw_query, placeholders: placeholders) + +rows = ClickHouse::Client.select(query, :main) +``` + +When using placeholders the client can provide the query with redacted placeholder values which can be ingested by our logging system. You can see the redacted version of your query by calling the `to_redacted_sql` method: + +```ruby +puts query.to_redacted_sql +``` + +ClickHouse allows only one statement per request. This means that the common SQL injection vulnerability where the statement is closed with a `;` character and then another query is "injected" cannot be exploited: + +```ruby +ClickHouse::Client.select('SELECT 1; SELECT 2', :main) + +# ClickHouse::Client::DatabaseError: Code: 62. DB::Exception: Syntax error (Multi-statements are not allowed): failed at position 9 (end of query): ; SELECT 2. . (SYNTAX_ERROR) (version 23.4.2.11 (official build)) +``` + +### Subqueries + +You can compose complex queries with the `ClickHouse::Client::Query` class by specifying the query placeholder with the special `Subquery` type. The library will make sure to correctly merge the queries and the placeholders: + +```ruby +subquery = ClickHouse::Client::Query.new(raw_query: 'SELECT id FROM events WHERE id = {id:UInt64}', placeholders: { id: Integer(10) }) + +raw_query = 'SELECT * FROM events WHERE id > {id:UInt64} AND id IN ({q:Subquery})' +placeholders = { id: Integer(10), q: subquery } + +query = ClickHouse::Client::Query.new(raw_query: raw_query, placeholders: placeholders) +rows = ClickHouse::Client.select(query, :main) + +# ClickHouse will replace the placeholders +puts query.to_sql # SELECT * FROM events WHERE id > {id:UInt64} AND id IN (SELECT id FROM events WHERE id = {id:UInt64}) + +puts query.to_redacted_sql # SELECT * FROM events WHERE id > $1 AND id IN (SELECT id FROM events WHERE id = $2) + +puts query.placeholders # { id: 10 } +``` + +In case there are placeholders with the same name but different values the query will raise an error. + +### Writing query conditions + +When working with complex forms where multiple filter conditions are present, building queries by concatenating query fragments as string can get out of hands very quickly. For queries with several conditions you may use the `ClickHouse::QueryBuilder` class. The class uses the `Arel` gem to generate queries and provides a similar query interface like `ActiveRecord`. + +```ruby +builder = ClickHouse::QueryBuilder.new('events') + +query = builder + .where(builder.table[:created_at].lteq(Date.today)) + .where(id: [1,2,3]) + +rows = ClickHouse::Client.select(query, :main) +``` + +## Testing + +ClickHouse is enabled on CI/CD but to avoid significantly affecting the pipeline runtime we've decided to run the ClickHouse server for test cases tagged with `:click_house` only. + +The `:click_house` tag ensures that the database schema is properly set up before every test case. + +```ruby +RSpec.describe MyClickHouseFeature, :click_house do + it 'returns rows' do + rows = ClickHouse::Client.select('SELECT 1', :main) + expect(rows.size).to eq(1) + end +end +``` + +## Multiple databases + +By design, the `ClickHouse::Client` library supports configuring multiple databases. Because we're still at a very early stage of development, we only have one database called `main`. + +Multi database configuration example: + +```yaml +development: + main: + database: gitlab_clickhouse_main_development + url: 'http://localhost:8123' + username: clickhouse + password: clickhouse + + user_analytics: # made up database + database: gitlab_clickhouse_user_analytics_development + url: 'http://localhost:8123' + username: clickhouse + password: clickhouse +``` + +## Observability + +All queries executed via the `ClickHouse::Client` library expose the query with performance metrics (timings, read bytes) via `ActiveSupport::Notifications`. + +```ruby +ActiveSupport::Notifications.subscribe('sql.click_house') do |_, _, _, _, data| + puts data.inspect +end +``` + +Additionally, to view the executed ClickHouse queries in web interactions, on the performance bar, next to the `ch` label select the count. diff --git a/doc/development/database/index.md b/doc/development/database/index.md index 1ee6aeaa213c53..284c04d5f91b17 100644 --- a/doc/development/database/index.md +++ b/doc/development/database/index.md @@ -109,6 +109,7 @@ including the major methods: ## ClickHouse - [Introduction](clickhouse/index.md) +- [ClickHouse within GitLab](clickhouse/clickhouse_within_gitlab.md) - [Optimizing query execution](clickhouse/optimization.md) - [Rebuild GitLab features using ClickHouse 1: Activity data](clickhouse/gitlab_activity_data.md) - [Rebuild GitLab features using ClickHouse 2: Merge Request analytics](clickhouse/merge_request_analytics.md) diff --git a/gems/click_house-client/lib/click_house/client.rb b/gems/click_house-client/lib/click_house/client.rb index abc54f2bce0677..53170e9fb1a6df 100644 --- a/gems/click_house-client/lib/click_house/client.rb +++ b/gems/click_house-client/lib/click_house/client.rb @@ -6,6 +6,9 @@ require 'active_support/notifications' require_relative "client/database" require_relative "client/configuration" +require_relative "client/bind_index_manager" +require_relative "client/query_like" +require_relative "client/query" require_relative "client/formatter" require_relative "client/response" @@ -25,6 +28,7 @@ def configure Error = Class.new(StandardError) ConfigurationError = Class.new(Error) DatabaseError = Class.new(Error) + QueryError = Class.new(Error) # Executes a SELECT database query def self.select(query, database, configuration = self.configuration) @@ -58,11 +62,17 @@ def self.execute(query, database, configuration = self.configuration) private_class_method def self.instrumented_execute(query, database, configuration) db = lookup_database(configuration, database) + query = ClickHouse::Client::Query.new(raw_query: query) unless query.is_a?(ClickHouse::Client::QueryLike) ActiveSupport::Notifications.instrument('sql.click_house', { query: query, database: database }) do |instrument| + # Use a multipart POST request where the placeholders are sent with the param_ prefix + # See: https://github.com/ClickHouse/ClickHouse/issues/8842 + query_with_params = query.placeholders.transform_keys { |key| "param_#{key}" } + query_with_params['query'] = query.to_sql + response = configuration.http_post_proc.call( db.uri.to_s, db.headers, - query + query_with_params ) raise DatabaseError, response.body unless response.success? diff --git a/gems/click_house-client/lib/click_house/client/bind_index_manager.rb b/gems/click_house-client/lib/click_house/client/bind_index_manager.rb new file mode 100644 index 00000000000000..618b13f2fd7045 --- /dev/null +++ b/gems/click_house-client/lib/click_house/client/bind_index_manager.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module ClickHouse + module Client + class BindIndexManager + def initialize(start_index = 1) + @current_index = start_index + end + + def next_bind_str + bind_str = "$#{@current_index}" + @current_index += 1 + bind_str + end + end + end +end diff --git a/gems/click_house-client/lib/click_house/client/database.rb b/gems/click_house-client/lib/click_house/client/database.rb index faf5a953a1283e..2ce5fad1d057a1 100644 --- a/gems/click_house-client/lib/click_house/client/database.rb +++ b/gems/click_house-client/lib/click_house/client/database.rb @@ -28,8 +28,7 @@ def headers @headers ||= { 'X-ClickHouse-User' => @username, 'X-ClickHouse-Key' => @password, - 'X-ClickHouse-Format' => 'JSON', # always return JSON data - 'Content-Encoding' => 'gzip' # tell the server that we send compressed data + 'X-ClickHouse-Format' => 'JSON' # always return JSON data }.freeze end end diff --git a/gems/click_house-client/lib/click_house/client/query.rb b/gems/click_house-client/lib/click_house/client/query.rb new file mode 100644 index 00000000000000..bd2443b1dc1328 --- /dev/null +++ b/gems/click_house-client/lib/click_house/client/query.rb @@ -0,0 +1,68 @@ +# frozen_string_literal: true + +module ClickHouse + module Client + class Query < QueryLike + SUBQUERY_PLACEHOLDER_REGEX = /{\w+:Subquery}/ # exmaple: {var:Subquery}, special "internal" type for subqueries + PLACEHOLDER_REGEX = /{\w+:\w+}/ # exmaple: {var:UInt8} + PLACEHOLDER_NAME_REGEX = /{(\w+):/ # exmaple: {var:UInt8} => var + + def initialize(raw_query:, placeholders: {}) + raise QueryError, 'Empty query string given' if raw_query.blank? + + @raw_query = raw_query + @placeholders = placeholders || {} + end + + # List of placeholders to be sent to ClickHouse for replacement. + # If there are subqueries, merge their placeholders as well. + def placeholders + all_placeholders = @placeholders.select { |_, v| !v.is_a?(QueryLike) } + @placeholders.each do |_name, value| + next unless value.is_a?(QueryLike) + + all_placeholders.merge!(value.placeholders) do |key, a, b| + raise QueryError, "mismatching values for the '#{key}' placeholder: #{a} vs #{b}" + end + end + + all_placeholders + end + + # Placeholder replacement is handled by ClickHouse, only subquery placeholders + # will be replaced. + def to_sql + raw_query.gsub(SUBQUERY_PLACEHOLDER_REGEX) do |placeholder_in_query| + value = placeholder_value(placeholder_in_query) + + if value.is_a?(QueryLike) + value.to_sql + else + placeholder_in_query + end + end + end + + def to_redacted_sql(bind_index_manager = BindIndexManager.new) + raw_query.gsub(PLACEHOLDER_REGEX) do |placeholder_in_query| + value = placeholder_value(placeholder_in_query) + + if value.is_a?(QueryLike) + value.to_redacted_sql(bind_index_manager) + else + bind_index_manager.next_bind_str + end + end + end + + private + + attr_reader :raw_query + + def placeholder_value(placeholder_in_query) + placeholder = placeholder_in_query[PLACEHOLDER_NAME_REGEX, 1] + @placeholders.fetch(placeholder.to_sym) + end + end + end +end diff --git a/gems/click_house-client/lib/click_house/client/query_like.rb b/gems/click_house-client/lib/click_house/client/query_like.rb new file mode 100644 index 00000000000000..9e9ee46a3388cb --- /dev/null +++ b/gems/click_house-client/lib/click_house/client/query_like.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +module ClickHouse + module Client + class QueryLike + # Build a SQL string that can be executed on a ClickHouse database. + def to_sql + raise NotImplementedError + end + + # Redacted version of the SQL query generated by the to_sql method where the + # placeholders are stripped. These queries are meant to be exported to external + # log aggregation systems. + def to_redacted_sql(bind_index_manager = BindIndexManager.new) + raise NotImplementedError + end + end + end +end diff --git a/spec/lib/click_house/bind_index_manager_spec.rb b/gems/click_house-client/spec/click_house/client/bind_index_manager_spec.rb similarity index 91% rename from spec/lib/click_house/bind_index_manager_spec.rb rename to gems/click_house-client/spec/click_house/client/bind_index_manager_spec.rb index 1c659017c636f4..38e0865676a710 100644 --- a/spec/lib/click_house/bind_index_manager_spec.rb +++ b/gems/click_house-client/spec/click_house/client/bind_index_manager_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe ClickHouse::BindIndexManager, feature_category: :database do +RSpec.describe ClickHouse::Client::BindIndexManager do describe '#next_bind_str' do context 'when initialized without a start index' do let(:bind_manager) { described_class.new } diff --git a/gems/click_house-client/spec/click_house/client/database_spec.rb b/gems/click_house-client/spec/click_house/client/database_spec.rb index a74d4a119a435c..fdb4c72c0cbc62 100644 --- a/gems/click_house-client/spec/click_house/client/database_spec.rb +++ b/gems/click_house-client/spec/click_house/client/database_spec.rb @@ -24,7 +24,6 @@ describe '#headers' do it 'returns the correct headers' do expect(database.headers).to eq({ - "Content-Encoding" => "gzip", "X-ClickHouse-Format" => "JSON", 'X-ClickHouse-User' => 'user', 'X-ClickHouse-Key' => 'pass' diff --git a/gems/click_house-client/spec/click_house/client/query_like_spec.rb b/gems/click_house-client/spec/click_house/client/query_like_spec.rb new file mode 100644 index 00000000000000..8b8426bd5fd8f3 --- /dev/null +++ b/gems/click_house-client/spec/click_house/client/query_like_spec.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe ClickHouse::Client::QueryLike do + subject(:query) { described_class.new } + + describe '#to_sql' do + it { expect { query.to_sql }.to raise_error(NotImplementedError) } + end + + describe '#to_redacted_sql' do + it { expect { query.to_redacted_sql }.to raise_error(NotImplementedError) } + end +end diff --git a/gems/click_house-client/spec/click_house/client/query_spec.rb b/gems/click_house-client/spec/click_house/client/query_spec.rb new file mode 100644 index 00000000000000..82733e523b1a79 --- /dev/null +++ b/gems/click_house-client/spec/click_house/client/query_spec.rb @@ -0,0 +1,125 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe ClickHouse::Client::Query do + subject(:query) { described_class.new(raw_query: raw_query, placeholders: placeholders) } + + let(:sql) { query.to_sql } + let(:redacted_sql) { query.to_redacted_sql } + + context 'when using no placeholders' do + let(:raw_query) { 'SELECT * FROM events' } + let(:placeholders) { nil } + + it { expect(sql).to eq(raw_query) } + it { expect(redacted_sql).to eq(raw_query) } + + context 'when placeholders is an empty hash' do + let(:placeholders) { {} } + + it { expect(sql).to eq(raw_query) } + it { expect(redacted_sql).to eq(raw_query) } + end + end + + context 'when placeholders are given' do + let(:raw_query) { 'SELECT * FROM events WHERE id = {id:UInt64}' } + let(:placeholders) { { id: 1 } } + + it { expect(sql).to eq(raw_query) } + it { expect(redacted_sql).to eq('SELECT * FROM events WHERE id = $1') } + end + + context 'when multiple placeholders are given' do + let(:raw_query) do + <<~SQL.squish + SELECT * + FROM events + WHERE + id = {id:UInt64} AND + title = {some_title:String} AND + another_id = {id:UInt64} + SQL + end + + let(:placeholders) { { id: 1, some_title: "'title'" } } + + it do + expect(sql).to eq(raw_query) + end + + it do + expect(redacted_sql).to eq( + <<~SQL.squish + SELECT * + FROM events + WHERE + id = $1 AND + title = $2 AND + another_id = $3 + SQL + ) + end + end + + context 'when dealing with subqueries' do + let(:raw_query) { 'SELECT * FROM events WHERE id < {min_id:UInt64} AND id IN ({q:Subquery})' } + + let(:subquery) do + described_class.new(raw_query: 'SELECT id FROM events WHERE id > {max_id:UInt64}', placeholders: { max_id: 11 }) + end + + let(:placeholders) { { min_id: 100, q: subquery } } + + it 'replaces the subquery but preserves the other placeholders' do + q = 'SELECT * FROM events WHERE id < {min_id:UInt64} AND id IN (SELECT id FROM events WHERE id > {max_id:UInt64})' + expect(sql).to eq(q) + end + + it 'replaces the subquery and replaces the placeholders with indexed values' do + expect(redacted_sql).to eq('SELECT * FROM events WHERE id < $1 AND id IN (SELECT id FROM events WHERE id > $2)') + end + + it 'merges the placeholders' do + expect(query.placeholders).to eq({ min_id: 100, max_id: 11 }) + end + end + + describe 'validation' do + context 'when SQL string is empty' do + let(:raw_query) { '' } + let(:placeholders) { {} } + + it 'raises error' do + expect { query }.to raise_error(ClickHouse::Client::QueryError, /Empty query string given/) + end + end + + context 'when SQL string is nil' do + let(:raw_query) { nil } + let(:placeholders) { {} } + + it 'raises error' do + expect { query }.to raise_error(ClickHouse::Client::QueryError, /Empty query string given/) + end + end + + context 'when same placeholder value does not match' do + let(:raw_query) { 'SELECT id FROM events WHERE id = {id:UInt64} AND id IN ({q:Subquery})' } + + let(:subquery) do + subquery_string = 'SELECT id FROM events WHERE id = {id:UInt64}' + described_class.new(raw_query: subquery_string, placeholders: { id: 10 }) + end + + let(:placeholders) { { id: 5, q: subquery } } + + it 'raises error' do + expect do + query.placeholders + end.to raise_error(ClickHouse::Client::QueryError, /mismatching values for the 'id' placeholder/) + end + end + end +end diff --git a/gems/click_house-client/spec/click_house/client_spec.rb b/gems/click_house-client/spec/click_house/client_spec.rb index 883199198badbb..51cc097a228fbc 100644 --- a/gems/click_house-client/spec/click_house/client_spec.rb +++ b/gems/click_house-client/spec/click_house/client_spec.rb @@ -71,7 +71,7 @@ end context 'when the DB is not configured' do - it 'raises erro' do + it 'raises error' do expect do described_class.select('SELECT * FROM issues', :different_db, configuration) end.to raise_error(ClickHouse::Client::ConfigurationError, /not configured/) diff --git a/lib/click_house/bind_index_manager.rb b/lib/click_house/bind_index_manager.rb deleted file mode 100644 index 96b0940ce710b4..00000000000000 --- a/lib/click_house/bind_index_manager.rb +++ /dev/null @@ -1,15 +0,0 @@ -# frozen_string_literal: true - -module ClickHouse - class BindIndexManager - def initialize(start_index = 1) - @current_index = start_index - end - - def next_bind_str - bind_str = "$#{@current_index}" - @current_index += 1 - bind_str - end - end -end diff --git a/lib/click_house/query_builder.rb b/lib/click_house/query_builder.rb index a2136420b2fcff..dc139663e7c0c6 100644 --- a/lib/click_house/query_builder.rb +++ b/lib/click_house/query_builder.rb @@ -2,7 +2,7 @@ # rubocop:disable CodeReuse/ActiveRecord module ClickHouse - class QueryBuilder + class QueryBuilder < ClickHouse::Client::QueryLike attr_reader :table attr_accessor :conditions, :manager @@ -93,8 +93,8 @@ def to_sql manager.to_sql end - def to_redacted_sql - ::ClickHouse::Redactor.redact(self) + def to_redacted_sql(bind_index_manager = ClickHouse::Client::BindIndexManager.new) + ::ClickHouse::Redactor.redact(self, bind_index_manager) end private diff --git a/lib/click_house/redactor.rb b/lib/click_house/redactor.rb index 9b8e2bc90d987c..6ca7c46e747a93 100644 --- a/lib/click_house/redactor.rb +++ b/lib/click_house/redactor.rb @@ -14,9 +14,8 @@ module Redactor # redacted_query = ClickHouse::Redactor.redact(query_builder) # # The redacted_query will contain the SQL query with values replaced by placeholders. # output: "SELECT * FROM \"users\" WHERE \"users\".\"name\" = $1" - def self.redact(query_builder) + def self.redact(query_builder, bind_manager = ClickHouse::Client::BindIndexManager.new) cloned_query_builder = query_builder.clone - bind_manager = ::ClickHouse::BindIndexManager.new cloned_query_builder.conditions = cloned_query_builder.conditions.map do |condition| redact_condition(condition, bind_manager) diff --git a/lib/peek/views/click_house.rb b/lib/peek/views/click_house.rb index cc109ccea5111e..97f02ad3dab09d 100644 --- a/lib/peek/views/click_house.rb +++ b/lib/peek/views/click_house.rb @@ -39,7 +39,7 @@ def generate_detail(start, finish, data) { start: start, duration: finish - start, - sql: data[:query].strip, + sql: data[:query].to_sql.strip, backtrace: Gitlab::BacktraceCleaner.clean_backtrace(caller), database: "database: #{data[:database]}", statistics: "query stats: #{data[:statistics]}" diff --git a/spec/lib/click_house/query_builder_spec.rb b/spec/lib/click_house/query_builder_spec.rb index 9e3f1118eebe1c..f5e1d53e7c185b 100644 --- a/spec/lib/click_house/query_builder_spec.rb +++ b/spec/lib/click_house/query_builder_spec.rb @@ -288,7 +288,8 @@ describe '#to_redacted_sql' do it 'calls ::ClickHouse::Redactor correctly' do - expect(::ClickHouse::Redactor).to receive(:redact).with(builder) + expect(::ClickHouse::Redactor).to receive(:redact).with(builder, + an_instance_of(ClickHouse::Client::BindIndexManager)) builder.to_redacted_sql end @@ -331,4 +332,27 @@ expect(sql).to eq(expected_sql) end end + + context 'when combining with a raw query' do + it 'correctly generates the SQL query' do + raw_query = 'SELECT * FROM isues WHERE title = {title:String} AND id IN ({query:Subquery})' + placeholders = { + title: "'test'", + query: builder.select(:id).where(column1: 'value1', column2: 'value2') + } + + query = ClickHouse::Client::Query.new(raw_query: raw_query, placeholders: placeholders) + expected_sql = "SELECT * FROM isues WHERE title = {title:String} AND id IN (SELECT \"test_table\".\"id\" " \ + "FROM \"test_table\" WHERE \"test_table\".\"column1\" = 'value1' AND " \ + "\"test_table\".\"column2\" = 'value2')" + + expect(query.to_sql).to eq(expected_sql) + + expected_redacted_sql = "SELECT * FROM isues WHERE title = $1 AND id IN (SELECT \"test_table\".\"id\" " \ + "FROM \"test_table\" WHERE \"test_table\".\"column1\" = $2 AND " \ + "\"test_table\".\"column2\" = $3)" + + expect(query.to_redacted_sql).to eq(expected_redacted_sql) + end + end end diff --git a/spec/lib/gitlab/database/click_house_client_spec.rb b/spec/lib/gitlab/database/click_house_client_spec.rb index 50086795b2b1a0..06a3536b57cda4 100644 --- a/spec/lib/gitlab/database/click_house_client_spec.rb +++ b/spec/lib/gitlab/database/click_house_client_spec.rb @@ -89,9 +89,19 @@ def format_row(event) 'target_type' => event3.target_type )) - ClickHouse::Client.execute("DELETE FROM events WHERE id = #{event3.id}", :main) + delete_query = ClickHouse::Client::Query.new( + raw_query: 'DELETE FROM events WHERE id = {id:UInt64}', + placeholders: { id: event3.id } + ) - results = ClickHouse::Client.select("SELECT * FROM events WHERE id = #{event3.id}", :main) + ClickHouse::Client.execute(delete_query, :main) + + select_query = ClickHouse::Client::Query.new( + raw_query: 'SELECT * FROM events WHERE id = {id:UInt64}', + placeholders: { id: event3.id } + ) + + results = ClickHouse::Client.select(select_query, :main) expect(results).to be_empty end end -- GitLab