From 46443b5ee3eda118599191cb8a943eb622f15994 Mon Sep 17 00:00:00 2001 From: charlieablett Date: Wed, 30 Aug 2023 16:41:41 +1200 Subject: [PATCH 1/2] Add ClickHouse logging - STDOUT by default, but a logger and log proc can be specified via configuration - Add structured log configuration for logging dev/test --- config/initializers/click_house.rb | 16 ++++++++++ ee/lib/click_house/logger.rb | 11 +++++++ .../lib/click_house/client.rb | 4 +++ .../lib/click_house/client/configuration.rb | 12 ++++++- .../spec/click_house/client_spec.rb | 32 +++++++++++++++++++ .../database/click_house_client_spec.rb | 19 +++++++++++ 6 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 ee/lib/click_house/logger.rb diff --git a/config/initializers/click_house.rb b/config/initializers/click_house.rb index 7230eab1b03b28..ecf1ccb97bf874 100644 --- a/config/initializers/click_house.rb +++ b/config/initializers/click_house.rb @@ -17,6 +17,18 @@ ) end + if Rails.env.development? || Rails.env.test? + config.logger = ::ClickHouse::Logger.build + config.log_proc = ->(query) do + structured_log(query.to_sql) + end + else + config.logger = Logger.new('/dev/null') + config.log_proc = ->(query) do + structured_log(query.to_redacted_sql) + end + end + config.json_parser = Gitlab::Json config.http_post_proc = ->(url, headers, body) do options = { @@ -32,3 +44,7 @@ ClickHouse::Client::Response.new(response.body, response.code, response.headers) end end + +def structured_log(query_string) + { query: query_string, correlation_id: Labkit::Correlation::CorrelationId.current_id.to_s } +end diff --git a/ee/lib/click_house/logger.rb b/ee/lib/click_house/logger.rb new file mode 100644 index 00000000000000..90af644799b3f0 --- /dev/null +++ b/ee/lib/click_house/logger.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +module ClickHouse + class Logger < ::Gitlab::JsonLogger + exclude_context! + + def self.file_name_noext + 'clickhouse' + end + end +end diff --git a/gems/click_house-client/lib/click_house/client.rb b/gems/click_house-client/lib/click_house/client.rb index 2a9c5e402f5d37..1ca3653c45f915 100644 --- a/gems/click_house-client/lib/click_house/client.rb +++ b/gems/click_house-client/lib/click_house/client.rb @@ -100,6 +100,10 @@ def self.insert_csv(query, io, database, configuration = self.configuration) db = lookup_database(configuration, database) query = ClickHouse::Client::Query.build(query) + + log_contents = configuration.log_proc.call(query) + configuration.logger.info(log_contents) + ActiveSupport::Notifications.instrument('sql.click_house', { query: query, database: database }) do |instrument| # Use a multipart POST request where the placeholders are sent with the param_ prefix # See: https://github.com/ClickHouse/ClickHouse/issues/8842 diff --git a/gems/click_house-client/lib/click_house/client/configuration.rb b/gems/click_house-client/lib/click_house/client/configuration.rb index 882b37993dc83a..4a71934466f2b0 100644 --- a/gems/click_house-client/lib/click_house/client/configuration.rb +++ b/gems/click_house-client/lib/click_house/client/configuration.rb @@ -18,6 +18,9 @@ class Configuration # # *json_parser*: object for parsing JSON strings, it should respond to the "parse" method # + # *logger*: object for receiving logger commands. Default `$stdout` + # *log_proc*: any output (e.g. structure) to wrap around the query for every statement + # # Example: # # Gitlab::ClickHouse::Client.configure do |c| @@ -31,6 +34,11 @@ class Configuration # } # ) # + # c.logger = MyLogger.new + # c.log_proc = ->(query) do + # { query_body: query.to_redacted_sql } + # end + # # c.http_post_proc = lambda do |url, headers, body| # options = { # headers: headers, @@ -44,13 +52,15 @@ class Configuration # # c.json_parser = JSON # end - attr_accessor :http_post_proc, :json_parser + attr_accessor :http_post_proc, :json_parser, :logger, :log_proc attr_reader :databases def initialize @databases = {} @http_post_proc = nil @json_parser = JSON + @logger = ::Logger.new($stdout) + @log_proc = ->(query) { query.to_sql } end def register_database(name, **args) diff --git a/gems/click_house-client/spec/click_house/client_spec.rb b/gems/click_house-client/spec/click_house/client_spec.rb index 51cc097a228fbc..ab2407a83d7fbd 100644 --- a/gems/click_house-client/spec/click_house/client_spec.rb +++ b/gems/click_house-client/spec/click_house/client_spec.rb @@ -30,6 +30,9 @@ let(:configuration) do ClickHouse::Client::Configuration.new.tap do |config| + config.log_proc = ->(query) do + { query_string: query.to_sql } + end config.register_database(:test_db, **database_config) config.http_post_proc = ->(_url, _headers, _query) { body = File.read(query_result_fixture) @@ -94,5 +97,34 @@ end.to raise_error(ClickHouse::Client::DatabaseError, 'some error') end end + + describe 'default logging' do + let(:fake_logger) { instance_double("Logger", info: 'logged!') } + let(:query_string) { 'SELECT * FROM issues' } + + before do + configuration.logger = fake_logger + end + + shared_examples 'proper logging' do + it 'calls the custom logger and log_proc' do + expect(fake_logger).to receive(:info).at_least(:once).with({ query_string: query_string }) + + described_class.select(query_object, :test_db, configuration) + end + end + + context 'when query is a string' do # rubocop:disable RSpec/MultipleMemoizedHelpers + let(:query_object) { query_string } + + it_behaves_like 'proper logging' + end + + context 'when query is a Query object' do # rubocop:disable RSpec/MultipleMemoizedHelpers + let(:query_object) { ClickHouse::Client::Query.new(raw_query: query_string) } + + it_behaves_like 'proper logging' + end + end end end diff --git a/spec/lib/gitlab/database/click_house_client_spec.rb b/spec/lib/gitlab/database/click_house_client_spec.rb index 8e4a474cc9b010..6e63ae56557808 100644 --- a/spec/lib/gitlab/database/click_house_client_spec.rb +++ b/spec/lib/gitlab/database/click_house_client_spec.rb @@ -115,4 +115,23 @@ def format_row(event) end end end + + describe 'logging' do + let(:query_string) { "SELECT * FROM events WHERE id IN (4, 5, 6)" } + + context 'on dev and test environments' do + it 'logs the un-redacted query' do + expect(ClickHouse::Client.configuration.logger).to receive(:info).with({ + query: query_string, + correlation_id: a_kind_of(String) + }) + + ClickHouse::Client.select(query_string, :main) + end + + it 'has a ClickHouse logger' do + expect(ClickHouse::Client.configuration.logger).to be_a(ClickHouse::Logger) + end + end + end end -- GitLab From ac5fbc86309e6f37bb2287f2c5cb58af21f85213 Mon Sep 17 00:00:00 2001 From: charlieablett Date: Thu, 7 Sep 2023 17:29:11 +1200 Subject: [PATCH 2/2] Change from EE to FOSS --- {ee/lib => lib}/click_house/logger.rb | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {ee/lib => lib}/click_house/logger.rb (100%) diff --git a/ee/lib/click_house/logger.rb b/lib/click_house/logger.rb similarity index 100% rename from ee/lib/click_house/logger.rb rename to lib/click_house/logger.rb -- GitLab