From 27e189e4cac2e2b215db5311c7fa416f145ed9af Mon Sep 17 00:00:00 2001 From: Adam Hegyi Date: Wed, 5 Jul 2023 14:58:09 +0200 Subject: [PATCH 1/6] Setup ClickHouse on CI This change updates the pipeline settings to run a separate job for RSpec tests tagged with click_house. --- .gitlab/ci/global.gitlab-ci.yml | 18 ++++++++++++++++++ .../gitlab/database/click_house_client_spec.rb | 14 ++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/.gitlab/ci/global.gitlab-ci.yml b/.gitlab/ci/global.gitlab-ci.yml index c501d930352d4d..f94edf628f139b 100644 --- a/.gitlab/ci/global.gitlab-ci.yml +++ b/.gitlab/ci/global.gitlab-ci.yml @@ -400,6 +400,24 @@ CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: 1 CLICKHOUSE_DB: gitlab_clickhouse_test +.use-pg14-clickhouse23: + services: + - name: ${REGISTRY_HOST}/${REGISTRY_GROUP}/gitlab-build-images:postgres-14-pgvector-0.4.1 + command: ["postgres", "-c", "fsync=off", "-c", "synchronous_commit=off", "-c", "full_page_writes=off"] + alias: postgres + - name: ${REGISTRY_HOST}/${REGISTRY_GROUP}/gitlab-build-images:redis-cluster-6.2.12 + alias: rediscluster # configure connections in config/redis.yml + - name: redis:6.2-alpine + - name: clickhouse/clickhouse-server:23-alpine + alias: clickhouse + variables: + POSTGRES_HOST_AUTH_METHOD: trust + PG_VERSION: "14" + CLICKHOUSE_USER: clickhouse + CLICKHOUSE_PASSWORD: clickhouse + CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: 1 + CLICKHOUSE_DB: gitlab_clickhouse_test + .use-kaniko: image: name: ${REGISTRY_HOST}/${REGISTRY_GROUP}/gitlab-build-images:kaniko diff --git a/spec/lib/gitlab/database/click_house_client_spec.rb b/spec/lib/gitlab/database/click_house_client_spec.rb index 16b7fb82c4ad98..85f9874c072c91 100644 --- a/spec/lib/gitlab/database/click_house_client_spec.rb +++ b/spec/lib/gitlab/database/click_house_client_spec.rb @@ -30,4 +30,18 @@ expect(result).to eq([{ 'value' => 1 }]) end end + + describe 'querying data', :click_house do + around do |example| + with_net_connect_allowed do + example.run + end + end + + it 'returns data from the DB' do + result = Gitlab::ClickHouse::Client.execute("SELECT 1 AS value", :main) + + expect(result).to eq([{ 'value' => 1 }]) + end + end end -- GitLab From 6d56d707d59ea90419954bb1d3e384182ad3dae7 Mon Sep 17 00:00:00 2001 From: charlieablett Date: Mon, 10 Jul 2023 20:29:15 +1200 Subject: [PATCH 2/6] Add extra spec for database configuration --- spec/lib/gitlab/database/click_house_client_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/lib/gitlab/database/click_house_client_spec.rb b/spec/lib/gitlab/database/click_house_client_spec.rb index 85f9874c072c91..d9fe0c4744147f 100644 --- a/spec/lib/gitlab/database/click_house_client_spec.rb +++ b/spec/lib/gitlab/database/click_house_client_spec.rb @@ -39,7 +39,7 @@ end it 'returns data from the DB' do - result = Gitlab::ClickHouse::Client.execute("SELECT 1 AS value", :main) + result = ClickHouse::Client.execute("SELECT 1 AS value", :main) expect(result).to eq([{ 'value' => 1 }]) end -- GitLab From f1996c3b59f63d5218bbcc818f59b04719b5feca Mon Sep 17 00:00:00 2001 From: Adam Hegyi Date: Wed, 5 Jul 2023 14:58:09 +0200 Subject: [PATCH 3/6] Setup ClickHouse on CI This change updates the pipeline settings to run a separate job for RSpec tests tagged with click_house. --- spec/lib/gitlab/database/click_house_client_spec.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/lib/gitlab/database/click_house_client_spec.rb b/spec/lib/gitlab/database/click_house_client_spec.rb index d9fe0c4744147f..c77255a28587bc 100644 --- a/spec/lib/gitlab/database/click_house_client_spec.rb +++ b/spec/lib/gitlab/database/click_house_client_spec.rb @@ -31,7 +31,7 @@ end end - describe 'querying data', :click_house do + describe 'querying data' do around do |example| with_net_connect_allowed do example.run @@ -39,7 +39,7 @@ end it 'returns data from the DB' do - result = ClickHouse::Client.execute("SELECT 1 AS value", :main) + result = ClickHouse::Client.select("SELECT 1 AS value", :main) expect(result).to eq([{ 'value' => 1 }]) end -- GitLab From d711c9ef3a2ca81e141d3400810457d526efb43d Mon Sep 17 00:00:00 2001 From: Adam Hegyi Date: Wed, 5 Jul 2023 15:03:32 +0200 Subject: [PATCH 4/6] Add basic test cases for ClickHouse This change sets up basic test cases and RSpec hooks for ClickHouse. --- .../main/20230705124511_create_events.sql | 15 ++++ .../lib/click_house/client.rb | 27 ++++++- .../spec/click_house/client_spec.rb | 8 +- .../database/click_house_client_spec.rb | 76 ++++++++++++++++--- spec/support/database/click_house/hooks.rb | 56 ++++++++++++++ 5 files changed, 165 insertions(+), 17 deletions(-) create mode 100644 db/click_house/main/20230705124511_create_events.sql create mode 100644 spec/support/database/click_house/hooks.rb diff --git a/db/click_house/main/20230705124511_create_events.sql b/db/click_house/main/20230705124511_create_events.sql new file mode 100644 index 00000000000000..45e0139165af53 --- /dev/null +++ b/db/click_house/main/20230705124511_create_events.sql @@ -0,0 +1,15 @@ +CREATE TABLE events +( + id UInt64 DEFAULT 0, + path String DEFAULT '', + author_id UInt64 DEFAULT 0, + target_id UInt64 DEFAULT 0, + target_type LowCardinality(String) DEFAULT '', + action UInt8 DEFAULT 0, + created_at DateTime64(6, 'UTC') DEFAULT now(), + updated_at DateTime64(6, 'UTC') DEFAULT now() +) +ENGINE = ReplacingMergeTree(updated_at) +PRIMARY KEY (id) +ORDER BY (id) +PARTITION BY toYear(created_at) diff --git a/gems/click_house-client/lib/click_house/client.rb b/gems/click_house-client/lib/click_house/client.rb index 1c8da64f38f700..22c42d7be6eaa3 100644 --- a/gems/click_house-client/lib/click_house/client.rb +++ b/gems/click_house-client/lib/click_house/client.rb @@ -25,9 +25,9 @@ def configure ConfigurationError = Class.new(Error) DatabaseError = Class.new(Error) - def self.execute(query, database, configuration = self.configuration) - db = configuration.databases[database] - raise ConfigurationError, "The database '#{database}' is not configured" unless db + # Executes a SELECT database query + def self.select(query, database, configuration = self.configuration) + db = lookup_database(configuration, database) response = configuration.http_post_proc.call( db.uri.to_s, @@ -39,5 +39,26 @@ def self.execute(query, database, configuration = self.configuration) Formatter.format(configuration.json_parser.parse(response.body)) end + + # Executes any kinds of database query without returning any data (INSERT, DELETE) + def self.execute(query, database, configuration = self.configuration) + db = lookup_database(configuration, database) + + response = configuration.http_post_proc.call( + db.uri.to_s, + db.headers, + query + ) + + raise DatabaseError, response.body unless response.success? + + true + end + + private_class_method def self.lookup_database(configuration, database) + configuration.databases[database].tap do |db| + raise ConfigurationError, "The database '#{database}' is not configured" unless db + end + end end end diff --git a/gems/click_house-client/spec/click_house/client_spec.rb b/gems/click_house-client/spec/click_house/client_spec.rb index 19850bc722745d..883199198badbb 100644 --- a/gems/click_house-client/spec/click_house/client_spec.rb +++ b/gems/click_house-client/spec/click_house/client_spec.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true RSpec.describe ClickHouse::Client do - describe '#execute' do + describe '#select' do # Assuming we have a DB table with the following schema # # CREATE TABLE issues ( @@ -39,7 +39,7 @@ end it 'parses the results and returns the data as array of hashes' do - result = described_class.execute('SELECT * FROM issues', :test_db, configuration) + result = described_class.select('SELECT * FROM issues', :test_db, configuration) timestamp1 = ActiveSupport::TimeZone["UTC"].parse('2023-06-21 13:33:44') timestamp2 = ActiveSupport::TimeZone["UTC"].parse('2023-06-21 13:33:50') @@ -73,7 +73,7 @@ context 'when the DB is not configured' do it 'raises erro' do expect do - described_class.execute('SELECT * FROM issues', :different_db, configuration) + described_class.select('SELECT * FROM issues', :different_db, configuration) end.to raise_error(ClickHouse::Client::ConfigurationError, /not configured/) end end @@ -90,7 +90,7 @@ it 'raises error' do expect do - described_class.execute('SELECT * FROM issues', :test_db, configuration) + described_class.select('SELECT * FROM issues', :test_db, configuration) end.to raise_error(ClickHouse::Client::DatabaseError, 'some error') end end diff --git a/spec/lib/gitlab/database/click_house_client_spec.rb b/spec/lib/gitlab/database/click_house_client_spec.rb index c77255a28587bc..bef3df586de1b3 100644 --- a/spec/lib/gitlab/database/click_house_client_spec.rb +++ b/spec/lib/gitlab/database/click_house_client_spec.rb @@ -25,23 +25,79 @@ end it 'returns data from the DB' do - result = ClickHouse::Client.execute("SELECT 1 AS value", :main) + result = ClickHouse::Client.select("SELECT 1 AS value", :main) expect(result).to eq([{ 'value' => 1 }]) end - end - describe 'querying data' do - around do |example| - with_net_connect_allowed do - example.run + describe 'inserting' do + let_it_be(:group) { create(:group) } + let_it_be(:project) { create(:project) } + + let_it_be(:author1) { create(:user).tap { |u| project.add_developer(u) } } + let_it_be(:author2) { create(:user).tap { |u| project.add_developer(u) } } + + let_it_be(:issue1) { create(:issue, project: project) } + let_it_be(:issue2) { create(:issue, project: project) } + let_it_be(:merge_request) { create(:merge_request, source_project: project) } + + let_it_be(:event1) { create(:event, :created, target: issue1, author: author1) } + let_it_be(:event2) { create(:event, :closed, target: issue2, author: author2) } + let_it_be(:event3) { create(:event, :merged, target: merge_request, author: author1) } + + let(:events) { [event1, event2, event3] } + + def format_row(event) + path = event.project.reload.project_namespace.traversal_ids.join('/') + + action = Event.actions[event.action] + [ + event.id, + "'#{path}/'", + event.author_id, + event.target_id, + "'#{event.target_type}'", + action, + event.created_at.to_f, + event.updated_at.to_f + ].join(',') end - end - it 'returns data from the DB' do - result = ClickHouse::Client.select("SELECT 1 AS value", :main) + describe 'RSpec hooks' do + it 'ensures that tables are empty' do + results = ClickHouse::Client.select('SELECT * FROM events', :main) + expect(results).to be_empty + end + end - expect(result).to eq([{ 'value' => 1 }]) + it 'inserts and modifies data' do + insert_query = <<~SQL + INSERT INTO events + (id, path, author_id, target_id, target_type, action, created_at, updated_at) + VALUES + (#{format_row(event1)}), + (#{format_row(event2)}), + (#{format_row(event3)}) + SQL + + ClickHouse::Client.execute(insert_query, :main) + + results = ClickHouse::Client.select('SELECT * FROM events ORDER BY id', :main) + expect(results.size).to eq(3) + + last = results.last + expect(last).to match(a_hash_including( + 'id' => event3.id, + 'author_id' => event3.author_id, + 'created_at' => be_within(0.05).of(event3.created_at), + 'target_type' => event3.target_type + )) + + ClickHouse::Client.execute("DELETE FROM events WHERE id = #{event3.id}", :main) + + results = ClickHouse::Client.select("SELECT * FROM events WHERE id = #{event3.id}", :main) + expect(results).to be_empty + end end end end diff --git a/spec/support/database/click_house/hooks.rb b/spec/support/database/click_house/hooks.rb new file mode 100644 index 00000000000000..3878eeb751e043 --- /dev/null +++ b/spec/support/database/click_house/hooks.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +# rubocop: disable Gitlab/NamespacedClass +class ClickHouseTestRunner + def truncate_tables + ClickHouse::Client.configuration.databases.each_key do |db| + tables_for(db).each do |table| + ClickHouse::Client.execute("TRUNCATE TABLE #{table}", db) + end + end + end + + def ensure_schema + return if @ensure_schema + + ClickHouse::Client.configuration.databases.each_key do |db| + # drop all tables + lookup_tables(db).each do |table| + ClickHouse::Client.execute("DROP TABLE IF EXISTS #{table}", db) + end + + # run the schema SQL files + Dir[Rails.root.join("db/click_house/#{db}/*.sql")].each do |file| + ClickHouse::Client.execute(File.read(file), db) + end + end + + @ensure_schema = true + end + + private + + def tables_for(db) + @tables ||= {} + @tables[db] ||= lookup_tables(db) + @tables[db] + end + + def lookup_tables(db) + ClickHouse::Client.select('SHOW TABLES', db).pluck('name') + end +end +# rubocop: enable Gitlab/NamespacedClass + +RSpec.configure do |config| + click_house_test_runner = ClickHouseTestRunner.new + + config.around(:each, :click_house) do |example| + with_net_connect_allowed do + click_house_test_runner.ensure_schema + click_house_test_runner.truncate_tables + + example.run + end + end +end -- GitLab From 1a758c0bd2de2272c16a92aa5b271e74a2bf6a85 Mon Sep 17 00:00:00 2001 From: charlieablett Date: Mon, 17 Jul 2023 17:12:22 +1200 Subject: [PATCH 5/6] Update specs to use select Apply reviewer feedback --- .gitlab/ci/global.gitlab-ci.yml | 18 --- config/initializers/click_house.rb | 1 + .../database/click_house_client_spec.rb | 140 ++++++++++-------- spec/support/database/click_house/hooks.rb | 1 - 4 files changed, 76 insertions(+), 84 deletions(-) diff --git a/.gitlab/ci/global.gitlab-ci.yml b/.gitlab/ci/global.gitlab-ci.yml index f94edf628f139b..c501d930352d4d 100644 --- a/.gitlab/ci/global.gitlab-ci.yml +++ b/.gitlab/ci/global.gitlab-ci.yml @@ -400,24 +400,6 @@ CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: 1 CLICKHOUSE_DB: gitlab_clickhouse_test -.use-pg14-clickhouse23: - services: - - name: ${REGISTRY_HOST}/${REGISTRY_GROUP}/gitlab-build-images:postgres-14-pgvector-0.4.1 - command: ["postgres", "-c", "fsync=off", "-c", "synchronous_commit=off", "-c", "full_page_writes=off"] - alias: postgres - - name: ${REGISTRY_HOST}/${REGISTRY_GROUP}/gitlab-build-images:redis-cluster-6.2.12 - alias: rediscluster # configure connections in config/redis.yml - - name: redis:6.2-alpine - - name: clickhouse/clickhouse-server:23-alpine - alias: clickhouse - variables: - POSTGRES_HOST_AUTH_METHOD: trust - PG_VERSION: "14" - CLICKHOUSE_USER: clickhouse - CLICKHOUSE_PASSWORD: clickhouse - CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: 1 - CLICKHOUSE_DB: gitlab_clickhouse_test - .use-kaniko: image: name: ${REGISTRY_HOST}/${REGISTRY_GROUP}/gitlab-build-images:kaniko diff --git a/config/initializers/click_house.rb b/config/initializers/click_house.rb index 7fc216d9a59469..c1bec683c6b1f0 100644 --- a/config/initializers/click_house.rb +++ b/config/initializers/click_house.rb @@ -3,6 +3,7 @@ return unless File.exist?(Rails.root.join('config/click_house.yml')) raw_config = Rails.application.config_for(:click_house) + return if raw_config.blank? ClickHouse::Client.configure do |config| diff --git a/spec/lib/gitlab/database/click_house_client_spec.rb b/spec/lib/gitlab/database/click_house_client_spec.rb index bef3df586de1b3..d9a46dbd24169f 100644 --- a/spec/lib/gitlab/database/click_house_client_spec.rb +++ b/spec/lib/gitlab/database/click_house_client_spec.rb @@ -24,79 +24,89 @@ expect(databases).not_to be_empty end - it 'returns data from the DB' do + it 'returns data from the DB via `select` method' do result = ClickHouse::Client.select("SELECT 1 AS value", :main) + # returns JSON if successful. Otherwise error expect(result).to eq([{ 'value' => 1 }]) end - describe 'inserting' do - let_it_be(:group) { create(:group) } - let_it_be(:project) { create(:project) } - - let_it_be(:author1) { create(:user).tap { |u| project.add_developer(u) } } - let_it_be(:author2) { create(:user).tap { |u| project.add_developer(u) } } - - let_it_be(:issue1) { create(:issue, project: project) } - let_it_be(:issue2) { create(:issue, project: project) } - let_it_be(:merge_request) { create(:merge_request, source_project: project) } - - let_it_be(:event1) { create(:event, :created, target: issue1, author: author1) } - let_it_be(:event2) { create(:event, :closed, target: issue2, author: author2) } - let_it_be(:event3) { create(:event, :merged, target: merge_request, author: author1) } - - let(:events) { [event1, event2, event3] } - - def format_row(event) - path = event.project.reload.project_namespace.traversal_ids.join('/') - - action = Event.actions[event.action] - [ - event.id, - "'#{path}/'", - event.author_id, - event.target_id, - "'#{event.target_type}'", - action, - event.created_at.to_f, - event.updated_at.to_f - ].join(',') - end + it 'returns data from the DB via `execute` method' do + result = ClickHouse::Client.select("SELECT 1 AS value", :main) - describe 'RSpec hooks' do - it 'ensures that tables are empty' do - results = ClickHouse::Client.select('SELECT * FROM events', :main) - expect(results).to be_empty + # does not return data, just true if successful. Otherwise error. + expect(result).to be_truthy + end + + describe 'data manipulation' do + describe 'inserting' do + let_it_be(:group) { create(:group) } + let_it_be(:project) { create(:project) } + + let_it_be(:author1) { create(:user).tap { |u| project.add_developer(u) } } + let_it_be(:author2) { create(:user).tap { |u| project.add_developer(u) } } + + let_it_be(:issue1) { create(:issue, project: project) } + let_it_be(:issue2) { create(:issue, project: project) } + let_it_be(:merge_request) { create(:merge_request, source_project: project) } + + let_it_be(:event1) { create(:event, :created, target: issue1, author: author1) } + let_it_be(:event2) { create(:event, :closed, target: issue2, author: author2) } + let_it_be(:event3) { create(:event, :merged, target: merge_request, author: author1) } + + let(:events) { [event1, event2, event3] } + + def format_row(event) + path = event.project.reload.project_namespace.traversal_ids.join('/') + + action = Event.actions[event.action] + [ + event.id, + "'#{path}/'", + event.author_id, + event.target_id, + "'#{event.target_type}'", + action, + event.created_at.to_f, + event.updated_at.to_f + ].join(',') + end + + describe 'RSpec hooks' do + it 'ensures that tables are empty' do + results = ClickHouse::Client.select('SELECT * FROM events', :main) + expect(results).to be_empty + end end - end - it 'inserts and modifies data' do - insert_query = <<~SQL - INSERT INTO events - (id, path, author_id, target_id, target_type, action, created_at, updated_at) - VALUES - (#{format_row(event1)}), - (#{format_row(event2)}), - (#{format_row(event3)}) - SQL - - ClickHouse::Client.execute(insert_query, :main) - - results = ClickHouse::Client.select('SELECT * FROM events ORDER BY id', :main) - expect(results.size).to eq(3) - - last = results.last - expect(last).to match(a_hash_including( - 'id' => event3.id, - 'author_id' => event3.author_id, - 'created_at' => be_within(0.05).of(event3.created_at), - 'target_type' => event3.target_type - )) - - ClickHouse::Client.execute("DELETE FROM events WHERE id = #{event3.id}", :main) - - results = ClickHouse::Client.select("SELECT * FROM events WHERE id = #{event3.id}", :main) - expect(results).to be_empty + it 'inserts and modifies data' do + insert_query = <<~SQL + INSERT INTO events + (id, path, author_id, target_id, target_type, action, created_at, updated_at) + VALUES + (#{format_row(event1)}), + (#{format_row(event2)}), + (#{format_row(event3)}) + SQL + + ClickHouse::Client.execute(insert_query, :main) + + results = ClickHouse::Client.select('SELECT * FROM events ORDER BY id', :main) + expect(results.size).to eq(3) + + last = results.last + expect(last).to match(a_hash_including( + 'id' => event3.id, + 'author_id' => event3.author_id, + 'created_at' => be_within(0.05).of(event3.created_at), + 'target_type' => event3.target_type + )) + + ClickHouse::Client.execute("DELETE FROM events WHERE id = #{event3.id}", :main) + + results = ClickHouse::Client.select("SELECT * FROM events WHERE id = #{event3.id}", :main) + expect(results).to be_empty + end end end end diff --git a/spec/support/database/click_house/hooks.rb b/spec/support/database/click_house/hooks.rb index 3878eeb751e043..27abd19dc3f23c 100644 --- a/spec/support/database/click_house/hooks.rb +++ b/spec/support/database/click_house/hooks.rb @@ -33,7 +33,6 @@ def ensure_schema def tables_for(db) @tables ||= {} @tables[db] ||= lookup_tables(db) - @tables[db] end def lookup_tables(db) -- GitLab From 5f67d6a2c843bb128f685de4bdb813b58935e61b Mon Sep 17 00:00:00 2001 From: Vladimir Shushlin Date: Tue, 18 Jul 2023 15:11:15 +0000 Subject: [PATCH 6/6] Apply reviewer suggestion --- spec/lib/gitlab/database/click_house_client_spec.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/lib/gitlab/database/click_house_client_spec.rb b/spec/lib/gitlab/database/click_house_client_spec.rb index d9a46dbd24169f..502d879bf6a32f 100644 --- a/spec/lib/gitlab/database/click_house_client_spec.rb +++ b/spec/lib/gitlab/database/click_house_client_spec.rb @@ -31,11 +31,11 @@ expect(result).to eq([{ 'value' => 1 }]) end - it 'returns data from the DB via `execute` method' do - result = ClickHouse::Client.select("SELECT 1 AS value", :main) + it 'does not return data via `execute` method' do + result = ClickHouse::Client.execute("SELECT 1 AS value", :main) # does not return data, just true if successful. Otherwise error. - expect(result).to be_truthy + expect(result).to eq(true) end describe 'data manipulation' do -- GitLab