From fba2c40dbb5fc4e5f8a22dc788a7da04d616588b Mon Sep 17 00:00:00 2001 From: Pavel Shutsin Date: Thu, 4 Dec 2025 13:28:53 +0100 Subject: [PATCH 1/5] Introduce basic ClickHouse aggregation engine Aggregation engine can be used to dynamically create aggregation queries based on a given request. No user-facing changes yet. --- Gemfile | 2 +- Gemfile.checksum | 2 +- Gemfile.lock | 4 +- Gemfile.next.checksum | 2 +- Gemfile.next.lock | 4 +- .../active_record/aggregation_result.rb | 2 +- .../click_house/aggregation_result.rb | 17 ++ .../aggregation/click_house/column.rb | 30 +++ .../database/aggregation/click_house/count.rb | 39 ++++ .../aggregation/click_house/engine.rb | 110 ++++++++++ .../database/aggregation/click_house/mean.rb | 28 +++ .../click_house/part_definition.rb | 12 + .../aggregation/click_house/quantile.rb | 33 +++ .../database/aggregation/click_house/rate.rb | 34 +++ lib/gitlab/database/aggregation/engine.rb | 1 + .../aggregation/parameterized_definition.rb | 2 +- .../aggregation/click_house/count_spec.rb | 95 ++++++++ .../aggregation/click_house/engine_spec.rb | 205 ++++++++++++++++++ .../aggregation/click_house/mean_spec.rb | 78 +++++++ .../aggregation/click_house/quantile_spec.rb | 139 ++++++++++++ .../aggregation/click_house/rate_spec.rb | 79 +++++++ .../aggregation/definitions_collector_spec.rb | 2 +- .../database/aggregation/formatter_spec.rb | 3 +- spec/support/helpers/click_house_helpers.rb | 18 +- .../execute_aggregation_matcher.rb | 4 +- .../clickhouse_engine_shared_context.rb | 62 ++++++ 26 files changed, 987 insertions(+), 20 deletions(-) create mode 100644 lib/gitlab/database/aggregation/click_house/aggregation_result.rb create mode 100644 lib/gitlab/database/aggregation/click_house/column.rb create mode 100644 lib/gitlab/database/aggregation/click_house/count.rb create mode 100644 lib/gitlab/database/aggregation/click_house/engine.rb create mode 100644 lib/gitlab/database/aggregation/click_house/mean.rb create mode 100644 
lib/gitlab/database/aggregation/click_house/part_definition.rb create mode 100644 lib/gitlab/database/aggregation/click_house/quantile.rb create mode 100644 lib/gitlab/database/aggregation/click_house/rate.rb create mode 100644 spec/lib/gitlab/database/aggregation/click_house/count_spec.rb create mode 100644 spec/lib/gitlab/database/aggregation/click_house/engine_spec.rb create mode 100644 spec/lib/gitlab/database/aggregation/click_house/mean_spec.rb create mode 100644 spec/lib/gitlab/database/aggregation/click_house/quantile_spec.rb create mode 100644 spec/lib/gitlab/database/aggregation/click_house/rate_spec.rb create mode 100644 spec/support/shared_contexts/lib/gitlab/database/aggregation/clickhouse_engine_shared_context.rb diff --git a/Gemfile b/Gemfile index d82fe047e8de91..0b646d90b9c22d 100644 --- a/Gemfile +++ b/Gemfile @@ -378,7 +378,7 @@ gem 'rack-proxy', '~> 0.7.7', feature_category: :shared # rubocop:todo Gemfile/M gem 'cssbundling-rails', '1.4.3', feature_category: :shared # rubocop:todo Gemfile/MissingFeatureCategory -- https://gitlab.com/gitlab-org/gitlab/-/issues/581839 gem 'terser', '1.0.2', feature_category: :shared # rubocop:todo Gemfile/MissingFeatureCategory -- https://gitlab.com/gitlab-org/gitlab/-/issues/581839 -gem 'click_house-client', '0.8.2', feature_category: :database +gem 'click_house-client', '0.8.4', feature_category: :database gem 'addressable', '~> 2.8', feature_category: :shared # rubocop:todo Gemfile/MissingFeatureCategory -- https://gitlab.com/gitlab-org/gitlab/-/issues/581839 gem 'gon', '~> 6.5.0', feature_category: :shared # rubocop:todo Gemfile/MissingFeatureCategory -- https://gitlab.com/gitlab-org/gitlab/-/issues/581839 gem 'request_store', '~> 1.7.0', feature_category: :shared # rubocop:todo Gemfile/MissingFeatureCategory -- https://gitlab.com/gitlab-org/gitlab/-/issues/581839 diff --git a/Gemfile.checksum b/Gemfile.checksum index bbeab09596a8c1..f423d54bffe892 100644 --- a/Gemfile.checksum +++ b/Gemfile.checksum @@ -82,7 
+82,7 @@ {"name":"citrus","version":"3.0.2","platform":"ruby","checksum":"4ec2412fc389ad186735f4baee1460f7900a8e130ffe3f216b30d4f9c684f650"}, {"name":"claide","version":"1.1.0","platform":"ruby","checksum":"6d3c5c089dde904d96aa30e73306d0d4bd444b1accb9b3125ce14a3c0183f82e"}, {"name":"claide-plugins","version":"0.9.2","platform":"ruby","checksum":"c7ea78bc067ab23bce8515497cdcdcb8f01c86dadfbe13c44644e382922c1c2e"}, -{"name":"click_house-client","version":"0.8.2","platform":"ruby","checksum":"49e25617a1bf11eb51045191569bcae000773dc58b49e8bffdbde2304ed565cc"}, +{"name":"click_house-client","version":"0.8.4","platform":"ruby","checksum":"8278a6319fd30e8cb5dd4e8ada247a16819a6d4f3548cb0efed6c188ee5dfd18"}, {"name":"coderay","version":"1.1.3","platform":"ruby","checksum":"dc530018a4684512f8f38143cd2a096c9f02a1fc2459edcfe534787a7fc77d4b"}, {"name":"coercible","version":"1.0.0","platform":"ruby","checksum":"5081ad24352cc8435ce5472bc2faa30260c7ea7f2102cc6a9f167c4d9bffaadc"}, {"name":"colored2","version":"3.1.2","platform":"ruby","checksum":"b13c2bd7eeae2cf7356a62501d398e72fde78780bd26aec6a979578293c28b4a"}, diff --git a/Gemfile.lock b/Gemfile.lock index 09ab3af1dd4573..214ca36fa118be 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -434,7 +434,7 @@ GEM cork nap open4 (~> 1.3) - click_house-client (0.8.2) + click_house-client (0.8.4) activerecord (>= 7.0, < 9.0) activesupport (>= 7.0, < 9.0) addressable (~> 2.8) @@ -2122,7 +2122,7 @@ DEPENDENCIES carrierwave (~> 1.3) charlock_holmes (~> 0.7.9) circuitbox (= 2.0.0) - click_house-client (= 0.8.2) + click_house-client (= 0.8.4) commonmarker (~> 0.23.10) concurrent-ruby (~> 1.1) connection_pool (~> 2.5.3) diff --git a/Gemfile.next.checksum b/Gemfile.next.checksum index bbeab09596a8c1..f423d54bffe892 100644 --- a/Gemfile.next.checksum +++ b/Gemfile.next.checksum @@ -82,7 +82,7 @@ {"name":"citrus","version":"3.0.2","platform":"ruby","checksum":"4ec2412fc389ad186735f4baee1460f7900a8e130ffe3f216b30d4f9c684f650"}, 
{"name":"claide","version":"1.1.0","platform":"ruby","checksum":"6d3c5c089dde904d96aa30e73306d0d4bd444b1accb9b3125ce14a3c0183f82e"}, {"name":"claide-plugins","version":"0.9.2","platform":"ruby","checksum":"c7ea78bc067ab23bce8515497cdcdcb8f01c86dadfbe13c44644e382922c1c2e"}, -{"name":"click_house-client","version":"0.8.2","platform":"ruby","checksum":"49e25617a1bf11eb51045191569bcae000773dc58b49e8bffdbde2304ed565cc"}, +{"name":"click_house-client","version":"0.8.4","platform":"ruby","checksum":"8278a6319fd30e8cb5dd4e8ada247a16819a6d4f3548cb0efed6c188ee5dfd18"}, {"name":"coderay","version":"1.1.3","platform":"ruby","checksum":"dc530018a4684512f8f38143cd2a096c9f02a1fc2459edcfe534787a7fc77d4b"}, {"name":"coercible","version":"1.0.0","platform":"ruby","checksum":"5081ad24352cc8435ce5472bc2faa30260c7ea7f2102cc6a9f167c4d9bffaadc"}, {"name":"colored2","version":"3.1.2","platform":"ruby","checksum":"b13c2bd7eeae2cf7356a62501d398e72fde78780bd26aec6a979578293c28b4a"}, diff --git a/Gemfile.next.lock b/Gemfile.next.lock index 09ab3af1dd4573..214ca36fa118be 100644 --- a/Gemfile.next.lock +++ b/Gemfile.next.lock @@ -434,7 +434,7 @@ GEM cork nap open4 (~> 1.3) - click_house-client (0.8.2) + click_house-client (0.8.4) activerecord (>= 7.0, < 9.0) activesupport (>= 7.0, < 9.0) addressable (~> 2.8) @@ -2122,7 +2122,7 @@ DEPENDENCIES carrierwave (~> 1.3) charlock_holmes (~> 0.7.9) circuitbox (= 2.0.0) - click_house-client (= 0.8.2) + click_house-client (= 0.8.4) commonmarker (~> 0.23.10) concurrent-ruby (~> 1.1) connection_pool (~> 2.5.3) diff --git a/lib/gitlab/database/aggregation/active_record/aggregation_result.rb b/lib/gitlab/database/aggregation/active_record/aggregation_result.rb index ea5f47db8cd978..1d9d64e8046393 100644 --- a/lib/gitlab/database/aggregation/active_record/aggregation_result.rb +++ b/lib/gitlab/database/aggregation/active_record/aggregation_result.rb @@ -8,7 +8,7 @@ class AggregationResult < Gitlab::Database::Aggregation::AggregationResult private def load_data 
- query.model.connection.select_all(query.limit(1000).to_sql) + query.model.connection.select_all(query.to_sql) end end end diff --git a/lib/gitlab/database/aggregation/click_house/aggregation_result.rb b/lib/gitlab/database/aggregation/click_house/aggregation_result.rb new file mode 100644 index 00000000000000..c6775c65d066da --- /dev/null +++ b/lib/gitlab/database/aggregation/click_house/aggregation_result.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Aggregation + module ClickHouse + class AggregationResult < Gitlab::Database::Aggregation::AggregationResult + private + + def load_data + ::ClickHouse::Client.select(query, :main) + end + end + end + end + end +end diff --git a/lib/gitlab/database/aggregation/click_house/column.rb b/lib/gitlab/database/aggregation/click_house/column.rb new file mode 100644 index 00000000000000..cef6727dcdab17 --- /dev/null +++ b/lib/gitlab/database/aggregation/click_house/column.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Aggregation + module ClickHouse + class Column < PartDefinition + attr_reader :name, :type, :expression, :secondary_expression + + def initialize(*args, **kwargs) + super + @secondary_expression = kwargs[:if] + end + + def secondary_arel(_context) + secondary_expression&.call + end + + def to_inner_arel(context) + expression ? 
expression.call : context[:scope][name] + end + + def to_outer_arel(context) + Arel::Table.new(context[:inner_query_name])[context.fetch(:local_alias, name)] + end + end + end + end + end +end diff --git a/lib/gitlab/database/aggregation/click_house/count.rb b/lib/gitlab/database/aggregation/click_house/count.rb new file mode 100644 index 00000000000000..7693da623d5b85 --- /dev/null +++ b/lib/gitlab/database/aggregation/click_house/count.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Aggregation + module ClickHouse + class Count < Column + attr_reader :distinct + + def initialize(name = 'total', type = :integer, expression = nil, distinct: false, **kwargs) + @distinct = distinct + super + end + + def identifier + :"#{name}_count" + end + + def to_inner_arel(...) + expression&.call + end + + def to_outer_arel(context) + return regular_count(context) unless secondary_expression + + inner_condition_column = Arel::Table.new(context[:inner_query_name])[context.fetch(:local_secondary_alias, + name)] + Arel::Nodes::NamedFunction.new('countIf', [inner_condition_column.eq(1)]) + end + + def regular_count(context) + inner_column = Arel::Table.new(context[:inner_query_name])[context[:local_alias]] if context[:local_alias] + Arel::Nodes::Count.new([inner_column || Arel.star], distinct) + end + end + end + end + end +end diff --git a/lib/gitlab/database/aggregation/click_house/engine.rb b/lib/gitlab/database/aggregation/click_house/engine.rb new file mode 100644 index 00000000000000..bf3fcc7790c468 --- /dev/null +++ b/lib/gitlab/database/aggregation/click_house/engine.rb @@ -0,0 +1,110 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Aggregation + module ClickHouse + class Engine < Gitlab::Database::Aggregation::Engine + extend ::Gitlab::Utils::Override + + INNER_QUERY_NAME = 'ch_aggregation_inner_query' + + class << self + attr_accessor :table_name, :table_primary_key + + def dimensions_mapping + { + 
column: Column + } + end + + def metrics_mapping + { + count: Count, + mean: Mean, + rate: Rate, + quantile: Quantile + } + end + end + + private + + # Example resulting query + # SELECT + # `ch_aggregation_inner_query`.`dimension_0` AS dimension_0, + # COUNT(*) AS metric_0, + # countIf(`ch_aggregation_inner_query`.`metric_1_condition` = 1) AS metric_1, + # avgIf(`ch_aggregation_inner_query`.`metric_2`, `ch_aggregation_inner_query`.`metric_2_condition` = 1) + # AS metric_2 + # FROM ( + # SELECT `agent_platform_sessions`.`flow_type` AS dimension_0, + # anyIfMerge(finished_event_at) IS NOT NULL AS metric_1_condition, + # anyIfMerge(finished_event_at)-anyIfMerge(created_event_at) AS metric_2, + # anyIfMerge(finished_event_at) IS NOT NULL AS metric_2_condition, + # `agent_platform_sessions`.`user_id`, + # `agent_platform_sessions`.`namespace_path`, + # `agent_platform_sessions`.`session_id`, + # `agent_platform_sessions`.`flow_type` + # FROM `agent_platform_sessions` + # GROUP BY ALL) ch_aggregation_inner_query + # GROUP BY ALL + override :execute_query_plan + def execute_query_plan(plan) + inner_projections, outer_projections = build_select_list_and_aliases(plan) + + query = context[:scope].select(*inner_projections).group(Arel.sql("ALL")) + + query = ::ClickHouse::Client::QueryBuilder.new(query, INNER_QUERY_NAME) + .select(*outer_projections).group(Arel.sql("ALL")) + + plan.order.each { |order| query = query.order(Arel.sql(order.instance_key), order.direction) } + + AggregationResult.new(self, plan, query) + end + + def build_select_list_and_aliases(plan) + inner_projections_list = [] + outer_projections_list = [] + + plan.dimensions.each do |dimension| + inner_projections, outer_projections = *build_part_selections(dimension) + inner_projections_list += inner_projections + outer_projections_list += outer_projections + end + + plan.metrics.each do |metric| + inner_projections, outer_projections = *build_part_selections(metric) + inner_projections_list += 
inner_projections + outer_projections_list += outer_projections + end + + # fill in primary_key + inner_projections_list += self.class.table_primary_key.map { |n| context[:scope][n] } + + [inner_projections_list.compact, outer_projections_list.compact] + end + + def build_part_selections(part) + alias_name = part.instance_key.to_s + inner_context = context.merge(:inner_query_name => INNER_QUERY_NAME, + part.name => part.configuration) + inner_projection = part.definition.to_inner_arel(inner_context)&.as(alias_name) + + if part.definition.secondary_expression + secondary_alias_name = "#{alias_name}_secondary" + secondary_projection = part.definition.secondary_arel(inner_context)&.as(secondary_alias_name) + end + + outer_context = inner_context + outer_context[:local_alias] = alias_name if inner_projection + outer_context[:local_secondary_alias] = secondary_alias_name if secondary_projection + outer_projection = part.definition.to_outer_arel(outer_context).as(alias_name) + + [[inner_projection, secondary_projection], [outer_projection]] + end + end + end + end + end +end diff --git a/lib/gitlab/database/aggregation/click_house/mean.rb b/lib/gitlab/database/aggregation/click_house/mean.rb new file mode 100644 index 00000000000000..f2c2ef656b1a67 --- /dev/null +++ b/lib/gitlab/database/aggregation/click_house/mean.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Aggregation + module ClickHouse + class Mean < Column + def initialize(name, type = :float, expression = nil, **kwargs) + super + end + + def identifier + :"mean_#{name}" + end + + def to_outer_arel(context) + return super.average unless secondary_expression + + inner_column = Arel::Table.new(context[:inner_query_name])[context.fetch(:local_alias, name)] + inner_condition_column = Arel::Table.new(context[:inner_query_name])[context.fetch(:local_secondary_alias, + name)] + Arel::Nodes::NamedFunction.new('avgIf', [inner_column, inner_condition_column.eq(1)]) + end + 
end + end + end + end +end diff --git a/lib/gitlab/database/aggregation/click_house/part_definition.rb b/lib/gitlab/database/aggregation/click_house/part_definition.rb new file mode 100644 index 00000000000000..47824a5457480d --- /dev/null +++ b/lib/gitlab/database/aggregation/click_house/part_definition.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Aggregation + module ClickHouse + class PartDefinition < ::Gitlab::Database::Aggregation::PartDefinition + end + end + end + end +end diff --git a/lib/gitlab/database/aggregation/click_house/quantile.rb b/lib/gitlab/database/aggregation/click_house/quantile.rb new file mode 100644 index 00000000000000..4b10074f6ebe12 --- /dev/null +++ b/lib/gitlab/database/aggregation/click_house/quantile.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Aggregation + module ClickHouse + class Quantile < Column + include ParameterizedDefinition + + self.supported_parameters = %i[quantile] + + DEFAULT_QUANTILE = 0.5 + + def initialize(name, type = :float, expression = nil, **kwargs) + super + end + + def identifier + :"#{name}_quantile" + end + + def to_outer_arel(context) + quantile = instance_parameter(:quantile, context[name]) || DEFAULT_QUANTILE + + inner_column = Arel::Table.new(context[:inner_query_name])[context.fetch(:local_alias, name)] + + Arel.sql("quantile(?)(?)", context[:scope].quote(quantile), inner_column) + end + end + end + end + end +end diff --git a/lib/gitlab/database/aggregation/click_house/rate.rb b/lib/gitlab/database/aggregation/click_house/rate.rb new file mode 100644 index 00000000000000..e372aa92b44ded --- /dev/null +++ b/lib/gitlab/database/aggregation/click_house/rate.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Aggregation + module ClickHouse + class Rate < Column + # Uses regular expression as numerator condition and "if" expression as denominator condition + def 
initialize(name, type = :float, numerator_if:, denominator_if: nil) + super(name, type, numerator_if, if: denominator_if) + end + + def identifier + :"#{name}_rate" + end + + def to_outer_arel(context) + inner_column = Arel::Table.new(context[:inner_query_name])[context.fetch(:local_alias, name)] + + if secondary_expression + inner_condition_column = Arel::Table.new(context[:inner_query_name])[context.fetch(:local_secondary_alias, + name)] + denominator = Arel::Nodes::NamedFunction.new('countIf', [inner_condition_column.eq(1)]) + else + denominator = Arel::Nodes::Count.new([Arel.star]) + end + + Arel::Nodes::NamedFunction.new('countIf', [inner_column.eq(1)]) / denominator + end + end + end + end + end +end diff --git a/lib/gitlab/database/aggregation/engine.rb b/lib/gitlab/database/aggregation/engine.rb index 176fb95bc6f0ae..e087b883a196ae 100644 --- a/lib/gitlab/database/aggregation/engine.rb +++ b/lib/gitlab/database/aggregation/engine.rb @@ -71,6 +71,7 @@ def initialize(context:) @context = context end + # @return [Gitlab::Database::Aggregation::AggregationResult] def execute(request) plan = QueryPlan.build(request, self) run_validations(plan) diff --git a/lib/gitlab/database/aggregation/parameterized_definition.rb b/lib/gitlab/database/aggregation/parameterized_definition.rb index 1a910f6816fbb1..5697447acf465a 100644 --- a/lib/gitlab/database/aggregation/parameterized_definition.rb +++ b/lib/gitlab/database/aggregation/parameterized_definition.rb @@ -41,7 +41,7 @@ def instance_key(configuration) private def instance_parameter(param_identifier, configuration) - configuration.dig(:parameters, param_identifier) + parameters[param_identifier] && configuration.dig(:parameters, param_identifier) end def parameterized? 
diff --git a/spec/lib/gitlab/database/aggregation/click_house/count_spec.rb b/spec/lib/gitlab/database/aggregation/click_house/count_spec.rb new file mode 100644 index 00000000000000..86ab28e39fb4c8 --- /dev/null +++ b/spec/lib/gitlab/database/aggregation/click_house/count_spec.rb @@ -0,0 +1,95 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::Aggregation::ClickHouse::Count, :click_house, feature_category: :database do + include_context 'with agent_platform_sessions ClickHouse aggregation engine' + + let(:engine_definition) do + Gitlab::Database::Aggregation::ClickHouse::Engine.build do + self.table_name = 'agent_platform_sessions' + self.table_primary_key = %w[namespace_path user_id session_id flow_type] + + metrics do + count + count :finished, if: -> { Arel.sql('anyIfMerge(finished_event_at) IS NOT NULL') } + count :users, :integer, -> { Arel.sql('user_id') }, distinct: true + end + end + end + + let(:session1) do # finished & long + created_at = DateTime.parse('2025-03-01 00:00:00 UTC') + { session_id: 1, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + finished_event_at: created_at + 10.minutes, + resumed_event_at: created_at + 9.minutes } + end + + let(:session2) do # finished & short + created_at = DateTime.parse('2025-03-02 00:00:00 UTC') + { session_id: 2, user_id: 2, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + finished_event_at: created_at + 3.minutes, + resumed_event_at: created_at + 2.minutes } + end + + let(:session3) do # not finished yet. 
in progress + created_at = DateTime.parse('2025-03-04 00:00:00 UTC') + { session_id: 3, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'code_review', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + resumed_event_at: created_at + 9.minutes } + end + + let(:all_data_rows) do + [session1, session2, session3] + end + + describe "count" do + it 'returns total sessions count' do + request = Gitlab::Database::Aggregation::Request.new( + metrics: [{ identifier: :total_count }] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { total_count: all_data_rows.count } + ]) + end + end + + describe "count distinct" do + it 'returns number of distinct users' do + request = Gitlab::Database::Aggregation::Request.new( + metrics: [{ identifier: :users_count }] + ) + + user_ids = all_data_rows.pluck(:user_id) + expect(user_ids.count).not_to eq(user_ids.uniq.count) + + expect(engine).to execute_aggregation(request).and_return([ + { users_count: user_ids.uniq.count } + ]) + end + end + + describe "conditional count" do + it 'returns sessions count with condition fulfilled' do + request = Gitlab::Database::Aggregation::Request.new( + metrics: [{ identifier: :finished_count }] + ) + + finished_timestamps = all_data_rows.pluck(:finished_event_at) + expect(finished_timestamps.count).not_to eq(finished_timestamps.compact.count) + + expect(engine).to execute_aggregation(request).and_return([ + { finished_count: finished_timestamps.compact.count } + ]) + end + end +end diff --git a/spec/lib/gitlab/database/aggregation/click_house/engine_spec.rb b/spec/lib/gitlab/database/aggregation/click_house/engine_spec.rb new file mode 100644 index 00000000000000..81778477b68892 --- /dev/null +++ b/spec/lib/gitlab/database/aggregation/click_house/engine_spec.rb @@ -0,0 +1,205 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::Aggregation::ClickHouse::Engine, 
:click_house, feature_category: :database do + include_context 'with agent_platform_sessions ClickHouse aggregation engine' + + let(:engine_definition) do + described_class.build do + self.table_name = 'agent_platform_sessions' + self.table_primary_key = %w[namespace_path user_id session_id flow_type] + + dimensions do + column :user_id, :integer + column :flow_type, :string + column :duration, :integer, -> { + Arel.sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))") + } + column :environment, :string, nil, formatter: ->(v) { v.upcase } + end + + metrics do + count + mean :duration, :float, -> { + Arel.sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))") + } + quantile :duration, :float, + -> { Arel.sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))") }, + parameters: { quantile: { type: :float } } + count :with_format, :integer, nil, formatter: ->(v) { v * -1 } + end + end + end + + let(:session1) do # finished & long + created_at = DateTime.parse('2025-03-01 00:00:00 UTC') + { session_id: 1, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + finished_event_at: created_at + 10.minutes, + resumed_event_at: created_at + 9.minutes } + end + + let(:session2) do # finished & short + created_at = DateTime.parse('2025-03-02 00:00:00 UTC') + { session_id: 2, user_id: 2, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + finished_event_at: created_at + 3.minutes, + resumed_event_at: created_at + 2.minutes } + end + + let(:session3) do # in progress + created_at = DateTime.parse('2025-03-04 00:00:00 UTC') + { session_id: 3, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 
'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + resumed_event_at: created_at + 9.minutes } + end + + let(:session4) do # dropped + created_at = DateTime.parse('2025-03-04 00:00:00 UTC') + { session_id: 4, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + dropped_event_at: created_at + 10.minutes, + resumed_event_at: created_at + 9.minutes } + end + + let(:session5) do # finished medium + created_at = DateTime.parse('2025-04-04 00:00:00 UTC') + { session_id: 5, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + finished_event_at: created_at + 7.minutes, + resumed_event_at: created_at + 9.minutes } + end + + let(:all_data_rows) do + [session1, session2, session3, session4, session5] + end + + describe "dimensions" do + it 'groups by single dimension' do + request = Gitlab::Database::Aggregation::Request.new( + dimensions: [{ identifier: :user_id }], + metrics: [{ identifier: :total_count }] + ) + + expect(engine).to execute_aggregation(request).and_return(match_array([ + { user_id: 2, total_count: 1 }, + { user_id: 1, total_count: 4 } + ])) + end + + it 'groups by multiple dimensions' do + request = Gitlab::Database::Aggregation::Request.new( + dimensions: [{ identifier: :user_id }, { identifier: :flow_type }], + metrics: [{ identifier: :total_count }] + ) + + expect(engine).to execute_aggregation(request).and_return(match_array([ + { user_id: 2, flow_type: 'chat', total_count: 1 }, + { user_id: 1, flow_type: 'chat', total_count: 4 } + ])) + end + + it 'groups by column with expression' do + request = Gitlab::Database::Aggregation::Request.new( + dimensions: [{ identifier: :duration }], + metrics: [{ identifier: :total_count }] + ) + + 
expect(engine).to execute_aggregation(request).and_return(match_array([ + { duration: nil, total_count: 2 }, + { duration: 600, total_count: 1 }, + { duration: 180, total_count: 1 }, + { duration: 420, total_count: 1 } + ])) + end + end + + describe "sorting" do + it 'accepts metric sort' do + request = Gitlab::Database::Aggregation::Request.new( + dimensions: [{ identifier: :duration }], + metrics: [{ identifier: :total_count }], + order: [{ identifier: :total_count, direction: :asc }] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { duration: 420, total_count: 1 }, + { duration: 180, total_count: 1 }, + { duration: 600, total_count: 1 }, + { duration: nil, total_count: 2 } + ]) + end + + it 'accepts dimension sort' do + request = Gitlab::Database::Aggregation::Request.new( + dimensions: [{ identifier: :duration }], + metrics: [{ identifier: :total_count }], + order: [{ identifier: :duration, direction: :asc }] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { duration: 180, total_count: 1 }, + { duration: 420, total_count: 1 }, + { duration: 600, total_count: 1 }, + { duration: nil, total_count: 2 } + ]) + end + + it 'accepts multiple orders' do + request = Gitlab::Database::Aggregation::Request.new( + dimensions: [{ identifier: :duration }], + metrics: [{ identifier: :total_count }], + order: [ + { identifier: :total_count, direction: :desc }, + { identifier: :duration, direction: :asc } + ] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { duration: nil, total_count: 2 }, + { duration: 180, total_count: 1 }, + { duration: 420, total_count: 1 }, + { duration: 600, total_count: 1 } + ]) + end + + it 'accepts order by parameterized metric' do + request = Gitlab::Database::Aggregation::Request.new( + dimensions: [{ identifier: :user_id }], + metrics: [{ identifier: :duration_quantile, parameters: { quantile: 0.1 } }], + order: [ + { identifier: :duration_quantile, parameters: { quantile: 0.1 }, 
direction: :desc } + ] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { user_id: 1, duration_quantile_14be4: 438 }, + { user_id: 2, duration_quantile_14be4: 180 } + ]) + end + end + + describe "formatting" do + it 'applies formatting if defined' do + request = Gitlab::Database::Aggregation::Request.new( + dimensions: [{ identifier: :environment }], + metrics: [{ identifier: :with_format_count }] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { environment: "PROD", with_format_count: -5 } + ]) + end + end +end diff --git a/spec/lib/gitlab/database/aggregation/click_house/mean_spec.rb b/spec/lib/gitlab/database/aggregation/click_house/mean_spec.rb new file mode 100644 index 00000000000000..b1c9e0ca7061ca --- /dev/null +++ b/spec/lib/gitlab/database/aggregation/click_house/mean_spec.rb @@ -0,0 +1,78 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::Aggregation::ClickHouse::Mean, :click_house, feature_category: :database do + include_context 'with agent_platform_sessions ClickHouse aggregation engine' + + let(:engine_definition) do + Gitlab::Database::Aggregation::ClickHouse::Engine.build do + self.table_name = 'agent_platform_sessions' + self.table_primary_key = %w[namespace_path user_id session_id flow_type] + + metrics do + mean :session_id, :float + mean :duration, :float, -> { + Arel.sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))") + } + end + end + end + + let(:session1) do # finished & long + created_at = DateTime.parse('2025-03-01 00:00:00 UTC') + { session_id: 1, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + finished_event_at: created_at + 10.minutes, + resumed_event_at: created_at + 9.minutes } + end + + let(:session2) do # finished & short + created_at = DateTime.parse('2025-03-02 00:00:00 
UTC') + { session_id: 2, user_id: 2, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + finished_event_at: created_at + 3.minutes, + resumed_event_at: created_at + 2.minutes } + end + + let(:session3) do # in progress + created_at = DateTime.parse('2025-03-04 00:00:00 UTC') + { session_id: 3, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'code_review', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + resumed_event_at: created_at + 9.minutes } + end + + let(:all_data_rows) do + [session1, session2, session3] + end + + describe "mean" do + it 'returns average session_id' do + request = Gitlab::Database::Aggregation::Request.new( + metrics: [{ identifier: :mean_session_id }] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { mean_session_id: 2 } + ]) + end + end + + describe "mean with expression" do + it 'returns average session duration' do + request = Gitlab::Database::Aggregation::Request.new( + metrics: [{ identifier: :mean_duration }] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { mean_duration: 390 } + ]) + end + end +end diff --git a/spec/lib/gitlab/database/aggregation/click_house/quantile_spec.rb b/spec/lib/gitlab/database/aggregation/click_house/quantile_spec.rb new file mode 100644 index 00000000000000..2d06a2f2de1da8 --- /dev/null +++ b/spec/lib/gitlab/database/aggregation/click_house/quantile_spec.rb @@ -0,0 +1,139 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::Aggregation::ClickHouse::Quantile, :click_house, feature_category: :database do + include_context 'with agent_platform_sessions ClickHouse aggregation engine' + + let(:engine_definition) do + Gitlab::Database::Aggregation::ClickHouse::Engine.build do + self.table_name = 'agent_platform_sessions' 
+ self.table_primary_key = %w[namespace_path user_id session_id flow_type] + + metrics do + quantile :duration, :float, + -> { Arel.sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))") } + quantile :duration_with_param, :float, + -> { Arel.sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))") }, + parameters: { quantile: { type: :float } } + end + end + end + + let(:session1) do # finished 1.minute + created_at = DateTime.parse('2025-03-01 00:00:00 UTC') + { session_id: 1, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + finished_event_at: created_at + 1.minute, + resumed_event_at: created_at + 9.minutes } + end + + let(:session2) do # finished 2 minutes + created_at = DateTime.parse('2025-03-02 00:00:00 UTC') + { session_id: 2, user_id: 2, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + finished_event_at: created_at + 2.minutes, + resumed_event_at: created_at + 2.minutes } + end + + let(:session3) do # finished 3 minutes + created_at = DateTime.parse('2025-03-02 00:00:00 UTC') + { session_id: 3, user_id: 2, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + finished_event_at: created_at + 3.minutes, + resumed_event_at: created_at + 2.minutes } + end + + let(:session4) do # finished 4 minutes + created_at = DateTime.parse('2025-03-02 00:00:00 UTC') + { session_id: 4, user_id: 2, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + finished_event_at: created_at + 4.minutes, + 
resumed_event_at: created_at + 2.minutes } + end + + let(:session5) do # finished 5 minutes + created_at = DateTime.parse('2025-03-02 00:00:00 UTC') + { session_id: 5, user_id: 2, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + finished_event_at: created_at + 5.minutes, + resumed_event_at: created_at + 2.minutes } + end + + let(:session6) do # dropped + created_at = DateTime.parse('2025-03-04 00:00:00 UTC') + { session_id: 6, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'code_review', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + dropped_event_at: created_at + 10.minutes, + resumed_event_at: created_at + 9.minutes } + end + + let(:session7) do # in progress + created_at = DateTime.parse('2025-03-04 00:00:00 UTC') + { session_id: 7, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'code_review', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + resumed_event_at: created_at + 9.minutes } + end + + let(:all_data_rows) do + [session1, session2, session3, session4, session5, session6, session7] + end + + describe "quantile without param" do + it 'returns median' do + request = Gitlab::Database::Aggregation::Request.new( + metrics: [{ identifier: :duration_quantile }] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { duration_quantile: 180.0 } + ]) + end + + it 'ignores passed param' do + request = Gitlab::Database::Aggregation::Request.new( + metrics: [{ identifier: :duration_quantile, parameters: { quantile: 0.1 } }] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { duration_quantile: 180.0 } + ]) + end + end + + describe "quantile with param" do + it 'returns median by default' do + request = 
Gitlab::Database::Aggregation::Request.new( + metrics: [{ identifier: :duration_with_param_quantile }] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { duration_with_param_quantile: 180.0 } + ]) + end + + it 'uses passed param' do + request = Gitlab::Database::Aggregation::Request.new( + metrics: [{ identifier: :duration_with_param_quantile, parameters: { quantile: 0.1 } }] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { duration_with_param_quantile_14be4: 84.0 } + ]) + end + end +end diff --git a/spec/lib/gitlab/database/aggregation/click_house/rate_spec.rb b/spec/lib/gitlab/database/aggregation/click_house/rate_spec.rb new file mode 100644 index 00000000000000..e66bc42a5ddd8b --- /dev/null +++ b/spec/lib/gitlab/database/aggregation/click_house/rate_spec.rb @@ -0,0 +1,79 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::Aggregation::ClickHouse::Rate, :click_house, feature_category: :database do + include_context 'with agent_platform_sessions ClickHouse aggregation engine' + + let(:engine_definition) do + Gitlab::Database::Aggregation::ClickHouse::Engine.build do + self.table_name = 'agent_platform_sessions' + self.table_primary_key = %w[namespace_path user_id session_id flow_type] + + metrics do + rate :completion, numerator_if: -> { Arel.sql('anyIfMerge(finished_event_at) IS NOT NULL') } + rate :finished_to_dropped, + denominator_if: -> { Arel.sql('anyIfMerge(dropped_event_at) IS NOT NULL') }, + numerator_if: -> { Arel.sql('anyIfMerge(finished_event_at) IS NOT NULL') } + end + end + end + + let(:session1) do # finished & long + created_at = DateTime.parse('2025-03-01 00:00:00 UTC') + { session_id: 1, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + finished_event_at: created_at + 10.minutes, + resumed_event_at: created_at + 9.minutes 
} + end + + let(:session2) do # finished & short + created_at = DateTime.parse('2025-03-02 00:00:00 UTC') + { session_id: 2, user_id: 2, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + finished_event_at: created_at + 3.minutes, + resumed_event_at: created_at + 2.minutes } + end + + let(:session3) do # dropped + created_at = DateTime.parse('2025-03-04 00:00:00 UTC') + { session_id: 3, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'code_review', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + dropped_event_at: created_at + 10.minutes, + resumed_event_at: created_at + 9.minutes } + end + + let(:all_data_rows) do + [session1, session2, session3] + end + + describe "rate with numerator only" do + it 'returns numerator/total_count rate' do + request = Gitlab::Database::Aggregation::Request.new( + metrics: [{ identifier: :completion_rate }] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { completion_rate: 2.0 / 3 } + ]) + end + end + + describe "rate with numerator and denominator" do + it 'returns numerator/denominator rate' do + request = Gitlab::Database::Aggregation::Request.new( + metrics: [{ identifier: :finished_to_dropped_rate }] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { finished_to_dropped_rate: 2.0 } + ]) + end + end +end diff --git a/spec/lib/gitlab/database/aggregation/definitions_collector_spec.rb b/spec/lib/gitlab/database/aggregation/definitions_collector_spec.rb index 5baffb6e983efe..853f1ddee7b59e 100644 --- a/spec/lib/gitlab/database/aggregation/definitions_collector_spec.rb +++ b/spec/lib/gitlab/database/aggregation/definitions_collector_spec.rb @@ -27,7 +27,7 @@ def initialize(*args) foo(1, 2, 3) end - expect(collected_definitions.map(&:args)).to eq([[1, 2, 3]]) + 
expect(collected_definitions.map(&:args)).to match_array([[1, 2, 3]]) end it 'raises NoMethodError for unknown definition' do diff --git a/spec/lib/gitlab/database/aggregation/formatter_spec.rb b/spec/lib/gitlab/database/aggregation/formatter_spec.rb index 9d67cc40716306..5cfbe22d667d87 100644 --- a/spec/lib/gitlab/database/aggregation/formatter_spec.rb +++ b/spec/lib/gitlab/database/aggregation/formatter_spec.rb @@ -33,8 +33,7 @@ def self.metrics_mapping end it 'leaves unknown data untouched' do - expect(formatter.format_data([{ 'dummy' => 3 }])) - .to eq([{ 'dummy' => 3 }]) + expect(formatter.format_data([{ 'dummy' => 3 }])).to eq([{ 'dummy' => 3 }]) end end end diff --git a/spec/support/helpers/click_house_helpers.rb b/spec/support/helpers/click_house_helpers.rb index 12574a42d32d79..392fbf2b72bb86 100644 --- a/spec/support/helpers/click_house_helpers.rb +++ b/spec/support/helpers/click_house_helpers.rb @@ -67,7 +67,7 @@ def self.default_timezone ActiveRecord.default_timezone end - def clickhouse_fixture(table, data, db = :main) + def clickhouse_fixture(table, data, db = :main, &block) return if data.empty? if data.map { |row| row.keys.sort }.uniq.size > 1 @@ -77,15 +77,21 @@ def clickhouse_fixture(table, data, db = :main) structure = data.first.keys rows = data.map do |row| - cols = structure.map do |col| + structure.map do |col| val = row[col].is_a?(Hash) ? row[col].to_json : row[col] - ClickHouseHelpers.quote(val) + val.is_a?(Arel::Nodes::SqlLiteral) ? 
val : ClickHouseHelpers.quote(val) end - - "(#{cols.join(', ')})" end - query = "INSERT INTO #{table} (#{structure.join(', ')}) VALUES #{rows.join(',')}" + query = if block + yield(rows, structure) + else + values_data = rows.map do |cols| + "(#{cols.join(', ')})" + end.join(',') + + "INSERT INTO #{table} (#{structure.join(', ')}) VALUES #{values_data}" + end ClickHouse::Client.execute(query, db) end diff --git a/spec/support/matchers/database/aggregations/execute_aggregation_matcher.rb b/spec/support/matchers/database/aggregations/execute_aggregation_matcher.rb index ebd60dc195aae8..95db9920e6f64b 100644 --- a/spec/support/matchers/database/aggregations/execute_aggregation_matcher.rb +++ b/spec/support/matchers/database/aggregations/execute_aggregation_matcher.rb @@ -3,7 +3,7 @@ # Matcher specific to Gitlab::Database::Aggregation::Engine class testing RSpec::Matchers.define :execute_aggregation do |request| chain :and_return do |expected_data| - @expected_data = expected_data.map(&:deep_stringify_keys) + @expected_data = expected_data end chain :with_errors do |expected_errors| @@ -13,7 +13,7 @@ match do |engine| response = engine.execute(request) - @actual_data = response[:data]&.to_a + @actual_data = response[:data]&.to_a&.map(&:with_indifferent_access) @actual_errors = response[:errors]&.to_a if @expected_data diff --git a/spec/support/shared_contexts/lib/gitlab/database/aggregation/clickhouse_engine_shared_context.rb b/spec/support/shared_contexts/lib/gitlab/database/aggregation/clickhouse_engine_shared_context.rb new file mode 100644 index 00000000000000..c2a3234740cf48 --- /dev/null +++ b/spec/support/shared_contexts/lib/gitlab/database/aggregation/clickhouse_engine_shared_context.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: true + +RSpec.shared_context 'with agent_platform_sessions ClickHouse aggregation engine' do + include ClickHouseHelpers + + let(:engine) { engine_definition.new(context: { scope: query_builder }) } + let(:query_builder) { 
ClickHouse::Client::QueryBuilder.new(engine_definition.table_name) } + + let(:row_structure) do + %i[user_id namespace_path project_id session_id flow_type environment session_year created_event_at + started_event_at finished_event_at dropped_event_at stopped_event_at resumed_event_at] + end + + let(:prepared_all_data_rows) do + all_data_rows.map do |row| + row_structure.to_h do |key| + value = row.fetch(key, nil) + if key.to_s.end_with?('_at') # timestamp + value = if value + Arel.sql("parseDateTime64BestEffort('#{value}', 6, 'UTC')") + else + Arel.sql("CAST(NULL AS Nullable(DateTime64(6, 'UTC')))") + end + end + + [key, value] + end + end + end + + before do + clickhouse_fixture(engine_definition.table_name, prepared_all_data_rows) do |rows, structure| + subselects = rows.map do |row| + fields = structure.map.with_index do |field, i| + "#{row[i]} AS #{field}" + end + + "SELECT #{fields.join(', ')}" + end.join(' UNION ALL ') + + <<-SQL + INSERT INTO agent_platform_sessions + SELECT + user_id, + namespace_path, + project_id, + session_id, + flow_type, + environment, + session_year, + anyIfState(toNullable(created_event_at), created_event_at IS NOT NULL) as created_event_at, + anyIfState(toNullable(started_event_at), started_event_at IS NOT NULL) as started_event_at, + anyIfState(toNullable(finished_event_at), finished_event_at IS NOT NULL) as finished_event_at, + anyIfState(toNullable(dropped_event_at), dropped_event_at IS NOT NULL) as dropped_event_at, + anyIfState(toNullable(stopped_event_at), stopped_event_at IS NOT NULL) as stopped_event_at, + anyIfState(toNullable(resumed_event_at), resumed_event_at IS NOT NULL) as resumed_event_at + FROM (#{subselects}) + GROUP BY user_id, namespace_path, project_id, session_id, flow_type, environment, session_year + SQL + end + end +end -- GitLab From 68db2a4eeb99442e5deeebe4af2ec65ac340da5d Mon Sep 17 00:00:00 2001 From: Pavel Shutsin Date: Mon, 8 Dec 2025 14:19:10 +0100 Subject: [PATCH 2/5] Add basic filtering to 
aggregation engines Basic filters include `exact_match` and `range` filters for CH engine and `exact_match` filter for PG engine. --- .../aggregation/active_record/column.rb | 11 -- .../aggregation/active_record/engine.rb | 22 ++- .../active_record/exact_match_filter.rb | 15 ++ .../active_record/filter_definition.rb | 41 +++++ .../active_record/part_definition.rb | 22 +++ .../active_record/timestamp_column.rb | 19 ++- .../aggregation/click_house/engine.rb | 9 ++ .../click_house/exact_match_filter.rb | 21 +++ .../click_house/filter_definition.rb | 50 ++++++ .../aggregation/click_house/range_filter.rb | 21 +++ lib/gitlab/database/aggregation/engine.rb | 55 +++---- .../database/aggregation/part_definition.rb | 4 + lib/gitlab/database/aggregation/query_plan.rb | 116 +++++++------- .../aggregation/query_plan/base_part.rb | 14 ++ .../database/aggregation/query_plan/filter.rb | 12 ++ .../database/aggregation/query_plan/order.rb | 10 +- lib/gitlab/database/aggregation/request.rb | 11 +- locale/gitlab.pot | 7 +- .../aggregation/active_record/engine_spec.rb | 112 ------------- .../active_record/exact_match_filter_spec.rb | 81 ++++++++++ .../aggregation/click_house/engine_spec.rb | 34 ++++ .../click_house/exact_match_filter_spec.rb | 77 +++++++++ .../click_house/range_filter_spec.rb | 88 ++++++++++ .../database/aggregation/engine_spec.rb | 80 +++++++--- .../database/aggregation/formatter_spec.rb | 2 +- .../database/aggregation/query_plan_spec.rb | 150 ++++++++++++++++++ .../execute_aggregation_matcher.rb | 11 +- 27 files changed, 849 insertions(+), 246 deletions(-) create mode 100644 lib/gitlab/database/aggregation/active_record/exact_match_filter.rb create mode 100644 lib/gitlab/database/aggregation/active_record/filter_definition.rb create mode 100644 lib/gitlab/database/aggregation/active_record/part_definition.rb create mode 100644 lib/gitlab/database/aggregation/click_house/exact_match_filter.rb create mode 100644 
lib/gitlab/database/aggregation/click_house/filter_definition.rb create mode 100644 lib/gitlab/database/aggregation/click_house/range_filter.rb create mode 100644 lib/gitlab/database/aggregation/query_plan/filter.rb create mode 100644 spec/lib/gitlab/database/aggregation/active_record/exact_match_filter_spec.rb create mode 100644 spec/lib/gitlab/database/aggregation/click_house/exact_match_filter_spec.rb create mode 100644 spec/lib/gitlab/database/aggregation/click_house/range_filter_spec.rb create mode 100644 spec/lib/gitlab/database/aggregation/query_plan_spec.rb diff --git a/lib/gitlab/database/aggregation/active_record/column.rb b/lib/gitlab/database/aggregation/active_record/column.rb index 0e372e8fbbbe9e..0aa819e002b28c 100644 --- a/lib/gitlab/database/aggregation/active_record/column.rb +++ b/lib/gitlab/database/aggregation/active_record/column.rb @@ -42,21 +42,10 @@ module ActiveRecord # # INNER JOIN "issue_metrics" ON "issue_metrics"."issue_id" = "issues"."id" # # WHERE "issues"."project_id" = 1 class Column < PartDefinition - attr_reader :scope_proc - - def initialize(*args, scope_proc: nil, **kwargs) - super - @scope_proc = scope_proc - end - def to_hash super.merge(kind: :column) end - def apply_scope(scope, context) - scope_proc ? scope_proc.call(scope, context) : scope - end - # Returns an Arel node representing this column or metric in a SELECT statement. # Subclasses may wrap the expression (e.g., date_trunc, AVG, COUNT). 
def to_arel(context) diff --git a/lib/gitlab/database/aggregation/active_record/engine.rb b/lib/gitlab/database/aggregation/active_record/engine.rb index 40d67848506eab..7c5cab1a3e67f9 100644 --- a/lib/gitlab/database/aggregation/active_record/engine.rb +++ b/lib/gitlab/database/aggregation/active_record/engine.rb @@ -24,6 +24,13 @@ def self.dimensions_mapping } end + override :filters_mapping + def self.filters_mapping + { + exact_match: ExactMatchFilter + } + end + private override :execute_query_plan @@ -31,6 +38,7 @@ def execute_query_plan(plan) projections, dimension_aliases, metric_aliases = build_select_list_and_aliases(plan) relation = context[:scope].select(*projections) + relation = apply_filters(relation, plan) relation = apply_scope(relation, plan) relation = apply_grouping(relation, dimension_aliases) relation = apply_order(relation, plan, dimension_aliases, metric_aliases) @@ -38,14 +46,6 @@ def execute_query_plan(plan) AggregationResult.new(self, plan, relation) end - def run_validations(plan) - super - - return unless plan.dimensions.size > 2 - - errors.add(:dimensions, s_("AggregationEngine|maximum two dimensions are supported")) - end - # Returns [projections, dimension_aliases, metric_aliases] def build_select_list_and_aliases(plan) projections = [] @@ -75,6 +75,12 @@ def apply_scope(relation, plan) end end + def apply_filters(relation, plan) + plan.filters.reduce(relation) do |rel, part| + part.definition.apply(rel, part.configuration) + end + end + def apply_grouping(relation, dimension_aliases) return relation if dimension_aliases.empty? 
diff --git a/lib/gitlab/database/aggregation/active_record/exact_match_filter.rb b/lib/gitlab/database/aggregation/active_record/exact_match_filter.rb new file mode 100644 index 00000000000000..a228f5bb34db0d --- /dev/null +++ b/lib/gitlab/database/aggregation/active_record/exact_match_filter.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Aggregation + module ActiveRecord + # Filter that keeps rows whose column value is one of the requested values. + class ExactMatchFilter < FilterDefinition + # Adds a `column IN (values)` condition built from filter_config[:values]. + def apply(relation, filter_config) + relation.where(column(relation).in(filter_config[:values])) + end + end + end + end + end +end diff --git a/lib/gitlab/database/aggregation/active_record/filter_definition.rb b/lib/gitlab/database/aggregation/active_record/filter_definition.rb new file mode 100644 index 00000000000000..464ecbacd4110e --- /dev/null +++ b/lib/gitlab/database/aggregation/active_record/filter_definition.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Aggregation + module ActiveRecord + # Base class for ActiveRecord engine filters; subclasses implement #apply. + class FilterDefinition < PartDefinition + attr_reader :max_size + + # max_size: optional upper bound on the number of values a request may pass. + def initialize(*args, max_size: nil, **kwargs) + super + @max_size = max_size + end + + # Returns the relation narrowed by this filter. Must be overridden by subclasses. + def apply(_relation, _filter_config) + raise NoMethodError + end + + # Validation hook invoked with the query plan part that uses this definition. + def validate_part(part) + validate_max_size(part) + end + + private + + # Custom expression when one is defined, otherwise the relation's column named after the filter. + def column(relation) + expression&.call || relation.arel_table[name] + end + + # Adds an error to the part when more than max_size values are requested. + # A missing :values entry counts as zero values instead of raising NoMethodError. + def validate_max_size(part) + return unless max_size && part.configuration[:values]&.size.to_i > max_size + + part.errors.add(:values, + format(s_("AggregationEngine|maximum size of %{max_size} exceeded for filter `%{key}`"), + max_size: max_size, + key: part.instance_key)) + end + end + end + end + end +end diff --git a/lib/gitlab/database/aggregation/active_record/part_definition.rb b/lib/gitlab/database/aggregation/active_record/part_definition.rb new file mode 100644 index 00000000000000..d21368d7ee47ab --- /dev/null +++ b/lib/gitlab/database/aggregation/active_record/part_definition.rb @@ -0,0
+1,29 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Aggregation + module ActiveRecord + # Part definition for the ActiveRecord engine that can carry an optional + # scope_proc used to adjust the query scope. + class PartDefinition < ::Gitlab::Database::Aggregation::PartDefinition + attr_reader :scope_proc + + # scope_proc: optional callable receiving (scope, context). + def initialize(*args, scope_proc: nil, **kwargs) + super + @scope_proc = scope_proc + end + + # Returns the result of calling scope_proc with the scope and context, + # or the scope itself when no scope_proc was given. + def apply_scope(scope, context) + return scope unless scope_proc + + scope_proc.call(scope, context) + end + end + end + end + end +end diff --git a/lib/gitlab/database/aggregation/active_record/timestamp_column.rb b/lib/gitlab/database/aggregation/active_record/timestamp_column.rb index a9962a3fe0e97d..9e698f9a987038 100644 --- a/lib/gitlab/database/aggregation/active_record/timestamp_column.rb +++ b/lib/gitlab/database/aggregation/active_record/timestamp_column.rb @@ -22,16 +22,31 @@ def to_hash super.merge(kind: :timestamp_column) end + def validate_part(part) + validate_granularity(part) + end + def to_arel(context) granularity = instance_parameter(:granularity, context[name]) || DEFAULT_GRANULARITY - # TODO: Validate granularity in query plan! - quoted_string = context[:scope].model.connection.quote(GRANULARITIES_MAP[granularity]) expr = expression ?
expression.call : context[:scope].arel_table[name] Arel::Nodes::NamedFunction.new('date_trunc', [Arel.sql(quoted_string), expr]) end + + private + + def validate_granularity(part) + granularity = part.configuration[:granularity] + return unless granularity + return if granularity.in?(parameters.dig(:granularity, :in) || []) + + part.errors.add(:granularity, + format(s_("AggregationEngine|Unknown granularity \"%{granularity}\""), + granularity: granularity) + ) + end end end end diff --git a/lib/gitlab/database/aggregation/click_house/engine.rb b/lib/gitlab/database/aggregation/click_house/engine.rb index bf3fcc7790c468..df09db496efa3e 100644 --- a/lib/gitlab/database/aggregation/click_house/engine.rb +++ b/lib/gitlab/database/aggregation/click_house/engine.rb @@ -26,6 +26,13 @@ def metrics_mapping quantile: Quantile } end + + def filters_mapping + { + exact_match: ExactMatchFilter, + range: RangeFilter + } + end end private @@ -55,6 +62,8 @@ def execute_query_plan(plan) query = context[:scope].select(*inner_projections).group(Arel.sql("ALL")) + plan.filters.each { |filter| query = filter.definition.apply_inner(query, filter.configuration) } + query = ::ClickHouse::Client::QueryBuilder.new(query, INNER_QUERY_NAME) .select(*outer_projections).group(Arel.sql("ALL")) diff --git a/lib/gitlab/database/aggregation/click_house/exact_match_filter.rb b/lib/gitlab/database/aggregation/click_house/exact_match_filter.rb new file mode 100644 index 00000000000000..9f87934f59ee86 --- /dev/null +++ b/lib/gitlab/database/aggregation/click_house/exact_match_filter.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Aggregation + module ClickHouse + class ExactMatchFilter < FilterDefinition + private + + def apply(query_builder, filter_config) + if merge_column? 
+ query_builder.having(column(query_builder).in(filter_config[:values])) + else + query_builder.where(column(query_builder).in(filter_config[:values])) + end + end + end + end + end + end +end diff --git a/lib/gitlab/database/aggregation/click_house/filter_definition.rb b/lib/gitlab/database/aggregation/click_house/filter_definition.rb new file mode 100644 index 00000000000000..21da6a20f20ce2 --- /dev/null +++ b/lib/gitlab/database/aggregation/click_house/filter_definition.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Aggregation + module ClickHouse + class FilterDefinition < PartDefinition + attr_reader :max_size + + def initialize(name, type, expression = nil, **kwargs) + super + @merge_column = kwargs[:merge_column] + @max_size = kwargs[:max_size] + end + + def apply_inner(query_builder, filter_config) + apply(query_builder, filter_config) + end + + def validate_part(part) + validate_max_size(part) + end + + private + + def merge_column? 
+ !!@merge_column + end + + def column(query_builder) + expression&.call || query_builder.table[name] + end + + def apply(_query_builder, _filter_config) + raise NoMethodError + end + + def validate_max_size(part) + return unless max_size && part.configuration[:values]&.size.to_i > max_size # missing values count as 0 + + part.errors.add(:values, + format(s_("AggregationEngine|maximum size of %{max_size} exceeded for filter `%{key}`"), + max_size: max_size, + key: part.instance_key)) + end + end + end + end + end +end diff --git a/lib/gitlab/database/aggregation/click_house/range_filter.rb b/lib/gitlab/database/aggregation/click_house/range_filter.rb new file mode 100644 index 00000000000000..5b31e0c0c00a80 --- /dev/null +++ b/lib/gitlab/database/aggregation/click_house/range_filter.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Aggregation + module ClickHouse + class RangeFilter < FilterDefinition + private + + # Applies column.between(filter_config[:values]); values are presumably a Range, confirm with callers. + # Merge columns are filtered with HAVING, all other columns with WHERE. + def apply(query_builder, filter_config) + condition = column(query_builder).between(filter_config[:values]) + + merge_column? ? query_builder.having(condition) : query_builder.where(condition) + end + end + end + end + end +end diff --git a/lib/gitlab/database/aggregation/engine.rb b/lib/gitlab/database/aggregation/engine.rb index e087b883a196ae..44a45d0a8ef3b8 100644 --- a/lib/gitlab/database/aggregation/engine.rb +++ b/lib/gitlab/database/aggregation/engine.rb @@ -15,6 +15,10 @@ def metrics_mapping raise NoMethodError end + def filters_mapping + raise NoMethodError + end + def dimensions_mapping raise NoMethodError end @@ -23,6 +27,17 @@ def build(&block) Class.new(self).tap { |klass| klass.class_eval(&block) } end + def filters(&block) + @filters ||= [] + return @filters unless block + + @filters += DefinitionsCollector.new(filters_mapping).collect(&block) + + guard_definitions_uniqueness!(filters) + + @filters + end + def dimensions(&block) @dimensions ||= [] @@ -30,7 +45,7 @@ def dimensions(&block)
@dimensions += DefinitionsCollector.new(dimensions_mapping).collect(&block) - guard_definitions_uniqueness! + guard_definitions_uniqueness!(dimensions + metrics) @dimensions end @@ -41,13 +56,14 @@ def metrics(&block) @metrics += DefinitionsCollector.new(metrics_mapping).collect(&block) - guard_definitions_uniqueness! + guard_definitions_uniqueness!(dimensions + metrics) @metrics end def to_hash { + filters: filters.map(&:to_hash), metrics: metrics.map(&:to_hash), dimensions: dimensions.map(&:to_hash) } @@ -55,8 +71,8 @@ def to_hash private - def guard_definitions_uniqueness! - identifiers = dimensions.map(&:identifier) + metrics.map(&:identifier) + def guard_definitions_uniqueness!(parts) + identifiers = parts.map(&:identifier) duplicates = identifiers.group_by(&:itself).select { |_k, v| v.size > 1 }.keys return unless duplicates.present? @@ -73,8 +89,10 @@ def initialize(context:) # @return [Gitlab::Database::Aggregation::AggregationResult] def execute(request) - plan = QueryPlan.build(request, self) - run_validations(plan) + validate + + plan = request.to_query_plan(self) + plan.validate errors.merge!(plan.errors) if errors.any? @@ -89,31 +107,6 @@ def execute(request) def execute_query_plan(_plan) raise NoMethodError end - - # Override this method if you want to add engine-specific validations. - def run_validations(plan) - ensure_instance_keys(:dimensions, plan.dimensions) - ensure_instance_keys(:metrics, plan.metrics) - - return unless plan.metrics.empty? - - errors.add(:metrics, s_("AggregationEngine|at least one metric is required")) - end - - def ensure_instance_keys(error_key, collection) - instance_keys = collection.group_by(&:instance_key) - duplicates = instance_keys.select { |_value, occurrences| occurrences.size > 1 }.values - - return unless duplicates.any? 
- - duplicate = duplicates.first.first - - placeholder = { identifier: duplicate.definition.identifier } - errors.add( - error_key, - format(s_("AggregationEngine|duplicated identifier found: %{identifier}"), placeholder) - ) - end end end end diff --git a/lib/gitlab/database/aggregation/part_definition.rb b/lib/gitlab/database/aggregation/part_definition.rb index 6f3136090f0b0d..41404e35a1a78c 100644 --- a/lib/gitlab/database/aggregation/part_definition.rb +++ b/lib/gitlab/database/aggregation/part_definition.rb @@ -34,6 +34,10 @@ def format_value(val) formatter ? formatter.call(val) : val end + def validate_part(_plan_part) + # no-op by default + end + # part identifier. Must be unique across all part definitions. def identifier name diff --git a/lib/gitlab/database/aggregation/query_plan.rb b/lib/gitlab/database/aggregation/query_plan.rb index 1a78924dc65ffe..f62d87f2787443 100644 --- a/lib/gitlab/database/aggregation/query_plan.rb +++ b/lib/gitlab/database/aggregation/query_plan.rb @@ -6,62 +6,52 @@ module Aggregation class QueryPlan include ActiveModel::Validations - attr_reader :dimensions, :metrics, :order - - class << self - def build(request, engine) - engine_definition = engine.class - plan = new - - dimension_definitions = engine_definition.dimensions.index_by(&:identifier) - request.dimensions.each do |configuration| - dimension_definition = dimension_definitions[configuration[:identifier]] - if dimension_definition.nil? - add_error_for(plan, :dimensions, configuration[:identifier]) - break - end - - plan.add_dimension(dimension_definition, configuration) - end - - metric_definitions = engine_definition.metrics.index_by(&:identifier) - request.metrics.each do |configuration| - metric_definition = metric_definitions[configuration[:identifier]] - if metric_definition.nil? 
- add_error_for(plan, :metrics, configuration[:identifier]) - break - end + attr_reader :engine, :request - plan.add_metric(metric_definition, configuration) - end + validate :validate_request + validate :validate_parts + validate :validate_instance_keys_uniqueness - request.order.each do |configuration| - plan_part = plan.orderable_parts.detect do |plan_part| - configuration.except(:direction) == plan_part.configuration - end - - unless plan_part - add_error_for(plan, :order, configuration[:identifier]) - break - end + def initialize(engine, request) + @engine = engine + @request = request + end - plan.add_order(plan_part, configuration) + def filters + @filters ||= begin + definitions = engine.class.filters.index_by(&:identifier) + request.filters.map do |configuration| + Filter.new(definitions[configuration[:identifier]], configuration) end + end + end - plan + def dimensions + @dimensions ||= begin + definitions = engine.class.dimensions.index_by(&:identifier) + request.dimensions.map do |configuration| + Dimension.new(definitions[configuration[:identifier]], configuration) + end end + end - def add_error_for(plan, object, identifier) - plan.errors.add(object, - format(s_("AggregationEngine|the specified identifier is not available: '%{identifier}'"), - identifier: identifier)) + def metrics + @metrics ||= begin + definitions = engine.class.metrics.index_by(&:identifier) + request.metrics.map do |configuration| + Metric.new(definitions[configuration[:identifier]], configuration) + end end end - def initialize - @dimensions = [] - @metrics = [] - @order = [] + def order + @order ||= request.order.map do |configuration| + # Resolve the orderable part whose configuration equals this entry minus :direction. + target = configuration.except(:direction) + plan_part = orderable_parts.detect { |candidate| target == candidate.configuration } + + Order.new(plan_part, configuration) + end end def orderable_parts @@ -69,19 +59,39 @@ def orderable_parts end def parts - dimensions + metrics + order + filters + dimensions + metrics + order end - def
add_dimension(definition, configuration) - @dimensions << Dimension.new(definition, configuration) + private + + def validate_parts + parts.reject(&:valid?).each do |invalid_part| + errors.merge!(invalid_part.errors) + end end - def add_metric(definition, configuration) - @metrics << Metric.new(definition, configuration) + def validate_request + return unless request.metrics.empty? + + errors.add(:metrics, + s_("AggregationEngine|at least one metric is required")) end - def add_order(plan_part, configuration) - @order << Order.new(plan_part, configuration) + def validate_instance_keys_uniqueness + return unless errors.empty? + + instance_keys = (dimensions + metrics).group_by(&:instance_key) + duplicates = instance_keys.select { |_value, occurrences| occurrences.size > 1 }.values.flatten + + return unless duplicates.any? + + identifiers = duplicates.map(&:definition).map(&:identifier).uniq + + placeholder = { identifiers: identifiers.join(', ') } + errors.add( + :base, + format(s_("AggregationEngine|duplicated identifiers found: %{identifiers}"), placeholder) + ) end end end diff --git a/lib/gitlab/database/aggregation/query_plan/base_part.rb b/lib/gitlab/database/aggregation/query_plan/base_part.rb index 092006a0a36312..14b40ba966ea0d 100644 --- a/lib/gitlab/database/aggregation/query_plan/base_part.rb +++ b/lib/gitlab/database/aggregation/query_plan/base_part.rb @@ -5,10 +5,15 @@ module Database module Aggregation class QueryPlan class BasePart + include ActiveModel::Validations + attr_reader :definition, :configuration delegate :name, :type, to: :definition + validate :validate_definition_presence + validate -> { definition&.validate_part(self) } + def initialize(definition, configuration) @definition = definition @configuration = configuration @@ -17,6 +22,15 @@ def initialize(definition, configuration) def instance_key definition.instance_key(**configuration) end + + private + + def validate_definition_presence + return if definition + + errors.add(:base, 
format(s_("AggregationEngine|the specified identifier is not available: '%{identifier}'"), + identifier: configuration[:identifier])) + end end end end diff --git a/lib/gitlab/database/aggregation/query_plan/filter.rb b/lib/gitlab/database/aggregation/query_plan/filter.rb new file mode 100644 index 00000000000000..a49d92fc7d10ba --- /dev/null +++ b/lib/gitlab/database/aggregation/query_plan/filter.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Aggregation + class QueryPlan + class Filter < BasePart + end + end + end + end +end diff --git a/lib/gitlab/database/aggregation/query_plan/order.rb b/lib/gitlab/database/aggregation/query_plan/order.rb index b7f96935aa1773..8003f2c045aa60 100644 --- a/lib/gitlab/database/aggregation/query_plan/order.rb +++ b/lib/gitlab/database/aggregation/query_plan/order.rb @@ -7,13 +7,19 @@ class QueryPlan class Order < BasePart attr_reader :plan_part - delegate :definition, :instance_key, to: :plan_part - def initialize(plan_part, configuration) @plan_part = plan_part @configuration = configuration end + def definition + plan_part&.definition + end + + def instance_key + plan_part&.instance_key + end + def direction configuration[:direction] end diff --git a/lib/gitlab/database/aggregation/request.rb b/lib/gitlab/database/aggregation/request.rb index a2158c6490f749..4edb25399456d6 100644 --- a/lib/gitlab/database/aggregation/request.rb +++ b/lib/gitlab/database/aggregation/request.rb @@ -4,13 +4,18 @@ module Gitlab module Database module Aggregation class Request - attr_reader :metrics, :dimensions, :order + attr_reader :filters, :dimensions, :metrics, :order - def initialize(metrics:, dimensions: [], order: []) - @metrics = metrics + def initialize(metrics:, filters: [], dimensions: [], order: []) + @filters = filters || [] @dimensions = dimensions || [] + @metrics = metrics @order = order || [] end + + def to_query_plan(engine) + QueryPlan.new(engine, self) + end end end end diff --git 
a/locale/gitlab.pot b/locale/gitlab.pot index f084baa081bc04..fb75c74d62d46d 100644 --- a/locale/gitlab.pot +++ b/locale/gitlab.pot @@ -6685,13 +6685,16 @@ msgstr "" msgid "Agent not found for provided id." msgstr "" +msgid "AggregationEngine|Unknown granularity \"%{granularity}\"" +msgstr "" + msgid "AggregationEngine|at least one metric is required" msgstr "" -msgid "AggregationEngine|duplicated identifier found: %{identifier}" +msgid "AggregationEngine|duplicated identifiers found: %{identifiers}" msgstr "" -msgid "AggregationEngine|maximum two dimensions are supported" +msgid "AggregationEngine|maximum size of %{max_size} exceeded for filter `%{key}`" msgstr "" msgid "AggregationEngine|the specified identifier is not available: '%{identifier}'" diff --git a/spec/lib/gitlab/database/aggregation/active_record/engine_spec.rb b/spec/lib/gitlab/database/aggregation/active_record/engine_spec.rb index 04ecfd91d2bb9f..bfb9582f62ea83 100644 --- a/spec/lib/gitlab/database/aggregation/active_record/engine_spec.rb +++ b/spec/lib/gitlab/database/aggregation/active_record/engine_spec.rb @@ -201,116 +201,4 @@ end end end - - describe '.to_hash' do - it 'exposes the engine_definition definition' do - expect(engine_definition.to_hash).to match({ - dimensions: array_including( - hash_including({ identifier: :state_id, name: :state_id }) - ), - metrics: array_including( - hash_including({ identifier: :total_count }) - ) - }) - end - end - - describe 'validations' do - context 'when metric cannot be found' do - it 'returns error' do - request = Gitlab::Database::Aggregation::Request.new( - dimensions: [{ identifier: :state_id }], - metrics: [{ identifier: :missing_identfier }] - ) - - expect(engine).to execute_aggregation(request).with_errors(array_including( - a_string_matching(/identifier is not available: 'missing_identfier'/) - )) - end - end - - context 'when dimension cannot be found' do - it 'returns error' do - request = Gitlab::Database::Aggregation::Request.new( - 
dimensions: [{ identifier: :missing_identfier }], - metrics: [{ identifier: :mean_time_estimate }] - ) - - expect(engine).to execute_aggregation(request).with_errors(array_including( - a_string_matching(/identifier is not available: 'missing_identfier'/) - )) - end - end - - context 'when no metric is passed in' do - it 'returns error' do - request = Gitlab::Database::Aggregation::Request.new( - dimensions: [{ identifier: :state_id }], - metrics: [] - ) - - expect(engine).to execute_aggregation(request).with_errors(array_including( - a_string_matching(/at least one metric is required/) - )) - end - end - - context 'when dimensions limit is reached' do - it 'returns error' do - request = Gitlab::Database::Aggregation::Request.new( - dimensions: [ - { identifier: :state_id }, - { identifier: :project_id }, - { identifier: :state }, - { identifier: :merged_at, parameters: { granularity: 'monthly' } } - ], - metrics: [{ identifier: :mean_time_estimate }] - ) - - expect(engine).to execute_aggregation(request).with_errors(array_including( - a_string_matching(/maximum two dimensions/) - )) - end - end - - context 'when order cannot be found' do - it 'returns error' do - request = Gitlab::Database::Aggregation::Request.new( - dimensions: [{ identifier: :state_id }], - metrics: [{ identifier: :mean_time_estimate }], - order: [{ identifier: :unknown, direction: :asc }] - ) - - expect(engine).to execute_aggregation(request).with_errors(array_including( - a_string_matching(/identifier is not available/) - )) - end - end - - context 'when duplicated dimensions are passed in' do - it 'returns error' do - request = Gitlab::Database::Aggregation::Request.new( - dimensions: [{ identifier: :state_id }, { identifier: :state_id }], - metrics: [{ identifier: :mean_time_estimate }] - ) - - expect(engine).to execute_aggregation(request).with_errors(array_including( - a_string_matching(/duplicated identifier found: state_id/) - )) - end - end - - context 'when duplicated metrics are 
passed in' do - it 'returns error' do - request = Gitlab::Database::Aggregation::Request.new( - dimensions: [{ identifier: :state_id }], - metrics: [{ identifier: :mean_time_estimate }, { identifier: :mean_time_estimate }] - ) - - expect(engine).to execute_aggregation(request).with_errors(array_including( - a_string_matching(/duplicated identifier found: mean_time_estimate/) - )) - end - end - end end diff --git a/spec/lib/gitlab/database/aggregation/active_record/exact_match_filter_spec.rb b/spec/lib/gitlab/database/aggregation/active_record/exact_match_filter_spec.rb new file mode 100644 index 00000000000000..146538e83a1ee5 --- /dev/null +++ b/spec/lib/gitlab/database/aggregation/active_record/exact_match_filter_spec.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::Aggregation::ActiveRecord::ExactMatchFilter, feature_category: :database do + let_it_be(:user) { create(:user) } + let_it_be(:project) { create(:project) } + let_it_be(:merge_request1) do + create(:merge_request, :unique_branches, + updated_by_id: user.id, + target_project: project, + source_project: project, + time_estimate: 1, + created_at: '2025-04-05').tap do |mr| + mr.metrics.update!(merged_at: '2025-05-03') + end + end + + let_it_be(:merge_request2) do + create(:merge_request, :unique_branches, + target_project: project, + source_project: project, + time_estimate: 3, + created_at: '2025-04-04').tap do |mr| + mr.metrics.update!(latest_closed_at: '2025-05-03') + end + end + + let_it_be(:merge_request3) do + create(:merge_request, updated_by_id: user.id, time_estimate: 5, created_at: '2025-06-05').tap do |mr| + mr.metrics.update!(merged_at: '2025-06-04') + end + end + + let(:engine_definition) do + Gitlab::Database::Aggregation::ActiveRecord::Engine.build do + filters do + exact_match :created_at, :datetime, max_size: 2 + end + + metrics do + count + end + end + end + + let(:engine) { engine_definition.new(context: { scope: 
MergeRequest.all }) } + + it 'applies single value filter' do + request = Gitlab::Database::Aggregation::Request.new( + filters: [{ identifier: :created_at, values: ['2025-04-04'] }], + metrics: [{ identifier: :total_count }] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { total_count: 1 } + ]) + end + + it 'applies multiple values filter' do + request = Gitlab::Database::Aggregation::Request.new( + filters: [{ identifier: :created_at, values: %w[2025-04-04 2025-04-05] }], + metrics: [{ identifier: :total_count }] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { total_count: 2 } + ]) + end + + it 'respects max_size limit' do + request = Gitlab::Database::Aggregation::Request.new( + filters: [{ identifier: :created_at, values: %w[2025-04-04 2025-04-05 2025-05-05] }], + metrics: [{ identifier: :total_count }] + ) + + expect(engine).to execute_aggregation(request).with_errors([ + "Values maximum size of 2 exceeded for filter `created_at`" + ]) + end +end diff --git a/spec/lib/gitlab/database/aggregation/click_house/engine_spec.rb b/spec/lib/gitlab/database/aggregation/click_house/engine_spec.rb index 81778477b68892..b0b116f794f2b5 100644 --- a/spec/lib/gitlab/database/aggregation/click_house/engine_spec.rb +++ b/spec/lib/gitlab/database/aggregation/click_house/engine_spec.rb @@ -10,6 +10,11 @@ self.table_name = 'agent_platform_sessions' self.table_primary_key = %w[namespace_path user_id session_id flow_type] + filters do + exact_match :user_id, :integer + range :created_event_at, :datetime, -> { Arel.sql('anyIfMerge(created_event_at)') }, merge_column: true + end + dimensions do column :user_id, :integer column :flow_type, :string @@ -85,6 +90,35 @@ [session1, session2, session3, session4, session5] end + describe "filtering" do + it 'applies merge column filtering' do + filter_range = session1[:created_event_at].to_date..session4[:created_event_at].to_date + + request = Gitlab::Database::Aggregation::Request.new( + 
filters: [{ identifier: :created_event_at, values: filter_range }], + dimensions: [{ identifier: :user_id }], + metrics: [{ identifier: :total_count }] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { user_id: 2, total_count: 1 }, + { user_id: 1, total_count: 3 } + ]) + end + + it 'applies regular filtering' do + request = Gitlab::Database::Aggregation::Request.new( + filters: [{ identifier: :user_id, values: [1] }], + dimensions: [{ identifier: :user_id }], + metrics: [{ identifier: :total_count }] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { user_id: 1, total_count: 4 } + ]) + end + end + describe "dimensions" do it 'groups by single dimension' do request = Gitlab::Database::Aggregation::Request.new( diff --git a/spec/lib/gitlab/database/aggregation/click_house/exact_match_filter_spec.rb b/spec/lib/gitlab/database/aggregation/click_house/exact_match_filter_spec.rb new file mode 100644 index 00000000000000..2ad9fa12397adc --- /dev/null +++ b/spec/lib/gitlab/database/aggregation/click_house/exact_match_filter_spec.rb @@ -0,0 +1,77 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::Aggregation::ClickHouse::ExactMatchFilter, :click_house, feature_category: :database do + include_context 'with agent_platform_sessions ClickHouse aggregation engine' + + let(:engine_definition) do + Gitlab::Database::Aggregation::ClickHouse::Engine.build do + self.table_name = 'agent_platform_sessions' + self.table_primary_key = %w[namespace_path user_id session_id flow_type] + + metrics do + count + end + + filters do + exact_match :session_id, :integer + end + end + end + + let(:session1) do # finished & long + created_at = DateTime.parse('2025-03-01 00:00:00 UTC') + { session_id: 1, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + finished_event_at: 
created_at + 10.minutes, + resumed_event_at: created_at + 9.minutes } + end + + let(:session2) do # finished & short + created_at = DateTime.parse('2025-03-02 00:00:00 UTC') + { session_id: 2, user_id: 2, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + finished_event_at: created_at + 3.minutes, + resumed_event_at: created_at + 2.minutes } + end + + let(:session3) do # not finished yet. in progress + created_at = DateTime.parse('2025-03-04 00:00:00 UTC') + { session_id: 3, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'code_review', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + resumed_event_at: created_at + 9.minutes } + end + + let(:all_data_rows) do + [session1, session2, session3] + end + + it 'applies single value filter' do + request = Gitlab::Database::Aggregation::Request.new( + filters: [{ identifier: :session_id, values: [1] }], + metrics: [{ identifier: :total_count }] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { total_count: 1 } + ]) + end + + it 'applies multiple values filter' do + request = Gitlab::Database::Aggregation::Request.new( + filters: [{ identifier: :session_id, values: [1, 2] }], + metrics: [{ identifier: :total_count }] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { total_count: 2 } + ]) + end +end diff --git a/spec/lib/gitlab/database/aggregation/click_house/range_filter_spec.rb b/spec/lib/gitlab/database/aggregation/click_house/range_filter_spec.rb new file mode 100644 index 00000000000000..e1a6b7893d7485 --- /dev/null +++ b/spec/lib/gitlab/database/aggregation/click_house/range_filter_spec.rb @@ -0,0 +1,88 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::Aggregation::ClickHouse::RangeFilter, :click_house, feature_category: 
:database do + include_context 'with agent_platform_sessions ClickHouse aggregation engine' + + let(:engine_definition) do + Gitlab::Database::Aggregation::ClickHouse::Engine.build do + self.table_name = 'agent_platform_sessions' + self.table_primary_key = %w[namespace_path user_id session_id flow_type] + + metrics do + count + end + + filters do + range :session_id, :integer + end + end + end + + let(:session1) do # finished & long + created_at = DateTime.parse('2025-03-01 00:00:00 UTC') + { session_id: 1, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + finished_event_at: created_at + 10.minutes, + resumed_event_at: created_at + 9.minutes } + end + + let(:session2) do # finished & short + created_at = DateTime.parse('2025-03-02 00:00:00 UTC') + { session_id: 2, user_id: 2, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + finished_event_at: created_at + 3.minutes, + resumed_event_at: created_at + 2.minutes } + end + + let(:session3) do # not finished yet. in progress + created_at = DateTime.parse('2025-03-04 00:00:00 UTC') + { session_id: 3, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'code_review', environment: 'prod', + session_year: 2025, + created_event_at: created_at, + started_event_at: created_at + 1.second, + resumed_event_at: created_at + 9.minutes } + end + + let(:all_data_rows) do + [session1, session2, session3] + end + + it 'applies full range filter' do + request = Gitlab::Database::Aggregation::Request.new( + filters: [{ identifier: :session_id, values: 1..2 }], + metrics: [{ identifier: :total_count }] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { total_count: 2 } + ]) + end + + it 'applies 2.. 
range filter' do + request = Gitlab::Database::Aggregation::Request.new( + filters: [{ identifier: :session_id, values: 1..nil }], + metrics: [{ identifier: :total_count }] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { total_count: 3 } + ]) + end + + it 'applies ...3 range filter' do + request = Gitlab::Database::Aggregation::Request.new( + filters: [{ identifier: :session_id, values: nil...2 }], + metrics: [{ identifier: :total_count }] + ) + + expect(engine).to execute_aggregation(request).and_return([ + { total_count: 1 } + ]) + end +end diff --git a/spec/lib/gitlab/database/aggregation/engine_spec.rb b/spec/lib/gitlab/database/aggregation/engine_spec.rb index d9314087566949..e1b2a8f7f83515 100644 --- a/spec/lib/gitlab/database/aggregation/engine_spec.rb +++ b/spec/lib/gitlab/database/aggregation/engine_spec.rb @@ -3,6 +3,44 @@ require 'spec_helper' RSpec.describe Gitlab::Database::Aggregation::Engine, feature_category: :database do + let(:engine_klass) do + described_class.build do + def self.metrics_mapping + { + count: Gitlab::Database::Aggregation::PartDefinition + } + end + + def self.dimensions_mapping + { + column: Gitlab::Database::Aggregation::PartDefinition + } + end + + def self.filters_mapping + { + column: Gitlab::Database::Aggregation::PartDefinition + } + end + + dimensions do + column :user_id, :integer + end + + filters do + column :user_id, :integer + end + + metrics do + count :total_count, :integer + end + end + end + + it 'requires filters_mapping definition' do + expect(described_class).to require_method_definition(:filters_mapping) + end + it 'requires metrics_mapping definition' do expect(described_class).to require_method_definition(:metrics_mapping) end @@ -15,27 +53,23 @@ expect(described_class.new(context: {})).to require_method_definition(:execute_query_plan, nil) end - describe 'duplicates validation' do - let(:engine_klass) do - Gitlab::Database::Aggregation::Engine.build do - def self.metrics_mapping - { 
- count: Gitlab::Database::Aggregation::PartDefinition - } - end - - def self.dimensions_mapping - { - column: Gitlab::Database::Aggregation::PartDefinition - } - end - - dimensions do - column :user_id, :integer - end - end + describe '.to_hash' do + it 'exposes the engine_definition definition' do + expect(engine_klass.to_hash).to match({ + dimensions: array_including( + hash_including({ identifier: :user_id }) + ), + metrics: array_including( + hash_including({ identifier: :total_count }) + ), + filters: array_including( + hash_including({ identifier: :user_id }) + ) + }) end + end + describe 'duplicated definitions validation' do it 'raises an exception if duplicate dimensions are defined' do expect do engine_klass.dimensions do @@ -51,5 +85,13 @@ def self.dimensions_mapping end end.to raise_error("Identical engine parts found: [:user_id]. Engine parts identifiers must be unique.") end + + it 'raises an exception if duplicate filters are defined' do + expect do + engine_klass.filters do + column :user_id, :integer + end + end.to raise_error("Identical engine parts found: [:user_id]. 
Engine parts identifiers must be unique.") + end end end diff --git a/spec/lib/gitlab/database/aggregation/formatter_spec.rb b/spec/lib/gitlab/database/aggregation/formatter_spec.rb index 5cfbe22d667d87..00ad697c5fd8b3 100644 --- a/spec/lib/gitlab/database/aggregation/formatter_spec.rb +++ b/spec/lib/gitlab/database/aggregation/formatter_spec.rb @@ -20,7 +20,7 @@ def self.metrics_mapping end end - let(:plan) { Gitlab::Database::Aggregation::QueryPlan.build(request, engine.new(context: {})) } + let(:plan) { request.to_query_plan(engine.new(context: {})) } let(:request) do Gitlab::Database::Aggregation::Request.new(metrics: [{ identifier: :total_count }, { identifier: :total_with_formatting_count }]) diff --git a/spec/lib/gitlab/database/aggregation/query_plan_spec.rb b/spec/lib/gitlab/database/aggregation/query_plan_spec.rb new file mode 100644 index 00000000000000..64a80963fec6a4 --- /dev/null +++ b/spec/lib/gitlab/database/aggregation/query_plan_spec.rb @@ -0,0 +1,150 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::Aggregation::QueryPlan, feature_category: :database do + let_it_be(:engine_definition) do + Gitlab::Database::Aggregation::Engine.build do + def self.dimensions_mapping + { + column: Gitlab::Database::Aggregation::PartDefinition + } + end + + def self.metrics_mapping + { + count: Gitlab::Database::Aggregation::PartDefinition + } + end + + def self.filters_mapping + { + exact_match: Gitlab::Database::Aggregation::PartDefinition + } + end + + dimensions do + column :state_id, :integer + end + + metrics do + count :total_count, :integer + end + + filters do + exact_match :state_id, :integer + end + end + end + + let(:engine) { engine_definition.new(context: { scope: MergeRequest.all }) } + + describe 'validations' do + context 'when filter cannot be found' do + it 'returns error' do + request = Gitlab::Database::Aggregation::Request.new( + filters: [{ identifier: :missing_identfier }], + metrics: [{ 
identifier: :total_count }] + ) + + query_plan = described_class.new(engine, request) + + expect(query_plan).not_to be_valid + expect(query_plan.errors.to_a).to include( + a_string_matching(/identifier is not available: 'missing_identfier'/) + ) + end + end + + context 'when dimension cannot be found' do + it 'returns error' do + request = Gitlab::Database::Aggregation::Request.new( + dimensions: [{ identifier: :missing_identfier }], + metrics: [{ identifier: :total_count }] + ) + + query_plan = described_class.new(engine, request) + + expect(query_plan).not_to be_valid + expect(query_plan.errors.to_a).to include( + a_string_matching(/identifier is not available: 'missing_identfier'/) + ) + end + end + + context 'when metric cannot be found' do + it 'returns error' do + request = Gitlab::Database::Aggregation::Request.new( + dimensions: [{ identifier: :state_id }], + metrics: [{ identifier: :missing_identfier }] + ) + + query_plan = described_class.new(engine, request) + + expect(query_plan).not_to be_valid + expect(query_plan.errors.to_a).to include( + a_string_matching(/identifier is not available: 'missing_identfier'/) + ) + end + end + + context 'when no metric is passed in' do + it 'returns error' do + request = Gitlab::Database::Aggregation::Request.new( + dimensions: [{ identifier: :state_id }], + metrics: [] + ) + + query_plan = described_class.new(engine, request) + + expect(query_plan).not_to be_valid + expect(query_plan.errors.to_a).to include(a_string_matching(/at least one metric is required/)) + end + end + + context 'when order cannot be found' do + it 'returns error' do + request = Gitlab::Database::Aggregation::Request.new( + dimensions: [{ identifier: :state_id }], + metrics: [{ identifier: :total_count }], + order: [{ identifier: :missing_identfier, direction: :asc }] + ) + + query_plan = described_class.new(engine, request) + + expect(query_plan).not_to be_valid + expect(query_plan.errors.to_a).to include( + a_string_matching(/identifier is not 
available: 'missing_identfier'/) + ) + end + end + + context 'when duplicated dimensions are passed in' do + it 'returns error' do + request = Gitlab::Database::Aggregation::Request.new( + dimensions: [{ identifier: :state_id }, { identifier: :state_id }], + metrics: [{ identifier: :total_count }] + ) + + query_plan = described_class.new(engine, request) + + expect(query_plan).not_to be_valid + expect(query_plan.errors.to_a).to include(a_string_matching(/duplicated identifiers found: state_id/)) + end + end + + context 'when duplicated metrics are passed in' do + it 'returns error' do + request = Gitlab::Database::Aggregation::Request.new( + dimensions: [{ identifier: :state_id }], + metrics: [{ identifier: :total_count }, { identifier: :total_count }] + ) + + query_plan = described_class.new(engine, request) + + expect(query_plan).not_to be_valid + expect(query_plan.errors.to_a).to include(a_string_matching(/duplicated identifiers found: total_count/)) + end + end + end +end diff --git a/spec/support/matchers/database/aggregations/execute_aggregation_matcher.rb b/spec/support/matchers/database/aggregations/execute_aggregation_matcher.rb index 95db9920e6f64b..7a23c901ea292d 100644 --- a/spec/support/matchers/database/aggregations/execute_aggregation_matcher.rb +++ b/spec/support/matchers/database/aggregations/execute_aggregation_matcher.rb @@ -25,8 +25,15 @@ failure_message do |engine| if @expected_data - "expected #{engine.class} to execute aggregation and return #{@expected_data.inspect}, " \ - "but got #{@actual_data.inspect}" + message = "expected #{engine.class} to execute aggregation and return #{@expected_data.inspect}, " + if @actual_data + message << "but got #{@actual_data.inspect}" + else + message << 'but got no data' + message << " and errors #{@actual_errors.inspect}" if @actual_errors + end + + message elsif @expected_errors "expected #{engine.class} to execute aggregation with errors #{@expected_errors.inspect}, " \ "but got 
#{@actual_errors.inspect}" -- GitLab From ca6c2b7723735290a4561b42843abfee727e46cc Mon Sep 17 00:00:00 2001 From: Pavel Shutsin Date: Thu, 11 Dec 2025 17:32:52 +0100 Subject: [PATCH 3/5] Add aggregation engines GraphQL exposure mechanism --- Gemfile | 2 +- Gemfile.checksum | 2 +- Gemfile.lock | 4 +- Gemfile.next.checksum | 2 +- Gemfile.next.lock | 4 +- .../analytics/aggregation/engine_resolver.rb | 97 +++++++++++++++++++ .../engine_response_dimensions_type.rb | 57 +++++++++++ .../aggregation/engine_response_type.rb | 45 +++++++++ .../types/analytics/aggregation/order_type.rb | 14 +++ .../concerns/aggregation_engine_helpers.rb | 16 +++ ...amp_column.rb => date_bucket_dimension.rb} | 2 +- .../aggregation/active_record/engine.rb | 2 +- .../click_house/date_bucket_dimension.rb | 61 ++++++++++++ .../aggregation/click_house/engine.rb | 3 +- .../database/aggregation/click_house/rate.rb | 4 +- .../aggregation/definitions_collector.rb | 4 + lib/gitlab/database/aggregation/graphql.rb | 10 ++ .../database/aggregation/graphql/adapter.rb | 50 ++++++++++ .../aggregation/query_plan/base_part.rb | 2 +- .../aggregation/active_record/engine_spec.rb | 8 +- .../aggregation/click_house/engine_spec.rb | 8 +- 21 files changed, 376 insertions(+), 21 deletions(-) create mode 100644 app/graphql/resolvers/analytics/aggregation/engine_resolver.rb create mode 100644 app/graphql/types/analytics/aggregation/engine_response_dimensions_type.rb create mode 100644 app/graphql/types/analytics/aggregation/engine_response_type.rb create mode 100644 app/graphql/types/analytics/aggregation/order_type.rb create mode 100644 ee/app/graphql/types/concerns/aggregation_engine_helpers.rb rename lib/gitlab/database/aggregation/active_record/{timestamp_column.rb => date_bucket_dimension.rb} (97%) create mode 100644 lib/gitlab/database/aggregation/click_house/date_bucket_dimension.rb create mode 100644 lib/gitlab/database/aggregation/graphql.rb create mode 100644 lib/gitlab/database/aggregation/graphql/adapter.rb 
diff --git a/Gemfile b/Gemfile index 0b646d90b9c22d..cff3072484e870 100644 --- a/Gemfile +++ b/Gemfile @@ -378,7 +378,7 @@ gem 'rack-proxy', '~> 0.7.7', feature_category: :shared # rubocop:todo Gemfile/M gem 'cssbundling-rails', '1.4.3', feature_category: :shared # rubocop:todo Gemfile/MissingFeatureCategory -- https://gitlab.com/gitlab-org/gitlab/-/issues/581839 gem 'terser', '1.0.2', feature_category: :shared # rubocop:todo Gemfile/MissingFeatureCategory -- https://gitlab.com/gitlab-org/gitlab/-/issues/581839 -gem 'click_house-client', '0.8.4', feature_category: :database +gem 'click_house-client', '0.8.5', feature_category: :database gem 'addressable', '~> 2.8', feature_category: :shared # rubocop:todo Gemfile/MissingFeatureCategory -- https://gitlab.com/gitlab-org/gitlab/-/issues/581839 gem 'gon', '~> 6.5.0', feature_category: :shared # rubocop:todo Gemfile/MissingFeatureCategory -- https://gitlab.com/gitlab-org/gitlab/-/issues/581839 gem 'request_store', '~> 1.7.0', feature_category: :shared # rubocop:todo Gemfile/MissingFeatureCategory -- https://gitlab.com/gitlab-org/gitlab/-/issues/581839 diff --git a/Gemfile.checksum b/Gemfile.checksum index f423d54bffe892..71e23f7a73503b 100644 --- a/Gemfile.checksum +++ b/Gemfile.checksum @@ -82,7 +82,7 @@ {"name":"citrus","version":"3.0.2","platform":"ruby","checksum":"4ec2412fc389ad186735f4baee1460f7900a8e130ffe3f216b30d4f9c684f650"}, {"name":"claide","version":"1.1.0","platform":"ruby","checksum":"6d3c5c089dde904d96aa30e73306d0d4bd444b1accb9b3125ce14a3c0183f82e"}, {"name":"claide-plugins","version":"0.9.2","platform":"ruby","checksum":"c7ea78bc067ab23bce8515497cdcdcb8f01c86dadfbe13c44644e382922c1c2e"}, -{"name":"click_house-client","version":"0.8.4","platform":"ruby","checksum":"8278a6319fd30e8cb5dd4e8ada247a16819a6d4f3548cb0efed6c188ee5dfd18"}, +{"name":"click_house-client","version":"0.8.5","platform":"ruby","checksum":"aadf5f2437f287efdbf1feae576d99a0dbcaeb888deb4af094b27295fb342a3f"}, 
{"name":"coderay","version":"1.1.3","platform":"ruby","checksum":"dc530018a4684512f8f38143cd2a096c9f02a1fc2459edcfe534787a7fc77d4b"}, {"name":"coercible","version":"1.0.0","platform":"ruby","checksum":"5081ad24352cc8435ce5472bc2faa30260c7ea7f2102cc6a9f167c4d9bffaadc"}, {"name":"colored2","version":"3.1.2","platform":"ruby","checksum":"b13c2bd7eeae2cf7356a62501d398e72fde78780bd26aec6a979578293c28b4a"}, diff --git a/Gemfile.lock b/Gemfile.lock index 214ca36fa118be..46273c3e5b4fff 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -434,7 +434,7 @@ GEM cork nap open4 (~> 1.3) - click_house-client (0.8.4) + click_house-client (0.8.5) activerecord (>= 7.0, < 9.0) activesupport (>= 7.0, < 9.0) addressable (~> 2.8) @@ -2122,7 +2122,7 @@ DEPENDENCIES carrierwave (~> 1.3) charlock_holmes (~> 0.7.9) circuitbox (= 2.0.0) - click_house-client (= 0.8.4) + click_house-client (= 0.8.5) commonmarker (~> 0.23.10) concurrent-ruby (~> 1.1) connection_pool (~> 2.5.3) diff --git a/Gemfile.next.checksum b/Gemfile.next.checksum index f423d54bffe892..71e23f7a73503b 100644 --- a/Gemfile.next.checksum +++ b/Gemfile.next.checksum @@ -82,7 +82,7 @@ {"name":"citrus","version":"3.0.2","platform":"ruby","checksum":"4ec2412fc389ad186735f4baee1460f7900a8e130ffe3f216b30d4f9c684f650"}, {"name":"claide","version":"1.1.0","platform":"ruby","checksum":"6d3c5c089dde904d96aa30e73306d0d4bd444b1accb9b3125ce14a3c0183f82e"}, {"name":"claide-plugins","version":"0.9.2","platform":"ruby","checksum":"c7ea78bc067ab23bce8515497cdcdcb8f01c86dadfbe13c44644e382922c1c2e"}, -{"name":"click_house-client","version":"0.8.4","platform":"ruby","checksum":"8278a6319fd30e8cb5dd4e8ada247a16819a6d4f3548cb0efed6c188ee5dfd18"}, +{"name":"click_house-client","version":"0.8.5","platform":"ruby","checksum":"aadf5f2437f287efdbf1feae576d99a0dbcaeb888deb4af094b27295fb342a3f"}, {"name":"coderay","version":"1.1.3","platform":"ruby","checksum":"dc530018a4684512f8f38143cd2a096c9f02a1fc2459edcfe534787a7fc77d4b"}, 
{"name":"coercible","version":"1.0.0","platform":"ruby","checksum":"5081ad24352cc8435ce5472bc2faa30260c7ea7f2102cc6a9f167c4d9bffaadc"}, {"name":"colored2","version":"3.1.2","platform":"ruby","checksum":"b13c2bd7eeae2cf7356a62501d398e72fde78780bd26aec6a979578293c28b4a"}, diff --git a/Gemfile.next.lock b/Gemfile.next.lock index 214ca36fa118be..46273c3e5b4fff 100644 --- a/Gemfile.next.lock +++ b/Gemfile.next.lock @@ -434,7 +434,7 @@ GEM cork nap open4 (~> 1.3) - click_house-client (0.8.4) + click_house-client (0.8.5) activerecord (>= 7.0, < 9.0) activesupport (>= 7.0, < 9.0) addressable (~> 2.8) @@ -2122,7 +2122,7 @@ DEPENDENCIES carrierwave (~> 1.3) charlock_holmes (~> 0.7.9) circuitbox (= 2.0.0) - click_house-client (= 0.8.4) + click_house-client (= 0.8.5) commonmarker (~> 0.23.10) concurrent-ruby (~> 1.1) connection_pool (~> 2.5.3) diff --git a/app/graphql/resolvers/analytics/aggregation/engine_resolver.rb b/app/graphql/resolvers/analytics/aggregation/engine_resolver.rb new file mode 100644 index 00000000000000..2b82646ebde62e --- /dev/null +++ b/app/graphql/resolvers/analytics/aggregation/engine_resolver.rb @@ -0,0 +1,97 @@ +# frozen_string_literal: true + +module Resolvers + module Analytics + module Aggregation + class EngineResolver < BaseResolver # rubocop:disable Graphql/ResolverType -- type declared in subclasses + class << self + attr_accessor :engine + + def build(engine, **graphql_context, &block) + klass = Class.new(self) + klass.engine = engine + klass.class_eval do + type Types::Analytics::Aggregation::EngineResponseType.build(engine, **graphql_context), null: true + + declare_filters + end + klass.class_eval(&block) if block + + klass + end + + private + + def declare_filters + adapter = ::Gitlab::Database::Aggregation::Graphql::Adapter + adapter.each_filter_argument(engine.filters) do |name, type, kwargs| + argument(name, type, **kwargs) # rubocop:disable Graphql/Descriptions -- defined in adapter + end + end + end + + include LooksAhead + + argument 
:order_by, + [Types::Analytics::Aggregation::OrderType], + required: false, + description: 'Sorting order for the aggregated data.' + + def resolve_with_lookahead(**arguments) + request = build_aggregation_request(arguments) + response = engine_class.new(context: { scope: aggregation_scope }).execute(request) + + if response.success? + response.payload[:data].to_a # TODO: remove `.to_a` when pagination connection is implemented + else + # TODO: handle errors + { + errors: errors.map { |field, messages| { field: field, messages: messages } } + } + end + end + + private + + def engine_class + self.class.engine + end + + def aggregation_scope + raise NoMethodError # must be overloaded in dynamic class definition + end + + def build_aggregation_request(arguments) + selections = lookahead.selections.first.selections + dimensions_selection = selections.detect { |s| s.name == :dimensions } + dimensions = if dimensions_selection + dimensions_selection.selections.map do |s| + { identifier: s.name.to_sym, parameters: s.arguments } + end + else + [] + end + # prepare order + order = Array(arguments.delete(:order_by)).map do |o| + o.to_hash.tap do |ord| + ord[:identifier] = ord[:identifier].to_sym + ord[:parameters] ||= {} + end + end + # prepare filters: arguments - orderBy = filters + filters = arguments.map { |key, values| { identifier: key.to_sym, values: values } } + + # selections - dimensions = metrics + metric_selections = selections.reject { |s| s.name == :dimensions } + metrics = metric_selections.map do |selection| + { identifier: selection.name.to_sym, parameters: selection.arguments } + end + + ::Gitlab::Database::Aggregation::Request.new( + filters: filters, dimensions: dimensions, metrics: metrics, order: order + ) + end + end + end + end +end diff --git a/app/graphql/types/analytics/aggregation/engine_response_dimensions_type.rb b/app/graphql/types/analytics/aggregation/engine_response_dimensions_type.rb new file mode 100644 index 00000000000000..86139fd7d187cc --- 
/dev/null +++ b/app/graphql/types/analytics/aggregation/engine_response_dimensions_type.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +module Types + module Analytics + module Aggregation + module EngineResponseDimensionsType + class << self + def build(engine, graphql_context) + adapter = ::Gitlab::Database::Aggregation::Graphql::Adapter + mounted_engine_name = adapter.mounted_engine_name(graphql_context[:name]) + + Class.new(BaseObject) do + graphql_name "#{mounted_engine_name}AggregationResponseDimensions" + description "Response dimensions for #{mounted_engine_name} aggregation engine" + + engine.dimensions.each do |dimension| + params = dimension.respond_to?(:parameters) ? dimension.parameters : {} + field dimension.identifier.to_sym, + adapter.graphql_type(dimension.type), + null: true, + description: dimension.description do + params.each do |param_name, param_config| + argument param_name, adapter.graphql_type(param_config[:type]), + required: false, description: param_config[:description] + end + end + + define_method(dimension.identifier) do |**field_kwargs| + allowed_params = field_kwargs.slice(*params.keys) + + object[dimension.instance_key(parameters: allowed_params)] + end + end + + # adapter.dimensions_to_fields(engine.dimensions) do |field_name, type, params, kwargs| + # field(field_name, type, **kwargs) do-- defined in adapter + # params.each do |param_name, param_config| + # argument(*adapter.parameter_to_argument(param_name, param_config))-- defined in adapter + # end + # end + + # define_method(field_name) do |**field_kwargs| + # allowed_params = field_kwargs.slice(*params.pluck(:identifier))-- not AR + + # part = engine.parts.detect { |d| d.identifier == field_name } + # key = part.instance_key(parameters: allowed_params) + + # object[key] + # end + # end + end + end + end + end + end + end +end diff --git a/app/graphql/types/analytics/aggregation/engine_response_type.rb b/app/graphql/types/analytics/aggregation/engine_response_type.rb 
new file mode 100644 index 00000000000000..07c4491972f108 --- /dev/null +++ b/app/graphql/types/analytics/aggregation/engine_response_type.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +module Types + module Analytics + module Aggregation + module EngineResponseType + class << self + def build(engine, graphql_context) + adapter = ::Gitlab::Database::Aggregation::Graphql::Adapter + mounted_engine_name = adapter.mounted_engine_name(graphql_context[:name]) + + Class.new(BaseObject) do + graphql_name "#{mounted_engine_name}AggregationResponse" + description "Response for #{mounted_engine_name} aggregation engine" + + field :dimensions, + Types::Analytics::Aggregation::EngineResponseDimensionsType.build(engine, **graphql_context), + description: 'Aggregation dimensions. Every selected dimension will be used for aggregation.', + resolver_method: :object + + engine.metrics.each do |metric| + params = metric.respond_to?(:parameters) ? metric.parameters : {} + field metric.identifier.to_sym, + adapter.graphql_type(metric.type), + null: true, + description: metric.description do + params.each do |param_name, param_config| + argument param_name, adapter.graphql_type(param_config[:type]), + required: false, description: param_config[:description] + end + end + + define_method(metric.identifier) do |**field_kwargs| + allowed_params = field_kwargs.slice(*params.keys) + + object[metric.instance_key(parameters: allowed_params)] + end + end + end + end + end + end + end + end +end diff --git a/app/graphql/types/analytics/aggregation/order_type.rb b/app/graphql/types/analytics/aggregation/order_type.rb new file mode 100644 index 00000000000000..a3c35dd6c2406f --- /dev/null +++ b/app/graphql/types/analytics/aggregation/order_type.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module Types + module Analytics + module Aggregation + class OrderType < BaseInputObject + graphql_name 'AggregationOrder' + + argument :direction, SortDirectionEnum, required: true, description: 
'Sorting direction.' + argument :identifier, String, required: true, description: 'Dimension or metric identifier.' + end + end + end +end diff --git a/ee/app/graphql/types/concerns/aggregation_engine_helpers.rb b/ee/app/graphql/types/concerns/aggregation_engine_helpers.rb new file mode 100644 index 00000000000000..0cc64693554bea --- /dev/null +++ b/ee/app/graphql/types/concerns/aggregation_engine_helpers.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +module AggregationEngineHelpers # rubocop:disable Gitlab/BoundedContexts -- global helper + extend ActiveSupport::Concern + + class_methods do + def mount_aggregation_engine(engine, **kwargs, &block) + resolver = Resolvers::Analytics::Aggregation::EngineResolver.build(engine, **kwargs, &block) + response_type = Types::Analytics::Aggregation::EngineResponseType.build(engine, **kwargs) + + field_params = kwargs.except(:name).merge(resolver: resolver) + + field kwargs[:name], response_type.connection_type, **field_params # rubocop:disable Graphql/Descriptions -- description provided via kwargs + end + end +end diff --git a/lib/gitlab/database/aggregation/active_record/timestamp_column.rb b/lib/gitlab/database/aggregation/active_record/date_bucket_dimension.rb similarity index 97% rename from lib/gitlab/database/aggregation/active_record/timestamp_column.rb rename to lib/gitlab/database/aggregation/active_record/date_bucket_dimension.rb index 9e698f9a987038..93e1d4b5bb60d2 100644 --- a/lib/gitlab/database/aggregation/active_record/timestamp_column.rb +++ b/lib/gitlab/database/aggregation/active_record/date_bucket_dimension.rb @@ -4,7 +4,7 @@ module Gitlab module Database module Aggregation module ActiveRecord - class TimestampColumn < Column + class DateBucketDimension < Column include ParameterizedDefinition self.supported_parameters = %i[granularity] diff --git a/lib/gitlab/database/aggregation/active_record/engine.rb b/lib/gitlab/database/aggregation/active_record/engine.rb index 7c5cab1a3e67f9..5a8b87adc2aed8 
100644 --- a/lib/gitlab/database/aggregation/active_record/engine.rb +++ b/lib/gitlab/database/aggregation/active_record/engine.rb @@ -20,7 +20,7 @@ def self.metrics_mapping def self.dimensions_mapping { column: Column, - timestamp_column: TimestampColumn + date_bucket: DateBucketDimension } end diff --git a/lib/gitlab/database/aggregation/click_house/date_bucket_dimension.rb b/lib/gitlab/database/aggregation/click_house/date_bucket_dimension.rb new file mode 100644 index 00000000000000..31f3703d763de4 --- /dev/null +++ b/lib/gitlab/database/aggregation/click_house/date_bucket_dimension.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Aggregation + module ClickHouse + class DateBucketDimension < Column + include ParameterizedDefinition + + self.supported_parameters = %i[granularity] + + GRANULARITIES_MAP = { + daily: :day, + weekly: :week, + monthly: :month, + yearly: :year + }.with_indifferent_access.freeze + + DEFAULT_GRANULARITY = :monthly + + def to_hash + super.merge(kind: :date_bucket) + end + + def validate_part(part) + validate_granularity(part) + end + + def to_outer_arel(context) + granularity = instance_parameter(:granularity, context[name]) || DEFAULT_GRANULARITY + granularity = GRANULARITIES_MAP[granularity] + + context[:scope].func('toStartOfInterval', [super, Arel.sql("INTERVAL 1 #{granularity}")]) + end + + # def to_arel(context) + # granularity = instance_parameter(:granularity, context[name]) || DEFAULT_GRANULARITY + + # quoted_string = context[:scope].model.connection.quote(GRANULARITIES_MAP[granularity]) + + # expr = expression ? 
expression.call : context[:scope].arel_table[name] + # Arel::Nodes::NamedFunction.new('date_trunc', [Arel.sql(quoted_string), expr]) + # end + + private + + def validate_granularity(part) + granularity = part.configuration[:granularity] + return unless granularity + return if granularity.in?(parameters.dig(:granularity, :in) || []) + + part.errors.add(:granularity, + format(s_("AggregationEngine|Unknown granularity \"%{granularity}\""), + granularity: granularity) + ) + end + end + end + end + end +end diff --git a/lib/gitlab/database/aggregation/click_house/engine.rb b/lib/gitlab/database/aggregation/click_house/engine.rb index df09db496efa3e..f0dcb3ec543f3f 100644 --- a/lib/gitlab/database/aggregation/click_house/engine.rb +++ b/lib/gitlab/database/aggregation/click_house/engine.rb @@ -14,7 +14,8 @@ class << self def dimensions_mapping { - column: Column + column: Column, + date_bucket: DateBucketDimension } end diff --git a/lib/gitlab/database/aggregation/click_house/rate.rb b/lib/gitlab/database/aggregation/click_house/rate.rb index e372aa92b44ded..7ec9371be66c83 100644 --- a/lib/gitlab/database/aggregation/click_house/rate.rb +++ b/lib/gitlab/database/aggregation/click_house/rate.rb @@ -6,8 +6,8 @@ module Aggregation module ClickHouse class Rate < Column # Uses regular expression as numerator condition and "if" expression as denominator condition - def initialize(name, type = :float, numerator_if:, denominator_if: nil) - super(name, type, numerator_if, if: denominator_if) + def initialize(name, type = :float, numerator_if:, denominator_if: nil, **kwargs) + super(name, type, numerator_if, if: denominator_if, **kwargs) end def identifier diff --git a/lib/gitlab/database/aggregation/definitions_collector.rb b/lib/gitlab/database/aggregation/definitions_collector.rb index d64c6f77f12404..3009217412e679 100644 --- a/lib/gitlab/database/aggregation/definitions_collector.rb +++ b/lib/gitlab/database/aggregation/definitions_collector.rb @@ -17,6 +17,10 @@ def 
collect(&definitions_block) private + def sql(...) + Arel.sql(...) + end + def method_missing(method_name, ...) return super unless @mapping[method_name] diff --git a/lib/gitlab/database/aggregation/graphql.rb b/lib/gitlab/database/aggregation/graphql.rb new file mode 100644 index 00000000000000..87738cfa7be3f8 --- /dev/null +++ b/lib/gitlab/database/aggregation/graphql.rb @@ -0,0 +1,10 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Aggregation + module Graphql + end + end + end +end diff --git a/lib/gitlab/database/aggregation/graphql/adapter.rb b/lib/gitlab/database/aggregation/graphql/adapter.rb new file mode 100644 index 00000000000000..2324dd0ba09e32 --- /dev/null +++ b/lib/gitlab/database/aggregation/graphql/adapter.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Aggregation + module Graphql + module Adapter + class << self + def mounted_engine_name(name) + name.to_s.camelize(:lower) + end + + def each_filter_argument(filters) + filters.each do |filter| + filter_to_arguments(filter).each { |args| yield(*args) } + end + end + + def graphql_type(type) + case type.to_sym + when :string then ::GraphQL::Types::String + when :integer then ::GraphQL::Types::Int + when :boolean then ::GraphQL::Types::Boolean + when :float then ::GraphQL::Types::Float + when :date then ::Types::DateType + when :datetime then ::Types::TimeType + end + end + + private + + def filter_to_arguments(filter) + case filter + when ::Gitlab::Database::Aggregation::ClickHouse::ExactMatchFilter + [[filter.identifier, [graphql_type(filter.type)], { required: false, description: filter.description }]] + when ::Gitlab::Database::Aggregation::ClickHouse::RangeFilter + [[:"#{filter.identifier}_from", + graphql_type(filter.type), + { required: false, description: "#{filter.description}. Start of the range." 
}], + [:"#{filter.identifier}_to", + graphql_type(filter.type), + { required: false, description: "#{filter.description}. End of the range." }]] + end + end + end + end + end + end + end +end diff --git a/lib/gitlab/database/aggregation/query_plan/base_part.rb b/lib/gitlab/database/aggregation/query_plan/base_part.rb index 14b40ba966ea0d..a1745060b28a19 100644 --- a/lib/gitlab/database/aggregation/query_plan/base_part.rb +++ b/lib/gitlab/database/aggregation/query_plan/base_part.rb @@ -9,7 +9,7 @@ class BasePart attr_reader :definition, :configuration - delegate :name, :type, to: :definition + delegate :name, :type, :identifier, to: :definition validate :validate_definition_presence validate -> { definition&.validate_part(self) } diff --git a/spec/lib/gitlab/database/aggregation/active_record/engine_spec.rb b/spec/lib/gitlab/database/aggregation/active_record/engine_spec.rb index bfb9582f62ea83..bff71a8e594469 100644 --- a/spec/lib/gitlab/database/aggregation/active_record/engine_spec.rb +++ b/spec/lib/gitlab/database/aggregation/active_record/engine_spec.rb @@ -38,15 +38,15 @@ column :state_id, :integer, description: 'Integer representation of the existing merge request states' column :updated_by_id, :integer, description: 'User id value who last updated the the merge request' column :project_id, :integer, description: 'ID of the associated project' - timestamp_column :created_at, :timestamp, description: 'Bucketed creation timestamp (month)', + date_bucket :created_at, :timestamp, description: 'Bucketed creation timestamp (month)', parameters: { granularity: { type: :string, in: %w[monthly daily] } } - timestamp_column :merged_at, + date_bucket :merged_at, :timestamp, -> { MergeRequest::Metrics.arel_table[:merged_at] }, parameters: { granularity: { type: :string, in: %w[monthly weekly] } }, scope_proc: ->(scope, _ctx) { scope.joins(:metrics).where.not(merge_request_metrics: { merged_at: nil }) }, description: 'Bucketed merge timestamp (month or week).' 
- timestamp_column :closed_at, + date_bucket :closed_at, :timestamp, -> { MergeRequest::Metrics.arel_table[:latest_closed_at] }, parameters: { granularity: { type: :string, in: %w[monthly weekly] } }, @@ -187,7 +187,7 @@ end end - context 'when timestamp_column dimension returns null values' do + context 'when date_bucket dimension returns null values' do it 'groups null values together' do request = Gitlab::Database::Aggregation::Request.new( dimensions: [{ identifier: :closed_at, parameters: { granularity: 'monthly' } }], diff --git a/spec/lib/gitlab/database/aggregation/click_house/engine_spec.rb b/spec/lib/gitlab/database/aggregation/click_house/engine_spec.rb index b0b116f794f2b5..85f09ccc7b1bcf 100644 --- a/spec/lib/gitlab/database/aggregation/click_house/engine_spec.rb +++ b/spec/lib/gitlab/database/aggregation/click_house/engine_spec.rb @@ -12,14 +12,14 @@ filters do exact_match :user_id, :integer - range :created_event_at, :datetime, -> { Arel.sql('anyIfMerge(created_event_at)') }, merge_column: true + range :created_event_at, :datetime, -> { sql('anyIfMerge(created_event_at)') }, merge_column: true end dimensions do column :user_id, :integer column :flow_type, :string column :duration, :integer, -> { - Arel.sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))") + sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))") } column :environment, :string, nil, formatter: ->(v) { v.upcase } end @@ -27,10 +27,10 @@ metrics do count mean :duration, :float, -> { - Arel.sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))") + sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))") } quantile :duration, :float, - -> { Arel.sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))") }, + -> { sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))") }, parameters: { quantile: { type: 
:float } } count :with_format, :integer, nil, formatter: ->(v) { v * -1 } end -- GitLab From 430cfda38ffbece89e6e7d5185f4aace5ed312e9 Mon Sep 17 00:00:00 2001 From: Pavel Shutsin Date: Fri, 12 Dec 2025 15:17:05 +0100 Subject: [PATCH 4/5] Example engine for testing --- doc/api/graphql/reference/_index.md | 113 ++++++++++++++++++ ee/app/graphql/ee/types/group_type.rb | 7 ++ .../types/analytics/ai_usage/ai_usage_type.rb | 23 ++++ .../agent_platform_sessions.rb | 63 ++++++++++ .../fixtures/development/94_ai_usage_stats.rb | 16 +-- 5 files changed, 214 insertions(+), 8 deletions(-) create mode 100644 ee/app/graphql/types/analytics/ai_usage/ai_usage_type.rb create mode 100644 ee/app/models/analytics/aggregation_engines/agent_platform_sessions.rb diff --git a/doc/api/graphql/reference/_index.md b/doc/api/graphql/reference/_index.md index d49625a1e2206d..63ae7a0ce61348 100644 --- a/doc/api/graphql/reference/_index.md +++ b/doc/api/graphql/reference/_index.md @@ -24311,6 +24311,29 @@ The edge type for [`WorkspaceVariable`](#workspacevariable). | `cursor` | [`String!`](#string) | A cursor for use in pagination. | | `node` | [`WorkspaceVariable`](#workspacevariable) | The item at the end of the edge. | +#### `agentPlatformSessionsAggregationResponseConnection` + +The connection type for [`agentPlatformSessionsAggregationResponse`](#agentplatformsessionsaggregationresponse). + +##### Fields + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `edges` | [`[agentPlatformSessionsAggregationResponseEdge]`](#agentplatformsessionsaggregationresponseedge) | A list of edges. | +| `nodes` | [`[agentPlatformSessionsAggregationResponse]`](#agentplatformsessionsaggregationresponse) | A list of nodes. | +| `pageInfo` | [`PageInfo!`](#pageinfo) | Information to aid in pagination. | + +#### `agentPlatformSessionsAggregationResponseEdge` + +The edge type for [`agentPlatformSessionsAggregationResponse`](#agentplatformsessionsaggregationresponse). 
+ +##### Fields + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `cursor` | [`String!`](#string) | A cursor for use in pagination. | +| `node` | [`agentPlatformSessionsAggregationResponse`](#agentplatformsessionsaggregationresponse) | The item at the end of the edge. | + ## Object types Object types represent the resources that the GitLab GraphQL API can return. @@ -25624,6 +25647,32 @@ Self-hosted LLM servers. | `releaseState` | [`AiSelfHostedModelReleaseState!`](#aiselfhostedmodelreleasestate) | GitLab release status of the model. | | `updatedAt` | [`Time`](#time) | Timestamp of last update. | +### `AiUsage` + +Usage metrics. Not for production use yet. + +#### Fields with arguments + +##### `AiUsage.agentPlatformSessions` + +Usage of GitLab agent platform sessions. + +Returns [`agentPlatformSessionsAggregationResponseConnection`](#agentplatformsessionsaggregationresponseconnection). + +This field returns a [connection](#connections). It accepts the +four standard [pagination arguments](#pagination-arguments): +`before: String`, `after: String`, `first: Int`, and `last: Int`. + +###### Arguments + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `createdEventAtFrom` | [`Time`](#time) | Filter by session creation timestamp. Start of the range. | +| `createdEventAtTo` | [`Time`](#time) | Filter by session creation timestamp. End of the range. | +| `flowType` | [`[String!]`](#string) | Filter by one or many flow types. | +| `orderBy` | [`[AggregationOrder!]`](#aggregationorder) | Sorting order for the aggregated data. | +| `userId` | [`[Int!]`](#int) | Filter by one or many user ids. | + ### `AiUsageData` Usage data for events stored in either PostgreSQL (default) or ClickHouse (when configured). Data retention: three months in PostgreSQL, indefinite in ClickHouse. Requires a personal access token. Works only on top-level groups. Premium and Ultimate only. @@ -32946,6 +32995,7 @@ GPG signature for a signed commit. 
| `additionalPurchasedStorageSize` | [`Float`](#float) | Additional storage purchased for the root namespace in bytes. | | `adminEditPath` | [`String`](#string) | Admin path for editing group. Only available to admins. | | `adminShowPath` | [`String`](#string) | Admin path of the group. Only available to admins. | +| `aiUsage` {{< icon name="warning-solid" >}} | [`AiUsage`](#aiusage) | **Introduced** in GitLab 18.8. **Status**: Experiment. AI-related data 2.0. | | `aiUsageData` {{< icon name="warning-solid" >}} | [`AiUsageData`](#aiusagedata) | **Introduced** in GitLab 17.5. **Status**: Experiment. AI-related data. | | `allowStaleRunnerPruning` | [`Boolean!`](#boolean) | Indicates whether to regularly prune stale group runners. Defaults to false. | | `amazonS3Configurations` | [`AmazonS3ConfigurationTypeConnection`](#amazons3configurationtypeconnection) | Amazon S3 configurations that receive audit events belonging to the group. (see [Connections](#connections)) | @@ -49952,6 +50002,60 @@ Requires ClickHouse. Premium and Ultimate only. | ---- | ---- | ----------- | | `flowMetrics` | [`[AgentPlatformFlowMetric!]`](#agentplatformflowmetric) | Aggregated flow metrics for agent platform. | +### `agentPlatformSessionsAggregationResponse` + +Response for agentPlatformSessions aggregation engine. + +#### Fields + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `completionRate` | [`Float`](#float) | Session completion rate. | +| `dimensions` | [`agentPlatformSessionsAggregationResponseDimensions`](#agentplatformsessionsaggregationresponsedimensions) | Aggregation dimensions. Every selected dimension will be used for aggregation. | +| `finishedCount` | [`Int`](#int) | Number of finished sessions. | +| `meanDuration` | [`Float`](#float) | Average session duration in seconds. | +| `totalCount` | [`Int`](#int) | Total number of sessions. | +| `usersCount` | [`Int`](#int) | Number of unique users. 
| + +#### Fields with arguments + +##### `agentPlatformSessionsAggregationResponse.durationQuantile` + +Session duration quantile in seconds. + +Returns [`Float`](#float). + +###### Arguments + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `quantile` | [`Float`](#float) | | + +### `agentPlatformSessionsAggregationResponseDimensions` + +Response dimensions for agentPlatformSessions aggregation engine. + +#### Fields + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `flowType` | [`String`](#string) | Type of session. | +| `userId` | [`Int`](#int) | User ID. | + +#### Fields with arguments + +##### `agentPlatformSessionsAggregationResponseDimensions.createdEventAt` + +Session creation time. + +Returns [`Date`](#date). + +###### Arguments + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `granularity` | [`String`](#string) | | + ### `agentPlatformUserMetrics` Agent Platform user metrics for a user. Requires ClickHouse. Premium and Ultimate with GitLab Duo Enterprise only. @@ -57800,6 +57904,15 @@ be used as arguments). Only general use input types are listed here. For mutation input types, see the associated mutation type above. +### `AggregationOrder` + +#### Arguments + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `direction` | [`SortDirectionEnum!`](#sortdirectionenum) | Sorting direction. | +| `identifier` | [`String!`](#string) | Dimension or metric identifier. 
| + ### `AiAdditionalContextInput` #### Arguments diff --git a/ee/app/graphql/ee/types/group_type.rb b/ee/app/graphql/ee/types/group_type.rb index 0fdd7899faa787..7b5b9f02f77cec 100644 --- a/ee/app/graphql/ee/types/group_type.rb +++ b/ee/app/graphql/ee/types/group_type.rb @@ -231,6 +231,13 @@ module GroupType resolver: ::Resolvers::Analytics::AiMetrics::UserMetricsResolver, experiment: { milestone: '17.5' } + field :ai_usage, + ::Types::Analytics::AiUsage::AiUsageType, + description: 'AI-related data 2.0.', + method: :itself, + authorize: :read_enterprise_ai_analytics, + experiment: { milestone: '18.8' } + field :project_compliance_standards_adherence, ::Types::Projects::ComplianceStandards::AdherenceType.connection_type, null: true, diff --git a/ee/app/graphql/types/analytics/ai_usage/ai_usage_type.rb b/ee/app/graphql/types/analytics/ai_usage/ai_usage_type.rb new file mode 100644 index 00000000000000..4f1efeb54fec7d --- /dev/null +++ b/ee/app/graphql/types/analytics/ai_usage/ai_usage_type.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +module Types + module Analytics + module AiUsage + class AiUsageType < BaseObject + graphql_name 'AiUsage' + description "Usage metrics. Not for production use yet." 
+ authorize :read_pro_ai_analytics + + include AggregationEngineHelpers + + mount_aggregation_engine ::Analytics::AggregationEngines::AgentPlatformSessions, + name: :agent_platform_sessions, + description: 'Usage of GitLab agent platform sessions' do + define_method(:aggregation_scope) do + ::Analytics::AggregationEngines::AgentPlatformSessions.prepare_base_aggregation_scope(object) + end + end + end + end + end +end diff --git a/ee/app/models/analytics/aggregation_engines/agent_platform_sessions.rb b/ee/app/models/analytics/aggregation_engines/agent_platform_sessions.rb new file mode 100644 index 00000000000000..f6737e89cc0ff3 --- /dev/null +++ b/ee/app/models/analytics/aggregation_engines/agent_platform_sessions.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +module Analytics + module AggregationEngines + class AgentPlatformSessions < Gitlab::Database::Aggregation::ClickHouse::Engine + self.table_name = 'agent_platform_sessions' + self.table_primary_key = %w[user_id namespace_path session_id flow_type] + + dimensions do + column :flow_type, :string, description: 'Type of session' + column :user_id, :integer, description: 'User ID' + date_bucket :created_event_at, :date, -> { + sql('anyIfMerge(created_event_at)') + }, description: 'Session creation time', parameters: { + granularity: { type: :string, in: %w[weekly monthly] } + } + end + + metrics do + count description: 'Total number of sessions' + count :finished, if: -> { + sql('anyIfMerge(finished_event_at) IS NOT NULL') + }, description: 'Number of finished sessions' + count :users, :integer, -> { sql('user_id') }, distinct: true, description: 'Number of unique users' + + mean :duration, :float, -> { + sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))") + }, description: 'Average session duration in seconds' + + rate :completion, numerator_if: -> { + sql('anyIfMerge(finished_event_at) IS NOT NULL') + }, description: 'Session completion rate' + + quantile :duration, 
:float, -> { + sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))") + }, description: 'Session duration quantile in seconds', parameters: { + quantile: { type: :float, in: 0.0..1.0 } + } + end + + filters do + exact_match :user_id, :integer, description: 'Filter by one or many user ids' + exact_match :flow_type, :string, description: 'Filter by one or many flow types' + range :created_event_at, :datetime, -> { sql('anyIfMerge(created_event_at)') }, + merge_column: true, + description: 'Filter by session creation timestamp' + end + + def self.prepare_base_aggregation_scope(object) + namespace = case object + when Project then object.project_namespace + else object + end + + builder = ClickHouse::Client::QueryBuilder.new(table_name) + + builder.where(builder.func('startsWith', [ + builder[:namespace_path], Arel::Nodes.build_quoted(namespace.traversal_path.to_s) + ])) + end + end + end +end diff --git a/ee/db/fixtures/development/94_ai_usage_stats.rb b/ee/db/fixtures/development/94_ai_usage_stats.rb index 010153b22c13ab..87fd84cc5c7b1c 100644 --- a/ee/db/fixtures/development/94_ai_usage_stats.rb +++ b/ee/db/fixtures/development/94_ai_usage_stats.rb @@ -32,11 +32,9 @@ def self.sync_to_click_house end def self.sync_to_postgres - Sidekiq::Testing.inline! do - ::UsageEvents::DumpWriteBufferCronWorker.new.perform - ::Analytics::AiAnalytics::EventsCountAggregationWorker.new.perform - ::Analytics::DumpAiUserMetricsWriteBufferCronWorker.new.perform - end + ::UsageEvents::DumpWriteBufferCronWorker.new.perform + ::Analytics::AiAnalytics::EventsCountAggregationWorker.new.perform + ::Analytics::DumpAiUserMetricsWriteBufferCronWorker.new.perform end def initialize(project) @@ -184,10 +182,12 @@ def generate_mcp_extras project = Project.includes(:builds, :users).find_by(id: ENV['PROJECT_ID']) project ||= Project.first - Gitlab::Seeder::AiUsageStats.new(project).seed! + Sidekiq::Testing.inline! do + Gitlab::Seeder::AiUsageStats.new(project).seed! 
- Gitlab::Seeder::AiUsageStats.sync_to_postgres - Gitlab::Seeder::AiUsageStats.sync_to_click_house + Gitlab::Seeder::AiUsageStats.sync_to_postgres + Gitlab::Seeder::AiUsageStats.sync_to_click_house + end puts "Successfully seeded '#{project.full_path}' for Ai Analytics!" puts "URL: #{Rails.application.routes.url_helpers.project_url(project)}" -- GitLab From 68fbf48a98eefe37f5197165c44f26d699ef6bd6 Mon Sep 17 00:00:00 2001 From: Adam Hegyi Date: Mon, 15 Dec 2025 20:22:49 +0100 Subject: [PATCH 5/5] Aggregation engine pagination support This change adds GraphQL offset pagination support for aggregation engine results. --- .../analytics/aggregation/engine_resolver.rb | 2 +- .../aggregation/aggregation_result.rb | 8 +- .../click_house/aggregation_result.rb | 16 ++ .../graphql_aggregation_connection.rb | 92 ++++++++++ lib/gitlab/graphql/pagination/connections.rb | 5 + .../graphql_aggregation_connection_spec.rb | 157 ++++++++++++++++++ 6 files changed, 278 insertions(+), 2 deletions(-) create mode 100644 lib/gitlab/database/aggregation/graphql_aggregation_connection.rb create mode 100644 spec/lib/gitlab/database/aggregation/graphql_aggregation_connection_spec.rb diff --git a/app/graphql/resolvers/analytics/aggregation/engine_resolver.rb b/app/graphql/resolvers/analytics/aggregation/engine_resolver.rb index 2b82646ebde62e..4d2be89e9cc468 100644 --- a/app/graphql/resolvers/analytics/aggregation/engine_resolver.rb +++ b/app/graphql/resolvers/analytics/aggregation/engine_resolver.rb @@ -42,7 +42,7 @@ def resolve_with_lookahead(**arguments) response = engine_class.new(context: { scope: aggregation_scope }).execute(request) if response.success? 
- response.payload[:data].to_a # TODO: remove `.to_a` when pagination connection is implemented + response.payload[:data] else # TODO: handle errors { diff --git a/lib/gitlab/database/aggregation/aggregation_result.rb b/lib/gitlab/database/aggregation/aggregation_result.rb index 85fd9574281349..a5d1bbaed3e138 100644 --- a/lib/gitlab/database/aggregation/aggregation_result.rb +++ b/lib/gitlab/database/aggregation/aggregation_result.rb @@ -12,7 +12,13 @@ def initialize(engine, plan, query) @query = query end - # TODO: add interface for paginating + def limit(_limit_value) + raise NoMethodError + end + + def offset(_offset_value) + raise NoMethodError + end delegate :to_a, :[], :each, to: :loaded_results diff --git a/lib/gitlab/database/aggregation/click_house/aggregation_result.rb b/lib/gitlab/database/aggregation/click_house/aggregation_result.rb index c6775c65d066da..c455dcedfd212a 100644 --- a/lib/gitlab/database/aggregation/click_house/aggregation_result.rb +++ b/lib/gitlab/database/aggregation/click_house/aggregation_result.rb @@ -5,6 +5,22 @@ module Database module Aggregation module ClickHouse class AggregationResult < Gitlab::Database::Aggregation::AggregationResult + def limit(limit_value) + self.class.new( + engine, + plan, + query.limit(limit_value) + ) + end + + def offset(offset_value) + self.class.new( + engine, + plan, + query.offset(offset_value) + ) + end + private def load_data diff --git a/lib/gitlab/database/aggregation/graphql_aggregation_connection.rb b/lib/gitlab/database/aggregation/graphql_aggregation_connection.rb new file mode 100644 index 00000000000000..49ad5802188021 --- /dev/null +++ b/lib/gitlab/database/aggregation/graphql_aggregation_connection.rb @@ -0,0 +1,92 @@ +# frozen_string_literal: true + +module Gitlab + module Database + # -- requires AR methods to build the keyset pagination conditions + module Aggregation + class GraphqlAggregationConnection < GraphQL::Pagination::Connection + # rubocop: disable Naming/PredicateName -- 
these methods are part of the GraphQL pagination API + def has_next_page + load_nodes + @has_next_page + end + + def has_previous_page + load_nodes + # If we have skipped any records (offset > 0), a previous page exists. + @final_offset > 0 + end + # rubocop: enable Naming/PredicateName + + def cursor_for(node) + load_nodes + # The cursor is the absolute index: Base Offset + Index in current batch + # Array#index compares with ==, so the first equal node in the current batch is matched. + batch_index = nodes.index(node) + + raise GraphQL::ExecutionError, "Node not found in current batch" unless batch_index + + encode((@final_offset + batch_index).to_s) + end + + def nodes + load_nodes + @nodes + end + + private + + def load_nodes + # Memoize so we don't run the query multiple times for pageInfo + nodes + return if defined?(@nodes) + + # We block 'last' if 'before' is missing. This prevents us from + # needing to know the Total Count of the underlying query. + if last.present? && before.blank? + raise GraphQL::ExecutionError, "Argument 'last' can only be used in conjunction with 'before'." + end + + current_first = first || (last.blank? ? limit_value : nil) + + start_index = after ? decode_cursor(after) + 1 : 0 + end_index = before ? 
decode_cursor(before) : nil + + if end_index + end_index = [end_index, start_index + current_first].min if current_first + + start_index = [start_index, end_index - last].max if last + + @final_offset = start_index + final_limit = [0, end_index - start_index].max + else + @final_offset = start_index + final_limit = current_first + end + + paginated_items = @items.limit(final_limit + 1).offset(@final_offset).to_a + + if paginated_items.size > final_limit + @has_next_page = true + # Remove the last element as it's only used for determining the next page + paginated_items.pop + else + @has_next_page = false + end + + @nodes = paginated_items + end + + def limit_value + @limit_value ||= [first, last, max_page_size || GitlabSchema.default_max_page_size].compact.min + end + + def decode_cursor(cursor) + value = Integer(decode(cursor)) + raise GraphQL::ExecutionError, "Invalid cursor provided" if value < 0 + + value + end + end + end + end +end diff --git a/lib/gitlab/graphql/pagination/connections.rb b/lib/gitlab/graphql/pagination/connections.rb index 073853b6d5d468..b654cac56ebab3 100644 --- a/lib/gitlab/graphql/pagination/connections.rb +++ b/lib/gitlab/graphql/pagination/connections.rb @@ -30,6 +30,11 @@ def self.use(schema) Gitlab::Graphql::Pagination::ClickHouseConnection ) + schema.connections.add( + Gitlab::Database::Aggregation::AggregationResult, + Gitlab::Database::Aggregation::GraphqlAggregationConnection + ) + schema.connections.add( Gitlab::Graphql::Pagination::ClickHouseAggregatedRelation, Gitlab::Graphql::Pagination::ClickHouseAggregatedConnection diff --git a/spec/lib/gitlab/database/aggregation/graphql_aggregation_connection_spec.rb b/spec/lib/gitlab/database/aggregation/graphql_aggregation_connection_spec.rb new file mode 100644 index 00000000000000..9729a41a88aa0a --- /dev/null +++ b/spec/lib/gitlab/database/aggregation/graphql_aggregation_connection_spec.rb @@ -0,0 +1,157 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe 
Gitlab::Database::Aggregation::GraphqlAggregationConnection, :click_house, feature_category: :database do + include GraphqlHelpers + include ClickHouseHelpers + + let_it_be(:user1) { create(:user) } + let_it_be(:user2) { create(:user) } + let_it_be(:user3) { create(:user) } + let_it_be(:event1) { create(:event, author: user1) } + let_it_be(:event2) { create(:event, author: user1) } + let_it_be(:event3) { create(:event, author: user2) } + let_it_be(:event4) { create(:event, author: user3) } + let_it_be(:event5) { create(:event, author: user3) } + let_it_be(:event6) { create(:event, author: user3) } + + let(:arguments) { {} } + let(:ctx) { GraphQL::Query::Context.new(query: query_double, values: nil) } + + let(:engine) { engine_definition.new(context: { scope: query_builder }) } + let(:query_builder) { query } + + let(:aggregation_request) do + Gitlab::Database::Aggregation::Request.new( + dimensions: [{ identifier: :author_id }], + metrics: [{ identifier: :total_count }], + order: [{ identifier: :total_count, direction: :desc }] + ) + end + + let(:nodes) do + engine.execute(aggregation_request).payload[:data] + end + + subject(:connection) do + described_class.new(nodes, **{ context: ctx, max_page_size: 10 }.merge(arguments)) + end + + context 'when using ClickHouse aggregation engine' do + let(:query) { ClickHouse::Client::QueryBuilder.new('events') } + + let(:engine_definition) do + Gitlab::Database::Aggregation::ClickHouse::Engine.build do + self.table_primary_key = %w[id] + + dimensions do + column :author_id, :integer + end + + metrics do + count + end + end + end + + before do + insert_events_into_click_house + end + + describe 'forward pagination' do + context 'when requesting the first page' do + let(:arguments) { { first: 2 } } + + it 'returns the top 2 records' do + expect(connection.nodes).to match([ + a_hash_including("author_id" => user3.id, "total_count" => 3), + a_hash_including("author_id" => user1.id, "total_count" => 2) + ]) + end + + it 'sets 
correct page_info' do + expect(connection.has_next_page).to be true + expect(connection.has_previous_page).to be false + end + + it 'generates correct cursors' do + node_1 = connection.nodes.first + expect(connection.cursor_for(node_1)).to eq(encode_cursor(0)) + end + end + + context 'when paginating with "after"' do + let(:arguments) { { first: 2, after: encode_cursor(0) } } + + it 'returns the next records' do + expect(connection.nodes).to match([ + a_hash_including("author_id" => user1.id, "total_count" => 2), + a_hash_including("author_id" => user2.id, "total_count" => 1) + ]) + end + + it 'sets correct page_info' do + expect(connection.has_next_page).to be false + expect(connection.has_previous_page).to be true + end + end + + context 'when "after" is the last item' do + let(:arguments) { { first: 2, after: encode_cursor(2) } } + + it 'returns empty list' do + expect(connection.nodes).to be_empty + expect(connection.has_next_page).to be false + expect(connection.has_previous_page).to be true + end + end + end + + describe 'backward pagination' do + context 'when using last and before' do + let(:arguments) { { last: 1, before: encode_cursor(2) } } + + it 'returns the preceding record' do + expect(connection.nodes).to match([ + a_hash_including("author_id" => user1.id) + ]) + end + + it 'sets correct page_info' do + expect(connection.has_next_page).to be true + expect(connection.has_previous_page).to be true + end + end + + context 'when "last" is larger than available "before" records' do + let(:arguments) { { last: 10, before: encode_cursor(2) } } + + it 'returns all preceding records up to the start' do + expect(connection.nodes).to match([ + a_hash_including("author_id" => user3.id), + a_hash_including("author_id" => user1.id) + ]) + end + + it 'indicates no previous page' do + expect(connection.has_previous_page).to be false + expect(connection.has_next_page).to be true + end + end + + context 'when "last" is provided without "before"' do + let(:arguments) 
{ { last: 2 } } + + it 'raises an execution error' do + expect { connection.nodes } + .to raise_error(GraphQL::ExecutionError, /Argument 'last' can only be used in conjunction with 'before'/) + end + end + end + end + + def encode_cursor(value) + GitlabSchema.cursor_encoder.encode(value.to_s, nonce: true) + end +end -- GitLab