diff --git a/Gemfile b/Gemfile
index d82fe047e8de914ffcd50497ee37730321ecb485..cff3072484e87098e49b10dfdf62dbb1b0017531 100644
--- a/Gemfile
+++ b/Gemfile
@@ -378,7 +378,7 @@ gem 'rack-proxy', '~> 0.7.7', feature_category: :shared # rubocop:todo Gemfile/M
gem 'cssbundling-rails', '1.4.3', feature_category: :shared # rubocop:todo Gemfile/MissingFeatureCategory -- https://gitlab.com/gitlab-org/gitlab/-/issues/581839
gem 'terser', '1.0.2', feature_category: :shared # rubocop:todo Gemfile/MissingFeatureCategory -- https://gitlab.com/gitlab-org/gitlab/-/issues/581839
-gem 'click_house-client', '0.8.2', feature_category: :database
+gem 'click_house-client', '0.8.5', feature_category: :database
gem 'addressable', '~> 2.8', feature_category: :shared # rubocop:todo Gemfile/MissingFeatureCategory -- https://gitlab.com/gitlab-org/gitlab/-/issues/581839
gem 'gon', '~> 6.5.0', feature_category: :shared # rubocop:todo Gemfile/MissingFeatureCategory -- https://gitlab.com/gitlab-org/gitlab/-/issues/581839
gem 'request_store', '~> 1.7.0', feature_category: :shared # rubocop:todo Gemfile/MissingFeatureCategory -- https://gitlab.com/gitlab-org/gitlab/-/issues/581839
diff --git a/Gemfile.checksum b/Gemfile.checksum
index bbeab09596a8c150df5c41bf1c75bd95895094b1..71e23f7a73503b7e3885b38a73cf4956f8ffb396 100644
--- a/Gemfile.checksum
+++ b/Gemfile.checksum
@@ -82,7 +82,7 @@
{"name":"citrus","version":"3.0.2","platform":"ruby","checksum":"4ec2412fc389ad186735f4baee1460f7900a8e130ffe3f216b30d4f9c684f650"},
{"name":"claide","version":"1.1.0","platform":"ruby","checksum":"6d3c5c089dde904d96aa30e73306d0d4bd444b1accb9b3125ce14a3c0183f82e"},
{"name":"claide-plugins","version":"0.9.2","platform":"ruby","checksum":"c7ea78bc067ab23bce8515497cdcdcb8f01c86dadfbe13c44644e382922c1c2e"},
-{"name":"click_house-client","version":"0.8.2","platform":"ruby","checksum":"49e25617a1bf11eb51045191569bcae000773dc58b49e8bffdbde2304ed565cc"},
+{"name":"click_house-client","version":"0.8.5","platform":"ruby","checksum":"aadf5f2437f287efdbf1feae576d99a0dbcaeb888deb4af094b27295fb342a3f"},
{"name":"coderay","version":"1.1.3","platform":"ruby","checksum":"dc530018a4684512f8f38143cd2a096c9f02a1fc2459edcfe534787a7fc77d4b"},
{"name":"coercible","version":"1.0.0","platform":"ruby","checksum":"5081ad24352cc8435ce5472bc2faa30260c7ea7f2102cc6a9f167c4d9bffaadc"},
{"name":"colored2","version":"3.1.2","platform":"ruby","checksum":"b13c2bd7eeae2cf7356a62501d398e72fde78780bd26aec6a979578293c28b4a"},
diff --git a/Gemfile.lock b/Gemfile.lock
index 09ab3af1dd457311e4db7483691c21f69047d0d4..46273c3e5b4fffe7f6a812bb348ccab54e476968 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -434,7 +434,7 @@ GEM
cork
nap
open4 (~> 1.3)
- click_house-client (0.8.2)
+ click_house-client (0.8.5)
activerecord (>= 7.0, < 9.0)
activesupport (>= 7.0, < 9.0)
addressable (~> 2.8)
@@ -2122,7 +2122,7 @@ DEPENDENCIES
carrierwave (~> 1.3)
charlock_holmes (~> 0.7.9)
circuitbox (= 2.0.0)
- click_house-client (= 0.8.2)
+ click_house-client (= 0.8.5)
commonmarker (~> 0.23.10)
concurrent-ruby (~> 1.1)
connection_pool (~> 2.5.3)
diff --git a/Gemfile.next.checksum b/Gemfile.next.checksum
index bbeab09596a8c150df5c41bf1c75bd95895094b1..71e23f7a73503b7e3885b38a73cf4956f8ffb396 100644
--- a/Gemfile.next.checksum
+++ b/Gemfile.next.checksum
@@ -82,7 +82,7 @@
{"name":"citrus","version":"3.0.2","platform":"ruby","checksum":"4ec2412fc389ad186735f4baee1460f7900a8e130ffe3f216b30d4f9c684f650"},
{"name":"claide","version":"1.1.0","platform":"ruby","checksum":"6d3c5c089dde904d96aa30e73306d0d4bd444b1accb9b3125ce14a3c0183f82e"},
{"name":"claide-plugins","version":"0.9.2","platform":"ruby","checksum":"c7ea78bc067ab23bce8515497cdcdcb8f01c86dadfbe13c44644e382922c1c2e"},
-{"name":"click_house-client","version":"0.8.2","platform":"ruby","checksum":"49e25617a1bf11eb51045191569bcae000773dc58b49e8bffdbde2304ed565cc"},
+{"name":"click_house-client","version":"0.8.5","platform":"ruby","checksum":"aadf5f2437f287efdbf1feae576d99a0dbcaeb888deb4af094b27295fb342a3f"},
{"name":"coderay","version":"1.1.3","platform":"ruby","checksum":"dc530018a4684512f8f38143cd2a096c9f02a1fc2459edcfe534787a7fc77d4b"},
{"name":"coercible","version":"1.0.0","platform":"ruby","checksum":"5081ad24352cc8435ce5472bc2faa30260c7ea7f2102cc6a9f167c4d9bffaadc"},
{"name":"colored2","version":"3.1.2","platform":"ruby","checksum":"b13c2bd7eeae2cf7356a62501d398e72fde78780bd26aec6a979578293c28b4a"},
diff --git a/Gemfile.next.lock b/Gemfile.next.lock
index 09ab3af1dd457311e4db7483691c21f69047d0d4..46273c3e5b4fffe7f6a812bb348ccab54e476968 100644
--- a/Gemfile.next.lock
+++ b/Gemfile.next.lock
@@ -434,7 +434,7 @@ GEM
cork
nap
open4 (~> 1.3)
- click_house-client (0.8.2)
+ click_house-client (0.8.5)
activerecord (>= 7.0, < 9.0)
activesupport (>= 7.0, < 9.0)
addressable (~> 2.8)
@@ -2122,7 +2122,7 @@ DEPENDENCIES
carrierwave (~> 1.3)
charlock_holmes (~> 0.7.9)
circuitbox (= 2.0.0)
- click_house-client (= 0.8.2)
+ click_house-client (= 0.8.5)
commonmarker (~> 0.23.10)
concurrent-ruby (~> 1.1)
connection_pool (~> 2.5.3)
diff --git a/app/graphql/resolvers/analytics/aggregation/engine_resolver.rb b/app/graphql/resolvers/analytics/aggregation/engine_resolver.rb
new file mode 100644
index 0000000000000000000000000000000000000000..4d2be89e9cc468b6d716712fa5880118f64c1a5a
--- /dev/null
+++ b/app/graphql/resolvers/analytics/aggregation/engine_resolver.rb
@@ -0,0 +1,97 @@
+# frozen_string_literal: true
+
+module Resolvers
+ module Analytics
+ module Aggregation
+ class EngineResolver < BaseResolver # rubocop:disable Graphql/ResolverType -- type declared in subclasses
+ class << self
+ attr_accessor :engine
+
+ def build(engine, **graphql_context, &block)
+ klass = Class.new(self)
+ klass.engine = engine
+ klass.class_eval do
+ type Types::Analytics::Aggregation::EngineResponseType.build(engine, **graphql_context), null: true
+
+ declare_filters
+ end
+ klass.class_eval(&block) if block
+
+ klass
+ end
+
+ private
+
+ def declare_filters
+ adapter = ::Gitlab::Database::Aggregation::Graphql::Adapter
+ adapter.each_filter_argument(engine.filters) do |name, type, kwargs|
+ argument(name, type, **kwargs) # rubocop:disable Graphql/Descriptions -- defined in adapter
+ end
+ end
+ end
+
+ include LooksAhead
+
+ argument :order_by,
+ [Types::Analytics::Aggregation::OrderType],
+ required: false,
+ description: 'Sorting order for the aggregated data.'
+
+        def resolve_with_lookahead(**arguments)
+          request = build_aggregation_request(arguments)
+          response = engine_class.new(context: { scope: aggregation_scope }).execute(request)
+
+          if response.success?
+            response.payload[:data]
+          else
+            # NOTE(review): `errors` below is an undefined local and will raise
+            # NameError on any failed response — replace with response.errors
+            # (or the engine's error payload) before shipping. TODO: handle errors
+            {
+              errors: errors.map { |field, messages| { field: field, messages: messages } }
+            }
+          end
+        end
+
+ private
+
+ def engine_class
+ self.class.engine
+ end
+
+ def aggregation_scope
+ raise NoMethodError # must be overloaded in dynamic class definition
+ end
+
+ def build_aggregation_request(arguments)
+ selections = lookahead.selections.first.selections
+ dimensions_selection = selections.detect { |s| s.name == :dimensions }
+ dimensions = if dimensions_selection
+ dimensions_selection.selections.map do |s|
+ { identifier: s.name.to_sym, parameters: s.arguments }
+ end
+ else
+ []
+ end
+ # prepare order
+          order = Array(arguments.delete(:order_by)).map do |o|
+ o.to_hash.tap do |ord|
+ ord[:identifier] = ord[:identifier].to_sym
+ ord[:parameters] ||= {}
+ end
+ end
+ # prepare filters: arguments - orderBy = filters
+ filters = arguments.map { |key, values| { identifier: key.to_sym, values: values } }
+
+ # selections - dimensions = metrics
+ metric_selections = selections.reject { |s| s.name == :dimensions }
+ metrics = metric_selections.map do |selection|
+ { identifier: selection.name.to_sym, parameters: selection.arguments }
+ end
+
+ ::Gitlab::Database::Aggregation::Request.new(
+ filters: filters, dimensions: dimensions, metrics: metrics, order: order
+ )
+ end
+ end
+ end
+ end
+end
diff --git a/app/graphql/types/analytics/aggregation/engine_response_dimensions_type.rb b/app/graphql/types/analytics/aggregation/engine_response_dimensions_type.rb
new file mode 100644
index 0000000000000000000000000000000000000000..86139fd7d187ccf046e609d02e88d78e3bf9767b
--- /dev/null
+++ b/app/graphql/types/analytics/aggregation/engine_response_dimensions_type.rb
@@ -0,0 +1,57 @@
+# frozen_string_literal: true
+
+module Types
+ module Analytics
+ module Aggregation
+ module EngineResponseDimensionsType
+ class << self
+ def build(engine, graphql_context)
+ adapter = ::Gitlab::Database::Aggregation::Graphql::Adapter
+ mounted_engine_name = adapter.mounted_engine_name(graphql_context[:name])
+
+ Class.new(BaseObject) do
+ graphql_name "#{mounted_engine_name}AggregationResponseDimensions"
+ description "Response dimensions for #{mounted_engine_name} aggregation engine"
+
+ engine.dimensions.each do |dimension|
+ params = dimension.respond_to?(:parameters) ? dimension.parameters : {}
+ field dimension.identifier.to_sym,
+ adapter.graphql_type(dimension.type),
+ null: true,
+ description: dimension.description do
+ params.each do |param_name, param_config|
+ argument param_name, adapter.graphql_type(param_config[:type]),
+ required: false, description: param_config[:description]
+ end
+ end
+
+ define_method(dimension.identifier) do |**field_kwargs|
+ allowed_params = field_kwargs.slice(*params.keys)
+
+ object[dimension.instance_key(parameters: allowed_params)]
+ end
+ end
+
+ # adapter.dimensions_to_fields(engine.dimensions) do |field_name, type, params, kwargs|
+ # field(field_name, type, **kwargs) do-- defined in adapter
+ # params.each do |param_name, param_config|
+ # argument(*adapter.parameter_to_argument(param_name, param_config))-- defined in adapter
+ # end
+ # end
+
+ # define_method(field_name) do |**field_kwargs|
+ # allowed_params = field_kwargs.slice(*params.pluck(:identifier))-- not AR
+
+ # part = engine.parts.detect { |d| d.identifier == field_name }
+ # key = part.instance_key(parameters: allowed_params)
+
+ # object[key]
+ # end
+ # end
+ end
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/app/graphql/types/analytics/aggregation/engine_response_type.rb b/app/graphql/types/analytics/aggregation/engine_response_type.rb
new file mode 100644
index 0000000000000000000000000000000000000000..07c4491972f1085c13730aca8d7203d8f1f5cf83
--- /dev/null
+++ b/app/graphql/types/analytics/aggregation/engine_response_type.rb
@@ -0,0 +1,45 @@
+# frozen_string_literal: true
+
+module Types
+ module Analytics
+ module Aggregation
+ module EngineResponseType
+ class << self
+ def build(engine, graphql_context)
+ adapter = ::Gitlab::Database::Aggregation::Graphql::Adapter
+ mounted_engine_name = adapter.mounted_engine_name(graphql_context[:name])
+
+ Class.new(BaseObject) do
+ graphql_name "#{mounted_engine_name}AggregationResponse"
+ description "Response for #{mounted_engine_name} aggregation engine"
+
+ field :dimensions,
+ Types::Analytics::Aggregation::EngineResponseDimensionsType.build(engine, **graphql_context),
+ description: 'Aggregation dimensions. Every selected dimension will be used for aggregation.',
+ resolver_method: :object
+
+ engine.metrics.each do |metric|
+ params = metric.respond_to?(:parameters) ? metric.parameters : {}
+ field metric.identifier.to_sym,
+ adapter.graphql_type(metric.type),
+ null: true,
+ description: metric.description do
+ params.each do |param_name, param_config|
+ argument param_name, adapter.graphql_type(param_config[:type]),
+ required: false, description: param_config[:description]
+ end
+ end
+
+ define_method(metric.identifier) do |**field_kwargs|
+ allowed_params = field_kwargs.slice(*params.keys)
+
+ object[metric.instance_key(parameters: allowed_params)]
+ end
+ end
+ end
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/app/graphql/types/analytics/aggregation/order_type.rb b/app/graphql/types/analytics/aggregation/order_type.rb
new file mode 100644
index 0000000000000000000000000000000000000000..a3c35dd6c2406fa24ba7a64f3876bc55da635e35
--- /dev/null
+++ b/app/graphql/types/analytics/aggregation/order_type.rb
@@ -0,0 +1,14 @@
+# frozen_string_literal: true
+
+module Types
+ module Analytics
+ module Aggregation
+ class OrderType < BaseInputObject
+ graphql_name 'AggregationOrder'
+
+ argument :direction, SortDirectionEnum, required: true, description: 'Sorting direction.'
+ argument :identifier, String, required: true, description: 'Dimension or metric identifier.'
+ end
+ end
+ end
+end
diff --git a/doc/api/graphql/reference/_index.md b/doc/api/graphql/reference/_index.md
index d49625a1e2206deff1325fd47f64bc94435e0c21..63ae7a0ce61348758be41864b0f96172824d4722 100644
--- a/doc/api/graphql/reference/_index.md
+++ b/doc/api/graphql/reference/_index.md
@@ -24311,6 +24311,29 @@ The edge type for [`WorkspaceVariable`](#workspacevariable).
| `cursor` | [`String!`](#string) | A cursor for use in pagination. |
| `node` | [`WorkspaceVariable`](#workspacevariable) | The item at the end of the edge. |
+#### `agentPlatformSessionsAggregationResponseConnection`
+
+The connection type for [`agentPlatformSessionsAggregationResponse`](#agentplatformsessionsaggregationresponse).
+
+##### Fields
+
+| Name | Type | Description |
+| ---- | ---- | ----------- |
+| `edges` | [`[agentPlatformSessionsAggregationResponseEdge]`](#agentplatformsessionsaggregationresponseedge) | A list of edges. |
+| `nodes` | [`[agentPlatformSessionsAggregationResponse]`](#agentplatformsessionsaggregationresponse) | A list of nodes. |
+| `pageInfo` | [`PageInfo!`](#pageinfo) | Information to aid in pagination. |
+
+#### `agentPlatformSessionsAggregationResponseEdge`
+
+The edge type for [`agentPlatformSessionsAggregationResponse`](#agentplatformsessionsaggregationresponse).
+
+##### Fields
+
+| Name | Type | Description |
+| ---- | ---- | ----------- |
+| `cursor` | [`String!`](#string) | A cursor for use in pagination. |
+| `node` | [`agentPlatformSessionsAggregationResponse`](#agentplatformsessionsaggregationresponse) | The item at the end of the edge. |
+
## Object types
Object types represent the resources that the GitLab GraphQL API can return.
@@ -25624,6 +25647,32 @@ Self-hosted LLM servers.
| `releaseState` | [`AiSelfHostedModelReleaseState!`](#aiselfhostedmodelreleasestate) | GitLab release status of the model. |
| `updatedAt` | [`Time`](#time) | Timestamp of last update. |
+### `AiUsage`
+
+Usage metrics. Not for production use yet.
+
+#### Fields with arguments
+
+##### `AiUsage.agentPlatformSessions`
+
+Usage of GitLab agent platform sessions.
+
+Returns [`agentPlatformSessionsAggregationResponseConnection`](#agentplatformsessionsaggregationresponseconnection).
+
+This field returns a [connection](#connections). It accepts the
+four standard [pagination arguments](#pagination-arguments):
+`before: String`, `after: String`, `first: Int`, and `last: Int`.
+
+###### Arguments
+
+| Name | Type | Description |
+| ---- | ---- | ----------- |
+| `createdEventAtFrom` | [`Time`](#time) | Filter by session creation timestamp. Start of the range. |
+| `createdEventAtTo` | [`Time`](#time) | Filter by session creation timestamp. End of the range. |
+| `flowType` | [`[String!]`](#string) | Filter by one or many flow types. |
+| `orderBy` | [`[AggregationOrder!]`](#aggregationorder) | Sorting order for the aggregated data. |
+| `userId` | [`[Int!]`](#int) | Filter by one or many user ids. |
+
### `AiUsageData`
Usage data for events stored in either PostgreSQL (default) or ClickHouse (when configured). Data retention: three months in PostgreSQL, indefinite in ClickHouse. Requires a personal access token. Works only on top-level groups. Premium and Ultimate only.
@@ -32946,6 +32995,7 @@ GPG signature for a signed commit.
| `additionalPurchasedStorageSize` | [`Float`](#float) | Additional storage purchased for the root namespace in bytes. |
| `adminEditPath` | [`String`](#string) | Admin path for editing group. Only available to admins. |
| `adminShowPath` | [`String`](#string) | Admin path of the group. Only available to admins. |
+| `aiUsage` {{< icon name="warning-solid" >}} | [`AiUsage`](#aiusage) | **Introduced** in GitLab 18.8. **Status**: Experiment. AI-related data 2.0. |
| `aiUsageData` {{< icon name="warning-solid" >}} | [`AiUsageData`](#aiusagedata) | **Introduced** in GitLab 17.5. **Status**: Experiment. AI-related data. |
| `allowStaleRunnerPruning` | [`Boolean!`](#boolean) | Indicates whether to regularly prune stale group runners. Defaults to false. |
| `amazonS3Configurations` | [`AmazonS3ConfigurationTypeConnection`](#amazons3configurationtypeconnection) | Amazon S3 configurations that receive audit events belonging to the group. (see [Connections](#connections)) |
@@ -49952,6 +50002,60 @@ Requires ClickHouse. Premium and Ultimate only.
| ---- | ---- | ----------- |
| `flowMetrics` | [`[AgentPlatformFlowMetric!]`](#agentplatformflowmetric) | Aggregated flow metrics for agent platform. |
+### `agentPlatformSessionsAggregationResponse`
+
+Response for agentPlatformSessions aggregation engine.
+
+#### Fields
+
+| Name | Type | Description |
+| ---- | ---- | ----------- |
+| `completionRate` | [`Float`](#float) | Session completion rate. |
+| `dimensions` | [`agentPlatformSessionsAggregationResponseDimensions`](#agentplatformsessionsaggregationresponsedimensions) | Aggregation dimensions. Every selected dimension will be used for aggregation. |
+| `finishedCount` | [`Int`](#int) | Number of finished sessions. |
+| `meanDuration` | [`Float`](#float) | Average session duration in seconds. |
+| `totalCount` | [`Int`](#int) | Total number of sessions. |
+| `usersCount` | [`Int`](#int) | Number of unique users. |
+
+#### Fields with arguments
+
+##### `agentPlatformSessionsAggregationResponse.durationQuantile`
+
+Session duration quantile in seconds.
+
+Returns [`Float`](#float).
+
+###### Arguments
+
+| Name | Type | Description |
+| ---- | ---- | ----------- |
+| `quantile` | [`Float`](#float) | |
+
+### `agentPlatformSessionsAggregationResponseDimensions`
+
+Response dimensions for agentPlatformSessions aggregation engine.
+
+#### Fields
+
+| Name | Type | Description |
+| ---- | ---- | ----------- |
+| `flowType` | [`String`](#string) | Type of session. |
+| `userId` | [`Int`](#int) | User ID. |
+
+#### Fields with arguments
+
+##### `agentPlatformSessionsAggregationResponseDimensions.createdEventAt`
+
+Session creation time.
+
+Returns [`Date`](#date).
+
+###### Arguments
+
+| Name | Type | Description |
+| ---- | ---- | ----------- |
+| `granularity` | [`String`](#string) | |
+
### `agentPlatformUserMetrics`
Agent Platform user metrics for a user. Requires ClickHouse. Premium and Ultimate with GitLab Duo Enterprise only.
@@ -57800,6 +57904,15 @@ be used as arguments).
Only general use input types are listed here. For mutation input types,
see the associated mutation type above.
+### `AggregationOrder`
+
+#### Arguments
+
+| Name | Type | Description |
+| ---- | ---- | ----------- |
+| `direction` | [`SortDirectionEnum!`](#sortdirectionenum) | Sorting direction. |
+| `identifier` | [`String!`](#string) | Dimension or metric identifier. |
+
### `AiAdditionalContextInput`
#### Arguments
diff --git a/ee/app/graphql/ee/types/group_type.rb b/ee/app/graphql/ee/types/group_type.rb
index 0fdd7899faa787dda0bf2a826d1d1d255b6c2ce8..7b5b9f02f77cec54d4a884c3dad141b0a2cc1e62 100644
--- a/ee/app/graphql/ee/types/group_type.rb
+++ b/ee/app/graphql/ee/types/group_type.rb
@@ -231,6 +231,13 @@ module GroupType
resolver: ::Resolvers::Analytics::AiMetrics::UserMetricsResolver,
experiment: { milestone: '17.5' }
+ field :ai_usage,
+ ::Types::Analytics::AiUsage::AiUsageType,
+ description: 'AI-related data 2.0.',
+ method: :itself,
+ authorize: :read_enterprise_ai_analytics,
+ experiment: { milestone: '18.8' }
+
field :project_compliance_standards_adherence,
::Types::Projects::ComplianceStandards::AdherenceType.connection_type,
null: true,
diff --git a/ee/app/graphql/types/analytics/ai_usage/ai_usage_type.rb b/ee/app/graphql/types/analytics/ai_usage/ai_usage_type.rb
new file mode 100644
index 0000000000000000000000000000000000000000..4f1efeb54fec7de06f8f92c9db4db523d4b5c168
--- /dev/null
+++ b/ee/app/graphql/types/analytics/ai_usage/ai_usage_type.rb
@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+
+module Types
+ module Analytics
+ module AiUsage
+ class AiUsageType < BaseObject
+ graphql_name 'AiUsage'
+ description "Usage metrics. Not for production use yet."
+ authorize :read_pro_ai_analytics
+
+ include AggregationEngineHelpers
+
+ mount_aggregation_engine ::Analytics::AggregationEngines::AgentPlatformSessions,
+ name: :agent_platform_sessions,
+ description: 'Usage of GitLab agent platform sessions' do
+ define_method(:aggregation_scope) do
+ ::Analytics::AggregationEngines::AgentPlatformSessions.prepare_base_aggregation_scope(object)
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/ee/app/graphql/types/concerns/aggregation_engine_helpers.rb b/ee/app/graphql/types/concerns/aggregation_engine_helpers.rb
new file mode 100644
index 0000000000000000000000000000000000000000..0cc64693554beab66d16544a29a2f579bab63022
--- /dev/null
+++ b/ee/app/graphql/types/concerns/aggregation_engine_helpers.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+module AggregationEngineHelpers # rubocop:disable Gitlab/BoundedContexts -- global helper
+ extend ActiveSupport::Concern
+
+ class_methods do
+ def mount_aggregation_engine(engine, **kwargs, &block)
+ resolver = Resolvers::Analytics::Aggregation::EngineResolver.build(engine, **kwargs, &block)
+ response_type = Types::Analytics::Aggregation::EngineResponseType.build(engine, **kwargs)
+
+ field_params = kwargs.except(:name).merge(resolver: resolver)
+
+ field kwargs[:name], response_type.connection_type, **field_params # rubocop:disable Graphql/Descriptions -- description provided via kwargs
+ end
+ end
+end
diff --git a/ee/app/models/analytics/aggregation_engines/agent_platform_sessions.rb b/ee/app/models/analytics/aggregation_engines/agent_platform_sessions.rb
new file mode 100644
index 0000000000000000000000000000000000000000..f6737e89cc0ff3ae6c0317782d05eb19c7328898
--- /dev/null
+++ b/ee/app/models/analytics/aggregation_engines/agent_platform_sessions.rb
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+module Analytics
+ module AggregationEngines
+ class AgentPlatformSessions < Gitlab::Database::Aggregation::ClickHouse::Engine
+ self.table_name = 'agent_platform_sessions'
+ self.table_primary_key = %w[user_id namespace_path session_id flow_type]
+
+ dimensions do
+ column :flow_type, :string, description: 'Type of session'
+ column :user_id, :integer, description: 'User ID'
+ date_bucket :created_event_at, :date, -> {
+ sql('anyIfMerge(created_event_at)')
+ }, description: 'Session creation time', parameters: {
+ granularity: { type: :string, in: %w[weekly monthly] }
+ }
+ end
+
+ metrics do
+ count description: 'Total number of sessions'
+ count :finished, if: -> {
+ sql('anyIfMerge(finished_event_at) IS NOT NULL')
+ }, description: 'Number of finished sessions'
+ count :users, :integer, -> { sql('user_id') }, distinct: true, description: 'Number of unique users'
+
+ mean :duration, :float, -> {
+ sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))")
+ }, description: 'Average session duration in seconds'
+
+ rate :completion, numerator_if: -> {
+ sql('anyIfMerge(finished_event_at) IS NOT NULL')
+ }, description: 'Session completion rate'
+
+ quantile :duration, :float, -> {
+ sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))")
+ }, description: 'Session duration quantile in seconds', parameters: {
+ quantile: { type: :float, in: 0.0..1.0 }
+ }
+ end
+
+ filters do
+ exact_match :user_id, :integer, description: 'Filter by one or many user ids'
+ exact_match :flow_type, :string, description: 'Filter by one or many flow types'
+ range :created_event_at, :datetime, -> { sql('anyIfMerge(created_event_at)') },
+ merge_column: true,
+ description: 'Filter by session creation timestamp'
+ end
+
+ def self.prepare_base_aggregation_scope(object)
+ namespace = case object
+ when Project then object.project_namespace
+ else object
+ end
+
+ builder = ClickHouse::Client::QueryBuilder.new(table_name)
+
+ builder.where(builder.func('startsWith', [
+ builder[:namespace_path], Arel::Nodes.build_quoted(namespace.traversal_path.to_s)
+ ]))
+ end
+ end
+ end
+end
diff --git a/ee/db/fixtures/development/94_ai_usage_stats.rb b/ee/db/fixtures/development/94_ai_usage_stats.rb
index 010153b22c13ab027cecf85feb0ea593aec45f00..87fd84cc5c7b1c233eecaae1c6a441a27e24acd6 100644
--- a/ee/db/fixtures/development/94_ai_usage_stats.rb
+++ b/ee/db/fixtures/development/94_ai_usage_stats.rb
@@ -32,11 +32,9 @@ def self.sync_to_click_house
end
def self.sync_to_postgres
- Sidekiq::Testing.inline! do
- ::UsageEvents::DumpWriteBufferCronWorker.new.perform
- ::Analytics::AiAnalytics::EventsCountAggregationWorker.new.perform
- ::Analytics::DumpAiUserMetricsWriteBufferCronWorker.new.perform
- end
+ ::UsageEvents::DumpWriteBufferCronWorker.new.perform
+ ::Analytics::AiAnalytics::EventsCountAggregationWorker.new.perform
+ ::Analytics::DumpAiUserMetricsWriteBufferCronWorker.new.perform
end
def initialize(project)
@@ -184,10 +182,12 @@ def generate_mcp_extras
project = Project.includes(:builds, :users).find_by(id: ENV['PROJECT_ID'])
project ||= Project.first
- Gitlab::Seeder::AiUsageStats.new(project).seed!
+ Sidekiq::Testing.inline! do
+ Gitlab::Seeder::AiUsageStats.new(project).seed!
- Gitlab::Seeder::AiUsageStats.sync_to_postgres
- Gitlab::Seeder::AiUsageStats.sync_to_click_house
+ Gitlab::Seeder::AiUsageStats.sync_to_postgres
+ Gitlab::Seeder::AiUsageStats.sync_to_click_house
+ end
puts "Successfully seeded '#{project.full_path}' for Ai Analytics!"
puts "URL: #{Rails.application.routes.url_helpers.project_url(project)}"
diff --git a/lib/gitlab/database/aggregation/active_record/aggregation_result.rb b/lib/gitlab/database/aggregation/active_record/aggregation_result.rb
index ea5f47db8cd9782984cdd44afa6621cab4fd1683..1d9d64e804639323d7d86b9dc8034278f2c9e77e 100644
--- a/lib/gitlab/database/aggregation/active_record/aggregation_result.rb
+++ b/lib/gitlab/database/aggregation/active_record/aggregation_result.rb
@@ -8,7 +8,7 @@ class AggregationResult < Gitlab::Database::Aggregation::AggregationResult
private
def load_data
- query.model.connection.select_all(query.limit(1000).to_sql)
+ query.model.connection.select_all(query.to_sql)
end
end
end
diff --git a/lib/gitlab/database/aggregation/active_record/column.rb b/lib/gitlab/database/aggregation/active_record/column.rb
index 0e372e8fbbbe9e52a5963683805c4b72abca87b8..0aa819e002b28cea3561de60a09ecb2fec8c0843 100644
--- a/lib/gitlab/database/aggregation/active_record/column.rb
+++ b/lib/gitlab/database/aggregation/active_record/column.rb
@@ -42,21 +42,10 @@ module ActiveRecord
# # INNER JOIN "issue_metrics" ON "issue_metrics"."issue_id" = "issues"."id"
# # WHERE "issues"."project_id" = 1
class Column < PartDefinition
- attr_reader :scope_proc
-
- def initialize(*args, scope_proc: nil, **kwargs)
- super
- @scope_proc = scope_proc
- end
-
def to_hash
super.merge(kind: :column)
end
- def apply_scope(scope, context)
- scope_proc ? scope_proc.call(scope, context) : scope
- end
-
# Returns an Arel node representing this column or metric in a SELECT statement.
# Subclasses may wrap the expression (e.g., date_trunc, AVG, COUNT).
def to_arel(context)
diff --git a/lib/gitlab/database/aggregation/active_record/timestamp_column.rb b/lib/gitlab/database/aggregation/active_record/date_bucket_dimension.rb
similarity index 63%
rename from lib/gitlab/database/aggregation/active_record/timestamp_column.rb
rename to lib/gitlab/database/aggregation/active_record/date_bucket_dimension.rb
index a9962a3fe0e97dbac4ddc6468f1712b3462d731b..93e1d4b5bb60d24352f6c282e871beb5340f1233 100644
--- a/lib/gitlab/database/aggregation/active_record/timestamp_column.rb
+++ b/lib/gitlab/database/aggregation/active_record/date_bucket_dimension.rb
@@ -4,7 +4,7 @@ module Gitlab
module Database
module Aggregation
module ActiveRecord
- class TimestampColumn < Column
+ class DateBucketDimension < Column
include ParameterizedDefinition
self.supported_parameters = %i[granularity]
@@ -22,16 +22,31 @@ def to_hash
super.merge(kind: :timestamp_column)
end
+ def validate_part(part)
+ validate_granularity(part)
+ end
+
def to_arel(context)
granularity = instance_parameter(:granularity, context[name]) || DEFAULT_GRANULARITY
- # TODO: Validate granularity in query plan!
-
quoted_string = context[:scope].model.connection.quote(GRANULARITIES_MAP[granularity])
expr = expression ? expression.call : context[:scope].arel_table[name]
Arel::Nodes::NamedFunction.new('date_trunc', [Arel.sql(quoted_string), expr])
end
+
+ private
+
+ def validate_granularity(part)
+ granularity = part.configuration[:granularity]
+ return unless granularity
+ return if granularity.in?(parameters.dig(:granularity, :in) || [])
+
+ part.errors.add(:granularity,
+ format(s_("AggregationEngine|Unknown granularity \"%{granularity}\""),
+ granularity: granularity)
+ )
+ end
end
end
end
diff --git a/lib/gitlab/database/aggregation/active_record/engine.rb b/lib/gitlab/database/aggregation/active_record/engine.rb
index 40d67848506eab0e4a054266769de4ed9fb1a5a9..5a8b87adc2aed89521046a0eabb135f04984cf46 100644
--- a/lib/gitlab/database/aggregation/active_record/engine.rb
+++ b/lib/gitlab/database/aggregation/active_record/engine.rb
@@ -20,7 +20,14 @@ def self.metrics_mapping
def self.dimensions_mapping
{
column: Column,
- timestamp_column: TimestampColumn
+ date_bucket: DateBucketDimension
+ }
+ end
+
+ override :filters_mapping
+ def self.filters_mapping
+ {
+ exact_match: ExactMatchFilter
}
end
@@ -31,6 +38,7 @@ def execute_query_plan(plan)
projections, dimension_aliases, metric_aliases = build_select_list_and_aliases(plan)
relation = context[:scope].select(*projections)
+ relation = apply_filters(relation, plan)
relation = apply_scope(relation, plan)
relation = apply_grouping(relation, dimension_aliases)
relation = apply_order(relation, plan, dimension_aliases, metric_aliases)
@@ -38,14 +46,6 @@ def execute_query_plan(plan)
AggregationResult.new(self, plan, relation)
end
- def run_validations(plan)
- super
-
- return unless plan.dimensions.size > 2
-
- errors.add(:dimensions, s_("AggregationEngine|maximum two dimensions are supported"))
- end
-
# Returns [projections, dimension_aliases, metric_aliases]
def build_select_list_and_aliases(plan)
projections = []
@@ -75,6 +75,12 @@ def apply_scope(relation, plan)
end
end
+ def apply_filters(relation, plan)
+ plan.filters.reduce(relation) do |rel, part|
+ part.definition.apply(rel, part.configuration)
+ end
+ end
+
def apply_grouping(relation, dimension_aliases)
return relation if dimension_aliases.empty?
diff --git a/lib/gitlab/database/aggregation/active_record/exact_match_filter.rb b/lib/gitlab/database/aggregation/active_record/exact_match_filter.rb
new file mode 100644
index 0000000000000000000000000000000000000000..a228f5bb34db0d5725b26e2f6a4f9c801ca423a8
--- /dev/null
+++ b/lib/gitlab/database/aggregation/active_record/exact_match_filter.rb
@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Aggregation
+ module ActiveRecord
+ class ExactMatchFilter < FilterDefinition
+ def apply(relation, filter_config)
+ relation.where(column(relation).in(filter_config[:values]))
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/aggregation/active_record/filter_definition.rb b/lib/gitlab/database/aggregation/active_record/filter_definition.rb
new file mode 100644
index 0000000000000000000000000000000000000000..464ecbacd4110e9956a3f1dc9f0e16ba071ecfb8
--- /dev/null
+++ b/lib/gitlab/database/aggregation/active_record/filter_definition.rb
@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Aggregation
+ module ActiveRecord
+ class FilterDefinition < PartDefinition
+ attr_reader :max_size
+
+ def initialize(*args, max_size: nil, **kwargs)
+ super
+ @max_size = max_size
+ end
+
+ def apply(_relation, _filter_config)
+ raise NoMethodError
+ end
+
+ def validate_part(part)
+ validate_max_size(part)
+ end
+
+ private
+
+ def column(relation)
+ expression&.call || relation.arel_table[name]
+ end
+
+ def validate_max_size(part)
+ return unless max_size && part.configuration[:values].size > max_size
+
+ part.errors.add(:values,
+ format(s_("AggregationEngine|maximum size of %{max_size} exceeded for filter `%{key}`"),
+ max_size: max_size,
+ key: part.instance_key))
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/aggregation/active_record/part_definition.rb b/lib/gitlab/database/aggregation/active_record/part_definition.rb
new file mode 100644
index 0000000000000000000000000000000000000000..d21368d7ee47abaffb827bdfa7ec4aab9cb61bde
--- /dev/null
+++ b/lib/gitlab/database/aggregation/active_record/part_definition.rb
@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Aggregation
+ module ActiveRecord
+ class PartDefinition < ::Gitlab::Database::Aggregation::PartDefinition
+ attr_reader :scope_proc
+
+ def initialize(*args, scope_proc: nil, **kwargs)
+ super
+ @scope_proc = scope_proc
+ end
+
+ def apply_scope(scope, context)
+ scope_proc ? scope_proc.call(scope, context) : scope
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/aggregation/aggregation_result.rb b/lib/gitlab/database/aggregation/aggregation_result.rb
index 85fd957428134927ba6e366921e219920bdc4391..a5d1bbaed3e138d0152e22f26ce91b2861a88c88 100644
--- a/lib/gitlab/database/aggregation/aggregation_result.rb
+++ b/lib/gitlab/database/aggregation/aggregation_result.rb
@@ -12,7 +12,13 @@ def initialize(engine, plan, query)
@query = query
end
- # TODO: add interface for paginating
+ def limit(_limit_value)
+ raise NoMethodError
+ end
+
+ def offset(_offset_value)
+ raise NoMethodError
+ end
delegate :to_a, :[], :each, to: :loaded_results
diff --git a/lib/gitlab/database/aggregation/click_house/aggregation_result.rb b/lib/gitlab/database/aggregation/click_house/aggregation_result.rb
new file mode 100644
index 0000000000000000000000000000000000000000..c455dcedfd212a6428ecafc0c0582b939b33477f
--- /dev/null
+++ b/lib/gitlab/database/aggregation/click_house/aggregation_result.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Aggregation
+ module ClickHouse
+ class AggregationResult < Gitlab::Database::Aggregation::AggregationResult
+ def limit(limit_value)
+ self.class.new(
+ engine,
+ plan,
+ query.limit(limit_value)
+ )
+ end
+
+ def offset(offset_value)
+ self.class.new(
+ engine,
+ plan,
+ query.offset(offset_value)
+ )
+ end
+
+ private
+
+ def load_data
+ ::ClickHouse::Client.select(query, :main)
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/aggregation/click_house/column.rb b/lib/gitlab/database/aggregation/click_house/column.rb
new file mode 100644
index 0000000000000000000000000000000000000000..cef6727dcdab172c7a53817b33ff62ab1c17c47c
--- /dev/null
+++ b/lib/gitlab/database/aggregation/click_house/column.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Aggregation
+ module ClickHouse
+ class Column < PartDefinition
+ attr_reader :name, :type, :expression, :secondary_expression
+
+ def initialize(*args, **kwargs)
+ super
+ @secondary_expression = kwargs[:if]
+ end
+
+ def secondary_arel(_context)
+ secondary_expression&.call
+ end
+
+ def to_inner_arel(context)
+ expression ? expression.call : context[:scope][name]
+ end
+
+ def to_outer_arel(context)
+ Arel::Table.new(context[:inner_query_name])[context.fetch(:local_alias, name)]
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/aggregation/click_house/count.rb b/lib/gitlab/database/aggregation/click_house/count.rb
new file mode 100644
index 0000000000000000000000000000000000000000..7693da623d5b85c040710c8e2b58a241c36af498
--- /dev/null
+++ b/lib/gitlab/database/aggregation/click_house/count.rb
@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Aggregation
+ module ClickHouse
+ class Count < Column
+ attr_reader :distinct
+
+ def initialize(name = 'total', type = :integer, expression = nil, distinct: false, **kwargs)
+ @distinct = distinct
+ super
+ end
+
+ def identifier
+ :"#{name}_count"
+ end
+
+ def to_inner_arel(...)
+ expression&.call
+ end
+
+ def to_outer_arel(context)
+ return regular_count(context) unless secondary_expression
+
+ inner_condition_column = Arel::Table.new(context[:inner_query_name])[context.fetch(:local_secondary_alias,
+ name)]
+ Arel::Nodes::NamedFunction.new('countIf', [inner_condition_column.eq(1)])
+ end
+
+ def regular_count(context)
+ inner_column = Arel::Table.new(context[:inner_query_name])[context[:local_alias]] if context[:local_alias]
+ Arel::Nodes::Count.new([inner_column || Arel.star], distinct)
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/aggregation/click_house/date_bucket_dimension.rb b/lib/gitlab/database/aggregation/click_house/date_bucket_dimension.rb
new file mode 100644
index 0000000000000000000000000000000000000000..31f3703d763de4c3fe18634c32d8d6b499265044
--- /dev/null
+++ b/lib/gitlab/database/aggregation/click_house/date_bucket_dimension.rb
@@ -0,0 +1,61 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Aggregation
+ module ClickHouse
+ class DateBucketDimension < Column
+ include ParameterizedDefinition
+
+ self.supported_parameters = %i[granularity]
+
+ GRANULARITIES_MAP = {
+ daily: :day,
+ weekly: :week,
+ monthly: :month,
+ yearly: :year
+ }.with_indifferent_access.freeze
+
+ DEFAULT_GRANULARITY = :monthly
+
+ def to_hash
+ super.merge(kind: :date_bucket)
+ end
+
+ def validate_part(part)
+ validate_granularity(part)
+ end
+
+ def to_outer_arel(context)
+ granularity = instance_parameter(:granularity, context[name]) || DEFAULT_GRANULARITY
+ granularity = GRANULARITIES_MAP[granularity]
+
+ context[:scope].func('toStartOfInterval', [super, Arel.sql("INTERVAL 1 #{granularity}")])
+ end
+
+ # def to_arel(context)
+ # granularity = instance_parameter(:granularity, context[name]) || DEFAULT_GRANULARITY
+
+ # quoted_string = context[:scope].model.connection.quote(GRANULARITIES_MAP[granularity])
+
+ # expr = expression ? expression.call : context[:scope].arel_table[name]
+ # Arel::Nodes::NamedFunction.new('date_trunc', [Arel.sql(quoted_string), expr])
+ # end
+
+ private
+
+ def validate_granularity(part)
+ granularity = part.configuration[:granularity]
+ return unless granularity
+ return if granularity.in?(parameters.dig(:granularity, :in) || [])
+
+ part.errors.add(:granularity,
+ format(s_("AggregationEngine|Unknown granularity \"%{granularity}\""),
+ granularity: granularity)
+ )
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/aggregation/click_house/engine.rb b/lib/gitlab/database/aggregation/click_house/engine.rb
new file mode 100644
index 0000000000000000000000000000000000000000..f0dcb3ec543f3ff96493fd6cecbf443d39c28730
--- /dev/null
+++ b/lib/gitlab/database/aggregation/click_house/engine.rb
@@ -0,0 +1,120 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Aggregation
+ module ClickHouse
+ class Engine < Gitlab::Database::Aggregation::Engine
+ extend ::Gitlab::Utils::Override
+
+ INNER_QUERY_NAME = 'ch_aggregation_inner_query'
+
+ class << self
+ attr_accessor :table_name, :table_primary_key
+
+ def dimensions_mapping
+ {
+ column: Column,
+ date_bucket: DateBucketDimension
+ }
+ end
+
+ def metrics_mapping
+ {
+ count: Count,
+ mean: Mean,
+ rate: Rate,
+ quantile: Quantile
+ }
+ end
+
+ def filters_mapping
+ {
+ exact_match: ExactMatchFilter,
+ range: RangeFilter
+ }
+ end
+ end
+
+ private
+
+ # Example resulting query
+ # SELECT
+ # `ch_aggregation_inner_query`.`dimension_0` AS dimension_0,
+ # COUNT(*) AS metric_0,
+ # countIf(`ch_aggregation_inner_query`.`metric_1_condition` = 1) AS metric_1,
+ # avgIf(`ch_aggregation_inner_query`.`metric_2`, `ch_aggregation_inner_query`.`metric_2_condition` = 1)
+ # AS metric_2
+ # FROM (
+ # SELECT `agent_platform_sessions`.`flow_type` AS dimension_0,
+ # anyIfMerge(finished_event_at) IS NOT NULL AS metric_1_condition,
+ # anyIfMerge(finished_event_at)-anyIfMerge(created_event_at) AS metric_2,
+ # anyIfMerge(finished_event_at) IS NOT NULL AS metric_2_condition,
+ # `agent_platform_sessions`.`user_id`,
+ # `agent_platform_sessions`.`namespace_path`,
+ # `agent_platform_sessions`.`session_id`,
+ # `agent_platform_sessions`.`flow_type`
+ # FROM `agent_platform_sessions`
+ # GROUP BY ALL) ch_aggregation_inner_query
+ # GROUP BY ALL
+ override :execute_query_plan
+ def execute_query_plan(plan)
+ inner_projections, outer_projections = build_select_list_and_aliases(plan)
+
+ query = context[:scope].select(*inner_projections).group(Arel.sql("ALL"))
+
+ plan.filters.each { |filter| query = filter.definition.apply_inner(query, filter.configuration) }
+
+ query = ::ClickHouse::Client::QueryBuilder.new(query, INNER_QUERY_NAME)
+ .select(*outer_projections).group(Arel.sql("ALL"))
+
+ plan.order.each { |order| query = query.order(Arel.sql(order.instance_key), order.direction) }
+
+ AggregationResult.new(self, plan, query)
+ end
+
+ def build_select_list_and_aliases(plan)
+ inner_projections_list = []
+ outer_projections_list = []
+
+ plan.dimensions.each do |dimension|
+ inner_projections, outer_projections = *build_part_selections(dimension)
+ inner_projections_list += inner_projections
+ outer_projections_list += outer_projections
+ end
+
+ plan.metrics.each do |metric|
+ inner_projections, outer_projections = *build_part_selections(metric)
+ inner_projections_list += inner_projections
+ outer_projections_list += outer_projections
+ end
+
+ # fill in primary_key
+ inner_projections_list += self.class.table_primary_key.map { |n| context[:scope][n] }
+
+ [inner_projections_list.compact, outer_projections_list.compact]
+ end
+
+ def build_part_selections(part)
+ alias_name = part.instance_key.to_s
+ inner_context = context.merge(:inner_query_name => INNER_QUERY_NAME,
+ part.name => part.configuration)
+ inner_projection = part.definition.to_inner_arel(inner_context)&.as(alias_name)
+
+ if part.definition.secondary_expression
+ secondary_alias_name = "#{alias_name}_secondary"
+ secondary_projection = part.definition.secondary_arel(inner_context)&.as(secondary_alias_name)
+ end
+
+ outer_context = inner_context
+ outer_context[:local_alias] = alias_name if inner_projection
+ outer_context[:local_secondary_alias] = secondary_alias_name if secondary_projection
+ outer_projection = part.definition.to_outer_arel(outer_context).as(alias_name)
+
+ [[inner_projection, secondary_projection], [outer_projection]]
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/aggregation/click_house/exact_match_filter.rb b/lib/gitlab/database/aggregation/click_house/exact_match_filter.rb
new file mode 100644
index 0000000000000000000000000000000000000000..9f87934f59ee86c17b0775a228257af18e651d3d
--- /dev/null
+++ b/lib/gitlab/database/aggregation/click_house/exact_match_filter.rb
@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Aggregation
+ module ClickHouse
+ class ExactMatchFilter < FilterDefinition
+ private
+
+ def apply(query_builder, filter_config)
+ if merge_column?
+ query_builder.having(column(query_builder).in(filter_config[:values]))
+ else
+ query_builder.where(column(query_builder).in(filter_config[:values]))
+ end
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/aggregation/click_house/filter_definition.rb b/lib/gitlab/database/aggregation/click_house/filter_definition.rb
new file mode 100644
index 0000000000000000000000000000000000000000..21da6a20f20ce2f721e7308070ef81bd38d7d9c9
--- /dev/null
+++ b/lib/gitlab/database/aggregation/click_house/filter_definition.rb
@@ -0,0 +1,50 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Aggregation
+ module ClickHouse
+ class FilterDefinition < PartDefinition
+ attr_reader :max_size
+
+ def initialize(name, type, expression = nil, **kwargs)
+ super
+ @merge_column = kwargs[:merge_column]
+ @max_size = kwargs[:max_size]
+ end
+
+ def apply_inner(query_builder, filter_config)
+ apply(query_builder, filter_config)
+ end
+
+ def validate_part(part)
+ validate_max_size(part)
+ end
+
+ private
+
+ def merge_column?
+ !!@merge_column
+ end
+
+ def column(query_builder)
+ expression&.call || query_builder.table[name]
+ end
+
+ def apply(_query_builder, _filter_config)
+ raise NoMethodError
+ end
+
+ def validate_max_size(part)
+ return unless max_size && part.configuration[:values].size > max_size
+
+ part.errors.add(:values,
+ format(s_("AggregationEngine|maximum size of %{max_size} exceeded for filter `%{key}`"),
+ max_size: max_size,
+ key: part.instance_key))
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/aggregation/click_house/mean.rb b/lib/gitlab/database/aggregation/click_house/mean.rb
new file mode 100644
index 0000000000000000000000000000000000000000..f2c2ef656b1a67830cfae3fae4b50b56ebcf2611
--- /dev/null
+++ b/lib/gitlab/database/aggregation/click_house/mean.rb
@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Aggregation
+ module ClickHouse
+ class Mean < Column
+ def initialize(name, type = :float, expression = nil, **kwargs)
+ super
+ end
+
+ def identifier
+ :"mean_#{name}"
+ end
+
+ def to_outer_arel(context)
+ return super.average unless secondary_expression
+
+ inner_column = Arel::Table.new(context[:inner_query_name])[context.fetch(:local_alias, name)]
+ inner_condition_column = Arel::Table.new(context[:inner_query_name])[context.fetch(:local_secondary_alias,
+ name)]
+ Arel::Nodes::NamedFunction.new('avgIf', [inner_column, inner_condition_column.eq(1)])
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/aggregation/click_house/part_definition.rb b/lib/gitlab/database/aggregation/click_house/part_definition.rb
new file mode 100644
index 0000000000000000000000000000000000000000..47824a5457480d547cc6f9a3487afe7b4e0ef000
--- /dev/null
+++ b/lib/gitlab/database/aggregation/click_house/part_definition.rb
@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Aggregation
+ module ClickHouse
+ class PartDefinition < ::Gitlab::Database::Aggregation::PartDefinition
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/aggregation/click_house/quantile.rb b/lib/gitlab/database/aggregation/click_house/quantile.rb
new file mode 100644
index 0000000000000000000000000000000000000000..4b10074f6ebe12fd4796721d7b9162422b75fab5
--- /dev/null
+++ b/lib/gitlab/database/aggregation/click_house/quantile.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Aggregation
+ module ClickHouse
+ class Quantile < Column
+ include ParameterizedDefinition
+
+ self.supported_parameters = %i[quantile]
+
+ DEFAULT_QUANTILE = 0.5
+
+ def initialize(name, type = :float, expression = nil, **kwargs)
+ super
+ end
+
+ def identifier
+ :"#{name}_quantile"
+ end
+
+ def to_outer_arel(context)
+ quantile = instance_parameter(:quantile, context[name]) || DEFAULT_QUANTILE
+
+ inner_column = Arel::Table.new(context[:inner_query_name])[context.fetch(:local_alias, name)]
+
+ Arel.sql("quantile(?)(?)", context[:scope].quote(quantile), inner_column)
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/aggregation/click_house/range_filter.rb b/lib/gitlab/database/aggregation/click_house/range_filter.rb
new file mode 100644
index 0000000000000000000000000000000000000000..5b31e0c0c00a800c6e0611c143dc320a27ce173f
--- /dev/null
+++ b/lib/gitlab/database/aggregation/click_house/range_filter.rb
@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Aggregation
+ module ClickHouse
+ class RangeFilter < FilterDefinition
+ private
+
+ def apply(query_builder, filter_config)
+ if merge_column?
+ query_builder.having(column(query_builder).between(filter_config[:values]))
+ else
+ query_builder.where(column(query_builder).between(filter_config[:values]))
+ end
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/aggregation/click_house/rate.rb b/lib/gitlab/database/aggregation/click_house/rate.rb
new file mode 100644
index 0000000000000000000000000000000000000000..7ec9371be66c830974bb662c41830f60c637c03c
--- /dev/null
+++ b/lib/gitlab/database/aggregation/click_house/rate.rb
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Aggregation
+ module ClickHouse
+ class Rate < Column
+ # Uses regular expression as numerator condition and "if" expression as denominator condition
+ def initialize(name, type = :float, numerator_if:, denominator_if: nil, **kwargs)
+ super(name, type, numerator_if, if: denominator_if, **kwargs)
+ end
+
+ def identifier
+ :"#{name}_rate"
+ end
+
+ def to_outer_arel(context)
+ inner_column = Arel::Table.new(context[:inner_query_name])[context.fetch(:local_alias, name)]
+
+ if secondary_expression
+ inner_condition_column = Arel::Table.new(context[:inner_query_name])[context.fetch(:local_secondary_alias,
+ name)]
+ denominator = Arel::Nodes::NamedFunction.new('countIf', [inner_condition_column.eq(1)])
+ else
+ denominator = Arel::Nodes::Count.new([Arel.star])
+ end
+
+ Arel::Nodes::NamedFunction.new('countIf', [inner_column.eq(1)]) / denominator
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/aggregation/definitions_collector.rb b/lib/gitlab/database/aggregation/definitions_collector.rb
index d64c6f77f124042e6f5af1c6ff06b35216597740..3009217412e679bc2c25ea7997d8041bcd89fd15 100644
--- a/lib/gitlab/database/aggregation/definitions_collector.rb
+++ b/lib/gitlab/database/aggregation/definitions_collector.rb
@@ -17,6 +17,10 @@ def collect(&definitions_block)
private
+ def sql(...)
+ Arel.sql(...)
+ end
+
def method_missing(method_name, ...)
return super unless @mapping[method_name]
diff --git a/lib/gitlab/database/aggregation/engine.rb b/lib/gitlab/database/aggregation/engine.rb
index 176fb95bc6f0ae799889809b8a1ea21313083469..44a45d0a8ef3b80f9e6f701012a876737be78180 100644
--- a/lib/gitlab/database/aggregation/engine.rb
+++ b/lib/gitlab/database/aggregation/engine.rb
@@ -15,6 +15,10 @@ def metrics_mapping
raise NoMethodError
end
+ def filters_mapping
+ raise NoMethodError
+ end
+
def dimensions_mapping
raise NoMethodError
end
@@ -23,6 +27,17 @@ def build(&block)
Class.new(self).tap { |klass| klass.class_eval(&block) }
end
+ def filters(&block)
+ @filters ||= []
+ return @filters unless block
+
+ @filters += DefinitionsCollector.new(filters_mapping).collect(&block)
+
+ guard_definitions_uniqueness!(filters)
+
+ @filters
+ end
+
def dimensions(&block)
@dimensions ||= []
@@ -30,7 +45,7 @@ def dimensions(&block)
@dimensions += DefinitionsCollector.new(dimensions_mapping).collect(&block)
- guard_definitions_uniqueness!
+ guard_definitions_uniqueness!(dimensions + metrics)
@dimensions
end
@@ -41,13 +56,14 @@ def metrics(&block)
@metrics += DefinitionsCollector.new(metrics_mapping).collect(&block)
- guard_definitions_uniqueness!
+ guard_definitions_uniqueness!(dimensions + metrics)
@metrics
end
def to_hash
{
+ filters: filters.map(&:to_hash),
metrics: metrics.map(&:to_hash),
dimensions: dimensions.map(&:to_hash)
}
@@ -55,8 +71,8 @@ def to_hash
private
- def guard_definitions_uniqueness!
- identifiers = dimensions.map(&:identifier) + metrics.map(&:identifier)
+ def guard_definitions_uniqueness!(parts)
+ identifiers = parts.map(&:identifier)
duplicates = identifiers.group_by(&:itself).select { |_k, v| v.size > 1 }.keys
return unless duplicates.present?
@@ -71,9 +87,12 @@ def initialize(context:)
@context = context
end
+ # @return [Gitlab::Database::Aggregation::AggregationResult]
def execute(request)
- plan = QueryPlan.build(request, self)
- run_validations(plan)
+ validate
+
+ plan = request.to_query_plan(self)
+ plan.validate
errors.merge!(plan.errors)
if errors.any?
@@ -88,31 +107,6 @@ def execute(request)
def execute_query_plan(_plan)
raise NoMethodError
end
-
- # Override this method if you want to add engine-specific validations.
- def run_validations(plan)
- ensure_instance_keys(:dimensions, plan.dimensions)
- ensure_instance_keys(:metrics, plan.metrics)
-
- return unless plan.metrics.empty?
-
- errors.add(:metrics, s_("AggregationEngine|at least one metric is required"))
- end
-
- def ensure_instance_keys(error_key, collection)
- instance_keys = collection.group_by(&:instance_key)
- duplicates = instance_keys.select { |_value, occurrences| occurrences.size > 1 }.values
-
- return unless duplicates.any?
-
- duplicate = duplicates.first.first
-
- placeholder = { identifier: duplicate.definition.identifier }
- errors.add(
- error_key,
- format(s_("AggregationEngine|duplicated identifier found: %{identifier}"), placeholder)
- )
- end
end
end
end
diff --git a/lib/gitlab/database/aggregation/graphql.rb b/lib/gitlab/database/aggregation/graphql.rb
new file mode 100644
index 0000000000000000000000000000000000000000..87738cfa7be3f8f738f3596030b34601f514957e
--- /dev/null
+++ b/lib/gitlab/database/aggregation/graphql.rb
@@ -0,0 +1,10 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Aggregation
+ module Graphql
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/aggregation/graphql/adapter.rb b/lib/gitlab/database/aggregation/graphql/adapter.rb
new file mode 100644
index 0000000000000000000000000000000000000000..2324dd0ba09e327e97fdfb9fa850dccba697fe85
--- /dev/null
+++ b/lib/gitlab/database/aggregation/graphql/adapter.rb
@@ -0,0 +1,50 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Aggregation
+ module Graphql
+ module Adapter
+ class << self
+ def mounted_engine_name(name)
+ name.to_s.camelize(:lower)
+ end
+
+ def each_filter_argument(filters)
+ filters.each do |filter|
+ filter_to_arguments(filter).each { |args| yield(*args) }
+ end
+ end
+
+ def graphql_type(type)
+ case type.to_sym
+ when :string then ::GraphQL::Types::String
+ when :integer then ::GraphQL::Types::Int
+ when :boolean then ::GraphQL::Types::Boolean
+ when :float then ::GraphQL::Types::Float
+ when :date then ::Types::DateType
+ when :datetime then ::Types::TimeType
+ end
+ end
+
+ private
+
+ def filter_to_arguments(filter)
+ case filter
+ when ::Gitlab::Database::Aggregation::ClickHouse::ExactMatchFilter
+ [[filter.identifier, [graphql_type(filter.type)], { required: false, description: filter.description }]]
+ when ::Gitlab::Database::Aggregation::ClickHouse::RangeFilter
+ [[:"#{filter.identifier}_from",
+ graphql_type(filter.type),
+ { required: false, description: "#{filter.description}. Start of the range." }],
+ [:"#{filter.identifier}_to",
+ graphql_type(filter.type),
+ { required: false, description: "#{filter.description}. End of the range." }]]
+ end
+ end
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/aggregation/graphql_aggregation_connection.rb b/lib/gitlab/database/aggregation/graphql_aggregation_connection.rb
new file mode 100644
index 0000000000000000000000000000000000000000..49ad5802188021c3374f400cf7cb88422c5694d6
--- /dev/null
+++ b/lib/gitlab/database/aggregation/graphql_aggregation_connection.rb
@@ -0,0 +1,92 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ # -- requires AR methods to build the keyset pagination conditions
+ module Aggregation
+ class GraphqlAggregationConnection < GraphQL::Pagination::Connection
+ # rubocop: disable Naming/PredicateName -- these methods are part of the GraphQL pagination API
+ def has_next_page
+ load_nodes
+ @has_next_page
+ end
+
+ def has_previous_page
+ load_nodes
+ # If we have skipped any records (offset > 0), a previous page exists.
+ @final_offset > 0
+ end
+ # rubocop: enable Naming/PredicateName
+
+ def cursor_for(node)
+ load_nodes
+ # The cursor is the absolute index: Base Offset + Index in current batch
+ # We use object identity (index) to find the node in the current batch.
+ batch_index = nodes.index(node)
+
+ raise GraphQL::ExecutionError, "Node not found in current batch" unless batch_index
+
+ encode((@final_offset + batch_index).to_s)
+ end
+
+ def nodes
+ load_nodes
+ @nodes
+ end
+
+ private
+
+ def load_nodes
+ # Memoize so we don't run the query multiple times for pageInfo + nodes
+ return if defined?(@nodes)
+
+ # We block 'last' if 'before' is missing. This prevents us from
+ # needing to know the Total Count of the underlying query.
+ if last.present? && before.blank?
+ raise GraphQL::ExecutionError, "Argument 'last' can only be used in conjunction with 'before'."
+ end
+
+ current_first = first || (last.blank? ? limit_value : nil)
+
+ start_index = after ? decode_cursor(after) + 1 : 0
+ end_index = before ? decode_cursor(before) : nil
+
+ if end_index
+ end_index = [end_index, start_index + current_first].min if current_first
+
+ start_index = [start_index, end_index - last].max if last
+
+ @final_offset = start_index
+ final_limit = [0, end_index - start_index].max
+ else
+ @final_offset = start_index
+ final_limit = current_first
+ end
+
+ paginated_items = @items.limit(final_limit + 1).offset(@final_offset).to_a
+
+ if paginated_items.size > final_limit
+ @has_next_page = true
+ # Remove the last element as it's only used for determining the next page
+ paginated_items.pop
+ else
+ @has_next_page = false
+ end
+
+ @nodes = paginated_items
+ end
+
+ def limit_value
+ @limit_value ||= [first, last, max_page_size || GitlabSchema.default_max_page_size].compact.min
+ end
+
+ def decode_cursor(cursor)
+ value = Integer(decode(cursor))
+ raise GraphQL::ExecutionError, "Invalid cursor provided" if value < 0
+
+ value
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/aggregation/parameterized_definition.rb b/lib/gitlab/database/aggregation/parameterized_definition.rb
index 1a910f6816fbb128c72bcc7cf6eb277b928da858..5697447acf465a26959263460b057c281a87add5 100644
--- a/lib/gitlab/database/aggregation/parameterized_definition.rb
+++ b/lib/gitlab/database/aggregation/parameterized_definition.rb
@@ -41,7 +41,7 @@ def instance_key(configuration)
private
def instance_parameter(param_identifier, configuration)
- configuration.dig(:parameters, param_identifier)
+ parameters[param_identifier] && configuration.dig(:parameters, param_identifier)
end
def parameterized?
diff --git a/lib/gitlab/database/aggregation/part_definition.rb b/lib/gitlab/database/aggregation/part_definition.rb
index 6f3136090f0b0df8bc4e230d17a5aa90dd1e0757..41404e35a1a78cd634e5bd950d88d17d0521f47a 100644
--- a/lib/gitlab/database/aggregation/part_definition.rb
+++ b/lib/gitlab/database/aggregation/part_definition.rb
@@ -34,6 +34,10 @@ def format_value(val)
formatter ? formatter.call(val) : val
end
+ def validate_part(_plan_part)
+ # no-op by default
+ end
+
# part identifier. Must be unique across all part definitions.
def identifier
name
diff --git a/lib/gitlab/database/aggregation/query_plan.rb b/lib/gitlab/database/aggregation/query_plan.rb
index 1a78924dc65ffe7b456b07561ad698a6a5deb7dd..f62d87f27874430d86a1c701bcf8cffc57d42918 100644
--- a/lib/gitlab/database/aggregation/query_plan.rb
+++ b/lib/gitlab/database/aggregation/query_plan.rb
@@ -6,62 +6,52 @@ module Aggregation
class QueryPlan
include ActiveModel::Validations
- attr_reader :dimensions, :metrics, :order
-
- class << self
- def build(request, engine)
- engine_definition = engine.class
- plan = new
-
- dimension_definitions = engine_definition.dimensions.index_by(&:identifier)
- request.dimensions.each do |configuration|
- dimension_definition = dimension_definitions[configuration[:identifier]]
- if dimension_definition.nil?
- add_error_for(plan, :dimensions, configuration[:identifier])
- break
- end
-
- plan.add_dimension(dimension_definition, configuration)
- end
-
- metric_definitions = engine_definition.metrics.index_by(&:identifier)
- request.metrics.each do |configuration|
- metric_definition = metric_definitions[configuration[:identifier]]
- if metric_definition.nil?
- add_error_for(plan, :metrics, configuration[:identifier])
- break
- end
+ attr_reader :engine, :request
- plan.add_metric(metric_definition, configuration)
- end
+ validate :validate_request
+ validate :validate_parts
+ validate :validate_instance_keys_uniqueness
- request.order.each do |configuration|
- plan_part = plan.orderable_parts.detect do |plan_part|
- configuration.except(:direction) == plan_part.configuration
- end
-
- unless plan_part
- add_error_for(plan, :order, configuration[:identifier])
- break
- end
+ def initialize(engine, request)
+ @engine = engine
+ @request = request
+ end
- plan.add_order(plan_part, configuration)
+ def filters
+ @filters ||= begin
+ definitions = engine.class.filters.index_by(&:identifier)
+ request.filters.map do |configuration|
+ Filter.new(definitions[configuration[:identifier]], configuration)
end
+ end
+ end
- plan
+ def dimensions
+ @dimensions ||= begin
+ definitions = engine.class.dimensions.index_by(&:identifier)
+ request.dimensions.map do |configuration|
+ Dimension.new(definitions[configuration[:identifier]], configuration)
+ end
end
+ end
- def add_error_for(plan, object, identifier)
- plan.errors.add(object,
- format(s_("AggregationEngine|the specified identifier is not available: '%{identifier}'"),
- identifier: identifier))
+ def metrics
+ @metrics ||= begin
+ definitions = engine.class.metrics.index_by(&:identifier)
+ request.metrics.map do |configuration|
+ Metric.new(definitions[configuration[:identifier]], configuration)
+ end
end
end
- def initialize
- @dimensions = []
- @metrics = []
- @order = []
+ def order
+ @order ||= request.order.map do |configuration|
+ plan_part = orderable_parts.detect do |plan_part|
+ configuration.except(:direction) == plan_part.configuration
+ end
+
+ Order.new(plan_part, configuration)
+ end
end
def orderable_parts
@@ -69,19 +59,39 @@ def orderable_parts
end
def parts
- dimensions + metrics + order
+ filters + dimensions + metrics + order
end
- def add_dimension(definition, configuration)
- @dimensions << Dimension.new(definition, configuration)
+ private
+
+ def validate_parts
+ parts.reject(&:valid?).each do |invalid_part|
+ errors.merge!(invalid_part.errors)
+ end
end
- def add_metric(definition, configuration)
- @metrics << Metric.new(definition, configuration)
+ def validate_request
+ return unless request.metrics.empty?
+
+ errors.add(:metrics,
+ s_("AggregationEngine|at least one metric is required"))
end
- def add_order(plan_part, configuration)
- @order << Order.new(plan_part, configuration)
+ def validate_instance_keys_uniqueness
+ return unless errors.empty?
+
+ instance_keys = (dimensions + metrics).group_by(&:instance_key)
+ duplicates = instance_keys.select { |_value, occurrences| occurrences.size > 1 }.values.flatten
+
+ return unless duplicates.any?
+
+ identifiers = duplicates.map(&:definition).map(&:identifier).uniq
+
+ placeholder = { identifiers: identifiers.join(', ') }
+ errors.add(
+ :base,
+ format(s_("AggregationEngine|duplicated identifiers found: %{identifiers}"), placeholder)
+ )
end
end
end
diff --git a/lib/gitlab/database/aggregation/query_plan/base_part.rb b/lib/gitlab/database/aggregation/query_plan/base_part.rb
index 092006a0a36312b1edc9dd37d780c6075c3a1e5d..a1745060b28a19479b7a022cfd8dc04329f5a120 100644
--- a/lib/gitlab/database/aggregation/query_plan/base_part.rb
+++ b/lib/gitlab/database/aggregation/query_plan/base_part.rb
@@ -5,9 +5,14 @@ module Database
module Aggregation
class QueryPlan
class BasePart
+ include ActiveModel::Validations
+
attr_reader :definition, :configuration
- delegate :name, :type, to: :definition
+ delegate :name, :type, :identifier, to: :definition
+
+ validate :validate_definition_presence
+ validate -> { definition&.validate_part(self) }
def initialize(definition, configuration)
@definition = definition
@@ -17,6 +22,15 @@ def initialize(definition, configuration)
def instance_key
definition.instance_key(**configuration)
end
+
+ private
+
+ def validate_definition_presence
+ return if definition
+
+ errors.add(:base, format(s_("AggregationEngine|the specified identifier is not available: '%{identifier}'"),
+ identifier: configuration[:identifier]))
+ end
end
end
end
diff --git a/lib/gitlab/database/aggregation/query_plan/filter.rb b/lib/gitlab/database/aggregation/query_plan/filter.rb
new file mode 100644
index 0000000000000000000000000000000000000000..a49d92fc7d10ba4a6899e6b103adbb4d4fcf9b00
--- /dev/null
+++ b/lib/gitlab/database/aggregation/query_plan/filter.rb
@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Aggregation
+ class QueryPlan
+ class Filter < BasePart
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/aggregation/query_plan/order.rb b/lib/gitlab/database/aggregation/query_plan/order.rb
index b7f96935aa17738ee2fa10d69b5731cfbc0f7b3c..8003f2c045aa602d5fa9be63795885cfd28a66e4 100644
--- a/lib/gitlab/database/aggregation/query_plan/order.rb
+++ b/lib/gitlab/database/aggregation/query_plan/order.rb
@@ -7,13 +7,19 @@ class QueryPlan
class Order < BasePart
attr_reader :plan_part
- delegate :definition, :instance_key, to: :plan_part
-
def initialize(plan_part, configuration)
@plan_part = plan_part
@configuration = configuration
end
+ def definition
+ plan_part&.definition
+ end
+
+ def instance_key
+ plan_part&.instance_key
+ end
+
def direction
configuration[:direction]
end
diff --git a/lib/gitlab/database/aggregation/request.rb b/lib/gitlab/database/aggregation/request.rb
index a2158c6490f7495aa736d71ddfe5bdb52ad0113d..4edb25399456d64601ebad189378713d46c9ce96 100644
--- a/lib/gitlab/database/aggregation/request.rb
+++ b/lib/gitlab/database/aggregation/request.rb
@@ -4,13 +4,18 @@ module Gitlab
module Database
module Aggregation
class Request
- attr_reader :metrics, :dimensions, :order
+ attr_reader :filters, :dimensions, :metrics, :order
- def initialize(metrics:, dimensions: [], order: [])
- @metrics = metrics
+ def initialize(metrics:, filters: [], dimensions: [], order: [])
+ @filters = filters || []
@dimensions = dimensions || []
+ @metrics = metrics
@order = order || []
end
+
+ def to_query_plan(engine)
+ QueryPlan.new(engine, self)
+ end
end
end
end
diff --git a/lib/gitlab/graphql/pagination/connections.rb b/lib/gitlab/graphql/pagination/connections.rb
index 073853b6d5d468478d63094a5ee560f42f32a29f..b654cac56ebab3ac432042077fd8358ab2e33746 100644
--- a/lib/gitlab/graphql/pagination/connections.rb
+++ b/lib/gitlab/graphql/pagination/connections.rb
@@ -30,6 +30,11 @@ def self.use(schema)
Gitlab::Graphql::Pagination::ClickHouseConnection
)
+ schema.connections.add(
+ Gitlab::Database::Aggregation::AggregationResult,
+ Gitlab::Database::Aggregation::GraphqlAggregationConnection
+ )
+
schema.connections.add(
Gitlab::Graphql::Pagination::ClickHouseAggregatedRelation,
Gitlab::Graphql::Pagination::ClickHouseAggregatedConnection
diff --git a/locale/gitlab.pot b/locale/gitlab.pot
index f084baa081bc048c133ebe3f5e92882c5b94f95a..fb75c74d62d46d482b2745de315fd5c6b121a056 100644
--- a/locale/gitlab.pot
+++ b/locale/gitlab.pot
@@ -6685,13 +6685,16 @@ msgstr ""
msgid "Agent not found for provided id."
msgstr ""
+msgid "AggregationEngine|Unknown granularity \"%{granularity}\""
+msgstr ""
+
msgid "AggregationEngine|at least one metric is required"
msgstr ""
-msgid "AggregationEngine|duplicated identifier found: %{identifier}"
+msgid "AggregationEngine|duplicated identifiers found: %{identifiers}"
msgstr ""
-msgid "AggregationEngine|maximum two dimensions are supported"
+msgid "AggregationEngine|maximum size of %{max_size} exceeded for filter `%{key}`"
msgstr ""
msgid "AggregationEngine|the specified identifier is not available: '%{identifier}'"
diff --git a/spec/lib/gitlab/database/aggregation/active_record/engine_spec.rb b/spec/lib/gitlab/database/aggregation/active_record/engine_spec.rb
index 04ecfd91d2bb9fabe7287078d6b93176704bb043..bff71a8e594469560fbae510efda082151d05d62 100644
--- a/spec/lib/gitlab/database/aggregation/active_record/engine_spec.rb
+++ b/spec/lib/gitlab/database/aggregation/active_record/engine_spec.rb
@@ -38,15 +38,15 @@
column :state_id, :integer, description: 'Integer representation of the existing merge request states'
column :updated_by_id, :integer, description: 'User id value who last updated the the merge request'
column :project_id, :integer, description: 'ID of the associated project'
- timestamp_column :created_at, :timestamp, description: 'Bucketed creation timestamp (month)',
+ date_bucket :created_at, :timestamp, description: 'Bucketed creation timestamp (month)',
parameters: { granularity: { type: :string, in: %w[monthly daily] } }
- timestamp_column :merged_at,
+ date_bucket :merged_at,
:timestamp,
-> { MergeRequest::Metrics.arel_table[:merged_at] },
parameters: { granularity: { type: :string, in: %w[monthly weekly] } },
scope_proc: ->(scope, _ctx) { scope.joins(:metrics).where.not(merge_request_metrics: { merged_at: nil }) },
description: 'Bucketed merge timestamp (month or week).'
- timestamp_column :closed_at,
+ date_bucket :closed_at,
:timestamp,
-> { MergeRequest::Metrics.arel_table[:latest_closed_at] },
parameters: { granularity: { type: :string, in: %w[monthly weekly] } },
@@ -187,7 +187,7 @@
end
end
- context 'when timestamp_column dimension returns null values' do
+ context 'when date_bucket dimension returns null values' do
it 'groups null values together' do
request = Gitlab::Database::Aggregation::Request.new(
dimensions: [{ identifier: :closed_at, parameters: { granularity: 'monthly' } }],
@@ -201,116 +201,4 @@
end
end
end
-
- describe '.to_hash' do
- it 'exposes the engine_definition definition' do
- expect(engine_definition.to_hash).to match({
- dimensions: array_including(
- hash_including({ identifier: :state_id, name: :state_id })
- ),
- metrics: array_including(
- hash_including({ identifier: :total_count })
- )
- })
- end
- end
-
- describe 'validations' do
- context 'when metric cannot be found' do
- it 'returns error' do
- request = Gitlab::Database::Aggregation::Request.new(
- dimensions: [{ identifier: :state_id }],
- metrics: [{ identifier: :missing_identfier }]
- )
-
- expect(engine).to execute_aggregation(request).with_errors(array_including(
- a_string_matching(/identifier is not available: 'missing_identfier'/)
- ))
- end
- end
-
- context 'when dimension cannot be found' do
- it 'returns error' do
- request = Gitlab::Database::Aggregation::Request.new(
- dimensions: [{ identifier: :missing_identfier }],
- metrics: [{ identifier: :mean_time_estimate }]
- )
-
- expect(engine).to execute_aggregation(request).with_errors(array_including(
- a_string_matching(/identifier is not available: 'missing_identfier'/)
- ))
- end
- end
-
- context 'when no metric is passed in' do
- it 'returns error' do
- request = Gitlab::Database::Aggregation::Request.new(
- dimensions: [{ identifier: :state_id }],
- metrics: []
- )
-
- expect(engine).to execute_aggregation(request).with_errors(array_including(
- a_string_matching(/at least one metric is required/)
- ))
- end
- end
-
- context 'when dimensions limit is reached' do
- it 'returns error' do
- request = Gitlab::Database::Aggregation::Request.new(
- dimensions: [
- { identifier: :state_id },
- { identifier: :project_id },
- { identifier: :state },
- { identifier: :merged_at, parameters: { granularity: 'monthly' } }
- ],
- metrics: [{ identifier: :mean_time_estimate }]
- )
-
- expect(engine).to execute_aggregation(request).with_errors(array_including(
- a_string_matching(/maximum two dimensions/)
- ))
- end
- end
-
- context 'when order cannot be found' do
- it 'returns error' do
- request = Gitlab::Database::Aggregation::Request.new(
- dimensions: [{ identifier: :state_id }],
- metrics: [{ identifier: :mean_time_estimate }],
- order: [{ identifier: :unknown, direction: :asc }]
- )
-
- expect(engine).to execute_aggregation(request).with_errors(array_including(
- a_string_matching(/identifier is not available/)
- ))
- end
- end
-
- context 'when duplicated dimensions are passed in' do
- it 'returns error' do
- request = Gitlab::Database::Aggregation::Request.new(
- dimensions: [{ identifier: :state_id }, { identifier: :state_id }],
- metrics: [{ identifier: :mean_time_estimate }]
- )
-
- expect(engine).to execute_aggregation(request).with_errors(array_including(
- a_string_matching(/duplicated identifier found: state_id/)
- ))
- end
- end
-
- context 'when duplicated metrics are passed in' do
- it 'returns error' do
- request = Gitlab::Database::Aggregation::Request.new(
- dimensions: [{ identifier: :state_id }],
- metrics: [{ identifier: :mean_time_estimate }, { identifier: :mean_time_estimate }]
- )
-
- expect(engine).to execute_aggregation(request).with_errors(array_including(
- a_string_matching(/duplicated identifier found: mean_time_estimate/)
- ))
- end
- end
- end
end
diff --git a/spec/lib/gitlab/database/aggregation/active_record/exact_match_filter_spec.rb b/spec/lib/gitlab/database/aggregation/active_record/exact_match_filter_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..146538e83a1ee593ca6c57106bf2cd8dfe627b53
--- /dev/null
+++ b/spec/lib/gitlab/database/aggregation/active_record/exact_match_filter_spec.rb
@@ -0,0 +1,81 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Database::Aggregation::ActiveRecord::ExactMatchFilter, feature_category: :database do
+ let_it_be(:user) { create(:user) }
+ let_it_be(:project) { create(:project) }
+ let_it_be(:merge_request1) do
+ create(:merge_request, :unique_branches,
+ updated_by_id: user.id,
+ target_project: project,
+ source_project: project,
+ time_estimate: 1,
+ created_at: '2025-04-05').tap do |mr|
+ mr.metrics.update!(merged_at: '2025-05-03')
+ end
+ end
+
+ let_it_be(:merge_request2) do
+ create(:merge_request, :unique_branches,
+ target_project: project,
+ source_project: project,
+ time_estimate: 3,
+ created_at: '2025-04-04').tap do |mr|
+ mr.metrics.update!(latest_closed_at: '2025-05-03')
+ end
+ end
+
+ let_it_be(:merge_request3) do
+ create(:merge_request, updated_by_id: user.id, time_estimate: 5, created_at: '2025-06-05').tap do |mr|
+ mr.metrics.update!(merged_at: '2025-06-04')
+ end
+ end
+
+ let(:engine_definition) do
+ Gitlab::Database::Aggregation::ActiveRecord::Engine.build do
+ filters do
+ exact_match :created_at, :datetime, max_size: 2
+ end
+
+ metrics do
+ count
+ end
+ end
+ end
+
+ let(:engine) { engine_definition.new(context: { scope: MergeRequest.all }) }
+
+ it 'applies single value filter' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ filters: [{ identifier: :created_at, values: ['2025-04-04'] }],
+ metrics: [{ identifier: :total_count }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { total_count: 1 }
+ ])
+ end
+
+ it 'applies multiple values filter' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ filters: [{ identifier: :created_at, values: %w[2025-04-04 2025-04-05] }],
+ metrics: [{ identifier: :total_count }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { total_count: 2 }
+ ])
+ end
+
+ it 'respects max_size limit' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ filters: [{ identifier: :created_at, values: %w[2025-04-04 2025-04-05 2025-05-05] }],
+ metrics: [{ identifier: :total_count }]
+ )
+
+ expect(engine).to execute_aggregation(request).with_errors([
+ "Values maximum size of 2 exceeded for filter `created_at`"
+ ])
+ end
+end
diff --git a/spec/lib/gitlab/database/aggregation/click_house/count_spec.rb b/spec/lib/gitlab/database/aggregation/click_house/count_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..86ab28e39fb4c8800b43a251b17ef0f1b132d4cb
--- /dev/null
+++ b/spec/lib/gitlab/database/aggregation/click_house/count_spec.rb
@@ -0,0 +1,95 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Database::Aggregation::ClickHouse::Count, :click_house, feature_category: :database do
+ include_context 'with agent_platform_sessions ClickHouse aggregation engine'
+
+ let(:engine_definition) do
+ Gitlab::Database::Aggregation::ClickHouse::Engine.build do
+ self.table_name = 'agent_platform_sessions'
+ self.table_primary_key = %w[namespace_path user_id session_id flow_type]
+
+ metrics do
+ count
+ count :finished, if: -> { Arel.sql('anyIfMerge(finished_event_at) IS NOT NULL') }
+ count :users, :integer, -> { Arel.sql('user_id') }, distinct: true
+ end
+ end
+ end
+
+ let(:session1) do # finished & long
+ created_at = DateTime.parse('2025-03-01 00:00:00 UTC')
+ { session_id: 1, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ finished_event_at: created_at + 10.minutes,
+ resumed_event_at: created_at + 9.minutes }
+ end
+
+ let(:session2) do # finished & short
+ created_at = DateTime.parse('2025-03-02 00:00:00 UTC')
+ { session_id: 2, user_id: 2, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ finished_event_at: created_at + 3.minutes,
+ resumed_event_at: created_at + 2.minutes }
+ end
+
+ let(:session3) do # not finished yet. in progress
+ created_at = DateTime.parse('2025-03-04 00:00:00 UTC')
+ { session_id: 3, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'code_review', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ resumed_event_at: created_at + 9.minutes }
+ end
+
+ let(:all_data_rows) do
+ [session1, session2, session3]
+ end
+
+ describe "count" do
+ it 'returns total sessions count' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ metrics: [{ identifier: :total_count }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { total_count: all_data_rows.count }
+ ])
+ end
+ end
+
+ describe "count distinct" do
+ it 'returns number of distinct users' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ metrics: [{ identifier: :users_count }]
+ )
+
+ user_ids = all_data_rows.pluck(:user_id)
+ expect(user_ids.count).not_to eq(user_ids.uniq.count)
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { users_count: user_ids.uniq.count }
+ ])
+ end
+ end
+
+ describe "conditional count" do
+ it 'returns sessions count with condition fulfilled' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ metrics: [{ identifier: :finished_count }]
+ )
+
+ finished_timestamps = all_data_rows.pluck(:finished_event_at)
+ expect(finished_timestamps.count).not_to eq(finished_timestamps.compact.count)
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { finished_count: finished_timestamps.compact.count }
+ ])
+ end
+ end
+end
diff --git a/spec/lib/gitlab/database/aggregation/click_house/engine_spec.rb b/spec/lib/gitlab/database/aggregation/click_house/engine_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..85f09ccc7b1bcfaaf4f9a685bff34e409aa645d3
--- /dev/null
+++ b/spec/lib/gitlab/database/aggregation/click_house/engine_spec.rb
@@ -0,0 +1,239 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Database::Aggregation::ClickHouse::Engine, :click_house, feature_category: :database do
+ include_context 'with agent_platform_sessions ClickHouse aggregation engine'
+
+ let(:engine_definition) do
+ described_class.build do
+ self.table_name = 'agent_platform_sessions'
+ self.table_primary_key = %w[namespace_path user_id session_id flow_type]
+
+ filters do
+ exact_match :user_id, :integer
+ range :created_event_at, :datetime, -> { sql('anyIfMerge(created_event_at)') }, merge_column: true
+ end
+
+ dimensions do
+ column :user_id, :integer
+ column :flow_type, :string
+ column :duration, :integer, -> {
+ sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))")
+ }
+ column :environment, :string, nil, formatter: ->(v) { v.upcase }
+ end
+
+ metrics do
+ count
+ mean :duration, :float, -> {
+ sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))")
+ }
+ quantile :duration, :float,
+ -> { sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))") },
+ parameters: { quantile: { type: :float } }
+ count :with_format, :integer, nil, formatter: ->(v) { v * -1 }
+ end
+ end
+ end
+
+ let(:session1) do # finished & long
+ created_at = DateTime.parse('2025-03-01 00:00:00 UTC')
+ { session_id: 1, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ finished_event_at: created_at + 10.minutes,
+ resumed_event_at: created_at + 9.minutes }
+ end
+
+ let(:session2) do # finished & short
+ created_at = DateTime.parse('2025-03-02 00:00:00 UTC')
+ { session_id: 2, user_id: 2, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ finished_event_at: created_at + 3.minutes,
+ resumed_event_at: created_at + 2.minutes }
+ end
+
+ let(:session3) do # in progress
+ created_at = DateTime.parse('2025-03-04 00:00:00 UTC')
+ { session_id: 3, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ resumed_event_at: created_at + 9.minutes }
+ end
+
+ let(:session4) do # dropped
+ created_at = DateTime.parse('2025-03-04 00:00:00 UTC')
+ { session_id: 4, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ dropped_event_at: created_at + 10.minutes,
+ resumed_event_at: created_at + 9.minutes }
+ end
+
+ let(:session5) do # finished medium
+ created_at = DateTime.parse('2025-04-04 00:00:00 UTC')
+ { session_id: 5, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ finished_event_at: created_at + 7.minutes,
+ resumed_event_at: created_at + 9.minutes }
+ end
+
+ let(:all_data_rows) do
+ [session1, session2, session3, session4, session5]
+ end
+
+ describe "filtering" do
+ it 'applies merge column filtering' do
+ filter_range = session1[:created_event_at].to_date..session4[:created_event_at].to_date
+
+ request = Gitlab::Database::Aggregation::Request.new(
+ filters: [{ identifier: :created_event_at, values: filter_range }],
+ dimensions: [{ identifier: :user_id }],
+ metrics: [{ identifier: :total_count }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { user_id: 2, total_count: 1 },
+ { user_id: 1, total_count: 3 }
+ ])
+ end
+
+ it 'applies regular filtering' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ filters: [{ identifier: :user_id, values: [1] }],
+ dimensions: [{ identifier: :user_id }],
+ metrics: [{ identifier: :total_count }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { user_id: 1, total_count: 4 }
+ ])
+ end
+ end
+
+ describe "dimensions" do
+ it 'groups by single dimension' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ dimensions: [{ identifier: :user_id }],
+ metrics: [{ identifier: :total_count }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return(match_array([
+ { user_id: 2, total_count: 1 },
+ { user_id: 1, total_count: 4 }
+ ]))
+ end
+
+ it 'groups by multiple dimensions' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ dimensions: [{ identifier: :user_id }, { identifier: :flow_type }],
+ metrics: [{ identifier: :total_count }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return(match_array([
+ { user_id: 2, flow_type: 'chat', total_count: 1 },
+ { user_id: 1, flow_type: 'chat', total_count: 4 }
+ ]))
+ end
+
+ it 'groups by column with expression' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ dimensions: [{ identifier: :duration }],
+ metrics: [{ identifier: :total_count }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return(match_array([
+ { duration: nil, total_count: 2 },
+ { duration: 600, total_count: 1 },
+ { duration: 180, total_count: 1 },
+ { duration: 420, total_count: 1 }
+ ]))
+ end
+ end
+
+ describe "sorting" do
+ it 'accepts metric sort' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ dimensions: [{ identifier: :duration }],
+ metrics: [{ identifier: :total_count }],
+ order: [{ identifier: :total_count, direction: :asc }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { duration: 420, total_count: 1 },
+ { duration: 180, total_count: 1 },
+ { duration: 600, total_count: 1 },
+ { duration: nil, total_count: 2 }
+ ])
+ end
+
+ it 'accepts dimension sort' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ dimensions: [{ identifier: :duration }],
+ metrics: [{ identifier: :total_count }],
+ order: [{ identifier: :duration, direction: :asc }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { duration: 180, total_count: 1 },
+ { duration: 420, total_count: 1 },
+ { duration: 600, total_count: 1 },
+ { duration: nil, total_count: 2 }
+ ])
+ end
+
+ it 'accepts multiple orders' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ dimensions: [{ identifier: :duration }],
+ metrics: [{ identifier: :total_count }],
+ order: [
+ { identifier: :total_count, direction: :desc },
+ { identifier: :duration, direction: :asc }
+ ]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { duration: nil, total_count: 2 },
+ { duration: 180, total_count: 1 },
+ { duration: 420, total_count: 1 },
+ { duration: 600, total_count: 1 }
+ ])
+ end
+
+ it 'accepts order by parameterized metric' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ dimensions: [{ identifier: :user_id }],
+ metrics: [{ identifier: :duration_quantile, parameters: { quantile: 0.1 } }],
+ order: [
+ { identifier: :duration_quantile, parameters: { quantile: 0.1 }, direction: :desc }
+ ]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { user_id: 1, duration_quantile_14be4: 438 },
+ { user_id: 2, duration_quantile_14be4: 180 }
+ ])
+ end
+ end
+
+ describe "formatting" do
+ it 'applies formatting if defined' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ dimensions: [{ identifier: :environment }],
+ metrics: [{ identifier: :with_format_count }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { environment: "PROD", with_format_count: -5 }
+ ])
+ end
+ end
+end
diff --git a/spec/lib/gitlab/database/aggregation/click_house/exact_match_filter_spec.rb b/spec/lib/gitlab/database/aggregation/click_house/exact_match_filter_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..2ad9fa12397adcc9dce7c591c58c6e51c898f1e7
--- /dev/null
+++ b/spec/lib/gitlab/database/aggregation/click_house/exact_match_filter_spec.rb
@@ -0,0 +1,77 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Database::Aggregation::ClickHouse::ExactMatchFilter, :click_house, feature_category: :database do
+ include_context 'with agent_platform_sessions ClickHouse aggregation engine'
+
+ let(:engine_definition) do
+ Gitlab::Database::Aggregation::ClickHouse::Engine.build do
+ self.table_name = 'agent_platform_sessions'
+ self.table_primary_key = %w[namespace_path user_id session_id flow_type]
+
+ metrics do
+ count
+ end
+
+ filters do
+ exact_match :session_id, :integer
+ end
+ end
+ end
+
+ let(:session1) do # finished & long
+ created_at = DateTime.parse('2025-03-01 00:00:00 UTC')
+ { session_id: 1, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ finished_event_at: created_at + 10.minutes,
+ resumed_event_at: created_at + 9.minutes }
+ end
+
+ let(:session2) do # finished & short
+ created_at = DateTime.parse('2025-03-02 00:00:00 UTC')
+ { session_id: 2, user_id: 2, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ finished_event_at: created_at + 3.minutes,
+ resumed_event_at: created_at + 2.minutes }
+ end
+
+ let(:session3) do # not finished yet. in progress
+ created_at = DateTime.parse('2025-03-04 00:00:00 UTC')
+ { session_id: 3, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'code_review', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ resumed_event_at: created_at + 9.minutes }
+ end
+
+ let(:all_data_rows) do
+ [session1, session2, session3]
+ end
+
+ it 'applies single value filter' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ filters: [{ identifier: :session_id, values: [1] }],
+ metrics: [{ identifier: :total_count }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { total_count: 1 }
+ ])
+ end
+
+ it 'applies multiple values filter' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ filters: [{ identifier: :session_id, values: [1, 2] }],
+ metrics: [{ identifier: :total_count }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { total_count: 2 }
+ ])
+ end
+end
diff --git a/spec/lib/gitlab/database/aggregation/click_house/mean_spec.rb b/spec/lib/gitlab/database/aggregation/click_house/mean_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..b1c9e0ca7061caacb5fdbcfd55562564aba8fa02
--- /dev/null
+++ b/spec/lib/gitlab/database/aggregation/click_house/mean_spec.rb
@@ -0,0 +1,78 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Database::Aggregation::ClickHouse::Mean, :click_house, feature_category: :database do
+ include_context 'with agent_platform_sessions ClickHouse aggregation engine'
+
+ let(:engine_definition) do
+ Gitlab::Database::Aggregation::ClickHouse::Engine.build do
+ self.table_name = 'agent_platform_sessions'
+ self.table_primary_key = %w[namespace_path user_id session_id flow_type]
+
+ metrics do
+ mean :session_id, :float
+ mean :duration, :float, -> {
+ Arel.sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))")
+ }
+ end
+ end
+ end
+
+ let(:session1) do # finished & long
+ created_at = DateTime.parse('2025-03-01 00:00:00 UTC')
+ { session_id: 1, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ finished_event_at: created_at + 10.minutes,
+ resumed_event_at: created_at + 9.minutes }
+ end
+
+ let(:session2) do # finished & short
+ created_at = DateTime.parse('2025-03-02 00:00:00 UTC')
+ { session_id: 2, user_id: 2, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ finished_event_at: created_at + 3.minutes,
+ resumed_event_at: created_at + 2.minutes }
+ end
+
+ let(:session3) do # in progress
+ created_at = DateTime.parse('2025-03-04 00:00:00 UTC')
+ { session_id: 3, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'code_review', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ resumed_event_at: created_at + 9.minutes }
+ end
+
+ let(:all_data_rows) do
+ [session1, session2, session3]
+ end
+
+ describe "mean" do
+ it 'returns average session_id' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ metrics: [{ identifier: :mean_session_id }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { mean_session_id: 2 }
+ ])
+ end
+ end
+
+ describe "mean with expression" do
+ it 'returns average session duration' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ metrics: [{ identifier: :mean_duration }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { mean_duration: 390 }
+ ])
+ end
+ end
+end
diff --git a/spec/lib/gitlab/database/aggregation/click_house/quantile_spec.rb b/spec/lib/gitlab/database/aggregation/click_house/quantile_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..2d06a2f2de1da87af70af8a607ef706f8ad7e1e6
--- /dev/null
+++ b/spec/lib/gitlab/database/aggregation/click_house/quantile_spec.rb
@@ -0,0 +1,139 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Database::Aggregation::ClickHouse::Quantile, :click_house, feature_category: :database do
+ include_context 'with agent_platform_sessions ClickHouse aggregation engine'
+
+ let(:engine_definition) do
+ Gitlab::Database::Aggregation::ClickHouse::Engine.build do
+ self.table_name = 'agent_platform_sessions'
+ self.table_primary_key = %w[namespace_path user_id session_id flow_type]
+
+ metrics do
+ quantile :duration, :float,
+ -> { Arel.sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))") }
+ quantile :duration_with_param, :float,
+ -> { Arel.sql("dateDiff('seconds', anyIfMerge(created_event_at), anyIfMerge(finished_event_at))") },
+ parameters: { quantile: { type: :float } }
+ end
+ end
+ end
+
+ let(:session1) do # finished 1.minute
+ created_at = DateTime.parse('2025-03-01 00:00:00 UTC')
+ { session_id: 1, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ finished_event_at: created_at + 1.minute,
+ resumed_event_at: created_at + 9.minutes }
+ end
+
+ let(:session2) do # finished 2 minutes
+ created_at = DateTime.parse('2025-03-02 00:00:00 UTC')
+ { session_id: 2, user_id: 2, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ finished_event_at: created_at + 2.minutes,
+ resumed_event_at: created_at + 2.minutes }
+ end
+
+ let(:session3) do # finished 3 minutes
+ created_at = DateTime.parse('2025-03-02 00:00:00 UTC')
+ { session_id: 3, user_id: 2, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ finished_event_at: created_at + 3.minutes,
+ resumed_event_at: created_at + 2.minutes }
+ end
+
+ let(:session4) do # finished 4 minutes
+ created_at = DateTime.parse('2025-03-02 00:00:00 UTC')
+ { session_id: 4, user_id: 2, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ finished_event_at: created_at + 4.minutes,
+ resumed_event_at: created_at + 2.minutes }
+ end
+
+ let(:session5) do # finished 5 minutes
+ created_at = DateTime.parse('2025-03-02 00:00:00 UTC')
+ { session_id: 5, user_id: 2, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ finished_event_at: created_at + 5.minutes,
+ resumed_event_at: created_at + 2.minutes }
+ end
+
+ let(:session6) do # dropped
+ created_at = DateTime.parse('2025-03-04 00:00:00 UTC')
+ { session_id: 6, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'code_review', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ dropped_event_at: created_at + 10.minutes,
+ resumed_event_at: created_at + 9.minutes }
+ end
+
+ let(:session7) do # in progress
+ created_at = DateTime.parse('2025-03-04 00:00:00 UTC')
+ { session_id: 7, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'code_review', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ resumed_event_at: created_at + 9.minutes }
+ end
+
+ let(:all_data_rows) do
+ [session1, session2, session3, session4, session5, session6, session7]
+ end
+
+ describe "quantile without param" do
+ it 'returns median' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ metrics: [{ identifier: :duration_quantile }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { duration_quantile: 180.0 }
+ ])
+ end
+
+ it 'ignores passed param' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ metrics: [{ identifier: :duration_quantile, parameters: { quantile: 0.1 } }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { duration_quantile: 180.0 }
+ ])
+ end
+ end
+
+ describe "quantile with param" do
+ it 'returns median by default' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ metrics: [{ identifier: :duration_with_param_quantile }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { duration_with_param_quantile: 180.0 }
+ ])
+ end
+
+ it 'uses passed param' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ metrics: [{ identifier: :duration_with_param_quantile, parameters: { quantile: 0.1 } }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { duration_with_param_quantile_14be4: 84.0 }
+ ])
+ end
+ end
+end
diff --git a/spec/lib/gitlab/database/aggregation/click_house/range_filter_spec.rb b/spec/lib/gitlab/database/aggregation/click_house/range_filter_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..e1a6b7893d7485763d766c9c7b01f72c63832afd
--- /dev/null
+++ b/spec/lib/gitlab/database/aggregation/click_house/range_filter_spec.rb
@@ -0,0 +1,88 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Database::Aggregation::ClickHouse::RangeFilter, :click_house, feature_category: :database do
+ include_context 'with agent_platform_sessions ClickHouse aggregation engine'
+
+ let(:engine_definition) do
+ Gitlab::Database::Aggregation::ClickHouse::Engine.build do
+ self.table_name = 'agent_platform_sessions'
+ self.table_primary_key = %w[namespace_path user_id session_id flow_type]
+
+ metrics do
+ count
+ end
+
+ filters do
+ range :session_id, :integer
+ end
+ end
+ end
+
+ let(:session1) do # finished & long
+ created_at = DateTime.parse('2025-03-01 00:00:00 UTC')
+ { session_id: 1, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ finished_event_at: created_at + 10.minutes,
+ resumed_event_at: created_at + 9.minutes }
+ end
+
+ let(:session2) do # finished & short
+ created_at = DateTime.parse('2025-03-02 00:00:00 UTC')
+ { session_id: 2, user_id: 2, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ finished_event_at: created_at + 3.minutes,
+ resumed_event_at: created_at + 2.minutes }
+ end
+
+ let(:session3) do # not finished yet. in progress
+ created_at = DateTime.parse('2025-03-04 00:00:00 UTC')
+ { session_id: 3, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'code_review', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ resumed_event_at: created_at + 9.minutes }
+ end
+
+ let(:all_data_rows) do
+ [session1, session2, session3]
+ end
+
+ it 'applies full range filter' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ filters: [{ identifier: :session_id, values: 1..2 }],
+ metrics: [{ identifier: :total_count }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { total_count: 2 }
+ ])
+ end
+
+ it 'applies 1.. range filter' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ filters: [{ identifier: :session_id, values: 1..nil }],
+ metrics: [{ identifier: :total_count }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { total_count: 3 }
+ ])
+ end
+
+ it 'applies ...2 range filter' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ filters: [{ identifier: :session_id, values: nil...2 }],
+ metrics: [{ identifier: :total_count }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { total_count: 1 }
+ ])
+ end
+end
diff --git a/spec/lib/gitlab/database/aggregation/click_house/rate_spec.rb b/spec/lib/gitlab/database/aggregation/click_house/rate_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..e66bc42a5ddd8ba4a42abbaa6d58063f5274ea81
--- /dev/null
+++ b/spec/lib/gitlab/database/aggregation/click_house/rate_spec.rb
@@ -0,0 +1,79 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Database::Aggregation::ClickHouse::Rate, :click_house, feature_category: :database do
+ include_context 'with agent_platform_sessions ClickHouse aggregation engine'
+
+ let(:engine_definition) do
+ Gitlab::Database::Aggregation::ClickHouse::Engine.build do
+ self.table_name = 'agent_platform_sessions'
+ self.table_primary_key = %w[namespace_path user_id session_id flow_type]
+
+ metrics do
+ rate :completion, numerator_if: -> { Arel.sql('anyIfMerge(finished_event_at) IS NOT NULL') }
+ rate :finished_to_dropped,
+ denominator_if: -> { Arel.sql('anyIfMerge(dropped_event_at) IS NOT NULL') },
+ numerator_if: -> { Arel.sql('anyIfMerge(finished_event_at) IS NOT NULL') }
+ end
+ end
+ end
+
+ let(:session1) do # finished & long
+ created_at = DateTime.parse('2025-03-01 00:00:00 UTC')
+ { session_id: 1, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ finished_event_at: created_at + 10.minutes,
+ resumed_event_at: created_at + 9.minutes }
+ end
+
+ let(:session2) do # finished & short
+ created_at = DateTime.parse('2025-03-02 00:00:00 UTC')
+ { session_id: 2, user_id: 2, project_id: 1, namespace_path: '1/2/', flow_type: 'chat', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ finished_event_at: created_at + 3.minutes,
+ resumed_event_at: created_at + 2.minutes }
+ end
+
+ let(:session3) do # dropped
+ created_at = DateTime.parse('2025-03-04 00:00:00 UTC')
+ { session_id: 3, user_id: 1, project_id: 1, namespace_path: '1/2/', flow_type: 'code_review', environment: 'prod',
+ session_year: 2025,
+ created_event_at: created_at,
+ started_event_at: created_at + 1.second,
+ dropped_event_at: created_at + 10.minutes,
+ resumed_event_at: created_at + 9.minutes }
+ end
+
+ let(:all_data_rows) do
+ [session1, session2, session3]
+ end
+
+ describe "rate with numerator only" do
+ it 'returns numerator/total_count rate' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ metrics: [{ identifier: :completion_rate }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { completion_rate: 2.0 / 3 }
+ ])
+ end
+ end
+
+ describe "rate with numerator and denominator" do
+ it 'returns numerator/denominator rate' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ metrics: [{ identifier: :finished_to_dropped_rate }]
+ )
+
+ expect(engine).to execute_aggregation(request).and_return([
+ { finished_to_dropped_rate: 2.0 }
+ ])
+ end
+ end
+end
diff --git a/spec/lib/gitlab/database/aggregation/definitions_collector_spec.rb b/spec/lib/gitlab/database/aggregation/definitions_collector_spec.rb
index 5baffb6e983efeb458ad3ba0ead453e91b10e29f..853f1ddee7b59ea46fe398e5b93a9e6f74bc2ef9 100644
--- a/spec/lib/gitlab/database/aggregation/definitions_collector_spec.rb
+++ b/spec/lib/gitlab/database/aggregation/definitions_collector_spec.rb
@@ -27,7 +27,7 @@ def initialize(*args)
foo(1, 2, 3)
end
- expect(collected_definitions.map(&:args)).to eq([[1, 2, 3]])
+ expect(collected_definitions.map(&:args)).to match_array([[1, 2, 3]])
end
it 'raises NoMethodError for unknown definition' do
diff --git a/spec/lib/gitlab/database/aggregation/engine_spec.rb b/spec/lib/gitlab/database/aggregation/engine_spec.rb
index d931408756694985e807e74de8594ab0fa1f51d4..e1b2a8f7f83515baf607d3430cf0fbffa8eee3ff 100644
--- a/spec/lib/gitlab/database/aggregation/engine_spec.rb
+++ b/spec/lib/gitlab/database/aggregation/engine_spec.rb
@@ -3,6 +3,44 @@
require 'spec_helper'
RSpec.describe Gitlab::Database::Aggregation::Engine, feature_category: :database do
+ let(:engine_klass) do
+ described_class.build do
+ def self.metrics_mapping
+ {
+ count: Gitlab::Database::Aggregation::PartDefinition
+ }
+ end
+
+ def self.dimensions_mapping
+ {
+ column: Gitlab::Database::Aggregation::PartDefinition
+ }
+ end
+
+ def self.filters_mapping
+ {
+ column: Gitlab::Database::Aggregation::PartDefinition
+ }
+ end
+
+ dimensions do
+ column :user_id, :integer
+ end
+
+ filters do
+ column :user_id, :integer
+ end
+
+ metrics do
+ count :total_count, :integer
+ end
+ end
+ end
+
+ it 'requires filters_mapping definition' do
+ expect(described_class).to require_method_definition(:filters_mapping)
+ end
+
it 'requires metrics_mapping definition' do
expect(described_class).to require_method_definition(:metrics_mapping)
end
@@ -15,27 +53,23 @@
expect(described_class.new(context: {})).to require_method_definition(:execute_query_plan, nil)
end
- describe 'duplicates validation' do
- let(:engine_klass) do
- Gitlab::Database::Aggregation::Engine.build do
- def self.metrics_mapping
- {
- count: Gitlab::Database::Aggregation::PartDefinition
- }
- end
-
- def self.dimensions_mapping
- {
- column: Gitlab::Database::Aggregation::PartDefinition
- }
- end
-
- dimensions do
- column :user_id, :integer
- end
- end
+ describe '.to_hash' do
+ it 'exposes the engine_definition definition' do
+ expect(engine_klass.to_hash).to match({
+ dimensions: array_including(
+ hash_including({ identifier: :user_id })
+ ),
+ metrics: array_including(
+ hash_including({ identifier: :total_count })
+ ),
+ filters: array_including(
+ hash_including({ identifier: :user_id })
+ )
+ })
end
+ end
+ describe 'duplicated definitions validation' do
it 'raises an exception if duplicate dimensions are defined' do
expect do
engine_klass.dimensions do
@@ -51,5 +85,13 @@ def self.dimensions_mapping
end
end.to raise_error("Identical engine parts found: [:user_id]. Engine parts identifiers must be unique.")
end
+
+ it 'raises an exception if duplicate filters are defined' do
+ expect do
+ engine_klass.filters do
+ column :user_id, :integer
+ end
+ end.to raise_error("Identical engine parts found: [:user_id]. Engine parts identifiers must be unique.")
+ end
end
end
diff --git a/spec/lib/gitlab/database/aggregation/formatter_spec.rb b/spec/lib/gitlab/database/aggregation/formatter_spec.rb
index 9d67cc407163069f1bd815cc311a790490695cb7..00ad697c5fd8b35651148dda99354b78e1ef3892 100644
--- a/spec/lib/gitlab/database/aggregation/formatter_spec.rb
+++ b/spec/lib/gitlab/database/aggregation/formatter_spec.rb
@@ -20,7 +20,7 @@ def self.metrics_mapping
end
end
- let(:plan) { Gitlab::Database::Aggregation::QueryPlan.build(request, engine.new(context: {})) }
+ let(:plan) { request.to_query_plan(engine.new(context: {})) }
let(:request) do
Gitlab::Database::Aggregation::Request.new(metrics: [{ identifier: :total_count },
{ identifier: :total_with_formatting_count }])
@@ -33,8 +33,7 @@ def self.metrics_mapping
end
it 'leaves unknown data untouched' do
- expect(formatter.format_data([{ 'dummy' => 3 }]))
- .to eq([{ 'dummy' => 3 }])
+ expect(formatter.format_data([{ 'dummy' => 3 }])).to eq([{ 'dummy' => 3 }])
end
end
end
diff --git a/spec/lib/gitlab/database/aggregation/graphql_aggregation_connection_spec.rb b/spec/lib/gitlab/database/aggregation/graphql_aggregation_connection_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..9729a41a88aa0a0eb6806af5625e25525772c8b2
--- /dev/null
+++ b/spec/lib/gitlab/database/aggregation/graphql_aggregation_connection_spec.rb
@@ -0,0 +1,157 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Database::Aggregation::GraphqlAggregationConnection, :click_house, feature_category: :database do
+ include GraphqlHelpers
+ include ClickHouseHelpers
+
+ let_it_be(:user1) { create(:user) }
+ let_it_be(:user2) { create(:user) }
+ let_it_be(:user3) { create(:user) }
+ let_it_be(:event1) { create(:event, author: user1) }
+ let_it_be(:event2) { create(:event, author: user1) }
+ let_it_be(:event3) { create(:event, author: user2) }
+ let_it_be(:event4) { create(:event, author: user3) }
+ let_it_be(:event5) { create(:event, author: user3) }
+ let_it_be(:event6) { create(:event, author: user3) }
+
+ let(:arguments) { {} }
+ let(:ctx) { GraphQL::Query::Context.new(query: query_double, values: nil) }
+
+ let(:engine) { engine_definition.new(context: { scope: query_builder }) }
+ let(:query_builder) { query }
+
+ let(:aggregation_request) do
+ Gitlab::Database::Aggregation::Request.new(
+ dimensions: [{ identifier: :author_id }],
+ metrics: [{ identifier: :total_count }],
+ order: [{ identifier: :total_count, direction: :desc }]
+ )
+ end
+
+ let(:nodes) do
+ engine.execute(aggregation_request).payload[:data]
+ end
+
+ subject(:connection) do
+ described_class.new(nodes, **{ context: ctx, max_page_size: 10 }.merge(arguments))
+ end
+
+ context 'when using ClickHouse aggregation engine' do
+ let(:query) { ClickHouse::Client::QueryBuilder.new('events') }
+
+ let(:engine_definition) do
+ Gitlab::Database::Aggregation::ClickHouse::Engine.build do
+ self.table_primary_key = %w[id]
+
+ dimensions do
+ column :author_id, :integer
+ end
+
+ metrics do
+ count
+ end
+ end
+ end
+
+ before do
+ insert_events_into_click_house
+ end
+
+ describe 'forward pagination' do
+ context 'when requesting the first page' do
+ let(:arguments) { { first: 2 } }
+
+ it 'returns the top 2 records' do
+ expect(connection.nodes).to match([
+ a_hash_including("author_id" => user3.id, "total_count" => 3),
+ a_hash_including("author_id" => user1.id, "total_count" => 2)
+ ])
+ end
+
+ it 'sets correct page_info' do
+ expect(connection.has_next_page).to be true
+ expect(connection.has_previous_page).to be false
+ end
+
+ it 'generates correct cursors' do
+ node_1 = connection.nodes.first
+ expect(connection.cursor_for(node_1)).to eq(encode_cursor(0))
+ end
+ end
+
+ context 'when paginating with "after"' do
+ let(:arguments) { { first: 2, after: encode_cursor(0) } }
+
+ it 'returns the next records' do
+ expect(connection.nodes).to match([
+ a_hash_including("author_id" => user1.id, "total_count" => 2),
+ a_hash_including("author_id" => user2.id, "total_count" => 1)
+ ])
+ end
+
+ it 'sets correct page_info' do
+ expect(connection.has_next_page).to be false
+ expect(connection.has_previous_page).to be true
+ end
+ end
+
+ context 'when "after" is the last item' do
+ let(:arguments) { { first: 2, after: encode_cursor(2) } }
+
+ it 'returns empty list' do
+ expect(connection.nodes).to be_empty
+ expect(connection.has_next_page).to be false
+ expect(connection.has_previous_page).to be true
+ end
+ end
+ end
+
+ describe 'backward pagination' do
+ context 'when using last and before' do
+ let(:arguments) { { last: 1, before: encode_cursor(2) } }
+
+ it 'returns the preceding record' do
+ expect(connection.nodes).to match([
+ a_hash_including("author_id" => user1.id)
+ ])
+ end
+
+ it 'sets correct page_info' do
+ expect(connection.has_next_page).to be true
+ expect(connection.has_previous_page).to be true
+ end
+ end
+
+ context 'when "last" is larger than available "before" records' do
+ let(:arguments) { { last: 10, before: encode_cursor(2) } }
+
+ it 'returns all preceding records up to the start' do
+ expect(connection.nodes).to match([
+ a_hash_including("author_id" => user3.id),
+ a_hash_including("author_id" => user1.id)
+ ])
+ end
+
+ it 'indicates no previous page' do
+ expect(connection.has_previous_page).to be false
+ expect(connection.has_next_page).to be true
+ end
+ end
+
+ context 'when "last" is provided without "before"' do
+ let(:arguments) { { last: 2 } }
+
+ it 'raises an execution error' do
+ expect { connection.nodes }
+ .to raise_error(GraphQL::ExecutionError, /Argument 'last' can only be used in conjunction with 'before'/)
+ end
+ end
+ end
+ end
+
+ def encode_cursor(value)
+ GitlabSchema.cursor_encoder.encode(value.to_s, nonce: true)
+ end
+end
diff --git a/spec/lib/gitlab/database/aggregation/query_plan_spec.rb b/spec/lib/gitlab/database/aggregation/query_plan_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..64a80963fec6a484e4888e448bde5482f301e3ec
--- /dev/null
+++ b/spec/lib/gitlab/database/aggregation/query_plan_spec.rb
@@ -0,0 +1,150 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Database::Aggregation::QueryPlan, feature_category: :database do
+ let_it_be(:engine_definition) do
+ Gitlab::Database::Aggregation::Engine.build do
+ def self.dimensions_mapping
+ {
+ column: Gitlab::Database::Aggregation::PartDefinition
+ }
+ end
+
+ def self.metrics_mapping
+ {
+ count: Gitlab::Database::Aggregation::PartDefinition
+ }
+ end
+
+ def self.filters_mapping
+ {
+ exact_match: Gitlab::Database::Aggregation::PartDefinition
+ }
+ end
+
+ dimensions do
+ column :state_id, :integer
+ end
+
+ metrics do
+ count :total_count, :integer
+ end
+
+ filters do
+ exact_match :state_id, :integer
+ end
+ end
+ end
+
+ let(:engine) { engine_definition.new(context: { scope: MergeRequest.all }) }
+
+ describe 'validations' do
+ context 'when filter cannot be found' do
+ it 'returns error' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ filters: [{ identifier: :missing_identifier }],
+ metrics: [{ identifier: :total_count }]
+ )
+
+ query_plan = described_class.new(engine, request)
+
+ expect(query_plan).not_to be_valid
+ expect(query_plan.errors.to_a).to include(
+ a_string_matching(/identifier is not available: 'missing_identifier'/)
+ )
+ end
+ end
+
+ context 'when dimension cannot be found' do
+ it 'returns error' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ dimensions: [{ identifier: :missing_identifier }],
+ metrics: [{ identifier: :total_count }]
+ )
+
+ query_plan = described_class.new(engine, request)
+
+ expect(query_plan).not_to be_valid
+ expect(query_plan.errors.to_a).to include(
+ a_string_matching(/identifier is not available: 'missing_identifier'/)
+ )
+ end
+ end
+
+ context 'when metric cannot be found' do
+ it 'returns error' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ dimensions: [{ identifier: :state_id }],
+ metrics: [{ identifier: :missing_identifier }]
+ )
+
+ query_plan = described_class.new(engine, request)
+
+ expect(query_plan).not_to be_valid
+ expect(query_plan.errors.to_a).to include(
+ a_string_matching(/identifier is not available: 'missing_identifier'/)
+ )
+ end
+ end
+
+ context 'when no metric is passed in' do
+ it 'returns error' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ dimensions: [{ identifier: :state_id }],
+ metrics: []
+ )
+
+ query_plan = described_class.new(engine, request)
+
+ expect(query_plan).not_to be_valid
+ expect(query_plan.errors.to_a).to include(a_string_matching(/at least one metric is required/))
+ end
+ end
+
+ context 'when order cannot be found' do
+ it 'returns error' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ dimensions: [{ identifier: :state_id }],
+ metrics: [{ identifier: :total_count }],
+ order: [{ identifier: :missing_identifier, direction: :asc }]
+ )
+
+ query_plan = described_class.new(engine, request)
+
+ expect(query_plan).not_to be_valid
+ expect(query_plan.errors.to_a).to include(
+ a_string_matching(/identifier is not available: 'missing_identifier'/)
+ )
+ end
+ end
+
+ context 'when duplicated dimensions are passed in' do
+ it 'returns error' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ dimensions: [{ identifier: :state_id }, { identifier: :state_id }],
+ metrics: [{ identifier: :total_count }]
+ )
+
+ query_plan = described_class.new(engine, request)
+
+ expect(query_plan).not_to be_valid
+ expect(query_plan.errors.to_a).to include(a_string_matching(/duplicated identifiers found: state_id/))
+ end
+ end
+
+ context 'when duplicated metrics are passed in' do
+ it 'returns error' do
+ request = Gitlab::Database::Aggregation::Request.new(
+ dimensions: [{ identifier: :state_id }],
+ metrics: [{ identifier: :total_count }, { identifier: :total_count }]
+ )
+
+ query_plan = described_class.new(engine, request)
+
+ expect(query_plan).not_to be_valid
+ expect(query_plan.errors.to_a).to include(a_string_matching(/duplicated identifiers found: total_count/))
+ end
+ end
+ end
+end
diff --git a/spec/support/helpers/click_house_helpers.rb b/spec/support/helpers/click_house_helpers.rb
index 12574a42d32d79146e004d97d25b5a40a36075cf..392fbf2b72bb866ea320603d56e11fbbcb5e4c36 100644
--- a/spec/support/helpers/click_house_helpers.rb
+++ b/spec/support/helpers/click_house_helpers.rb
@@ -67,7 +67,7 @@ def self.default_timezone
ActiveRecord.default_timezone
end
- def clickhouse_fixture(table, data, db = :main)
+ def clickhouse_fixture(table, data, db = :main, &block)
return if data.empty?
if data.map { |row| row.keys.sort }.uniq.size > 1
@@ -77,15 +77,21 @@ def clickhouse_fixture(table, data, db = :main)
structure = data.first.keys
rows = data.map do |row|
- cols = structure.map do |col|
+ structure.map do |col|
val = row[col].is_a?(Hash) ? row[col].to_json : row[col]
- ClickHouseHelpers.quote(val)
+ val.is_a?(Arel::Nodes::SqlLiteral) ? val : ClickHouseHelpers.quote(val)
end
-
- "(#{cols.join(', ')})"
end
- query = "INSERT INTO #{table} (#{structure.join(', ')}) VALUES #{rows.join(',')}"
+ query = if block
+ yield(rows, structure)
+ else
+ values_data = rows.map do |cols|
+ "(#{cols.join(', ')})"
+ end.join(',')
+
+ "INSERT INTO #{table} (#{structure.join(', ')}) VALUES #{values_data}"
+ end
ClickHouse::Client.execute(query, db)
end
diff --git a/spec/support/matchers/database/aggregations/execute_aggregation_matcher.rb b/spec/support/matchers/database/aggregations/execute_aggregation_matcher.rb
index ebd60dc195aae8e33bf1a9d844090766de903d5e..7a23c901ea292d2a58822d6494d5da831cbc226e 100644
--- a/spec/support/matchers/database/aggregations/execute_aggregation_matcher.rb
+++ b/spec/support/matchers/database/aggregations/execute_aggregation_matcher.rb
@@ -3,7 +3,7 @@
# Matcher specific to Gitlab::Database::Aggregation::Engine class testing
RSpec::Matchers.define :execute_aggregation do |request|
chain :and_return do |expected_data|
- @expected_data = expected_data.map(&:deep_stringify_keys)
+ @expected_data = expected_data
end
chain :with_errors do |expected_errors|
@@ -13,7 +13,7 @@
match do |engine|
response = engine.execute(request)
- @actual_data = response[:data]&.to_a
+ @actual_data = response[:data]&.to_a&.map(&:with_indifferent_access)
@actual_errors = response[:errors]&.to_a
if @expected_data
@@ -25,8 +25,15 @@
failure_message do |engine|
if @expected_data
- "expected #{engine.class} to execute aggregation and return #{@expected_data.inspect}, " \
- "but got #{@actual_data.inspect}"
+ message = "expected #{engine.class} to execute aggregation and return #{@expected_data.inspect}, "
+ if @actual_data
+ message << "but got #{@actual_data.inspect}"
+ else
+ message << 'but got no data'
+ message << " and errors #{@actual_errors.inspect}" if @actual_errors
+ end
+
+ message
elsif @expected_errors
"expected #{engine.class} to execute aggregation with errors #{@expected_errors.inspect}, " \
"but got #{@actual_errors.inspect}"
diff --git a/spec/support/shared_contexts/lib/gitlab/database/aggregation/clickhouse_engine_shared_context.rb b/spec/support/shared_contexts/lib/gitlab/database/aggregation/clickhouse_engine_shared_context.rb
new file mode 100644
index 0000000000000000000000000000000000000000..c2a3234740cf48fb5b258c05eedb484414244dca
--- /dev/null
+++ b/spec/support/shared_contexts/lib/gitlab/database/aggregation/clickhouse_engine_shared_context.rb
@@ -0,0 +1,62 @@
+# frozen_string_literal: true
+
+RSpec.shared_context 'with agent_platform_sessions ClickHouse aggregation engine' do
+ include ClickHouseHelpers
+
+ let(:engine) { engine_definition.new(context: { scope: query_builder }) }
+ let(:query_builder) { ClickHouse::Client::QueryBuilder.new(engine_definition.table_name) }
+
+ let(:row_structure) do
+ %i[user_id namespace_path project_id session_id flow_type environment session_year created_event_at
+ started_event_at finished_event_at dropped_event_at stopped_event_at resumed_event_at]
+ end
+
+ let(:prepared_all_data_rows) do
+ all_data_rows.map do |row|
+ row_structure.to_h do |key|
+ value = row.fetch(key, nil)
+ if key.to_s.end_with?('_at') # timestamp
+ value = if value
+ Arel.sql("parseDateTime64BestEffort('#{value}', 6, 'UTC')")
+ else
+ Arel.sql("CAST(NULL AS Nullable(DateTime64(6, 'UTC')))")
+ end
+ end
+
+ [key, value]
+ end
+ end
+ end
+
+ before do
+ clickhouse_fixture(engine_definition.table_name, prepared_all_data_rows) do |rows, structure|
+ subselects = rows.map do |row|
+ fields = structure.map.with_index do |field, i|
+ "#{row[i]} AS #{field}"
+ end
+
+ "SELECT #{fields.join(', ')}"
+ end.join(' UNION ALL ')
+
+ <<-SQL
+ INSERT INTO agent_platform_sessions
+ SELECT
+ user_id,
+ namespace_path,
+ project_id,
+ session_id,
+ flow_type,
+ environment,
+ session_year,
+ anyIfState(toNullable(created_event_at), created_event_at IS NOT NULL) as created_event_at,
+ anyIfState(toNullable(started_event_at), started_event_at IS NOT NULL) as started_event_at,
+ anyIfState(toNullable(finished_event_at), finished_event_at IS NOT NULL) as finished_event_at,
+ anyIfState(toNullable(dropped_event_at), dropped_event_at IS NOT NULL) as dropped_event_at,
+ anyIfState(toNullable(stopped_event_at), stopped_event_at IS NOT NULL) as stopped_event_at,
+ anyIfState(toNullable(resumed_event_at), resumed_event_at IS NOT NULL) as resumed_event_at
+ FROM (#{subselects})
+ GROUP BY user_id, namespace_path, project_id, session_id, flow_type, environment, session_year
+ SQL
+ end
+ end
+end