diff --git a/Gemfile b/Gemfile index 0dc1f94d18badafb25bad9d832a2cd5849cc44d7..0d2de378fda01fab636a31ff1d9bf706065e2867 100644 --- a/Gemfile +++ b/Gemfile @@ -380,7 +380,7 @@ gem 'rack-proxy', '~> 0.7.7', feature_category: :shared gem 'cssbundling-rails', '1.4.3', feature_category: :shared gem 'terser', '1.0.2', feature_category: :shared -gem 'click_house-client', '0.5.1', feature_category: :database +gem 'click_house-client', '0.7.0', feature_category: :database gem 'addressable', '~> 2.8', feature_category: :shared gem 'gon', '~> 6.5.0', feature_category: :shared gem 'request_store', '~> 1.7.0', feature_category: :shared diff --git a/Gemfile.checksum b/Gemfile.checksum index 995ec1dc54b1ac46dd0c88f1afbacf3de8251f92..b2907b1639576565ab03adcbeff8f86df901bfb4 100644 --- a/Gemfile.checksum +++ b/Gemfile.checksum @@ -80,7 +80,7 @@ {"name":"citrus","version":"3.0.2","platform":"ruby","checksum":"4ec2412fc389ad186735f4baee1460f7900a8e130ffe3f216b30d4f9c684f650"}, {"name":"claide","version":"1.1.0","platform":"ruby","checksum":"6d3c5c089dde904d96aa30e73306d0d4bd444b1accb9b3125ce14a3c0183f82e"}, {"name":"claide-plugins","version":"0.9.2","platform":"ruby","checksum":"c7ea78bc067ab23bce8515497cdcdcb8f01c86dadfbe13c44644e382922c1c2e"}, -{"name":"click_house-client","version":"0.5.1","platform":"ruby","checksum":"10022af0f6ae529daa3ae32e86ca62f5a236fa5fdb7893f1bd4a3133137e2d69"}, +{"name":"click_house-client","version":"0.7.0","platform":"ruby","checksum":"87df5b4ec4c757926eb570141ba618f1f5200a83494cebed49f04e671aa833c2"}, {"name":"coderay","version":"1.1.3","platform":"ruby","checksum":"dc530018a4684512f8f38143cd2a096c9f02a1fc2459edcfe534787a7fc77d4b"}, {"name":"coercible","version":"1.0.0","platform":"ruby","checksum":"5081ad24352cc8435ce5472bc2faa30260c7ea7f2102cc6a9f167c4d9bffaadc"}, {"name":"colored2","version":"3.1.2","platform":"ruby","checksum":"b13c2bd7eeae2cf7356a62501d398e72fde78780bd26aec6a979578293c28b4a"}, diff --git a/Gemfile.lock b/Gemfile.lock index 
7304351193b935df9f8555f1b6e2ad837e381a55..5ffa9df019d959eba694eeca5568eb33c0eee08b 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -431,7 +431,7 @@ GEM cork nap open4 (~> 1.3) - click_house-client (0.5.1) + click_house-client (0.7.0) activerecord (>= 7.0, < 9.0) activesupport (>= 7.0, < 9.0) addressable (~> 2.8) @@ -2105,7 +2105,7 @@ DEPENDENCIES carrierwave (~> 1.3) charlock_holmes (~> 0.7.9) circuitbox (= 2.0.0) - click_house-client (= 0.5.1) + click_house-client (= 0.7.0) commonmarker (~> 0.23.10) concurrent-ruby (~> 1.1) connection_pool (~> 2.5.3) diff --git a/Gemfile.next.checksum b/Gemfile.next.checksum index c3393183a2d052ce7898368b142e92752a996d12..8d45f1b9843fa1e9c1ea8f23ed770e396b94fe88 100644 --- a/Gemfile.next.checksum +++ b/Gemfile.next.checksum @@ -80,7 +80,7 @@ {"name":"citrus","version":"3.0.2","platform":"ruby","checksum":"4ec2412fc389ad186735f4baee1460f7900a8e130ffe3f216b30d4f9c684f650"}, {"name":"claide","version":"1.1.0","platform":"ruby","checksum":"6d3c5c089dde904d96aa30e73306d0d4bd444b1accb9b3125ce14a3c0183f82e"}, {"name":"claide-plugins","version":"0.9.2","platform":"ruby","checksum":"c7ea78bc067ab23bce8515497cdcdcb8f01c86dadfbe13c44644e382922c1c2e"}, -{"name":"click_house-client","version":"0.5.1","platform":"ruby","checksum":"10022af0f6ae529daa3ae32e86ca62f5a236fa5fdb7893f1bd4a3133137e2d69"}, +{"name":"click_house-client","version":"0.7.0","platform":"ruby","checksum":"87df5b4ec4c757926eb570141ba618f1f5200a83494cebed49f04e671aa833c2"}, {"name":"coderay","version":"1.1.3","platform":"ruby","checksum":"dc530018a4684512f8f38143cd2a096c9f02a1fc2459edcfe534787a7fc77d4b"}, {"name":"coercible","version":"1.0.0","platform":"ruby","checksum":"5081ad24352cc8435ce5472bc2faa30260c7ea7f2102cc6a9f167c4d9bffaadc"}, {"name":"colored2","version":"3.1.2","platform":"ruby","checksum":"b13c2bd7eeae2cf7356a62501d398e72fde78780bd26aec6a979578293c28b4a"}, diff --git a/Gemfile.next.lock b/Gemfile.next.lock index 
4411ffd7ab45109beb3d5e09cf0d78f051460860..b175accce7509bb18a420ef1bd56b978d163e3a0 100644 --- a/Gemfile.next.lock +++ b/Gemfile.next.lock @@ -425,7 +425,7 @@ GEM cork nap open4 (~> 1.3) - click_house-client (0.5.1) + click_house-client (0.7.0) activerecord (>= 7.0, < 9.0) activesupport (>= 7.0, < 9.0) addressable (~> 2.8) @@ -2100,7 +2100,7 @@ DEPENDENCIES carrierwave (~> 1.3) charlock_holmes (~> 0.7.9) circuitbox (= 2.0.0) - click_house-client (= 0.5.1) + click_house-client (= 0.7.0) commonmarker (~> 0.23.10) concurrent-ruby (~> 1.1) connection_pool (~> 2.5.3) diff --git a/app/graphql/resolvers/ci/job_analytics_resolver.rb b/app/graphql/resolvers/ci/job_analytics_resolver.rb new file mode 100644 index 0000000000000000000000000000000000000000..2c3767f9a3314de35fecf3dbd0dab8222491364f --- /dev/null +++ b/app/graphql/resolvers/ci/job_analytics_resolver.rb @@ -0,0 +1,68 @@ +# frozen_string_literal: true + +module Resolvers + module Ci + class JobAnalyticsResolver < BaseResolver + type ::Types::Ci::JobAnalyticsType.connection_type, null: true + authorize :read_build + + argument :select_fields, + [Types::Ci::JobAnalyticsFieldEnum], + required: true, + default_value: [:name], + description: 'Fields to select and group by.' + + argument :aggregations, + [Types::Ci::JobAnalyticsAggregationEnum], + required: true, + default_value: [:mean_duration_in_seconds, :rate_of_failed, :p95_duration_in_seconds], + description: 'Aggregation functions to apply.' + + argument :name_search, + GraphQL::Types::String, + required: false, + description: 'Search by name of the pipeline jobs. Supports partial matches.' + + argument :sort, + Types::Ci::JobAnalyticsSortEnum, + required: false, + description: 'Sort order for the results.' + + argument :source, Types::Ci::PipelineSourcesEnum, + required: false, + description: 'Source of the pipeline.' + + argument :ref, GraphQL::Types::String, + required: false, + description: 'Branch that triggered the pipeline.' 
+ + argument :from_time, Types::TimeType, + required: false, + description: + 'Start of the requested time (in UTC). Defaults to the pipelines started in the past week.' + + argument :to_time, Types::TimeType, + required: false, + description: + 'End of the requested time (in UTC). Defaults to the pipelines started before the current date.' + + def resolve(**args) + context[:connection] = ClickHouse::Connection.new(:main) + + validator_response = ::Ci::JobAnalytics::ValidatorService.new( + args[:select_fields], + args[:aggregations], + args[:sort] + ).execute + + return [errors: validator_response.errors] unless validator_response.success? + + finder = ::Ci::JobAnalytics::FinderService.new(object, args).execute + + ::Gitlab::Graphql::Pagination::ClickHouseAggregatedRelation.new(finder) + rescue ArgumentError => e + raise Gitlab::Graphql::Errors::ArgumentError, e.message + end + end + end +end diff --git a/app/graphql/types/ci/job_analytics_aggregation_enum.rb b/app/graphql/types/ci/job_analytics_aggregation_enum.rb new file mode 100644 index 0000000000000000000000000000000000000000..cd9489625de6aa6b754d23c9296639285ea9b130 --- /dev/null +++ b/app/graphql/types/ci/job_analytics_aggregation_enum.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module Types + module Ci + class JobAnalyticsAggregationEnum < BaseEnum + graphql_name 'CiJobAnalyticsAggregation' + description 'Aggregation functions available for CI job analytics' + + value 'MEAN_DURATION_IN_SECONDS', + value: :mean_duration_in_seconds, + description: 'Average duration of jobs in seconds.' + + value 'P95_DURATION_IN_SECONDS', + value: :p95_duration_in_seconds, + description: '95th percentile duration of jobs in seconds.' + + value 'RATE_OF_SUCCESS', + value: :rate_of_success, + description: 'Percentage of successful jobs.' + + value 'RATE_OF_FAILED', + value: :rate_of_failed, + description: 'Percentage of failed jobs.' 
+ + value 'RATE_OF_CANCELED', + value: :rate_of_canceled, + description: 'Percentage of canceled jobs.' + end + end +end diff --git a/app/graphql/types/ci/job_analytics_field_enum.rb b/app/graphql/types/ci/job_analytics_field_enum.rb new file mode 100644 index 0000000000000000000000000000000000000000..d7eba5c4ecb1ac5eee65e65df07c3fe2e799c9c6 --- /dev/null +++ b/app/graphql/types/ci/job_analytics_field_enum.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +module Types + module Ci + class JobAnalyticsFieldEnum < BaseEnum + graphql_name 'CiJobAnalyticsField' + description 'Fields available for selection in CI job analytics' + + value 'NAME', value: :name, description: 'Job name.' + value 'STAGE', value: :stage_id, description: 'Stage.' + end + end +end diff --git a/app/graphql/types/ci/job_analytics_sort_enum.rb b/app/graphql/types/ci/job_analytics_sort_enum.rb new file mode 100644 index 0000000000000000000000000000000000000000..4457634742e5e5fae2998eb040a90269e28dc5a1 --- /dev/null +++ b/app/graphql/types/ci/job_analytics_sort_enum.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +module Types + module Ci + class JobAnalyticsSortEnum < BaseEnum + graphql_name 'CiJobAnalyticsSort' + description 'Values for sorting CI job analytics' + + value 'NAME_ASC', 'Sort by name in ascending order.', value: :name_asc + value 'NAME_DESC', 'Sort by name in descending order.', value: :name_desc + + value 'MEAN_DURATION_ASC', + 'Sort by mean duration in ascending order.', + value: :mean_duration_in_seconds_asc + + value 'MEAN_DURATION_DESC', + 'Sort by mean duration in descending order.', + value: :mean_duration_in_seconds_desc + + value 'P95_DURATION_ASC', + 'Sort by 95th percentile duration in ascending order.', + value: :p95_duration_in_seconds_asc + + value 'P95_DURATION_DESC', + 'Sort by 95th percentile duration in descending order.', + value: :p95_duration_in_seconds_desc + + value 'SUCCESS_RATE_ASC', + 'Sort by success rate in ascending order.', + value: 
:rate_of_success_asc + + value 'SUCCESS_RATE_DESC', + 'Sort by success rate in descending order.', + value: :rate_of_success_desc + + value 'FAILED_RATE_ASC', + 'Sort by failed rate in ascending order.', + value: :rate_of_failed_asc + + value 'FAILED_RATE_DESC', + 'Sort by failed rate in descending order.', + value: :rate_of_failed_desc + + value 'CANCELED_RATE_ASC', + 'Sort by canceled rate in ascending order.', + value: :rate_of_canceled_asc + + value 'CANCELED_RATE_DESC', + 'Sort by canceled rate in descending order.', + value: :rate_of_canceled_desc + + def self.extract_sort_info(value) + value.match(/(?<field>.*)_(?<dir>.*)/) => {field:, dir:} + + [field.to_sym, dir.to_sym] + end + end + end +end diff --git a/app/graphql/types/ci/job_analytics_type.rb b/app/graphql/types/ci/job_analytics_type.rb new file mode 100644 index 0000000000000000000000000000000000000000..a58cea5694b47f09bcc7b6afdc9f81f3f21fcc60 --- /dev/null +++ b/app/graphql/types/ci/job_analytics_type.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +module Types + module Ci + class JobAnalyticsType < BaseObject # rubocop:disable Graphql/AuthorizeTypes -- This is authorized by the resolver + graphql_name 'CiJobAnalytics' + description 'CI job analytics data' + + field :name, GraphQL::Types::String, + null: true, + description: 'Job name.' + + field :stage, Types::Ci::StageType, + null: true, + description: 'Stage information.' + + field :mean_duration_in_seconds, GraphQL::Types::Float, + null: true, + description: 'Average duration of jobs in seconds.' + + field :p95_duration_in_seconds, GraphQL::Types::Float, + null: true, + description: '95th percentile duration of jobs in seconds.' + + # rubocop:disable GraphQL/ExtractType -- this type is based on hash data, not an ActiveRecord model + # So it creates friction to both code and the design of the API + + field :rate_of_success, GraphQL::Types::Float, + null: true, + description: 'Percentage of successful jobs.' 
+ + field :rate_of_failed, GraphQL::Types::Float, + null: true, + description: 'Percentage of failed jobs.' + + field :rate_of_canceled, GraphQL::Types::Float, + null: true, + description: 'Percentage of canceled jobs.' + + # rubocop:enable GraphQL/ExtractType + + def stage + return if (stage_id = object['stage_id']).nil? || stage_id.to_i == 0 + + BatchLoader::GraphQL.for(stage_id).batch do |stage_ids, loader| + ::Ci::Stage.id_in(stage_ids).with_pipeline.each do |stage| + loader.call(stage.id, stage) + end + end + end + end + end +end diff --git a/app/graphql/types/project_type.rb b/app/graphql/types/project_type.rb index 7b6dc896b985344ae4a5d560960751d540ce87b2..864eb8d444e157ec509be0ab86209ca7e9cdc22e 100644 --- a/app/graphql/types/project_type.rb +++ b/app/graphql/types/project_type.rb @@ -435,6 +435,12 @@ def self.authorization_scopes description: 'ID of the job.' end + field :job_analytics, + resolver: Resolvers::Ci::JobAnalyticsResolver, + description: 'CI job analytics for the project. Available only when ClickHouse is configured.', + experiment: { milestone: '18.3' }, + authorize: :read_build + field :pipelines, null: true, calls_gitaly: true, diff --git a/app/models/ci/stage.rb b/app/models/ci/stage.rb index f330858f46463ca03cb5524a43e345e3f18f7244..11d9a32f48e3a7e00302caa3d0b6ce5726823319 100644 --- a/app/models/ci/stage.rb +++ b/app/models/ci/stage.rb @@ -65,6 +65,7 @@ class Stage < Ci::ApplicationRecord scope :in_pipelines, ->(pipelines) { where(pipeline: pipelines) } scope :by_name, ->(names) { where(name: names) } scope :by_position, ->(positions) { where(position: positions) } + scope :with_pipeline, -> { preload(:pipeline) } with_options unless: :importing? 
do validates :project, presence: true diff --git a/app/services/ci/job_analytics/finder_service.rb b/app/services/ci/job_analytics/finder_service.rb new file mode 100644 index 0000000000000000000000000000000000000000..d76380bf84ac585f70fcb2367172c2b32b0a1472 --- /dev/null +++ b/app/services/ci/job_analytics/finder_service.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +module Ci + module JobAnalytics + class FinderService + attr_reader :project, :select_fields, :aggregations, :sort, :source, :ref, :from_time, :to_time, :name_search + + # @param project [Project] The project to find jobs for + # @param options [Hash] Options for filtering and configuring the finder + # @option options [Array] :select_fields Fields to select + # @option options [Array] :aggregations Aggregations to perform + # @option options [String] :sort Sort order + # @option options [String] :source Pipeline source + # @option options [String] :ref Git reference + # @option options [Time] :from_time Start time for filtering (defaults to 7 days ago) + # @option options [Time] :to_time End time for filtering + # @option options [String] :name_search Search by name of the pipeline jobs. 
+ def initialize(project, options = {}) + @project = project + @select_fields = options[:select_fields] || [] + @aggregations = options[:aggregations] || [] + @sort = options[:sort] + @source = options[:source] + @ref = options[:ref] + @from_time = options[:from_time] || 7.days.ago + @to_time = options[:to_time] + @name_search = options[:name_search] + end + + def execute + build_finder.query_builder + end + + private + + def build_finder + finder = ::ClickHouse::Finders::Ci::FinishedBuildsFinder.new + .for_project(project.id) + .select(*select_fields) + .select_aggregations(*aggregations) + + finder = finder.order_by(*Types::Ci::JobAnalyticsSortEnum.extract_sort_info(sort)) if sort + + finder = finder.filter_by_job_name(name_search) if name_search + + finder.where(pipeline_id: build_pipeline_finder.query_builder) # rubocop:disable CodeReuse/ActiveRecord -- not an active record where + end + + def build_pipeline_finder + finder = ::ClickHouse::Models::Ci::FinishedPipelinesBase.for_container(project).within_dates( + from_time, to_time) + + finder = finder.for_source(source) if source + + finder = finder.for_ref(ref) if ref + + finder.select(:id) + end + end + end +end diff --git a/app/services/ci/job_analytics/validator_service.rb b/app/services/ci/job_analytics/validator_service.rb new file mode 100644 index 0000000000000000000000000000000000000000..7e80abb9c39a6707a3fe852e0aa4fca6fe35f7db --- /dev/null +++ b/app/services/ci/job_analytics/validator_service.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +module Ci + module JobAnalytics + class ValidatorService + attr_reader :select_fields, :aggregations, :sort + + def initialize(select_fields, aggregations, sort) + @select_fields = select_fields + @aggregations = aggregations + @sort = sort + end + + def execute + validate_arguments! + end + + private + + def validate_arguments! 
+ return ServiceResponse.success unless sort + + field, _ = Types::Ci::JobAnalyticsSortEnum.extract_sort_info(sort) + + return ServiceResponse.success if aggregations.include?(field) || select_fields.include?(field) + + ServiceResponse.error( + message: "Cannot sort by #{field} without including it in either selectFields or aggregations" + ) + end + end + end +end diff --git a/doc/api/graphql/reference/_index.md b/doc/api/graphql/reference/_index.md index 23d76d6235b55df6b894e9dc9ab014d0b3506523..cd515e44f49d958bdab97d3f80ef65bac14bf6b0 100644 --- a/doc/api/graphql/reference/_index.md +++ b/doc/api/graphql/reference/_index.md @@ -15870,6 +15870,29 @@ The edge type for [`CiInstanceVariable`](#ciinstancevariable). | `cursor` | [`String!`](#string) | A cursor for use in pagination. | | `node` | [`CiInstanceVariable`](#ciinstancevariable) | The item at the end of the edge. | +#### `CiJobAnalyticsConnection` + +The connection type for [`CiJobAnalytics`](#cijobanalytics). + +##### Fields + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `edges` | [`[CiJobAnalyticsEdge]`](#cijobanalyticsedge) | A list of edges. | +| `nodes` | [`[CiJobAnalytics]`](#cijobanalytics) | A list of nodes. | +| `pageInfo` | [`PageInfo!`](#pageinfo) | Information to aid in pagination. | + +#### `CiJobAnalyticsEdge` + +The edge type for [`CiJobAnalytics`](#cijobanalytics). + +##### Fields + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `cursor` | [`String!`](#string) | A cursor for use in pagination. | +| `node` | [`CiJobAnalytics`](#cijobanalytics) | The item at the end of the edge. | + #### `CiJobArtifactConnection` The connection type for [`CiJobArtifact`](#cijobartifact). @@ -25197,6 +25220,22 @@ CI/CD variables for a GitLab instance. | `userPermissions` | [`JobPermissions!`](#jobpermissions) | Permissions for the current user on the resource. | | `webPath` | [`String`](#string) | Web path of the job. | +### `CiJobAnalytics` + +CI job analytics data. 
+ +#### Fields + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `meanDurationInSeconds` | [`Float`](#float) | Average duration of jobs in seconds. | +| `name` | [`String`](#string) | Job name. | +| `p95DurationInSeconds` | [`Float`](#float) | 95th percentile duration of jobs in seconds. | +| `rateOfCanceled` | [`Float`](#float) | Percentage of canceled jobs. | +| `rateOfFailed` | [`Float`](#float) | Percentage of failed jobs. | +| `rateOfSuccess` | [`Float`](#float) | Percentage of successful jobs. | +| `stage` | [`CiStage`](#cistage) | Stage information. | + ### `CiJobArtifact` #### Fields @@ -39464,6 +39503,34 @@ Returns [`CiJob`](#cijob). | ---- | ---- | ----------- | | `id` | [`JobID!`](#jobid) | ID of the job. | +##### `Project.jobAnalytics` + +{{< details >}} +**Introduced** in GitLab 18.3. +**Status**: Experiment. +{{< /details >}} + +CI job analytics for the project. Available only when ClickHouse is configured. + +Returns [`CiJobAnalyticsConnection`](#cijobanalyticsconnection). + +This field returns a [connection](#connections). It accepts the +four standard [pagination arguments](#pagination-arguments): +`before: String`, `after: String`, `first: Int`, and `last: Int`. + +###### Arguments + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `aggregations` | [`[CiJobAnalyticsAggregation!]!`](#cijobanalyticsaggregation) | Aggregation functions to apply. | +| `fromTime` | [`Time`](#time) | Start of the requested time (in UTC). Defaults to the pipelines started in the past week. | +| `nameSearch` | [`String`](#string) | Search by name of the pipeline jobs. Supports partial matches. | +| `ref` | [`String`](#string) | Branch that triggered the pipeline. | +| `selectFields` | [`[CiJobAnalyticsField!]!`](#cijobanalyticsfield) | Fields to select and group by. | +| `sort` | [`CiJobAnalyticsSort`](#cijobanalyticssort) | Sort order for the results. | +| `source` | [`CiPipelineSources`](#cipipelinesources) | Source of the pipeline. 
| +| `toTime` | [`Time`](#time) | End of the requested time (in UTC). Defaults to the pipelines started before the current date. | + ##### `Project.jobs` Jobs of a project. This field can only be resolved for one project in any single request. @@ -46742,6 +46809,46 @@ Available input types. | `NUMBER` | Number input. | | `STRING` | String input. | +### `CiJobAnalyticsAggregation` + +Aggregation functions available for CI job analytics. + +| Value | Description | +| ----- | ----------- | +| `MEAN_DURATION_IN_SECONDS` | Average duration of jobs in seconds. | +| `P95_DURATION_IN_SECONDS` | 95th percentile duration of jobs in seconds. | +| `RATE_OF_CANCELED` | Percentage of canceled jobs. | +| `RATE_OF_FAILED` | Percentage of failed jobs. | +| `RATE_OF_SUCCESS` | Percentage of successful jobs. | + +### `CiJobAnalyticsField` + +Fields available for selection in CI job analytics. + +| Value | Description | +| ----- | ----------- | +| `NAME` | Job name. | +| `STAGE` | Stage. | + +### `CiJobAnalyticsSort` + +Values for sorting CI job analytics. + +| Value | Description | +| ----- | ----------- | +| `CANCELED_RATE_ASC` | Sort by canceled rate in ascending order. | +| `CANCELED_RATE_DESC` | Sort by canceled rate in descending order. | +| `FAILED_RATE_ASC` | Sort by failed rate in ascending order. | +| `FAILED_RATE_DESC` | Sort by failed rate in descending order. | +| `MEAN_DURATION_ASC` | Sort by mean duration in ascending order. | +| `MEAN_DURATION_DESC` | Sort by mean duration in descending order. | +| `NAME_ASC` | Sort by name in ascending order. | +| `NAME_DESC` | Sort by name in descending order. | +| `P95_DURATION_ASC` | Sort by 95th percentile duration in ascending order. | +| `P95_DURATION_DESC` | Sort by 95th percentile duration in descending order. | +| `SUCCESS_RATE_ASC` | Sort by success rate in ascending order. | +| `SUCCESS_RATE_DESC` | Sort by success rate in descending order. 
| + ### `CiJobFailureReason` | Value | Description | diff --git a/lib/click_house/finders/ci/finished_builds_finder.rb b/lib/click_house/finders/ci/finished_builds_finder.rb index b2e34ba5b7803cb509a2396263b4c5fd6f3d82e4..ebdef8ae914b364106a5986e2e793e7647dec959 100644 --- a/lib/click_house/finders/ci/finished_builds_finder.rb +++ b/lib/click_house/finders/ci/finished_builds_finder.rb @@ -20,12 +20,14 @@ class FinishedBuildsFinder < ::ClickHouse::Models::BaseModel ERROR_MESSAGES = { select: "Cannot select columns: %{columns}. Allowed: #{ALLOWED_TO_SELECT.join(', ')}", + aggregate: "Cannot aggregate columns: %{columns}. Allowed: #{ALLOWED_AGGREGATIONS.join(', ')}", group: "Cannot group by column: %{column}. Allowed: #{ALLOWED_TO_GROUP.join(', ')}", order: "Cannot order by column: %{column}. Allowed: #{ALLOWED_TO_ORDER.join(', ')}" }.freeze ALLOWED_COLUMNS_BY_OPERATION = { select: ALLOWED_TO_SELECT, + aggregate: ALLOWED_AGGREGATIONS, group: ALLOWED_TO_GROUP, order: ALLOWED_TO_ORDER }.freeze @@ -56,6 +58,14 @@ def select(*fields, aggregate: false) aggregate ? 
query : query.group_by(*fields) end + def select_aggregations(*aggregations) + validate_columns!(aggregations, :aggregate) + + aggregations.reduce(self) do |query, aggregation| + query.method(aggregation).call + end + end + # Aggregation methods def mean_duration_in_seconds select( @@ -104,6 +114,10 @@ def group_by(*fields) end end + def filter_by_job_name(term) + where(query_builder.table[:name].matches("%#{term.downcase}%")) + end + private def validate_columns!(fields, operation, aggregate = false) @@ -130,9 +144,15 @@ def build_duration_aggregate(function, alias_name) [@query_builder.table[:duration]] ) - Arel::Nodes::Division.new( - duration_function, - Arel::Nodes.build_quoted(1000.0) + Arel::Nodes::NamedFunction.new( + 'round', + [ + Arel::Nodes::Division.new( + duration_function, + Arel::Nodes.build_quoted(1000.0) + ), + 2 + ] ).as(alias_name) end @@ -149,9 +169,15 @@ def build_rate_aggregate(status) total_count = Arel::Nodes::NamedFunction.new('count', []) - Arel::Nodes::Multiplication.new( - Arel::Nodes::Division.new(count_if, total_count), - Arel::Nodes.build_quoted(100) + Arel::Nodes::NamedFunction.new( + 'round', + [ + Arel::Nodes::Multiplication.new( + Arel::Nodes::Division.new(count_if, total_count), + Arel::Nodes.build_quoted(100) + ), + 2 + ] ).as("rate_of_#{status}") end diff --git a/lib/click_house/models/base_model.rb b/lib/click_house/models/base_model.rb index 8b43c7ee970009d22858496a1ecbef040e130b41..cd7fe4d0e97e44b1789c85b4f9e9576471bdcb0f 100644 --- a/lib/click_house/models/base_model.rb +++ b/lib/click_house/models/base_model.rb @@ -8,6 +8,8 @@ class BaseModel < ClickHouse::Client::QueryLike def_delegators :@query_builder, :to_sql, :to_redacted_sql + attr_reader :query_builder + def initialize(query_builder = ClickHouse::Client::QueryBuilder.new(self.class.table_name)) @query_builder = query_builder end diff --git a/lib/click_house/models/ci/finished_pipelines_base.rb b/lib/click_house/models/ci/finished_pipelines_base.rb index 
7375ebde0d5380fec3b4a547f97f6cd9fd55fac4..f168961abd640db040a07a3b7775a785a4605e6d 100644 --- a/lib/click_house/models/ci/finished_pipelines_base.rb +++ b/lib/click_house/models/ci/finished_pipelines_base.rb @@ -4,6 +4,10 @@ module ClickHouse # rubocop:disable Gitlab/BoundedContexts -- Existing module module Models module Ci class FinishedPipelinesBase < ClickHouse::Models::BaseModel + def self.table_name + 'ci_finished_pipelines' + end + def self.time_window_valid?(from_time, to_time) raise NotImplementedError, "subclasses of #{self.class.name} must implement #{__method__}" end @@ -51,7 +55,11 @@ def for_group(group) def within_dates(from_time, to_time) query = self - started_at_bucket = @query_builder.table[:started_at_bucket] + started_at_bucket = if instance_of?(ClickHouse::Models::Ci::FinishedPipelinesBase) + @query_builder.table[:started_at] + else + @query_builder.table[:started_at_bucket] + end # rubocop: disable CodeReuse/ActiveRecord -- this is a ClickHouse model query = query.where(started_at_bucket.gteq(format_time(from_time))) if from_time diff --git a/lib/gitlab/graphql/pagination/click_house_aggregated_connection.rb b/lib/gitlab/graphql/pagination/click_house_aggregated_connection.rb new file mode 100644 index 0000000000000000000000000000000000000000..5763ecdd907a8e8ca36aeb5f349be68bdf069ead --- /dev/null +++ b/lib/gitlab/graphql/pagination/click_house_aggregated_connection.rb @@ -0,0 +1,307 @@ +# frozen_string_literal: true + +# Limitation: Currently, this supports only one aggregated column. 
+module Gitlab + module Graphql + module Pagination + class ClickHouseAggregatedConnection < GraphQL::Pagination::Connection + include Gitlab::Utils::StrongMemoize + + # rubocop:disable CodeReuse/ActiveRecord -- requires AR methods to build pagination conditions + + def initialize( + items, context: nil, first: nil, after: nil, last: nil, before: nil, max_page_size: nil, + **kwargs) + super + @query_builder = items + end + + def nodes + items = limited_nodes + + items = items.reverse if (last && !before) || (first && before) + + if last + items.last(limit_value) + else + items.first(limit_value) + end + end + + # rubocop:disable Naming/PredicateName -- methods required by paginator + def has_previous_page + if after + true + elsif last + limited_nodes.size > limit_value + else + false + end + end + strong_memoize_attr :has_previous_page + + def has_next_page + if before + true + elsif first + limited_nodes.size > limit_value + else + false + end + end + strong_memoize_attr :has_next_page + + # rubocop:enable Naming/PredicateName + + def cursor_for(item) + encode_cursor(item) + end + + private + + attr_reader :query_builder + + def limited_nodes + query = query_builder.dup + + query = apply_cursor_conditions(query) + + limit = limit_value + query = query.limit(limit + 1) + + query = reverse_order(query) if last && !first + + execute_query(query) + end + strong_memoize_attr :limited_nodes + + def apply_cursor_conditions(query) + query = ensure_stable_ordering(query) + + if after + apply_after_cursor(query, after) + elsif before + apply_before_cursor(query, before) + else + query + end + end + + def ensure_stable_ordering(query) + group_fields = extract_group_by_fields + + # Get current orders + current_orders = query.manager.ast.orders.dup + + # Extract the primary sort field from current orders + primary_sort_fields = current_orders.filter_map do |order| + if order.expr.is_a?(String) + order.expr + elsif order.expr.respond_to?(:name) + order.expr.name.to_s + end + 
end + + # Add GROUP BY fields to ORDER BY if they're not already there + group_fields.each do |field| + field_str = field.to_s + # Skip if this field is already in the ORDER BY + next if primary_sort_fields.include?(field_str) + + current_orders << Arel::Nodes::Ascending.new( + Arel::Nodes::SqlLiteral.new(field_str) + ) + end + + query.tap do |q| + q.manager.ast.orders = current_orders + end + end + + def apply_after_cursor(query, cursor) + decoded = decode_cursor(cursor) + conditions = build_cursor_conditions(decoded, direction: :after) + + query.having(conditions) + end + + def apply_before_cursor(query, cursor) + decoded = decode_cursor(cursor) + conditions = build_cursor_conditions(decoded, direction: :before) + + query.having(conditions) + end + + # currently this method supports only one aggregated order + def build_cursor_conditions(cursor_data, direction:) + sort_info = extract_sort_info + sort_field = sort_info[:field] + return [] if sort_field != cursor_data['sort_field'].to_sym + + sort_value = cursor_data['sort_value'] + sort_order = sort_info[:order] + sort_attr = Arel::Nodes::SqlLiteral.new(sort_field.to_s) + + # Start with the primary sort condition + or_conditions = [build_primary_sort_condition(sort_attr, sort_value, sort_order, direction)] + + # Add tie-breaking conditions for group fields + group_field_conditions = build_group_field_conditions( + cursor_data, direction, sort_attr, sort_value, sort_field + ) + + or_conditions.concat(group_field_conditions) + + # Build nested OR conditions for stable ordering + or_conditions.reduce do |accumulated, condition| + Arel::Nodes::Or.new(accumulated, condition) + end + end + + def build_primary_sort_condition(sort_attr, sort_value, sort_order, direction) + if direction == :after + sort_order == :desc ? sort_attr.lt(sort_value) : sort_attr.gt(sort_value) + else + sort_order == :desc ? 
sort_attr.gt(sort_value) : sort_attr.lt(sort_value)
+          end
+        end
+
+        def build_group_field_conditions(cursor_data, direction, sort_attr, sort_value, sort_field)
+          group_fields = extract_group_by_fields
+          conditions = [] # collects one AND-chain per GROUP BY field that passes the guard below
+
+          group_fields.each_with_index do |field, index|
+            field_value = cursor_data['group_by_values'][field.to_s]
+            next if !field_value || field.to_s == sort_field.to_s # skip nil/false cursor values and the sort field
+
+            and_conditions = [sort_attr.eq(sort_value)] # every chain starts with sort_attr == sort_value
+
+            # Pin each earlier GROUP BY field to the value stored in the cursor
+            add_previous_field_conditions(and_conditions, group_fields, index, cursor_data, sort_field)
+
+            # Require the current field to be past its cursor value: gt for :after, lt for any other direction
+            field_attr = query_builder.table[field]
+            comparison = direction == :after ? field_attr.gt(field_value) : field_attr.lt(field_value)
+            and_conditions << comparison
+
+            conditions << and_conditions.reduce(:and) # fold the chain into a single node via #and
+          end
+
+          conditions
+        end
+
+        def add_previous_field_conditions(and_conditions, group_fields, current_index, cursor_data, sort_field)
+          group_fields.first(current_index).each do |prev_field| # only fields listed before current_index
+            prev_value = cursor_data['group_by_values'][prev_field.to_s]
+            next if !prev_value || prev_field.to_s == sort_field.to_s
+
+            field_attr = query_builder.table[prev_field]
+            and_conditions << field_attr.eq(prev_value) # appended in place: the caller's array is mutated
+          end
+        end
+
+        def encode_cursor(node)
+          sort_info = extract_sort_info
+          group_fields = extract_group_by_fields
+
+          cursor_data = {
+            'sort_field' => sort_info[:field].to_s,
+            'sort_value' => node[sort_info[:field].to_s],
+            'group_by_values' => {} # filled by the loop below
+          }
+
+          # Record the node's value for every GROUP BY field, keyed by the field name as a string
+          group_fields.each do |field|
+            field_name = field.to_s
+            # The sort field is skipped here; its value is already stored under 'sort_value'
+            next if field_name == sort_info[:field].to_s
+
+            cursor_data['group_by_values'][field_name] = node[field_name]
+          end
+
+          encode(Gitlab::Json.dump(cursor_data)) # JSON first, then encode (defined outside this chunk)
+        end
+
+        def decode_cursor(cursor)
+          Gitlab::Json.parse(decode(cursor))
+        rescue JSON::ParserError # NOTE(review): only JSON parse errors are mapped - confirm decode raises nothing else
+          raise Gitlab::Graphql::Errors::ArgumentError, 'Invalid cursor given'
+        end
+
+        def extract_sort_info
+          orders = query_builder.manager.ast.orders
+
+          if orders.present?
+            first_order = orders.first # only the first ORDER BY node is consulted
+            field = if first_order.expr.is_a?(String)
+                      first_order.expr.to_sym
+                    elsif first_order.expr.respond_to?(:name)
+                      first_order.expr.name.to_sym
+                    end # field stays nil when neither branch matches
+
+            order = case first_order
+                    when Arel::Nodes::Ascending
+                      :asc
+                    when Arel::Nodes::Descending
+                      :desc
+                    else
+                      :asc # any other node type is reported as ascending
+                    end
+
+            { field: field, order: order }
+          else
+            {} # no ORDER BY on the query
+          end
+        end
+        strong_memoize_attr :extract_sort_info
+
+        def extract_group_by_fields
+          groups = query_builder.manager.ast.cores.first.groups
+
+          if groups.present? && groups.first.respond_to?(:expr)
+            group_expr = groups.first.expr # only the first group node is read
+
+            if group_expr.is_a?(Array)
+              group_expr.map do |attr|
+                if attr.respond_to?(:name)
+                  attr.name.to_sym
+                else
+                  attr.to_sym
+                end
+              end
+            else
+              [group_expr.respond_to?(:name) ? group_expr.name.to_sym : group_expr.to_sym]
+            end
+          else
+            [] # no groups, or the first group node does not respond to #expr
+          end
+        end
+        strong_memoize_attr :extract_group_by_fields
+
+        def reverse_order(query)
+          reversed_orders = query.manager.ast.orders.map do |order|
+            if order.is_a?(Arel::Nodes::Ascending)
+              Arel::Nodes::Descending.new(order.expr)
+            else
+              Arel::Nodes::Ascending.new(order.expr)
+            end
+          end
+
+          query.tap do |q|
+            q.manager.ast.orders = reversed_orders # NOTE(review): overwrites orders on the query passed in - confirm
+          end
+        end
+
+        def limit_value
+          @limit_value ||= [first, last, max_page_size || GitlabSchema.default_max_page_size].compact.min
+        end
+
+        def execute_query(query)
+          clickhouse_connection = context[:connection] || ::ClickHouse::Connection.new(:main) # context value wins
+          clickhouse_connection.select(query)
+        end
+      end
+      # rubocop:enable CodeReuse/ActiveRecord
+    end
+  end
+end
diff --git a/lib/gitlab/graphql/pagination/click_house_aggregated_relation.rb b/lib/gitlab/graphql/pagination/click_house_aggregated_relation.rb
new file mode 100644
index 0000000000000000000000000000000000000000..4969f87d63aee9ed8bffe040922713a18a7d908b
--- /dev/null
+++ b/lib/gitlab/graphql/pagination/click_house_aggregated_relation.rb
@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Graphql
+    module Pagination
+      # Thin SimpleDelegator wrapper (adds no methods) for ClickHouse aggregated query results that need cursor pagination.
+      # Use it instead of relying on ClickHouseConnection when the query is a GROUP BY with aggregations.
+      class ClickHouseAggregatedRelation < SimpleDelegator
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/graphql/pagination/connections.rb b/lib/gitlab/graphql/pagination/connections.rb
index 8550c2b15bb219e42da32919abca50e82af4aaae..073853b6d5d468478d63094a5ee560f42f32a29f 100644
--- a/lib/gitlab/graphql/pagination/connections.rb
+++ b/lib/gitlab/graphql/pagination/connections.rb
@@ -29,6 +29,11 @@ def self.use(schema)
             ::ClickHouse::Client::QueryBuilder,
             Gitlab::Graphql::Pagination::ClickHouseConnection
           )
+
+          schema.connections.add(
+            Gitlab::Graphql::Pagination::ClickHouseAggregatedRelation,
+            Gitlab::Graphql::Pagination::ClickHouseAggregatedConnection
+          )
         end
       end
     end
diff --git a/spec/lib/click_house/finders/ci/finished_builds_finder_spec.rb b/spec/lib/click_house/finders/ci/finished_builds_finder_spec.rb
index 1875633af4a4862c58a9387c12415c26de16944b..d2da816c4043e5eb30e571bb8941e343bbde72e6 100644
--- a/spec/lib/click_house/finders/ci/finished_builds_finder_spec.rb
+++ b/spec/lib/click_house/finders/ci/finished_builds_finder_spec.rb
@@ -427,4 +427,17 @@ def create_builds(count:, status:, stage:, name:, duration_seconds:)
       finished_at: base_time + duration_seconds.seconds
     )
   end
+
+  private
+
+  def create_builds(count:, status:, stage:, name:, duration_seconds:) # NOTE(review): same signature as the helper above?
+    create_list(:ci_build, count, status,
+      project: stage.project,
+      pipeline: stage.pipeline,
+      ci_stage: stage,
+      name: name,
+      started_at: base_time,
+      finished_at: base_time + duration_seconds.seconds
+    )
+  end
 end