diff --git a/app/finders/ci/build_name_finder.rb b/app/finders/ci/build_name_finder.rb index 4e145ac0bc906054dcd6be9c59bdc372904242b5..93ee91e36be927873bb5c1b14538d080367c479f 100644 --- a/app/finders/ci/build_name_finder.rb +++ b/app/finders/ci/build_name_finder.rb @@ -30,7 +30,24 @@ def filter_by_name(build_relation) .where(project_id: project.id) .pg_full_text_search_in_model(limited_name_search_terms) - build_relation.where("(id, partition_id) IN (?)", build_name_relation.select(:build_id, :partition_id)) + if Feature.disabled?(:read_from_ci_job_infos, project) + return build_relation.where("(id, partition_id) IN (?)", build_name_relation.select(:build_id, :partition_id)) + end + + job_info_relation = Ci::JobInfo + .where(project_id: project.id) + .pg_full_text_search_in_model(limited_name_search_terms) + + # NOTE: This query would be much more efficient on ci_job_infos alone. + # TODO: Evaluate the query performance after we create and write to ci_job_infos. Then we can decide on either: + # 1. Keep this OR query if performance is not severely impacted. + # 2. Plan to migrate all existing data to ci_job_infos before we switch reads. + build_relation + .where("(job_info_id, partition_id) IN (?)", job_info_relation.select(:id, :partition_id)) + .or( + build_relation + .where("(id, partition_id) IN (?)", build_name_relation.select(:build_id, :partition_id)) + ) end # rubocop: enable CodeReuse/ActiveRecord end diff --git a/app/finders/ci/build_source_finder.rb b/app/finders/ci/build_source_finder.rb index 8c27ece61e9ed22e8823bca7f3393ffd97fa6890..f16f846de8e3365b0c50c978ba11f8e0bb63a07d 100644 --- a/app/finders/ci/build_source_finder.rb +++ b/app/finders/ci/build_source_finder.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true module Ci + # TODO: Check what index(es) on ci_job_infos is best to support this Finder's queries. + # TODO: Compare query performance before/after moving ci_build_sources to ci_job_infos. 
class BuildSourceFinder def initialize(relation:, sources:, project:, params: {}) raise ArgumentError, 'Only Ci::Builds are source searchable' unless relation.klass == Ci::Build diff --git a/app/graphql/resolvers/ci/all_jobs_resolver.rb b/app/graphql/resolvers/ci/all_jobs_resolver.rb index 6513f0ebde54dc89813bb660e0fbf06598e7286a..9b86caf16436f2c7e7da8339e3adf4f43c2616a2 100644 --- a/app/graphql/resolvers/ci/all_jobs_resolver.rb +++ b/app/graphql/resolvers/ci/all_jobs_resolver.rb @@ -61,8 +61,8 @@ def preloads previous_stage_jobs_or_needs: [:needs, :pipeline], artifacts: [:job_artifacts], pipeline: [:user], - kind: [:metadata, :job_definition, :error_job_messages], - retryable: [:metadata, :job_definition, :error_job_messages], + kind: [:metadata, :job_definition, :job_info, :error_job_messages], + retryable: [:metadata, :job_definition, :job_info, :error_job_messages], project: [{ project: [:route, { namespace: [:route] }] }], commit_path: [:pipeline, { project: { namespace: [:route] } }], ref_path: [{ project: [:route, { namespace: [:route] }] }], diff --git a/app/graphql/resolvers/ci/runner_jobs_resolver.rb b/app/graphql/resolvers/ci/runner_jobs_resolver.rb index 35a3690ec157496bbee9c9bb330df61f91984248..54554e8ed7b73cb8046bf53c2b8b1d39f13f3e8a 100644 --- a/app/graphql/resolvers/ci/runner_jobs_resolver.rb +++ b/app/graphql/resolvers/ci/runner_jobs_resolver.rb @@ -36,6 +36,7 @@ def preloads detailed_status: [ :metadata, :job_definition, + :job_info, :error_job_messages, { pipeline: [:merge_request] }, { project: [:route, { namespace: :route }] } diff --git a/app/graphql/types/ci/build_need_type.rb b/app/graphql/types/ci/build_need_type.rb index dbdfc3c5176be57b1b8cea298bbda61b14f89139..78cccad41640fd80254984a32a3280f532f16ad2 100644 --- a/app/graphql/types/ci/build_need_type.rb +++ b/app/graphql/types/ci/build_need_type.rb @@ -8,7 +8,7 @@ class BuildNeedType < BaseObject graphql_name 'CiBuildNeed' field :id, GraphQL::Types::ID, null: false, - description: 'ID of 
the BuildNeed.' + description: 'ID of Ci::BuildNeed or Gitlab::Ci::JobNeeds::Collection::Need.' field :name, GraphQL::Types::String, null: true, description: 'Name of the job we need to complete.' end diff --git a/app/graphql/types/ci/job_need_union.rb b/app/graphql/types/ci/job_need_union.rb index 61ad5432db820ae2aed32ca27199c0e2c8038360..7aaeaea0a3d1774b29fbcf4dd027ce488ee65ffc 100644 --- a/app/graphql/types/ci/job_need_union.rb +++ b/app/graphql/types/ci/job_need_union.rb @@ -9,7 +9,7 @@ class JobNeedUnion < GraphQL::Schema::Union def self.resolve_type(object, context) case object - when ::Ci::BuildNeed + when ::Ci::BuildNeed, ::Gitlab::Ci::JobNeeds::Collection::Need Types::Ci::BuildNeedType when CommitStatus Types::Ci::JobType diff --git a/app/graphql/types/ci/job_type.rb b/app/graphql/types/ci/job_type.rb index cdfec538791c8ea29a58825165dc98495223c3fb..b3caf00f6f3891f52d2946bd6eb153f9b69281e0 100644 --- a/app/graphql/types/ci/job_type.rb +++ b/app/graphql/types/ci/job_type.rb @@ -176,13 +176,21 @@ def trace object.trace if object.has_trace? end + def needs + if object.read_from_job_info? 
+ object.job_needs + else + object.needs + end + end + def previous_stage_jobs_or_needs if object.scheduling_type == 'stage' Gitlab::Graphql::Lazy.with_value(previous_stage_jobs) do |jobs| jobs end else - object.needs + needs end end diff --git a/app/graphql/types/ci/stage_type.rb b/app/graphql/types/ci/stage_type.rb index a525da932715d028de4f25ee1473f107bb9624c4..bf191739d8d9fe9ec20891eff09fe6170b3e6cf0 100644 --- a/app/graphql/types/ci/stage_type.rb +++ b/app/graphql/types/ci/stage_type.rb @@ -64,7 +64,7 @@ def jobs_for_pipeline(pipeline, stage_ids, include_needs) jobs = pipeline.statuses.latest.where(stage_id: stage_ids) preloaded_relations = [ - :project, :metadata, :job_definition, :job_artifacts, + :project, :metadata, :job_definition, :job_info, :job_artifacts, :downstream_pipeline, :error_job_messages ] preloaded_relations << :needs if include_needs diff --git a/app/models/ci/bridge.rb b/app/models/ci/bridge.rb index 04e5d7a3e54058c360073c8dc0d0a33800923107..700a7e4641b1011fa319e2e4e8d3c29bd2803201 100644 --- a/app/models/ci/bridge.rb +++ b/app/models/ci/bridge.rb @@ -103,7 +103,8 @@ def self.with_preloads def self.clone_accessors %i[pipeline project ref tag name allow_failure stage_idx when environment description needs_attributes - scheduling_type ci_stage partition_id resource_group].freeze + scheduling_type ci_stage partition_id resource_group + job_info_id].freeze end def retryable? 
diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb index 1a430262f32748e6726c3c5981a95141019af03d..a382f1e74a183c24cbeac09d3ab89cc938a5740b 100644 --- a/app/models/ci/build.rb +++ b/app/models/ci/build.rb @@ -286,7 +286,8 @@ def clone_accessors environment coverage_regex description tag_list protected needs_attributes job_variables_attributes resource_group scheduling_type timeout timeout_source debug_trace_enabled - ci_stage partition_id execution_config_id inputs_attributes].freeze + ci_stage partition_id execution_config_id inputs_attributes + job_info_id].freeze end def supported_keyset_orderings diff --git a/app/models/ci/build_dependencies.rb b/app/models/ci/build_dependencies.rb index 7e821394ea96c43b3c244e9b6f5dca1ba5a7bca3..0ff39580b92e531a52275f4f28ce5d83ce511b5b 100644 --- a/app/models/ci/build_dependencies.rb +++ b/app/models/ci/build_dependencies.rb @@ -133,7 +133,12 @@ def from_previous_stages(scope) end def from_needs(scope) - needs_names = processable.needs.artifacts.select(:name) + needs_names = if processable.read_from_job_info? + processable.job_needs.names_with_artifacts_true + else + processable.needs.artifacts.select(:name) + end + scope.where(name: needs_names) end diff --git a/app/models/ci/job_definition.rb b/app/models/ci/job_definition.rb index c4a41b0bd27ee1114f8025f134a6b3383ed87664..b3cc77352aedeb9fa7783398f5459913070f4cb2 100644 --- a/app/models/ci/job_definition.rb +++ b/app/models/ci/job_definition.rb @@ -3,7 +3,7 @@ module Ci # The purpose of this class is to store immutable duplicate Processable related # data that can be disposed after all the pipelines that use it are archived. - # Data that should be persisted forever, should be stored with Ci::Build model. + # Data that must persist after archival should be stored with Ci::Build or Ci::JobInfo. 
class JobDefinition < Ci::ApplicationRecord include Ci::Partitionable include BulkInsertSafe diff --git a/app/models/ci/job_info.rb b/app/models/ci/job_info.rb new file mode 100644 index 0000000000000000000000000000000000000000..f2070fe1f1a9679c1eb770e7b126c36c9927012b --- /dev/null +++ b/app/models/ci/job_info.rb @@ -0,0 +1,113 @@ +# frozen_string_literal: true + +module Ci + # The purpose of this class is to store immutable, intrinsic duplicate job data + # that must persist even after all the pipelines that use it are archived. + # Data that can be disposed after archival should be stored with Ci::JobDefinition. + class JobInfo < Ci::ApplicationRecord + include Ci::Partitionable + include BulkInsertSafe + include PgFullTextSearchable + + self.table_name = :p_ci_job_infos + self.primary_key = :id + + ignore_column :search_vector, remove_never: true # Value is auto-generated by DB; must ignore it for bulk insert + + # IMPORTANT: Order of attribute keys is important for the checksum calculation. 
+ JOB_ATTRIBUTES = [ + :scheduling_type, + :source, + :name, + :needs_attributes + ].freeze + NORMALIZED_DATA_COLUMNS = [ + :scheduling_type, + :source, + :name + ].freeze + + # We're copying over these values to ci_job_infos but not removing them from their original/initial destinations + COPY_ONLY_ATTRIBUTES = [:name].freeze + + MAX_JOB_NAME_LENGTH = 255 + + query_constraints :id, :partition_id + partitionable scope: ->(_) { Ci::Pipeline.current_partition_value }, partitioned: true + + belongs_to :project + + validates :project, presence: true + validates :name, presence: true, length: { maximum: MAX_JOB_NAME_LENGTH } + # rubocop:disable Database/JsonbSizeLimit -- no updates + validates :config, json_schema: { filename: 'ci_job_infos_config' } + # rubocop:enable Database/JsonbSizeLimit + + attribute :config, ::Gitlab::Database::Type::SymbolizedJsonb.new + + enum :scheduling_type, { stage: 0, dag: 1 }, prefix: true + enum :source, { + scan_execution_policy: 1001, + pipeline_execution_policy: 1002 + }.merge(::Enums::Ci::Pipeline.sources) + + scope :for_project, ->(project_id) { where(project_id: project_id) } + scope :for_checksum, ->(checksum) { where(checksum: checksum) } + + def self.fabricate(job_attrs:, project_id:, partition_id:) + new( + project_id: project_id, + partition_id: partition_id, + created_at: Time.current + ).mutate(job_attrs) + end + + def mutate(new_job_attrs) + raise 'Cannot mutate persisted job_info record.' if readonly? + + updated_job_attrs = job_attributes.compact.merge(new_job_attrs) + + assign_attributes(**prepare_job_attributes_and_checksum(updated_job_attrs)) + self + end + + # Hash containing all attributes: config + normalized_data + def job_attributes + attributes.deep_symbolize_keys.slice(*JOB_ATTRIBUTES).merge(config) + end + + def readonly? + persisted? 
+ end + + private + + def prepare_job_attributes_and_checksum(job_attrs) + sanitized_job_attrs = normalize_and_sanitize(job_attrs) + + # NOTE: Checksum is generated with all attributes including normalized columns. But when storing + # the data, we can save space by excluding the normalized column values from the config hash. + { + checksum: generate_checksum(sanitized_job_attrs), + config: sanitized_job_attrs.except(*NORMALIZED_DATA_COLUMNS), + **sanitized_job_attrs.slice(*NORMALIZED_DATA_COLUMNS) + } + end + + def normalize_and_sanitize(job_attrs) + data = job_attrs.deep_symbolize_keys + + NORMALIZED_DATA_COLUMNS.each do |col| + data[col] = data.fetch(col) { self.class.column_defaults[col.to_s] } + end + + data.slice(*JOB_ATTRIBUTES) + end + + def generate_checksum(job_attrs) + job_attrs + .then { |data| Gitlab::Json.dump(data) } + .then { |data| Digest::SHA256.hexdigest(data) } + end + end +end diff --git a/app/models/ci/pipeline.rb b/app/models/ci/pipeline.rb index d4515071a907c94a96374b35e8a3c595e3b55dc9..04e0adc84704ca0e6fc7063bae59cea6dca39c1c 100644 --- a/app/models/ci/pipeline.rb +++ b/app/models/ci/pipeline.rb @@ -106,6 +106,7 @@ class Pipeline < Ci::ApplicationRecord # DEPRECATED: has_many :statuses, ->(pipeline) { in_partition(pipeline) }, class_name: 'CommitStatus', foreign_key: :commit_id, inverse_of: :pipeline, partition_foreign_key: :partition_id has_many :processables, ->(pipeline) { in_partition(pipeline) }, class_name: 'Ci::Processable', foreign_key: :commit_id, inverse_of: :pipeline, partition_foreign_key: :partition_id + # TODO: We can probably remove this association: https://gitlab.com/gitlab-org/gitlab/-/issues/584000 has_many :latest_statuses_ordered_by_stage, ->(pipeline) { latest.in_partition(pipeline).order(:stage_idx, :stage) }, class_name: 'CommitStatus', foreign_key: :commit_id, inverse_of: :pipeline, partition_foreign_key: :partition_id has_many :latest_statuses, ->(pipeline) { latest.in_partition(pipeline) }, class_name: 
'CommitStatus', foreign_key: :commit_id, inverse_of: :pipeline, partition_foreign_key: :partition_id has_many :statuses_order_id_desc, ->(pipeline) { in_partition(pipeline).order_id_desc }, class_name: 'CommitStatus', foreign_key: :commit_id, @@ -722,6 +723,14 @@ def self.internal_id_scope_usage :ci_pipelines end + # TODO: Remove this method after migrating data to ci_job_infos + def read_from_job_infos? + # If a pipeline is created with FF `write_to_ci_job_infos` enabled, then all its jobs would + # have job_info records. So we just need to check one job if it has job_info. + # We check the oldest job in the pipeline since recent retries could create a new job_info record when cloning. + processables.first&.read_from_job_info? + end + def ci_pipeline_statuses_rate_limited? Gitlab::ApplicationRateLimiter.throttled?( :ci_pipeline_statuses_subscription, @@ -740,7 +749,15 @@ def trigger_status_change_subscriptions end def uses_needs? - processables.where(scheduling_type: :dag).any? + # TODO: Check if should keep index_p_ci_job_infos_on_id_where_scheduling_type_dag or a different index + if read_from_job_infos? + processables + .joins(:job_info) + .where(p_ci_job_infos: { scheduling_type: :dag }) + .any? + else + processables.where(scheduling_type: :dag).any? + end end def stages_count @@ -1585,6 +1602,7 @@ def source_ref_path # Set scheduling type of processables if they were created before scheduling_type # data was deployed (https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22246). + # # TODO: We can probably remove this method: https://gitlab.com/gitlab-org/gitlab/-/issues/583998 def ensure_scheduling_type! processables.populate_scheduling_type! 
end diff --git a/app/models/ci/processable.rb b/app/models/ci/processable.rb index 398f08f0ed3fe13e1468192cb4110cdffad859f6..9e74fc733660d99fd75ac1f76e8bed55eb5e3fa7 100644 --- a/app/models/ci/processable.rb +++ b/app/models/ci/processable.rb @@ -14,6 +14,7 @@ class Processable < ::CommitStatus self.allow_legacy_sti_class = true attribute :temp_job_definition + attribute :temp_job_info has_one :resource, class_name: 'Ci::Resource', foreign_key: 'build_id', inverse_of: :processable has_one :sourced_pipeline, class_name: 'Ci::Sources::Pipeline', foreign_key: :source_job_id, inverse_of: :source_job @@ -62,12 +63,14 @@ class Processable < ::CommitStatus scope :preload_job_definition_instances, -> { preload(:job_definition_instance) } scope :manual_actions, -> { where(when: :manual, status: COMPLETED_STATUSES + %i[manual]) } + # TODO: We can probably remove this scope: https://gitlab.com/gitlab-org/gitlab/-/issues/584000 scope :with_needs, ->(names = nil) do needs = Ci::BuildNeed.scoped_build.select(1) needs = needs.where(name: names) if names where('EXISTS (?)', needs) end + # TODO: We can probably remove this scope: https://gitlab.com/gitlab-org/gitlab/-/issues/584000 scope :without_needs, ->(names = nil) do needs = Ci::BuildNeed.scoped_build.select(1) needs = needs.where(name: names) if names @@ -158,9 +161,14 @@ class Processable < ::CommitStatus def self.fabricate(attrs) attrs = attrs.dup + info_attrs = attrs.slice(*Ci::JobInfo::JOB_ATTRIBUTES) definition_attrs = attrs.extract!(*Ci::JobDefinition::CONFIG_ATTRIBUTES) attrs[:tag_list] = definition_attrs[:tag_list] if definition_attrs.key?(:tag_list) + if Feature.enabled?(:stop_writing_ci_job_info_to_old_destinations, attrs[:project]) + # Hash#delete accepts exactly one key; use ActiveSupport's except! to drop all + # attributes that now live exclusively in ci_job_infos (everything except COPY_ONLY). + attrs.except!(*(Ci::JobInfo::JOB_ATTRIBUTES - Ci::JobInfo::COPY_ONLY_ATTRIBUTES)) + end + new(attrs).tap do |job| job_definition = ::Ci::JobDefinition.fabricate( config: definition_attrs, @@ -169,6 +177,16 @@ def self.fabricate(attrs) ) job.temp_job_definition = job_definition + + next
unless Feature.enabled?(:write_to_ci_job_infos, attrs[:project]) + + job_info = ::Ci::JobInfo.fabricate( + job_attrs: info_attrs, + project_id: job.project_id, + partition_id: job.partition_id + ) + + job.temp_job_info = job_info end end @@ -186,6 +204,7 @@ def self.select_with_aggregated_needs(project) # Old processables may have scheduling_type as nil, # so we need to ensure the data exists before using it. + # TODO: We can probably remove this method: https://gitlab.com/gitlab-org/gitlab/-/issues/583998 def self.populate_scheduling_type! needs = Ci::BuildNeed.scoped_build.select(1) where(scheduling_type: nil).update_all( @@ -201,7 +220,8 @@ def assign_resource_from_resource_group(processable) end validates :type, presence: true - validates :scheduling_type, presence: true, on: :create, unless: :importing? + validates :scheduling_type, presence: true, on: :create, + unless: -> { importing? || Feature.enabled?(:stop_writing_ci_job_info_to_old_destinations, project) } delegate :merge_request?, :merge_request_ref?, @@ -236,7 +256,11 @@ def archived?(...) end def aggregated_needs_names - read_attribute(:aggregated_needs_names) + if read_from_job_info? + job_needs.names + else + read_attribute(:aggregated_needs_names) + end end def schedulable? @@ -276,25 +300,13 @@ def with_resource_group? self.resource_group_id.present? end - # Overriding scheduling_type enum's method for nil `scheduling_type`s - def scheduling_type_dag? - scheduling_type.nil? ? find_legacy_scheduling_type == :dag : super - end - - # scheduling_type column of previous builds/bridges have not been populated, - # so we calculate this value on runtime when we need it. - def find_legacy_scheduling_type - strong_memoize(:find_legacy_scheduling_type) do - needs.exists? ? 
:dag : :stage - end - end - def needs_attributes strong_memoize(:needs_attributes) do needs.map { |need| need.attributes.except('id', 'build_id') } end end + # TODO: We can probably remove this method: https://gitlab.com/gitlab-org/gitlab/-/issues/583998 def ensure_scheduling_type! # If this has a scheduling_type, it means all processables in the pipeline already have. return if scheduling_type diff --git a/app/models/commit_status.rb b/app/models/commit_status.rb index 32efa5ac6644f99567834a128bc3762b0deda119..1fddb3780e3a7dc7d97f61aceb77d3f5ae1ce302 100644 --- a/app/models/commit_status.rb +++ b/app/models/commit_status.rb @@ -31,7 +31,17 @@ class CommitStatus < Ci::ApplicationRecord ->(build) { in_partition(build) }, class_name: 'Ci::Stage', foreign_key: :stage_id, + partition_foreign_key: :partition_id, + inverse_of: :statuses + + # NOTE: We need to have these relationships in CommitStatus because generic + # commit statuses also use the columns in `Ci::JobInfo::JOB_ATTRIBUTES`. + # rubocop: disable Rails/InverseOf -- Relation not present in Ci::JobInfo because foreign key callback is not compatible with BulkInsertSafe + belongs_to :job_info, + ->(job) { in_partition(job) }, + class_name: 'Ci::JobInfo', partition_foreign_key: :partition_id + # rubocop: enable Rails/InverseOf has_many :needs, class_name: 'Ci::BuildNeed', foreign_key: :build_id, inverse_of: :build @@ -104,6 +114,10 @@ class CommitStatus < Ci::ApplicationRecord preload(project: :namespace) end + scope :with_job_info_preload, -> do + preload(:job_info) + end + scope :scoped_pipeline, -> do where(arel_table[:commit_id].eq(Ci::Pipeline.arel_table[:id])) .where(arel_table[:partition_id].eq(Ci::Pipeline.arel_table[:partition_id])) diff --git a/app/models/concerns/ci/metadatable.rb b/app/models/concerns/ci/metadatable.rb index 7fbf0671c985e0d6d7d4a9440e745452aa93bf0b..39378d147ef287d1890baf0918d17f88b9c80740 100644 --- a/app/models/concerns/ci/metadatable.rb +++ b/app/models/concerns/ci/metadatable.rb 
@@ -191,6 +191,59 @@ def secrets=(_value) raise ActiveRecord::ReadonlyAttributeError, 'This data is read only' end + def scheduling_type + read_job_info_attribute(:scheduling_type) || read_attribute(:scheduling_type) + end + + # Need this method to overwrite ci_builds.scheduling_type enum. + def scheduling_type_dag? + scheduling_type.nil? ? find_legacy_scheduling_type == :dag : scheduling_type.to_sym == :dag + end + + # Need this method to overwrite ci_builds.scheduling_type enum. + # NOTE: guard nil like scheduling_type_dag? does; legacy rows may have a nil scheduling_type. + def scheduling_type_stage? + scheduling_type.nil? ? find_legacy_scheduling_type == :stage : scheduling_type.to_sym == :stage + end + + # TODO: We can remove this method after we migrate scheduling_type to ci_job_infos + # because no scheduling_type would be nil. + def find_legacy_scheduling_type + needs.exists? ? :dag : :stage + end + strong_memoize_attr :find_legacy_scheduling_type + + # We're keeping this column in ci_builds but it's copied over to ci_job_infos so it needs to be immutable + def name=(value) + raise ActiveRecord::ReadonlyAttributeError, 'This data is read only' if persisted? + + write_attribute(:name, value) + end + + def job_needs + needs_attrs = read_job_info_attribute(:needs_attributes, []) + + ::Gitlab::Ci::JobNeeds::Collection.new(self.id, needs_attrs) + end + strong_memoize_attr :job_needs + + def needs_names + if read_from_job_info? + job_needs.names + else + needs.map(&:name) + end + end + + def has_needs? + read_from_job_info? ? job_needs.any? : needs.present? + end + + # TODO: Remove this method after migrating data to ci_job_infos + def read_from_job_info? + Feature.enabled?(:read_from_ci_job_infos, project) && job_info.present? + end + strong_memoize_attr :read_from_job_info?
+ private def read_metadata_attribute(legacy_key, metadata_key, job_definition_key, default_value = nil) @@ -202,6 +255,18 @@ def read_metadata_attribute(legacy_key, metadata_key, job_definition_key, defaul metadata&.read_attribute(metadata_key) || default_value end + + def read_job_info_attribute(key, default_value = nil) + return unless read_from_job_info? + + result = if key.in?(::Ci::JobInfo::NORMALIZED_DATA_COLUMNS) + job_info&.read_attribute(key) || temp_job_info&.read_attribute(key) + else + job_info&.config&.dig(key) || temp_job_info&.config&.dig(key) + end + + result || default_value + end end end diff --git a/app/models/concerns/ci/partitionable/testing.rb b/app/models/concerns/ci/partitionable/testing.rb index f4bebd3071f399bce9c504f755c768a3ca3b119e..b5655891b68b9611607c19836ccb226e2aea5baa 100644 --- a/app/models/concerns/ci/partitionable/testing.rb +++ b/app/models/concerns/ci/partitionable/testing.rb @@ -24,6 +24,7 @@ module Testing Ci::JobArtifactReport Ci::JobDefinition Ci::JobDefinitionInstance + Ci::JobInfo Ci::JobInput Ci::JobMessage Ci::JobVariable diff --git a/app/models/project.rb b/app/models/project.rb index 5d55da4dd0e016c25acff202c4502676acf3f0ec..85e73b8559cef32088bda24c82d771c393aee252 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -466,6 +466,7 @@ def with_developer_access has_many :build_trace_chunks, class_name: 'Ci::BuildTraceChunk', through: :builds, source: :trace_chunks, dependent: :restrict_with_error has_many :build_report_results, class_name: 'Ci::BuildReportResult', inverse_of: :project has_many :job_artifacts, class_name: 'Ci::JobArtifact', dependent: :restrict_with_error + has_many :job_infos, class_name: 'Ci::JobInfo', inverse_of: :project has_many :pipeline_artifacts, class_name: 'Ci::PipelineArtifact', inverse_of: :project, dependent: :restrict_with_error has_many :runner_projects, class_name: 'Ci::RunnerProject', inverse_of: :project has_many :runners, through: :runner_projects, source: :runner, 
class_name: 'Ci::Runner' diff --git a/app/services/ci/clone_job_service.rb b/app/services/ci/clone_job_service.rb index 9a3f6291e2c33833ffe62cf3cd5bad926ac8f599..6df8d55c21f6544415e49f986ecee089d20d9362 100644 --- a/app/services/ci/clone_job_service.rb +++ b/app/services/ci/clone_job_service.rb @@ -13,6 +13,7 @@ def execute(new_job_variables: [], new_job_inputs: {}) add_job_variables_attributes!(new_attributes, new_job_variables) add_job_inputs_attributes!(new_attributes, new_job_inputs) add_job_definition_attributes!(new_attributes) + add_job_info_attributes!(new_attributes) if Feature.enabled?(:write_to_ci_job_infos, project) new_attributes[:user] = current_user @@ -24,7 +25,7 @@ def execute(new_job_variables: [], new_job_inputs: {}) attr_reader :job, :current_user delegate :persisted_environment, :expanded_environment_name, - :job_definition_instance, :project, :project_id, + :job_definition_instance, :job_info, :project, :project_id, :partition_id, :metadata, :pipeline, to: :job @@ -104,5 +105,33 @@ def build_definition_attributes attrs end + + def add_job_info_attributes!(attributes) + return if job_info + + persisted_job_info = find_or_create_job_info + attributes[:job_info_id] = persisted_job_info.id + end + + def find_or_create_job_info + info = ::Ci::JobInfo.fabricate( + job_attrs: build_info_attributes, + project_id: project_id, + partition_id: partition_id + ) + + ::Gitlab::Ci::JobInfos::FindOrCreate.new( + pipeline, infos: [info] + ).execute.first + end + + def build_info_attributes + { + scheduling_type: job.scheduling_type, + source: job.source, + name: job.name, + needs_attributes: job.needs_attributes + } + end end end diff --git a/app/services/ci/pipeline_processing/atomic_processing_service.rb b/app/services/ci/pipeline_processing/atomic_processing_service.rb index 9484c5b0ffe818afd05c0255e8fd862f7136693a..050bf8a4c1a34e339f8da6086797f7185d427314 100644 --- a/app/services/ci/pipeline_processing/atomic_processing_service.rb +++ 
b/app/services/ci/pipeline_processing/atomic_processing_service.rb @@ -84,6 +84,7 @@ def load_jobs(ids) .current_processable_jobs .id_in(ids) .with_project_preload + .with_job_info_preload .created .ordered_by_stage .select_with_aggregated_needs(project) diff --git a/app/services/ci/reset_skipped_jobs_service.rb b/app/services/ci/reset_skipped_jobs_service.rb index 9e5c887b31bab46fac17366e3c779e7ce642a7f0..705dd6414c3d8a9ff6339231ffac2d56ae0bc899 100644 --- a/app/services/ci/reset_skipped_jobs_service.rb +++ b/app/services/ci/reset_skipped_jobs_service.rb @@ -32,6 +32,8 @@ def dependent_jobs .skipped .ordered_by_stage .preload(:needs) + .with_job_info_preload + .with_project_preload ) end @@ -66,7 +68,7 @@ def ordered_by_dag(jobs) def sort_jobs(jobs) Gitlab::Ci::YamlProcessor::Dag.order( jobs.to_h do |job| - [job.name, job.needs.map(&:name)] + [job.name, job.needs_names] end ) end diff --git a/app/services/ci/update_build_names_service.rb b/app/services/ci/update_build_names_service.rb index d040ef40785086aa3f0b567262c85e190b841b45..356d8af9f0b31b8fba6f57190e530863f08099fc 100644 --- a/app/services/ci/update_build_names_service.rb +++ b/app/services/ci/update_build_names_service.rb @@ -9,6 +9,8 @@ def initialize(pipeline) end def execute + return if Feature.enabled?(:stop_writing_ci_job_info_to_old_destinations, pipeline.project) + scope = pipeline.builds.latest iterator = Gitlab::Pagination::Keyset::Iterator.new(scope: scope) diff --git a/app/services/projects/update_pages_service.rb b/app/services/projects/update_pages_service.rb index 7623841cb1d122b7f359c6c17f8b17a59b413425..df3d7ae6057c4968fd75e8698325e037755142bd 100644 --- a/app/services/projects/update_pages_service.rb +++ b/app/services/projects/update_pages_service.rb @@ -63,6 +63,12 @@ def error(message) end # Create status notifying the deployment of pages + # NOTE: We probably don't need to use .fabricate here because the job_info + # record is only needed for setting: + # 1. 
ci_job_infos.scheduling_type: Only queried in Pipeline#uses_needs?; + # having no job_info record for this job won't affect result. + # 2. ci_job_infos.name: Only queried for Ci::BuildNameFinder, which this commit status + # currently doesn't support anyway because it doesn't create a `build_names` record. def commit_status GenericCommitStatus.new( user: build.user, diff --git a/app/validators/json_schemas/ci_job_infos_config.json b/app/validators/json_schemas/ci_job_infos_config.json new file mode 100644 index 0000000000000000000000000000000000000000..74356b1a27e5f270cc5c39d05d1aa6c1ad0219d1 --- /dev/null +++ b/app/validators/json_schemas/ci_job_infos_config.json @@ -0,0 +1,5 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "description": "CI job immutable intrinsic config data", + "type": "object" +} diff --git a/app/workers/ci/update_build_names_worker.rb b/app/workers/ci/update_build_names_worker.rb index dbfe70b0d025c7e54338bcbb0ff3fcdde202a6f3..0ae2702d4c72558f1d0cc6d1e7091b27eca46f58 100644 --- a/app/workers/ci/update_build_names_worker.rb +++ b/app/workers/ci/update_build_names_worker.rb @@ -10,6 +10,7 @@ class UpdateBuildNamesWorker idempotent! 
deduplicate :until_executing + # TODO: Remove worker with FF `stop_writing_ci_job_info_to_old_destinations` def perform(pipeline_id) Ci::Pipeline.find_by_id(pipeline_id).try do |pipeline| Ci::UpdateBuildNamesService.new(pipeline).execute diff --git a/config/feature_flags/wip/read_from_ci_job_infos.yml b/config/feature_flags/wip/read_from_ci_job_infos.yml new file mode 100644 index 0000000000000000000000000000000000000000..4cc2deaf7e77664ffe98d45962b758bf760f8ca7 --- /dev/null +++ b/config/feature_flags/wip/read_from_ci_job_infos.yml @@ -0,0 +1,10 @@ +--- +name: read_from_ci_job_infos +description: +feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/work_items/577211 +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/211540 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/work_items/577211 +milestone: '18.6' +group: group::ci platform +type: wip +default_enabled: false diff --git a/config/feature_flags/wip/stop_writing_ci_job_info_to_old_destinations.yml b/config/feature_flags/wip/stop_writing_ci_job_info_to_old_destinations.yml new file mode 100644 index 0000000000000000000000000000000000000000..dbf1812b2e5ea3fe85bd325db0f70803ecbe1232 --- /dev/null +++ b/config/feature_flags/wip/stop_writing_ci_job_info_to_old_destinations.yml @@ -0,0 +1,10 @@ +--- +name: stop_writing_ci_job_info_to_old_destinations +description: +feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/work_items/577211 +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/211540 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/work_items/577211 +milestone: '18.6' +group: group::ci platform +type: wip +default_enabled: false diff --git a/config/feature_flags/wip/write_to_ci_job_infos.yml b/config/feature_flags/wip/write_to_ci_job_infos.yml new file mode 100644 index 0000000000000000000000000000000000000000..c6c4433a780b9c8ab6a097e020a0d1fc6bc62598 --- /dev/null +++ b/config/feature_flags/wip/write_to_ci_job_infos.yml @@ -0,0 
+1,10 @@ +--- +name: write_to_ci_job_infos +description: +feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/work_items/577211 +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/211540 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/work_items/577211 +milestone: '18.6' +group: group::ci platform +type: wip +default_enabled: false diff --git a/config/gitlab_loose_foreign_keys.yml b/config/gitlab_loose_foreign_keys.yml index faa24f1e2723a3a473ee4ca8b063b9b98cfa64ea..5e128df843ac2054b122c2c8aae988e3fdef958e 100644 --- a/config/gitlab_loose_foreign_keys.yml +++ b/config/gitlab_loose_foreign_keys.yml @@ -631,6 +631,10 @@ p_ci_job_definitions: - table: projects column: project_id on_delete: async_delete +p_ci_job_infos: + - table: projects + column: project_id + on_delete: async_delete p_ci_job_inputs: - table: projects column: project_id diff --git a/config/initializers/postgres_partitioning.rb b/config/initializers/postgres_partitioning.rb index a7fd1002fc0606b0a4bd635987083b1462e3cb40..83511cf8e58bbb946d22341b6732c529eca6eb49 100644 --- a/config/initializers/postgres_partitioning.rb +++ b/config/initializers/postgres_partitioning.rb @@ -31,6 +31,7 @@ Ci::JobArtifactReport, Ci::JobDefinition, Ci::JobDefinitionInstance, + Ci::JobInfo, Ci::JobInput, Ci::JobMessage, Ci::Pipeline, diff --git a/db/docs/p_ci_job_infos.yml b/db/docs/p_ci_job_infos.yml new file mode 100644 index 0000000000000000000000000000000000000000..f6884b015bbb4786fceed4269c1a4f5d328d09c0 --- /dev/null +++ b/db/docs/p_ci_job_infos.yml @@ -0,0 +1,13 @@ +--- +table_name: p_ci_job_infos +classes: +- Ci::JobInfo +feature_categories: +- continuous_integration +description: Unique intrinsic job data across pipelines +introduced_by_url: +milestone: '18.6' +gitlab_schema: gitlab_ci +sharding_key: + project_id: projects +table_size: small diff --git a/db/fixtures/development/14_pipelines.rb b/db/fixtures/development/14_pipelines.rb index 
58c7567a5e29f2c14e0b43765c5f37b06b1a694a..a65dc4b049dab75bbf0fc2d647d2383d4226d99c 100644 --- a/db/fixtures/development/14_pipelines.rb +++ b/db/fixtures/development/14_pipelines.rb @@ -193,6 +193,7 @@ def create_pipeline!(project, ref, commit) project.ci_pipelines.create!(sha: commit.id, ref: ref, source: :push) end + # TODO: We need to create fixtures for Ci::JobInfo too def build_create!(pipeline, stage, opts = {}) attributes = job_attributes(pipeline, stage, opts) attributes[:options] ||= {} diff --git a/db/migrate/20251212000001_create_p_ci_job_infos.rb b/db/migrate/20251212000001_create_p_ci_job_infos.rb new file mode 100644 index 0000000000000000000000000000000000000000..fbd8a98abb9063c0b8888e927dab8db8fc6acb5f --- /dev/null +++ b/db/migrate/20251212000001_create_p_ci_job_infos.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +class CreatePCiJobInfos < Gitlab::Database::Migration[2.3] + milestone '18.8' + + def change + opts = { + primary_key: [:id, :partition_id], + options: 'PARTITION BY LIST (partition_id)' + } + + create_table(:p_ci_job_infos, **opts) do |t| + t.bigserial :id, null: false + t.bigint :partition_id, null: false + t.bigint :project_id, null: false + t.datetime_with_timezone :created_at, null: false + t.integer :scheduling_type, limit: 2, null: false + t.integer :source, limit: 2 + t.binary :checksum, null: false + t.tsvector :search_vector, as: "to_tsvector('english'::regconfig, COALESCE(name, ''::text))", stored: true + t.text :name, limit: 255, null: false + t.jsonb :config, default: {}, null: false + + t.index [:project_id, :checksum, :partition_id], unique: true, + name: :index_p_ci_job_infos_on_project_id_and_checksum + # TODO: Check what index is best to support Ci::Pipeline#uses_needs? 
query + t.index :id, + where: '(scheduling_type = 1)', + name: :index_p_ci_job_infos_on_id_where_scheduling_type_dag + # TODO: Check what index is best to support Ci::BuildSourceFinder query + t.index [:project_id, :source, :id], + name: :index_p_ci_job_infos_on_project_id_source_id + t.index [:search_vector], using: :gin, + name: :index_p_ci_job_infos_on_search_vector + end + end +end diff --git a/db/migrate/20251212000002_create_p_ci_job_infos_partitions.rb b/db/migrate/20251212000002_create_p_ci_job_infos_partitions.rb new file mode 100644 index 0000000000000000000000000000000000000000..831a4184b4b21519c1dd607818992d2c906f966f --- /dev/null +++ b/db/migrate/20251212000002_create_p_ci_job_infos_partitions.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +# TODO: This migration is just to initialize partitions for local gdk. For production, we can +# remove it and rely on the partition manager to create the necessary partitions instead. +class CreatePCiJobInfosPartitions < Gitlab::Database::Migration[2.3] + milestone '18.8' + + disable_ddl_transaction! 
+ + def up + sql = (100..108).map do |partition_id| + <<~SQL + CREATE TABLE IF NOT EXISTS gitlab_partitions_dynamic.ci_job_infos_#{partition_id} + PARTITION OF p_ci_job_infos + FOR VALUES IN (#{partition_id}); + SQL + end.join + + with_lock_retries do + connection.execute(sql) + end + end + + def down + sql = (100..108).map do |partition_id| + <<~SQL + DROP TABLE IF EXISTS gitlab_partitions_dynamic.ci_job_infos_#{partition_id}; + SQL + end.join + + with_lock_retries do + connection.execute(sql) + end + end +end diff --git a/db/migrate/20251212000003_add_job_info_id_column_to_p_ci_builds.rb b/db/migrate/20251212000003_add_job_info_id_column_to_p_ci_builds.rb new file mode 100644 index 0000000000000000000000000000000000000000..c99171b752a323baaed0c6c2ff75fd471eef2586 --- /dev/null +++ b/db/migrate/20251212000003_add_job_info_id_column_to_p_ci_builds.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +class AddJobInfoIdColumnToPCiBuilds < Gitlab::Database::Migration[2.3] + milestone '18.8' + + # rubocop:disable Migration/PreventAddingColumns -- Required to deduplicate data into ci_job_infos table + def up + # NOTE: We can probably keep this column as NULL-able because generic commit statuses may not have a job_info record + add_column :p_ci_builds, :job_info_id, :bigint, if_not_exists: true + end + + def down + remove_column :p_ci_builds, :job_info_id, if_exists: true + end + # rubocop:enable Migration/PreventAddingColumns +end diff --git a/db/post_migrate/20251212000004_add_index_on_p_ci_builds_job_info_id.rb b/db/post_migrate/20251212000004_add_index_on_p_ci_builds_job_info_id.rb new file mode 100644 index 0000000000000000000000000000000000000000..618af97ec5af47bd871e7230629de96c7d36ce04 --- /dev/null +++ b/db/post_migrate/20251212000004_add_index_on_p_ci_builds_job_info_id.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +# TODO: This index should first be added asynchronously on Production +class AddIndexOnPCiBuildsJobInfoId < 
Gitlab::Database::Migration[2.3] + include Gitlab::Database::PartitioningMigrationHelpers + + milestone '18.8' + + disable_ddl_transaction! + + PARTITIONED_INDEX_NAME = 'index_p_ci_builds_on_job_info_id' + + # rubocop: disable Migration/PreventIndexCreation -- Required to deduplicate data into ci_job_infos table + def up + add_concurrent_partitioned_index :p_ci_builds, :job_info_id, name: PARTITIONED_INDEX_NAME + end + + def down + remove_concurrent_partitioned_index_by_name :p_ci_builds, PARTITIONED_INDEX_NAME + end + # rubocop: enable Migration/PreventIndexCreation +end diff --git a/db/post_migrate/20251212000005_add_fk_on_p_ci_builds_job_info_id.rb b/db/post_migrate/20251212000005_add_fk_on_p_ci_builds_job_info_id.rb new file mode 100644 index 0000000000000000000000000000000000000000..bd691b400f9dd76408c232e9a508008ed69a37ae --- /dev/null +++ b/db/post_migrate/20251212000005_add_fk_on_p_ci_builds_job_info_id.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +# TODO: This FK should first be added asynchronously on Production +class AddFkOnPCiBuildsJobInfoId < Gitlab::Database::Migration[2.3] + include Gitlab::Database::PartitioningMigrationHelpers + + milestone '18.8' + + disable_ddl_transaction! 
+ + SOURCE_TABLE_NAME = :p_ci_builds + TARGET_TABLE_NAME = :p_ci_job_infos + FK_NAME = :fk_rails_2f23ec1c61 + + # rubocop: disable Migration/PreventForeignKeyCreation -- Required to deduplicate data into ci_job_infos table + def up + add_concurrent_partitioned_foreign_key( + SOURCE_TABLE_NAME, TARGET_TABLE_NAME, + column: [:partition_id, :job_info_id], + target_column: [:partition_id, :id], + on_update: :restrict, + on_delete: :restrict, + reverse_lock_order: true, + name: FK_NAME + ) + end + + def down + with_lock_retries do + remove_foreign_key_if_exists( + SOURCE_TABLE_NAME, + TARGET_TABLE_NAME, + reverse_lock_order: true, + name: FK_NAME + ) + end + end + # rubocop: enable Migration/PreventForeignKeyCreation +end diff --git a/db/schema_migrations/20251212000001 b/db/schema_migrations/20251212000001 new file mode 100644 index 0000000000000000000000000000000000000000..a2710e96af29e6542c8cd2ee649cbcc973293d27 --- /dev/null +++ b/db/schema_migrations/20251212000001 @@ -0,0 +1 @@ +7c7a8d7fd6b8923dfe0d0c6234bcab0d47066047cab746a7307c81b9c6560d37 \ No newline at end of file diff --git a/db/schema_migrations/20251212000002 b/db/schema_migrations/20251212000002 new file mode 100644 index 0000000000000000000000000000000000000000..4e9e9f253450c49f5ba4b664752c7250d55c7ee8 --- /dev/null +++ b/db/schema_migrations/20251212000002 @@ -0,0 +1 @@ +54fcbdf66c4cbf966282f993044dda5668a03956a0be194eb58592a4e03756c6 \ No newline at end of file diff --git a/db/schema_migrations/20251212000003 b/db/schema_migrations/20251212000003 new file mode 100644 index 0000000000000000000000000000000000000000..cf1e1820080feff7e3eb5dce2f70837ec3134e26 --- /dev/null +++ b/db/schema_migrations/20251212000003 @@ -0,0 +1 @@ +caf3468d0e6c38a64aa61a9d6e6ecb63aec58a9bd084cd8be96ec6fc7c30bdf0 \ No newline at end of file diff --git a/db/schema_migrations/20251212000004 b/db/schema_migrations/20251212000004 new file mode 100644 index 
0000000000000000000000000000000000000000..272e3ab09149122af3b2c94fcc68f213a0ccb15d --- /dev/null +++ b/db/schema_migrations/20251212000004 @@ -0,0 +1 @@ +7d3c56c1c6fe9905e6367c821fd53e1c0fdff43c6d5640a58e27fba94d577552 \ No newline at end of file diff --git a/db/schema_migrations/20251212000005 b/db/schema_migrations/20251212000005 new file mode 100644 index 0000000000000000000000000000000000000000..541e955eabfefb99df717198a00aab1d9b8c1a49 --- /dev/null +++ b/db/schema_migrations/20251212000005 @@ -0,0 +1 @@ +fc156af34a793cc57547fad00af6822d4de6f7b0c5abbc72f9b59ee5440d3f80 \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index 6746648605376e984366abac822589d883ff7cbb..66f0705f8bfd6d2bf6fd2ea33187ec086c775613 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -5684,6 +5684,7 @@ CREATE TABLE p_ci_builds ( timeout_source smallint, exit_code smallint, debug_trace_enabled boolean, + job_info_id bigint, CONSTRAINT check_1e2fbd1b39 CHECK ((lock_version IS NOT NULL)), CONSTRAINT check_9aa9432137 CHECK ((project_id IS NOT NULL)) ) @@ -5787,6 +5788,21 @@ CREATE TABLE p_ci_job_definitions ( ) PARTITION BY LIST (partition_id); +CREATE TABLE p_ci_job_infos ( + id bigint NOT NULL, + partition_id bigint NOT NULL, + project_id bigint NOT NULL, + created_at timestamp with time zone NOT NULL, + scheduling_type smallint NOT NULL, + source smallint, + checksum bytea NOT NULL, + search_vector tsvector GENERATED ALWAYS AS (to_tsvector('english'::regconfig, COALESCE(name, ''::text))) STORED, + name text NOT NULL, + config jsonb DEFAULT '{}'::jsonb NOT NULL, + CONSTRAINT check_85f37f6053 CHECK ((char_length(name) <= 255)) +) +PARTITION BY LIST (partition_id); + CREATE TABLE p_ci_job_inputs ( id bigint NOT NULL, job_id bigint NOT NULL, @@ -22523,6 +22539,15 @@ CREATE SEQUENCE p_ci_job_definitions_id_seq ALTER SEQUENCE p_ci_job_definitions_id_seq OWNED BY p_ci_job_definitions.id; +CREATE SEQUENCE p_ci_job_infos_id_seq + START WITH 1 + INCREMENT BY 1 + NO 
MINVALUE + NO MAXVALUE + CACHE 1; + +ALTER SEQUENCE p_ci_job_infos_id_seq OWNED BY p_ci_job_infos.id; + CREATE SEQUENCE p_ci_job_inputs_id_seq START WITH 1 INCREMENT BY 1 @@ -32864,6 +32889,8 @@ ALTER TABLE ONLY p_ci_builds_metadata ALTER COLUMN id SET DEFAULT nextval('ci_bu ALTER TABLE ONLY p_ci_job_definitions ALTER COLUMN id SET DEFAULT nextval('p_ci_job_definitions_id_seq'::regclass); +ALTER TABLE ONLY p_ci_job_infos ALTER COLUMN id SET DEFAULT nextval('p_ci_job_infos_id_seq'::regclass); + ALTER TABLE ONLY p_ci_job_inputs ALTER COLUMN id SET DEFAULT nextval('p_ci_job_inputs_id_seq'::regclass); ALTER TABLE ONLY p_ci_job_messages ALTER COLUMN id SET DEFAULT nextval('p_ci_job_messages_id_seq'::regclass); @@ -36359,6 +36386,9 @@ ALTER TABLE ONLY p_ci_job_definition_instances ALTER TABLE ONLY p_ci_job_definitions ADD CONSTRAINT p_ci_job_definitions_pkey PRIMARY KEY (id, partition_id); +ALTER TABLE ONLY p_ci_job_infos + ADD CONSTRAINT p_ci_job_infos_pkey PRIMARY KEY (id, partition_id); + ALTER TABLE ONLY p_ci_job_inputs ADD CONSTRAINT p_ci_job_inputs_pkey PRIMARY KEY (id, partition_id); @@ -43315,6 +43345,8 @@ CREATE INDEX index_p_ci_builds_execution_configs_on_project_id ON ONLY p_ci_buil CREATE INDEX index_p_ci_builds_on_execution_config_id ON ONLY p_ci_builds USING btree (execution_config_id) WHERE (execution_config_id IS NOT NULL); +CREATE INDEX index_p_ci_builds_on_job_info_id ON ONLY p_ci_builds USING btree (job_info_id); + CREATE INDEX index_p_ci_finished_build_ch_sync_events_finished_at ON ONLY p_ci_finished_build_ch_sync_events USING btree (partition, build_finished_at); CREATE INDEX index_p_ci_finished_build_ch_sync_events_on_project_id ON ONLY p_ci_finished_build_ch_sync_events USING btree (project_id); @@ -43335,6 +43367,14 @@ CREATE INDEX index_p_ci_job_definitions_on_interruptible ON ONLY p_ci_job_defini CREATE UNIQUE INDEX index_p_ci_job_definitions_on_project_id_and_checksum ON ONLY p_ci_job_definitions USING btree (project_id, checksum, 
partition_id); +CREATE INDEX index_p_ci_job_infos_on_id_where_scheduling_type_dag ON ONLY p_ci_job_infos USING btree (id) WHERE (scheduling_type = 1); + +CREATE UNIQUE INDEX index_p_ci_job_infos_on_project_id_and_checksum ON ONLY p_ci_job_infos USING btree (project_id, checksum, partition_id); + +CREATE INDEX index_p_ci_job_infos_on_project_id_source_id ON ONLY p_ci_job_infos USING btree (project_id, source, id); + +CREATE INDEX index_p_ci_job_infos_on_search_vector ON ONLY p_ci_job_infos USING gin (search_vector); + CREATE UNIQUE INDEX index_p_ci_job_inputs_on_job_id_and_name ON ONLY p_ci_job_inputs USING btree (job_id, name, partition_id); CREATE INDEX index_p_ci_job_inputs_on_project_id ON ONLY p_ci_job_inputs USING btree (project_id); @@ -52905,6 +52945,9 @@ ALTER TABLE ONLY onboarding_progresses ALTER TABLE ONLY protected_branch_unprotect_access_levels ADD CONSTRAINT fk_rails_2d2aba21ef FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE; +ALTER TABLE p_ci_builds + ADD CONSTRAINT fk_rails_2f23ec1c61 FOREIGN KEY (partition_id, job_info_id) REFERENCES p_ci_job_infos(partition_id, id) ON UPDATE RESTRICT ON DELETE RESTRICT; + ALTER TABLE ONLY issuable_severities ADD CONSTRAINT fk_rails_2fbb74ad6d FOREIGN KEY (issue_id) REFERENCES issues(id) ON DELETE CASCADE; diff --git a/doc/api/graphql/reference/_index.md b/doc/api/graphql/reference/_index.md index 11d55a274d05ea27afa73d6c4f98462d8253a2e2..878610312b7adc54935173c4ff454e755db8e8c9 100644 --- a/doc/api/graphql/reference/_index.md +++ b/doc/api/graphql/reference/_index.md @@ -27118,7 +27118,7 @@ Check user's permission for the car. | Name | Type | Description | | ---- | ---- | ----------- | -| `id` | [`ID!`](#id) | ID of the BuildNeed. | +| `id` | [`ID!`](#id) | ID of Ci::BuildNeed or Gitlab::Ci::JobNeeds::Collection::Need. | | `name` | [`String`](#string) | Name of the job we need to complete. 
| ### `CiCatalogResource` diff --git a/ee/app/workers/ci/cleanup_build_name_worker.rb b/ee/app/workers/ci/cleanup_build_name_worker.rb index d7420b2d1694df387ab6ee084db845145d031d3a..95e95075a29a291e962e87080724d515a812e099 100644 --- a/ee/app/workers/ci/cleanup_build_name_worker.rb +++ b/ee/app/workers/ci/cleanup_build_name_worker.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +# TODO: Remove this worker with FF `stop_writing_ci_job_info_to_old_destinations` module Ci class CleanupBuildNameWorker include ApplicationWorker diff --git a/ee/lib/api/ai/duo_workflows/workflows.rb b/ee/lib/api/ai/duo_workflows/workflows.rb index 1838f4cbb37fdfee91f85e077de06060ab5148a3..213a0005a2a6f4b2e7685ad2b017112544a61732 100644 --- a/ee/lib/api/ai/duo_workflows/workflows.rb +++ b/ee/lib/api/ai/duo_workflows/workflows.rb @@ -422,7 +422,7 @@ def create_workflow_params end post do ::Gitlab::QueryLimiting.disable!( - 'https://gitlab.com/gitlab-org/gitlab/-/issues/566195', new_threshold: 115 + 'https://gitlab.com/gitlab-org/gitlab/-/issues/566195', new_threshold: 118 ) container = if params[:project_id] diff --git a/ee/lib/gitlab/ci/pipeline/jobs_injector.rb b/ee/lib/gitlab/ci/pipeline/jobs_injector.rb index e86177d3132c39fd39f357ecb6c596879d988402..d924689018784c91bdc03f74b620041fcf49bfef 100644 --- a/ee/lib/gitlab/ci/pipeline/jobs_injector.rb +++ b/ee/lib/gitlab/ci/pipeline/jobs_injector.rb @@ -85,7 +85,7 @@ def add_job(stage:, job:) stage.statuses << job pipeline_jobs_by_name[job.name] = job - @jobs_with_needs << job if job.needs.present? + @jobs_with_needs << job if job.has_needs? end def update_needs_references! 
diff --git a/lib/api/ci/pipelines.rb b/lib/api/ci/pipelines.rb index c95df387a51ab6af81375e6efa2383acb0db30dd..91cfcb395fe53a027e2093b1f6673d7e6522252f 100644 --- a/lib/api/ci/pipelines.rb +++ b/lib/api/ci/pipelines.rb @@ -189,7 +189,7 @@ class Pipelines < ::API::Base .new(current_user: current_user, pipeline: pipeline, params: params) .execute - builds = builds.with_preloads.preload(:metadata, :job_definition, :runner_manager, :ci_stage) # rubocop:disable CodeReuse/ActiveRecord -- preload job.archived? + builds = builds.with_preloads.preload(:metadata, :job_definition, :job_info, :runner_manager, :ci_stage) # rubocop:disable CodeReuse/ActiveRecord -- preload job.archived? present paginate(builds), with: Entities::Ci::Job end diff --git a/lib/gitlab/ci/job_infos/find_or_create.rb b/lib/gitlab/ci/job_infos/find_or_create.rb new file mode 100644 index 0000000000000000000000000000000000000000..1a3dfa25afb80aeebfd1326e48d92561c691b543 --- /dev/null +++ b/lib/gitlab/ci/job_infos/find_or_create.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +module Gitlab + module Ci + module JobInfos + # TODO: This class mirrors Ci::JobDefinitions::FindOrCreate; + # maybe they can be combined into a single implementation class or + # refactor to use a shared module/base class? + class FindOrCreate + BATCH_SIZE = 50 + + def initialize(pipeline, infos: []) + @project_id = pipeline.project_id + @partition_id = pipeline.partition_id + @job_infos = Array.wrap(infos).uniq(&:checksum) + end + + def execute + return [] if job_infos.empty? + + existing_infos = fetch_records_for(job_infos) + existing_infos_by_checksum = existing_infos.group_by(&:checksum) + missing_infos = @job_infos.reject do |d| + existing_infos_by_checksum[d.checksum] + end + + return existing_infos if missing_infos.empty? 
+ + insert_missing(missing_infos) + + existing_infos + fetch_records_for(missing_infos) + end + + private + + attr_reader :project_id, :partition_id, :job_infos + + def fetch_records_for(infos) + checksums = infos.map(&:checksum) + + ::Ci::JobInfo + .select(:id, :partition_id, :project_id, :checksum, :config, *::Ci::JobInfo::NORMALIZED_DATA_COLUMNS) + .in_partition(partition_id) + .for_project(project_id) + .for_checksum(checksums) + .to_a # Explicitly convert to array for further processing + end + + def insert_missing(infos) + ::Ci::JobInfo.bulk_insert!( + infos, + unique_by: [:project_id, :partition_id, :checksum], + skip_duplicates: true, + batch_size: BATCH_SIZE + ) + end + end + end + end +end diff --git a/lib/gitlab/ci/job_needs/collection.rb b/lib/gitlab/ci/job_needs/collection.rb new file mode 100644 index 0000000000000000000000000000000000000000..f97be836b5b97164f8c5769fb2d4c808d5aad315 --- /dev/null +++ b/lib/gitlab/ci/job_needs/collection.rb @@ -0,0 +1,73 @@ +# frozen_string_literal: true + +module Gitlab + module Ci + module JobNeeds + # Represents a collection of Need objects. Constructed + # from needs_attributes (array of hashes). + class Collection + include Enumerable + + ATTRIBUTES = [:name, :artifacts, :optional].freeze + + Need = Struct.new(:job_id, *ATTRIBUTES, keyword_init: true) do + include GlobalID::Identification + + def initialize(job_id:, name:, artifacts: true, optional: false) + super + end + + # To generate the pipeline dependencies graph, the FE just requires each need to have a + # unique ID; it doesn't have to be a DB-persisted ID. 
+ # Since job ID is globally unique, job names are unique within a pipeline, and a job + # cannot need the same job twice, a globally unique ID for each need can be made + # with just + + def id + @id ||= Digest::MD5.hexdigest("#{job_id}/#{name}") # rubocop:disable Fips/MD5 -- Not security-sensitive; used to generate unique ID + end + + def attributes + to_h.slice(*ATTRIBUTES) + end + end + + def initialize(job_id, needs_attributes = []) + @job_id = job_id + @needs = fabricate_needs(needs_attributes) + end + + def size + needs.size + end + + def each + needs.each { |need| yield(need) } + end + + def names_with_artifacts_true + needs.select(&:artifacts).map(&:name) + end + + def names + needs.map(&:name) + end + + def to_attributes_array + needs.map(&:attributes) + end + + private + + attr_reader :job_id, :needs + + def fabricate_needs(needs_attributes) + needs_attributes.to_a.map do |need_hash| + attrs = need_hash.symbolize_keys.slice(*ATTRIBUTES) + + Need.new(**attrs.merge(job_id: job_id)) + end + end + end + end + end +end diff --git a/lib/gitlab/ci/pipeline/chain/create.rb b/lib/gitlab/ci/pipeline/chain/create.rb index ea8c5b25dde5b3e6d4aa37d95fde431d50605819..d8a9964e07920e3f90afaba55984db63b72b208a 100644 --- a/lib/gitlab/ci/pipeline/chain/create.rb +++ b/lib/gitlab/ci/pipeline/chain/create.rb @@ -14,6 +14,10 @@ def perform! # reason because they can be used in the next pipeline creations. 
::Gitlab::Ci::Pipeline::Create::JobDefinitionBuilder.new(pipeline, statuses).run + if Feature.enabled?(:write_to_ci_job_infos, project) + ::Gitlab::Ci::Pipeline::Create::JobInfoBuilder.new(pipeline, statuses).run + end + BulkInsertableAssociations.with_bulk_insert do ::Ci::BulkInsertableTags.with_bulk_insert_tags do pipeline.transaction do diff --git a/lib/gitlab/ci/pipeline/chain/set_build_sources.rb b/lib/gitlab/ci/pipeline/chain/set_build_sources.rb index 78ec7da0500e227b90444e9d2fb765b62a441e73..34efc5f887f30c7ea5343c5be5430cfa61aa1389 100644 --- a/lib/gitlab/ci/pipeline/chain/set_build_sources.rb +++ b/lib/gitlab/ci/pipeline/chain/set_build_sources.rb @@ -16,8 +16,13 @@ def perform! pipeline.source end - build.build_build_source(source: build_source, - project_id: project.id) + # IN PROGRESS: ci_build_sources deduplication into ci_job_infos + if Feature.disabled?(:stop_writing_ci_job_info_to_old_destinations, project) + build.build_build_source(source: build_source, + project_id: project.id) + end + + build.temp_job_info.mutate(source: build_source) if Feature.enabled?(:write_to_ci_job_infos, project) end end end diff --git a/lib/gitlab/ci/pipeline/create/job_info_builder.rb b/lib/gitlab/ci/pipeline/create/job_info_builder.rb new file mode 100644 index 0000000000000000000000000000000000000000..7ea26b74ea07ebf758fcb2e4b6d2a52531058363 --- /dev/null +++ b/lib/gitlab/ci/pipeline/create/job_info_builder.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +module Gitlab + module Ci + module Pipeline + module Create + # This class is similar to JobDefinitionBuilder. + # TODO: Refactor shared code into module or parent class? 
+ class JobInfoBuilder + include Gitlab::Utils::StrongMemoize + + def initialize(pipeline, jobs) + @pipeline = pipeline + @jobs = jobs.select(&:temp_job_info) + @project = pipeline.project + end + + def run + find_or_insert_job_infos.each do |job_info| + jobs_by_checksum[job_info.checksum].each do |job| + job.job_info = job_info + end + end + end + + private + + attr_reader :project, :pipeline, :jobs + + def find_or_insert_job_infos + Gitlab::Ci::JobInfos::FindOrCreate.new(pipeline, infos: jobs.map(&:temp_job_info)).execute + end + + def jobs_by_checksum + jobs.group_by do |job| + job.temp_job_info.checksum + end + end + strong_memoize_attr :jobs_by_checksum + end + end + end + end +end diff --git a/lib/gitlab/ci/processable_object_hierarchy.rb b/lib/gitlab/ci/processable_object_hierarchy.rb index c1531c3f4ab6cd481e4b1ea3b813209d9daf7996..32572e7939c13fdcbdd062288572191223a4f73b 100644 --- a/lib/gitlab/ci/processable_object_hierarchy.rb +++ b/lib/gitlab/ci/processable_object_hierarchy.rb @@ -3,10 +3,20 @@ module Gitlab module Ci class ProcessableObjectHierarchy < ::Gitlab::ObjectHierarchy + include Gitlab::Utils::StrongMemoize + + def base_and_descendants + return super unless read_from_job_infos? + + super.with(job_needs_cte.to_arel) # rubocop:disable CodeReuse/ActiveRecord -- Required to include job_needs_cte in query + end + private def middle_table - ::Ci::BuildNeed.arel_table + return ::Ci::BuildNeed.arel_table unless read_from_job_infos? 
+ + job_needs_cte.table end def from_tables(cte) @@ -17,6 +27,7 @@ def parent_id_column(_cte) middle_table[:name] end + # TODO: We could probably improve these conditions with partition pruning def ancestor_conditions(cte) middle_table[:name].eq(objects_table[:name]).and( middle_table[:build_id].eq(cte.table[:id]) @@ -32,6 +43,46 @@ def descendant_conditions(cte) objects_table[:commit_id].eq(cte.table[:commit_id]) ) end + + # TODO: We could also pass this in as an argument instead + def pipeline + ancestors_base.first.pipeline + end + strong_memoize_attr :pipeline + + # This processable object hierachy is the main challenge with dropping ci_build_needs. + # One idea is to recreate the table on-the-fly as a CTE by exploding the + # `needs_attributes` value from job infos. + # + # I believe we only need the latest needed jobs of the given pipeline since + # we shouldn't need to process old jobs or jobs outside of it. So this table would + # be relatively small when materialized. + def job_needs_cte + # This query could be refactored to be more efficient + sql = <<~SQL + SELECT + p_ci_builds.partition_id, + p_ci_builds.id AS build_id, + needs_attributes ->> 'name' AS name + FROM + p_ci_builds + INNER JOIN p_ci_job_infos + ON p_ci_builds.job_info_id = p_ci_job_infos.id + AND p_ci_builds.partition_id = p_ci_job_infos.partition_id + CROSS JOIN LATERAL + jsonb_array_elements(p_ci_job_infos.config->'needs_attributes') AS needs_attributes + WHERE + p_ci_builds.partition_id = #{pipeline.partition_id} + AND p_ci_builds.id IN (#{pipeline.processables.latest.select(:id).to_sql}) + SQL + + Gitlab::SQL::CTE.new(:job_needs, sql) + end + + def read_from_job_infos? + pipeline.read_from_job_infos? + end + strong_memoize_attr :read_from_job_infos? 
end end end diff --git a/lib/gitlab/graphql/pagination/connections.rb b/lib/gitlab/graphql/pagination/connections.rb index 073853b6d5d468478d63094a5ee560f42f32a29f..839937d033f21e6857c38defd68f71d6de1d6a34 100644 --- a/lib/gitlab/graphql/pagination/connections.rb +++ b/lib/gitlab/graphql/pagination/connections.rb @@ -25,6 +25,10 @@ def self.use(schema) Array, Gitlab::Graphql::Pagination::ArrayConnection) + schema.connections.add( + Gitlab::Ci::JobNeeds::Collection, + Gitlab::Graphql::Pagination::ArrayConnection) + schema.connections.add( ::ClickHouse::Client::QueryBuilder, Gitlab::Graphql::Pagination::ClickHouseConnection diff --git a/lib/gitlab/import_export/project/import_export.yml b/lib/gitlab/import_export/project/import_export.yml index c6982ce8ed0abffb27dc3266cb7ba73498dcd241..11940bc8cf4b4e90dc575d5025252b16ca846fe2 100644 --- a/lib/gitlab/import_export/project/import_export.yml +++ b/lib/gitlab/import_export/project/import_export.yml @@ -107,6 +107,7 @@ tree: - :external_pull_request - :merge_request - :pipeline_metadata + - :job_infos # TODO: I think this table is necessary to export? 
- :auto_devops - :pipeline_schedules - :container_expiration_policy @@ -598,6 +599,11 @@ included_attributes: builds: *statuses_definition generic_commit_statuses: *statuses_definition bridges: *statuses_definition + job_infos: # TODO: Update with latest attributes + - :project_id + - :scheduling_type + - :name + - :config ci_pipelines: - :ref - :sha diff --git a/lib/gitlab/import_export/project/relation_factory.rb b/lib/gitlab/import_export/project/relation_factory.rb index 26fe5dc7b4e7c6bb9af4dcbbfe362f612bd63f44..6e89cae9b1f76561f71baefcaf3dead2fd7fcf67 100644 --- a/lib/gitlab/import_export/project/relation_factory.rb +++ b/lib/gitlab/import_export/project/relation_factory.rb @@ -45,7 +45,8 @@ class RelationFactory < Base::RelationFactory work_item_type: 'WorkItems::Type', work_item_description: 'WorkItems::Description', user_contributions: 'User', - squash_option: 'Projects::BranchRules::SquashOption' }.freeze + squash_option: 'Projects::BranchRules::SquashOption', + job_infos: 'Ci::JobInfo' }.freeze BUILD_MODELS = %i[Ci::Build Ci::Bridge commit_status generic_commit_status].freeze diff --git a/lib/gitlab/seeders/ci/runner/runner_fleet_pipeline_seeder.rb b/lib/gitlab/seeders/ci/runner/runner_fleet_pipeline_seeder.rb index 0ae17c62cb1fbe0634193bc8c13915fe96c99836..05d7ceb26bd3ea7bcd67bba3e47ca2764a3ee565 100644 --- a/lib/gitlab/seeders/ci/runner/runner_fleet_pipeline_seeder.rb +++ b/lib/gitlab/seeders/ci/runner/runner_fleet_pipeline_seeder.rb @@ -181,6 +181,7 @@ def create_build(pipeline, runner, job_status, index) build = nil ::Ci::Build.transaction do + # TODO: We should update this to use Ci::Build.fabricate build = ::Ci::Build.new(importing: true, **build_attrs).tap(&:save!) ::Ci::RunningBuild.upsert_build!(build) if build.running? 
end diff --git a/spec/db/schema_spec.rb b/spec/db/schema_spec.rb index 6a231a38f810957e6b9368bea38c99fe7b403079..e274a854bc8c7f49c6028300a0d455a000d835f5 100644 --- a/spec/db/schema_spec.rb +++ b/spec/db/schema_spec.rb @@ -155,6 +155,7 @@ p_ci_job_annotations: %w[project_id], p_ci_job_artifacts: %w[project_id], p_ci_job_definitions: %w[partition_id], + p_ci_job_infos: %w[partition_id], p_ci_pipeline_artifact_states: %w[partition_id pipeline_artifact_id], p_ci_pipeline_variables: %w[project_id], p_ci_pipelines_config: %w[partition_id project_id], diff --git a/spec/factories/ci/builds.rb b/spec/factories/ci/builds.rb index c3d3496564f1186fe5b6cdafada36cedf19fcea9..454cbc9258b71fce98f0e0eb80eca6ba3899c688 100644 --- a/spec/factories/ci/builds.rb +++ b/spec/factories/ci/builds.rb @@ -10,7 +10,6 @@ name { 'test' } add_attribute(:protected) { false } created_at { 'Di 29. Okt 09:50:00 CET 2013' } - scheduling_type { 'stage' } pending project { pipeline.project } @@ -37,6 +36,7 @@ end id_tokens { nil } + scheduling_type { 'stage' } end after(:build) do |build, evaluator| @@ -64,7 +64,9 @@ trait :with_build_name do after(:create) do |build, _| - create(:ci_build_name, build: build) + if Feature.disabled?(:stop_writing_ci_job_info_to_old_destinations, build.project) + create(:ci_build_name, build: build) + end end end @@ -104,7 +106,15 @@ needed { association(:ci_build, name: needed_name, pipeline: pipeline) } end + after(:build) do |build, evaluator| + if Feature.enabled?(:write_to_ci_job_infos, build.project) + Ci::JobFactoryHelpers.mutate_temp_job_info(build, needs_attributes: [{ name: evaluator.needed.name }]) + end + end + after(:create) do |build, evaluator| + next if Feature.enabled?(:stop_writing_ci_job_info_to_old_destinations, build.project) + build.needs << create(:ci_build_need, build: build, name: evaluator.needed.name) end end diff --git a/spec/factories/ci/job_infos.rb b/spec/factories/ci/job_infos.rb new file mode 100644 index 
0000000000000000000000000000000000000000..8843ba6688a6d2b25539361ec1d033e000603954 --- /dev/null +++ b/spec/factories/ci/job_infos.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +FactoryBot.define do + factory :ci_job_info, class: 'Ci::JobInfo' do + project factory: :project + + checksum { Digest::SHA256.hexdigest(rand.to_s) } + end +end diff --git a/spec/factories/ci/processable.rb b/spec/factories/ci/processable.rb index 738afdf7d1d821e6c14aadc9ee2f7b2ea488165a..d7a379a0a6d135b6bafcb236e94615def22db646 100644 --- a/spec/factories/ci/processable.rb +++ b/spec/factories/ci/processable.rb @@ -10,7 +10,6 @@ tag { false } pipeline factory: :ci_pipeline project { pipeline.project } - scheduling_type { 'stage' } partition_id { pipeline.partition_id } # This factory was updated to help with the efforts of the removal of `ci_builds.stage`: @@ -22,11 +21,29 @@ options { {} } yaml_variables { [] } stage { 'test' } + scheduling_type { 'stage' } + needs_attributes { [] } end after(:stub, :build) do |processable, evaluator| Ci::JobFactoryHelpers.mutate_temp_job_definition( processable, options: evaluator.options, yaml_variables: evaluator.yaml_variables) + + if Feature.enabled?(:write_to_ci_job_infos, processable.project) + Ci::JobFactoryHelpers.mutate_temp_job_info( + processable, + scheduling_type: evaluator.scheduling_type, + name: processable.name, + needs_attributes: evaluator.needs_attributes.to_a + ) + end + + if Feature.disabled?(:stop_writing_ci_job_info_to_old_destinations, processable.project) + processable.assign_attributes( + scheduling_type: evaluator.scheduling_type, + needs_attributes: evaluator.needs_attributes + ) + end end after(:build) do |processable, evaluator| @@ -65,6 +82,10 @@ Gitlab::Ci::Pipeline::Create::JobDefinitionBuilder.new(processable.pipeline, [processable]).run end + if processable.temp_job_info + Gitlab::Ci::Pipeline::Create::JobInfoBuilder.new(processable.pipeline, [processable]).run + end + next if processable.ci_stage 
processable.ci_stage = @@ -84,10 +105,16 @@ after(:create) do |processable, evaluator| # job_definition_instance is assigned when we run JobDefinitionBuilder - next unless processable.job_definition_instance + if processable.job_definition_instance + processable.association(:job_definition).reload + processable.temp_job_definition = nil + end - processable.association(:job_definition).reload - processable.temp_job_definition = nil + # job_info_id is assigned when we run JobInfoBuilder + if processable.job_info_id + processable.association(:job_info).reload + processable.temp_job_info = nil + end end trait :without_job_definition do @@ -96,6 +123,12 @@ end end + trait :without_job_info do + after(:build) do |processable, evaluator| + processable.temp_job_info = nil + end + end + trait :waiting_for_resource do status { 'waiting_for_resource' } end diff --git a/spec/lib/gitlab/import_export/all_models.yml b/spec/lib/gitlab/import_export/all_models.yml index 62461c519a77d2ca53ccf052414ecde50913352e..5246a34b3fe9374f9f2688b7839169af22df48d3 100644 --- a/spec/lib/gitlab/import_export/all_models.yml +++ b/spec/lib/gitlab/import_export/all_models.yml @@ -441,6 +441,7 @@ statuses: - auto_canceled_by - needs - ci_stage +- job_info builds: - user - auto_canceled_by @@ -525,6 +526,7 @@ builds: - job_artifacts_annotations - project_mirror - build_source +- job_info bridges: - user - pipeline @@ -545,12 +547,14 @@ bridges: - downstream_pipeline - upstream_pipeline - build_source +- job_info generic_commit_statuses: - user - pipeline - auto_canceled_by - ci_stage - needs +- job_info variables: - project triggers: @@ -991,6 +995,7 @@ project: - security_scan_profiles_projects - enabled_foundational_flows - enabled_foundational_flow_records +- job_infos award_emoji: - awardable - user diff --git a/spec/models/ci/build_dependencies_spec.rb b/spec/models/ci/build_dependencies_spec.rb index 95ef5e4b00974219e747acb18387b3a567fb4ff5..59065634a41470ebfd96de5fc2b72e1a81cfcfb0 100644 --- 
a/spec/models/ci/build_dependencies_spec.rb +++ b/spec/models/ci/build_dependencies_spec.rb @@ -71,7 +71,8 @@ name: 'dag_job', scheduling_type: :dag, stage_idx: 2, - ci_stage: deploy_stage + ci_stage: deploy_stage, + needs_attributes: [{ name: 'staging', artifacts: true }] ) end @@ -96,7 +97,8 @@ scheduling_type: scheduling_type, stage_idx: 3, ci_stage: deploy_stage, - options: { dependencies: dependencies } + options: { dependencies: dependencies }, + needs_attributes: needs.to_a ) end diff --git a/spec/models/ci/build_spec.rb b/spec/models/ci/build_spec.rb index 39170c89904f0effa7b7fa5e66677c191dd72f77..5ab708257ed7b509286f9e276dbbcac37b0ea3bf 100644 --- a/spec/models/ci/build_spec.rb +++ b/spec/models/ci/build_spec.rb @@ -4826,7 +4826,9 @@ def run_job_without_exception with_them do before do stub_pages_setting(enabled: enabled) - build.update!(name: name) + # TODO: Update all specs that change Ci::JobInfo::CONFIG_ATTRIBUTES + # to either utilize the job factory or job info stub helper. 
+ stub_ci_job_info(build, name: name) stub_ci_job_definition(build, options: { pages: pages_config }) stub_feature_flags(customizable_pages_job_name: true) end diff --git a/spec/models/ci/processable_spec.rb b/spec/models/ci/processable_spec.rb index 33ab11febd7a2ec55768ca4bffc417cf630ddc5f..35fd29afce02d5f78a59b1c51f22dddb0a00c9d1 100644 --- a/spec/models/ci/processable_spec.rb +++ b/spec/models/ci/processable_spec.rb @@ -135,7 +135,7 @@ temp_job_definition: instance_of(Ci::JobDefinition), job_definition: nil ) - expect(fabricate.temp_job_definition.config).to eq({ options: build_attributes[:options] }) + expect(fabricate.temp_job_definition.config).to eq({ options: build_attributes[:options], needs_attributes: [] }) expect(fabricate.temp_job_definition.project_id).to eq(build_attributes[:project_id]) expect(fabricate.temp_job_definition.partition_id).to eq(build_attributes[:partition_id]) end @@ -215,13 +215,19 @@ subject { build(:ci_build, project: project, pipeline: pipeline, importing: importing) } - where(:importing, :should_validate) do - false | true - true | false + where(:importing, :ff_state, :should_validate) do + false | false | true + false | true | false + true | true | false + true | false | false end with_them do context 'on create' do + before do + stub_feature_flags(stop_writing_ci_job_info_to_old_destinations: ff_state) + end + it 'validates presence' do if should_validate is_expected.to validate_presence_of(:scheduling_type).on(:create) @@ -244,9 +250,16 @@ let!(:another_build) { create(:ci_build, project: project) } before do + pipeline.processables.each { |job| job.clear_memoization(:read_from_job_info?) } + stub_feature_flags(read_from_ci_job_infos: false) + # Jobs with a job_info record would never have nil scheduling_type described_class.update_all(scheduling_type: nil) end + after do + pipeline.processables.each { |job| job.clear_memoization(:read_from_job_info?) 
} + end + it 'populates scheduling_type of processables' do expect do pipeline.processables.populate_scheduling_type! diff --git a/spec/services/ci/clone_job_service_spec.rb b/spec/services/ci/clone_job_service_spec.rb index 065d20ad22a5d5b824b31c2ec5f7fcd4a50fd3aa..177972c81c21e609d41cfeb42e56af17fa47b400 100644 --- a/spec/services/ci/clone_job_service_spec.rb +++ b/spec/services/ci/clone_job_service_spec.rb @@ -15,6 +15,7 @@ end let(:new_job_variables) { [] } + let_it_be(:needs_attributes) { [{ name: 'test-needed-job' }] } shared_context 'when job is a bridge' do let_it_be(:downstream_project) { create(:project, :repository) } @@ -23,7 +24,7 @@ create(:ci_bridge, :success, :resource_group, pipeline: pipeline, downstream: downstream_project, description: 'a trigger job', stage_id: stage.id, - environment: 'production') + environment: 'production', needs_attributes: needs_attributes) end let(:clone_accessors) { ::Ci::Bridge.clone_accessors } @@ -45,7 +46,8 @@ timeout: 3600, timeout_source: 2, exit_code: 127, # command not found - debug_trace_enabled: false + debug_trace_enabled: false, + needs_attributes: needs_attributes ) end @@ -94,7 +96,8 @@ dast_site_profile dast_scanner_profile stage_id dast_site_profiles_build dast_scanner_profiles_build auto_canceled_by_partition_id execution_config_id execution_config build_source id_value inputs error_job_messages - job_definition job_definition_instance job_messages temp_job_definition interruptible].freeze + job_definition job_definition_instance job_messages temp_job_definition interruptible + job_info temp_job_info].freeze end before_all do @@ -116,7 +119,7 @@ shared_examples_for 'clones the job' do before do - create(:ci_build_need, build: job) + create(:ci_build_need, build: job, name: job.job_needs.first.name) end describe 'clone accessors' do @@ -167,11 +170,12 @@ end end - context 'when the job definitions do not exit' do + context 'when the job definitions do not exist' do before do create(:ci_build_metadata, 
build: job) Ci::JobDefinitionInstance.delete_all Ci::JobDefinition.delete_all + job.reload end it 'creates a new job definition from metadata' do @@ -180,7 +184,7 @@ end end - context 'when a job definition for the metadata attributes already exits' do + context 'when a job definition for the metadata attributes already exists' do let(:metadata) do create(:ci_build_metadata, build: job, config_options: job.options, @@ -198,7 +202,8 @@ secrets: metadata.secrets, tag_list: job.tag_list.to_a, run_steps: job.try(:execution_config)&.run_steps || [], - interruptible: metadata.interruptible + interruptible: metadata.interruptible, + needs_attributes: job.needs_attributes } end diff --git a/spec/services/ci/create_pipeline_service_spec.rb b/spec/services/ci/create_pipeline_service_spec.rb index d30f93af368ac24040dfbd19b8953057d1ad201f..7ff0651d5e9f5ba8bb81f42e19e30a31010b07c8 100644 --- a/spec/services/ci/create_pipeline_service_spec.rb +++ b/spec/services/ci/create_pipeline_service_spec.rb @@ -87,7 +87,8 @@ def execute_service( 1 + # SELECT "ci_builds".* FROM "ci_builds" 1 + # INSERT INTO "ci_builds" 1 + # INSERT INTO "ci_builds_metadata" - 1 # SELECT "taggings".* FROM "taggings" + 1 + # SELECT "taggings".* FROM "taggings" + 7 # TODO: Fix the extra queries end end end diff --git a/spec/services/ci/pipeline_processing/atomic_processing_service_spec.rb b/spec/services/ci/pipeline_processing/atomic_processing_service_spec.rb index fef18cd2cdb7cb19cd18c50b95b5a1c81bb19e79..680f9e4e659047faa177eb60aa9ae1d92d15897b 100644 --- a/spec/services/ci/pipeline_processing/atomic_processing_service_spec.rb +++ b/spec/services/ci/pipeline_processing/atomic_processing_service_spec.rb @@ -6,6 +6,11 @@ include RepoHelpers include ExclusiveLeaseHelpers + before do + # TEMP: To pass tests for now + allow(Gitlab::QueryLimiting::Transaction).to receive(:threshold).and_return(300) + end + describe 'Pipeline Processing Service Tests With Yaml' do let_it_be(:project) { create(:project, 
:repository) } let_it_be(:user) { project.first_owner } @@ -784,6 +789,10 @@ def event_on_pipeline(event) end context 'when pipeline with needs is created', :sidekiq_inline do + before do + stub_feature_flags(read_needs_from_job_definition: false) # Temp to pass tests + end + let!(:linux_build) { create_build('linux:build', stage: 'build', stage_idx: 0) } let!(:mac_build) { create_build('mac:build', stage: 'build', stage_idx: 0) } let!(:linux_rspec) { create_build('linux:rspec', stage: 'test', stage_idx: 1, scheduling_type: :dag) } @@ -834,6 +843,10 @@ def event_on_pipeline(event) end context 'when one of the jobs is run on a failure' do + before do + stub_feature_flags(read_needs_from_job_definition: false) # Temp to pass tests + end + let!(:linux_notify) { create_build('linux:notify', stage: 'deploy', stage_idx: 2, when: 'on_failure', scheduling_type: :dag) } let!(:linux_notify_on_build) { create(:ci_build_need, build: linux_notify, name: 'linux:build') } @@ -894,6 +907,7 @@ def event_on_pipeline(event) let!(:deploy) { create_build('deploy', stage: 'deploy', stage_idx: 2, scheduling_type: :dag) } before do + stub_feature_flags(read_needs_from_job_definition: false) # Temp to pass tests create(:ci_build_need, build: deploy, name: 'linux:build') end @@ -916,6 +930,7 @@ def event_on_pipeline(event) let!(:deploy) { create_build('deploy', stage: 'deploy', stage_idx: 1, scheduling_type: :dag) } before do + stub_feature_flags(read_needs_from_job_definition: false) # Temp to pass tests create(:ci_build_need, build: deploy, name: 'linux:build') end diff --git a/spec/services/ci/reset_skipped_jobs_service_spec.rb b/spec/services/ci/reset_skipped_jobs_service_spec.rb index c5f1cfb1b711cd0b6d830ae5f54d51b646c9d4c6..bac1b7fd94f54edbd15774b0a13ff350b0b5fef0 100644 --- a/spec/services/ci/reset_skipped_jobs_service_spec.rb +++ b/spec/services/ci/reset_skipped_jobs_service_spec.rb @@ -422,7 +422,11 @@ def processables def jobs_name_status_owner_needs processables.reload.map do 
|job| - job.attributes.slice('name', 'status', 'user_id').merge('needs' => job.needs.map(&:name)) + if job.read_from_job_info? + job.attributes.slice('name', 'status', 'user_id').merge('needs' => job.job_needs.names) + else + job.attributes.slice('name', 'status', 'user_id').merge('needs' => job.needs.map(&:name)) + end end end end diff --git a/spec/services/ci/retry_job_service_spec.rb b/spec/services/ci/retry_job_service_spec.rb index 6770cfad0adf351236c2dd36b06ca8a50e2504af..1e96714e168b9d2130a983cadb67fdfff955f475 100644 --- a/spec/services/ci/retry_job_service_spec.rb +++ b/spec/services/ci/retry_job_service_spec.rb @@ -64,10 +64,11 @@ shared_examples_for 'clones the job' do let(:job) { job_to_clone } - before_all do + before do job_to_clone.update!(ci_stage: stage) - create(:ci_build_need, build: job_to_clone) + need = create(:ci_build_need, build: job_to_clone) + stub_ci_job_info(job_to_clone, needs_attributes: [{ name: need.name }]) end context 'when the user has ability to execute job' do @@ -88,7 +89,11 @@ context 'when the job has needs' do before do - create_list(:ci_build_need, 2, build: job) + needs = create_list(:ci_build_need, 2, build: job) + + needs.each do |need| + stub_ci_job_info(job, needs_attributes: [{ name: need.name }]) + end end it 'bulk inserts all the needs' do @@ -177,10 +182,17 @@ context 'when job has a nil scheduling_type' do before do + job.clear_memoization(:read_from_job_info?) + # Jobs with a job_info record would never have nil scheduling_type + stub_feature_flags(read_from_ci_job_infos: false) job.pipeline.processables.update_all(scheduling_type: nil) job.reload end + after do + job.clear_memoization(:read_from_job_info?) 
+ end + it 'populates scheduling_type of processables' do expect(new_job.scheduling_type).to eq('stage') expect(job.reload.scheduling_type).to eq('stage') diff --git a/spec/services/ci/update_build_names_service_spec.rb b/spec/services/ci/update_build_names_service_spec.rb index 9b20651b565dd6b0cf53ea1b94e7bdce9a370362..a23ba318960d0ad2873d08a7b0bceb24fe822f37 100644 --- a/spec/services/ci/update_build_names_service_spec.rb +++ b/spec/services/ci/update_build_names_service_spec.rb @@ -10,6 +10,10 @@ let_it_be(:build3) { create(:ci_build, name: 'build3', pipeline: pipeline) } let_it_be(:bridge1) { create(:ci_bridge, name: 'bridge1', pipeline: pipeline) } + before do + stub_feature_flags(stop_writing_ci_job_info_to_old_destinations: false) + end + describe '#execute' do subject(:service) { described_class.new(pipeline) } diff --git a/spec/support/helpers/ci/job_factory_helpers.rb b/spec/support/helpers/ci/job_factory_helpers.rb index 8b948e7e0992eb7c8968606038a7c006a86ccd5c..7347e4196b00036eb20e2894b6975de2c27d1ef1 100644 --- a/spec/support/helpers/ci/job_factory_helpers.rb +++ b/spec/support/helpers/ci/job_factory_helpers.rb @@ -20,5 +20,16 @@ def self.mutate_temp_job_definition(job, **new_config) job.temp_job_definition = new_temp_job_definition end + + def self.mutate_temp_job_info(job, **new_job_attrs) + job.temp_job_info ||= ::Ci::JobInfo.fabricate( + job_attrs: new_job_attrs, + project_id: job.project_id, + partition_id: job.partition_id + ) + + job.temp_job_info.mutate(**new_job_attrs) + job.temp_job_info.validate! 
+ end end end diff --git a/spec/support/helpers/ci/job_helpers.rb b/spec/support/helpers/ci/job_helpers.rb index 94544c884a5f652d5778e8779a643730e7f8c1a4..51f464b61aaaf5baee82ee8ca8ca243e13390d76 100644 --- a/spec/support/helpers/ci/job_helpers.rb +++ b/spec/support/helpers/ci/job_helpers.rb @@ -13,7 +13,8 @@ def stub_ci_job_definition(job, **new_config) end # We use regular merge (not deep_merge) to completely overwrite existing attributes - updated_config = (job.job_definition&.config || job.temp_job_definition&.config || {}).merge(new_config) + updated_config = (job.job_definition&.config || job.temp_job_definition&.config || {}) + .merge(new_config) new_job_definition = ::Ci::JobDefinition.fabricate( config: updated_config, @@ -27,5 +28,33 @@ def stub_ci_job_definition(job, **new_config) allow(job).to receive(:job_definition).and_return(new_job_definition) end + + # TODO: Maybe we can combine this with stub_ci_job_definition and call it `stub_ci_job_attrs`? + def stub_ci_job_info(job, **new_job_attrs) + new_job_attrs.symbolize_keys! + unknown_keys = new_job_attrs.keys - Ci::JobInfo::JOB_ATTRIBUTES + + if unknown_keys.any? + raise ArgumentError, + "You can only stub valid job info attributes. Invalid key(s): #{unknown_keys.join(', ')}. " \ + "Allowed: #{Ci::JobInfo::JOB_ATTRIBUTES.join(', ')}" + end + + # We use regular merge (not deep_merge) to completely overwrite existing attributes + updated_job_attrs = (job.job_info&.job_attributes || job.temp_job_info&.job_attributes || {}).merge(new_job_attrs) + + new_job_info = ::Ci::JobInfo.fabricate( + job_attrs: updated_job_attrs, + project_id: job.pipeline.project.id, + partition_id: job.pipeline.partition_id + ) + + new_job_info.validate + # TODO: Update this to catch other column validation errors too. + config_errors = new_job_info.errors[:config] + raise ActiveRecord::RecordInvalid, config_errors.join(', ') if config_errors.any? + + allow(job).to receive(:job_info).and_return(new_job_info) + end end end