From 5cf5e53c35be3bcf4ac0ac5022f63ff6b8738c79 Mon Sep 17 00:00:00 2001 From: Tiger Date: Mon, 28 Jul 2025 12:16:57 +1200 Subject: [PATCH] Backfill job environments When a CI job references an environment (it uses the `environment` keyword in the CI template) a new Environments::Job record is now created to persist a link to the environment. This migration creates records for historical CI jobs that existed prior to this persisted link being added. This means that jobs will no longer rely on the associated metadata for environment information, which allows old metadata to be removed. Changelog: other --- app/models/ci/pipeline.rb | 8 +- app/models/concerns/ci/deployable.rb | 8 +- .../backfill_job_environments.yml | 8 + ...5010014_queue_backfill_job_environments.rb | 25 ++ db/schema_migrations/20250915010014 | 1 + .../backfill_job_environments.rb | 58 ++++ .../backfill_job_environments_spec.rb | 251 ++++++++++++++++++ ...14_queue_backfill_job_environments_spec.rb | 26 ++ 8 files changed, 381 insertions(+), 4 deletions(-) create mode 100644 db/docs/batched_background_migrations/backfill_job_environments.yml create mode 100644 db/post_migrate/20250915010014_queue_backfill_job_environments.rb create mode 100644 db/schema_migrations/20250915010014 create mode 100644 lib/gitlab/background_migration/backfill_job_environments.rb create mode 100644 spec/lib/gitlab/background_migration/backfill_job_environments_spec.rb create mode 100644 spec/migrations/20250915010014_queue_backfill_job_environments_spec.rb diff --git a/app/models/ci/pipeline.rb b/app/models/ci/pipeline.rb index b8d3c7819c908d..84d4c895c68e0b 100644 --- a/app/models/ci/pipeline.rb +++ b/app/models/ci/pipeline.rb @@ -1090,8 +1090,12 @@ def environments_in_self_and_project_descendants(deployment_status: nil) # We limit to 100 unique environments for application safety. # See: https://gitlab.com/gitlab-org/gitlab/-/issues/340781#note_699114700 # - # TODO: This metadata query can be removed when historical job environment - # records have been backfilled. + # TODO: Environment information is being migrated from Ci::BuildMetadata + # to Environments::Job so that old metadata records can be removed. Newly + # created jobs have an associated Environments::Job record, but historical + # jobs do not. These will be created in a background migration as part of + # https://gitlab.com/gitlab-org/gitlab/-/issues/573186, and once this has + # been finalised the metadata query here can be removed. expanded_environment_names = jobs_in_self_and_project_descendants.joins(:metadata) .where.not(Ci::BuildMetadata.table_name => { expanded_environment_name: nil }) diff --git a/app/models/concerns/ci/deployable.rb b/app/models/concerns/ci/deployable.rb index f800ee55f7b35c..7cf3fed9df73ca 100644 --- a/app/models/concerns/ci/deployable.rb +++ b/app/models/concerns/ci/deployable.rb @@ -227,8 +227,12 @@ def environment_permanent_metadata if job_environment.present? job_environment.options else - # TODO: This fallback can be removed when historical job environment - # records have been backfilled. + # TODO: Environment information is being migrated from Ci::BuildMetadata + # to Environments::Job so that old metadata records can be removed. Newly + # created jobs have an associated job_environment record, but historical + # jobs do not. These will be created in a background migration as part of + # https://gitlab.com/gitlab-org/gitlab/-/issues/573186, and once this has + # been finalised this fallback can be removed. environment_options_for_permanent_storage end end diff --git a/db/docs/batched_background_migrations/backfill_job_environments.yml b/db/docs/batched_background_migrations/backfill_job_environments.yml new file mode 100644 index 00000000000000..cf3d24ba93e543 --- /dev/null +++ b/db/docs/batched_background_migrations/backfill_job_environments.yml @@ -0,0 +1,8 @@ +--- +migration_job_name: BackfillJobEnvironments +description: Backfills Environments::Job records using associated CI jobs and CI job metadata +feature_category: deployment_management +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/201868 +milestone: '18.5' +queued_migration_version: 20250915010014 +finalized_by: # version of the migration that finalized this BBM diff --git a/db/post_migrate/20250915010014_queue_backfill_job_environments.rb b/db/post_migrate/20250915010014_queue_backfill_job_environments.rb new file mode 100644 index 00000000000000..97467646254b8e --- /dev/null +++ b/db/post_migrate/20250915010014_queue_backfill_job_environments.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +class QueueBackfillJobEnvironments < Gitlab::Database::Migration[2.3] + milestone '18.5' + + restrict_gitlab_migration gitlab_schema: :gitlab_ci + + MIGRATION = "BackfillJobEnvironments" + BATCH_SIZE = 25000 + SUB_BATCH_SIZE = 500 + + def up + queue_batched_background_migration( + MIGRATION, + :p_ci_builds_metadata, + :id, + batch_size: BATCH_SIZE, + sub_batch_size: SUB_BATCH_SIZE + ) + end + + def down + delete_batched_background_migration(MIGRATION, :p_ci_builds_metadata, :id, []) + end +end diff --git a/db/schema_migrations/20250915010014 b/db/schema_migrations/20250915010014 new file mode 100644 index 00000000000000..27cc7bf858962a --- /dev/null +++ b/db/schema_migrations/20250915010014 @@ -0,0 +1 @@ +3dac9aafbac400233a647b20cd03cf135ce4816db4c41fa4ff2406244ac00492 \ No newline at end of file diff --git a/lib/gitlab/background_migration/backfill_job_environments.rb b/lib/gitlab/background_migration/backfill_job_environments.rb new file mode 100644 index 00000000000000..53523283171d53 --- /dev/null +++ b/lib/gitlab/background_migration/backfill_job_environments.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + class BackfillJobEnvironments < BatchedMigrationJob + operation_name :backfill_job_environments + feature_category :deployment_management + + def perform + each_sub_batch do |sub_batch| + relation = sub_batch + .where.not(expanded_environment_name: nil) + .joins("INNER JOIN p_ci_builds ON p_ci_builds.partition_id = p_ci_builds_metadata.partition_id + AND p_ci_builds.id = p_ci_builds_metadata.build_id") + .select("p_ci_builds_metadata.project_id, + p_ci_builds_metadata.build_id AS ci_job_id, + p_ci_builds_metadata.expanded_environment_name, + p_ci_builds_metadata.config_options -> 'environment' AS options, + p_ci_builds.commit_id AS ci_pipeline_id") + + job_environment_attributes = relation.map { |metadata| extract_attributes(metadata) } + + next if job_environment_attributes.empty? + + values_list = Arel::Nodes::ValuesList.new(job_environment_attributes.map(&:values)).to_sql + + ApplicationRecord.connection.execute(<<~SQL) + WITH ci_job_attributes(project_id, ci_job_id, ci_pipeline_id, expanded_environment_name, options) AS (#{values_list}) + INSERT INTO job_environments(project_id, environment_id, ci_job_id, ci_pipeline_id, deployment_id, expanded_environment_name, options) + SELECT ci_job_attributes.project_id, environments.id, ci_job_id, ci_pipeline_id, deployments.id, expanded_environment_name, options::jsonb + FROM ci_job_attributes + INNER JOIN environments ON environments.project_id = ci_job_attributes.project_id + AND environments.name = ci_job_attributes.expanded_environment_name + LEFT JOIN deployments ON deployments.deployable_id = ci_job_attributes.ci_job_id + AND deployments.deployable_type = 'CommitStatus' + ON CONFLICT DO NOTHING + SQL + end + end + + private + + def extract_attributes(metadata) + attributes = metadata.attributes.slice('project_id', 'ci_job_id', 'ci_pipeline_id', + 'expanded_environment_name', 'options') + + options = attributes['options'] || {} + kubernetes_options = options['kubernetes']&.slice('namespace') + + options = options.slice('action', 'deployment_tier') + options['kubernetes'] = kubernetes_options if kubernetes_options.present? + + attributes['options'] = options.to_json + attributes + end + end + end +end diff --git a/spec/lib/gitlab/background_migration/backfill_job_environments_spec.rb b/spec/lib/gitlab/background_migration/backfill_job_environments_spec.rb new file mode 100644 index 00000000000000..5ddec48280be5c --- /dev/null +++ b/spec/lib/gitlab/background_migration/backfill_job_environments_spec.rb @@ -0,0 +1,251 @@ +# frozen_string_literal: true + +require 'spec_helper' + +# rubocop:disable RSpec/MultipleMemoizedHelpers -- Cannot use factories in migration specs +RSpec.describe Gitlab::BackgroundMigration::BackfillJobEnvironments, feature_category: :deployment_management do + let(:organization) { table(:organizations).create!(name: 'organization', path: 'organization') } + let(:namespace1) do + table(:namespaces).create!(name: "namespace1", path: "namespace1", organization_id: organization.id) + end + + let(:namespace2) do + table(:namespaces).create!(name: "namespace2", path: "namespace2", organization_id: organization.id) + end + + let(:ci_builds) { table(:p_ci_builds, database: :ci, primary_key: :id) } + let(:ci_builds_metadata) { table(:p_ci_builds_metadata, database: :ci, primary_key: :id) } + let(:ci_pipelines) { table(:p_ci_pipelines, database: :ci, primary_key: :id) } + + let(:projects) { table(:projects) } + let(:environments) { table(:environments) } + let(:deployments) { table(:deployments) } + let(:job_environments) { table(:job_environments) } + + let!(:project1) do + projects.create!( + namespace_id: namespace1.id, + project_namespace_id: namespace1.id, + organization_id: organization.id + ) + end + + let!(:project2) do + projects.create!( + namespace_id: namespace2.id, + project_namespace_id: namespace2.id, + organization_id: organization.id + ) + end + + let!(:staging1) { environments.create!(project_id: project1.id, name: 'staging1', slug: 'stg1') } + let!(:staging2) { environments.create!(project_id: project2.id, name: 'staging2', slug: 'stg2') } + let!(:production1) { environments.create!(project_id: project1.id, name: 'production1', slug: 'prod1') } + let!(:production2) { environments.create!(project_id: project2.id, name: 'production2', slug: 'prod2') } + + let!(:pipeline1) { ci_pipelines.create!(partition_id: 100, project_id: project1.id) } + let!(:pipeline2) { ci_pipelines.create!(partition_id: 100, project_id: project2.id) } + + let!(:build1) do + ci_builds.create!(partition_id: pipeline1.partition_id, commit_id: pipeline1.id, project_id: project1.id) + end + + let!(:build2) do + ci_builds.create!(partition_id: pipeline2.partition_id, commit_id: pipeline2.id, project_id: project2.id) + end + + let!(:build3) do + ci_builds.create!(partition_id: pipeline1.partition_id, commit_id: pipeline1.id, project_id: project1.id) + end + + let!(:build4) do + ci_builds.create!(partition_id: pipeline2.partition_id, commit_id: pipeline2.id, project_id: project2.id) + end + + let!(:build5) do + ci_builds.create!(partition_id: pipeline1.partition_id, commit_id: pipeline1.id, project_id: project1.id) + end + + let!(:build6) do + ci_builds.create!(partition_id: pipeline2.partition_id, commit_id: pipeline2.id, project_id: project2.id) + end + + let!(:build7) do + ci_builds.create!(partition_id: pipeline1.partition_id, commit_id: pipeline1.id, project_id: project1.id) + end + + let!(:build8) do + ci_builds.create!(partition_id: pipeline2.partition_id, commit_id: pipeline2.id, project_id: project2.id) + end + + let!(:deployment1) do + deployments.create!(project_id: project1.id, environment_id: staging1.id, deployable_type: 'CommitStatus', + deployable_id: build1.id, iid: 1, ref: 'main', sha: 'aaaaaa', tag: true, status: 0) + end + + let!(:deployment2) do + deployments.create!(project_id: project2.id, environment_id: staging2.id, deployable_id: build2.id, iid: 1, + ref: 'main', sha: 'aaaaaa', tag: false, status: 0) + end + + let!(:deployment3) do + deployments.create!(project_id: project2.id, environment_id: production2.id, deployable_type: 'CommitStatus', + deployable_id: build4.id, iid: 2, ref: 'main', sha: 'aaaaaa', tag: false, status: 0) + end + + let!(:metadata1) do + environment_name = 'staging1' + options = { script: 'example', environment: { name: environment_name } } + + ci_builds_metadata.create!(partition_id: pipeline1.partition_id, build_id: build1.id, project_id: project1.id, + expanded_environment_name: environment_name, config_options: options) + end + + let!(:metadata2) do + environment_name = 'staging2' + options = { environment: { name: 'staging2', action: 'stop', deployment_tier: 'staging' } } + + ci_builds_metadata.create!(partition_id: pipeline2.partition_id, build_id: build2.id, project_id: project2.id, + expanded_environment_name: environment_name, config_options: options) + end + + let!(:metadata3) do + environment_name = 'production1' + options = { script: 'example', environment: { deployment_tier: 'testing' } } + + ci_builds_metadata.create!(partition_id: pipeline1.partition_id, build_id: build3.id, project_id: project1.id, + expanded_environment_name: environment_name, config_options: options) + end + + let!(:metadata4) do + environment_name = 'production2' + options = { script: 'example', + environment: { name: environment_name, kubernetes: { namespace: 'namespace', agent: 'agent' } } } + + ci_builds_metadata.create!(partition_id: pipeline2.partition_id, build_id: build4.id, project_id: project2.id, + expanded_environment_name: environment_name, config_options: options) + end + + # Skipped: environment name is blank + let!(:metadata5) do + environment_name = nil + options = { script: 'example', environment: { name: 'excluded' } } + + ci_builds_metadata.create!(partition_id: pipeline1.partition_id, build_id: build5.id, project_id: project1.id, + expanded_environment_name: environment_name, config_options: options) + end + + # Skipped: environment name is present but environment has since been deleted + let!(:metadata6) do + environment_name = 'non-existing' + options = { script: 'example', environment: { name: 'deleted' } } + + ci_builds_metadata.create!(partition_id: pipeline2.partition_id, build_id: build6.id, project_id: project2.id, + expanded_environment_name: environment_name, config_options: options) + end + + let!(:metadata7) do + environment_name = 'staging1' + options = nil + + ci_builds_metadata.create!(partition_id: pipeline1.partition_id, build_id: build7.id, project_id: project1.id, + expanded_environment_name: environment_name, config_options: options) + end + + # Skipped: job environment record already exists + let!(:metadata8) do + environment_name = 'staging2' + options = { environment: { name: 'staging2', action: 'stop', deployment_tier: 'staging' } } + + ci_builds_metadata.create!(partition_id: pipeline2.partition_id, build_id: build8.id, project_id: project2.id, + expanded_environment_name: environment_name, config_options: options) + end + + let!(:existing_job_environment) do + job_environments.create!(project_id: project2.id, environment_id: staging2.id, ci_pipeline_id: pipeline2.id, + ci_job_id: build8.id, expanded_environment_name: staging2.name, + options: { action: 'stop', deployment_tier: 'staging' }) + end + + let(:migration) do + start_id, end_id = ci_builds_metadata.pick('MIN(id), MAX(id)') + + described_class.new( + start_id: start_id, + end_id: end_id, + batch_table: :p_ci_builds_metadata, + batch_column: :id, + sub_batch_size: 2, + pause_ms: 0, + job_arguments: [], + connection: Ci::ApplicationRecord.connection + ) + end + + describe '#perform' do + it 'constructs job_environment records from associated records', :aggregate_failures do + expect { migration.perform }.to change { job_environments.count }.from(1).to(6) + + job_environment1 = job_environments.where(ci_job_id: build1.id).first + expect(job_environment1).to have_attributes( + project_id: project1.id, + environment_id: staging1.id, + ci_pipeline_id: pipeline1.id, + deployment_id: deployment1.id, + expanded_environment_name: staging1.name, + options: {} + ) + + job_environment2 = job_environments.where(ci_job_id: build2.id).first + expect(job_environment2).to have_attributes( + project_id: project2.id, + environment_id: staging2.id, + ci_pipeline_id: pipeline2.id, + deployment_id: nil, + expanded_environment_name: staging2.name, + options: { 'action' => 'stop', 'deployment_tier' => 'staging' } + ) + + job_environment3 = job_environments.where(ci_job_id: build3.id).first + expect(job_environment3).to have_attributes( + project_id: project1.id, + environment_id: production1.id, + ci_pipeline_id: pipeline1.id, + deployment_id: nil, + expanded_environment_name: production1.name, + options: { 'deployment_tier' => 'testing' } + ) + + job_environment4 = job_environments.where(ci_job_id: build4.id).first + expect(job_environment4).to have_attributes( + project_id: project2.id, + environment_id: production2.id, + ci_pipeline_id: pipeline2.id, + deployment_id: deployment3.id, + expanded_environment_name: production2.name, + options: { 'kubernetes' => { 'namespace' => 'namespace' } } + ) + + job_environment5 = job_environments.where(ci_job_id: build7.id).first + expect(job_environment5).to have_attributes( + project_id: project1.id, + environment_id: staging1.id, + ci_pipeline_id: pipeline1.id, + deployment_id: nil, + expanded_environment_name: staging1.name, + options: {} + ) + end + + context 'when the batch has no applicable records' do + before do + ci_builds_metadata.update_all(expanded_environment_name: nil) + end + + it 'continues without raising' do + expect { migration.perform }.not_to raise_error + end + end + end +end +# rubocop:enable RSpec/MultipleMemoizedHelpers diff --git a/spec/migrations/20250915010014_queue_backfill_job_environments_spec.rb b/spec/migrations/20250915010014_queue_backfill_job_environments_spec.rb new file mode 100644 index 00000000000000..25f978111d23aa --- /dev/null +++ b/spec/migrations/20250915010014_queue_backfill_job_environments_spec.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +require 'spec_helper' +require_migration! + +RSpec.describe QueueBackfillJobEnvironments, migration: :gitlab_ci, feature_category: :deployment_management do + let!(:batched_migration) { described_class::MIGRATION } + + it 'schedules a new batched migration' do + reversible_migration do |migration| + migration.before -> { + expect(batched_migration).not_to have_scheduled_batched_migration + } + + migration.after -> { + expect(batched_migration).to have_scheduled_batched_migration( + gitlab_schema: :gitlab_ci, + table_name: :p_ci_builds_metadata, + column_name: :id, + batch_size: described_class::BATCH_SIZE, + sub_batch_size: described_class::SUB_BATCH_SIZE + ) + } + end + end +end -- GitLab