diff --git a/app/models/ci/pipeline.rb b/app/models/ci/pipeline.rb
index b8d3c7819c908d96c37650fcebd6789b24eefdbc..84d4c895c68e0bad441b039f1352b8170e4dd721 100644
--- a/app/models/ci/pipeline.rb
+++ b/app/models/ci/pipeline.rb
@@ -1090,8 +1090,12 @@ def environments_in_self_and_project_descendants(deployment_status: nil)
       # We limit to 100 unique environments for application safety.
       # See: https://gitlab.com/gitlab-org/gitlab/-/issues/340781#note_699114700
       #
-      # TODO: This metadata query can be removed when historical job environment
-      # records have been backfilled.
+      # TODO: Environment information is being migrated from Ci::BuildMetadata
+      # to Environments::Job so that old metadata records can be removed. Newly
+      # created jobs have an associated Environments::Job record, but historical
+      # jobs do not. These will be created in a background migration as part of
+      # https://gitlab.com/gitlab-org/gitlab/-/issues/573186, and once this has
+      # been finalised the metadata query here can be removed.
       expanded_environment_names =
         jobs_in_self_and_project_descendants.joins(:metadata)
           .where.not(Ci::BuildMetadata.table_name => { expanded_environment_name: nil })
diff --git a/app/models/concerns/ci/deployable.rb b/app/models/concerns/ci/deployable.rb
index f800ee55f7b35c76782f684688d532ac163f7e0c..7cf3fed9df73cab9c5d3e3a673df55e4b9bad680 100644
--- a/app/models/concerns/ci/deployable.rb
+++ b/app/models/concerns/ci/deployable.rb
@@ -227,8 +227,12 @@ def environment_permanent_metadata
       if job_environment.present?
         job_environment.options
       else
-        # TODO: This fallback can be removed when historical job environment
-        # records have been backfilled.
+        # TODO: Environment information is being migrated from Ci::BuildMetadata
+        # to Environments::Job so that old metadata records can be removed. Newly
+        # created jobs have an associated job_environment record, but historical
+        # jobs do not. These will be created in a background migration as part of
+        # https://gitlab.com/gitlab-org/gitlab/-/issues/573186, and once this has
+        # been finalised this fallback can be removed.
         environment_options_for_permanent_storage
       end
     end
diff --git a/db/docs/batched_background_migrations/backfill_job_environments.yml b/db/docs/batched_background_migrations/backfill_job_environments.yml
new file mode 100644
index 0000000000000000000000000000000000000000..cf3d24ba93e54321fe8b22c0a5d14a4cf51a8462
--- /dev/null
+++ b/db/docs/batched_background_migrations/backfill_job_environments.yml
@@ -0,0 +1,8 @@
+---
+migration_job_name: BackfillJobEnvironments
+description: Backfills Environments::Job records using associated CI jobs and CI job metadata
+feature_category: deployment_management
+introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/201868
+milestone: '18.5'
+queued_migration_version: 20250915010014
+finalized_by: # version of the migration that finalized this BBM
diff --git a/db/post_migrate/20250915010014_queue_backfill_job_environments.rb b/db/post_migrate/20250915010014_queue_backfill_job_environments.rb
new file mode 100644
index 0000000000000000000000000000000000000000..97467646254b8e3bee8d422fd7eafa580b9c3e76
--- /dev/null
+++ b/db/post_migrate/20250915010014_queue_backfill_job_environments.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+class QueueBackfillJobEnvironments < Gitlab::Database::Migration[2.3]
+  milestone '18.5'
+
+  restrict_gitlab_migration gitlab_schema: :gitlab_ci
+
+  MIGRATION = "BackfillJobEnvironments"
+  BATCH_SIZE = 25_000
+  SUB_BATCH_SIZE = 500
+
+  def up
+    queue_batched_background_migration(
+      MIGRATION,
+      :p_ci_builds_metadata,
+      :id,
+      batch_size: BATCH_SIZE,
+      sub_batch_size: SUB_BATCH_SIZE
+    )
+  end
+
+  def down
+    delete_batched_background_migration(MIGRATION, :p_ci_builds_metadata, :id, [])
+  end
+end
diff --git a/db/schema_migrations/20250915010014 b/db/schema_migrations/20250915010014
new file mode 100644
index 0000000000000000000000000000000000000000..27cc7bf858962ab8312e027cf6a6b7fa2ad703e1
--- /dev/null
+++ b/db/schema_migrations/20250915010014
@@ -0,0 +1 @@
+3dac9aafbac400233a647b20cd03cf135ce4816db4c41fa4ff2406244ac00492
\ No newline at end of file
diff --git a/lib/gitlab/background_migration/backfill_job_environments.rb b/lib/gitlab/background_migration/backfill_job_environments.rb
new file mode 100644
index 0000000000000000000000000000000000000000..53523283171d53431a075a791e568185c016df6a
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_job_environments.rb
@@ -0,0 +1,72 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module BackgroundMigration
+    # Backfills job_environments rows from p_ci_builds_metadata rows that have
+    # an expanded environment name, joined to their p_ci_builds row.
+    class BackfillJobEnvironments < BatchedMigrationJob
+      operation_name :backfill_job_environments
+      feature_category :deployment_management
+
+      # Column order shared by the VALUES tuples and the ci_job_attributes CTE,
+      # so the two cannot drift apart.
+      JOB_ATTRIBUTE_COLUMNS = %w[project_id ci_job_id ci_pipeline_id expanded_environment_name options].freeze
+
+      def perform
+        each_sub_batch do |sub_batch|
+          relation = sub_batch
+            .where.not(expanded_environment_name: nil)
+            .joins("INNER JOIN p_ci_builds ON p_ci_builds.partition_id = p_ci_builds_metadata.partition_id
+              AND p_ci_builds.id = p_ci_builds_metadata.build_id")
+            .select("p_ci_builds_metadata.project_id,
+              p_ci_builds_metadata.build_id AS ci_job_id,
+              p_ci_builds_metadata.expanded_environment_name,
+              p_ci_builds_metadata.config_options -> 'environment' AS options,
+              p_ci_builds.commit_id AS ci_pipeline_id")
+
+          rows = relation.map { |metadata| extract_attributes(metadata).values_at(*JOB_ATTRIBUTE_COLUMNS) }
+
+          next if rows.empty?
+
+          values_list = Arel::Nodes::ValuesList.new(rows).to_sql
+
+          # The INNER JOIN drops rows whose environment cannot be found by name;
+          # ON CONFLICT DO NOTHING leaves already-existing rows untouched.
+          ApplicationRecord.connection.execute(<<~SQL)
+            WITH ci_job_attributes(#{JOB_ATTRIBUTE_COLUMNS.join(', ')}) AS (#{values_list})
+            INSERT INTO job_environments(project_id, environment_id, ci_job_id, ci_pipeline_id, deployment_id, expanded_environment_name, options)
+            SELECT ci_job_attributes.project_id, environments.id, ci_job_id, ci_pipeline_id, deployments.id, expanded_environment_name, options::jsonb
+            FROM ci_job_attributes
+            INNER JOIN environments ON environments.project_id = ci_job_attributes.project_id
+              AND environments.name = ci_job_attributes.expanded_environment_name
+            LEFT JOIN deployments ON deployments.deployable_id = ci_job_attributes.ci_job_id
+              AND deployments.deployable_type = 'CommitStatus'
+            ON CONFLICT DO NOTHING
+          SQL
+        end
+      end
+
+      private
+
+      # Picks the insertable attributes from a metadata row and reduces `options`
+      # to action, deployment_tier and kubernetes.namespace, encoded as JSON.
+      def extract_attributes(metadata)
+        attributes = metadata.attributes.slice(*JOB_ATTRIBUTE_COLUMNS)
+
+        # Anything other than a JSON object (NULL, string, array) is treated as
+        # "no options" so a single malformed row cannot fail the whole batch.
+        raw_options = attributes['options']
+        raw_options = {} unless raw_options.is_a?(Hash)
+
+        kubernetes = raw_options['kubernetes']
+        kubernetes_options = kubernetes.slice('namespace') if kubernetes.is_a?(Hash)
+
+        options = raw_options.slice('action', 'deployment_tier')
+        options['kubernetes'] = kubernetes_options if kubernetes_options.present?
+
+        attributes['options'] = options.to_json
+        attributes
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/background_migration/backfill_job_environments_spec.rb b/spec/lib/gitlab/background_migration/backfill_job_environments_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..5ddec48280be5c5b8b8abd09a89eda79169b89d8
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/backfill_job_environments_spec.rb
@@ -0,0 +1,263 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+# rubocop:disable RSpec/MultipleMemoizedHelpers -- Cannot use factories in migration specs
+RSpec.describe Gitlab::BackgroundMigration::BackfillJobEnvironments, feature_category: :deployment_management do
+  let(:organization) { table(:organizations).create!(name: 'organization', path: 'organization') }
+  let(:namespace1) do
+    table(:namespaces).create!(name: "namespace1", path: "namespace1", organization_id: organization.id)
+  end
+
+  let(:namespace2) do
+    table(:namespaces).create!(name: "namespace2", path: "namespace2", organization_id: organization.id)
+  end
+
+  let(:ci_builds) { table(:p_ci_builds, database: :ci, primary_key: :id) }
+  let(:ci_builds_metadata) { table(:p_ci_builds_metadata, database: :ci, primary_key: :id) }
+  let(:ci_pipelines) { table(:p_ci_pipelines, database: :ci, primary_key: :id) }
+
+  let(:projects) { table(:projects) }
+  let(:environments) { table(:environments) }
+  let(:deployments) { table(:deployments) }
+  let(:job_environments) { table(:job_environments) }
+
+  let!(:project1) do
+    projects.create!(
+      namespace_id: namespace1.id,
+      project_namespace_id: namespace1.id,
+      organization_id: organization.id
+    )
+  end
+
+  let!(:project2) do
+    projects.create!(
+      namespace_id: namespace2.id,
+      project_namespace_id: namespace2.id,
+      organization_id: organization.id
+    )
+  end
+
+  let!(:staging1) { environments.create!(project_id: project1.id, name: 'staging1', slug: 'stg1') }
+  let!(:staging2) { environments.create!(project_id: project2.id, name: 'staging2', slug: 'stg2') }
+  let!(:production1) { environments.create!(project_id: project1.id, name: 'production1', slug: 'prod1') }
+  let!(:production2) { environments.create!(project_id: project2.id, name: 'production2', slug: 'prod2') }
+
+  let!(:pipeline1) { ci_pipelines.create!(partition_id: 100, project_id: project1.id) }
+  let!(:pipeline2) { ci_pipelines.create!(partition_id: 100, project_id: project2.id) }
+
+  let!(:build1) do
+    ci_builds.create!(partition_id: pipeline1.partition_id, commit_id: pipeline1.id, project_id: project1.id)
+  end
+
+  let!(:build2) do
+    ci_builds.create!(partition_id: pipeline2.partition_id, commit_id: pipeline2.id, project_id: project2.id)
+  end
+
+  let!(:build3) do
+    ci_builds.create!(partition_id: pipeline1.partition_id, commit_id: pipeline1.id, project_id: project1.id)
+  end
+
+  let!(:build4) do
+    ci_builds.create!(partition_id: pipeline2.partition_id, commit_id: pipeline2.id, project_id: project2.id)
+  end
+
+  let!(:build5) do
+    ci_builds.create!(partition_id: pipeline1.partition_id, commit_id: pipeline1.id, project_id: project1.id)
+  end
+
+  let!(:build6) do
+    ci_builds.create!(partition_id: pipeline2.partition_id, commit_id: pipeline2.id, project_id: project2.id)
+  end
+
+  let!(:build7) do
+    ci_builds.create!(partition_id: pipeline1.partition_id, commit_id: pipeline1.id, project_id: project1.id)
+  end
+
+  let!(:build8) do
+    ci_builds.create!(partition_id: pipeline2.partition_id, commit_id: pipeline2.id, project_id: project2.id)
+  end
+
+  let!(:deployment1) do
+    deployments.create!(project_id: project1.id, environment_id: staging1.id, deployable_type: 'CommitStatus',
+      deployable_id: build1.id, iid: 1, ref: 'main', sha: 'aaaaaa', tag: true, status: 0)
+  end
+
+  let!(:deployment2) do
+    deployments.create!(project_id: project2.id, environment_id: staging2.id, deployable_id: build2.id, iid: 1,
+      ref: 'main', sha: 'aaaaaa', tag: false, status: 0)
+  end
+
+  let!(:deployment3) do
+    deployments.create!(project_id: project2.id, environment_id: production2.id, deployable_type: 'CommitStatus',
+      deployable_id: build4.id, iid: 2, ref: 'main', sha: 'aaaaaa', tag: false, status: 0)
+  end
+
+  let!(:metadata1) do
+    environment_name = 'staging1'
+    options = { script: 'example', environment: { name: environment_name } }
+
+    ci_builds_metadata.create!(partition_id: pipeline1.partition_id, build_id: build1.id, project_id: project1.id,
+      expanded_environment_name: environment_name, config_options: options)
+  end
+
+  let!(:metadata2) do
+    environment_name = 'staging2'
+    options = { environment: { name: 'staging2', action: 'stop', deployment_tier: 'staging' } }
+
+    ci_builds_metadata.create!(partition_id: pipeline2.partition_id, build_id: build2.id, project_id: project2.id,
+      expanded_environment_name: environment_name, config_options: options)
+  end
+
+  let!(:metadata3) do
+    environment_name = 'production1'
+    options = { script: 'example', environment: { deployment_tier: 'testing' } }
+
+    ci_builds_metadata.create!(partition_id: pipeline1.partition_id, build_id: build3.id, project_id: project1.id,
+      expanded_environment_name: environment_name, config_options: options)
+  end
+
+  let!(:metadata4) do
+    environment_name = 'production2'
+    options = { script: 'example',
+                environment: { name: environment_name, kubernetes: { namespace: 'namespace', agent: 'agent' } } }
+
+    ci_builds_metadata.create!(partition_id: pipeline2.partition_id, build_id: build4.id, project_id: project2.id,
+      expanded_environment_name: environment_name, config_options: options)
+  end
+
+  # Skipped: environment name is blank
+  let!(:metadata5) do
+    environment_name = nil
+    options = { script: 'example', environment: { name: 'excluded' } }
+
+    ci_builds_metadata.create!(partition_id: pipeline1.partition_id, build_id: build5.id, project_id: project1.id,
+      expanded_environment_name: environment_name, config_options: options)
+  end
+
+  # Skipped: environment name is present but environment has since been deleted
+  let!(:metadata6) do
+    environment_name = 'non-existing'
+    options = { script: 'example', environment: { name: 'deleted' } }
+
+    ci_builds_metadata.create!(partition_id: pipeline2.partition_id, build_id: build6.id, project_id: project2.id,
+      expanded_environment_name: environment_name, config_options: options)
+  end
+
+  let!(:metadata7) do
+    environment_name = 'staging1'
+    options = nil
+
+    ci_builds_metadata.create!(partition_id: pipeline1.partition_id, build_id: build7.id, project_id: project1.id,
+      expanded_environment_name: environment_name, config_options: options)
+  end
+
+  # Skipped: job environment record already exists
+  let!(:metadata8) do
+    environment_name = 'staging2'
+    options = { environment: { name: 'staging2', action: 'stop', deployment_tier: 'staging' } }
+
+    ci_builds_metadata.create!(partition_id: pipeline2.partition_id, build_id: build8.id, project_id: project2.id,
+      expanded_environment_name: environment_name, config_options: options)
+  end
+
+  let!(:existing_job_environment) do
+    job_environments.create!(project_id: project2.id, environment_id: staging2.id, ci_pipeline_id: pipeline2.id,
+      ci_job_id: build8.id, expanded_environment_name: staging2.name,
+      options: { action: 'stop', deployment_tier: 'staging' })
+  end
+
+  let(:migration) do
+    start_id, end_id = ci_builds_metadata.pick('MIN(id), MAX(id)')
+
+    described_class.new(
+      start_id: start_id,
+      end_id: end_id,
+      batch_table: :p_ci_builds_metadata,
+      batch_column: :id,
+      sub_batch_size: 2,
+      pause_ms: 0,
+      job_arguments: [],
+      connection: Ci::ApplicationRecord.connection
+    )
+  end
+
+  describe '#perform' do
+    it 'constructs job_environment records from associated records', :aggregate_failures do
+      expect { migration.perform }.to change { job_environments.count }.from(1).to(6)
+
+      job_environment1 = job_environments.where(ci_job_id: build1.id).first
+      expect(job_environment1).to have_attributes(
+        project_id: project1.id,
+        environment_id: staging1.id,
+        ci_pipeline_id: pipeline1.id,
+        deployment_id: deployment1.id,
+        expanded_environment_name: staging1.name,
+        options: {}
+      )
+
+      job_environment2 = job_environments.where(ci_job_id: build2.id).first
+      expect(job_environment2).to have_attributes(
+        project_id: project2.id,
+        environment_id: staging2.id,
+        ci_pipeline_id: pipeline2.id,
+        deployment_id: nil,
+        expanded_environment_name: staging2.name,
+        options: { 'action' => 'stop', 'deployment_tier' => 'staging' }
+      )
+
+      job_environment3 = job_environments.where(ci_job_id: build3.id).first
+      expect(job_environment3).to have_attributes(
+        project_id: project1.id,
+        environment_id: production1.id,
+        ci_pipeline_id: pipeline1.id,
+        deployment_id: nil,
+        expanded_environment_name: production1.name,
+        options: { 'deployment_tier' => 'testing' }
+      )
+
+      job_environment4 = job_environments.where(ci_job_id: build4.id).first
+      expect(job_environment4).to have_attributes(
+        project_id: project2.id,
+        environment_id: production2.id,
+        ci_pipeline_id: pipeline2.id,
+        deployment_id: deployment3.id,
+        expanded_environment_name: production2.name,
+        options: { 'kubernetes' => { 'namespace' => 'namespace' } }
+      )
+
+      job_environment5 = job_environments.where(ci_job_id: build7.id).first
+      expect(job_environment5).to have_attributes(
+        project_id: project1.id,
+        environment_id: staging1.id,
+        ci_pipeline_id: pipeline1.id,
+        deployment_id: nil,
+        expanded_environment_name: staging1.name,
+        options: {}
+      )
+    end
+
+    context 'when the batch has no applicable records' do
+      before do
+        ci_builds_metadata.update_all(expanded_environment_name: nil)
+      end
+
+      it 'continues without raising' do
+        expect { migration.perform }.not_to raise_error
+      end
+    end
+
+    context 'when the stored environment options are not a JSON object' do
+      before do
+        metadata1.update!(config_options: { script: 'example', environment: 'staging1' })
+      end
+
+      it 'backfills the record with empty options' do
+        migration.perform
+
+        expect(job_environments.find_by(ci_job_id: build1.id)).to have_attributes(options: {})
+      end
+    end
+  end
+end
+# rubocop:enable RSpec/MultipleMemoizedHelpers
diff --git a/spec/migrations/20250915010014_queue_backfill_job_environments_spec.rb b/spec/migrations/20250915010014_queue_backfill_job_environments_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..25f978111d23aa279d68aa1bf0924dab7fc33fca
--- /dev/null
+++ b/spec/migrations/20250915010014_queue_backfill_job_environments_spec.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+require_migration!
+
+RSpec.describe QueueBackfillJobEnvironments, migration: :gitlab_ci, feature_category: :deployment_management do
+  let!(:batched_migration) { described_class::MIGRATION }
+
+  it 'schedules a new batched migration' do
+    reversible_migration do |migration|
+      migration.before -> {
+        expect(batched_migration).not_to have_scheduled_batched_migration
+      }
+
+      migration.after -> {
+        expect(batched_migration).to have_scheduled_batched_migration(
+          gitlab_schema: :gitlab_ci,
+          table_name: :p_ci_builds_metadata,
+          column_name: :id,
+          batch_size: described_class::BATCH_SIZE,
+          sub_batch_size: described_class::SUB_BATCH_SIZE
+        )
+      }
+    end
+  end
+end