diff --git a/lib/gitlab/ci/variables/builder.rb b/lib/gitlab/ci/variables/builder.rb
index 23af0c903915cf58c49b73efd506bb93375f372f..91535f9972297f280e71699b4dba5fd1e788d40d 100644
--- a/lib/gitlab/ci/variables/builder.rb
+++ b/lib/gitlab/ci/variables/builder.rb
@@ -19,98 +19,85 @@ def initialize(pipeline)
         # - unprotected_scoped_variables
         # - scoped_variables_for_pipeline_seed
         def scoped_variables(job, environment:, dependencies:)
-          Gitlab::Ci::Variables::Collection.new.tap do |variables|
-            if pipeline.only_workload_variables?
-              # predefined_project_variables includes things like $CI_PROJECT_PATH which are used by the runner to clone
-              # the repo
-              variables.concat(project.predefined_project_variables)
-
-              # yaml_variables is how we inject dynamic configuration into a workload
-              variables.concat(job.yaml_variables)
-
-              variables.concat(
-                user_defined_variables(options: job.options, environment: environment, job_variables: job.manual_variables)
-              )
-              next
-            end
-
-            variables.concat(predefined_variables(job, environment))
-            variables.concat(project.predefined_variables)
-            variables.concat(pipeline_variables_builder.predefined_variables)
-            variables.concat(job.runner.predefined_variables) if job.runnable? && job.runner
-            variables.concat(kubernetes_variables_from_job(environment: environment, job: job))
-            variables.concat(job.yaml_variables)
-            variables.concat(user_variables(job.user))
-            variables.concat(job.dependency_variables) if dependencies
-            variables.concat(
+          if pipeline.only_workload_variables?
+            collections = [
+              project.predefined_project_variables,
+              job.yaml_variables,
               user_defined_variables(options: job.options, environment: environment, job_variables: job.manual_variables)
-            )
-            variables.concat(release_variables)
+            ]
+          else
+            collections = [
+              predefined_variables(job, environment),
+              project.predefined_variables,
+              pipeline_variables_builder.predefined_variables,
+              (job.runner.predefined_variables if job.runnable? && job.runner),
+              kubernetes_variables_from_job(environment: environment, job: job),
+              job.yaml_variables,
+              user_variables(job.user),
+              (job.dependency_variables if dependencies),
+              user_defined_variables(options: job.options, environment: environment, job_variables: job.manual_variables),
+              release_variables
+            ]
           end
+
+          Gitlab::Ci::Variables::Collection.from_collections(*collections)
         end
 
         def unprotected_scoped_variables(job, expose_project_variables:, expose_group_variables:, environment:, dependencies:)
-          Gitlab::Ci::Variables::Collection.new.tap do |variables|
-            if pipeline.only_workload_variables?
-              # predefined_project_variables includes things like $CI_PROJECT_PATH which are used by the runner to clone
-              # the repo
-              variables.concat(project.predefined_project_variables)
-
-              # yaml_variables is how we inject dynamic configuration into a workload
-              variables.concat(job.yaml_variables)
-              variables.concat(
-                user_defined_variables(
-                  options: job.options, environment: environment, job_variables: job.manual_variables,
-                  expose_project_variables: expose_project_variables, expose_group_variables: expose_group_variables
-                )
-              )
-              next
-            end
-
-            variables.concat(predefined_variables(job, environment))
-            variables.concat(project.predefined_variables)
-            variables.concat(pipeline_variables_builder.predefined_variables)
-            variables.concat(job.runner.predefined_variables) if job.runnable? && job.runner
-            variables.concat(kubernetes_variables_from_job(environment: environment, job: job))
-            variables.concat(job.yaml_variables)
-            variables.concat(user_variables(job.user))
-            variables.concat(job.dependency_variables) if dependencies
-            variables.concat(
+          if pipeline.only_workload_variables?
+            collections = [
+              project.predefined_project_variables,
+              job.yaml_variables,
               user_defined_variables(
                 options: job.options, environment: environment, job_variables: job.manual_variables,
                 expose_project_variables: expose_project_variables, expose_group_variables: expose_group_variables
               )
-            )
-            variables.concat(release_variables)
+            ]
+          else
+            collections = [
+              predefined_variables(job, environment),
+              project.predefined_variables,
+              pipeline_variables_builder.predefined_variables,
+              (job.runner.predefined_variables if job.runnable? && job.runner),
+              kubernetes_variables_from_job(environment: environment, job: job),
+              job.yaml_variables,
+              user_variables(job.user),
+              (job.dependency_variables if dependencies),
+              user_defined_variables(
+                options: job.options, environment: environment, job_variables: job.manual_variables,
+                expose_project_variables: expose_project_variables, expose_group_variables: expose_group_variables
+              ),
+              release_variables
+            ]
           end
+
+          Gitlab::Ci::Variables::Collection.from_collections(*collections)
         end
 
         def scoped_variables_for_pipeline_seed(job_attr, environment:, kubernetes_namespace:, user:, trigger:)
-          Gitlab::Ci::Variables::Collection.new.tap do |variables|
-            if pipeline.only_workload_variables?
-              # predefined_project_variables includes things like $CI_PROJECT_PATH which are used by the runner to clone
-              # the repo
-              variables.concat(project.predefined_project_variables)
-
-              # yaml_variables is how we inject dynamic configuration into a workload
-              variables.concat(job_attr[:yaml_variables])
-
-              # NOTE: We never include user_defined_variables in pipeline_seed as the workload object has not been
-              # persisted yet
-              next
-            end
-
-            variables.concat(predefined_variables_from_job_attr(job_attr, environment, trigger))
-            variables.concat(project.predefined_variables)
-            variables.concat(pipeline_variables_builder.predefined_variables)
-            # job.runner.predefined_variables: No need because it's not available in the Seed step.
-            variables.concat(kubernetes_variables_from_attr(environment: environment, kubernetes_namespace: kubernetes_namespace))
-            variables.concat(job_attr[:yaml_variables])
-            variables.concat(user_variables(user))
-            # job.dependency_variables: No need because dependencies are not in the Seed step.
-            variables.concat(user_defined_variables(options: job_attr[:options], environment: environment))
-            variables.concat(release_variables)
+          if pipeline.only_workload_variables?
+            collections = [
+              project.predefined_project_variables,
+              job_attr[:yaml_variables]
+            ]
+            # NOTE: We never include user_defined_variables in pipeline_seed as the workload object has not been
+            # persisted yet
+          else
+            collections = [
+              predefined_variables_from_job_attr(job_attr, environment, trigger),
+              project.predefined_variables,
+              pipeline_variables_builder.predefined_variables,
+              # job.runner.predefined_variables: No need because it's not available in the Seed step.
+              kubernetes_variables_from_attr(environment: environment, kubernetes_namespace: kubernetes_namespace),
+              job_attr[:yaml_variables],
+              user_variables(user),
+              # job.dependency_variables: No need because dependencies are not in the Seed step.
+              user_defined_variables(options: job_attr[:options], environment: environment),
+              release_variables
+            ]
           end
+
+          Gitlab::Ci::Variables::Collection.from_collections(*collections)
         end
 
         def config_variables
diff --git a/lib/gitlab/ci/variables/collection.rb b/lib/gitlab/ci/variables/collection.rb
index da543cf129c981fe887314392dbad592f0e2d6e5..10e1015731d6384f681e6a70936899e75750710d 100644
--- a/lib/gitlab/ci/variables/collection.rb
+++ b/lib/gitlab/ci/variables/collection.rb
@@ -23,6 +23,15 @@ def self.fabricate(input)
           end
         end
 
+        def self.from_collections(*collections)
+          collections = collections.compact
+          return new if collections.empty?
+
+          new.tap do |result|
+            result.build_variable_hash_from_collections(collections)
+          end
+        end
+
         def initialize(variables = [], errors = nil)
           @variables = []
           @variables_by_key = Hash.new { |h, k| h[k] = [] }
@@ -120,6 +129,23 @@ def to_s
           "#{@variables_by_key.keys}, @errors='#{@errors}'"
         end
 
+        def build_variable_hash_from_collections(collections)
+          all_items = []
+          collections.each do |collection|
+            collection.each do |variable|
+              all_items << Collection::Item.fabricate(variable)
+            end
+          end
+
+          @variables = all_items
+          @variables_by_key = all_items.group_by { |item| item[:key] }
+
+          # Restore expected hash behavior: return [] instead of nil for missing keys
+          @variables_by_key.default_proc = proc { |hash, missing_key| hash[missing_key] = [] }
+
+          self
+        end
+
         protected
 
         def expand_value(value, keep_undefined: false, expand_file_refs: true, expand_raw_refs: true)
diff --git a/spec/lib/gitlab/ci/variables/collection_spec.rb b/spec/lib/gitlab/ci/variables/collection_spec.rb
index e0e7f9b378725ea5c8c2b190c8bf498383a40212..9d150533f242e6615556f44b21244707ac7fbac9 100644
--- a/spec/lib/gitlab/ci/variables/collection_spec.rb
+++ b/spec/lib/gitlab/ci/variables/collection_spec.rb
@@ -633,4 +633,105 @@
 
     it { is_expected.to eq("[\"VAR\", \"VAR2\"], @errors='circular variable reference detected'") }
   end
+
+  describe '.from_collections' do
+    let(:collection1) do
+      described_class.new([
+        { key: 'VAR1', value: 'value1' },
+        { key: 'VAR2', value: 'value2' }
+      ])
+    end
+
+    let(:collection2) do
+      described_class.new([
+        { key: 'VAR3', value: 'value3' },
+        { key: 'VAR4', value: 'value4' }
+      ])
+    end
+
+    let(:collection3) do
+      described_class.new([
+        { key: 'VAR5', value: 'value5' },
+        { key: 'VAR1', value: 'overridden_value1' } # Duplicate key to test precedence
+      ])
+    end
+
+    subject(:result) { described_class.from_collections(*collections) }
+
+    context 'with multiple collections' do
+      let(:collections) { [collection1, collection2, collection3] }
+
+      it 'combines all variables from all collections' do
+        expect(result).to be_a(described_class)
+        expect(result.size).to eq(6) # VAR1 (twice), VAR2, VAR3, VAR4, VAR5
+
+        expect(result['VAR1']&.value).to eq('overridden_value1') # Last occurrence wins
+        expect(result['VAR2']&.value).to eq('value2')
+        expect(result['VAR3']&.value).to eq('value3')
+        expect(result['VAR4']&.value).to eq('value4')
+        expect(result['VAR5']&.value).to eq('value5')
+      end
+
+      it 'maintains variable order with later collections taking precedence' do
+        variables_array = result.to_a
+        var1_positions = variables_array.each_with_index.filter_map { |var, idx| idx if var.key == 'VAR1' }
+
+        # VAR1 should appear twice in the collection (original and override)
+        expect(var1_positions).to have_attributes(size: 2)
+      end
+    end
+
+    context 'with nil collections' do
+      let(:collections) { [collection1, nil, collection2, nil] }
+
+      it 'filters out nil collections and processes the rest' do
+        expect(result.size).to eq(4)
+        expect(result['VAR1']&.value).to eq('value1')
+        expect(result['VAR2']&.value).to eq('value2')
+        expect(result['VAR3']&.value).to eq('value3')
+        expect(result['VAR4']&.value).to eq('value4')
+      end
+    end
+
+    context 'with empty collections array' do
+      let(:collections) { [] }
+
+      it 'returns an empty collection' do
+        expect(result).to be_a(described_class)
+        expect(result.size).to eq(0)
+      end
+    end
+
+    context 'with all nil collections' do
+      let(:collections) { [nil, nil, nil] }
+
+      it 'returns an empty collection' do
+        expect(result).to be_a(described_class)
+        expect(result.size).to eq(0)
+      end
+    end
+
+    context 'with single collection' do
+      let(:collections) { [collection1] }
+
+      it 'returns equivalent collection' do
+        expect(result.size).to eq(2)
+        expect(result['VAR1']&.value).to eq('value1')
+        expect(result['VAR2']&.value).to eq('value2')
+      end
+    end
+
+    context 'performance characteristics' do
+      it 'builds hash index efficiently' do
+        # Create collections beforehand
+        col1 = described_class.new([{ key: 'A', value: '1' }])
+        col2 = described_class.new([{ key: 'B', value: '2' }])
+
+        # Mock new to verify from_collections creates only one new instance
+        expect(described_class).to receive(:new).once.and_call_original
+
+        described_class.from_collections(col1, col2)
+      end
+    end
+  end
 end
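
Usage sketch (not part of the patch): a minimal example of how the new Collection.from_collections API above is expected to behave, based on the semantics exercised by the specs — nil inputs are compacted away, all items from every collection are kept, and for duplicate keys the last occurrence wins on lookup. The variable names and values below are illustrative only, not taken from the codebase.

    # Hypothetical inputs standing in for the builder's precedence list
    base = Gitlab::Ci::Variables::Collection.new([{ key: 'DEPLOY_ENV', value: 'staging' }])
    yaml = Gitlab::Ci::Variables::Collection.new([
      { key: 'DEPLOY_ENV', value: 'production' },
      { key: 'RELEASE', value: 'v1.2.3' }
    ])
    runner_vars = nil # e.g. job has no runner yet; nils are dropped by compact

    merged = Gitlab::Ci::Variables::Collection.from_collections(base, yaml, runner_vars)

    merged.size                   # => 3 (both DEPLOY_ENV items are kept, plus RELEASE)
    merged['DEPLOY_ENV']&.value   # => 'production' (last occurrence wins)
    merged['RELEASE']&.value      # => 'v1.2.3'

This mirrors how Builder#scoped_variables now assembles its precedence list: later entries in the collections array override earlier ones at lookup time, while every occurrence remains in the merged collection, matching the "maintains variable order with later collections taking precedence" spec.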