diff --git a/lib/gitlab/ci/interpolation/access.rb b/lib/gitlab/ci/interpolation/access.rb
new file mode 100644
index 0000000000000000000000000000000000000000..4259845890240a72e02cad8de518009299a896d2
--- /dev/null
+++ b/lib/gitlab/ci/interpolation/access.rb
@@ -0,0 +1,72 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Ci
+    module Interpolation
+      ##
+      # Interpolation::Access resolves a dotted access path, like `inputs.env`, against an interpolation context.
+      #
+      # Problems found while validating or resolving the path are collected in `errors` as strings.
+      #
+      class Access
+        attr_reader :content, :errors
+
+        MAX_ACCESS_OBJECTS = 5
+        MAX_ACCESS_BYTESIZE = 1024
+
+        def initialize(access, ctx)
+          @content = access
+          @ctx = ctx
+          @errors = []
+
+          if objects.count <= 1 # rubocop:disable Style/IfUnlessModifier
+            @errors.push('invalid interpolation access pattern')
+          end
+
+          if access.bytesize > MAX_ACCESS_BYTESIZE # rubocop:disable Style/IfUnlessModifier
+            @errors.push('maximum interpolation expression size exceeded')
+          end
+
+          evaluate! if valid?
+        end
+
+        def valid?
+          errors.none?
+        end
+
+        ##
+        # Path segments, split on dots. At most MAX_ACCESS_OBJECTS segments are returned, the last one holds the
+        # unsplit remainder of the path.
+        #
+        def objects
+          @objects ||= @content.split('.', MAX_ACCESS_OBJECTS)
+        end
+
+        def value
+          raise ArgumentError, 'access path invalid' unless valid?
+
+          @value
+        end
+
+        private
+
+        ##
+        # Walks the context one segment at a time. A missing key, or an intermediate value that can not be
+        # traversed (a string, for example), is recorded as an error message instead of raising.
+        #
+        def evaluate!
+          raise ArgumentError, 'access path invalid' unless valid?
+
+          @value = objects.inject(@ctx) do |memo, value|
+            key = value.to_sym
+
+            next memo.fetch(key) if memo.respond_to?(:key?) && memo.key?(key)
+
+            @errors.push("unknown interpolation key: `#{key}`")
+            break
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/ci/interpolation/block.rb b/lib/gitlab/ci/interpolation/block.rb
new file mode 100644
index 0000000000000000000000000000000000000000..389cbf378a22763b70990f4c6b4faec101fe3501
--- /dev/null
+++ b/lib/gitlab/ci/interpolation/block.rb
@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Ci
+    module Interpolation
+      ##
+      # Interpolation::Block represents a single `$[[ ... ]]` expression found in a configuration node.
+      #
+      class Block
+        PREFIX = '$[['
+        PATTERN = /(?<block>\$\[\[\s*(?<access>.*?)\s*\]\])/.freeze
+
+        attr_reader :block, :data, :ctx
+
+        def initialize(block, data, ctx)
+          @block = block
+          @ctx = ctx
+          @data = data
+
+          @access = Interpolation::Access.new(@data, ctx)
+        end
+
+        def valid?
+          errors.none?
+        end
+
+        def errors
+          @access.errors
+        end
+
+        def content
+          @access.content
+        end
+
+        def value
+          raise ArgumentError, 'block invalid' unless valid?
+
+          @access.value
+        end
+
+        ##
+        # Replaces every interpolation block found in `data` with the value returned by the given block, which
+        # receives the whole matched block and the access expression inside it. Values that are not strings, or
+        # do not contain the block prefix, are returned unchanged.
+        #
+        def self.match(data)
+          return data unless data.is_a?(String) && data.include?(PREFIX)
+
+          data.gsub(PATTERN) do
+            yield ::Regexp.last_match(:block), ::Regexp.last_match(:access)
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/ci/interpolation/config.rb b/lib/gitlab/ci/interpolation/config.rb
new file mode 100644
index 0000000000000000000000000000000000000000..32f58521139efd4d5e439b5bc154932c7ca62736
--- /dev/null
+++ b/lib/gitlab/ci/interpolation/config.rb
@@ -0,0 +1,129 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Ci
+    module Interpolation
+      ##
+      # Interpolation::Config represents a configuration artifact that we want to perform interpolation on.
+      #
+      class Config
+        include Gitlab::Utils::StrongMemoize
+
+        ##
+        # Total number of hash nodes traversed. For example, loading a YAML below would result in a hash having 12 nodes
+        # instead of 9, because hash values are being counted before we recursively traverse them.
+        #
+        # test:
+        #   spec:
+        #     env: $[[ inputs.env ]]
+        #
+        # $[[ inputs.key ]]:
+        #   name: $[[ inputs.key ]]
+        #   script: my-value
+        #
+        # According to our benchmarks performed when developing this code, the worst-case scenario of processing
+        # a hash with 500_000 nodes takes around 1 second and consumes around 225 megabytes of memory.
+        #
+        # The typical scenario, using just a few interpolations takes 250ms and consumes around 20 megabytes of memory.
+        #
+        # Given the above the 500_000 nodes should be an upper limit, provided that there are additional safeguards
+        # present in other parts of the code (example: maximum number of interpolation blocks found). Typical size of a
+        # YAML configuration with 500k nodes might be around 10 megabytes, which is an order of magnitude higher than
+        # the 1MB limit for loading YAML on GitLab.com.
+        #
+        MAX_NODES = 500_000
+        MAX_NODE_SIZE = 1024 * 1024 # 1MB
+
+        TooManyNodesError = Class.new(StandardError)
+        NodeTooLargeError = Class.new(StandardError)
+
+        ##
+        # Visitor counts the nodes traversed and raises once the MAX_NODES budget is exceeded.
+        #
+        Visitor = Class.new do
+          def initialize
+            @visited = 0
+          end
+
+          def visit!
+            @visited += 1
+
+            raise Config::TooManyNodesError if @visited > Config::MAX_NODES
+          end
+        end
+
+        attr_reader :errors
+
+        def initialize(hash)
+          @config = hash
+          @errors = []
+        end
+
+        def to_h
+          @config
+        end
+
+        ##
+        # The replace! method yields every string node of the config to the block and replaces the node with the
+        # block's return value.
+        #
+        # It returns `nil` if there were errors found during the process.
+        #
+        def replace!(&block)
+          recursive_replace(@config, Visitor.new, &block)
+        rescue TooManyNodesError
+          @errors.push('config too large')
+          nil
+        rescue NodeTooLargeError
+          @errors.push('config node too large')
+          nil
+        end
+        strong_memoize_attr :replace!
+
+        def self.fabricate(config)
+          case config
+          when Hash
+            new(config)
+          when Interpolation::Config
+            config
+          else
+            raise ArgumentError, 'unknown interpolation config'
+          end
+        end
+
+        private
+
+        ##
+        # Rebuilds the given node recursively. Hashes and arrays are traversed, symbols are processed as strings,
+        # strings are yielded to the block, and any other value is returned as is. Every call counts as a
+        # visited node.
+        #
+        def recursive_replace(config, visitor, &block)
+          visitor.visit!
+
+          case config
+          when Hash
+            {}.tap do |new_hash|
+              config.each_pair do |key, value|
+                new_key = recursive_replace(key, visitor, &block)
+                new_value = recursive_replace(value, visitor, &block)
+
+                new_hash[new_key] = new_value
+              end
+            end
+          when Array
+            config.map { |value| recursive_replace(value, visitor, &block) }
+          when Symbol
+            recursive_replace(config.to_s, visitor, &block)
+          when String
+            raise NodeTooLargeError if config.bytesize > MAX_NODE_SIZE
+
+            yield config
+          else
+            config
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/ci/interpolation/context.rb b/lib/gitlab/ci/interpolation/context.rb
new file mode 100644
index 0000000000000000000000000000000000000000..ce7a86a3c9b5f5617d70f89703d05bea8fd86c77
--- /dev/null
+++ b/lib/gitlab/ci/interpolation/context.rb
@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Ci
+    module Interpolation
+      ##
+      # Interpolation::Context is a class that represents the data that can be used when performing string interpolation
+      # on a CI configuration.
+      #
+      class Context
+        ContextTooComplexError = Class.new(StandardError)
+        NotSymbolizedContextError = Class.new(StandardError)
+
+        MAX_DEPTH = 3
+
+        def initialize(hash)
+          @context = hash
+
+          raise ContextTooComplexError if depth > MAX_DEPTH
+        end
+
+        def valid?
+          errors.none?
+        end
+
+        ##
+        # This method is here because `Context` will be responsible for validating specs, inputs and defaults.
+        #
+        def errors
+          []
+        end
+
+        def depth
+          deep_depth(@context)
+        end
+
+        def fetch(field)
+          @context.fetch(field)
+        end
+
+        def key?(field)
+          @context.key?(field)
+        end
+
+        def to_h
+          @context.to_h
+        end
+
+        def self.fabricate(context)
+          case context
+          when Hash
+            new(context)
+          when Interpolation::Context
+            context
+          else
+            raise ArgumentError, 'unknown interpolation context'
+          end
+        end
+
+        private
+
+        ##
+        # Returns the nesting level of the deepest value, counting from `depth`. A hash without any entries
+        # contributes only its own level, so an empty context has a depth of zero.
+        #
+        def deep_depth(context, depth = 0)
+          values = context.values.map do |value|
+            if value.is_a?(Hash)
+              deep_depth(value, depth + 1)
+            else
+              depth + 1
+            end
+          end
+
+          values.max || depth
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/ci/interpolation/template.rb b/lib/gitlab/ci/interpolation/template.rb
new file mode 100644
index 0000000000000000000000000000000000000000..0211279f26680c56ef6e345e45faca7c75c2c572
--- /dev/null
+++ b/lib/gitlab/ci/interpolation/template.rb
@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Ci
+    module Interpolation
+      ##
+      # Interpolation::Template interpolates `$[[ ... ]]` blocks found in a config using values from a context.
+      #
+      class Template
+        include Gitlab::Utils::StrongMemoize
+
+        attr_reader :blocks, :ctx
+
+        TooManyBlocksError = Class.new(StandardError)
+        InvalidBlockError = Class.new(StandardError)
+
+        MAX_BLOCKS = 10_000
+
+        def initialize(config, ctx)
+          @config = Interpolation::Config.fabricate(config)
+          @ctx = Interpolation::Context.fabricate(ctx)
+          @errors = []
+          @blocks = {}
+
+          interpolate! if valid?
+        end
+
+        def valid?
+          errors.none?
+        end
+
+        def errors
+          @errors + @config.errors + @ctx.errors + @blocks.values.flat_map(&:errors)
+        end
+
+        def size
+          @blocks.size
+        end
+
+        def interpolated
+          @result if valid?
+        end
+
+        private
+
+        def interpolate!
+          @result = @config.replace! do |data|
+            Interpolation::Block.match(data) do |block, access|
+              evaluate_block(block, access)
+            end
+          end
+        rescue TooManyBlocksError
+          @errors.push('too many interpolation blocks')
+        rescue InvalidBlockError
+          @errors.push('interpolation interrupted by errors')
+        end
+        strong_memoize_attr :interpolate!
+
+        def evaluate_block(block, data)
+          block = (@blocks[block] ||= Interpolation::Block.new(block, data, ctx))
+
+          raise TooManyBlocksError if @blocks.count > MAX_BLOCKS
+          raise InvalidBlockError unless block.valid?
+
+          block.value
+        end
+      end
+    end
+  end
+end
diff --git a/spec/fast_spec_helper.rb b/spec/fast_spec_helper.rb
index 393cd6f6a21b2f47ea14862a6cd205a62c25c54a..e53f0cd936fa5286d2ebfe0afc5351128fcedbd5 100644
--- a/spec/fast_spec_helper.rb
+++ b/spec/fast_spec_helper.rb
@@ -18,6 +18,8 @@ require_relative '../config/initializers/0_inject_enterprise_edition_module'
 
 require_relative '../config/settings'
 require_relative 'support/rspec'
+require_relative '../lib/gitlab/utils'
+require_relative '../lib/gitlab/utils/strong_memoize'
 require 'active_support/all'
 require_relative 'simplecov_env'
 
diff --git a/spec/lib/gitlab/ci/interpolation/access_spec.rb b/spec/lib/gitlab/ci/interpolation/access_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..9f6108a328d800d98cfd635c497ff8a9643c8356
--- /dev/null
+++ b/spec/lib/gitlab/ci/interpolation/access_spec.rb
@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+
+require 'fast_spec_helper'
+
+RSpec.describe Gitlab::Ci::Interpolation::Access, feature_category: :pipeline_authoring do
+  subject { described_class.new(access, ctx) }
+
+  let(:access) do
+    'inputs.data'
+  end
+
+  let(:ctx) do
+    { inputs: { data: 'abcd' }, env: { 'ENV' => 'dev' } }
+  end
+
+  it 'properly evaluates the access pattern' do
+    expect(subject.value).to eq 'abcd'
+  end
+
+  context 'when there are too many objects in the access path' do
+    let(:access) { 'a.b.c.d.e.f.g.h' }
+
+    it 'only supports MAX_ACCESS_OBJECTS steps' do
+      expect(subject.objects.count).to eq 5
+    end
+  end
+
+  context 'when access expression size is too large' do
+    before do
+      stub_const("#{described_class}::MAX_ACCESS_BYTESIZE", 10)
+    end
+
+    it 'returns an error' do
+      expect(subject).not_to be_valid
+      expect(subject.errors.first)
+        .to eq 'maximum interpolation expression size exceeded'
+    end
+  end
+
+  context 'when there are not enough objects in the access path' do
+    let(:access) { 'abc[123]' }
+
+    it 'returns an error when there are no objects found' do
+      expect(subject).not_to be_valid
+      expect(subject.errors.first)
+        .to eq 'invalid interpolation access pattern'
+    end
+  end
+
+  context 'when a key is missing from the context' do
+    let(:access) { 'inputs.unknown' }
+
+    it 'returns an error message' do
+      expect(subject).not_to be_valid
+      expect(subject.errors.first).to eq 'unknown interpolation key: `unknown`'
+    end
+  end
+
+  context 'when the access path goes through a value that is not a hash' do
+    let(:access) { 'inputs.data.nested' }
+
+    it 'returns an error message instead of raising' do
+      expect(subject).not_to be_valid
+      expect(subject.errors.first).to eq 'unknown interpolation key: `nested`'
+    end
+  end
+end
diff --git a/spec/lib/gitlab/ci/interpolation/block_spec.rb b/spec/lib/gitlab/ci/interpolation/block_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..7f2be505d175027969a7c08815a0cccd7d91c24b
--- /dev/null
+++ b/spec/lib/gitlab/ci/interpolation/block_spec.rb
@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+require 'fast_spec_helper'
+
+RSpec.describe Gitlab::Ci::Interpolation::Block, feature_category: :pipeline_authoring do
+  subject { described_class.new(block, data, ctx) }
+
+  let(:data) do
+    'inputs.data'
+  end
+
+  let(:block) do
+    "$[[ #{data} ]]"
+  end
+
+  let(:ctx) do
+    { inputs: { data: 'abc' }, env: { 'ENV' => 'dev' } }
+  end
+
+  it 'knows its content' do
+    expect(subject.content).to eq 'inputs.data'
+  end
+
+  it 'properly evaluates the access pattern' do
+    expect(subject.value).to eq 'abc'
+  end
+
+  describe '.match' do
+    it 'matches each block in a string' do
+      expect { |b| described_class.match('$[[ access1 ]] $[[ access2 ]]', &b) }
+        .to yield_successive_args(['$[[ access1 ]]', 'access1'], ['$[[ access2 ]]', 'access2'])
+    end
+
+    it 'matches an empty block' do
+      expect { |b| described_class.match('$[[]]', &b) }
+        .to yield_with_args('$[[]]', '')
+    end
+  end
+end
diff --git a/spec/lib/gitlab/ci/interpolation/config_spec.rb b/spec/lib/gitlab/ci/interpolation/config_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..e5987776e006ac016ea49f130916ec32c65795aa
--- /dev/null
+++ b/spec/lib/gitlab/ci/interpolation/config_spec.rb
@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+
+require 'fast_spec_helper'
+
+RSpec.describe Gitlab::Ci::Interpolation::Config, feature_category: :pipeline_authoring do
+  subject { described_class.new(YAML.safe_load(config)) }
+
+  let(:config) do
+    <<~CFG
+      test:
+        spec:
+          env: $[[ inputs.env ]]
+
+      $[[ inputs.key ]]:
+        name: $[[ inputs.key ]]
+        script: my-value
+    CFG
+  end
+
+  describe '#replace!' do
+    it 'replaces each of the nodes with a block return value' do
+      result = subject.replace! { |node| "abc#{node}cde" }
+
+      expect(result).to eq({
+        'abctestcde' => { 'abcspeccde' => { 'abcenvcde' => 'abc$[[ inputs.env ]]cde' } },
+        'abc$[[ inputs.key ]]cde' => {
+          'abcnamecde' => 'abc$[[ inputs.key ]]cde',
+          'abcscriptcde' => 'abcmy-valuecde'
+        }
+      })
+    end
+  end
+
+  context 'when config size is exceeded' do
+    before do
+      stub_const("#{described_class}::MAX_NODES", 7)
+    end
+
+    it 'returns a config size error' do
+      replaced = 0
+
+      subject.replace! { replaced += 1 }
+
+      expect(replaced).to eq 4
+      expect(subject.errors.size).to eq 1
+      expect(subject.errors.first).to eq 'config too large'
+    end
+  end
+end
diff --git a/spec/lib/gitlab/ci/interpolation/context_spec.rb b/spec/lib/gitlab/ci/interpolation/context_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..ada896f4980be0bc3adeeb2cb91647675290e00a
--- /dev/null
+++ b/spec/lib/gitlab/ci/interpolation/context_spec.rb
@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+
+require 'fast_spec_helper'
+
+RSpec.describe Gitlab::Ci::Interpolation::Context, feature_category: :pipeline_authoring do
+  subject { described_class.new(ctx) }
+
+  let(:ctx) do
+    { inputs: { key: 'abc' } }
+  end
+
+  describe '#depth' do
+    it 'returns a max depth of the hash' do
+      expect(subject.depth).to eq 2
+    end
+
+    context 'when the context is empty' do
+      let(:ctx) { {} }
+
+      it 'returns zero' do
+        expect(subject.depth).to eq 0
+      end
+    end
+
+    context 'when the context contains an empty hash' do
+      let(:ctx) { { inputs: {} } }
+
+      it 'counts the empty hash as a leaf' do
+        expect(subject.depth).to eq 1
+      end
+    end
+  end
+
+  context 'when interpolation context is too complex' do
+    let(:ctx) do
+      { inputs: { key: { aaa: { bbb: 'ccc' } } } }
+    end
+
+    it 'raises an exception' do
+      expect { described_class.new(ctx) }
+        .to raise_error(described_class::ContextTooComplexError)
+    end
+  end
+end
diff --git a/spec/lib/gitlab/ci/interpolation/template_spec.rb b/spec/lib/gitlab/ci/interpolation/template_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..8a243b4db05c3e836ffb44eb7f7f1c2d3139e711
--- /dev/null
+++ b/spec/lib/gitlab/ci/interpolation/template_spec.rb
@@ -0,0 +1,103 @@
+# frozen_string_literal: true
+
+require 'fast_spec_helper'
+
+RSpec.describe Gitlab::Ci::Interpolation::Template, feature_category: :pipeline_authoring do
+  subject { described_class.new(YAML.safe_load(config), ctx) }
+
+  let(:config) do
+    <<~CFG
+      test:
+        spec:
+          env: $[[ inputs.env ]]
+
+      $[[ inputs.key ]]:
+        name: $[[ inputs.key ]]
+        script: my-value
+    CFG
+  end
+
+  let(:ctx) do
+    { inputs: { env: 'dev', key: 'abc' } }
+  end
+
+  it 'collects interpolation blocks' do
+    expect(subject.size).to eq 2
+  end
+
+  it 'interpolates the values properly' do
+    expect(subject.interpolated).to eq YAML.safe_load <<~RESULT
+      test:
+        spec:
+          env: dev
+
+      abc:
+        name: abc
+        script: my-value
+    RESULT
+  end
+
+  context 'when interpolation can not be performed' do
+    let(:config) { '$[[ xxx.yyy ]]: abc' }
+
+    it 'does not interpolate the config' do
+      expect(subject).not_to be_valid
+      expect(subject.interpolated).to be_nil
+      expect(subject.errors).to include 'unknown interpolation key: `xxx`'
+    end
+  end
+
+  context 'when template consists of nested arrays with hashes and values' do
+    let(:config) do
+      <<~CFG
+        test:
+          - a-$[[ inputs.key ]]-b
+          - c-$[[ inputs.key ]]-d:
+              d-$[[ inputs.key ]]-e
+            val: 1
+      CFG
+    end
+
+    it 'performs a valid interpolation' do
+      result = { 'test' => ['a-abc-b', { 'c-abc-d' => 'd-abc-e', 'val' => 1 }] }
+
+      expect(subject).to be_valid
+      expect(subject.interpolated).to eq result
+    end
+  end
+
+  context 'when template contains symbols that need interpolation' do
+    subject do
+      described_class.new({ '$[[ inputs.key ]]'.to_sym => 'cde' }, ctx)
+    end
+
+    it 'performs a valid interpolation' do
+      expect(subject).to be_valid
+      expect(subject.interpolated).to eq({ 'abc' => 'cde' })
+    end
+  end
+
+  context 'when template is too large' do
+    before do
+      stub_const('Gitlab::Ci::Interpolation::Config::MAX_NODES', 1)
+    end
+
+    it 'returns an error' do
+      expect(subject.interpolated).to be_nil
+      expect(subject.errors.count).to eq 1
+      expect(subject.errors.first).to eq 'config too large'
+    end
+  end
+
+  context 'when there are too many interpolation blocks' do
+    before do
+      stub_const("#{described_class}::MAX_BLOCKS", 1)
+    end
+
+    it 'returns an error' do
+      expect(subject.interpolated).to be_nil
+      expect(subject.errors.count).to eq 1
+      expect(subject.errors.first).to eq 'too many interpolation blocks'
+    end
+  end
+end