From b29009c975ba6c05e4774fe6fd0367432da800c1 Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Fri, 16 Dec 2022 12:07:29 +0100 Subject: [PATCH 01/18] Add initial implementation of CI templates interpolation Changelog: added --- lib/gitlab/ci/interpolation/access.rb | 33 ++++++++ lib/gitlab/ci/interpolation/block.rb | 51 +++++++++++++ lib/gitlab/ci/interpolation/template.rb | 75 +++++++++++++++++++ .../lib/gitlab/ci/interpolation/block_spec.rb | 23 ++++++ .../gitlab/ci/interpolation/template_spec.rb | 35 +++++++++ 5 files changed, 217 insertions(+) create mode 100644 lib/gitlab/ci/interpolation/access.rb create mode 100644 lib/gitlab/ci/interpolation/block.rb create mode 100644 lib/gitlab/ci/interpolation/template.rb create mode 100644 spec/lib/gitlab/ci/interpolation/block_spec.rb create mode 100644 spec/lib/gitlab/ci/interpolation/template_spec.rb diff --git a/lib/gitlab/ci/interpolation/access.rb b/lib/gitlab/ci/interpolation/access.rb new file mode 100644 index 00000000000000..d0be610e76a6c1 --- /dev/null +++ b/lib/gitlab/ci/interpolation/access.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +module Gitlab + module Ci + module Interpolation + class Access + attr_reader :content, :errors + + def initialize(access, ctx) + @content = access + @ctx = ctx + @errors = [] + + @errors.push('invalid interpolation access pattern') if objects.none? + end + + def valid? + errors.any? + end + + def objects + @content.split('.') + end + + def evaluate + objects.inject(@ctx) do |memo, value| + memo.fetch(value.to_sym) + end + end + end + end + end +end diff --git a/lib/gitlab/ci/interpolation/block.rb b/lib/gitlab/ci/interpolation/block.rb new file mode 100644 index 00000000000000..fa6c14a478ff7e --- /dev/null +++ b/lib/gitlab/ci/interpolation/block.rb @@ -0,0 +1,51 @@ +# frozen_string_literal: true + +module Gitlab + module Ci + module Interpolation + class Block + PATTERN = /(?\$\[\[\s*(?.+?)\s*\]\])/.freeze + + def initialize(uid, block, ctx) + @uid = uid + @block = block + @ctx = ctx + @content = self.class.access(block) + + @access = Interpolation::Access + .new(@content, ctx) + end + + def valid? + errors.any? + end + + def errors + @access.errors + end + + def content + @access.content + end + + def annotation + "$[[#{@uid}]]" + end + + def evaluate + @access.evaluate + end + + def self.gsub(data, &block) + data.gsub(PATTERN, &block) + end + + def self.access(data) + match = data.match(PATTERN) + + match[:access] if match + end + end + end + end +end diff --git a/lib/gitlab/ci/interpolation/template.rb b/lib/gitlab/ci/interpolation/template.rb new file mode 100644 index 00000000000000..594bf1142b3723 --- /dev/null +++ b/lib/gitlab/ci/interpolation/template.rb @@ -0,0 +1,75 @@ +# frozen_string_literal: true + +module Gitlab + module Ci + module Interpolation + class Template + attr_reader :blocks, :ctx + + def initialize(config, ctx) + @config = config + @context = ctx + @result = {} + @blocks = {} + @ctx = ctx + + interpolate! + end + + def valid? + errors.any? + end + + def errors + @blocks.map(&:errors).flatten + end + + def count + @blocks.size + end + + def evaluate + traverse(@result) do |value| + Interpolation::Block.gsub(value) do + uid = Interpolation::Block.access(value) + + block = blocks.fetch(uid.to_i) + + block.evaluate + end + end + end + + private + + def interpolate! + @result = traverse(@config) do |value| + annotate(value) + end + end + + def traverse(data) + data.dup.tap do |result| + result.deep_transform_keys! do |key| + yield key.to_s + end + + result.deep_transform_values! do |value| + yield value.to_s + end + end + end + + def annotate(data) + Interpolation::Block.gsub(data) do |block| + uid = blocks.size + block = Interpolation::Block.new(uid, block, ctx) + + @blocks.store(uid, block) + block.annotation + end + end + end + end + end +end diff --git a/spec/lib/gitlab/ci/interpolation/block_spec.rb b/spec/lib/gitlab/ci/interpolation/block_spec.rb new file mode 100644 index 00000000000000..c5631ab51eda1b --- /dev/null +++ b/spec/lib/gitlab/ci/interpolation/block_spec.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +require 'fast_spec_helper' + +RSpec.describe Gitlab::Ci::Interpolation::Block, feature_category: :pipeline_authoring do + subject { described_class.new(123, block, ctx) } + + let(:block) do + '$[[ inputs.data ]]' + end + + let(:ctx) do + { inputs: { data: 'abc' }, env: { 'ENV' => 'dev' } } + end + + it 'knows its content' do + expect(subject.content).to eq 'inputs.data' + end + + it 'generates it unique annotation' do + expect(subject.annotation).to eq '$[[123]]' + end +end diff --git a/spec/lib/gitlab/ci/interpolation/template_spec.rb b/spec/lib/gitlab/ci/interpolation/template_spec.rb new file mode 100644 index 00000000000000..4ecc9dae9c001a --- /dev/null +++ b/spec/lib/gitlab/ci/interpolation/template_spec.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +require 'fast_spec_helper' + +RSpec.describe Gitlab::Ci::Interpolation::Template, feature_category: :pipeline_authoring do + subject { described_class.new(YAML.safe_load(config), ctx) } + + let(:config) do + <<~CFG + test: + spec: + env: $[[ inputs.env ]] + + $[[ inputs.key ]]: my-value + CFG + end + + let(:ctx) do + { inputs: { env: 'dev', key: 'abc' } } + end + + it 'collects interpolation blocks' do + expect(subject.blocks.count).to eq 2 + end + + it 'interpolates the values properly' do + expect(subject.evaluate).to eq YAML.safe_load <<~RESULT + test: + spec: + env: dev + + abc: my-value + RESULT + end +end -- GitLab From a6855224110361c946706f6f490bff9d078532d6 Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Fri, 16 Dec 2022 12:22:08 +0100 Subject: [PATCH 02/18] Add a few more specs for CI templates interpolation --- .../gitlab/ci/interpolation/access_spec.rb | 19 +++++++++++++++++++ .../lib/gitlab/ci/interpolation/block_spec.rb | 4 ++++ 2 files changed, 23 insertions(+) create mode 100644 spec/lib/gitlab/ci/interpolation/access_spec.rb diff --git a/spec/lib/gitlab/ci/interpolation/access_spec.rb b/spec/lib/gitlab/ci/interpolation/access_spec.rb new file mode 100644 index 00000000000000..362e7d72aebc2a --- /dev/null +++ b/spec/lib/gitlab/ci/interpolation/access_spec.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +require 'fast_spec_helper' + +RSpec.describe Gitlab::Ci::Interpolation::Access, feature_category: :pipeline_authoring do + subject { described_class.new(access, ctx) } + + let(:access) do + 'inputs.data' + end + + let(:ctx) do + { inputs: { data: 'abcd' }, env: { 'ENV' => 'dev' } } + end + + it 'properly evaluates the access pattern' do + expect(subject.evaluate).to eq 'abcd' + end +end diff --git a/spec/lib/gitlab/ci/interpolation/block_spec.rb b/spec/lib/gitlab/ci/interpolation/block_spec.rb index c5631ab51eda1b..b3b11f3904ab2c 100644 --- a/spec/lib/gitlab/ci/interpolation/block_spec.rb +++ b/spec/lib/gitlab/ci/interpolation/block_spec.rb @@ -20,4 +20,8 @@ it 'generates it unique annotation' do expect(subject.annotation).to eq '$[[123]]' end + + it 'properly evaluates the access pattern' do + expect(subject.evaluate).to eq 'abc' + end end -- GitLab From d19ad4bf706ee1e5b7159025bf553f564e456cf4 Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Thu, 5 Jan 2023 13:06:56 +0100 Subject: [PATCH 03/18] Implement single-pass interpolation with block de-duplication --- lib/gitlab/ci/interpolation/block.rb | 9 +---- lib/gitlab/ci/interpolation/template.rb | 39 +++++++------------ .../lib/gitlab/ci/interpolation/block_spec.rb | 6 +-- .../gitlab/ci/interpolation/template_spec.rb | 4 +- 4 files changed, 18 insertions(+), 40 deletions(-) diff --git a/lib/gitlab/ci/interpolation/block.rb b/lib/gitlab/ci/interpolation/block.rb index fa6c14a478ff7e..b90c068b042371 100644 --- a/lib/gitlab/ci/interpolation/block.rb +++ b/lib/gitlab/ci/interpolation/block.rb @@ -6,8 +6,7 @@ module Interpolation class Block PATTERN = /(?\$\[\[\s*(?.+?)\s*\]\])/.freeze - def initialize(uid, block, ctx) - @uid = uid + def initialize(block, ctx) @block = block @ctx = ctx @content = self.class.access(block) @@ -28,12 +27,8 @@ def content @access.content end - def annotation - "$[[#{@uid}]]" - end - def evaluate - @access.evaluate + @value ||= @access.evaluate end def self.gsub(data, &block) diff --git a/lib/gitlab/ci/interpolation/template.rb b/lib/gitlab/ci/interpolation/template.rb index 594bf1142b3723..ec2578720c9a4b 100644 --- a/lib/gitlab/ci/interpolation/template.rb +++ b/lib/gitlab/ci/interpolation/template.rb @@ -9,7 +9,6 @@ class Template def initialize(config, ctx) @config = config @context = ctx - @result = {} @blocks = {} @ctx = ctx @@ -24,32 +23,30 @@ def errors @blocks.map(&:errors).flatten end - def count + def size @blocks.size end - def evaluate - traverse(@result) do |value| - Interpolation::Block.gsub(value) do - uid = Interpolation::Block.access(value) - - block = blocks.fetch(uid.to_i) - - block.evaluate - end - end + def interpolated + @result.to_h end private def interpolate! - @result = traverse(@config) do |value| - annotate(value) + @result ||= traverse do |value| + Interpolation::Block.gsub(value) do |data| + uid = ::Digest::MD5.hexdigest(data) + + block = (@blocks[uid] ||= Interpolation::Block.new(data, ctx)) + + block.evaluate + end end end - def traverse(data) - data.dup.tap do |result| + def traverse + @config.dup.tap do |result| result.deep_transform_keys! do |key| yield key.to_s end @@ -59,16 +56,6 @@ def traverse(data) end end end - - def annotate(data) - Interpolation::Block.gsub(data) do |block| - uid = blocks.size - block = Interpolation::Block.new(uid, block, ctx) - - @blocks.store(uid, block) - block.annotation - end - end end end end diff --git a/spec/lib/gitlab/ci/interpolation/block_spec.rb b/spec/lib/gitlab/ci/interpolation/block_spec.rb index b3b11f3904ab2c..a3e491cc1a337f 100644 --- a/spec/lib/gitlab/ci/interpolation/block_spec.rb +++ b/spec/lib/gitlab/ci/interpolation/block_spec.rb @@ -3,7 +3,7 @@ require 'fast_spec_helper' RSpec.describe Gitlab::Ci::Interpolation::Block, feature_category: :pipeline_authoring do - subject { described_class.new(123, block, ctx) } + subject { described_class.new(block, ctx) } let(:block) do '$[[ inputs.data ]]' @@ -17,10 +17,6 @@ expect(subject.content).to eq 'inputs.data' end - it 'generates it unique annotation' do - expect(subject.annotation).to eq '$[[123]]' - end - it 'properly evaluates the access pattern' do expect(subject.evaluate).to eq 'abc' end diff --git a/spec/lib/gitlab/ci/interpolation/template_spec.rb b/spec/lib/gitlab/ci/interpolation/template_spec.rb index 4ecc9dae9c001a..50ceeb54b1bdd8 100644 --- a/spec/lib/gitlab/ci/interpolation/template_spec.rb +++ b/spec/lib/gitlab/ci/interpolation/template_spec.rb @@ -20,11 +20,11 @@ end it 'collects interpolation blocks' do - expect(subject.blocks.count).to eq 2 + expect(subject.size).to eq 2 end it 'interpolates the values properly' do - expect(subject.evaluate).to eq YAML.safe_load <<~RESULT + expect(subject.interpolated).to eq YAML.safe_load <<~RESULT test: spec: env: dev -- GitLab From 42ea1f68c606b21cddac0fb5012c4ed9e9b5c985 Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Thu, 5 Jan 2023 13:09:57 +0100 Subject: [PATCH 04/18] Improve CI interpolation specs and fix Rubocop --- lib/gitlab/ci/interpolation/template.rb | 4 ++-- spec/lib/gitlab/ci/interpolation/template_spec.rb | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/gitlab/ci/interpolation/template.rb b/lib/gitlab/ci/interpolation/template.rb index ec2578720c9a4b..5730273a77452e 100644 --- a/lib/gitlab/ci/interpolation/template.rb +++ b/lib/gitlab/ci/interpolation/template.rb @@ -20,7 +20,7 @@ def valid? end def errors - @blocks.map(&:errors).flatten + @blocks.flat_map(&:errors) end def size @@ -36,7 +36,7 @@ def interpolated def interpolate! @result ||= traverse do |value| Interpolation::Block.gsub(value) do |data| - uid = ::Digest::MD5.hexdigest(data) + uid = ::Digest::SHA256.hexdigest(data) block = (@blocks[uid] ||= Interpolation::Block.new(data, ctx)) diff --git a/spec/lib/gitlab/ci/interpolation/template_spec.rb b/spec/lib/gitlab/ci/interpolation/template_spec.rb index 50ceeb54b1bdd8..5aca1061b1f29f 100644 --- a/spec/lib/gitlab/ci/interpolation/template_spec.rb +++ b/spec/lib/gitlab/ci/interpolation/template_spec.rb @@ -11,7 +11,9 @@ spec: env: $[[ inputs.env ]] - $[[ inputs.key ]]: my-value + $[[ inputs.key ]]: + name: $[[ inputs.key ]] + script: my-value CFG end @@ -29,7 +31,9 @@ spec: env: dev - abc: my-value + abc: + name: abc + script: my-value RESULT end end -- GitLab From edd7571335be6aa696aef4acc5213f6a019ec7c4 Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Thu, 12 Jan 2023 10:27:18 +0100 Subject: [PATCH 05/18] Extract CI interpolation context and config classes --- lib/gitlab/ci/interpolation/config.rb | 45 +++++++++++++++++++++++ lib/gitlab/ci/interpolation/context.rb | 48 +++++++++++++++++++++++++ lib/gitlab/ci/interpolation/template.rb | 19 ++-------- 3 files changed, 96 insertions(+), 16 deletions(-) create mode 100644 lib/gitlab/ci/interpolation/config.rb create mode 100644 lib/gitlab/ci/interpolation/context.rb diff --git a/lib/gitlab/ci/interpolation/config.rb b/lib/gitlab/ci/interpolation/config.rb new file mode 100644 index 00000000000000..c3851515006f07 --- /dev/null +++ b/lib/gitlab/ci/interpolation/config.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +module Gitlab + module Ci + module Interpolation + ## + # Interpolation::Config represents a configuration artifact that we want to perform interpolation on. + # + class Config + def initialize(hash) + @config = hash + end + + def to_h + @config.to_h + end + + def deep_transform + @config.deep_dup.tap do |result| + result.deep_transform_keys! do |key| + yield key.to_s + end + + result.deep_transform_values! do |value| + yield value.to_s + end + end + end + + def self.fabricate(config) + case config + when Hash + new(config) + when Interpolation::Config + new(config.to_h) + when String + raise NotImplementedError, 'interpolation context unmarshalling needed' + else + raise ArgumentError, 'uknown interpolation config' + end + end + end + end + end +end diff --git a/lib/gitlab/ci/interpolation/context.rb b/lib/gitlab/ci/interpolation/context.rb new file mode 100644 index 00000000000000..73eb30b44b3c0f --- /dev/null +++ b/lib/gitlab/ci/interpolation/context.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +module Gitlab + module Ci + module Interpolation + ## + # Interpolation::Context is a class that represents the data that can be used when performing string interpolation + # on a CI configuration. + # + # It can be at most 3-levels deep hash, and all leafs have to be strings. + # + class Context + def initialize(hash) + @context = hash + + # TODO: validate + end + + def depth + raise NotImplementedError + end + + def access(field) + raise NotImplementedError + end + + def fetch(field) + @context.fetch(field) + end + + def to_h + @context.to_h + end + + def self.fabricate(context) + case context + when Hash + new(context) + when Interpolation::Context + new(context.to_h) + else + raise ArgumentError, 'uknown interpolation context' + end + end + end + end + end +end diff --git a/lib/gitlab/ci/interpolation/template.rb b/lib/gitlab/ci/interpolation/template.rb index 5730273a77452e..b1dbd866e54b41 100644 --- a/lib/gitlab/ci/interpolation/template.rb +++ b/lib/gitlab/ci/interpolation/template.rb @@ -7,10 +7,9 @@ class Template attr_reader :blocks, :ctx def initialize(config, ctx) - @config = config - @context = ctx + @config = Interpolation::Config.fabricate(config) + @ctx = Interpolation::Context.fabricate(ctx) @blocks = {} - @ctx = ctx interpolate! end @@ -34,7 +33,7 @@ def interpolated private def interpolate! - @result ||= traverse do |value| + @result ||= @config.deep_transform do |value| Interpolation::Block.gsub(value) do |data| uid = ::Digest::SHA256.hexdigest(data) @@ -44,18 +43,6 @@ def interpolate! end end end - - def traverse - @config.dup.tap do |result| - result.deep_transform_keys! do |key| - yield key.to_s - end - - result.deep_transform_values! do |value| - yield value.to_s - end - end - end end end end -- GitLab From af1287b5a4cbc1470765376b021a81043fb555ac Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Thu, 12 Jan 2023 12:27:47 +0100 Subject: [PATCH 06/18] Implement two strategies of hash traversal CI interpolaton --- lib/gitlab/ci/interpolation/config.rb | 50 ++++++++++++++++--- lib/gitlab/ci/interpolation/template.rb | 2 +- .../gitlab/ci/interpolation/template_spec.rb | 17 +++++++ 3 files changed, 60 insertions(+), 9 deletions(-) diff --git a/lib/gitlab/ci/interpolation/config.rb b/lib/gitlab/ci/interpolation/config.rb index c3851515006f07..24139d99f7f2ef 100644 --- a/lib/gitlab/ci/interpolation/config.rb +++ b/lib/gitlab/ci/interpolation/config.rb @@ -15,15 +15,16 @@ def to_h @config.to_h end - def deep_transform - @config.deep_dup.tap do |result| - result.deep_transform_keys! do |key| - yield key.to_s - end + def interpolate(&block) + strategy = 2 - result.deep_transform_values! do |value| - yield value.to_s - end + case strategy + when 1 + simple_deep_transform_strategy(&block) + when 2 + smart_only_when_allocation_needed_strategy(@config, &block) + else + raise ArgumentError end end @@ -39,6 +40,39 @@ def self.fabricate(config) raise ArgumentError, 'uknown interpolation config' end end + + private + + def simple_deep_transform_strategy(&block) + @config.deep_dup.tap do |result| + result.deep_transform_keys!(&block) + result.deep_transform_values!(&block) + end + end + + def smart_only_when_allocation_needed_strategy(config, &block) + case config + when Hash + Hash.new.tap do |new_hash| # rubocop:disable Style/EmptyLiteral + config.each_pair do |key, value| + new_key = yield key + new_value = smart_only_when_allocation_needed_strategy(value, &block) + + if new_key != key + new_hash[new_key] = new_value + else + new_hash[key] = new_value + end + end + end + when Array + config.map do |value| + smart_only_when_allocation_needed_strategy(value, &block) + end + else + yield config + end + end end end end diff --git a/lib/gitlab/ci/interpolation/template.rb b/lib/gitlab/ci/interpolation/template.rb index b1dbd866e54b41..0d5331052db7af 100644 --- a/lib/gitlab/ci/interpolation/template.rb +++ b/lib/gitlab/ci/interpolation/template.rb @@ -33,7 +33,7 @@ def interpolated private def interpolate! - @result ||= @config.deep_transform do |value| + @result ||= @config.interpolate do |value| Interpolation::Block.gsub(value) do |data| uid = ::Digest::SHA256.hexdigest(data) diff --git a/spec/lib/gitlab/ci/interpolation/template_spec.rb b/spec/lib/gitlab/ci/interpolation/template_spec.rb index 5aca1061b1f29f..f469e0ccc93c3d 100644 --- a/spec/lib/gitlab/ci/interpolation/template_spec.rb +++ b/spec/lib/gitlab/ci/interpolation/template_spec.rb @@ -36,4 +36,21 @@ script: my-value RESULT end + + context 'when template consists of nested arrays with hashes and values' do + let(:config) do + <<~CFG + test: + - a-$[[ inputs.key ]]-b + - c-$[[ inputs.key ]]-d: + d-$[[ inputs.key ]]-e + CFG + end + + it 'perform a valid interpolation' do + result = { 'test' => ['a-abc-b', { 'c-abc-d' => 'd-abc-e' }] } + + expect(subject.interpolated).to eq result + end + end end -- GitLab From e0013f2354841502722ecdd3328fcff9e4311d07 Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Thu, 12 Jan 2023 12:38:54 +0100 Subject: [PATCH 07/18] Only match CI interpolation blocks on strings --- lib/gitlab/ci/interpolation/block.rb | 2 ++ spec/lib/gitlab/ci/interpolation/template_spec.rb | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/gitlab/ci/interpolation/block.rb b/lib/gitlab/ci/interpolation/block.rb index b90c068b042371..0f74030fb1b431 100644 --- a/lib/gitlab/ci/interpolation/block.rb +++ b/lib/gitlab/ci/interpolation/block.rb @@ -32,6 +32,8 @@ def evaluate end def self.gsub(data, &block) + return data unless data.is_a?(String) + data.gsub(PATTERN, &block) end diff --git a/spec/lib/gitlab/ci/interpolation/template_spec.rb b/spec/lib/gitlab/ci/interpolation/template_spec.rb index f469e0ccc93c3d..b576aa47a209f3 100644 --- a/spec/lib/gitlab/ci/interpolation/template_spec.rb +++ b/spec/lib/gitlab/ci/interpolation/template_spec.rb @@ -44,11 +44,12 @@ - a-$[[ inputs.key ]]-b - c-$[[ inputs.key ]]-d: d-$[[ inputs.key ]]-e + val: 1 CFG end it 'perform a valid interpolation' do - result = { 'test' => ['a-abc-b', { 'c-abc-d' => 'd-abc-e' }] } + result = { 'test' => ['a-abc-b', { 'c-abc-d' => 'd-abc-e', 'val' => 1 }] } expect(subject.interpolated).to eq result end -- GitLab From 972b0fb081a7f6a5537ca05c5157abe950765fe3 Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Tue, 17 Jan 2023 17:15:03 +0100 Subject: [PATCH 08/18] Add CI interpolation benchmarking temporary class --- lib/gitlab/ci/interpolation/benchmark.rb | 184 +++++++++++++++++++++++ lib/gitlab/ci/interpolation/block.rb | 3 +- 2 files changed, 186 insertions(+), 1 deletion(-) create mode 100644 lib/gitlab/ci/interpolation/benchmark.rb diff --git a/lib/gitlab/ci/interpolation/benchmark.rb b/lib/gitlab/ci/interpolation/benchmark.rb new file mode 100644 index 00000000000000..5db034495f143c --- /dev/null +++ b/lib/gitlab/ci/interpolation/benchmark.rb @@ -0,0 +1,184 @@ +# frozen_string_literal: true + +module Gitlab + module Ci + module Interpolation + ## + # TODO: REMOVE THIS CLASS BEFORE MERGING THE MR. + # + # This is a helper-class to transparently document the benchmarking strategy in the MR that is going to introduce + # the core CI interpolation mechanisms. We will not merge the Interpolation::Benchmark class as it is supposed to + # be removed before merging, but it will remain available in the diff / commit history. + # + # How to run it in GDK / GCK: + # + # 1. Run Rails console: `bin/rails console` + # 2. Run the benchmark: `Gitlab::Ci::Interpolation::Benchmark.run!` + # 3. Update the code, and reload pry with `reload!`, go to point 2. + # + class Benchmark + class Spec + attr_reader :keys, :depth, :ratio, :level, :blocks, :nodes, :context + + def initialize(**opts) + @keys = opts[:keys] # Number of top-level keys in a hash + @depth = opts[:depth] # Number of nested hash for each entry + @ratio = opts[:ratio] # Interpolation ratio percentage: 0.01, 99 + @verbose = opts[:verbose] # Print summary in a verbose format + + @level = @depth # Memo object for recursive generation of the hash + @blocks = 0 # Memo object counting the number of interpolation blocks + @nodes = 0 # Memo object counting the number of vertices in the hash + + @context = { + inputs: { + key: 'my_key', + leaf: 'my_leaf' + } + } + end + + def next! + tap { @level -= 1 } + end + + def interpolate? + Random.rand(100 * 100) <= @ratio * 100 + end + + def interpolate_leaf! + @nodes += 1 + + if interpolate? + @blocks += 1 + '$[[ inputs.leaf ]]' + else + 'non-interpolated leaf' + end + end + + def interpolate_key! + @nodes += 1 + + if interpolate? + @blocks += 1 + '$[[ inputs.key ]]' + else + 'some key' + end + end + + def hash(spec = self) + return spec.interpolate_leaf! if spec.level < 1 + + Hash.new.tap do |h| # rubocop:disable Style/EmptyLiteral + spec.keys.times { |i| h["#{i}: #{spec.interpolate_key!}"] = hash(spec.next!) } + end + end + end + + class Result + def initialize(name, spec, count = 1) + @name = name + @spec = spec + @count = count + @results = [] + @allocation = {} + + raise Argumenterror if count == 0 + + run! + end + + def summary + # rubocop:disable Style/FormatString + node_time = '%0.08f' % (@hash_time / @spec.nodes) + block_time = '%0.08f' % (@hash_time / @spec.blocks) + hash_time = '%0.08f' % @hash_time + total_time = '%0.08f' % @total_time + + # - mem_objects: number of allocated heap slots (as reflected by GC) + # - mem_mallocs: number of malloc calls + # - mem_bytes: number of bytes allocated by malloc for objects that did not fit + # into a heap slot + # - mem_total_bytes: number of bytes allocated for both objects consuming an object slot + # and objects that required a malloc (mem_malloc_bytes) + mem_objects = @allocations[:mem_objects] + mem_total = @allocations[:mem_total_bytes] / 1024 + + if @verbose + <<~SUMMARY + Scenario name: #{@name} + ---------------------------------------------------- + Hash: hash spec parameters: #{@spec.keys} / #{@spec.depth} / #{@spec.ratio} + interpolation blocks: #{@spec.blocks} + generated hash nodes: #{@spec.nodes} + benchmark repeated t: #{@count} + + Benchmark: time per node: #{node_time} + time per block: #{block_time} + total memory allocations: #{mem_total} kB + mem objects allocated: + hash interpolation time: #{hash_time} + total interpolation time: #{total_time} + \n\n + SUMMARY + else + <<~SUMMARY + #{'%50.50s' % @name}: per hash: #{'%10.10s' % hash_time}, total: #{'%10.10s' % total_time}, objects: #{'%10.10s' % mem_objects}, mem: #{'%10.10s' % mem_total} kB + SUMMARY + end + # rubocop:enable Style/FormatString + end + + def run! + config = @spec.hash + context = @spec.context + + @allocations = Gitlab::Memory::Instrumentation.with_memory_allocations do + @results = Array.new(@count) do + ::Benchmark.realtime { Interpolation::Template.new(config, context) } + end + end + + @total_time = @results.sum(0.0) + @hash_time = @total_time / @count + end + end + + def initialize + @results = [] + end + + def run! + @results = [ + Result.new( + 'Large hash with many interpolation blocks', + Spec.new(keys: 20000, depth: 5, ratio: 80) + ), + Result.new( + 'Large hash with just a few interpolations', + Spec.new(keys: 20000, depth: 5, ratio: 0.01) + ), + Result.new( + 'Short hash frequently interpolated', + Spec.new(keys: 50, depth: 5, ratio: 5), + 10000 + ) + ] + end + + def summary # rubocop:disable Style/EmptyMethod + @results.each { |result| puts result.summary } # rubocop:disable Rails/Output + end + + def self.run! + new.tap do |bench| + bench.run! + bench.summary + end + end + end + end + end +end diff --git a/lib/gitlab/ci/interpolation/block.rb b/lib/gitlab/ci/interpolation/block.rb index 0f74030fb1b431..67ee4376150511 100644 --- a/lib/gitlab/ci/interpolation/block.rb +++ b/lib/gitlab/ci/interpolation/block.rb @@ -4,6 +4,7 @@ module Gitlab module Ci module Interpolation class Block + PREFIX = '$[['.freeze PATTERN = /(?\$\[\[\s*(?.+?)\s*\]\])/.freeze def initialize(block, ctx) @@ -32,7 +33,7 @@ def evaluate end def self.gsub(data, &block) - return data unless data.is_a?(String) + return data unless data.is_a?(String) && data.include?(PREFIX) data.gsub(PATTERN, &block) end -- GitLab From 416b93676707b00c78ed18d37c6669940307c279 Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Thu, 19 Jan 2023 12:34:14 +0100 Subject: [PATCH 09/18] Pass a config object to CI interpolation strategy method --- lib/gitlab/ci/interpolation/config.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/gitlab/ci/interpolation/config.rb b/lib/gitlab/ci/interpolation/config.rb index 24139d99f7f2ef..2acc74e646e93e 100644 --- a/lib/gitlab/ci/interpolation/config.rb +++ b/lib/gitlab/ci/interpolation/config.rb @@ -20,7 +20,7 @@ def interpolate(&block) case strategy when 1 - simple_deep_transform_strategy(&block) + simple_deep_transform_strategy(@config, &block) when 2 smart_only_when_allocation_needed_strategy(@config, &block) else @@ -43,8 +43,8 @@ def self.fabricate(config) private - def simple_deep_transform_strategy(&block) - @config.deep_dup.tap do |result| + def simple_deep_transform_strategy(config, &block) + config.deep_dup.tap do |result| result.deep_transform_keys!(&block) result.deep_transform_values!(&block) end -- GitLab From fcaea6cdddfd32c0a607930079db20adfc7b6b0b Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Thu, 19 Jan 2023 12:34:35 +0100 Subject: [PATCH 10/18] Don't use SHA256 interpolation block identifier unless needed --- lib/gitlab/ci/interpolation/template.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/gitlab/ci/interpolation/template.rb b/lib/gitlab/ci/interpolation/template.rb index 0d5331052db7af..9bbc56325898f6 100644 --- a/lib/gitlab/ci/interpolation/template.rb +++ b/lib/gitlab/ci/interpolation/template.rb @@ -35,7 +35,7 @@ def interpolated def interpolate! @result ||= @config.interpolate do |value| Interpolation::Block.gsub(value) do |data| - uid = ::Digest::SHA256.hexdigest(data) + uid = data.length > 20 ? ::Digest::SHA256.hexdigest(data) : data block = (@blocks[uid] ||= Interpolation::Block.new(data, ctx)) -- GitLab From c5dbd268c7c69516d952154d7314c1d4fb520441 Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Mon, 23 Jan 2023 14:17:19 +0100 Subject: [PATCH 11/18] Refactor interpolation block / template for readability --- lib/gitlab/ci/interpolation/block.rb | 23 ++++++++++++----------- lib/gitlab/ci/interpolation/template.rb | 8 +++----- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/lib/gitlab/ci/interpolation/block.rb b/lib/gitlab/ci/interpolation/block.rb index 67ee4376150511..c2664180bfdb86 100644 --- a/lib/gitlab/ci/interpolation/block.rb +++ b/lib/gitlab/ci/interpolation/block.rb @@ -7,13 +7,18 @@ class Block PREFIX = '$[['.freeze PATTERN = /(?\$\[\[\s*(?.+?)\s*\]\])/.freeze - def initialize(block, ctx) + attr_reader :block, :ctx + + def initialize(block, ctx, data = nil) @block = block @ctx = ctx - @content = self.class.access(block) + @data = data || begin + match = block.match(PATTERN) + match[:access] if match + end @access = Interpolation::Access - .new(@content, ctx) + .new(@data, ctx) end def valid? @@ -32,16 +37,12 @@ def evaluate @value ||= @access.evaluate end - def self.gsub(data, &block) + def self.match(data) return data unless data.is_a?(String) && data.include?(PREFIX) - data.gsub(PATTERN, &block) - end - - def self.access(data) - match = data.match(PATTERN) - - match[:access] if match + data.gsub(PATTERN) do |val| + yield val + end end end end diff --git a/lib/gitlab/ci/interpolation/template.rb b/lib/gitlab/ci/interpolation/template.rb index 9bbc56325898f6..0282dd47d30f57 100644 --- a/lib/gitlab/ci/interpolation/template.rb +++ b/lib/gitlab/ci/interpolation/template.rb @@ -33,11 +33,9 @@ def interpolated private def interpolate! - @result ||= @config.interpolate do |value| - Interpolation::Block.gsub(value) do |data| - uid = data.length > 20 ? ::Digest::SHA256.hexdigest(data) : data - - block = (@blocks[uid] ||= Interpolation::Block.new(data, ctx)) + @result ||= @config.interpolate do |data| + Interpolation::Block.match(data) do |block| + block = (@blocks[block] ||= Interpolation::Block.new(block, ctx)) block.evaluate end -- GitLab From 2d2a8d7a7aade5d4fae9dbc8ca94d1ea6492f44d Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Mon, 23 Jan 2023 14:41:11 +0100 Subject: [PATCH 12/18] Match CI interpolation block regexp only once --- lib/gitlab/ci/interpolation/block.rb | 6 +++--- lib/gitlab/ci/interpolation/template.rb | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/gitlab/ci/interpolation/block.rb b/lib/gitlab/ci/interpolation/block.rb index c2664180bfdb86..176b4f6ed8dc72 100644 --- a/lib/gitlab/ci/interpolation/block.rb +++ b/lib/gitlab/ci/interpolation/block.rb @@ -4,7 +4,7 @@ module Gitlab module Ci module Interpolation class Block - PREFIX = '$[['.freeze + PREFIX = '$[[' PATTERN = /(?\$\[\[\s*(?.+?)\s*\]\])/.freeze attr_reader :block, :ctx @@ -40,8 +40,8 @@ def evaluate def self.match(data) return data unless data.is_a?(String) && data.include?(PREFIX) - data.gsub(PATTERN) do |val| - yield val + data.gsub(PATTERN) do + yield ::Regexp.last_match(1), ::Regexp.last_match(2) end end end diff --git a/lib/gitlab/ci/interpolation/template.rb b/lib/gitlab/ci/interpolation/template.rb index 0282dd47d30f57..fdcd9f3585bb1c 100644 --- a/lib/gitlab/ci/interpolation/template.rb +++ b/lib/gitlab/ci/interpolation/template.rb @@ -34,8 +34,8 @@ def interpolated def interpolate! @result ||= @config.interpolate do |data| - Interpolation::Block.match(data) do |block| - block = (@blocks[block] ||= Interpolation::Block.new(block, ctx)) + Interpolation::Block.match(data) do |block, data| + block = (@blocks[block] ||= Interpolation::Block.new(block, ctx, data)) block.evaluate end -- GitLab From f1828ccaec38051a836fb8bca2dab17b58a58092 Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Mon, 23 Jan 2023 15:55:00 +0100 Subject: [PATCH 13/18] Implement a few more CI interpolation limits --- lib/gitlab/ci/interpolation/access.rb | 2 + lib/gitlab/ci/interpolation/benchmark.rb | 4 +- lib/gitlab/ci/interpolation/config.rb | 63 +++++++++++++------ lib/gitlab/ci/interpolation/context.rb | 25 +++++++- lib/gitlab/ci/interpolation/template.rb | 16 ++++- .../gitlab/ci/interpolation/config_spec.rb | 37 +++++++++++ .../gitlab/ci/interpolation/context_spec.rb | 36 +++++++++++ .../gitlab/ci/interpolation/template_spec.rb | 24 +++++++ 8 files changed, 180 insertions(+), 27 deletions(-) create mode 100644 spec/lib/gitlab/ci/interpolation/config_spec.rb create mode 100644 spec/lib/gitlab/ci/interpolation/context_spec.rb diff --git a/lib/gitlab/ci/interpolation/access.rb b/lib/gitlab/ci/interpolation/access.rb index d0be610e76a6c1..4d24d51fdd8585 100644 --- a/lib/gitlab/ci/interpolation/access.rb +++ b/lib/gitlab/ci/interpolation/access.rb @@ -23,6 +23,8 @@ def objects end def evaluate + # TODO exceptions and errors! + # objects.inject(@ctx) do |memo, value| memo.fetch(value.to_sym) end diff --git a/lib/gitlab/ci/interpolation/benchmark.rb b/lib/gitlab/ci/interpolation/benchmark.rb index 5db034495f143c..b17e96602b8f88 100644 --- a/lib/gitlab/ci/interpolation/benchmark.rb +++ b/lib/gitlab/ci/interpolation/benchmark.rb @@ -154,11 +154,11 @@ def run! @results = [ Result.new( 'Large hash with many interpolation blocks', - Spec.new(keys: 20000, depth: 5, ratio: 80) + Spec.new(keys: 50000, depth: 5, ratio: 80) ), Result.new( 'Large hash with just a few interpolations', - Spec.new(keys: 20000, depth: 5, ratio: 0.01) + Spec.new(keys: 50000, depth: 5, ratio: 0.01) ), Result.new( 'Short hash frequently interpolated', diff --git a/lib/gitlab/ci/interpolation/config.rb b/lib/gitlab/ci/interpolation/config.rb index 2acc74e646e93e..d28fc077590262 100644 --- a/lib/gitlab/ci/interpolation/config.rb +++ b/lib/gitlab/ci/interpolation/config.rb @@ -7,8 +7,37 @@ module Interpolation # Interpolation::Config represents a configuration artifact that we want to perform interpolation on. # class Config + ## + # Total number of hash nodes traversed. For example, loading a YAML below would result in a hash having 12 nodes + # instead of 9, because hash values are being counted before we recursively traverse them. + # + # test: + # spec: + # env: $[[ inputs.env ]] + # + # $[[ inputs.key ]]: + # name: $[[ inputs.key ]] + # script: my-value + # + # According to our benchmarks performed when developing this code, the worst-case scenario of processing + # a hash with 500_000 nodes takes around 1 second and consumes around 225 megabytes of memory. + # + # The typical scenario, using just a few interpolations takes 250ms and consumes around 20 megabytes of memory. + # + # Given the above the 500_000 nodes should be an upper limit, provided that the are additional safegoards + # present in other parts of the code (example: maximum number of interpolation blocks found). + # + MAX_NODES = 500_000 + + attr_reader :errors + + Visitor = Struct.new(:visited) do + def visit!; self.visited += 1; end # rubocop:disable Style/SingleLineMethods + end + def initialize(hash) @config = hash + @errors = [] end def to_h @@ -16,15 +45,14 @@ def to_h end def interpolate(&block) - strategy = 2 + visitor = Visitor.new(0) + + interpolated = recursive_traverse(@config, visitor, &block) - case strategy - when 1 - simple_deep_transform_strategy(@config, &block) - when 2 - smart_only_when_allocation_needed_strategy(@config, &block) + if visitor.visited > MAX_NODES + @errors.push('config too large') else - raise ArgumentError + interpolated end end @@ -43,20 +71,15 @@ def self.fabricate(config) private - def simple_deep_transform_strategy(config, &block) - config.deep_dup.tap do |result| - result.deep_transform_keys!(&block) - result.deep_transform_values!(&block) - end - end + def recursive_traverse(config, visitor, &block) + return if visitor.visit! > MAX_NODES - def smart_only_when_allocation_needed_strategy(config, &block) case config when Hash Hash.new.tap do |new_hash| # rubocop:disable Style/EmptyLiteral config.each_pair do |key, value| - new_key = yield key - new_value = smart_only_when_allocation_needed_strategy(value, &block) + new_key = recursive_traverse(key, visitor, &block) + new_value = recursive_traverse(value, visitor, &block) if new_key != key new_hash[new_key] = new_value @@ -66,11 +89,11 @@ def smart_only_when_allocation_needed_strategy(config, &block) end end when Array - config.map do |value| - smart_only_when_allocation_needed_strategy(value, &block) - end - else + config.map { |value| recursive_traverse(value, visitor, &block) } + when String yield config + else + config end end end diff --git a/lib/gitlab/ci/interpolation/context.rb b/lib/gitlab/ci/interpolation/context.rb index 73eb30b44b3c0f..727e1efc1773a9 100644 --- a/lib/gitlab/ci/interpolation/context.rb +++ b/lib/gitlab/ci/interpolation/context.rb @@ -10,14 +10,23 @@ module Interpolation # It can be at most 3-levels deep hash, and all leafs have to be strings. # class Context + attr_reader :errors + + MAX_DEPTH = 2 + def initialize(hash) @context = hash + @errors = [] - # TODO: validate + @errors.push('context too complex') if depth > MAX_DEPTH + end + + def valid? + @errors.none? end def depth - raise NotImplementedError + deep_depth(@context).max end def access(field) @@ -32,6 +41,18 @@ def to_h @context.to_h end + private + + def deep_depth(context, depth = 0) + context.values.map do |value| + if value.is_a?(Hash) + deep_depth(value, depth + 1).max + else + depth + 1 + end + end + end + def self.fabricate(context) case context when Hash diff --git a/lib/gitlab/ci/interpolation/template.rb b/lib/gitlab/ci/interpolation/template.rb index fdcd9f3585bb1c..ed594ee0b15473 100644 --- a/lib/gitlab/ci/interpolation/template.rb +++ b/lib/gitlab/ci/interpolation/template.rb @@ -6,12 +6,15 @@ module Interpolation class Template attr_reader :blocks, :ctx + MAX_BLOCKS = 10_000 + def initialize(config, ctx) @config = Interpolation::Config.fabricate(config) @ctx = Interpolation::Context.fabricate(ctx) + @errors = [] @blocks = {} - interpolate! + interpolate! if @ctx.valid? end def valid? @@ -19,7 +22,10 @@ def valid? end def errors - @blocks.flat_map(&:errors) + @errors + + @ctx.errors + + @config.errors + + @blocks.values.flat_map(&:errors) end def size @@ -27,7 +33,7 @@ def size end def interpolated - @result.to_h + @result if @result.is_a?(Hash) end private @@ -35,6 +41,10 @@ def interpolated def interpolate! @result ||= @config.interpolate do |data| Interpolation::Block.match(data) do |block, data| + if @blocks.count > MAX_BLOCKS # rubocop:disable Style/IfUnlessModifier + return @errors.push('too many interpolation blocks') # rubocop:disable Cop/AvoidReturnFromBlocks + end + block = (@blocks[block] ||= Interpolation::Block.new(block, ctx, data)) block.evaluate diff --git a/spec/lib/gitlab/ci/interpolation/config_spec.rb b/spec/lib/gitlab/ci/interpolation/config_spec.rb new file mode 100644 index 00000000000000..e1d0cbc9951a76 --- /dev/null +++ b/spec/lib/gitlab/ci/interpolation/config_spec.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +require 'fast_spec_helper' + +RSpec.describe Gitlab::Ci::Interpolation::Config, feature_category: :pipeline_authoring do + subject { described_class.new(YAML.safe_load(config)) } + + let(:config) do + <<~CFG + test: + spec: + env: $[[ inputs.env ]] + + $[[ inputs.key ]]: + name: $[[ inputs.key ]] + script: my-value + CFG + end + + context 'when config size is exceeded' do + before do + stub_const("#{described_class}::MAX_NODES", 7) + end + + it 'returns a config size error' do + interpolated = 0 + + subject.interpolate do |c| + interpolated += 1 + end + + expect(subject.errors.size).to eq 1 + expect(subject.errors.first).to eq 'config too large' + expect(interpolated).to eq 4 + end + end +end diff --git a/spec/lib/gitlab/ci/interpolation/context_spec.rb b/spec/lib/gitlab/ci/interpolation/context_spec.rb new file mode 100644 index 00000000000000..9229dbcf4d27ef --- /dev/null +++ b/spec/lib/gitlab/ci/interpolation/context_spec.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +require 'fast_spec_helper' + +RSpec.describe Gitlab::Ci::Interpolation::Context, feature_category: :pipeline_authoring do + subject { described_class.new(ctx) } + + let(:ctx) do + { inputs: { key: 'abc' } } + end + + describe '#valid?' do + it 'is a valid interpolation context' do + expect(subject).to be_valid + end + end + + describe '#depth' do + it 'returns a max depth of the hash' do + expect(subject.depth).to eq 2 + end + end + + context 'when interpolation context is too complex' do + let(:ctx) do + { inputs: { key: { aaa: 'bbb' } } } + end + + it 'returns context too complex error' do + expect(subject.depth).to eq 3 + expect(subject).not_to be_valid + expect(subject.errors.size).to eq 1 + expect(subject.errors.first).to eq 'context too complex' + end + end +end diff --git a/spec/lib/gitlab/ci/interpolation/template_spec.rb b/spec/lib/gitlab/ci/interpolation/template_spec.rb index b576aa47a209f3..5427f91ca95f02 100644 --- a/spec/lib/gitlab/ci/interpolation/template_spec.rb +++ b/spec/lib/gitlab/ci/interpolation/template_spec.rb @@ -54,4 +54,28 @@ expect(subject.interpolated).to eq result end end + + context 'when template is too large' do + before do + stub_const('Gitlab::Ci::Interpolation::Config::MAX_NODES', 1) + end + + it 'returns an error' do + expect(subject.interpolated).to be_nil + expect(subject.errors.count).to eq 1 + expect(subject.errors.first).to eq 'config too large' + end + end + + context 'when there are too many interpolation blocks' do + before do + stub_const("#{described_class}::MAX_BLOCKS", 1) + end + + it 'returns an error' do + expect(subject.interpolated).to be_nil + expect(subject.errors.count).to eq 1 + expect(subject.errors.first).to eq 'too many interpolation blocks' + end + end end -- GitLab From 72db2ad5060736f70afbb2dcfc3186af742d9c6e Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Mon, 23 Jan 2023 16:51:01 +0100 Subject: [PATCH 14/18] Improve CI interpolation code and add specs --- lib/gitlab/ci/interpolation/access.rb | 26 ++++++--- lib/gitlab/ci/interpolation/block.rb | 23 +++++--- lib/gitlab/ci/interpolation/config.rb | 54 +++++++++++-------- lib/gitlab/ci/interpolation/context.rb | 39 +++++++++----- lib/gitlab/ci/interpolation/template.rb | 36 ++++++++----- .../gitlab/ci/interpolation/access_spec.rb | 24 ++++++++- .../lib/gitlab/ci/interpolation/block_spec.rb | 26 ++++++++- .../gitlab/ci/interpolation/config_spec.rb | 2 +- .../gitlab/ci/interpolation/context_spec.rb | 25 +++++---- .../gitlab/ci/interpolation/template_spec.rb | 23 +++++++- 10 files changed, 204 insertions(+), 74 deletions(-) diff --git a/lib/gitlab/ci/interpolation/access.rb b/lib/gitlab/ci/interpolation/access.rb index 4d24d51fdd8585..58ad1b1baf3808 100644 --- a/lib/gitlab/ci/interpolation/access.rb +++ b/lib/gitlab/ci/interpolation/access.rb @@ -6,28 +6,40 @@ module Interpolation class Access attr_reader :content, :errors + MAX_ACCESS_OBJECTS = 5 + def initialize(access, ctx) @content = access @ctx = ctx @errors = [] - @errors.push('invalid interpolation access pattern') if objects.none? + @errors.push('invalid interpolation access pattern') if objects.count <= 1 end def valid? - errors.any? + errors.none? end def objects - @content.split('.') + @objects ||= @content.split('.', MAX_ACCESS_OBJECTS) + end + + def value + return @value if instance_variable_defined?(:@value) + + return evaluate! if valid? + + raise ArgumentError, 'access path invalid' end - def evaluate - # TODO exceptions and errors! - # - objects.inject(@ctx) do |memo, value| + def evaluate! + raise ArgumentError, 'access path invalid' unless valid? + + @value ||= objects.inject(@ctx) do |memo, value| memo.fetch(value.to_sym) end + rescue KeyError => e + @errors.push(e) end end end diff --git a/lib/gitlab/ci/interpolation/block.rb b/lib/gitlab/ci/interpolation/block.rb index 176b4f6ed8dc72..49c4f354c79cc0 100644 --- a/lib/gitlab/ci/interpolation/block.rb +++ b/lib/gitlab/ci/interpolation/block.rb @@ -6,35 +6,46 @@ module Interpolation class Block PREFIX = '$[[' PATTERN = /(?\$\[\[\s*(?.+?)\s*\]\])/.freeze + MAX_BLOCK_SIZE = 1024 * 1024 # 1 KB attr_reader :block, :ctx def initialize(block, ctx, data = nil) @block = block @ctx = ctx + @errors = [] @data = data || begin match = block.match(PATTERN) match[:access] if match end - @access = Interpolation::Access - .new(@data, ctx) + @access = Interpolation::Access.new(@data, ctx) + + return unless @access.valid? + + if block.bytesize > MAX_BLOCK_SIZE + @errors.push('maximum block size exceeded') + else + @access.evaluate! + end end def valid? - errors.any? + errors.none? end def errors - @access.errors + @errors + @access.errors end def content @access.content end - def evaluate - @value ||= @access.evaluate + def value + raise ArgumentError, 'block invalid' unless valid? + + @access.value end def self.match(data) diff --git a/lib/gitlab/ci/interpolation/config.rb b/lib/gitlab/ci/interpolation/config.rb index d28fc077590262..f78fc8edfaa634 100644 --- a/lib/gitlab/ci/interpolation/config.rb +++ b/lib/gitlab/ci/interpolation/config.rb @@ -24,36 +24,44 @@ class Config # # The typical scenario, using just a few interpolations takes 250ms and consumes around 20 megabytes of memory. # - # Given the above the 500_000 nodes should be an upper limit, provided that the are additional safegoards - # present in other parts of the code (example: maximum number of interpolation blocks found). + # Given the above the 500_000 nodes should be an upper limit, provided that the are additional safeguard + # present in other parts of the code (example: maximum number of interpolation blocks found). Typical size of a + # YAML configuration with 500k nodes might be around 10 megabytes, which is an order of magnitude higher than + # the 1MB limit for loading YAML on GitLab.com # MAX_NODES = 500_000 + TooManyNodesError = Class.new(StandardError) - attr_reader :errors + Visitor = Class.new do + def initialize + @visited = 0 + end + + def visit! + @visited += 1 - Visitor = Struct.new(:visited) do - def visit!; self.visited += 1; end # rubocop:disable Style/SingleLineMethods + raise Config::TooManyNodesError if @visited > Config::MAX_NODES + end end + attr_reader :errors + def initialize(hash) @config = hash @errors = [] end def to_h - @config.to_h + @config end - def interpolate(&block) - visitor = Visitor.new(0) - - interpolated = recursive_traverse(@config, visitor, &block) + def interpolate!(&block) + visitor = Visitor.new - if visitor.visited > MAX_NODES - @errors.push('config too large') - else - interpolated - end + recursive_replace(@config, visitor, &block) + rescue TooManyNodesError + @errors.push('config too large') + nil end def self.fabricate(config) @@ -65,21 +73,23 @@ def self.fabricate(config) when String raise NotImplementedError, 'interpolation context unmarshalling needed' else - raise ArgumentError, 'uknown interpolation config' + raise ArgumentError, 'unknown interpolation config' end end private - def recursive_traverse(config, visitor, &block) - return if visitor.visit! > MAX_NODES + def recursive_replace(config, visitor, &block) + visitor.visit! + # TODO max node size + # case config when Hash Hash.new.tap do |new_hash| # rubocop:disable Style/EmptyLiteral config.each_pair do |key, value| - new_key = recursive_traverse(key, visitor, &block) - new_value = recursive_traverse(value, visitor, &block) + new_key = recursive_replace(key, visitor, &block) + new_value = recursive_replace(value, visitor, &block) if new_key != key new_hash[new_key] = new_value @@ -89,7 +99,9 @@ def recursive_traverse(config, visitor, &block) end end when Array - config.map { |value| recursive_traverse(value, visitor, &block) } + config.map { |value| recursive_replace(value, visitor, &block) } + when Symbol + yield config.to_s when String yield config else diff --git a/lib/gitlab/ci/interpolation/context.rb b/lib/gitlab/ci/interpolation/context.rb index 727e1efc1773a9..6f1f599c8c01aa 100644 --- a/lib/gitlab/ci/interpolation/context.rb +++ b/lib/gitlab/ci/interpolation/context.rb @@ -7,30 +7,35 @@ module Interpolation # Interpolation::Context is a class that represents the data that can be used when performing string interpolation # on a CI configuration. # - # It can be at most 3-levels deep hash, and all leafs have to be strings. - # class Context - attr_reader :errors + ContextTooComplexError = Class.new(StandardError) + NotSymbolizedContextError = Class.new(StandardError) MAX_DEPTH = 2 def initialize(hash) @context = hash - @errors = [] - @errors.push('context too complex') if depth > MAX_DEPTH + raise ContextTooComplexError if depth > MAX_DEPTH + + deep_keys(@context) do |key| + raise NotSymbolizedContextError unless key.is_a?(Symbol) + end end def valid? - @errors.none? + errors.none? end - def depth - deep_depth(@context).max + ## + # This method is here because `Context` will be responsible for validating specs, inputs and defaults. + # + def errors + [] end - def access(field) - raise NotImplementedError + def depth + deep_depth(@context) end def fetch(field) @@ -44,13 +49,23 @@ def to_h private def deep_depth(context, depth = 0) - context.values.map do |value| + values = context.values.map do |value| if value.is_a?(Hash) - deep_depth(value, depth + 1).max + deep_depth(value, depth + 1) else depth + 1 end end + + values.max + end + + def deep_keys(context, &block) + context.each_pair do |key, value| + yield key + + deep_keys(value, &block) if value.is_a?(Hash) + end end def self.fabricate(context) diff --git a/lib/gitlab/ci/interpolation/template.rb b/lib/gitlab/ci/interpolation/template.rb index ed594ee0b15473..3f8efa45f6e7df 100644 --- a/lib/gitlab/ci/interpolation/template.rb +++ b/lib/gitlab/ci/interpolation/template.rb @@ -6,6 +6,9 @@ module Interpolation class Template attr_reader :blocks, :ctx + TooManyBlocksError = Class.new(StandardError) + InvalidBlockError = Class.new(StandardError) + MAX_BLOCKS = 10_000 def initialize(config, ctx) @@ -14,18 +17,18 @@ def initialize(config, ctx) @errors = [] @blocks = {} - interpolate! if @ctx.valid? + interpolate! end def valid? - errors.any? + errors.none? end def errors - @errors + - @ctx.errors + - @config.errors + - @blocks.values.flat_map(&:errors) + [ + @errors + @ctx.errors + @config.errors, + @blocks.values.map(&:errors) + ].flatten end def size @@ -33,23 +36,30 @@ def size end def interpolated - @result if @result.is_a?(Hash) + return @result if instance_variable_defined?(:@result) + + interpolate! if valid? end private def interpolate! - @result ||= @config.interpolate do |data| + @result ||= @config.interpolate! do |data| Interpolation::Block.match(data) do |block, data| - if @blocks.count > MAX_BLOCKS # rubocop:disable Style/IfUnlessModifier - return @errors.push('too many interpolation blocks') # rubocop:disable Cop/AvoidReturnFromBlocks - end - block = (@blocks[block] ||= Interpolation::Block.new(block, ctx, data)) - block.evaluate + raise TooManyBlocksError if @blocks.count > MAX_BLOCKS + raise InvalidBlockError unless block.valid? + + block.value end end + rescue TooManyBlocksError + @errors.push('too many interpolation blocks') + rescue InvalidBlockError + @errors.push('invalid interpolation block') + ensure + @result if @errors.none? end end end diff --git a/spec/lib/gitlab/ci/interpolation/access_spec.rb b/spec/lib/gitlab/ci/interpolation/access_spec.rb index 362e7d72aebc2a..1eb83c827d23ad 100644 --- a/spec/lib/gitlab/ci/interpolation/access_spec.rb +++ b/spec/lib/gitlab/ci/interpolation/access_spec.rb @@ -14,6 +14,28 @@ end it 'properly evaluates the access pattern' do - expect(subject.evaluate).to eq 'abcd' + subject.evaluate! + + expect(subject.value).to eq 'abcd' + end + + context 'when there are too many objects in the access path' do + let(:access) { 'a.b.c.d.e.f.g.h' } + + it 'only support MAX_ACCESS_OBJECTS steps' do + subject.evaluate! + + expect(subject.objects.count).to eq 5 + end + end + + context 'when there are not enough objects in the access path' do + let(:access) { 'abc[123]' } + + it 'returns an error when there are no objects found' do + expect(subject).not_to be_valid + expect(subject.errors.first) + .to eq 'invalid interpolation access pattern' + end end end diff --git a/spec/lib/gitlab/ci/interpolation/block_spec.rb b/spec/lib/gitlab/ci/interpolation/block_spec.rb index a3e491cc1a337f..69bf238976e0e9 100644 --- a/spec/lib/gitlab/ci/interpolation/block_spec.rb +++ b/spec/lib/gitlab/ci/interpolation/block_spec.rb @@ -18,6 +18,30 @@ end it 'properly evaluates the access pattern' do - expect(subject.evaluate).to eq 'abc' + expect(subject.value).to eq 'abc' + end + + context 'when block size is too large' do + before do + stub_const("#{described_class}::MAX_BLOCK_SIZE", 17) + end + + it 'returns an error' do + expect(subject).not_to be_valid + expect(subject.errors.first).to eq 'maximum block size exceeded' + end + end + + describe '.match' do + it 'matches an empty block' do + result = described_class.match('$[[]]') do |block, data| + expect(block).to eq '$[[]]' + expect(data).to eq '' + + block + end + + expect(result).to eq '$[[]]' + end end end diff --git a/spec/lib/gitlab/ci/interpolation/config_spec.rb b/spec/lib/gitlab/ci/interpolation/config_spec.rb index e1d0cbc9951a76..057ba0f62498b7 100644 --- a/spec/lib/gitlab/ci/interpolation/config_spec.rb +++ b/spec/lib/gitlab/ci/interpolation/config_spec.rb @@ -25,7 +25,7 @@ it 'returns a config size error' do interpolated = 0 - subject.interpolate do |c| + subject.interpolate! do |c| interpolated += 1 end diff --git a/spec/lib/gitlab/ci/interpolation/context_spec.rb b/spec/lib/gitlab/ci/interpolation/context_spec.rb index 9229dbcf4d27ef..a2582e88a46256 100644 --- a/spec/lib/gitlab/ci/interpolation/context_spec.rb +++ b/spec/lib/gitlab/ci/interpolation/context_spec.rb @@ -9,12 +9,6 @@ { inputs: { key: 'abc' } } end - describe '#valid?' do - it 'is a valid interpolation context' do - expect(subject).to be_valid - end - end - describe '#depth' do it 'returns a max depth of the hash' do expect(subject.depth).to eq 2 @@ -26,11 +20,20 @@ { inputs: { key: { aaa: 'bbb' } } } end - it 'returns context too complex error' do - expect(subject.depth).to eq 3 - expect(subject).not_to be_valid - expect(subject.errors.size).to eq 1 - expect(subject.errors.first).to eq 'context too complex' + it 'raises an exception' do + expect { described_class.new(ctx) } + .to raise_error(described_class::ContextTooComplexError) + end + end + + context 'when interpolation context has not been symbolized correctly' do + let(:ctx) do + { inputs: { 'key' => 'bbb' } } + end + + it 'raises an exception' do + expect { described_class.new(ctx) } + .to raise_error(described_class::NotSymbolizedContextError) end end end diff --git a/spec/lib/gitlab/ci/interpolation/template_spec.rb b/spec/lib/gitlab/ci/interpolation/template_spec.rb index 5427f91ca95f02..8a243b4db05c3e 100644 --- a/spec/lib/gitlab/ci/interpolation/template_spec.rb +++ b/spec/lib/gitlab/ci/interpolation/template_spec.rb @@ -37,6 +37,15 @@ RESULT end + context 'when interpolation can not be performed' do + let(:config) { '$[[ xxx.yyy ]]: abc' } + + it 'does not interpolate the config' do + expect(subject).not_to be_valid + expect(subject.interpolated).to be_nil + end + end + context 'when template consists of nested arrays with hashes and values' do let(:config) do <<~CFG @@ -48,13 +57,25 @@ CFG end - it 'perform a valid interpolation' do + it 'performs a valid interpolation' do result = { 'test' => ['a-abc-b', { 'c-abc-d' => 'd-abc-e', 'val' => 1 }] } + expect(subject).to be_valid expect(subject.interpolated).to eq result end end + context 'when template contains symbols that need interpolation' do + subject do + described_class.new({ '$[[ inputs.key ]]'.to_sym => 'cde' }, ctx) + end + + it 'performs a valid interpolation' do + expect(subject).to be_valid + expect(subject.interpolated).to eq({ 'abc' => 'cde' }) + end + end + context 'when template is too large' do before do stub_const('Gitlab::Ci::Interpolation::Config::MAX_NODES', 1) -- GitLab From 105d07f8b491fb68ae033f7490b652fb64e2d51e Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Wed, 25 Jan 2023 18:01:00 +0100 Subject: [PATCH 15/18] Implement CI interpolation node size limit --- lib/gitlab/ci/interpolation/block.rb | 2 +- lib/gitlab/ci/interpolation/config.rb | 24 ++++++++++++++++-------- lib/gitlab/ci/interpolation/context.rb | 2 +- lib/gitlab/ci/interpolation/template.rb | 2 -- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/lib/gitlab/ci/interpolation/block.rb b/lib/gitlab/ci/interpolation/block.rb index 49c4f354c79cc0..8e8a4f720cdbd3 100644 --- a/lib/gitlab/ci/interpolation/block.rb +++ b/lib/gitlab/ci/interpolation/block.rb @@ -6,7 +6,7 @@ module Interpolation class Block PREFIX = '$[[' PATTERN = /(?\$\[\[\s*(?.+?)\s*\]\])/.freeze - MAX_BLOCK_SIZE = 1024 * 1024 # 1 KB + MAX_BLOCK_SIZE = 1024 # 1 KB attr_reader :block, :ctx diff --git a/lib/gitlab/ci/interpolation/config.rb b/lib/gitlab/ci/interpolation/config.rb index f78fc8edfaa634..9ae4b503d7173d 100644 --- a/lib/gitlab/ci/interpolation/config.rb +++ b/lib/gitlab/ci/interpolation/config.rb @@ -30,7 +30,10 @@ class Config # the 1MB limit for loading YAML on GitLab.com # MAX_NODES = 500_000 + MAX_NODE_SIZE = 1024 * 1024 # 1MB + TooManyNodesError = Class.new(StandardError) + NodeTooLargeError = Class.new(StandardError) Visitor = Class.new do def initialize @@ -56,12 +59,9 @@ def to_h end def interpolate!(&block) - visitor = Visitor.new + @result ||= interpolate(&block) - recursive_replace(@config, visitor, &block) - rescue TooManyNodesError - @errors.push('config too large') - nil + @result if @errors.none? end def self.fabricate(config) @@ -79,11 +79,17 @@ def self.fabricate(config) private + def interpolate(&block) + recursive_replace(@config, Visitor.new, &block) + rescue TooManyNodesError + @errors.push('config too large') + rescue NodeTooLargeError + @errors.push('config node too large') + end + def recursive_replace(config, visitor, &block) visitor.visit! - # TODO max node size - # case config when Hash Hash.new.tap do |new_hash| # rubocop:disable Style/EmptyLiteral @@ -101,8 +107,10 @@ def recursive_replace(config, visitor, &block) when Array config.map { |value| recursive_replace(value, visitor, &block) } when Symbol - yield config.to_s + recursive_replace(config.to_s, visitor, &block) when String + raise NodeTooLargeError if config.bytesize > MAX_NODE_SIZE + yield config else config diff --git a/lib/gitlab/ci/interpolation/context.rb b/lib/gitlab/ci/interpolation/context.rb index 6f1f599c8c01aa..39735e4a670180 100644 --- a/lib/gitlab/ci/interpolation/context.rb +++ b/lib/gitlab/ci/interpolation/context.rb @@ -75,7 +75,7 @@ def self.fabricate(context) when Interpolation::Context new(context.to_h) else - raise ArgumentError, 'uknown interpolation context' + raise ArgumentError, 'unknown interpolation context' end end end diff --git a/lib/gitlab/ci/interpolation/template.rb b/lib/gitlab/ci/interpolation/template.rb index 3f8efa45f6e7df..1b7804dc8b2732 100644 --- a/lib/gitlab/ci/interpolation/template.rb +++ b/lib/gitlab/ci/interpolation/template.rb @@ -58,8 +58,6 @@ def interpolate! @errors.push('too many interpolation blocks') rescue InvalidBlockError @errors.push('invalid interpolation block') - ensure - @result if @errors.none? end end end -- GitLab From 648a5763627cd8f78347c425c21863faa456995a Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Thu, 26 Jan 2023 12:57:01 +0100 Subject: [PATCH 16/18] Simplify CI interpolation code and improve specs --- lib/gitlab/ci/interpolation/access.rb | 19 ++++++++--- lib/gitlab/ci/interpolation/block.rb | 20 +++-------- lib/gitlab/ci/interpolation/config.rb | 26 ++++++-------- lib/gitlab/ci/interpolation/context.rb | 16 ++------- lib/gitlab/ci/interpolation/template.rb | 34 ++++++++++--------- spec/fast_spec_helper.rb | 2 ++ .../gitlab/ci/interpolation/access_spec.rb | 16 ++++++--- .../lib/gitlab/ci/interpolation/block_spec.rb | 27 ++++++--------- .../gitlab/ci/interpolation/context_spec.rb | 13 +------ 9 files changed, 75 insertions(+), 98 deletions(-) diff --git a/lib/gitlab/ci/interpolation/access.rb b/lib/gitlab/ci/interpolation/access.rb index 58ad1b1baf3808..4259845890240a 100644 --- a/lib/gitlab/ci/interpolation/access.rb +++ b/lib/gitlab/ci/interpolation/access.rb @@ -7,13 +7,22 @@ class Access attr_reader :content, :errors MAX_ACCESS_OBJECTS = 5 + MAX_ACCESS_BYTESIZE = 1024 def initialize(access, ctx) @content = access @ctx = ctx @errors = [] - @errors.push('invalid interpolation access pattern') if objects.count <= 1 + if objects.count <= 1 # rubocop:disable Style/IfUnlessModifier + @errors.push('invalid interpolation access pattern') + end + + if access.bytesize > MAX_ACCESS_BYTESIZE # rubocop:disable Style/IfUnlessModifier + @errors.push('maximum interpolation expression size exceeded') + end + + evaluate! if valid? end def valid? @@ -25,13 +34,13 @@ def objects end def value - return @value if instance_variable_defined?(:@value) - - return evaluate! if valid? + raise ArgumentError, 'access path invalid' unless valid? - raise ArgumentError, 'access path invalid' + @value end + private + def evaluate! raise ArgumentError, 'access path invalid' unless valid? diff --git a/lib/gitlab/ci/interpolation/block.rb b/lib/gitlab/ci/interpolation/block.rb index 8e8a4f720cdbd3..a8bb03f5f9f53e 100644 --- a/lib/gitlab/ci/interpolation/block.rb +++ b/lib/gitlab/ci/interpolation/block.rb @@ -5,29 +5,17 @@ module Ci module Interpolation class Block PREFIX = '$[[' - PATTERN = /(?\$\[\[\s*(?.+?)\s*\]\])/.freeze - MAX_BLOCK_SIZE = 1024 # 1 KB + PATTERN = /(?\$\[\[\s*(?.*?)\s*\]\])/.freeze - attr_reader :block, :ctx + attr_reader :block, :data, :ctx - def initialize(block, ctx, data = nil) + def initialize(block, data, ctx) @block = block @ctx = ctx + @data = data @errors = [] - @data = data || begin - match = block.match(PATTERN) - match[:access] if match - end @access = Interpolation::Access.new(@data, ctx) - - return unless @access.valid? - - if block.bytesize > MAX_BLOCK_SIZE - @errors.push('maximum block size exceeded') - else - @access.evaluate! - end end def valid? diff --git a/lib/gitlab/ci/interpolation/config.rb b/lib/gitlab/ci/interpolation/config.rb index 9ae4b503d7173d..3cee1aa227f9e8 100644 --- a/lib/gitlab/ci/interpolation/config.rb +++ b/lib/gitlab/ci/interpolation/config.rb @@ -7,6 +7,7 @@ module Interpolation # Interpolation::Config represents a configuration artifact that we want to perform interpolation on. # class Config + include Gitlab::Utils::StrongMemoize ## # Total number of hash nodes traversed. For example, loading a YAML below would result in a hash having 12 nodes # instead of 9, because hash values are being counted before we recursively traverse them. @@ -59,19 +60,22 @@ def to_h end def interpolate!(&block) - @result ||= interpolate(&block) - - @result if @errors.none? + recursive_replace(@config, Visitor.new, &block) + rescue TooManyNodesError + @errors.push('config too large') + nil + rescue NodeTooLargeError + @errors.push('config node too large') + nil end + strong_memoize_attr :interpolate! def self.fabricate(config) case config when Hash new(config) when Interpolation::Config - new(config.to_h) - when String - raise NotImplementedError, 'interpolation context unmarshalling needed' + config else raise ArgumentError, 'unknown interpolation config' end @@ -79,20 +83,12 @@ def self.fabricate(config) private - def interpolate(&block) - recursive_replace(@config, Visitor.new, &block) - rescue TooManyNodesError - @errors.push('config too large') - rescue NodeTooLargeError - @errors.push('config node too large') - end - def recursive_replace(config, visitor, &block) visitor.visit! case config when Hash - Hash.new.tap do |new_hash| # rubocop:disable Style/EmptyLiteral + {}.tap do |new_hash| config.each_pair do |key, value| new_key = recursive_replace(key, visitor, &block) new_value = recursive_replace(value, visitor, &block) diff --git a/lib/gitlab/ci/interpolation/context.rb b/lib/gitlab/ci/interpolation/context.rb index 39735e4a670180..ce7a86a3c9b5f5 100644 --- a/lib/gitlab/ci/interpolation/context.rb +++ b/lib/gitlab/ci/interpolation/context.rb @@ -11,16 +11,12 @@ class Context ContextTooComplexError = Class.new(StandardError) NotSymbolizedContextError = Class.new(StandardError) - MAX_DEPTH = 2 + MAX_DEPTH = 3 def initialize(hash) @context = hash raise ContextTooComplexError if depth > MAX_DEPTH - - deep_keys(@context) do |key| - raise NotSymbolizedContextError unless key.is_a?(Symbol) - end end def valid? @@ -60,20 +56,12 @@ def deep_depth(context, depth = 0) values.max end - def deep_keys(context, &block) - context.each_pair do |key, value| - yield key - - deep_keys(value, &block) if value.is_a?(Hash) - end - end - def self.fabricate(context) case context when Hash new(context) when Interpolation::Context - new(context.to_h) + context else raise ArgumentError, 'unknown interpolation context' end diff --git a/lib/gitlab/ci/interpolation/template.rb b/lib/gitlab/ci/interpolation/template.rb index 1b7804dc8b2732..2ec833fbcfb4dd 100644 --- a/lib/gitlab/ci/interpolation/template.rb +++ b/lib/gitlab/ci/interpolation/template.rb @@ -4,6 +4,8 @@ module Gitlab module Ci module Interpolation class Template + include Gitlab::Utils::StrongMemoize + attr_reader :blocks, :ctx TooManyBlocksError = Class.new(StandardError) @@ -17,7 +19,7 @@ def initialize(config, ctx) @errors = [] @blocks = {} - interpolate! + interpolate! if valid? end def valid? @@ -25,10 +27,7 @@ def valid? end def errors - [ - @errors + @ctx.errors + @config.errors, - @blocks.values.map(&:errors) - ].flatten + @errors + @config.errors + @ctx.errors + @blocks.values.flat_map(&:errors) end def size @@ -36,28 +35,31 @@ def size end def interpolated - return @result if instance_variable_defined?(:@result) - - interpolate! if valid? + @result if valid? end private def interpolate! - @result ||= @config.interpolate! do |data| + @result = @config.interpolate! do |data| Interpolation::Block.match(data) do |block, data| - block = (@blocks[block] ||= Interpolation::Block.new(block, ctx, data)) - - raise TooManyBlocksError if @blocks.count > MAX_BLOCKS - raise InvalidBlockError unless block.valid? - - block.value + evaluate_block(block, data) end end rescue TooManyBlocksError @errors.push('too many interpolation blocks') rescue InvalidBlockError - @errors.push('invalid interpolation block') + @errors.push('interpolation interrupted by errors') + end + strong_memoize_attr :interpolate! + + def evaluate_block(block, data) + block = (@blocks[block] ||= Interpolation::Block.new(block, data, ctx)) + + raise TooManyBlocksError if @blocks.count > MAX_BLOCKS + raise InvalidBlockError unless block.valid? + + block.value end end end diff --git a/spec/fast_spec_helper.rb b/spec/fast_spec_helper.rb index 393cd6f6a21b2f..e53f0cd936fa52 100644 --- a/spec/fast_spec_helper.rb +++ b/spec/fast_spec_helper.rb @@ -18,6 +18,8 @@ require_relative '../config/initializers/0_inject_enterprise_edition_module' require_relative '../config/settings' require_relative 'support/rspec' +require_relative '../lib/gitlab/utils' +require_relative '../lib/gitlab/utils/strong_memoize' require 'active_support/all' require_relative 'simplecov_env' diff --git a/spec/lib/gitlab/ci/interpolation/access_spec.rb b/spec/lib/gitlab/ci/interpolation/access_spec.rb index 1eb83c827d23ad..9f6108a328d800 100644 --- a/spec/lib/gitlab/ci/interpolation/access_spec.rb +++ b/spec/lib/gitlab/ci/interpolation/access_spec.rb @@ -14,8 +14,6 @@ end it 'properly evaluates the access pattern' do - subject.evaluate! - expect(subject.value).to eq 'abcd' end @@ -23,12 +21,22 @@ let(:access) { 'a.b.c.d.e.f.g.h' } it 'only support MAX_ACCESS_OBJECTS steps' do - subject.evaluate! - expect(subject.objects.count).to eq 5 end end + context 'when access expression size is too large' do + before do + stub_const("#{described_class}::MAX_ACCESS_BYTESIZE", 10) + end + + it 'returns an error' do + expect(subject).not_to be_valid + expect(subject.errors.first) + .to eq 'maximum interpolation expression size exceeded' + end + end + context 'when there are not enough objects in the access path' do let(:access) { 'abc[123]' } diff --git a/spec/lib/gitlab/ci/interpolation/block_spec.rb b/spec/lib/gitlab/ci/interpolation/block_spec.rb index 69bf238976e0e9..660de99af5cbd7 100644 --- a/spec/lib/gitlab/ci/interpolation/block_spec.rb +++ b/spec/lib/gitlab/ci/interpolation/block_spec.rb @@ -3,10 +3,14 @@ require 'fast_spec_helper' RSpec.describe Gitlab::Ci::Interpolation::Block, feature_category: :pipeline_authoring do - subject { described_class.new(block, ctx) } + subject { described_class.new(block, data, ctx) } + + let(:data) do + 'inputs.data' + end let(:block) do - '$[[ inputs.data ]]' + "$[[ #{data} ]]" end let(:ctx) do @@ -21,27 +25,18 @@ expect(subject.value).to eq 'abc' end - context 'when block size is too large' do - before do - stub_const("#{described_class}::MAX_BLOCK_SIZE", 17) - end - - it 'returns an error' do - expect(subject).not_to be_valid - expect(subject.errors.first).to eq 'maximum block size exceeded' - end - end - describe '.match' do it 'matches an empty block' do - result = described_class.match('$[[]]') do |block, data| + matched = false + + described_class.match('$[[]]') do |block, data| expect(block).to eq '$[[]]' expect(data).to eq '' - block + matched = true end - expect(result).to eq '$[[]]' + expect(matched).to eq true end end end diff --git a/spec/lib/gitlab/ci/interpolation/context_spec.rb b/spec/lib/gitlab/ci/interpolation/context_spec.rb index a2582e88a46256..ada896f4980be0 100644 --- a/spec/lib/gitlab/ci/interpolation/context_spec.rb +++ b/spec/lib/gitlab/ci/interpolation/context_spec.rb @@ -17,7 +17,7 @@ context 'when interpolation context is too complex' do let(:ctx) do - { inputs: { key: { aaa: 'bbb' } } } + { inputs: { key: { aaa: { bbb: 'ccc' } } } } end it 'raises an exception' do @@ -25,15 +25,4 @@ .to raise_error(described_class::ContextTooComplexError) end end - - context 'when interpolation context has not been symbolized correctly' do - let(:ctx) do - { inputs: { 'key' => 'bbb' } } - end - - it 'raises an exception' do - expect { described_class.new(ctx) } - .to raise_error(described_class::NotSymbolizedContextError) - end - end end -- GitLab From 9fe5e3e1397cb34ed9120bac0400ec0016bff65a Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Fri, 27 Jan 2023 11:44:50 +0100 Subject: [PATCH 17/18] Remove CI interpolation benchmarking class --- lib/gitlab/ci/interpolation/benchmark.rb | 184 ----------------------- 1 file changed, 184 deletions(-) delete mode 100644 lib/gitlab/ci/interpolation/benchmark.rb diff --git a/lib/gitlab/ci/interpolation/benchmark.rb b/lib/gitlab/ci/interpolation/benchmark.rb deleted file mode 100644 index b17e96602b8f88..00000000000000 --- a/lib/gitlab/ci/interpolation/benchmark.rb +++ /dev/null @@ -1,184 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Ci - module Interpolation - ## - # TODO: REMOVE THIS CLASS BEFORE MERGING THE MR. - # - # This is a helper-class to transparently document the benchmarking strategy in the MR that is going to introduce - # the core CI interpolation mechanisms. We will not merge the Interpolation::Benchmark class as it is supposed to - # be removed before merging, but it will remain available in the diff / commit history. - # - # How to run it in GDK / GCK: - # - # 1. Run Rails console: `bin/rails console` - # 2. Run the benchmark: `Gitlab::Ci::Interpolation::Benchmark.run!` - # 3. Update the code, and reload pry with `reload!`, go to point 2. - # - class Benchmark - class Spec - attr_reader :keys, :depth, :ratio, :level, :blocks, :nodes, :context - - def initialize(**opts) - @keys = opts[:keys] # Number of top-level keys in a hash - @depth = opts[:depth] # Number of nested hash for each entry - @ratio = opts[:ratio] # Interpolation ratio percentage: 0.01, 99 - @verbose = opts[:verbose] # Print summary in a verbose format - - @level = @depth # Memo object for recursive generation of the hash - @blocks = 0 # Memo object counting the number of interpolation blocks - @nodes = 0 # Memo object counting the number of vertices in the hash - - @context = { - inputs: { - key: 'my_key', - leaf: 'my_leaf' - } - } - end - - def next! - tap { @level -= 1 } - end - - def interpolate? - Random.rand(100 * 100) <= @ratio * 100 - end - - def interpolate_leaf! - @nodes += 1 - - if interpolate? - @blocks += 1 - '$[[ inputs.leaf ]]' - else - 'non-interpolated leaf' - end - end - - def interpolate_key! - @nodes += 1 - - if interpolate? - @blocks += 1 - '$[[ inputs.key ]]' - else - 'some key' - end - end - - def hash(spec = self) - return spec.interpolate_leaf! if spec.level < 1 - - Hash.new.tap do |h| # rubocop:disable Style/EmptyLiteral - spec.keys.times { |i| h["#{i}: #{spec.interpolate_key!}"] = hash(spec.next!) } - end - end - end - - class Result - def initialize(name, spec, count = 1) - @name = name - @spec = spec - @count = count - @results = [] - @allocation = {} - - raise Argumenterror if count == 0 - - run! - end - - def summary - # rubocop:disable Style/FormatString - node_time = '%0.08f' % (@hash_time / @spec.nodes) - block_time = '%0.08f' % (@hash_time / @spec.blocks) - hash_time = '%0.08f' % @hash_time - total_time = '%0.08f' % @total_time - - # - mem_objects: number of allocated heap slots (as reflected by GC) - # - mem_mallocs: number of malloc calls - # - mem_bytes: number of bytes allocated by malloc for objects that did not fit - # into a heap slot - # - mem_total_bytes: number of bytes allocated for both objects consuming an object slot - # and objects that required a malloc (mem_malloc_bytes) - mem_objects = @allocations[:mem_objects] - mem_total = @allocations[:mem_total_bytes] / 1024 - - if @verbose - <<~SUMMARY - Scenario name: #{@name} - ---------------------------------------------------- - Hash: hash spec parameters: #{@spec.keys} / #{@spec.depth} / #{@spec.ratio} - interpolation blocks: #{@spec.blocks} - generated hash nodes: #{@spec.nodes} - benchmark repeated t: #{@count} - - Benchmark: time per node: #{node_time} - time per block: #{block_time} - total memory allocations: #{mem_total} kB - mem objects allocated: - hash interpolation time: #{hash_time} - total interpolation time: #{total_time} - \n\n - SUMMARY - else - <<~SUMMARY - #{'%50.50s' % @name}: per hash: #{'%10.10s' % hash_time}, total: #{'%10.10s' % total_time}, objects: #{'%10.10s' % mem_objects}, mem: #{'%10.10s' % mem_total} kB - SUMMARY - end - # rubocop:enable Style/FormatString - end - - def run! - config = @spec.hash - context = @spec.context - - @allocations = Gitlab::Memory::Instrumentation.with_memory_allocations do - @results = Array.new(@count) do - ::Benchmark.realtime { Interpolation::Template.new(config, context) } - end - end - - @total_time = @results.sum(0.0) - @hash_time = @total_time / @count - end - end - - def initialize - @results = [] - end - - def run! - @results = [ - Result.new( - 'Large hash with many interpolation blocks', - Spec.new(keys: 50000, depth: 5, ratio: 80) - ), - Result.new( - 'Large hash with just a few interpolations', - Spec.new(keys: 50000, depth: 5, ratio: 0.01) - ), - Result.new( - 'Short hash frequently interpolated', - Spec.new(keys: 50, depth: 5, ratio: 5), - 10000 - ) - ] - end - - def summary # rubocop:disable Style/EmptyMethod - @results.each { |result| puts result.summary } # rubocop:disable Rails/Output - end - - def self.run! - new.tap do |bench| - bench.run! - bench.summary - end - end - end - end - end -end -- GitLab From 73f9bb56ec5e7b980946c407bb5860e83bf6633a Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Fri, 27 Jan 2023 15:51:40 +0100 Subject: [PATCH 18/18] Simplify interpolation code and add more specs --- lib/gitlab/ci/interpolation/block.rb | 3 +-- lib/gitlab/ci/interpolation/config.rb | 10 +++++++-- lib/gitlab/ci/interpolation/template.rb | 2 +- .../lib/gitlab/ci/interpolation/block_spec.rb | 17 ++++++-------- .../gitlab/ci/interpolation/config_spec.rb | 22 ++++++++++++++----- 5 files changed, 34 insertions(+), 20 deletions(-) diff --git a/lib/gitlab/ci/interpolation/block.rb b/lib/gitlab/ci/interpolation/block.rb index a8bb03f5f9f53e..389cbf378a2276 100644 --- a/lib/gitlab/ci/interpolation/block.rb +++ b/lib/gitlab/ci/interpolation/block.rb @@ -13,7 +13,6 @@ def initialize(block, data, ctx) @block = block @ctx = ctx @data = data - @errors = [] @access = Interpolation::Access.new(@data, ctx) end @@ -23,7 +22,7 @@ def valid? end def errors - @errors + @access.errors + @access.errors end def content diff --git a/lib/gitlab/ci/interpolation/config.rb b/lib/gitlab/ci/interpolation/config.rb index 3cee1aa227f9e8..32f58521139efd 100644 --- a/lib/gitlab/ci/interpolation/config.rb +++ b/lib/gitlab/ci/interpolation/config.rb @@ -59,7 +59,13 @@ def to_h @config end - def interpolate!(&block) + ## + # The replace! method will yield a block and replace a each of the hash config nodes with a return value of the + # block. + # + # It returns `nil` if there were errors found during the process. + # + def replace!(&block) recursive_replace(@config, Visitor.new, &block) rescue TooManyNodesError @errors.push('config too large') @@ -68,7 +74,7 @@ def interpolate!(&block) @errors.push('config node too large') nil end - strong_memoize_attr :interpolate! + strong_memoize_attr :replace! def self.fabricate(config) case config diff --git a/lib/gitlab/ci/interpolation/template.rb b/lib/gitlab/ci/interpolation/template.rb index 2ec833fbcfb4dd..0211279f26680c 100644 --- a/lib/gitlab/ci/interpolation/template.rb +++ b/lib/gitlab/ci/interpolation/template.rb @@ -41,7 +41,7 @@ def interpolated private def interpolate! - @result = @config.interpolate! do |data| + @result = @config.replace! do |data| Interpolation::Block.match(data) do |block, data| evaluate_block(block, data) end diff --git a/spec/lib/gitlab/ci/interpolation/block_spec.rb b/spec/lib/gitlab/ci/interpolation/block_spec.rb index 660de99af5cbd7..7f2be505d17502 100644 --- a/spec/lib/gitlab/ci/interpolation/block_spec.rb +++ b/spec/lib/gitlab/ci/interpolation/block_spec.rb @@ -26,17 +26,14 @@ end describe '.match' do - it 'matches an empty block' do - matched = false - - described_class.match('$[[]]') do |block, data| - expect(block).to eq '$[[]]' - expect(data).to eq '' - - matched = true - end + it 'matches each block in a string' do + expect { |b| described_class.match('$[[ access1 ]] $[[ access2 ]]', &b) } + .to yield_successive_args(['$[[ access1 ]]', 'access1'], ['$[[ access2 ]]', 'access2']) + end - expect(matched).to eq true + it 'matches an empty block' do + expect { |b| described_class.match('$[[]]', &b) } + .to yield_with_args('$[[]]', '') end end end diff --git a/spec/lib/gitlab/ci/interpolation/config_spec.rb b/spec/lib/gitlab/ci/interpolation/config_spec.rb index 057ba0f62498b7..e5987776e006ac 100644 --- a/spec/lib/gitlab/ci/interpolation/config_spec.rb +++ b/spec/lib/gitlab/ci/interpolation/config_spec.rb @@ -17,21 +17,33 @@ CFG end + describe '#replace!' do + it 'replaces each od the nodes with a block return value' do + result = subject.replace! { |node| "abc#{node}cde" } + + expect(result).to eq({ + 'abctestcde' => { 'abcspeccde' => { 'abcenvcde' => 'abc$[[ inputs.env ]]cde' } }, + 'abc$[[ inputs.key ]]cde' => { + 'abcnamecde' => 'abc$[[ inputs.key ]]cde', + 'abcscriptcde' => 'abcmy-valuecde' + } + }) + end + end + context 'when config size is exceeded' do before do stub_const("#{described_class}::MAX_NODES", 7) end it 'returns a config size error' do - interpolated = 0 + replaced = 0 - subject.interpolate! do |c| - interpolated += 1 - end + subject.replace! { replaced += 1 } + expect(replaced).to eq 4 expect(subject.errors.size).to eq 1 expect(subject.errors.first).to eq 'config too large' - expect(interpolated).to eq 4 end end end -- GitLab