diff --git a/app/models/ci/job_token/authorization.rb b/app/models/ci/job_token/authorization.rb
index 11093921c5b3b589941596b03d3528365fc36828..458ef6330fa33d6dab4a1b30276e42ce2ca245d3 100644
--- a/app/models/ci/job_token/authorization.rb
+++ b/app/models/ci/job_token/authorization.rb
@@ -10,6 +10,7 @@ module Ci
   module JobToken
     class Authorization < Ci::ApplicationRecord
       extend Gitlab::InternalEventsTracking
+      include EachBatch
 
       self.table_name = 'ci_job_token_authorizations'
 
diff --git a/app/models/ci/job_token/authorizations_compactor.rb b/app/models/ci/job_token/authorizations_compactor.rb
new file mode 100644
index 0000000000000000000000000000000000000000..df24293922b3f9def26094274096faac4662b737
--- /dev/null
+++ b/app/models/ci/job_token/authorizations_compactor.rb
@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+
+module Ci
+  module JobToken
+    # Compacts the origin projects recorded in the job token authorizations of one accessed
+    # project into a list of groups and projects whose size does not exceed a given limit.
+    class AuthorizationsCompactor
+      attr_reader :allowlist_groups, :allowlist_projects
+
+      # Gitlab::Utils::TraversalIdCompactor.validate! raises its own error classes. Pointing these
+      # constants at them keeps the names available here and makes rescuing them effective.
+      UnexpectedCompactionEntry = Gitlab::Utils::TraversalIdCompactor::UnexpectedCompactionEntry
+      RedundantCompactionEntry = Gitlab::Utils::TraversalIdCompactor::RedundantCompactionEntry
+
+      # project_id is the id of the accessed project whose authorizations are compacted.
+      def initialize(project_id)
+        @project_id = project_id
+        @allowlist_groups = []
+        @allowlist_projects = []
+      end
+
+      # Returns the memoized traversal_ids of the project namespace of every origin project that
+      # has a job token authorization for the accessed project.
+      def origin_project_traversal_ids
+        @origin_project_traversal_ids ||= begin
+          origin_project_id_batches = []
+
+          # Collecting id batches to avoid cross-database transactions.
+          Ci::JobToken::Authorization.where(
+            accessed_project_id: @project_id
+          ).each_batch(column: :origin_project_id) do |batch|
+            origin_project_id_batches << batch.pluck(:origin_project_id) # rubocop:disable Database/AvoidUsingPluckWithoutLimit -- pluck limited by batch size
+          end
+
+          origin_project_id_batches.flat_map do |batch|
+            # Preload the project namespaces to avoid one query per project.
+            Project.where(id: batch).preload(:project_namespace).map { |p| p.project_namespace.traversal_ids }
+          end
+        end
+      end
+
+      # Compacts the origin project traversal_ids down to at most `limit` entries, validates the
+      # result and stores the namespaces it points to in allowlist_groups and allowlist_projects.
+      # Errors raised by Gitlab::Utils::TraversalIdCompactor propagate to the caller.
+      def compact(limit)
+        compacted_traversal_ids = Gitlab::Utils::TraversalIdCompactor.compact(origin_project_traversal_ids, limit)
+
+        Gitlab::Utils::TraversalIdCompactor.validate!(origin_project_traversal_ids, compacted_traversal_ids)
+
+        namespace_ids = compacted_traversal_ids.map(&:last)
+        namespaces = Namespace.where(id: namespace_ids)
+
+        # Start from empty lists so that calling compact again does not accumulate stale entries.
+        @allowlist_groups = []
+        @allowlist_projects = []
+
+        namespaces.each do |namespace|
+          if namespace.project_namespace?
+            @allowlist_projects << namespace.project
+          else
+            @allowlist_groups << namespace
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/utils/traversal_id_compactor.rb b/lib/gitlab/utils/traversal_id_compactor.rb
new file mode 100644
index 0000000000000000000000000000000000000000..c256ec401aeafc760440cf49a7520fbedba2d096
--- /dev/null
+++ b/lib/gitlab/utils/traversal_id_compactor.rb
@@ -0,0 +1,133 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Utils
+    class TraversalIdCompactor
+      CompactionLimitCannotBeAchievedError = Class.new(StandardError)
+      RedundantCompactionEntry = Class.new(StandardError)
+      UnexpectedCompactionEntry = Class.new(StandardError)
+
+      class << self
+        # This class compacts an array of traversal_ids by finding the most common namespace
+        # and consolidating all children into an entry for that namespace. It continues this process
+        # until the size of the final array no longer exceeds the limit. If it cannot achieve the limit
+        # it raises a CompactionLimitCannotBeAchievedError.
+        #
+        # The traversal_ids input is an array of arrays in which every element of a sub-array
+        # is a namespace id, for example:
+        #
+        # [
+        #   [1, 21],
+        #   [1, 2, 3],
+        #   [1, 2, 4],
+        #   [1, 2, 5],
+        #   [1, 2, 12, 13],
+        #   [1, 6, 7],
+        #   [1, 6, 8],
+        #   [9, 10, 11]
+        # ]
+        #
+        # The limit input is the maximum number of entries allowed in the final array.
+
+        # Applies compact_once over and over until the list holds at most `limit` entries and
+        # returns the resulting list. A list that already fits is handed back untouched.
+        # A CompactionLimitCannotBeAchievedError raised while compacting is not rescued here.
+
+        def compact(traversal_ids, limit)
+          compacted = traversal_ids
+          compacted = compact_once(compacted) until compacted.size <= limit
+          compacted
+        end
+
+        # Runs one compaction round: every entry that starts with the path chosen by
+        # find_most_common_namespace_path is replaced by that path and duplicates are dropped.
+        # With the example input above the chosen path is [1, 2], which yields:
+        # [
+        #   [1, 21],
+        #   [1, 2],
+        #   [1, 6, 7],
+        #   [1, 6, 8],
+        #   [9, 10, 11]
+        # ]
+
+        def compact_once(traversal_ids)
+          target_path = find_most_common_namespace_path(traversal_ids)
+
+          traversal_ids.map { |path| starts_with?(path, target_path) ? target_path : path }.uniq
+        end
+
+        # The validate method performs two checks on the compacted_traversal_ids
+        # 1. If there are redundant traversal_ids, for example [1,2,3,4] and [1,2,3]
+        # 2. 
If there are unexpected entries, meaning a traversal_id not present in the origin_project_traversal_ids
+        # If either case is found, it will raise an error
+        # Otherwise, it will return true
+        #
+        # Entries are checked one by one in list order and the first failing check raises, so
+        # a list with several problems only reports the first one encountered.
+
+        def validate!(origin_project_traversal_ids, compacted_traversal_ids)
+          compacted_traversal_ids.each do |candidate|
+            # An entry that is not the prefix of any original traversal_id is unexpected
+            covers_original = origin_project_traversal_ids.any? { |original| starts_with?(original, candidate) }
+            raise UnexpectedCompactionEntry unless covers_original
+
+            # An entry nested below another, different entry is redundant
+            compacted_traversal_ids.each do |other|
+              raise RedundantCompactionEntry if other != candidate && starts_with?(other, candidate)
+            end
+          end
+
+          true
+        end
+
+        private
+
+        # find_most_common_namespace_path derives the parent path (the traversal_id without its last
+        # element) of every traversal_id and counts how often each parent path occurs. Parent paths
+        # that occur only once are ignored. Of the rest the longest one is returned, and among equally
+        # long ones the one with the highest count.
+        # For example, given the following traversal_ids it would return [1, 2]
+        #
+        # [
+        #   [1, 21],
+        #   [1, 2, 3],
+        #   [1, 2, 4],
+        #   [1, 2, 5],
+        #   [1, 2, 12, 13],
+        #   [1, 6, 7],
+        #   [1, 6, 8],
+        #   [9, 10, 11]
+        # ]
+
+        def find_most_common_namespace_path(traversal_ids)
+          # Entries with a single element have no parent path and are skipped
+          parent_paths = traversal_ids.filter_map { |traversal_id| traversal_id[0...-1] if traversal_id.size > 1 }
+
+          # Parent paths seen only once are not considered for compaction
+          candidates = parent_paths.tally.reject { |_path, count| count == 1 }
+
+          # Sorting ascending by [depth, count] puts the path to compact last
+          path, _count = candidates.sort_by { |candidate, count| [candidate.size, count] }.last
+
+          # No candidate left means nothing more can be compacted
+          raise CompactionLimitCannotBeAchievedError if path.nil?
+
+          path
+        end
+
+        # starts_with? tells whether namespace_path is a prefix of traversal_id, i.e. whether the
+        # first namespace_path.length elements of traversal_id equal namespace_path.
+        #
+        # starts_with?([1, 2, 3], [1, 2]) #=> true
+        # starts_with?([1, 2], [1, 2, 3]) #=> false
+        # starts_with?([1, 2, 3], [1, 2, 3]) #=> true
+        # starts_with?([1, 2, 3], [1, 2, 3, 4]) #=> false
+
+        def starts_with?(traversal_id, namespace_path)
+          prefix_length = namespace_path.length
+          traversal_id.first(prefix_length) == namespace_path
+        end
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/utils/traversal_id_compactor_spec.rb b/spec/lib/gitlab/utils/traversal_id_compactor_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..5612994374d6eb3b25b0c5b93d54e26ee95279d4
--- /dev/null
+++ b/spec/lib/gitlab/utils/traversal_id_compactor_spec.rb
@@ -0,0 +1,109 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Utils::TraversalIdCompactor, feature_category: :secrets_management do
+  let(:traversal_ids) do
+    [
+      [1, 21],
+      [1, 2, 3],
+      [1, 2, 4],
+      [1, 2, 5],
+      [1, 2, 12, 13],
+      [1, 6, 7],
+      [1, 6, 8],
+      [9, 10, 11]
+    ]
+  end
+
+  let(:compactor) { described_class }
+
+  describe '#compact' do
+    it 'compacts the array of traversal_ids using compact_once two times until the limit is reached' do
+      expect(compactor).to receive(:compact_once).twice.and_call_original
+
+      result = compactor.compact(traversal_ids, 4)
+
+      expect(result).to eq([
+        [1, 21],
+        [1, 2],
+        [1, 6],
+        [9, 10, 11]
+      ])
+    end
+
+    it 'compacts the array of traversal_ids using compact_once three times until the limit is reached' do
+      expect(compactor).to receive(:compact_once).exactly(3).times.and_call_original
+
+      result = compactor.compact(traversal_ids, 3)
+
+      expect(result).to eq([
+        [1],
+        [9, 10, 11]
+      ])
+    end
+
+    it 'compacts the array of traversal_ids using compact_once one time to reach the limit' do
+      traversal_ids = [
+        [1, 
2],
+        [1, 3],
+        [1, 4],
+        [5, 6],
+        [6, 7]
+      ]
+
+      expect(compactor).to receive(:compact_once).once.and_call_original
+
+      result = compactor.compact(traversal_ids, 3)
+
+      expect(result).to eq([
+        [1],
+        [5, 6],
+        [6, 7]
+      ])
+    end
+
+    it 'raises when the compaction limit can not be achieved' do
+      expect do
+        compactor.compact(traversal_ids, 1)
+      end.to raise_error(described_class::CompactionLimitCannotBeAchievedError)
+    end
+  end
+
+  describe '#compact_once' do
+    it 'compacts the one most common namespace path and returns the newly compacted array of traversal_ids' do
+      result = compactor.compact_once(traversal_ids)
+
+      expect(result).to eq([
+        [1, 21],
+        [1, 2],
+        [1, 6, 7],
+        [1, 6, 8],
+        [9, 10, 11]
+      ])
+    end
+  end
+
+  describe '#validate!' do
+    it 'returns true when the compacted results are valid' do
+      result = compactor.compact(traversal_ids, 4)
+      expect(compactor.validate!(traversal_ids, result)).to be true
+    end
+
+    it 'raises a RedundantCompactionEntry error when redundant entries are found' do
+      result = compactor.compact(traversal_ids, 4)
+      result << [1, 2, 3]
+      expect do
+        compactor.validate!(traversal_ids, result)
+      end.to raise_error(described_class::RedundantCompactionEntry)
+    end
+
+    it 'raises an UnexpectedCompactionEntry error when an unexpected entry is found' do
+      result = compactor.compact(traversal_ids, 4)
+      result << [1, 3, 4]
+      expect do
+        compactor.validate!(traversal_ids, result)
+      end.to raise_error(described_class::UnexpectedCompactionEntry)
+    end
+  end
+end
diff --git a/spec/models/ci/job_token/authorizations_compactor_spec.rb b/spec/models/ci/job_token/authorizations_compactor_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..0ad1e92d2f237b1f98a85bab9efb612fc16565f7
--- /dev/null
+++ b/spec/models/ci/job_token/authorizations_compactor_spec.rb
@@ -0,0 +1,118 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Ci::JobToken::AuthorizationsCompactor, feature_category: :secrets_management do
+  let_it_be(:accessed_project) { create(:project) }
+  let(:compactor) { described_class.new(accessed_project.id) }
+
+  # [1, 21],        ns1, p1
+  # [1, 2, 3],      ns1, ns2, p2
+  # [1, 2, 4],      ns1, ns2, p3
+  # [1, 2, 5],      ns1, ns2, p4
+  # [1, 2, 12, 13], ns1, ns2, ns3, p5
+  # [1, 6, 7],      ns1, ns4, p6
+  # [1, 6, 8],      ns1, ns4, p7
+  # [9, 10, 11]     ns5, ns6, p8
+
+  let_it_be(:ns1) { create(:group, name: 'ns1') }
+  let_it_be(:ns2) { create(:group, parent: ns1, name: 'ns2') }
+  let_it_be(:ns3) { create(:group, parent: ns2, name: 'ns3') }
+  let_it_be(:ns4) { create(:group, parent: ns1, name: 'ns4') }
+  let_it_be(:ns5) { create(:group, name: 'ns5') }
+  let_it_be(:ns6) { create(:group, parent: ns5, name: 'ns6') }
+
+  let_it_be(:pns1) { create(:project_namespace, parent: ns1) }
+  let_it_be(:pns2) { create(:project_namespace, parent: ns2) }
+  let_it_be(:pns3) { create(:project_namespace, parent: ns2) }
+  let_it_be(:pns4) { create(:project_namespace, parent: ns2) }
+  let_it_be(:pns5) { create(:project_namespace, parent: ns3) }
+  let_it_be(:pns6) { create(:project_namespace, parent: ns4) }
+  let_it_be(:pns7) { create(:project_namespace, parent: ns4) }
+  let_it_be(:pns8) { create(:project_namespace, parent: ns6) }
+
+  # Record one authorization per origin project, all of them targeting accessed_project
+  before do
+    origin_project_namespaces = [
+      pns1, pns2, pns3, pns4, pns5, pns6, pns7, pns8
+    ]
+
+    origin_project_namespaces.each do |project_namespace|
+      create(:ci_job_token_authorization, origin_project: project_namespace.project, accessed_project: accessed_project,
+        last_authorized_at: 1.day.ago)
+    end
+  end
+
+  describe '#compact' do
+    it 'compacts the allowlist into sub-groups and projects for a limit of 4' do
+      compactor.compact(4)
+
+      expect(compactor.allowlist_groups).to match_array([ns2, ns4])
+      expect(compactor.allowlist_projects).to match_array([pns1.project, pns8.project])
+    end
+
+    it 'compacts the allowlist into a top-level group and a project for a limit of 3' do
+      compactor.compact(3)
+
+      expect(compactor.allowlist_groups).to match_array([ns1])
+      expect(compactor.allowlist_projects).to match_array([pns8.project])
+    end
+
+    it 'raises when the limit cannot be achieved' do
+      expect do
+        compactor.compact(1)
+      end.to raise_error(Gitlab::Utils::TraversalIdCompactor::CompactionLimitCannotBeAchievedError)
+    end
+
+    it 'raises when an unexpected compaction entry is found' do
+      # Tamper with the compacted result by appending a made-up entry so that validation fails
+      allow(Gitlab::Utils::TraversalIdCompactor).to receive(:compact).and_wrap_original do |original_method, *args|
+        original_response = original_method.call(*args)
+        original_response << [1, 2, 3]
+      end
+
+      expect { compactor.compact(5) }.to raise_error(Gitlab::Utils::TraversalIdCompactor::UnexpectedCompactionEntry)
+    end
+
+    it 'raises when a redundant compaction entry is found' do
+      # Append a prefix of the last entry so that the last entry becomes redundant
+      allow(Gitlab::Utils::TraversalIdCompactor).to receive(:compact).and_wrap_original do |original_method, *args|
+        original_response = original_method.call(*args)
+        original_response << original_response.last.first(2)
+      end
+
+      expect { compactor.compact(5) }.to raise_error(Gitlab::Utils::TraversalIdCompactor::RedundantCompactionEntry)
+    end
+
+    context 'with three top-level namespaces' do
+      # [1, 21],        ns1, p1
+      # [1, 2, 3],      ns1, ns2, p2
+      # [1, 2, 4],      ns1, ns2, p3
+      # [1, 2, 5],      ns1, ns2, p4
+      # [1, 2, 12, 13], ns1, ns2, ns3, p5
+      # [1, 6, 7],      ns1, ns4, p6
+      # [1, 6, 8],      ns1, ns4, p7
+      # [9, 10, 11]     ns5, ns6, p8
+      # [14, 15]        ns7, p9
+      let(:ns7) { create(:group, name: 'ns7') }
+      let(:pns9) { create(:project_namespace, parent: ns7) }
+
+      before do
+        create(:ci_job_token_authorization, origin_project: pns9.project, accessed_project: accessed_project,
+          last_authorized_at: 1.day.ago)
+      end
+
+      it 'raises when the limit cannot be achieved' do
+        expect do
+          compactor.compact(2)
+        end.to raise_error(Gitlab::Utils::TraversalIdCompactor::CompactionLimitCannotBeAchievedError)
+      end
+
+      it 'does not raise when the limit can be achieved' do
+        expect do
+          compactor.compact(3)
+        end.not_to raise_error
+      end
+    end
+  end
+end