From 44742e13717dbf0c66723fe3f1fd288859f5e40f Mon Sep 17 00:00:00 2001 From: Darby Frey Date: Fri, 8 Nov 2024 13:38:06 -0600 Subject: [PATCH 1/9] Ci::JobToken::AuthorizationsCompactor --- .../ci/job_token/authorizations_compactor.rb | 127 ++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 app/models/ci/job_token/authorizations_compactor.rb diff --git a/app/models/ci/job_token/authorizations_compactor.rb b/app/models/ci/job_token/authorizations_compactor.rb new file mode 100644 index 00000000000000..4799b6475b1be3 --- /dev/null +++ b/app/models/ci/job_token/authorizations_compactor.rb @@ -0,0 +1,127 @@ +# frozen_string_literal: true + +module Ci + module JobToken + class AuthorizationsCompactor + attr_reader :allowlist_groups + + RECORD_LIMIT = 20_000 + + # This class compacts the entries for a project in Ci::JobToken::Authorization and + # compacts them into group and project entries so they can fit within the limits + # of the Ci::JobToken::Allowlist + # + # The class is initialized with a project_id and minimum_coverage_percent. Calling + # .process will execute the compactor. + # + # Coverage in this case is the percentage of projects in a group (and subgroups) + # that are present in the authorization logs + # For example: if a group and its subgroups contain 100 projects and 50 of those + # are in the authorization logs, the coverage for that group would be 50 (percent) + def initialize(project_id, minimum_coverage_percent) + @project_id = project_id + @allowlist_groups = [] + @minumum_coverage_percent = minimum_coverage_percent + end + + def process + loop do + # We run compact multiple times if necessary and decrease the + # minumum_coverage_percent on each iteration + puts "Running compaction with a #{@minumum_coverage_percent}% coverage minimum" + compact + @minumum_coverage_percent -= 10 + + # Stop if we hit 10% coverage + if @minumum_coverage_percent == 10 + puts "Unable to compact authorization list" + break + end + + # Stop if the sum of groups and remaining project is less than 200 + next unless (origin_project_ids.count + allowlist_groups.count) <= 200 + + ## Creation of allowlist records will happen here + puts "Authorization list compacted" + break + end + end + + def compact + origin_project_group_ids.each do |group_id| + group = ::Group.find(group_id) + + optmimal_group?(group) + end + end + + # origin_project_ids are the project_ids of all of the projects that have accessed the given project while the + # authorization log has been active. all of these projects need to be given access via the allowlist in some way + def origin_project_ids + @origin_project_ids ||= Ci::JobToken::Authorization + .where(accessed_project_id: @project_id) + .limit(RECORD_LIMIT) + .pluck(:origin_project_id) + end + + # origin_project_group_ids are the ids of all of the groups containing the projects listed in origin_project_ids + def origin_project_group_ids + Project.where(id: origin_project_ids).limit(RECORD_LIMIT).pluck(:namespace_id).uniq + end + + # calculate a percent of the group's project ids that exist in the origin_project_ids list + def calculate_coverage_for_group(group_project_ids) + intersection = group_project_ids & origin_project_ids + ((intersection.size.to_f / group_project_ids.size) * 100).round(2) + end + + def add_group_to_allowlist_groups(group, coverage) + @allowlist_groups << { + group_id: group.id, + group_path: group.full_path, + coverage: coverage + } + end + + def remove_group_project_ids(group_project_ids) + puts "Removing #{group_project_ids.count} project ids from origin_project_ids" + @origin_project_ids -= group_project_ids + end + + def all_group_project_ids(group) + group_ids = group.self_and_descendants.limit(RECORD_LIMIT).pluck(:id) + Project.where(namespace_id: group_ids).limit(RECORD_LIMIT).pluck(:id) + end + + # This method determins the optimal group level for the allowlist based on the given + # starting group using the following process: + # 1. For the given group, get ids for all projects that belong to it or any of its ancestors + # 2. Calculate the coverage percentage for the group (and ancestors) + # 3. If coverage is above the minimum, recursively check the parent groups to find the highest + # level group that is still above the minimum. Once this is determined + # 1. Add the group to the allowlist_groups array + # 2. Remove all project ids for that group and its ancestors from the origin_project_ids list + # 3. Return true + # 4. If the coverage is below the minimum, return false to indicate we couldn't find an optimal option + # for the given group and minumum_coverage_percent + def optmimal_group?(group) + parent = group.parent + group_project_ids = all_group_project_ids(group) + coverage = calculate_coverage_for_group(group_project_ids) + + return false unless coverage >= @minumum_coverage_percent + + if parent.nil? + add_group_to_allowlist_groups(group, coverage) + remove_group_project_ids(group_project_ids) + true + else + unless optmimal_group?(parent) + add_group_to_allowlist_groups(group, coverage) + remove_group_project_ids(group_project_ids) + end + end + end + end + end +end -- GitLab From 39209efd62122c3718c6ac4849d251ac3a644cf0 Mon Sep 17 00:00:00 2001 From: Darby Frey Date: Wed, 20 Nov 2024 16:01:34 -0600 Subject: [PATCH 2/9] Adding Traversal Id Compactor --- lib/gitlab/traversal_id_compactor.rb | 43 +++++++++++++ .../lib/gitlab/traversal_id_compactor_spec.rb | 61 +++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 lib/gitlab/traversal_id_compactor.rb create mode 100644 spec/lib/gitlab/traversal_id_compactor_spec.rb diff --git a/lib/gitlab/traversal_id_compactor.rb b/lib/gitlab/traversal_id_compactor.rb new file mode 100644 index 00000000000000..dcba418879da4e --- /dev/null +++ b/lib/gitlab/traversal_id_compactor.rb @@ -0,0 +1,43 @@ +module Gitlab + class TraversalIdCompactor + CompactionLimitCannotBeAchievedError = Class.new(StandardError) + + def compact(traversal_ids, limit) + while traversal_ids.size > limit + starting_size = traversal_ids.size + traversal_ids = compact_once(traversal_ids) + + raise CompactionLimitCannotBeAchievedError if starting_size == traversal_ids.size + end + + traversal_ids + end + + def compact_once(traversal_ids) + most_frequent_parent_prefix = most_frequent_parent(traversal_ids) + + compacted_traversal_ids = traversal_ids.map do |traversal_id| + # if traversal_id.first(most_frequent_parent.length) == most_frequent_parent + if starts_with?(traversal_id, most_frequent_parent_prefix) + most_frequent_parent_prefix + else + traversal_id + end + end + + compacted_traversal_ids.uniq + end + + private + + def most_frequent_parent(traversal_ids) + namespace_counts = traversal_ids.map{|t| t[0...-1] }.tally + + namespace_counts.sort_by{|k,v| v}.reverse.to_h.first.first + end + + def starts_with?(traversal_id, prefix) + traversal_id.first(prefix.length) == prefix + end + end +end \ No newline at end of file diff --git a/spec/lib/gitlab/traversal_id_compactor_spec.rb b/spec/lib/gitlab/traversal_id_compactor_spec.rb new file mode 100644 index 00000000000000..ea05cf4429a60c --- /dev/null +++ b/spec/lib/gitlab/traversal_id_compactor_spec.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::TraversalIdCompactor, feature_category: :secrets_management do + + let(:traversal_ids) do + [ + [1,21], + [1,2,3], + [1,2,4], + [1,2,5], + [1,2,12,13], + [1,6,7], + [1,6,8], + [9,10,11] + ] + end + + let(:compactor){Gitlab::TraversalIdCompactor.new} + + describe '#compact' do + it 'commpacts' do + result = compactor.compact(traversal_ids, 4) + + expect(result).to eq([ + [1,21], + [1,2], + [1,6], + [9,10,11] + ]) + end + + it 'keeps compacting down to the limit' do + result = compactor.compact(traversal_ids, 3) + + expect(result).to eq([ + [1], + [9,10,11] + ]) + end + + it 'raises when it cant hit the limit' do + expect { compactor.compact(traversal_ids, 1) }.to raise_error(described_class::CompactionLimitCannotBeAchievedError) + end + end + + describe '#compact_once' do + it 'commpacts' do + result = compactor.compact_once(traversal_ids) + + expect(result).to eq([ + [1,21], + [1,2], + [1,6,7], + [1,6,8], + [9,10,11] + ]) + end + end +end \ No newline at end of file -- GitLab From efd38f4bdceb742d45d4e353c12e7f2bbc6b060f Mon Sep 17 00:00:00 2001 From: Darby Frey Date: Wed, 20 Nov 2024 16:45:56 -0600 Subject: [PATCH 3/9] Updating compactor to use TraversalIdCompactor --- .../ci/job_token/authorizations_compactor.rb | 132 ++++-------------- 1 file changed, 27 insertions(+), 105 deletions(-) diff --git a/app/models/ci/job_token/authorizations_compactor.rb b/app/models/ci/job_token/authorizations_compactor.rb index 4799b6475b1be3..26f8fb5914dc59 100644 --- a/app/models/ci/job_token/authorizations_compactor.rb +++ b/app/models/ci/job_token/authorizations_compactor.rb @@ -3,122 +3,44 @@ module Ci module JobToken class AuthorizationsCompactor - attr_reader :allowlist_groups - - RECORD_LIMIT = 20_000 - - # This class compacts the entries for a project in Ci::JobToken::Authorization and - # compacts them into group and project entries so they can fit within the limits - # of the Ci::JobToken::Allowlist - # - # The class is initialized with a project_id and minimum_coverage_percent. Calling - # .process will execute the compactor. - # - # Coverage in this case is the percentage of projects in a group (and subgroups) - # that are present in the authorization logs - # For example: if a group and its subgroups contain 100 projects and 50 of those - # are in the authorization logs, the coverage for that group would be 50 (percent) - def initialize(project_id, minimum_coverage_percent) + def initialize(project_id) @project_id = project_id @allowlist_groups = [] - @minumum_coverage_percent = minimum_coverage_percent + @allowlist_projects = [] end - def process - loop do - # We run compact multiple times if necessary and decrease the - # minumum_coverage_percent on each iteration - puts "Running compaction with a #{@minumum_coverage_percent}% coverage minimum" - compact - @minumum_coverage_percent -= 10 - - # Stop if we hit 10% coverage - if @minumum_coverage_percent == 10 - puts "Unable to compact authorization list" - break + def origin_project_traversal_ids + @origin_project_traversal_ids ||= begin + origin_project_traversal_ids = [] + Ci::JobToken::Authorization.where(access_project_id: project).each_batch(column: :origin_project_id) do |batch| + projects = Project.where(id: batch.select(:origin_project_id)).with_namespace + origin_project_traversal_ids += projects.map { |p| p.namespace.traversal_ids << p.project_namespace_id } end - - # Stop if the sum of groups and remaining project is less than 200 - next unless (origin_project_ids.count + allowlist_groups.count) <= 200 - - ## Creation of allowlist records will happen here - puts "Authorization list compacted" - break + origin_project_traversal_ids end end - def compact - origin_project_group_ids.each do |group_id| - group = ::Group.find(group_id) - - optmimal_group?(group) - end - end - - # origin_project_ids are the project_ids of all of the projects that have accessed the given project while the - # authorization log has been active. all of these projects need to be given access via the allowlist in some way - def origin_project_ids - @origin_project_ids ||= Ci::JobToken::Authorization - .where(accessed_project_id: @project_id) - .limit(RECORD_LIMIT) - .pluck(:origin_project_id) - end - - # origin_project_group_ids are the ids of all of the groups containing the projects listed in origin_project_ids - def origin_project_group_ids - Project.where(id: origin_project_ids).limit(RECORD_LIMIT).pluck(:namespace_id).uniq - end - - # calculate a percent of the group's project ids that exist in the origin_project_ids list - def calculate_coverage_for_group(group_project_ids) - intersection = group_project_ids & origin_project_ids - ((intersection.size.to_f / group_project_ids.size) * 100).round(2) - end - - def add_group_to_allowlist_groups(group, coverage) - @allowlist_groups << { - group_id: group.id, - group_path: group.full_path, - coverage: coverage - } - end - - def remove_group_project_ids(group_project_ids) - puts "Removing #{group_project_ids.count} project ids from origin_project_ids" - @origin_project_ids -= group_project_ids - end - - def all_group_project_ids(group) - group_ids = group.self_and_descendants.limit(RECORD_LIMIT).pluck(:id) - Project.where(namespace_id: group_ids).limit(RECORD_LIMIT).pluck(:id) - end + # def traversal_ids + # @traversal_ids ||= origin_projects.map { |p| p.namespace.traversal_ids << p.project_namespace_id } + # end - # This method determins the optimal group level for the allowlist based on the given - # starting group using the following process: - # 1. For the given group, get ids for all projects that belong to it or any of its ancestors - # 2. Calculate the coverage percentage for the group (and ancestors) - # 3. If coverage is above the minimum, recursively check the parent groups to find the highest - # level group that is still above the minimum. Once this is determined - # 1. Add the group to the allowlist_groups array - # 2. Remove all project ids for that group and its ancestors from the origin_project_ids list - # 3. Return true - # 4. If the coverage is below the minimum, return false to indicate we couldn't find an optimal option - # for the given group and minumum_coverage_percent - def optmimal_group?(group) - parent = group.parent - group_project_ids = all_group_project_ids(group) - coverage = calculate_coverage_for_group(group_project_ids) + def compact(limit) + compacted_traversal_ids = Gitlab::TraversalIdCompactor.new.compact(origin_project_traversal_ids, limit) + # do validation on compaction here + # ensure all entries in the compacted list exist in the original list + # ensure there is no duplication in the namespaces - return false unless coverage >= @minumum_coverage_percent + namespace_ids = compacted_traversal_ids.map { |t| t.last } + namespaces = Namespace.where(id: namespace_ids) - if parent.nil? - add_group_to_allowlist_groups(group, coverage) - remove_group_project_ids(group_project_ids) - true - else - unless optmimal_group?(parent) - add_group_to_allowlist_groups(group, coverage) - remove_group_project_ids(group_project_ids) + namespaces.each do |ns| + + if ns.project_namespace? + # add allow list entry for project + @allowlist_projects << ns.project + else + # add allow list entry for group + @allowlist_groups << ns end end end -- GitLab From 05c60dd6029f89cb8f90e030b477a05c919a336d Mon Sep 17 00:00:00 2001 From: Darby Frey Date: Thu, 21 Nov 2024 14:58:40 -0600 Subject: [PATCH 4/9] Refactoring and updates to TraversalIdCompactor --- lib/gitlab/traversal_id_compactor.rb | 43 -------- lib/gitlab/utils/traversal_id_compactor.rb | 104 ++++++++++++++++++ .../lib/gitlab/traversal_id_compactor_spec.rb | 61 ---------- .../utils/traversal_id_compactor_spec.rb | 66 +++++++++++ 4 files changed, 170 insertions(+), 104 deletions(-) delete mode 100644 lib/gitlab/traversal_id_compactor.rb create mode 100644 lib/gitlab/utils/traversal_id_compactor.rb delete mode 100644 spec/lib/gitlab/traversal_id_compactor_spec.rb create mode 100644 spec/lib/gitlab/utils/traversal_id_compactor_spec.rb diff --git a/lib/gitlab/traversal_id_compactor.rb b/lib/gitlab/traversal_id_compactor.rb deleted file mode 100644 index dcba418879da4e..00000000000000 --- a/lib/gitlab/traversal_id_compactor.rb +++ /dev/null @@ -1,43 +0,0 @@ -module Gitlab - class TraversalIdCompactor - CompactionLimitCannotBeAchievedError = Class.new(StandardError) - - def compact(traversal_ids, limit) - while traversal_ids.size > limit - starting_size = traversal_ids.size - traversal_ids = compact_once(traversal_ids) - - raise CompactionLimitCannotBeAchievedError if starting_size == traversal_ids.size - end - - traversal_ids - end - - def compact_once(traversal_ids) - most_frequent_parent_prefix = most_frequent_parent(traversal_ids) - - compacted_traversal_ids = traversal_ids.map do |traversal_id| - # if traversal_id.first(most_frequent_parent.length) == most_frequent_parent - if starts_with?(traversal_id, most_frequent_parent_prefix) - most_frequent_parent_prefix - else - traversal_id - end - end - - compacted_traversal_ids.uniq - end - - private - - def most_frequent_parent(traversal_ids) - namespace_counts = traversal_ids.map{|t| t[0...-1] }.tally - - namespace_counts.sort_by{|k,v| v}.reverse.to_h.first.first - end - - def starts_with?(traversal_id, prefix) - traversal_id.first(prefix.length) == prefix - end - end -end \ No newline at end of file diff --git a/lib/gitlab/utils/traversal_id_compactor.rb b/lib/gitlab/utils/traversal_id_compactor.rb new file mode 100644 index 00000000000000..8aef216226de11 --- /dev/null +++ b/lib/gitlab/utils/traversal_id_compactor.rb @@ -0,0 +1,104 @@ +# frozen_string_literal: true + +module Gitlab + module Utils + class TraversalIdCompactor + CompactionLimitCannotBeAchievedError = Class.new(StandardError) + + class << self + # This class compacts an array of traversal_ids by finding the most common namespace + # and consolidating all children into an entry for that namespace. It continues this process + # until the size of the final array is less than the limit. If it cannot achieve the limit + # it raises a CompactionLimitCannotBeAchievedError. + # + # The traversal_ids input will look like the array below where each element in the sub-arrays + # is a namespace id. + # + # [ + # [1, 21], + # [1, 2, 3], + # [1, 2, 4], + # [1, 2, 5], + # [1, 2, 12, 13], + # [1, 6, 7], + # [1, 6, 8], + # [9, 10, 11] + # ] + # + # The limit input is the maximum number of elements in the final array. + + # The compact method calls the compact_once method until the size of the final array is less + # than the limit. It then returns the compacted list of traversal_ids + # If it cannot achieve the limit it raises a CompactionLimitCannotBeAchievedError. + + def compact(traversal_ids, limit) + traversal_ids = compact_once(traversal_ids) while traversal_ids.size > limit + + traversal_ids + end + + # The compact_once method finds the most common namespace and compacts all children into an + # entry for that namespace. It then returns the compacted list of traversal_ids. + + def compact_once(traversal_ids) + most_common_namespace_path = find_most_common_namespace_path(traversal_ids) + + compacted_traversal_ids = traversal_ids.map do |traversal_id| + if starts_with?(traversal_id, most_common_namespace_path) + most_common_namespace_path + else + traversal_id + end + end + + compacted_traversal_ids.uniq + end + + private + + # find_most_common_namespace_path method takes an array of traversal_ids and returns the most common namespace + # For example, given the following traversal_ids it would return [1, 2] + # + # [ + # [1, 21], + # [1, 2, 3], + # [1, 2, 4], + # [1, 2, 5], + # [1, 2, 12, 13], + # [1, 6, 7], + # [1, 6, 8], + # [9, 10, 11] + # ] + + def find_most_common_namespace_path(traversal_ids) + # namespace_counts is a tally of the number of times each namespace path occurs in the traversal_ids array + # the namespace path is the traversal_id without the last element + namespace_counts = traversal_ids.map { |t| t[0...-1] }.tally # rubocop:disable Rails/Pluck -- traversal_ids is not an ActiveRecord relation + + # namespace is the namespace path that occurs the most times in the traversal_ids array after removing + # any namespace paths that occur only once since compaction isn't necessary for those + namespace = namespace_counts.reject { |_k, v| v == 1 }.sort_by { |k, v| [k.size, v] }.reverse.to_h.first + + # if namespace is nil it means there are no more namespaces to compact so + # we raise a CompactionLimitCannotBeAchievedError + raise CompactionLimitCannotBeAchievedError if namespace.nil? + + # return the most common namespace path + namespace.first + end + + # The starts_with? method returns true if the first n elements of the traversal_id match the namespace_path + # For example: + # + # starts_with?([1, 2, 3], [1, 2]) #=> true + # starts_with?([1, 2], [1, 2, 3]) #=> false + # starts_with?([1, 2, 3], [1, 2, 3]) #=> true + # starts_with?([1, 2, 3], [1, 2, 3, 4]) #=> false + + def starts_with?(traversal_id, namespace_path) + traversal_id.first(namespace_path.length) == namespace_path + end + end + end + end +end diff --git a/spec/lib/gitlab/traversal_id_compactor_spec.rb b/spec/lib/gitlab/traversal_id_compactor_spec.rb deleted file mode 100644 index ea05cf4429a60c..00000000000000 --- a/spec/lib/gitlab/traversal_id_compactor_spec.rb +++ /dev/null @@ -1,61 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe Gitlab::TraversalIdCompactor, feature_category: :secrets_management do - - let(:traversal_ids) do - [ - [1,21], - [1,2,3], - [1,2,4], - [1,2,5], - [1,2,12,13], - [1,6,7], - [1,6,8], - [9,10,11] - ] - end - - let(:compactor){Gitlab::TraversalIdCompactor.new} - - describe '#compact' do - it 'commpacts' do - result = compactor.compact(traversal_ids, 4) - - expect(result).to eq([ - [1,21], - [1,2], - [1,6], - [9,10,11] - ]) - end - - it 'keeps compacting down to the limit' do - result = compactor.compact(traversal_ids, 3) - - expect(result).to eq([ - [1], - [9,10,11] - ]) - end - - it 'raises when it cant hit the limit' do - expect { compactor.compact(traversal_ids, 1) }.to raise_error(described_class::CompactionLimitCannotBeAchievedError) - end - end - - describe '#compact_once' do - it 'commpacts' do - result = compactor.compact_once(traversal_ids) - - expect(result).to eq([ - [1,21], - [1,2], - [1,6,7], - [1,6,8], - [9,10,11] - ]) - end - end -end \ No newline at end of file diff --git a/spec/lib/gitlab/utils/traversal_id_compactor_spec.rb b/spec/lib/gitlab/utils/traversal_id_compactor_spec.rb new file mode 100644 index 00000000000000..86042cbb8fe4ec --- /dev/null +++ b/spec/lib/gitlab/utils/traversal_id_compactor_spec.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Utils::TraversalIdCompactor, feature_category: :secrets_management do + let(:traversal_ids) do + [ + [1, 21], + [1, 2, 3], + [1, 2, 4], + [1, 2, 5], + [1, 2, 12, 13], + [1, 6, 7], + [1, 6, 8], + [9, 10, 11] + ] + end + + let(:compactor) { described_class } + + describe '#compact' do + it 'compacts the array of traversal_ids using compact_once two times until the limit is reached' do + expect(compactor).to receive(:compact_once).twice.and_call_original + + result = compactor.compact(traversal_ids, 4) + + expect(result).to eq([ + [1, 21], + [1, 2], + [1, 6], + [9, 10, 11] + ]) + end + + it 'compacts the array of traversal_ids using compact_once three times until the limit is reached' do + expect(compactor).to receive(:compact_once).exactly(3).times.and_call_original + + result = compactor.compact(traversal_ids, 3) + + expect(result).to eq([ + [1], + [9, 10, 11] + ]) + end + + it 'raises when the compaction limit can not be achieved' do + expect do + compactor.compact(traversal_ids, 1) + end.to raise_error(described_class::CompactionLimitCannotBeAchievedError) + end + end + + describe '#compact_once' do + it 'compacts the one most common namespace path and returns the newly compacted array of traversal_ids' do + result = compactor.compact_once(traversal_ids) + + expect(result).to eq([ + [1, 21], + [1, 2], + [1, 6, 7], + [1, 6, 8], + [9, 10, 11] + ]) + end + end +end -- GitLab From 15ab09ce77b715694ace547eb2fbcb2d0c5b17df Mon Sep 17 00:00:00 2001 From: Darby Frey Date: Thu, 21 Nov 2024 18:29:58 -0600 Subject: [PATCH 5/9] Updating AuthorizationsCompactor to use TraversalIdCompactor --- Gemfile.lock | 5 +-- app/models/ci/job_token/authorization.rb | 1 + .../ci/job_token/authorizations_compactor.rb | 35 +++++++++++-------- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 5bbec1508d5ee1..1e1391880264a1 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -97,9 +97,7 @@ PATH PATH remote: gems/gitlab-secret_detection specs: - gitlab-secret_detection (0.1.1) - grpc (= 1.63.0) - grpc-tools (= 1.63.0) + gitlab-secret_detection (0.1.0) parallel (~> 1.22) re2 (~> 2.4) toml-rb (~> 2.2) @@ -948,7 +946,6 @@ GEM google-protobuf (~> 3.18) googleapis-common-protos (~> 1.4) grpc (~> 1.41) - grpc-tools (1.63.0) gssapi (1.3.1) ffi (>= 1.0.1) guard (2.16.2) diff --git a/app/models/ci/job_token/authorization.rb b/app/models/ci/job_token/authorization.rb index 11093921c5b3b5..458ef6330fa33d 100644 --- a/app/models/ci/job_token/authorization.rb +++ b/app/models/ci/job_token/authorization.rb @@ -10,6 +10,7 @@ module Ci module JobToken class Authorization < Ci::ApplicationRecord extend Gitlab::InternalEventsTracking + include EachBatch self.table_name = 'ci_job_token_authorizations' diff --git a/app/models/ci/job_token/authorizations_compactor.rb b/app/models/ci/job_token/authorizations_compactor.rb index 26f8fb5914dc59..e21ff30a1dfed0 100644 --- a/app/models/ci/job_token/authorizations_compactor.rb +++ b/app/models/ci/job_token/authorizations_compactor.rb @@ -11,36 +11,41 @@ def initialize(project_id) def origin_project_traversal_ids @origin_project_traversal_ids ||= begin - origin_project_traversal_ids = [] - Ci::JobToken::Authorization.where(access_project_id: project).each_batch(column: :origin_project_id) do |batch| - projects = Project.where(id: batch.select(:origin_project_id)).with_namespace + origin_project_traversal_ids = [] + origin_project_id_batches = [] + + # Collecting id batches to avoid cross-database transactions. + Ci::JobToken::Authorization.where( + accessed_project_id: @project_id + ).each_batch(column: :origin_project_id) do |batch| + origin_project_id_batches << batch.pluck(:origin_project_id) # rubocop:disable Database/AvoidUsingPluckWithoutLimit -- pluck limited by batch size + end + + origin_project_id_batches.each do |batch| + projects = Project.where(id: batch).with_namespace origin_project_traversal_ids += projects.map { |p| p.namespace.traversal_ids << p.project_namespace_id } end + origin_project_traversal_ids end end - # def traversal_ids - # @traversal_ids ||= origin_projects.map { |p| p.namespace.traversal_ids << p.project_namespace_id } - # end - def compact(limit) - compacted_traversal_ids = Gitlab::TraversalIdCompactor.new.compact(origin_project_traversal_ids, limit) - # do validation on compaction here + compacted_traversal_ids = Gitlab::Utils::TraversalIdCompactor.compact(origin_project_traversal_ids, limit) + # TODO: validate compaction here # ensure all entries in the compacted list exist in the original list # ensure there is no duplication in the namespaces - namespace_ids = compacted_traversal_ids.map { |t| t.last } + namespace_ids = compacted_traversal_ids.map(&:last) namespaces = Namespace.where(id: namespace_ids) - namespaces.each do |ns| - - if ns.project_namespace? + namespaces.each do |namespace| + if namespace.project_namespace? # add allow list entry for project - @allowlist_projects << ns.project + @allowlist_projects << namespace.project else # add allow list entry for group - @allowlist_groups << ns + @allowlist_groups << namespace end end end -- GitLab From 58da75317eae5d56453594b173b65927e0f84c6c Mon Sep 17 00:00:00 2001 From: Darby Frey Date: Fri, 22 Nov 2024 15:52:01 -0600 Subject: [PATCH 6/9] Adding AuthorizationsCompactor spec and data checks to the compacted ID results --- .../ci/job_token/authorizations_compactor.rb | 32 +++++-- lib/gitlab/utils/traversal_id_compactor.rb | 7 +- .../utils/traversal_id_compactor_spec.rb | 20 +++++ .../authorizations_compactor_spec.rb | 84 +++++++++++++++++++ 4 files changed, 134 insertions(+), 9 deletions(-) create mode 100644 spec/models/ci/job_token/authorizations_compactor_spec.rb diff --git a/app/models/ci/job_token/authorizations_compactor.rb b/app/models/ci/job_token/authorizations_compactor.rb index e21ff30a1dfed0..d865a37a3cc56f 100644 --- a/app/models/ci/job_token/authorizations_compactor.rb +++ b/app/models/ci/job_token/authorizations_compactor.rb @@ -3,6 +3,11 @@ module Ci module JobToken class AuthorizationsCompactor + attr_reader :allowlist_groups, :allowlist_projects + + UnexpectedCompactionEntry = Class.new(StandardError) + RedundantCompactionEntry = Class.new(StandardError) + def initialize(project_id) @project_id = project_id @allowlist_groups = [] @@ -22,8 +27,8 @@ def origin_project_traversal_ids end origin_project_id_batches.each do |batch| - projects = Project.where(id: batch).with_namespace - origin_project_traversal_ids += projects.map { |p| p.namespace.traversal_ids << p.project_namespace_id } + projects = Project.where(id: batch) + origin_project_traversal_ids += projects.map { |p| p.project_namespace.traversal_ids } end origin_project_traversal_ids @@ -32,23 +37,36 @@ def origin_project_traversal_ids def compact(limit) compacted_traversal_ids = Gitlab::Utils::TraversalIdCompactor.compact(origin_project_traversal_ids, limit) - # TODO: validate compaction here - # ensure all entries in the compacted list exist in the original list - # ensure there is no duplication in the namespaces + + validate_compaction_entries(origin_project_traversal_ids, compacted_traversal_ids) namespace_ids = compacted_traversal_ids.map(&:last) namespaces = Namespace.where(id: namespace_ids) namespaces.each do |namespace| if namespace.project_namespace? - # add allow list entry for project @allowlist_projects << namespace.project else - # add allow list entry for group @allowlist_groups << namespace end end end + + def validate_compaction_entries(origin_project_traversal_ids, compacted_traversal_ids) + compacted_traversal_ids.each do |compacted_path| + # Fail if there are redundant entries, for example, [1,2,3,4] and [1,2,3] + compacted_traversal_ids.each do |inner_compacted_path| + next if inner_compacted_path == compacted_path + + raise RedundantCompactionEntry if inner_compacted_path.first(compacted_path.size) == compacted_path + end + + # Fail if there are unexpected entries, for example, [1,2,3,4] and [1,2,5] + raise UnexpectedCompactionEntry unless origin_project_traversal_ids.find do |original_path| + compacted_path.all? { |path| original_path.include?(path) } + end + end + end end end end diff --git a/lib/gitlab/utils/traversal_id_compactor.rb b/lib/gitlab/utils/traversal_id_compactor.rb index 8aef216226de11..60c80a65b99644 100644 --- a/lib/gitlab/utils/traversal_id_compactor.rb +++ b/lib/gitlab/utils/traversal_id_compactor.rb @@ -72,8 +72,11 @@ def compact_once(traversal_ids) def find_most_common_namespace_path(traversal_ids) # namespace_counts is a tally of the number of times each namespace path occurs in the traversal_ids array - # the namespace path is the traversal_id without the last element - namespace_counts = traversal_ids.map { |t| t[0...-1] }.tally # rubocop:disable Rails/Pluck -- traversal_ids is not an ActiveRecord relation + # after removing any namespace paths that occur only once + # The namespace path is the traversal_id without the last element + namespace_counts = traversal_ids.each_with_object([]) do |traversal_id, result| + result << traversal_id[0...-1] if traversal_id.size > 1 + end.tally # namespace is the namespace path that occurs the most times in the traversal_ids array after removing # any namespace paths that occur only once since compaction isn't necessary for those diff --git a/spec/lib/gitlab/utils/traversal_id_compactor_spec.rb b/spec/lib/gitlab/utils/traversal_id_compactor_spec.rb index 86042cbb8fe4ec..8c12daed03160a 100644 --- a/spec/lib/gitlab/utils/traversal_id_compactor_spec.rb +++ b/spec/lib/gitlab/utils/traversal_id_compactor_spec.rb @@ -43,6 +43,26 @@ ]) end + it 'compacts the array of traversal_ids using compact_once one time to reach the limit' do + traversal_ids = [ + [1, 2], + [1, 3], + [1, 4], + [5, 6], + [6, 7] + ] + + expect(compactor).to receive(:compact_once).once.and_call_original + + result = compactor.compact(traversal_ids, 3) + + expect(result).to eq([ + [1], + [5, 6], + [6, 7] + ]) + end + it 'raises when the compaction limit can not be achieved' do expect do compactor.compact(traversal_ids, 1) diff --git a/spec/models/ci/job_token/authorizations_compactor_spec.rb b/spec/models/ci/job_token/authorizations_compactor_spec.rb new file mode 100644 index 00000000000000..127e594306178c --- /dev/null +++ b/spec/models/ci/job_token/authorizations_compactor_spec.rb @@ -0,0 +1,84 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Ci::JobToken::AuthorizationsCompactor, feature_category: :secrets_management do + let_it_be(:accessed_project) { create(:project) } + let(:compactor) { described_class.new(accessed_project.id) } + + # [1, 21], ns1, p1 + # [1, 2, 3], ns1, ns2, p2 + # [1, 2, 4], ns1, ns2, p3 + # [1, 2, 5], ns1, ns2, p4 + # [1, 2, 12, 13], ns1, ns2, ns3, p5 + # [1, 6, 7], ns1, ns4, p6 + # [1, 6, 8], ns1, ns4, p7 + # [9, 10, 11] ns5, ns6, p8 + + let_it_be(:ns1) { create(:group, name: 'ns1') } + let_it_be(:ns2) { create(:group, parent: ns1, name: 'ns2') } + let_it_be(:ns3) { create(:group, parent: ns2, name: 'ns3') } + let_it_be(:ns4) { create(:group, parent: ns1, name: 'ns4') } + let_it_be(:ns5) { create(:group, name: 'ns5') } + let_it_be(:ns6) { create(:group, parent: ns5, name: 'ns6') } + + let_it_be(:pns1) { create(:project_namespace, parent: ns1) } + let_it_be(:pns2) { create(:project_namespace, parent: ns2) } + let_it_be(:pns3) { create(:project_namespace, parent: ns2) } + let_it_be(:pns4) { create(:project_namespace, parent: ns2) } + let_it_be(:pns5) { create(:project_namespace, parent: ns3) } + let_it_be(:pns6) { create(:project_namespace, parent: ns4) } + let_it_be(:pns7) { create(:project_namespace, parent: ns4) } + let_it_be(:pns8) { create(:project_namespace, parent: ns6) } + + before do + origin_project_namespaces = [ + pns1, pns2, pns3, pns4, pns5, pns6, pns7, pns8 + ] + + origin_project_namespaces.each do |project_namespace| + create(:ci_job_token_authorization, origin_project: project_namespace.project, accessed_project: accessed_project, + last_authorized_at: 1.day.ago) + end + end + + describe '#compact' do + it 'compacts the allowlist groups and projects as expected for the given limit' do + compactor.compact(4) + + expect(compactor.allowlist_groups).to match_array([ns2, ns4]) + expect(compactor.allowlist_projects).to match_array([pns1.project, pns8.project]) + end + + it 'compacts the allowlist groups and projects as expected for the given limit' do + compactor.compact(3) + + expect(compactor.allowlist_groups).to match_array([ns1]) + expect(compactor.allowlist_projects).to match_array([pns8.project]) + end + + it 'raises when the limit cannot be achieved' do + expect do + compactor.compact(1) + end.to raise_error(Gitlab::Utils::TraversalIdCompactor::CompactionLimitCannotBeAchievedError) + end + + it 'raises when an unexpected compaction entry is found' do + allow(Gitlab::Utils::TraversalIdCompactor).to receive(:compact).and_wrap_original do |original_method, *args| + original_response = original_method.call(*args) + original_response << [1, 2, 3] + end + + expect { compactor.compact(5) }.to raise_error(described_class::UnexpectedCompactionEntry) + end + + it 'raises when a redundant compaction entry is found' do + allow(Gitlab::Utils::TraversalIdCompactor).to receive(:compact).and_wrap_original do |original_method, *args| + original_response = original_method.call(*args) + original_response << original_response.last.first(2) + end + + expect { compactor.compact(5) }.to raise_error(described_class::RedundantCompactionEntry) + end + end +end -- GitLab From c98ddd8e8ce124076886354378e8d52f7767c1e7 Mon Sep 17 00:00:00 2001 From: Darby Frey Date: Mon, 25 Nov 2024 08:17:36 -0600 Subject: [PATCH 7/9] Refactored validation method --- .../ci/job_token/authorizations_compactor.rb | 18 +------------ lib/gitlab/utils/traversal_id_compactor.rb | 26 +++++++++++++++++++ .../utils/traversal_id_compactor_spec.rb | 23 ++++++++++++++++ .../authorizations_compactor_spec.rb | 4 +-- 4 files changed, 52 insertions(+), 19 deletions(-) diff --git a/app/models/ci/job_token/authorizations_compactor.rb b/app/models/ci/job_token/authorizations_compactor.rb index d865a37a3cc56f..12615949432cec 100644 --- a/app/models/ci/job_token/authorizations_compactor.rb +++ b/app/models/ci/job_token/authorizations_compactor.rb @@ -38,7 +38,7 @@ def origin_project_traversal_ids def compact(limit) compacted_traversal_ids = Gitlab::Utils::TraversalIdCompactor.compact(origin_project_traversal_ids, limit) - validate_compaction_entries(origin_project_traversal_ids, compacted_traversal_ids) + Gitlab::Utils::TraversalIdCompactor.validate(origin_project_traversal_ids, compacted_traversal_ids) namespace_ids = compacted_traversal_ids.map(&:last) namespaces = Namespace.where(id: namespace_ids) @@ -51,22 +51,6 @@ def compact(limit) end end end - - def validate_compaction_entries(origin_project_traversal_ids, compacted_traversal_ids) - compacted_traversal_ids.each do |compacted_path| - # Fail if there are redundant entries, for example, [1,2,3,4] and [1,2,3] - compacted_traversal_ids.each do |inner_compacted_path| - next if inner_compacted_path == compacted_path - - raise RedundantCompactionEntry if inner_compacted_path.first(compacted_path.size) == compacted_path - end - - # Fail if there are unexpected entries, for example, [1,2,3,4] and [1,2,5] - raise UnexpectedCompactionEntry unless origin_project_traversal_ids.find do |original_path| - compacted_path.all? { |path| original_path.include?(path) } - end - end - end end end end diff --git a/lib/gitlab/utils/traversal_id_compactor.rb b/lib/gitlab/utils/traversal_id_compactor.rb index 60c80a65b99644..8858f05c01a08d 100644 --- a/lib/gitlab/utils/traversal_id_compactor.rb +++ b/lib/gitlab/utils/traversal_id_compactor.rb @@ -4,6 +4,8 @@ module Gitlab module Utils class TraversalIdCompactor CompactionLimitCannotBeAchievedError = Class.new(StandardError) + RedundantCompactionEntry = Class.new(StandardError) + UnexpectedCompactionEntry = Class.new(StandardError) class << self # This class compacts an array of traversal_ids by finding the most common namespace @@ -54,6 +56,30 @@ def compact_once(traversal_ids) compacted_traversal_ids.uniq end + # The validate method performs two checks on the compacted_traversal_ids + # 1. If there are redundant traversal_ids, for example [1,2,3,4] and [1,2,3] + # 2. If there are unexpected entries, meaning a traversal_id not present in the origin_project_traversal_ids + # If either case is found, it will raise an error + # Otherwise, it will return true + + def validate(origin_project_traversal_ids, compacted_traversal_ids) + compacted_traversal_ids.each do |compacted_path| + # Fail if there are unexpected entries + raise UnexpectedCompactionEntry unless origin_project_traversal_ids.find do |original_path| + starts_with?(original_path, compacted_path) + end + + # Fail if there are redundant entries + compacted_traversal_ids.each do |inner_compacted_path| + next if inner_compacted_path == compacted_path + + raise RedundantCompactionEntry if starts_with?(inner_compacted_path, compacted_path) + end + end + + true + end + private # find_most_common_namespace_path method takes an array of traversal_ids and returns the most common namespace diff --git a/spec/lib/gitlab/utils/traversal_id_compactor_spec.rb b/spec/lib/gitlab/utils/traversal_id_compactor_spec.rb index 8c12daed03160a..8f3ad907c8363b 100644 --- a/spec/lib/gitlab/utils/traversal_id_compactor_spec.rb +++ b/spec/lib/gitlab/utils/traversal_id_compactor_spec.rb @@ -83,4 +83,27 @@ ]) end end + + describe '#validate' do + it 'returns true when the compacted results are valid' do + result = compactor.compact(traversal_ids, 4) + expect(compactor.validate(traversal_ids, result)).to be true + end + + it 'raises a RedundantCompactionEntry error when redundant entries are found' do + result = compactor.compact(traversal_ids, 4) + result << [1, 2, 3] + expect do + compactor.validate(traversal_ids, result) + end.to raise_error(described_class::RedundantCompactionEntry) + end + + it 'raises an UnexpectedCompactionEntry error when an unexpected entry is found' do + result = compactor.compact(traversal_ids, 4) + result << [1, 3, 4] + expect do + compactor.validate(traversal_ids, result) + end.to raise_error(described_class::UnexpectedCompactionEntry) + end + end end diff --git a/spec/models/ci/job_token/authorizations_compactor_spec.rb b/spec/models/ci/job_token/authorizations_compactor_spec.rb index 127e594306178c..00f05396cdb282 100644 --- a/spec/models/ci/job_token/authorizations_compactor_spec.rb +++ b/spec/models/ci/job_token/authorizations_compactor_spec.rb @@ -69,7 +69,7 @@ original_response << [1, 2, 3] end - expect { compactor.compact(5) }.to raise_error(described_class::UnexpectedCompactionEntry) + expect { compactor.compact(5) }.to raise_error(Gitlab::Utils::TraversalIdCompactor::UnexpectedCompactionEntry) end it 'raises when a redundant compaction entry is found' do @@ -78,7 +78,7 @@ original_response << original_response.last.first(2) end - expect { compactor.compact(5) }.to raise_error(described_class::RedundantCompactionEntry) + expect { compactor.compact(5) }.to raise_error(Gitlab::Utils::TraversalIdCompactor::RedundantCompactionEntry) end end end -- GitLab From d7b4b937e11b4dd9cd19ccf98547f5813c1b118e Mon Sep 17 00:00:00 2001 From: Darby Frey Date: Mon, 2 Dec 2024 12:19:10 -0600 Subject: [PATCH 8/9] Undo Gemfile.lock changes --- Gemfile.lock | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index 1e1391880264a1..5bbec1508d5ee1 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -97,7 +97,9 @@ PATH PATH remote: gems/gitlab-secret_detection specs: - gitlab-secret_detection (0.1.0) + gitlab-secret_detection (0.1.1) + grpc (= 1.63.0) + grpc-tools (= 1.63.0) parallel (~> 1.22) re2 (~> 2.4) toml-rb (~> 2.2) @@ -946,6 +948,7 @@ GEM google-protobuf (~> 3.18) googleapis-common-protos (~> 1.4) grpc (~> 1.41) + grpc-tools (1.63.0) gssapi (1.3.1) ffi (>= 1.0.1) guard (2.16.2) -- GitLab From 4d65a586e3106033aa7c6f816f5e182d727a9533 Mon Sep 17 00:00:00 2001 From: Darby Frey Date: Tue, 10 Dec 2024 10:05:49 -0600 Subject: [PATCH 9/9] Updated naming, added tests --- .../ci/job_token/authorizations_compactor.rb | 2 +- lib/gitlab/utils/traversal_id_compactor.rb | 2 +- .../utils/traversal_id_compactor_spec.rb | 8 ++--- .../authorizations_compactor_spec.rb | 31 +++++++++++++++++++ 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/app/models/ci/job_token/authorizations_compactor.rb b/app/models/ci/job_token/authorizations_compactor.rb index 12615949432cec..df24293922b3f9 100644 --- a/app/models/ci/job_token/authorizations_compactor.rb +++ b/app/models/ci/job_token/authorizations_compactor.rb @@ -38,7 +38,7 @@ def origin_project_traversal_ids def compact(limit) compacted_traversal_ids = Gitlab::Utils::TraversalIdCompactor.compact(origin_project_traversal_ids, limit) - Gitlab::Utils::TraversalIdCompactor.validate(origin_project_traversal_ids, compacted_traversal_ids) + Gitlab::Utils::TraversalIdCompactor.validate!(origin_project_traversal_ids, compacted_traversal_ids) namespace_ids = compacted_traversal_ids.map(&:last) namespaces = Namespace.where(id: namespace_ids) diff --git a/lib/gitlab/utils/traversal_id_compactor.rb b/lib/gitlab/utils/traversal_id_compactor.rb index 8858f05c01a08d..c256ec401aeafc 100644 --- a/lib/gitlab/utils/traversal_id_compactor.rb +++ b/lib/gitlab/utils/traversal_id_compactor.rb @@ -62,7 +62,7 @@ def compact_once(traversal_ids) # If either case is found, it will raise an error # Otherwise, it will return true - def validate(origin_project_traversal_ids, compacted_traversal_ids) + def validate!(origin_project_traversal_ids, compacted_traversal_ids) compacted_traversal_ids.each do |compacted_path| # Fail if there are unexpected entries raise UnexpectedCompactionEntry unless origin_project_traversal_ids.find do |original_path| diff --git a/spec/lib/gitlab/utils/traversal_id_compactor_spec.rb b/spec/lib/gitlab/utils/traversal_id_compactor_spec.rb index 8f3ad907c8363b..5612994374d6eb 100644 --- a/spec/lib/gitlab/utils/traversal_id_compactor_spec.rb +++ b/spec/lib/gitlab/utils/traversal_id_compactor_spec.rb @@ -84,17 +84,17 @@ end end - describe '#validate' do + describe '#validate!' do it 'returns true when the compacted results are valid' do result = compactor.compact(traversal_ids, 4) - expect(compactor.validate(traversal_ids, result)).to be true + expect(compactor.validate!(traversal_ids, result)).to be true end it 'raises a RedundantCompactionEntry error when redundant entries are found' do result = compactor.compact(traversal_ids, 4) result << [1, 2, 3] expect do - compactor.validate(traversal_ids, result) + compactor.validate!(traversal_ids, result) end.to raise_error(described_class::RedundantCompactionEntry) end @@ -102,7 +102,7 @@ result = compactor.compact(traversal_ids, 4) result << [1, 3, 4] expect do - compactor.validate(traversal_ids, result) + compactor.validate!(traversal_ids, result) end.to raise_error(described_class::UnexpectedCompactionEntry) end end diff --git a/spec/models/ci/job_token/authorizations_compactor_spec.rb b/spec/models/ci/job_token/authorizations_compactor_spec.rb index 00f05396cdb282..0ad1e92d2f237b 100644 --- a/spec/models/ci/job_token/authorizations_compactor_spec.rb +++ b/spec/models/ci/job_token/authorizations_compactor_spec.rb @@ -80,5 +80,36 @@ expect { compactor.compact(5) }.to raise_error(Gitlab::Utils::TraversalIdCompactor::RedundantCompactionEntry) end + + context 'with three top-level namespaces' do + # [1, 21], ns1, p1 + # [1, 2, 3], ns1, ns2, p2 + # [1, 2, 4], ns1, ns2, p3 + # [1, 2, 5], ns1, ns2, p4 + # [1, 2, 12, 13], ns1, ns2, ns3, p5 + # [1, 6, 7], ns1, ns4, p6 + # [1, 6, 8], ns1, ns4, p7 + # [9, 10, 11] ns5, ns6, p8 + # [14, 15] ns7, p9 + let(:ns7) { create(:group, name: 'ns7') } + let(:pns9) { create(:project_namespace, parent: ns7) } + + before do + create(:ci_job_token_authorization, origin_project: pns9.project, accessed_project: accessed_project, + last_authorized_at: 1.day.ago) + end + + it 'raises when the limit cannot be achieved' do + expect do + compactor.compact(2) + end.to raise_error(Gitlab::Utils::TraversalIdCompactor::CompactionLimitCannotBeAchievedError) + end + + it 'does not raise when the limit cannot be achieved' do + expect do + compactor.compact(3) + end.not_to raise_error + end + end end end -- GitLab