From e3172482bb23e0ac007bb0590bc1927631f06f85 Mon Sep 17 00:00:00 2001 From: Pedro Pombeiro Date: Tue, 12 Oct 2021 14:30:00 +0200 Subject: [PATCH 01/13] Remove unused call to factory in specs --- spec/models/project_spec.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/spec/models/project_spec.rb b/spec/models/project_spec.rb index 8a4e485a298cf8..e37560e1fad4ed 100644 --- a/spec/models/project_spec.rb +++ b/spec/models/project_spec.rb @@ -7193,7 +7193,6 @@ def has_external_wiki let(:super_group) { create(:group) } it 'returns both group deploy keys' do - super_group = create(:group) super_group_deploy_key = create(:group_deploy_key, groups: [super_group]) group.update!(parent: super_group) -- GitLab From a93d6b9d1f6da3c7db95dedd4779b7064a0679b7 Mon Sep 17 00:00:00 2001 From: Pedro Pombeiro Date: Fri, 8 Oct 2021 19:22:07 +0200 Subject: [PATCH 02/13] Introduce namespace/project hierarchy tables In CI database to allow splitting from main database Changelog: other --- app/models/ci/namespace_hierarchy.rb | 8 ++ app/models/ci/project_hierarchy.rb | 8 ++ ...1140930_create_ci_namespace_hierarchies.rb | 15 +++ ...011140931_create_ci_project_hierarchies.rb | 15 +++ ...1011141240_add_ci_namespace_hierarchies.rb | 28 ++++++ ...211011141241_add_ci_project_hierarchies.rb | 28 ++++++ db/schema_migrations/20211011140930 | 1 + db/schema_migrations/20211011140931 | 1 + db/schema_migrations/20211011141240 | 1 + db/schema_migrations/20211011141241 | 1 + db/structure.sql | 48 +++++++++ .../gitlab/usage_data_non_sql_metrics_spec.rb | 2 +- .../backfill_namespace_hierarchies.rb | 76 ++++++++++++++ .../backfill_project_hierarchies.rb | 98 +++++++++++++++++++ lib/gitlab/database/gitlab_schemas.yml | 2 + .../backfill_namespace_hierarchies_spec.rb | 42 ++++++++ .../backfill_project_hierarchies_spec.rb | 46 +++++++++ 17 files changed, 419 insertions(+), 1 deletion(-) create mode 100644 app/models/ci/namespace_hierarchy.rb create mode 100644 app/models/ci/project_hierarchy.rb create mode 100644 db/migrate/20211011140930_create_ci_namespace_hierarchies.rb create mode 100644 db/migrate/20211011140931_create_ci_project_hierarchies.rb create mode 100644 db/post_migrate/20211011141240_add_ci_namespace_hierarchies.rb create mode 100644 db/post_migrate/20211011141241_add_ci_project_hierarchies.rb create mode 100644 db/schema_migrations/20211011140930 create mode 100644 db/schema_migrations/20211011140931 create mode 100644 db/schema_migrations/20211011141240 create mode 100644 db/schema_migrations/20211011141241 create mode 100644 lib/gitlab/background_migration/backfill_namespace_hierarchies.rb create mode 100644 lib/gitlab/background_migration/backfill_project_hierarchies.rb create mode 100644 spec/lib/gitlab/background_migration/backfill_namespace_hierarchies_spec.rb create mode 100644 spec/lib/gitlab/background_migration/backfill_project_hierarchies_spec.rb diff --git a/app/models/ci/namespace_hierarchy.rb b/app/models/ci/namespace_hierarchy.rb new file mode 100644 index 00000000000000..4db07b477d3e72 --- /dev/null +++ b/app/models/ci/namespace_hierarchy.rb @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +module Ci + # This model represents a record in a shadow table of the main database's namespaces table. + # It allows us to navigate the namespace hierarchy on the ci database without resorting to a JOIN. + class NamespaceHierarchy < ApplicationRecord + end +end diff --git a/app/models/ci/project_hierarchy.rb b/app/models/ci/project_hierarchy.rb new file mode 100644 index 00000000000000..5199924c3d0201 --- /dev/null +++ b/app/models/ci/project_hierarchy.rb @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +module Ci + # This model represents a shadow table of the main database's namespaces table. + # It allows us to navigate the namespace hierarchy on the ci database for a project without resorting to a JOIN. + class ProjectHierarchy < ApplicationRecord + end +end diff --git a/db/migrate/20211011140930_create_ci_namespace_hierarchies.rb b/db/migrate/20211011140930_create_ci_namespace_hierarchies.rb new file mode 100644 index 00000000000000..a0f1999dc7a82e --- /dev/null +++ b/db/migrate/20211011140930_create_ci_namespace_hierarchies.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +class CreateCiNamespaceHierarchies < Gitlab::Database::Migration[1.0] + TABLE_NAME = :ci_namespace_hierarchies + INDEX_NAME = "index_gin_#{TABLE_NAME}" + + def change + create_table TABLE_NAME, id: false do |t| + t.references :namespace, primary_key: true, null: false, index: false, foreign_key: { on_delete: :cascade } + t.integer :traversal_ids, array: true, default: [], null: false + + t.index :traversal_ids, name: INDEX_NAME, using: :gin + end + end +end diff --git a/db/migrate/20211011140931_create_ci_project_hierarchies.rb b/db/migrate/20211011140931_create_ci_project_hierarchies.rb new file mode 100644 index 00000000000000..595e914ec71128 --- /dev/null +++ b/db/migrate/20211011140931_create_ci_project_hierarchies.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +class CreateCiProjectHierarchies < Gitlab::Database::Migration[1.0] + TABLE_NAME = :ci_project_hierarchies + INDEX_NAME = "index_gin_#{TABLE_NAME}" + + def change + create_table TABLE_NAME, id: false do |t| + t.references :project, primary_key: true, null: false, index: false, foreign_key: { on_delete: :cascade } + t.integer :traversal_ids, array: true, default: [], null: false + + t.index :traversal_ids, name: INDEX_NAME, using: :gin + end + end +end diff --git a/db/post_migrate/20211011141240_add_ci_namespace_hierarchies.rb b/db/post_migrate/20211011141240_add_ci_namespace_hierarchies.rb new file mode 100644 index 00000000000000..312ed177acfe33 --- /dev/null +++ b/db/post_migrate/20211011141240_add_ci_namespace_hierarchies.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +# See https://docs.gitlab.com/ee/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class AddCiNamespaceHierarchies < Gitlab::Database::Migration[1.0] + MIGRATION = 'BackfillNamespaceHierarchies' + BATCH_SIZE = 1_000 + SUB_BATCH_SIZE = 100 + DELAY_INTERVAL = 2.minutes + + disable_ddl_transaction! + + def up + queue_background_migration_jobs_by_range_at_intervals( + Gitlab::BackgroundMigration::BackfillNamespaceHierarchies::Namespace.base_query, + MIGRATION, + DELAY_INTERVAL, + batch_size: BATCH_SIZE, + other_job_arguments: [SUB_BATCH_SIZE], + track_jobs: true + ) + end + + def down + execute('DELETE FROM ci_namespace_hierarchies') + end +end diff --git a/db/post_migrate/20211011141241_add_ci_project_hierarchies.rb b/db/post_migrate/20211011141241_add_ci_project_hierarchies.rb new file mode 100644 index 00000000000000..11d4d1e8434ffa --- /dev/null +++ b/db/post_migrate/20211011141241_add_ci_project_hierarchies.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +# See https://docs.gitlab.com/ee/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class AddCiProjectHierarchies < Gitlab::Database::Migration[1.0] + MIGRATION = 'BackfillProjectHierarchies' + BATCH_SIZE = 1_000 + SUB_BATCH_SIZE = 100 + DELAY_INTERVAL = 2.minutes + + disable_ddl_transaction! + + def up + queue_background_migration_jobs_by_range_at_intervals( + Gitlab::BackgroundMigration::BackfillProjectHierarchies::Project.base_query, + MIGRATION, + DELAY_INTERVAL, + batch_size: BATCH_SIZE, + other_job_arguments: [SUB_BATCH_SIZE], + track_jobs: true + ) + end + + def down + execute('DELETE FROM ci_project_hierarchies') + end +end diff --git a/db/schema_migrations/20211011140930 b/db/schema_migrations/20211011140930 new file mode 100644 index 00000000000000..6347ee5d51d84d --- /dev/null +++ b/db/schema_migrations/20211011140930 @@ -0,0 +1 @@ +cdae819e8de3b5ad721014376bfd9af97a45e953e2d345daf62784f986a5eb31 \ No newline at end of file diff --git a/db/schema_migrations/20211011140931 b/db/schema_migrations/20211011140931 new file mode 100644 index 00000000000000..c959d97074e02d --- /dev/null +++ b/db/schema_migrations/20211011140931 @@ -0,0 +1 @@ +7e51eb4443fd74da9bef4d9c1c3cc40376c311abbc05ca7871f725fada79b48a \ No newline at end of file diff --git a/db/schema_migrations/20211011141240 b/db/schema_migrations/20211011141240 new file mode 100644 index 00000000000000..ee736c5e0267cf --- /dev/null +++ b/db/schema_migrations/20211011141240 @@ -0,0 +1 @@ +f5dad108d30fe1d3a36027cd3f6fd7db006444892e5ec145921117d490be11c7 \ No newline at end of file diff --git a/db/schema_migrations/20211011141241 b/db/schema_migrations/20211011141241 new file mode 100644 index 00000000000000..33c6386425da19 --- /dev/null +++ b/db/schema_migrations/20211011141241 @@ -0,0 +1 @@ +347aa02789f2016271cd669de005bb722283a4c59e48a2ca831342e123e38b27 \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index 7e366cd67d3a6b..1b145e8cf130b6 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -11740,6 +11740,20 @@ CREATE SEQUENCE ci_minutes_additional_packs_id_seq ALTER SEQUENCE ci_minutes_additional_packs_id_seq OWNED BY ci_minutes_additional_packs.id; +CREATE TABLE ci_namespace_hierarchies ( + namespace_id bigint NOT NULL, + traversal_ids integer[] DEFAULT '{}'::integer[] NOT NULL +); + +CREATE SEQUENCE ci_namespace_hierarchies_namespace_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + +ALTER SEQUENCE ci_namespace_hierarchies_namespace_id_seq OWNED BY ci_namespace_hierarchies.namespace_id; + CREATE TABLE ci_namespace_monthly_usages ( id bigint NOT NULL, namespace_id bigint NOT NULL, @@ -11987,6 +12001,20 @@ CREATE SEQUENCE ci_platform_metrics_id_seq ALTER SEQUENCE ci_platform_metrics_id_seq OWNED BY ci_platform_metrics.id; +CREATE TABLE ci_project_hierarchies ( + project_id bigint NOT NULL, + traversal_ids integer[] DEFAULT '{}'::integer[] NOT NULL +); + +CREATE SEQUENCE ci_project_hierarchies_project_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + +ALTER SEQUENCE ci_project_hierarchies_project_id_seq OWNED BY ci_project_hierarchies.project_id; + CREATE TABLE ci_project_monthly_usages ( id bigint NOT NULL, project_id bigint NOT NULL, @@ -21239,6 +21267,8 @@ ALTER TABLE ONLY ci_job_variables ALTER COLUMN id SET DEFAULT nextval('ci_job_va ALTER TABLE ONLY ci_minutes_additional_packs ALTER COLUMN id SET DEFAULT nextval('ci_minutes_additional_packs_id_seq'::regclass); +ALTER TABLE ONLY ci_namespace_hierarchies ALTER COLUMN namespace_id SET DEFAULT nextval('ci_namespace_hierarchies_namespace_id_seq'::regclass); + ALTER TABLE ONLY ci_namespace_monthly_usages ALTER COLUMN id SET DEFAULT nextval('ci_namespace_monthly_usages_id_seq'::regclass); ALTER TABLE ONLY ci_pending_builds ALTER COLUMN id SET DEFAULT nextval('ci_pending_builds_id_seq'::regclass); @@ -21261,6 +21291,8 @@ ALTER TABLE ONLY ci_pipelines_config ALTER COLUMN pipeline_id SET DEFAULT nextva ALTER TABLE ONLY ci_platform_metrics ALTER COLUMN id SET DEFAULT nextval('ci_platform_metrics_id_seq'::regclass); +ALTER TABLE ONLY ci_project_hierarchies ALTER COLUMN project_id SET DEFAULT nextval('ci_project_hierarchies_project_id_seq'::regclass); + ALTER TABLE ONLY ci_project_monthly_usages ALTER COLUMN id SET DEFAULT nextval('ci_project_monthly_usages_id_seq'::regclass); ALTER TABLE ONLY ci_refs ALTER COLUMN id SET DEFAULT nextval('ci_refs_id_seq'::regclass); @@ -22696,6 +22728,9 @@ ALTER TABLE ONLY ci_job_variables ALTER TABLE ONLY ci_minutes_additional_packs ADD CONSTRAINT ci_minutes_additional_packs_pkey PRIMARY KEY (id); +ALTER TABLE ONLY ci_namespace_hierarchies + ADD CONSTRAINT ci_namespace_hierarchies_pkey PRIMARY KEY (namespace_id); + ALTER TABLE ONLY ci_namespace_monthly_usages ADD CONSTRAINT ci_namespace_monthly_usages_pkey PRIMARY KEY (id); @@ -22729,6 +22764,9 @@ ALTER TABLE ONLY ci_pipelines ALTER TABLE ONLY ci_platform_metrics ADD CONSTRAINT ci_platform_metrics_pkey PRIMARY KEY (id); +ALTER TABLE ONLY ci_project_hierarchies + ADD CONSTRAINT ci_project_hierarchies_pkey PRIMARY KEY (project_id); + ALTER TABLE ONLY ci_project_monthly_usages ADD CONSTRAINT ci_project_monthly_usages_pkey PRIMARY KEY (id); @@ -25958,8 +25996,12 @@ CREATE INDEX index_geo_repository_updated_events_on_source ON geo_repository_upd CREATE INDEX index_geo_reset_checksum_events_on_project_id ON geo_reset_checksum_events USING btree (project_id); +CREATE INDEX index_gin_ci_namespace_hierarchies ON ci_namespace_hierarchies USING gin (traversal_ids); + CREATE INDEX index_gin_ci_pending_builds_on_namespace_traversal_ids ON ci_pending_builds USING gin (namespace_traversal_ids); +CREATE INDEX index_gin_ci_project_hierarchies ON ci_project_hierarchies USING gin (traversal_ids); + CREATE INDEX index_gitlab_subscription_histories_on_gitlab_subscription_id ON gitlab_subscription_histories USING btree (gitlab_subscription_id); CREATE INDEX index_gitlab_subscriptions_on_end_date_and_namespace_id ON gitlab_subscriptions USING btree (end_date, namespace_id); @@ -30382,6 +30424,9 @@ ALTER TABLE ONLY operations_scopes ALTER TABLE ONLY milestone_releases ADD CONSTRAINT fk_rails_7ae0756a2d FOREIGN KEY (milestone_id) REFERENCES milestones(id) ON DELETE CASCADE; +ALTER TABLE ONLY ci_project_hierarchies + ADD CONSTRAINT fk_rails_7d69e0670e FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE; + ALTER TABLE ONLY resource_state_events ADD CONSTRAINT fk_rails_7ddc5f7457 FOREIGN KEY (source_merge_request_id) REFERENCES merge_requests(id) ON DELETE SET NULL; @@ -30934,6 +30979,9 @@ ALTER TABLE ONLY packages_debian_group_component_files ALTER TABLE ONLY user_callouts ADD CONSTRAINT fk_rails_ddfdd80f3d FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE; +ALTER TABLE ONLY ci_namespace_hierarchies + ADD CONSTRAINT fk_rails_de9e668a9e FOREIGN KEY (namespace_id) REFERENCES namespaces(id) ON DELETE CASCADE; + ALTER TABLE ONLY vulnerability_feedback ADD CONSTRAINT fk_rails_debd54e456 FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE; diff --git a/ee/spec/lib/ee/gitlab/usage_data_non_sql_metrics_spec.rb b/ee/spec/lib/ee/gitlab/usage_data_non_sql_metrics_spec.rb index 19add5ac384490..0ce25d2dea58b4 100644 --- a/ee/spec/lib/ee/gitlab/usage_data_non_sql_metrics_spec.rb +++ b/ee/spec/lib/ee/gitlab/usage_data_non_sql_metrics_spec.rb @@ -15,7 +15,7 @@ described_class.uncached_data end - expect(recorder.count).to eq(59) + expect(recorder.count).to eq(60) end end end diff --git a/lib/gitlab/background_migration/backfill_namespace_hierarchies.rb b/lib/gitlab/background_migration/backfill_namespace_hierarchies.rb new file mode 100644 index 00000000000000..bbb2e1cca1ad7d --- /dev/null +++ b/lib/gitlab/background_migration/backfill_namespace_hierarchies.rb @@ -0,0 +1,76 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # A job to create ci_namespace_hierarchies entries in sub-batches, relative to all namespaces + # that don't yet have a match in ci_namespace_hierarchies. + # rubocop:disable Style/Documentation + class BackfillNamespaceHierarchies + class Namespace < ActiveRecord::Base + include ::EachBatch + + self.table_name = 'namespaces' + self.inheritance_column = nil + + scope :base_query, -> do + where.not(id: ::Ci::NamespaceHierarchy.select(:namespace_id)) + .allow_cross_joins_across_databases(url: '') + end + end + + PAUSE_SECONDS = 0.1 + + def perform(start_id, end_id, sub_batch_size) + batch_query = Namespace.base_query.where(id: start_id..end_id) + batch_query.each_batch(of: sub_batch_size) do |sub_batch| + first, last = sub_batch.pluck(Arel.sql('min(id), max(id)')).first + ranged_query = Namespace.unscoped.base_query.where(id: first..last) + + update_sql = <<~SQL + INSERT INTO ci_namespace_hierarchies (namespace_id, traversal_ids) + #{calculated_traversal_ids(ranged_query)} + -- ON CONFLICT (namespace_id) DO NOTHING + SQL + ActiveRecord::Base.connection.execute(update_sql) + + sleep PAUSE_SECONDS + end + + # We have to add all arguments when marking a job as succeeded as they + # are all used to track the job by `queue_background_migration_jobs_by_range_at_intervals` + mark_job_as_succeeded(start_id, end_id, sub_batch_size) + end + + private + + # Calculate the ancestor path for a given set of namespaces. + def calculated_traversal_ids(batch) + <<~SQL + WITH RECURSIVE cte(source_id, namespace_id, parent_id, height) AS ( + ( + SELECT batch.id, batch.id, batch.parent_id, 1 + FROM (#{batch.to_sql}) AS batch + ) + UNION ALL + ( + SELECT cte.source_id, n.id, n.parent_id, cte.height+1 + FROM namespaces n, cte + WHERE n.id = cte.parent_id + ) + ) + SELECT flat_hierarchy.source_id as namespace_id, + array_agg(flat_hierarchy.namespace_id ORDER BY flat_hierarchy.height DESC) as traversal_ids + FROM (SELECT * FROM cte FOR UPDATE) flat_hierarchy + GROUP BY flat_hierarchy.source_id + SQL + end + + def mark_job_as_succeeded(*arguments) + Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded( + 'BackfillNamespaceHierarchies', + arguments + ) + end + end + end +end diff --git a/lib/gitlab/background_migration/backfill_project_hierarchies.rb b/lib/gitlab/background_migration/backfill_project_hierarchies.rb new file mode 100644 index 00000000000000..6285a10e711603 --- /dev/null +++ b/lib/gitlab/background_migration/backfill_project_hierarchies.rb @@ -0,0 +1,98 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # A job to set ci_project_hierarchies.traversal_ids in sub-batches, of all projects + # that don't yet have a match in ci_project_hierarchies. + # rubocop:disable Style/Documentation + class BackfillProjectHierarchies + class Project < ActiveRecord::Base + include ::EachBatch + + belongs_to :namespace + + self.table_name = 'projects' + + scope :base_query, -> do + where.not(id: ::Ci::ProjectHierarchy.select(:project_id)) + .joins(:namespace) + .select(:id, 'namespaces.id AS parent_id') + .allow_cross_joins_across_databases(url: '') + end + end + + PAUSE_SECONDS = 0.1 + + def perform(start_id, end_id, sub_batch_size) + batch_query = Project.base_query.where(id: start_id..end_id) + batch_query.each_batch(of: sub_batch_size) do |sub_batch| + first, last = sub_batch.pluck(Arel.sql('min(projects.id), max(projects.id)')).first + ranged_query = Project.unscoped.base_query.where(id: first..last) + + update_sql = <<~SQL + INSERT INTO ci_project_hierarchies (project_id, traversal_ids) + #{calculated_traversal_ids(ranged_query)} + ON CONFLICT (project_id) DO NOTHING + SQL + ActiveRecord::Base.connection.execute(update_sql) + + sleep PAUSE_SECONDS + end + + # We have to add all arguments when marking a job as succeeded as they + # are all used to track the job by `queue_background_migration_jobs_by_range_at_intervals` + mark_job_as_succeeded(start_id, end_id, sub_batch_size) + end + + private + + # Calculate the ancestor path for a given set of projects. + def calculated_traversal_ids(batch) + <<~SQL + ( + WITH RECURSIVE cte ( + source_id, + namespace_id, + parent_id, + height + ) AS (( + SELECT + batch.id, + 0, + batch.parent_id, + 1 + FROM (#{batch.to_sql}) AS batch) + UNION ALL ( + SELECT + cte.source_id, + n.id, + n.parent_id, + cte.height + 1 + FROM + namespaces n, + cte + WHERE + n.id = cte.parent_id)) + SELECT + h.project_id, + h.traversal_ids[1:array_length(h.traversal_ids, 1) - 1] + FROM ( + SELECT + flat_hierarchy.source_id AS project_id, + array_agg(flat_hierarchy.namespace_id ORDER BY flat_hierarchy.height DESC) AS traversal_ids + FROM (SELECT * FROM cte FOR UPDATE) flat_hierarchy + GROUP BY + flat_hierarchy.source_id) AS h + ) + SQL + end + + def mark_job_as_succeeded(*arguments) + Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded( + 'BackfillProjectHierarchies', + arguments + ) + end + end + end +end diff --git a/lib/gitlab/database/gitlab_schemas.yml b/lib/gitlab/database/gitlab_schemas.yml index ee5039ccab8389..68e8950198916a 100644 --- a/lib/gitlab/database/gitlab_schemas.yml +++ b/lib/gitlab/database/gitlab_schemas.yml @@ -84,6 +84,7 @@ ci_job_artifacts: :gitlab_ci ci_job_token_project_scope_links: :gitlab_ci ci_job_variables: :gitlab_ci ci_minutes_additional_packs: :gitlab_ci +ci_namespace_hierarchies: :gitlab_ci ci_namespace_monthly_usages: :gitlab_ci ci_pending_builds: :gitlab_ci ci_pipeline_artifacts: :gitlab_ci @@ -95,6 +96,7 @@ ci_pipelines_config: :gitlab_ci ci_pipelines: :gitlab_ci ci_pipeline_variables: :gitlab_ci ci_platform_metrics: :gitlab_ci +ci_project_hierarchies: :gitlab_ci ci_project_monthly_usages: :gitlab_ci ci_refs: :gitlab_ci ci_resource_groups: :gitlab_ci diff --git a/spec/lib/gitlab/background_migration/backfill_namespace_hierarchies_spec.rb b/spec/lib/gitlab/background_migration/backfill_namespace_hierarchies_spec.rb new file mode 100644 index 00000000000000..e6a6521ac1a5da --- /dev/null +++ b/spec/lib/gitlab/background_migration/backfill_namespace_hierarchies_spec.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BackgroundMigration::BackfillNamespaceHierarchies, :migration, schema: 20211011141240 do + let(:namespaces) { table(:namespaces) } + let(:ci_namespace_hierarchies) { table(:ci_namespace_hierarchies) } + + subject { described_class.new } + + describe '#perform' do + it 'creates hierarchies for all namespaces in range' do + namespaces.create!(id: 5, name: 'test1', path: 'test1') + namespaces.create!(id: 7, name: 'test2', path: 'test2') + namespaces.create!(id: 8, name: 'test3', path: 'test3') + + subject.perform(5, 7, 1) + + expect(ci_namespace_hierarchies.all).to contain_exactly( + an_object_having_attributes(namespace_id: 5, traversal_ids: [5]), + an_object_having_attributes(namespace_id: 7, traversal_ids: [7]) + ) + end + + it 'handles existing hierarchies gracefully' do + namespaces.create!(id: 5, name: 'test1', path: 'test1') + namespaces.create!(id: 7, name: 'test2', path: 'test2') + namespaces.create!(id: 8, name: 'test3', path: 'test3') + + # Simulate a situation where a user has had a chance to move a group to another parent + # before the background migration has had a chance to run + ci_namespace_hierarchies.create!(namespace_id: 7, traversal_ids: [5, 7]) + + subject.perform(5, 7, 1) + + expect(ci_namespace_hierarchies.all).to contain_exactly( + an_object_having_attributes(namespace_id: 5, traversal_ids: [5]), + an_object_having_attributes(namespace_id: 7, traversal_ids: [5, 7]) + ) + end + end +end diff --git a/spec/lib/gitlab/background_migration/backfill_project_hierarchies_spec.rb b/spec/lib/gitlab/background_migration/backfill_project_hierarchies_spec.rb new file mode 100644 index 00000000000000..fe475408ba6996 --- /dev/null +++ b/spec/lib/gitlab/background_migration/backfill_project_hierarchies_spec.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BackgroundMigration::BackfillProjectHierarchies, :migration, schema: 20211011141241 do + let(:namespaces) { table(:namespaces) } + let(:projects) { table(:projects) } + let(:ci_project_hierarchies) { table(:ci_project_hierarchies) } + + subject { described_class.new } + + describe '#perform' do + it 'creates hierarchies for all projects in range' do + namespaces.create!(id: 10, name: 'namespace1', path: 'namespace1') + projects.create!(id: 5, namespace_id: 10, name: 'test1', path: 'test1') + projects.create!(id: 7, namespace_id: 10, name: 'test2', path: 'test2') + projects.create!(id: 8, namespace_id: 10, name: 'test3', path: 'test3') + + subject.perform(5, 7, 1) + + expect(ci_project_hierarchies.all).to contain_exactly( + an_object_having_attributes(project_id: 5, traversal_ids: [10]), + an_object_having_attributes(project_id: 7, traversal_ids: [10]) + ) + end + + it 'handles existing hierarchies gracefully' do + namespaces.create!(id: 10, name: 'namespace1', path: 'namespace1') + namespaces.create!(id: 11, name: 'namespace2', path: 'namespace2', parent_id: 10) + projects.create!(id: 5, namespace_id: 10, name: 'test1', path: 'test1') + projects.create!(id: 7, namespace_id: 11, name: 'test2', path: 'test2') + projects.create!(id: 8, namespace_id: 11, name: 'test3', path: 'test3') + + # Simulate a situation where a user has had a chance to move a project to another namespace + # before the background migration has had a chance to run + ci_project_hierarchies.create!(project_id: 7, traversal_ids: [10, 11]) + + subject.perform(5, 7, 1) + + expect(ci_project_hierarchies.all).to contain_exactly( + an_object_having_attributes(project_id: 5, traversal_ids: [10]), + an_object_having_attributes(project_id: 7, traversal_ids: [10, 11]) + ) + end + end +end -- GitLab From 6469e05fc30f21cb6456a302490754bd88320355 Mon Sep 17 00:00:00 2001 From: Pedro Pombeiro Date: Fri, 8 Oct 2021 19:22:51 +0200 Subject: [PATCH 03/13] Implement sidekiq jobs to keep new tables in sync Changes to the traversal_ids in namespace tables in main database generate new event records which will be consumed by a Sidekiq job to replicate the changes in the ci database --- app/models/ci/namespace_hierarchy.rb | 7 ++ app/models/ci/project_hierarchy.rb | 18 +++++ app/models/namespace.rb | 21 +++++ app/models/namespaces/sync_event.rb | 7 ++ app/models/project.rb | 21 +++++ app/models/projects/sync_event.rb | 7 ++ app/workers/all_queues.yml | 18 +++++ .../ci/process_namespace_events_worker.rb | 38 +++++++++ .../ci/process_project_events_worker.rb | 37 +++++++++ config/sidekiq_queues.yml | 4 + ...011140932_create_namespaces_sync_events.rb | 10 +++ ...11011141239_create_projects_sync_events.rb | 11 +++ db/schema_migrations/20211011140932 | 1 + db/schema_migrations/20211011141239 | 1 + db/structure.sql | 46 +++++++++++ .../gitlab/usage_data_non_sql_metrics_spec.rb | 2 +- lib/gitlab/database/gitlab_schemas.yml | 2 + spec/models/ci/namespace_hierarchy_spec.rb | 66 +++++++++++++++ spec/models/ci/project_hierarchy_spec.rb | 81 +++++++++++++++++++ spec/models/namespace_spec.rb | 45 +++++++++++ spec/models/project_spec.rb | 38 ++++++++- spec/models/user_spec.rb | 3 +- spec/services/notification_service_spec.rb | 3 +- .../process_namespace_events_worker_spec.rb | 45 +++++++++++ .../ci/process_project_events_worker_spec.rb | 62 ++++++++++++++ 25 files changed, 589 insertions(+), 5 deletions(-) create mode 100644 app/models/namespaces/sync_event.rb create mode 100644 app/models/projects/sync_event.rb create mode 100644 app/workers/ci/process_namespace_events_worker.rb create mode 100644 app/workers/ci/process_project_events_worker.rb create mode 100644 db/migrate/20211011140932_create_namespaces_sync_events.rb create mode 100644 db/migrate/20211011141239_create_projects_sync_events.rb create mode 100644 db/schema_migrations/20211011140932 create mode 100644 db/schema_migrations/20211011141239 create mode 100644 spec/models/ci/namespace_hierarchy_spec.rb create mode 100644 spec/models/ci/project_hierarchy_spec.rb create mode 100644 spec/workers/ci/process_namespace_events_worker_spec.rb create mode 100644 spec/workers/ci/process_project_events_worker_spec.rb diff --git a/app/models/ci/namespace_hierarchy.rb b/app/models/ci/namespace_hierarchy.rb index 4db07b477d3e72..de6b0a91a14855 100644 --- a/app/models/ci/namespace_hierarchy.rb +++ b/app/models/ci/namespace_hierarchy.rb @@ -4,5 +4,12 @@ module Ci # This model represents a record in a shadow table of the main database's namespaces table. # It allows us to navigate the namespace hierarchy on the ci database without resorting to a JOIN. class NamespaceHierarchy < ApplicationRecord + class << self + def update_traversal_ids(old_self_and_ancestor_ids, new_self_and_ancestor_ids) + # Update the traversal IDs with the new_self_and_ancestor_ids plus any descendants in the record + where('traversal_ids @> ARRAY[?]::int[]', old_self_and_ancestor_ids) + .update_all("traversal_ids = ARRAY#{new_self_and_ancestor_ids}::int[] || traversal_ids[#{old_self_and_ancestor_ids.length + 1}:]") + end + end end end diff --git a/app/models/ci/project_hierarchy.rb b/app/models/ci/project_hierarchy.rb index 5199924c3d0201..c9b235a5da3d4e 100644 --- a/app/models/ci/project_hierarchy.rb +++ b/app/models/ci/project_hierarchy.rb @@ -4,5 +4,23 @@ module Ci # This model represents a shadow table of the main database's namespaces table. # It allows us to navigate the namespace hierarchy on the ci database for a project without resorting to a JOIN. class ProjectHierarchy < ApplicationRecord + belongs_to :project, class_name: 'Project' + + class << self + def update_traversal_ids(project_id, new_ancestor_ids) + where(project_id: project_id).update_all("traversal_ids = ARRAY#{new_ancestor_ids}::int[]") + end + + def update_namespace_traversal_ids(old_ancestor_ids, new_ancestor_ids) + update_all_traversal_ids(where('traversal_ids @> ARRAY[?]::int[]', old_ancestor_ids), old_ancestor_ids, new_ancestor_ids) + end + + private + + def update_all_traversal_ids(assoc, old_ancestor_ids, new_ancestor_ids) + # Update the traversal IDs with the new_ancestor_ids plus any descendants in the record + assoc.update_all("traversal_ids = ARRAY#{new_ancestor_ids}::int[] || traversal_ids[#{old_ancestor_ids.length + 1}:]") + end + end end end diff --git a/app/models/namespace.rb b/app/models/namespace.rb index 9565d1a6a692bc..c1244651f8df6d 100644 --- a/app/models/namespace.rb +++ b/app/models/namespace.rb @@ -107,6 +107,10 @@ class Namespace < ApplicationRecord delegate :name, to: :owner, allow_nil: true, prefix: true delegate :avatar_url, to: :owner, allow_nil: true + before_update :save_previous_traversal_ids, if: :parent_changed? + after_update :create_sync_event, if: :saved_change_to_parent_id? + after_create :create_sync_event + after_commit :refresh_access_of_projects_invited_groups, on: :update, if: -> { previous_changes.key?('share_with_group_lock') } before_create :sync_share_with_group_lock_with_parent @@ -616,6 +620,23 @@ def write_projects_repository_config def enforce_minimum_path_length? path_changed? && !project_namespace? end + + def save_previous_traversal_ids + previous_namespace = Namespace.find(parent_id_was) if parent_id_was + @previous_traversal_ids = (previous_namespace&.self_and_ancestor_ids(hierarchy_order: :desc) || []) + [id] + end + + def create_sync_event + Namespaces::SyncEvent.insert({ + traversal_ids: @previous_traversal_ids || [], + new_traversal_ids: self_and_ancestor_ids(hierarchy_order: :desc) + }) + @previous_traversal_ids = nil + + run_after_commit do + Ci::ProcessNamespaceEventsWorker.perform_async + end + end end Namespace.prepend_mod_with('Namespace') diff --git a/app/models/namespaces/sync_event.rb b/app/models/namespaces/sync_event.rb new file mode 100644 index 00000000000000..89d1b4c36a7ec6 --- /dev/null +++ b/app/models/namespaces/sync_event.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +# This model serves to keep track of changes to the namespaces table in the main database, and allowing to safely +# replicate these changes to other databases. +class Namespaces::SyncEvent < ApplicationRecord + self.table_name_prefix = 'namespaces_' +end diff --git a/app/models/project.rb b/app/models/project.rb index 604158d1a6eb22..2c6b8167fd4af6 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -101,6 +101,10 @@ class Project < ApplicationRecord after_save :update_project_statistics, if: :saved_change_to_namespace_id? + before_update :save_previous_traversal_ids, if: :namespace_id_changed? + after_update :create_sync_event, if: :saved_change_to_namespace_id? + after_create :create_sync_event + after_save :create_import_state, if: ->(project) { project.import? && project.import_state.nil? } after_save :save_topics @@ -2955,6 +2959,23 @@ def sync_attributes(project_namespace) project_namespace.shared_runners_enabled = shared_runners_enabled project_namespace.visibility_level = visibility_level end + + def save_previous_traversal_ids + previous_namespace = Namespace.find_by(id: namespace_id_was) if changes.include?(:namespace_id) + @previous_traversal_ids = previous_namespace&.self_and_ancestor_ids(hierarchy_order: :desc) + end + + def create_sync_event + Projects::SyncEvent.insert({ + project_id: id, + traversal_ids: @previous_traversal_ids || [], + new_traversal_ids: namespace.self_and_ancestor_ids(hierarchy_order: :desc) + }) + + run_after_commit do + Ci::ProcessProjectEventsWorker.perform_async + end + end end Project.prepend_mod_with('Project') diff --git a/app/models/projects/sync_event.rb b/app/models/projects/sync_event.rb new file mode 100644 index 00000000000000..55e3bc36c350e0 --- /dev/null +++ b/app/models/projects/sync_event.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +# This model serves to keep track of changes to the namespaces table in the main database as they relate to projects, +# allowing to safely replicate changes to other databases. +class Projects::SyncEvent < ApplicationRecord + self.table_name_prefix = 'projects_' +end diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index 4fa74195a993bb..3e25cf0dcf05db 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -1951,6 +1951,24 @@ :weight: 1 :idempotent: true :tags: [] +- :name: ci_process_namespace_events + :worker_name: Ci::ProcessNamespaceEventsWorker + :feature_category: :subgroups + :has_external_dependencies: + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: true + :tags: [] +- :name: ci_process_project_events + :worker_name: Ci::ProcessProjectEventsWorker + :feature_category: :subgroups + :has_external_dependencies: + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: true + :tags: [] - :name: create_commit_signature :worker_name: CreateCommitSignatureWorker :feature_category: :source_code_management diff --git a/app/workers/ci/process_namespace_events_worker.rb b/app/workers/ci/process_namespace_events_worker.rb new file mode 100644 index 00000000000000..ed628f43d80e77 --- /dev/null +++ b/app/workers/ci/process_namespace_events_worker.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +module Ci + class ProcessNamespaceEventsWorker + include ApplicationWorker + + data_consistency :always + + feature_category :subgroups + + deduplicate :until_executed + idempotent! + + def perform + # rubocop: disable CodeReuse/ActiveRecord + while first_event = Namespaces::SyncEvent.lock('FOR UPDATE SKIP LOCKED').order(:id).first + sync_namespace_traversal_ids(first_event) if first_event.new_traversal_ids.present? + + first_event.delete + end + # rubocop: enable CodeReuse/ActiveRecord + end + + private + + def sync_namespace_traversal_ids(event) + if event.traversal_ids.present? + ::Ci::NamespaceHierarchy.update_traversal_ids(event.traversal_ids, event.new_traversal_ids) + ::Ci::ProjectHierarchy.update_namespace_traversal_ids(event.traversal_ids, event.new_traversal_ids) + else + ::Ci::NamespaceHierarchy.create!( + namespace_id: event.new_traversal_ids.last, + traversal_ids: event.new_traversal_ids + ) + end + end + end +end diff --git a/app/workers/ci/process_project_events_worker.rb b/app/workers/ci/process_project_events_worker.rb new file mode 100644 index 00000000000000..7191e38c2b8e60 --- /dev/null +++ b/app/workers/ci/process_project_events_worker.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +module Ci + class ProcessProjectEventsWorker + include ApplicationWorker + + data_consistency :always + + feature_category :subgroups + + deduplicate :until_executed + idempotent! + + def perform + # rubocop: disable CodeReuse/ActiveRecord + while first_event = Projects::SyncEvent.lock('FOR UPDATE SKIP LOCKED').order(:id).first + sync_project_traversal_ids(first_event) if first_event.new_traversal_ids.present? + + first_event.delete + end + # rubocop: enable CodeReuse/ActiveRecord + end + + private + + def sync_project_traversal_ids(event) + if event.traversal_ids.present? + ::Ci::ProjectHierarchy.update_traversal_ids( + event.project_id, + event.new_traversal_ids + ) + else + ::Ci::ProjectHierarchy.create!(project_id: event.project_id, traversal_ids: event.new_traversal_ids) + end + end + end +end diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml index f7e3f036c53547..e1591f898af1e1 100644 --- a/config/sidekiq_queues.yml +++ b/config/sidekiq_queues.yml @@ -73,6 +73,10 @@ - 1 - - ci_delete_objects - 1 +- - ci_process_namespace_events + - 1 +- - ci_process_project_events + - 1 - - container_repository - 1 - - create_commit_signature diff --git a/db/migrate/20211011140932_create_namespaces_sync_events.rb b/db/migrate/20211011140932_create_namespaces_sync_events.rb new file mode 100644 index 00000000000000..1f26d0db906667 --- /dev/null +++ b/db/migrate/20211011140932_create_namespaces_sync_events.rb @@ -0,0 +1,10 @@ +# frozen_string_literal: true + +class CreateNamespacesSyncEvents < Gitlab::Database::Migration[1.0] + def change + create_table :namespaces_sync_events do |t| + t.integer :traversal_ids, array: true, default: [], null: false + t.integer :new_traversal_ids, array: true, default: [], null: false + end + end +end diff --git a/db/migrate/20211011141239_create_projects_sync_events.rb b/db/migrate/20211011141239_create_projects_sync_events.rb new file mode 100644 index 00000000000000..be1c89ca570350 --- /dev/null +++ b/db/migrate/20211011141239_create_projects_sync_events.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +class CreateProjectsSyncEvents < Gitlab::Database::Migration[1.0] + def change + create_table :projects_sync_events do |t| + t.references :project, null: false, index: true, foreign_key: { on_delete: :cascade } + t.integer :traversal_ids, array: true, default: [], null: false + t.integer :new_traversal_ids, array: true, default: [], null: false + end + end +end diff --git a/db/schema_migrations/20211011140932 b/db/schema_migrations/20211011140932 new file mode 100644 index 00000000000000..af0e000b9f3e2a --- /dev/null +++ b/db/schema_migrations/20211011140932 @@ -0,0 +1 @@ +0209db1e7be48bcbf0e52b451d37da0ef2ecadd567cdfa47907fc5032c258a27 \ No newline at end of file diff --git a/db/schema_migrations/20211011141239 b/db/schema_migrations/20211011141239 new file mode 100644 index 00000000000000..f215f234a7e1a5 --- /dev/null +++ b/db/schema_migrations/20211011141239 @@ -0,0 +1 @@ +bc0ae055b331801fbe020c12a66e4e6ae790780121bfd66fd161093c94c7a84a \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index 1b145e8cf130b6..447fcecae7a726 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -16393,6 +16393,21 @@ CREATE SEQUENCE namespaces_id_seq ALTER SEQUENCE namespaces_id_seq OWNED BY namespaces.id; +CREATE TABLE namespaces_sync_events ( + id bigint NOT NULL, + traversal_ids integer[] DEFAULT '{}'::integer[] NOT NULL, + new_traversal_ids integer[] DEFAULT '{}'::integer[] NOT NULL +); + +CREATE SEQUENCE namespaces_sync_events_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + +ALTER SEQUENCE namespaces_sync_events_id_seq OWNED BY namespaces_sync_events.id; + CREATE TABLE note_diff_files ( id integer NOT NULL, diff_note_id integer NOT NULL, @@ -18469,6 +18484,22 @@ CREATE SEQUENCE projects_id_seq ALTER SEQUENCE projects_id_seq OWNED BY projects.id; +CREATE TABLE projects_sync_events ( + id bigint NOT NULL, + project_id bigint NOT NULL, + traversal_ids integer[] DEFAULT '{}'::integer[] NOT NULL, + new_traversal_ids integer[] DEFAULT '{}'::integer[] NOT NULL +); + +CREATE SEQUENCE projects_sync_events_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + +ALTER SEQUENCE projects_sync_events_id_seq OWNED BY projects_sync_events.id; + CREATE TABLE prometheus_alert_events ( id bigint NOT NULL, project_id integer NOT NULL, @@ -21679,6 +21710,8 @@ ALTER TABLE ONLY namespace_statistics ALTER COLUMN id SET DEFAULT nextval('names ALTER TABLE ONLY namespaces ALTER COLUMN id SET DEFAULT nextval('namespaces_id_seq'::regclass); +ALTER TABLE ONLY namespaces_sync_events ALTER COLUMN id SET DEFAULT nextval('namespaces_sync_events_id_seq'::regclass); + ALTER TABLE ONLY note_diff_files ALTER COLUMN id SET DEFAULT nextval('note_diff_files_id_seq'::regclass); ALTER TABLE ONLY notes ALTER COLUMN id SET DEFAULT nextval('notes_id_seq'::regclass); @@ -21831,6 +21864,8 @@ ALTER TABLE ONLY project_tracing_settings ALTER COLUMN id SET DEFAULT nextval('p ALTER TABLE ONLY projects ALTER COLUMN id SET DEFAULT nextval('projects_id_seq'::regclass); +ALTER TABLE ONLY projects_sync_events ALTER COLUMN id SET DEFAULT nextval('projects_sync_events_id_seq'::regclass); + ALTER TABLE ONLY prometheus_alert_events ALTER COLUMN id SET DEFAULT nextval('prometheus_alert_events_id_seq'::regclass); ALTER TABLE ONLY prometheus_alerts ALTER COLUMN id SET DEFAULT nextval('prometheus_alerts_id_seq'::regclass); @@ -23433,6 +23468,9 @@ ALTER TABLE ONLY namespace_statistics ALTER TABLE ONLY namespaces ADD CONSTRAINT namespaces_pkey PRIMARY KEY (id); +ALTER TABLE ONLY namespaces_sync_events + ADD CONSTRAINT namespaces_sync_events_pkey PRIMARY KEY (id); + ALTER TABLE ONLY note_diff_files ADD CONSTRAINT note_diff_files_pkey PRIMARY KEY (id); @@ -23706,6 +23744,9 @@ ALTER TABLE ONLY project_tracing_settings ALTER TABLE ONLY projects ADD CONSTRAINT projects_pkey PRIMARY KEY (id); +ALTER TABLE ONLY projects_sync_events + ADD CONSTRAINT projects_sync_events_pkey PRIMARY KEY (id); + ALTER TABLE ONLY prometheus_alert_events ADD CONSTRAINT prometheus_alert_events_pkey PRIMARY KEY (id); @@ -27016,6 +27057,8 @@ CREATE INDEX index_projects_on_star_count ON projects USING btree (star_count); CREATE INDEX index_projects_on_updated_at_and_id ON projects USING btree (updated_at, id); +CREATE INDEX index_projects_sync_events_on_project_id ON projects_sync_events USING btree (project_id); + CREATE UNIQUE INDEX index_prometheus_alert_event_scoped_payload_key ON prometheus_alert_events USING btree (prometheus_alert_id, payload_key); CREATE INDEX index_prometheus_alert_events_on_project_id_and_status ON prometheus_alert_events USING btree (project_id, status); @@ -30784,6 +30827,9 @@ ALTER TABLE ONLY security_findings ALTER TABLE ONLY packages_debian_project_component_files ADD CONSTRAINT fk_rails_bbe9ebfbd9 FOREIGN KEY (component_id) REFERENCES packages_debian_project_components(id) ON DELETE RESTRICT; +ALTER TABLE ONLY projects_sync_events + ADD CONSTRAINT fk_rails_bbf0eef59f FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE; + ALTER TABLE ONLY approval_merge_request_rules_users ADD CONSTRAINT fk_rails_bc8972fa55 FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE; diff --git a/ee/spec/lib/ee/gitlab/usage_data_non_sql_metrics_spec.rb b/ee/spec/lib/ee/gitlab/usage_data_non_sql_metrics_spec.rb index 0ce25d2dea58b4..c74cc34d563308 100644 --- a/ee/spec/lib/ee/gitlab/usage_data_non_sql_metrics_spec.rb +++ b/ee/spec/lib/ee/gitlab/usage_data_non_sql_metrics_spec.rb @@ -15,7 +15,7 @@ described_class.uncached_data end - expect(recorder.count).to eq(60) + expect(recorder.count).to eq(63) end end end diff --git a/lib/gitlab/database/gitlab_schemas.yml b/lib/gitlab/database/gitlab_schemas.yml index 68e8950198916a..ac544bff2f88f6 100644 --- a/lib/gitlab/database/gitlab_schemas.yml +++ b/lib/gitlab/database/gitlab_schemas.yml @@ -316,6 +316,7 @@ namespace_package_settings: :gitlab_main namespace_root_storage_statistics: :gitlab_main namespace_settings: :gitlab_main namespaces: :gitlab_main +namespaces_sync_events: :gitlab_main namespace_statistics: :gitlab_main note_diff_files: :gitlab_main notes: :gitlab_main @@ -411,6 +412,7 @@ project_repository_storage_moves: :gitlab_main project_security_settings: :gitlab_main project_settings: :gitlab_main projects: :gitlab_main +projects_sync_events: :gitlab_main project_statistics: :gitlab_main project_topics: :gitlab_main project_tracing_settings: :gitlab_main diff --git a/spec/models/ci/namespace_hierarchy_spec.rb b/spec/models/ci/namespace_hierarchy_spec.rb new file mode 100644 index 00000000000000..8fc132cb3e0b47 --- /dev/null +++ b/spec/models/ci/namespace_hierarchy_spec.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Ci::NamespaceHierarchy do + describe '.update_traversal_ids' do + subject { described_class.update_traversal_ids(old_self_and_ancestor_ids, new_self_and_ancestor_ids) } + + context 'when changing parent group' do + before do + described_class.create!( + namespace_id: old_self_and_ancestor_ids.last, + traversal_ids: old_self_and_ancestor_ids + ) + end + + context 'on a top level group' do + let_it_be(:root_group) { create(:group) } + let_it_be(:group) { create(:group) } + + let(:old_self_and_ancestor_ids) { [group.id] } + let(:new_self_and_ancestor_ids) { [root_group, group].map(&:id) } + + it 'updates the related ci_namespace_hierarchy record' do + subject + + expect(described_class.find_by!(namespace_id: group.id)).to have_attributes( + namespace_id: group.id, + traversal_ids: new_self_and_ancestor_ids + ) + end + end + + context 'on a group in the middle of the hierarchy' do + let_it_be(:root_group) { create(:group) } + let_it_be(:sub_group) { create(:group, parent: root_group) } + let_it_be(:group) { create(:group, parent: sub_group) } + + let(:old_self_and_ancestor_ids) { [root_group, sub_group].map(&:id) } + let(:new_self_and_ancestor_ids) { [sub_group].map(&:id) } + + before do + described_class.create!( + namespace_id: group.id, + traversal_ids: [root_group, sub_group, group].map(&:id) + ) + end + + it 'updates the related ci_namespace_hierarchy record' do + subject + + expect(described_class.find(sub_group.id)).to have_attributes( + namespace_id: sub_group.id, + traversal_ids: new_self_and_ancestor_ids + ) + end + + it 'updates the records for children groups with the new hierarchy' do + subject + + expect(described_class.find(group.id)).to have_attributes(traversal_ids: [sub_group, group].map(&:id)) + end + end + end + end +end diff --git a/spec/models/ci/project_hierarchy_spec.rb b/spec/models/ci/project_hierarchy_spec.rb new file mode 100644 index 00000000000000..2cfbaff44a35ba --- /dev/null +++ b/spec/models/ci/project_hierarchy_spec.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Ci::ProjectHierarchy do + let_it_be(:root_group) { create(:group) } + + describe '.update_traversal_ids' do + subject { described_class.update_traversal_ids(project.id, new_self_and_ancestor_ids) } + + before do + described_class.create!(project_id: project.id, traversal_ids: old_self_and_ancestor_ids) + end + + context 'on a top level project' do + let_it_be(:group) { create(:group) } + let_it_be(:project) { create(:project) } + + let(:old_self_and_ancestor_ids) { [] } + let(:new_self_and_ancestor_ids) { [root_group, group].map(&:id) } + + it 'updates the related ci_project_hierarchy record' do + subject + + expect(described_class.find(project.id)).to have_attributes(traversal_ids: new_self_and_ancestor_ids) + end + end + + context 'on a group in the middle of the hierarchy' do + let_it_be(:sub_group) { create(:group, parent: root_group) } + let_it_be(:group) { create(:group, parent: sub_group) } + let_it_be(:project) { create(:project, group: group) } + + let(:old_self_and_ancestor_ids) { [root_group, sub_group].map(&:id) } + let(:new_self_and_ancestor_ids) { [sub_group].map(&:id) } + + it 'updates the related ci_project_hierarchy record' do + subject + + expect(described_class.find(project.id)).to have_attributes(traversal_ids: new_self_and_ancestor_ids) + end + end + end + + describe '.update_namespace_traversal_ids' do + subject { described_class.update_namespace_traversal_ids(old_self_and_ancestor_ids, new_self_and_ancestor_ids) } + + before do + described_class.create!(project_id: project.id, traversal_ids: old_self_and_ancestor_ids) + end + + context 'on a top level project' do + let_it_be(:group) { create(:group) } + let_it_be(:project) { create(:project, group: group) } + + let(:old_self_and_ancestor_ids) { [group.id] } + let(:new_self_and_ancestor_ids) { [root_group, group].map(&:id) } + + it 'updates the related ci_project_hierarchy record' do + subject + + expect(described_class.find(project.id)).to have_attributes(traversal_ids: new_self_and_ancestor_ids) + end + end + + context 'on a group with 3 levels of hierarchy' do + let_it_be(:sub_group) { create(:group, parent: root_group) } + let_it_be(:group) { create(:group, parent: sub_group) } + let_it_be(:project) { create(:project, group: group) } + + let(:old_self_and_ancestor_ids) { [root_group, sub_group].map(&:id) } + let(:new_self_and_ancestor_ids) { [sub_group].map(&:id) } + + it 'updates the related ci_project_hierarchy record' do + subject + + expect(described_class.find(project.id)).to have_attributes(traversal_ids: new_self_and_ancestor_ids) + end + end + end +end diff --git a/spec/models/namespace_spec.rb b/spec/models/namespace_spec.rb index 8f5860c799c4df..86d4728923ac93 100644 --- a/spec/models/namespace_spec.rb +++ b/spec/models/namespace_spec.rb @@ -2066,4 +2066,49 @@ def project_rugged(project) it { is_expected.to be(true) } end end + + describe 'namespace syncing' do + subject { child_group } + + let_it_be(:parent_group) { create(:group) } + + let(:child_group) { create(:group, parent: parent_group) } + + it 'creates a matching namespaces_sync_event record for record without parent' do + expect(Namespaces::SyncEvent.where(new_traversal_ids: [parent_group.id])).to contain_exactly( + an_object_having_attributes( traversal_ids: [] ) + ) + end + + it 'creates a matching namespaces_sync_event record for record with parent' do + expect(Namespaces::SyncEvent.where(new_traversal_ids: [parent_group.id, child_group.id])).to contain_exactly( + an_object_having_attributes( traversal_ids: [] ) + ) + end + + context 'when update to new parent is saved' do + before do + subject.parent = nil + subject.save! + end + + it 'creates a matching namespaces_sync_event record' do + sync_events = Namespaces::SyncEvent.where(new_traversal_ids: [subject.id]) + + expect(sync_events).to contain_exactly( + an_object_having_attributes( + traversal_ids: [parent_group.id, subject.id], + new_traversal_ids: [subject.id] + ) + ) + end + end + + specify 'update kicks off ProcessNamespaceEventsWorker' do + expect(Ci::ProcessNamespaceEventsWorker).to receive(:perform_async).twice + + subject.parent = nil + subject.save! + end + end end diff --git a/spec/models/project_spec.rb b/spec/models/project_spec.rb index e37560e1fad4ed..46676f9c1a92b8 100644 --- a/spec/models/project_spec.rb +++ b/spec/models/project_spec.rb @@ -7173,7 +7173,7 @@ def has_external_wiki end describe '#enabled_group_deploy_keys' do - let_it_be(:project) { create(:project) } + let(:project) { create(:project) } subject { project.enabled_group_deploy_keys } @@ -7202,7 +7202,7 @@ def has_external_wiki end context 'and another group has a group deploy key enabled' do - let_it_be(:group_deploy_key) { create(:group_deploy_key) } + let(:group_deploy_key) { create(:group_deploy_key) } it 'does not return this group deploy key' do another_group = create(:group) @@ -7390,6 +7390,40 @@ def has_external_wiki end end + describe 'updating parent namespace' do + subject { project } + + let_it_be(:parent_namespace) { create(:namespace) } + + let(:project) { create(:project) } + let!(:original_namespace) { subject.namespace } + + it 'creates a matching projects_sync_event record' do + expect(Projects::SyncEvent.find_by(project_id: subject.id)).to have_attributes({ + project_id: subject.id, traversal_ids: [], new_traversal_ids: [original_namespace.id] + }) + + subject.namespace = parent_namespace + subject.save! + + sync_events = Projects::SyncEvent.where(project_id: subject.id) + + expect(sync_events).to match( + [ + an_object_having_attributes(project_id: subject.id, traversal_ids: [], new_traversal_ids: [original_namespace.id]), + an_object_having_attributes(project_id: subject.id, traversal_ids: [original_namespace.id], new_traversal_ids: [parent_namespace.id]) + ] + ) + end + + it 'kicks off ProcessProjectEventsWorker' do + expect(Ci::ProcessProjectEventsWorker).to receive(:perform_async) + + subject.namespace = parent_namespace + subject.save! + end + end + def finish_job(export_job) export_job.start export_job.finish diff --git a/spec/models/user_spec.rb b/spec/models/user_spec.rb index b5d4614d206063..b01c71a95682f1 100644 --- a/spec/models/user_spec.rb +++ b/spec/models/user_spec.rb @@ -1540,7 +1540,8 @@ allow(user).to receive(:update_highest_role) end - expect(SecureRandom).to receive(:hex).and_return('3b8ca303') + allow(SecureRandom).to receive(:hex).and_call_original + expect(SecureRandom).to receive(:hex).with(no_args).and_return('3b8ca303') user = create(:user) diff --git a/spec/services/notification_service_spec.rb b/spec/services/notification_service_spec.rb index fbf5b1833658ad..2295a20cb0d646 100644 --- a/spec/services/notification_service_spec.rb +++ b/spec/services/notification_service_spec.rb @@ -2942,7 +2942,7 @@ def create_member! describe 'Pipelines', :deliver_mails_inline do describe '#pipeline_finished' do - let_it_be(:project) { create(:project, :public, :repository) } + let_it_be_with_reload(:project) { create(:project, :public, :repository) } let_it_be(:u_member) { create(:user) } let_it_be(:u_watcher) { create_user_with_notification(:watch, 'watcher') } @@ -3088,6 +3088,7 @@ def create_pipeline(user, status) let(:group_notification_email) { 'user+group@example.com' } before do + project.reload group = create(:group) project.update!(group: group) diff --git a/spec/workers/ci/process_namespace_events_worker_spec.rb b/spec/workers/ci/process_namespace_events_worker_spec.rb new file mode 100644 index 00000000000000..5c613ca8d5ec11 --- /dev/null +++ b/spec/workers/ci/process_namespace_events_worker_spec.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Ci::ProcessNamespaceEventsWorker do + describe '#perform' do + subject(:perform) { described_class.new.perform } + + context 'on namespace with project changing parent multiple times' do + let_it_be(:group) { create(:group) } + let_it_be(:parent_group_1) { create(:group) } + let_it_be(:parent_group_2) { create(:group) } + + let(:expected_initial_sync_event_count) { 3 } + let(:new_sync_events) do + [ + { traversal_ids: [group.id], new_traversal_ids: [parent_group_1.id, group.id] }, + { traversal_ids: [parent_group_1.id, group.id], new_traversal_ids: [parent_group_2.id, group.id] }, + { traversal_ids: [parent_group_2.id], new_traversal_ids: [parent_group_1.id, parent_group_2.id] } + ] + end + + before do + # Group creation from let_it_be causes 3 records to be implicitly created + expect(Namespaces::SyncEvent.count).to eq(expected_initial_sync_event_count) + + Namespaces::SyncEvent.insert_all! new_sync_events + end + + include_examples 'an idempotent worker' do + it 'consumes all sync events' do + expect { subject }.to change(Namespaces::SyncEvent, :count).from(expected_initial_sync_event_count + new_sync_events.count).to(0) + end + + it 'syncs namespace hierarchy traversal ids' do + expect { subject }.to change(Ci::NamespaceHierarchy, :all).to contain_exactly( + an_object_having_attributes(namespace_id: parent_group_1.id, traversal_ids: [parent_group_1.id]), + an_object_having_attributes(namespace_id: parent_group_2.id, traversal_ids: [parent_group_1.id, parent_group_2.id]), + an_object_having_attributes(namespace_id: group.id, traversal_ids: [parent_group_1.id, parent_group_2.id, group.id]) + ) + end + end + end + end +end diff --git a/spec/workers/ci/process_project_events_worker_spec.rb b/spec/workers/ci/process_project_events_worker_spec.rb new file mode 100644 index 00000000000000..0087dacca763af --- /dev/null +++ b/spec/workers/ci/process_project_events_worker_spec.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Ci::ProcessProjectEventsWorker do + describe '#perform' do + subject(:perform) { described_class.new.perform } + + include_examples 'an idempotent worker' do + let_it_be(:group) { create(:group) } + let_it_be(:project) { create(:project, group: group) } + let_it_be(:parent_group_1) { create(:group) } + + let(:expected_initial_sync_event_count) { 1 } + let(:new_sync_records) do + [{ project_id: project.id, traversal_ids: [group.id], new_traversal_ids: [parent_group_1.id, group.id] }] + end + + before do + # Project creation from let_it_be causes one record to be implicitly created + expect(Projects::SyncEvent.where(project_id: project.id).count).to eq(expected_initial_sync_event_count) + + Projects::SyncEvent.insert_all! new_sync_records + end + + it 'consumes all sync events' do + expect { subject }.to change(Projects::SyncEvent, :count).from(expected_initial_sync_event_count + new_sync_records.count).to(0) + end + + it 'syncs project hierarchy traversal ids' do + expect { subject }.to change(Ci::ProjectHierarchy, :all).to contain_exactly( + an_object_having_attributes(project_id: project.id, traversal_ids: [parent_group_1.id, group.id]) + ) + end + end + + context 'on project changing parent multiple times' do + let_it_be(:group) { create(:group) } + let_it_be(:project) { create(:project, group: group) } + let_it_be(:parent_group_1) { create(:group) } + let_it_be(:parent_group_2) { create(:group) } + + before do + Projects::SyncEvent.insert_all! [ + { project_id: project.id, traversal_ids: [group.id], new_traversal_ids: [parent_group_1.id, group.id] }, + { project_id: project.id, traversal_ids: [parent_group_1.id, group.id], new_traversal_ids: [parent_group_2.id, group.id] }, + { project_id: project.id, traversal_ids: [parent_group_2.id, group.id], new_traversal_ids: [parent_group_1.id, parent_group_2.id, group.id] } + ] + end + + it 'consumes all sync events' do + expect { subject }.to change(Projects::SyncEvent, :count).to(0) + end + + it 'syncs project hierarchy traversal ids' do + expect { subject }.to change(Ci::ProjectHierarchy.where(project_id: project.id), :all).to contain_exactly( + an_object_having_attributes(traversal_ids: [parent_group_1.id, parent_group_2.id, group.id]) + ) + end + end + end +end -- GitLab From a6dc11d1cd0a089a57b147d2879a8b11b2c1fd6e Mon Sep 17 00:00:00 2001 From: Pedro Pombeiro Date: Thu, 4 Nov 2021 14:52:11 +0100 Subject: [PATCH 04/13] Process sync events in batches --- app/workers/ci/process_namespace_events_worker.rb | 12 +++++++++--- app/workers/ci/process_project_events_worker.rb | 12 +++++++++--- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/app/workers/ci/process_namespace_events_worker.rb b/app/workers/ci/process_namespace_events_worker.rb index ed628f43d80e77..fac9e865f42127 100644 --- a/app/workers/ci/process_namespace_events_worker.rb +++ b/app/workers/ci/process_namespace_events_worker.rb @@ -13,10 +13,16 @@ class ProcessNamespaceEventsWorker def perform # rubocop: disable CodeReuse/ActiveRecord - while first_event = Namespaces::SyncEvent.lock('FOR UPDATE SKIP LOCKED').order(:id).first - sync_namespace_traversal_ids(first_event) if first_event.new_traversal_ids.present? + Namespaces::SyncEvent.find_in_batches(batch_size: 100) do |batch| + batch.each { |evt| sync_namespace_traversal_ids(evt) if evt.new_traversal_ids.present? } - first_event.delete + select_query = Namespaces::SyncEvent + .id_in(batch) + .select(:id) + .lock!('FOR UPDATE SKIP LOCKED') + .to_sql + + Namespaces::SyncEvent.connection.execute("DELETE FROM #{Namespaces::SyncEvent.quoted_table_name} WHERE id IN (#{select_query})") end # rubocop: enable CodeReuse/ActiveRecord end diff --git a/app/workers/ci/process_project_events_worker.rb b/app/workers/ci/process_project_events_worker.rb index 7191e38c2b8e60..d411de6ab1eb6d 100644 --- a/app/workers/ci/process_project_events_worker.rb +++ b/app/workers/ci/process_project_events_worker.rb @@ -13,10 +13,16 @@ class ProcessProjectEventsWorker def perform # rubocop: disable CodeReuse/ActiveRecord - while first_event = Projects::SyncEvent.lock('FOR UPDATE SKIP LOCKED').order(:id).first - sync_project_traversal_ids(first_event) if first_event.new_traversal_ids.present? + Projects::SyncEvent.find_in_batches(batch_size: 100) do |batch| + batch.each { |evt| sync_project_traversal_ids(evt) if evt.new_traversal_ids.present? } - first_event.delete + select_query = Projects::SyncEvent + .id_in(batch) + .select(:id) + .lock!('FOR UPDATE SKIP LOCKED') + .to_sql + + Projects::SyncEvent.connection.execute("DELETE FROM #{Projects::SyncEvent.quoted_table_name} WHERE id IN (#{select_query})") end # rubocop: enable CodeReuse/ActiveRecord end -- GitLab From 9ec5883a6748a4241a69bf1b085b2df07ad77859 Mon Sep 17 00:00:00 2001 From: Pedro Pombeiro Date: Mon, 8 Nov 2021 16:11:02 +0100 Subject: [PATCH 05/13] Address MR review comments --- app/models/namespace.rb | 8 ++--- app/models/project.rb | 9 ++--- .../ci/process_namespace_events_worker.rb | 4 ++- .../ci/process_project_events_worker.rb | 4 ++- spec/models/namespace_spec.rb | 33 +++++++++++++++---- spec/models/project_spec.rb | 21 ++++++++++++ spec/requests/api/project_import_spec.rb | 2 +- 7 files changed, 63 insertions(+), 18 deletions(-) diff --git a/app/models/namespace.rb b/app/models/namespace.rb index c1244651f8df6d..e6d6329cec58d2 100644 --- a/app/models/namespace.rb +++ b/app/models/namespace.rb @@ -107,9 +107,9 @@ class Namespace < ApplicationRecord delegate :name, to: :owner, allow_nil: true, prefix: true delegate :avatar_url, to: :owner, allow_nil: true - before_update :save_previous_traversal_ids, if: :parent_changed? - after_update :create_sync_event, if: :saved_change_to_parent_id? - after_create :create_sync_event + before_update :save_previous_traversal_ids, if: -> { :parent_changed? && @previous_traversal_ids.nil? } + after_update :schedule_ci_hierarchy_update, if: :saved_change_to_parent_id? + after_create :schedule_ci_hierarchy_update after_commit :refresh_access_of_projects_invited_groups, on: :update, if: -> { previous_changes.key?('share_with_group_lock') } @@ -626,7 +626,7 @@ def save_previous_traversal_ids @previous_traversal_ids = (previous_namespace&.self_and_ancestor_ids(hierarchy_order: :desc) || []) + [id] end - def create_sync_event + def schedule_ci_hierarchy_update Namespaces::SyncEvent.insert({ traversal_ids: @previous_traversal_ids || [], new_traversal_ids: self_and_ancestor_ids(hierarchy_order: :desc) diff --git a/app/models/project.rb b/app/models/project.rb index 2c6b8167fd4af6..d6ea542df3f0b6 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -101,9 +101,9 @@ class Project < ApplicationRecord after_save :update_project_statistics, if: :saved_change_to_namespace_id? - before_update :save_previous_traversal_ids, if: :namespace_id_changed? - after_update :create_sync_event, if: :saved_change_to_namespace_id? - after_create :create_sync_event + before_update :save_previous_traversal_ids, if: -> { :namespace_id_changed? && @previous_traversal_ids.nil? } + after_update :schedule_ci_hierarchy_update, if: :saved_change_to_namespace_id? + after_create :schedule_ci_hierarchy_update after_save :create_import_state, if: ->(project) { project.import? && project.import_state.nil? } @@ -2965,12 +2965,13 @@ def save_previous_traversal_ids @previous_traversal_ids = previous_namespace&.self_and_ancestor_ids(hierarchy_order: :desc) end - def create_sync_event + def schedule_ci_hierarchy_update Projects::SyncEvent.insert({ project_id: id, traversal_ids: @previous_traversal_ids || [], new_traversal_ids: namespace.self_and_ancestor_ids(hierarchy_order: :desc) }) + @previous_traversal_ids = nil run_after_commit do Ci::ProcessProjectEventsWorker.perform_async diff --git a/app/workers/ci/process_namespace_events_worker.rb b/app/workers/ci/process_namespace_events_worker.rb index fac9e865f42127..b9ae0d2ef138c5 100644 --- a/app/workers/ci/process_namespace_events_worker.rb +++ b/app/workers/ci/process_namespace_events_worker.rb @@ -11,9 +11,11 @@ class ProcessNamespaceEventsWorker deduplicate :until_executed idempotent! + BATCH_SIZE = 100 + def perform # rubocop: disable CodeReuse/ActiveRecord - Namespaces::SyncEvent.find_in_batches(batch_size: 100) do |batch| + Namespaces::SyncEvent.find_in_batches(batch_size: BATCH_SIZE) do |batch| batch.each { |evt| sync_namespace_traversal_ids(evt) if evt.new_traversal_ids.present? } select_query = Namespaces::SyncEvent diff --git a/app/workers/ci/process_project_events_worker.rb b/app/workers/ci/process_project_events_worker.rb index d411de6ab1eb6d..2e91a56a50d855 100644 --- a/app/workers/ci/process_project_events_worker.rb +++ b/app/workers/ci/process_project_events_worker.rb @@ -11,9 +11,11 @@ class ProcessProjectEventsWorker deduplicate :until_executed idempotent! + BATCH_SIZE = 100 + def perform # rubocop: disable CodeReuse/ActiveRecord - Projects::SyncEvent.find_in_batches(batch_size: 100) do |batch| + Projects::SyncEvent.find_in_batches(batch_size: BATCH_SIZE) do |batch| batch.each { |evt| sync_project_traversal_ids(evt) if evt.new_traversal_ids.present? } select_query = Projects::SyncEvent diff --git a/spec/models/namespace_spec.rb b/spec/models/namespace_spec.rb index 86d4728923ac93..ae397156ed6597 100644 --- a/spec/models/namespace_spec.rb +++ b/spec/models/namespace_spec.rb @@ -2071,6 +2071,7 @@ def project_rugged(project) subject { child_group } let_it_be(:parent_group) { create(:group) } + let_it_be(:parent_group2) { create(:group) } let(:child_group) { create(:group, parent: parent_group) } @@ -2087,27 +2088,45 @@ def project_rugged(project) end context 'when update to new parent is saved' do - before do - subject.parent = nil - subject.save! - end + let(:sync_events) { Namespaces::SyncEvent.all.to_a.select { |r| r.traversal_ids.include?(subject.id) } } it 'creates a matching namespaces_sync_event record' do - sync_events = Namespaces::SyncEvent.where(new_traversal_ids: [subject.id]) + subject.parent = parent_group2 + subject.save! expect(sync_events).to contain_exactly( an_object_having_attributes( traversal_ids: [parent_group.id, subject.id], - new_traversal_ids: [subject.id] + new_traversal_ids: [parent_group2.id, subject.id] ) ) end end + context 'when update to new parent is saved twice in same transaction' do + let(:sync_events) { Namespaces::SyncEvent.all.to_a.select { |r| r.traversal_ids.include?(subject.id) } } + + it 'creates two namespaces_sync_event records' do + intermediate_group = create(:group) + Namespace.transaction do + subject.parent = intermediate_group + subject.save! + + subject.parent = parent_group2 + subject.save! + end + + expect(sync_events).to match([ + an_object_having_attributes(traversal_ids: [parent_group.id, subject.id], new_traversal_ids: [intermediate_group.id, subject.id]), + an_object_having_attributes(traversal_ids: [intermediate_group.id, subject.id], new_traversal_ids: [parent_group2.id, subject.id]) + ]) + end + end + specify 'update kicks off ProcessNamespaceEventsWorker' do expect(Ci::ProcessNamespaceEventsWorker).to receive(:perform_async).twice - subject.parent = nil + subject.parent = parent_group2 subject.save! end end diff --git a/spec/models/project_spec.rb b/spec/models/project_spec.rb index 46676f9c1a92b8..3222985a76688d 100644 --- a/spec/models/project_spec.rb +++ b/spec/models/project_spec.rb @@ -7416,6 +7416,27 @@ def has_external_wiki ) end + context 'twice in the same transaction' do + let(:sync_events) { Projects::SyncEvent.where(project_id: subject.id) } + + it 'creates two update projects_sync_event records' do + intermediate_namespace = create(:namespace) + Project.transaction do + subject.namespace = intermediate_namespace + subject.save! + + subject.namespace = parent_namespace + subject.save! + end + + expect(sync_events).to match([ + an_object_having_attributes(project_id: subject.id, traversal_ids: [], new_traversal_ids: [original_namespace.id]), + an_object_having_attributes(project_id: subject.id, traversal_ids: [original_namespace.id], new_traversal_ids: [intermediate_namespace.id]), + an_object_having_attributes(project_id: subject.id, traversal_ids: [intermediate_namespace.id], new_traversal_ids: [parent_namespace.id]) + ]) + end + end + it 'kicks off ProcessProjectEventsWorker' do expect(Ci::ProcessProjectEventsWorker).to receive(:perform_async) diff --git a/spec/requests/api/project_import_spec.rb b/spec/requests/api/project_import_spec.rb index 097d374640c66b..3ed08afd57db4d 100644 --- a/spec/requests/api/project_import_spec.rb +++ b/spec/requests/api/project_import_spec.rb @@ -47,7 +47,7 @@ it 'executes a limited number of queries' do control_count = ActiveRecord::QueryRecorder.new { subject }.count - expect(control_count).to be <= 101 + expect(control_count).to be <= 104 end it 'schedules an import using a namespace' do -- GitLab From 936c0796ab66165f72374ddd055b7f8d61a862b4 Mon Sep 17 00:00:00 2001 From: Pedro Pombeiro Date: Mon, 8 Nov 2021 19:02:41 +0100 Subject: [PATCH 06/13] Add namespace_id field to Namespaces::SyncEvent --- app/models/namespace.rb | 1 + db/migrate/20211011140932_create_namespaces_sync_events.rb | 1 + db/structure.sql | 6 ++++++ spec/workers/ci/process_namespace_events_worker_spec.rb | 6 +++--- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/app/models/namespace.rb b/app/models/namespace.rb index e6d6329cec58d2..945c56093e7789 100644 --- a/app/models/namespace.rb +++ b/app/models/namespace.rb @@ -628,6 +628,7 @@ def save_previous_traversal_ids def schedule_ci_hierarchy_update Namespaces::SyncEvent.insert({ + namespace_id: id, traversal_ids: @previous_traversal_ids || [], new_traversal_ids: self_and_ancestor_ids(hierarchy_order: :desc) }) diff --git a/db/migrate/20211011140932_create_namespaces_sync_events.rb b/db/migrate/20211011140932_create_namespaces_sync_events.rb index 1f26d0db906667..bf2dbb41623bd9 100644 --- a/db/migrate/20211011140932_create_namespaces_sync_events.rb +++ b/db/migrate/20211011140932_create_namespaces_sync_events.rb @@ -3,6 +3,7 @@ class CreateNamespacesSyncEvents < Gitlab::Database::Migration[1.0] def change create_table :namespaces_sync_events do |t| + t.references :namespace, null: false, index: true, foreign_key: { on_delete: :cascade } t.integer :traversal_ids, array: true, default: [], null: false t.integer :new_traversal_ids, array: true, default: [], null: false end diff --git a/db/structure.sql b/db/structure.sql index 447fcecae7a726..12988dec77e7dd 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -16395,6 +16395,7 @@ ALTER SEQUENCE namespaces_id_seq OWNED BY namespaces.id; CREATE TABLE namespaces_sync_events ( id bigint NOT NULL, + namespace_id bigint NOT NULL, traversal_ids integer[] DEFAULT '{}'::integer[] NOT NULL, new_traversal_ids integer[] DEFAULT '{}'::integer[] NOT NULL ); @@ -26595,6 +26596,8 @@ CREATE INDEX index_namespaces_on_type_and_id ON namespaces USING btree (type, id CREATE INDEX index_namespaces_public_groups_name_id ON namespaces USING btree (name, id) WHERE (((type)::text = 'Group'::text) AND (visibility_level = 20)); +CREATE INDEX index_namespaces_sync_events_on_namespace_id ON namespaces_sync_events USING btree (namespace_id); + CREATE INDEX index_non_requested_project_members_on_source_id_and_type ON members USING btree (source_id, source_type) WHERE ((requested_at IS NULL) AND ((type)::text = 'ProjectMember'::text)); CREATE UNIQUE INDEX index_note_diff_files_on_diff_note_id ON note_diff_files USING btree (diff_note_id); @@ -30653,6 +30656,9 @@ ALTER TABLE ONLY gpg_keys ALTER TABLE ONLY analytics_language_trend_repository_languages ADD CONSTRAINT fk_rails_9d851d566c FOREIGN KEY (programming_language_id) REFERENCES programming_languages(id) ON DELETE CASCADE; +ALTER TABLE ONLY namespaces_sync_events + ADD CONSTRAINT fk_rails_9da32a0431 FOREIGN KEY (namespace_id) REFERENCES namespaces(id) ON DELETE CASCADE; + ALTER TABLE ONLY badges ADD CONSTRAINT fk_rails_9df4a56538 FOREIGN KEY (group_id) REFERENCES namespaces(id) ON DELETE CASCADE; diff --git a/spec/workers/ci/process_namespace_events_worker_spec.rb b/spec/workers/ci/process_namespace_events_worker_spec.rb index 5c613ca8d5ec11..9f9837d7d3fc37 100644 --- a/spec/workers/ci/process_namespace_events_worker_spec.rb +++ b/spec/workers/ci/process_namespace_events_worker_spec.rb @@ -14,9 +14,9 @@ let(:expected_initial_sync_event_count) { 3 } let(:new_sync_events) do [ - { traversal_ids: [group.id], new_traversal_ids: [parent_group_1.id, group.id] }, - { traversal_ids: [parent_group_1.id, group.id], new_traversal_ids: [parent_group_2.id, group.id] }, - { traversal_ids: [parent_group_2.id], new_traversal_ids: [parent_group_1.id, parent_group_2.id] } + { namespace_id: group.id, traversal_ids: [group.id], new_traversal_ids: [parent_group_1.id, group.id] }, + { namespace_id: group.id, traversal_ids: [parent_group_1.id, group.id], new_traversal_ids: [parent_group_2.id, group.id] }, + { namespace_id: parent_group_2.id, traversal_ids: [parent_group_2.id], new_traversal_ids: [parent_group_1.id, parent_group_2.id] } ] end -- GitLab From e8f0d1efd929c82d67527a08846a5d1e57311035 Mon Sep 17 00:00:00 2001 From: Pedro Pombeiro Date: Tue, 9 Nov 2021 14:48:27 +0100 Subject: [PATCH 07/13] Rename worker classes --- app/models/namespace.rb | 2 +- app/models/project.rb | 2 +- app/workers/all_queues.yml | 8 ++++---- ..._worker.rb => process_namespace_sync_events_worker.rb} | 2 +- ...ts_worker.rb => process_project_sync_events_worker.rb} | 2 +- config/sidekiq_queues.yml | 4 ++-- spec/models/namespace_spec.rb | 4 ++-- spec/models/project_spec.rb | 4 ++-- ...ec.rb => process_namespace_sync_events_worker_spec.rb} | 4 ++-- ...spec.rb => process_project_sync_events_worker_spec.rb} | 2 +- 10 files changed, 17 insertions(+), 17 deletions(-) rename app/workers/ci/{process_namespace_events_worker.rb => process_namespace_sync_events_worker.rb} (96%) rename app/workers/ci/{process_project_events_worker.rb => process_project_sync_events_worker.rb} (96%) rename spec/workers/ci/{process_namespace_events_worker_spec.rb => process_namespace_sync_events_worker_spec.rb} (93%) rename spec/workers/ci/{process_project_events_worker_spec.rb => process_project_sync_events_worker_spec.rb} (97%) diff --git a/app/models/namespace.rb b/app/models/namespace.rb index 945c56093e7789..a793f436b9c8a1 100644 --- a/app/models/namespace.rb +++ b/app/models/namespace.rb @@ -635,7 +635,7 @@ def schedule_ci_hierarchy_update @previous_traversal_ids = nil run_after_commit do - Ci::ProcessNamespaceEventsWorker.perform_async + Ci::ProcessNamespaceSyncEventsWorker.perform_async end end end diff --git a/app/models/project.rb b/app/models/project.rb index d6ea542df3f0b6..0684fd7d894f77 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -2974,7 +2974,7 @@ def schedule_ci_hierarchy_update @previous_traversal_ids = nil run_after_commit do - Ci::ProcessProjectEventsWorker.perform_async + Ci::ProcessProjectSyncEventsWorker.perform_async end end end diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index 3e25cf0dcf05db..f8fef9061e52e9 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -1951,8 +1951,8 @@ :weight: 1 :idempotent: true :tags: [] -- :name: ci_process_namespace_events - :worker_name: Ci::ProcessNamespaceEventsWorker +- :name: ci_process_namespace_sync_events + :worker_name: Ci::ProcessNamespaceSyncEventsWorker :feature_category: :subgroups :has_external_dependencies: :urgency: :low @@ -1960,8 +1960,8 @@ :weight: 1 :idempotent: true :tags: [] -- :name: ci_process_project_events - :worker_name: Ci::ProcessProjectEventsWorker +- :name: ci_process_project_sync_events + :worker_name: Ci::ProcessProjectSyncEventsWorker :feature_category: :subgroups :has_external_dependencies: :urgency: :low diff --git a/app/workers/ci/process_namespace_events_worker.rb b/app/workers/ci/process_namespace_sync_events_worker.rb similarity index 96% rename from app/workers/ci/process_namespace_events_worker.rb rename to app/workers/ci/process_namespace_sync_events_worker.rb index b9ae0d2ef138c5..01f9a62489c648 100644 --- a/app/workers/ci/process_namespace_events_worker.rb +++ b/app/workers/ci/process_namespace_sync_events_worker.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true module Ci - class ProcessNamespaceEventsWorker + class ProcessNamespaceSyncEventsWorker include ApplicationWorker data_consistency :always diff --git a/app/workers/ci/process_project_events_worker.rb b/app/workers/ci/process_project_sync_events_worker.rb similarity index 96% rename from app/workers/ci/process_project_events_worker.rb rename to app/workers/ci/process_project_sync_events_worker.rb index 2e91a56a50d855..e1ecc0b97c8024 100644 --- a/app/workers/ci/process_project_events_worker.rb +++ b/app/workers/ci/process_project_sync_events_worker.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true module Ci - class ProcessProjectEventsWorker + class ProcessProjectSyncEventsWorker include ApplicationWorker data_consistency :always diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml index e1591f898af1e1..77c901c3d09475 100644 --- a/config/sidekiq_queues.yml +++ b/config/sidekiq_queues.yml @@ -73,9 +73,9 @@ - 1 - - ci_delete_objects - 1 -- - ci_process_namespace_events +- - ci_process_namespace_sync_events - 1 -- - ci_process_project_events +- - ci_process_project_sync_events - 1 - - container_repository - 1 diff --git a/spec/models/namespace_spec.rb b/spec/models/namespace_spec.rb index ae397156ed6597..1d94c1a7f33765 100644 --- a/spec/models/namespace_spec.rb +++ b/spec/models/namespace_spec.rb @@ -2123,8 +2123,8 @@ def project_rugged(project) end end - specify 'update kicks off ProcessNamespaceEventsWorker' do - expect(Ci::ProcessNamespaceEventsWorker).to receive(:perform_async).twice + specify 'update kicks off ProcessNamespaceSyncEventsWorker' do + expect(Ci::ProcessNamespaceSyncEventsWorker).to receive(:perform_async).twice subject.parent = parent_group2 subject.save! diff --git a/spec/models/project_spec.rb b/spec/models/project_spec.rb index 3222985a76688d..af1d149364485e 100644 --- a/spec/models/project_spec.rb +++ b/spec/models/project_spec.rb @@ -7437,8 +7437,8 @@ def has_external_wiki end end - it 'kicks off ProcessProjectEventsWorker' do - expect(Ci::ProcessProjectEventsWorker).to receive(:perform_async) + it 'kicks off ProcessProjectSyncEventsWorker' do + expect(Ci::ProcessProjectSyncEventsWorker).to receive(:perform_async) subject.namespace = parent_namespace subject.save! diff --git a/spec/workers/ci/process_namespace_events_worker_spec.rb b/spec/workers/ci/process_namespace_sync_events_worker_spec.rb similarity index 93% rename from spec/workers/ci/process_namespace_events_worker_spec.rb rename to spec/workers/ci/process_namespace_sync_events_worker_spec.rb index 9f9837d7d3fc37..3490f1b84f326b 100644 --- a/spec/workers/ci/process_namespace_events_worker_spec.rb +++ b/spec/workers/ci/process_namespace_sync_events_worker_spec.rb @@ -2,11 +2,11 @@ require 'spec_helper' -RSpec.describe Ci::ProcessNamespaceEventsWorker do +RSpec.describe Ci::ProcessNamespaceSyncEventsWorker do describe '#perform' do subject(:perform) { described_class.new.perform } - context 'on namespace with project changing parent multiple times' do + context 'on namespace with group changing parent multiple times' do let_it_be(:group) { create(:group) } let_it_be(:parent_group_1) { create(:group) } let_it_be(:parent_group_2) { create(:group) } diff --git a/spec/workers/ci/process_project_events_worker_spec.rb b/spec/workers/ci/process_project_sync_events_worker_spec.rb similarity index 97% rename from spec/workers/ci/process_project_events_worker_spec.rb rename to spec/workers/ci/process_project_sync_events_worker_spec.rb index 0087dacca763af..56ee23828ea4bd 100644 --- a/spec/workers/ci/process_project_events_worker_spec.rb +++ b/spec/workers/ci/process_project_sync_events_worker_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe Ci::ProcessProjectEventsWorker do +RSpec.describe Ci::ProcessProjectSyncEventsWorker do describe '#perform' do subject(:perform) { described_class.new.perform } -- GitLab From a82c8001fa2773053104e7b2b57170cc4a4a5ccf Mon Sep 17 00:00:00 2001 From: Pedro Pombeiro Date: Tue, 9 Nov 2021 16:13:41 +0100 Subject: [PATCH 08/13] Address MR review comments --- app/models/ci/namespace_hierarchy.rb | 9 ++++++ app/models/ci/project_hierarchy.rb | 8 +++++ .../process_hierarchy_sync_events_service.rb | 27 +++++++++++++++++ .../process_namespace_sync_events_worker.rb | 30 +------------------ .../ci/process_project_sync_events_worker.rb | 29 +----------------- 5 files changed, 46 insertions(+), 57 deletions(-) create mode 100644 app/services/ci/process_hierarchy_sync_events_service.rb diff --git a/app/models/ci/namespace_hierarchy.rb b/app/models/ci/namespace_hierarchy.rb index de6b0a91a14855..945bf0a8f5619b 100644 --- a/app/models/ci/namespace_hierarchy.rb +++ b/app/models/ci/namespace_hierarchy.rb @@ -10,6 +10,15 @@ def update_traversal_ids(old_self_and_ancestor_ids, new_self_and_ancestor_ids) where('traversal_ids @> ARRAY[?]::int[]', old_self_and_ancestor_ids) .update_all("traversal_ids = ARRAY#{new_self_and_ancestor_ids}::int[] || traversal_ids[#{old_self_and_ancestor_ids.length + 1}:]") end + + def sync_traversal_ids(event) + if event.traversal_ids.present? + update_traversal_ids(event.traversal_ids, event.new_traversal_ids) + ::Ci::ProjectHierarchy.update_namespace_traversal_ids(event.traversal_ids, event.new_traversal_ids) + else + create!(namespace_id: event.namespace_id, traversal_ids: event.new_traversal_ids) + end + end end end end diff --git a/app/models/ci/project_hierarchy.rb b/app/models/ci/project_hierarchy.rb index c9b235a5da3d4e..49ab42e70d9953 100644 --- a/app/models/ci/project_hierarchy.rb +++ b/app/models/ci/project_hierarchy.rb @@ -15,6 +15,14 @@ def update_namespace_traversal_ids(old_ancestor_ids, new_ancestor_ids) update_all_traversal_ids(where('traversal_ids @> ARRAY[?]::int[]', old_ancestor_ids), old_ancestor_ids, new_ancestor_ids) end + def sync_traversal_ids(event) + if event.traversal_ids.present? + update_traversal_ids(event.project_id, event.new_traversal_ids) + else + create!(project_id: event.project_id, traversal_ids: event.new_traversal_ids) + end + end + private def update_all_traversal_ids(assoc, old_ancestor_ids, new_ancestor_ids) diff --git a/app/services/ci/process_hierarchy_sync_events_service.rb b/app/services/ci/process_hierarchy_sync_events_service.rb new file mode 100644 index 00000000000000..05f48411aecd06 --- /dev/null +++ b/app/services/ci/process_hierarchy_sync_events_service.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +module Ci + class ProcessHierarchySyncEventsService + BATCH_SIZE = 100 + + def initialize(sync_event_class, hierarchy_class) + @sync_event_class = sync_event_class + @hierarchy_class = hierarchy_class + end + + def execute + # rubocop: disable CodeReuse/ActiveRecord + @sync_event_class.find_in_batches(batch_size: BATCH_SIZE) do |batch| + batch.each { |evt| @hierarchy_class.sync_traversal_ids(evt) if evt.new_traversal_ids.present? } + + select_query = @sync_event_class + .id_in(batch) + .lock!('FOR UPDATE SKIP LOCKED') + .select(:id) + + @sync_event_class.where(id: select_query).delete_all + end + # rubocop: enable CodeReuse/ActiveRecord + end + end +end diff --git a/app/workers/ci/process_namespace_sync_events_worker.rb b/app/workers/ci/process_namespace_sync_events_worker.rb index 01f9a62489c648..0eedab208bdfa2 100644 --- a/app/workers/ci/process_namespace_sync_events_worker.rb +++ b/app/workers/ci/process_namespace_sync_events_worker.rb @@ -11,36 +11,8 @@ class ProcessNamespaceSyncEventsWorker deduplicate :until_executed idempotent! - BATCH_SIZE = 100 - def perform - # rubocop: disable CodeReuse/ActiveRecord - Namespaces::SyncEvent.find_in_batches(batch_size: BATCH_SIZE) do |batch| - batch.each { |evt| sync_namespace_traversal_ids(evt) if evt.new_traversal_ids.present? } - - select_query = Namespaces::SyncEvent - .id_in(batch) - .select(:id) - .lock!('FOR UPDATE SKIP LOCKED') - .to_sql - - Namespaces::SyncEvent.connection.execute("DELETE FROM #{Namespaces::SyncEvent.quoted_table_name} WHERE id IN (#{select_query})") - end - # rubocop: enable CodeReuse/ActiveRecord - end - - private - - def sync_namespace_traversal_ids(event) - if event.traversal_ids.present? - ::Ci::NamespaceHierarchy.update_traversal_ids(event.traversal_ids, event.new_traversal_ids) - ::Ci::ProjectHierarchy.update_namespace_traversal_ids(event.traversal_ids, event.new_traversal_ids) - else - ::Ci::NamespaceHierarchy.create!( - namespace_id: event.new_traversal_ids.last, - traversal_ids: event.new_traversal_ids - ) - end + Ci::ProcessHierarchySyncEventsService.new(Namespaces::SyncEvent, ::Ci::NamespaceHierarchy).execute end end end diff --git a/app/workers/ci/process_project_sync_events_worker.rb b/app/workers/ci/process_project_sync_events_worker.rb index e1ecc0b97c8024..1e40e1ae343967 100644 --- a/app/workers/ci/process_project_sync_events_worker.rb +++ b/app/workers/ci/process_project_sync_events_worker.rb @@ -11,35 +11,8 @@ class ProcessProjectSyncEventsWorker deduplicate :until_executed idempotent! - BATCH_SIZE = 100 - def perform - # rubocop: disable CodeReuse/ActiveRecord - Projects::SyncEvent.find_in_batches(batch_size: BATCH_SIZE) do |batch| - batch.each { |evt| sync_project_traversal_ids(evt) if evt.new_traversal_ids.present? } - - select_query = Projects::SyncEvent - .id_in(batch) - .select(:id) - .lock!('FOR UPDATE SKIP LOCKED') - .to_sql - - Projects::SyncEvent.connection.execute("DELETE FROM #{Projects::SyncEvent.quoted_table_name} WHERE id IN (#{select_query})") - end - # rubocop: enable CodeReuse/ActiveRecord - end - - private - - def sync_project_traversal_ids(event) - if event.traversal_ids.present? - ::Ci::ProjectHierarchy.update_traversal_ids( - event.project_id, - event.new_traversal_ids - ) - else - ::Ci::ProjectHierarchy.create!(project_id: event.project_id, traversal_ids: event.new_traversal_ids) - end + Ci::ProcessHierarchySyncEventsService.new(Projects::SyncEvent, ::Ci::ProjectHierarchy).execute end end end -- GitLab From d35fae730f4234b2673882dc68547661d962912c Mon Sep 17 00:00:00 2001 From: Pedro Pombeiro Date: Wed, 10 Nov 2021 15:00:06 +0100 Subject: [PATCH 09/13] Address database MR review comments --- .../20211011141240_add_ci_namespace_hierarchies.rb | 2 +- .../20211011141241_add_ci_project_hierarchies.rb | 2 +- .../background_migration/backfill_namespace_hierarchies.rb | 7 +++---- .../background_migration/backfill_project_hierarchies.rb | 7 ++----- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/db/post_migrate/20211011141240_add_ci_namespace_hierarchies.rb b/db/post_migrate/20211011141240_add_ci_namespace_hierarchies.rb index 312ed177acfe33..54acc0fe9d335d 100644 --- a/db/post_migrate/20211011141240_add_ci_namespace_hierarchies.rb +++ b/db/post_migrate/20211011141240_add_ci_namespace_hierarchies.rb @@ -23,6 +23,6 @@ def up end def down - execute('DELETE FROM ci_namespace_hierarchies') + # no-op end end diff --git a/db/post_migrate/20211011141241_add_ci_project_hierarchies.rb b/db/post_migrate/20211011141241_add_ci_project_hierarchies.rb index 11d4d1e8434ffa..c37e15d8f968c7 100644 --- a/db/post_migrate/20211011141241_add_ci_project_hierarchies.rb +++ b/db/post_migrate/20211011141241_add_ci_project_hierarchies.rb @@ -23,6 +23,6 @@ def up end def down - execute('DELETE FROM ci_project_hierarchies') + # no-op end end diff --git a/lib/gitlab/background_migration/backfill_namespace_hierarchies.rb b/lib/gitlab/background_migration/backfill_namespace_hierarchies.rb index bbb2e1cca1ad7d..4bb44d76875666 100644 --- a/lib/gitlab/background_migration/backfill_namespace_hierarchies.rb +++ b/lib/gitlab/background_migration/backfill_namespace_hierarchies.rb @@ -13,8 +13,7 @@ class Namespace < ActiveRecord::Base self.inheritance_column = nil scope :base_query, -> do - where.not(id: ::Ci::NamespaceHierarchy.select(:namespace_id)) - .allow_cross_joins_across_databases(url: '') + select(:id, :parent_id) end end @@ -28,8 +27,8 @@ def perform(start_id, end_id, sub_batch_size) update_sql = <<~SQL INSERT INTO ci_namespace_hierarchies (namespace_id, traversal_ids) - #{calculated_traversal_ids(ranged_query)} - -- ON CONFLICT (namespace_id) DO NOTHING + #{calculated_traversal_ids(ranged_query.allow_cross_joins_across_databases(url: ''))} + ON CONFLICT (namespace_id) DO NOTHING SQL ActiveRecord::Base.connection.execute(update_sql) diff --git a/lib/gitlab/background_migration/backfill_project_hierarchies.rb b/lib/gitlab/background_migration/backfill_project_hierarchies.rb index 6285a10e711603..e58f708f61532d 100644 --- a/lib/gitlab/background_migration/backfill_project_hierarchies.rb +++ b/lib/gitlab/background_migration/backfill_project_hierarchies.rb @@ -14,10 +14,7 @@ class Project < ActiveRecord::Base self.table_name = 'projects' scope :base_query, -> do - where.not(id: ::Ci::ProjectHierarchy.select(:project_id)) - .joins(:namespace) - .select(:id, 'namespaces.id AS parent_id') - .allow_cross_joins_across_databases(url: '') + joins(:namespace).select(:id, 'namespaces.id AS parent_id') end end @@ -31,7 +28,7 @@ def perform(start_id, end_id, sub_batch_size) update_sql = <<~SQL INSERT INTO ci_project_hierarchies (project_id, traversal_ids) - #{calculated_traversal_ids(ranged_query)} + #{calculated_traversal_ids(ranged_query.allow_cross_joins_across_databases(url: ''))} ON CONFLICT (project_id) DO NOTHING SQL ActiveRecord::Base.connection.execute(update_sql) -- GitLab From ad5272079a54a53d691d83d1bf29257501fcc4ff Mon Sep 17 00:00:00 2001 From: Pedro Pombeiro Date: Wed, 10 Nov 2021 17:18:28 +0100 Subject: [PATCH 10/13] Address MR review comments --- app/models/ci/project_hierarchy.rb | 6 +++--- app/models/namespace.rb | 20 ++++++++++---------- app/models/namespaces/sync_event.rb | 2 +- app/models/project.rb | 20 ++++++++++---------- app/models/projects/sync_event.rb | 2 +- spec/models/namespace_spec.rb | 4 ++-- 6 files changed, 27 insertions(+), 27 deletions(-) diff --git a/app/models/ci/project_hierarchy.rb b/app/models/ci/project_hierarchy.rb index 49ab42e70d9953..0d6c1ae6806825 100644 --- a/app/models/ci/project_hierarchy.rb +++ b/app/models/ci/project_hierarchy.rb @@ -8,7 +8,7 @@ class ProjectHierarchy < ApplicationRecord class << self def update_traversal_ids(project_id, new_ancestor_ids) - where(project_id: project_id).update_all("traversal_ids = ARRAY#{new_ancestor_ids}::int[]") + where(project_id: project_id).update_all(traversal_ids: new_ancestor_ids) end def update_namespace_traversal_ids(old_ancestor_ids, new_ancestor_ids) @@ -25,9 +25,9 @@ def sync_traversal_ids(event) private - def update_all_traversal_ids(assoc, old_ancestor_ids, new_ancestor_ids) + def update_all_traversal_ids(subject, old_ancestor_ids, new_ancestor_ids) # Update the traversal IDs with the new_ancestor_ids plus any descendants in the record - assoc.update_all("traversal_ids = ARRAY#{new_ancestor_ids}::int[] || traversal_ids[#{old_ancestor_ids.length + 1}:]") + subject.update_all("traversal_ids = ARRAY#{new_ancestor_ids}::int[] || traversal_ids[#{old_ancestor_ids.length + 1}:]") end end end diff --git a/app/models/namespace.rb b/app/models/namespace.rb index a793f436b9c8a1..d7ade8791b714a 100644 --- a/app/models/namespace.rb +++ b/app/models/namespace.rb @@ -107,9 +107,7 @@ class Namespace < ApplicationRecord delegate :name, to: :owner, allow_nil: true, prefix: true delegate :avatar_url, to: :owner, allow_nil: true - before_update :save_previous_traversal_ids, if: -> { :parent_changed? && @previous_traversal_ids.nil? } - after_update :schedule_ci_hierarchy_update, if: :saved_change_to_parent_id? - after_create :schedule_ci_hierarchy_update + after_save :schedule_ci_hierarchy_update, if: -> { saved_change_to_id? || saved_change_to_parent_id? } after_commit :refresh_access_of_projects_invited_groups, on: :update, if: -> { previous_changes.key?('share_with_group_lock') } @@ -621,18 +619,20 @@ def enforce_minimum_path_length? path_changed? && !project_namespace? end - def save_previous_traversal_ids - previous_namespace = Namespace.find(parent_id_was) if parent_id_was - @previous_traversal_ids = (previous_namespace&.self_and_ancestor_ids(hierarchy_order: :desc) || []) + [id] - end - def schedule_ci_hierarchy_update + previous_namespace_ids = + if previously_new_record? + [] + else + previous_namespace = Namespace.find(parent_id_previously_was) if parent_id_previously_was + (previous_namespace&.self_and_ancestor_ids(hierarchy_order: :desc) || []) + [id] + end + Namespaces::SyncEvent.insert({ namespace_id: id, - traversal_ids: @previous_traversal_ids || [], + traversal_ids: previous_namespace_ids, new_traversal_ids: self_and_ancestor_ids(hierarchy_order: :desc) }) - @previous_traversal_ids = nil run_after_commit do Ci::ProcessNamespaceSyncEventsWorker.perform_async diff --git a/app/models/namespaces/sync_event.rb b/app/models/namespaces/sync_event.rb index 89d1b4c36a7ec6..5acd4410a89f6a 100644 --- a/app/models/namespaces/sync_event.rb +++ b/app/models/namespaces/sync_event.rb @@ -3,5 +3,5 @@ # This model serves to keep track of changes to the namespaces table in the main database, and allowing to safely # replicate these changes to other databases. class Namespaces::SyncEvent < ApplicationRecord - self.table_name_prefix = 'namespaces_' + self.table_name = 'namespaces_sync_events' end diff --git a/app/models/project.rb b/app/models/project.rb index 0684fd7d894f77..a6503ad1ff7a19 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -101,9 +101,7 @@ class Project < ApplicationRecord after_save :update_project_statistics, if: :saved_change_to_namespace_id? - before_update :save_previous_traversal_ids, if: -> { :namespace_id_changed? && @previous_traversal_ids.nil? } - after_update :schedule_ci_hierarchy_update, if: :saved_change_to_namespace_id? - after_create :schedule_ci_hierarchy_update + after_save :schedule_ci_hierarchy_update, if: -> { saved_change_to_id? || saved_change_to_namespace_id? } after_save :create_import_state, if: ->(project) { project.import? && project.import_state.nil? } @@ -2960,18 +2958,20 @@ def sync_attributes(project_namespace) project_namespace.visibility_level = visibility_level end - def save_previous_traversal_ids - previous_namespace = Namespace.find_by(id: namespace_id_was) if changes.include?(:namespace_id) - @previous_traversal_ids = previous_namespace&.self_and_ancestor_ids(hierarchy_order: :desc) - end - def schedule_ci_hierarchy_update + previous_namespace_ids = + if previously_new_record? + [] + else + previous_namespace = Namespace.find(namespace_id_previously_was) if namespace_id_previously_was + previous_namespace&.self_and_ancestor_ids(hierarchy_order: :desc) || [] + end + Projects::SyncEvent.insert({ project_id: id, - traversal_ids: @previous_traversal_ids || [], + traversal_ids: previous_namespace_ids, new_traversal_ids: namespace.self_and_ancestor_ids(hierarchy_order: :desc) }) - @previous_traversal_ids = nil run_after_commit do Ci::ProcessProjectSyncEventsWorker.perform_async diff --git a/app/models/projects/sync_event.rb b/app/models/projects/sync_event.rb index 55e3bc36c350e0..548710fe255d01 100644 --- a/app/models/projects/sync_event.rb +++ b/app/models/projects/sync_event.rb @@ -3,5 +3,5 @@ # This model serves to keep track of changes to the namespaces table in the main database as they relate to projects, # allowing to safely replicate changes to other databases. class Projects::SyncEvent < ApplicationRecord - self.table_name_prefix = 'projects_' + self.table_name = 'projects_sync_events' end diff --git a/spec/models/namespace_spec.rb b/spec/models/namespace_spec.rb index 1d94c1a7f33765..e6d96fa7b43e8c 100644 --- a/spec/models/namespace_spec.rb +++ b/spec/models/namespace_spec.rb @@ -2076,13 +2076,13 @@ def project_rugged(project) let(:child_group) { create(:group, parent: parent_group) } it 'creates a matching namespaces_sync_event record for record without parent' do - expect(Namespaces::SyncEvent.where(new_traversal_ids: [parent_group.id])).to contain_exactly( + expect(Namespaces::SyncEvent.where(namespace_id: parent_group.id)).to contain_exactly( an_object_having_attributes( traversal_ids: [] ) ) end it 'creates a matching namespaces_sync_event record for record with parent' do - expect(Namespaces::SyncEvent.where(new_traversal_ids: [parent_group.id, child_group.id])).to contain_exactly( + expect(Namespaces::SyncEvent.where(namespace_id: child_group.id)).to contain_exactly( an_object_having_attributes( traversal_ids: [] ) ) end -- GitLab From 12ff119d76c411511c65eee61d8a6e29fcab5674 Mon Sep 17 00:00:00 2001 From: Pedro Pombeiro Date: Thu, 11 Nov 2021 13:15:49 +0100 Subject: [PATCH 11/13] Simplify tests --- spec/models/namespace_spec.rb | 13 ++++--------- spec/models/project_spec.rb | 13 ++++--------- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/spec/models/namespace_spec.rb b/spec/models/namespace_spec.rb index e6d96fa7b43e8c..87c707e2ce64d2 100644 --- a/spec/models/namespace_spec.rb +++ b/spec/models/namespace_spec.rb @@ -2091,8 +2091,7 @@ def project_rugged(project) let(:sync_events) { Namespaces::SyncEvent.all.to_a.select { |r| r.traversal_ids.include?(subject.id) } } it 'creates a matching namespaces_sync_event record' do - subject.parent = parent_group2 - subject.save! + subject.update!(parent_id: parent_group2.id) expect(sync_events).to contain_exactly( an_object_having_attributes( @@ -2109,11 +2108,8 @@ def project_rugged(project) it 'creates two namespaces_sync_event records' do intermediate_group = create(:group) Namespace.transaction do - subject.parent = intermediate_group - subject.save! - - subject.parent = parent_group2 - subject.save! + subject.update!(parent_id: intermediate_group.id) + subject.update!(parent_id: parent_group2.id) end expect(sync_events).to match([ @@ -2126,8 +2122,7 @@ def project_rugged(project) specify 'update kicks off ProcessNamespaceSyncEventsWorker' do expect(Ci::ProcessNamespaceSyncEventsWorker).to receive(:perform_async).twice - subject.parent = parent_group2 - subject.save! + subject.update!(parent_id: parent_group2.id) end end end diff --git a/spec/models/project_spec.rb b/spec/models/project_spec.rb index af1d149364485e..4413a1724813eb 100644 --- a/spec/models/project_spec.rb +++ b/spec/models/project_spec.rb @@ -7403,8 +7403,7 @@ def has_external_wiki project_id: subject.id, traversal_ids: [], new_traversal_ids: [original_namespace.id] }) - subject.namespace = parent_namespace - subject.save! + subject.update!(namespace_id: parent_namespace.id) sync_events = Projects::SyncEvent.where(project_id: subject.id) @@ -7422,11 +7421,8 @@ def has_external_wiki it 'creates two update projects_sync_event records' do intermediate_namespace = create(:namespace) Project.transaction do - subject.namespace = intermediate_namespace - subject.save! - - subject.namespace = parent_namespace - subject.save! + subject.update!(namespace_id: intermediate_namespace.id) + subject.update!(namespace_id: parent_namespace.id) end expect(sync_events).to match([ @@ -7440,8 +7436,7 @@ def has_external_wiki it 'kicks off ProcessProjectSyncEventsWorker' do expect(Ci::ProcessProjectSyncEventsWorker).to receive(:perform_async) - subject.namespace = parent_namespace - subject.save! + subject.update!(namespace_id: parent_namespace.id) end end -- GitLab From c29b7857e5131e3ba412bcf4f9d483d343cf6302 Mon Sep 17 00:00:00 2001 From: Pedro Pombeiro Date: Thu, 11 Nov 2021 17:35:31 +0100 Subject: [PATCH 12/13] Fix tests preventing using Namespace.find --- ee/spec/models/ci/daily_build_group_report_result_spec.rb | 5 +++-- spec/models/ci/runner_spec.rb | 5 +++++ spec/services/notification_service_spec.rb | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/ee/spec/models/ci/daily_build_group_report_result_spec.rb b/ee/spec/models/ci/daily_build_group_report_result_spec.rb index d622e92d963907..e647c58516280b 100644 --- a/ee/spec/models/ci/daily_build_group_report_result_spec.rb +++ b/ee/spec/models/ci/daily_build_group_report_result_spec.rb @@ -100,8 +100,9 @@ end context 'when group has projects with several coverage' do - let!(:project_2) { create(:project) } - let!(:group) { create(:group, projects: [project, project_2]) } + let_it_be(:project_2) { create(:project) } + let_it_be(:group) { create(:group, projects: [project, project_2]) } + let!(:coverage_1) { create(:ci_daily_build_group_report_result, project: project) } let!(:coverage_2) { create(:ci_daily_build_group_report_result, project: project_2, group_name: 'karma') } diff --git a/spec/models/ci/runner_spec.rb b/spec/models/ci/runner_spec.rb index 2e79159cc60b85..7aaec55a9f4c30 100644 --- a/spec/models/ci/runner_spec.rb +++ b/spec/models/ci/runner_spec.rb @@ -547,6 +547,11 @@ def stub_redis_runner_contacted_at(value) let(:group) { create(:group, projects: [build.project]) } let(:runner) { create(:ci_runner, :group, tag_list: tag_list, run_untagged: run_untagged, groups: [group]) } + before do + # Ensure the project namespace is reloaded + build.project.reload + end + it 'can handle builds' do expect(runner.can_pick?(build)).to be_truthy end diff --git a/spec/services/notification_service_spec.rb b/spec/services/notification_service_spec.rb index 2295a20cb0d646..e8f14c723aa05d 100644 --- a/spec/services/notification_service_spec.rb +++ b/spec/services/notification_service_spec.rb @@ -3552,6 +3552,7 @@ def build_team(project) # with different notification settings def build_group(project, visibility: :public) group = create_nested_group(visibility) + project.reload project.update!(namespace_id: group.id) # Group member: global=disabled, group=watch -- GitLab From f6bfdbbe210a708a24b508f7c457620238ee709c Mon Sep 17 00:00:00 2001 From: Pedro Pombeiro Date: Thu, 11 Nov 2021 17:46:04 +0100 Subject: [PATCH 13/13] Fix PostgresSQL array generation --- app/models/ci/namespace_hierarchy.rb | 2 +- app/models/ci/project_hierarchy.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/models/ci/namespace_hierarchy.rb b/app/models/ci/namespace_hierarchy.rb index 945bf0a8f5619b..14482b9ea5ad8e 100644 --- a/app/models/ci/namespace_hierarchy.rb +++ b/app/models/ci/namespace_hierarchy.rb @@ -8,7 +8,7 @@ class << self def update_traversal_ids(old_self_and_ancestor_ids, new_self_and_ancestor_ids) # Update the traversal IDs with the new_self_and_ancestor_ids plus any descendants in the record where('traversal_ids @> ARRAY[?]::int[]', old_self_and_ancestor_ids) - .update_all("traversal_ids = ARRAY#{new_self_and_ancestor_ids}::int[] || traversal_ids[#{old_self_and_ancestor_ids.length + 1}:]") + .update_all("traversal_ids = ARRAY[#{new_self_and_ancestor_ids.join(',')}]::int[] || traversal_ids[#{old_self_and_ancestor_ids.length + 1}:]") end def sync_traversal_ids(event) diff --git a/app/models/ci/project_hierarchy.rb b/app/models/ci/project_hierarchy.rb index 0d6c1ae6806825..392ef9a0b45cb4 100644 --- a/app/models/ci/project_hierarchy.rb +++ b/app/models/ci/project_hierarchy.rb @@ -27,7 +27,7 @@ def sync_traversal_ids(event) def update_all_traversal_ids(subject, old_ancestor_ids, new_ancestor_ids) # Update the traversal IDs with the new_ancestor_ids plus any descendants in the record - subject.update_all("traversal_ids = ARRAY#{new_ancestor_ids}::int[] || traversal_ids[#{old_ancestor_ids.length + 1}:]") + subject.update_all("traversal_ids = ARRAY[#{new_ancestor_ids.join(',')}]::int[] || traversal_ids[#{old_ancestor_ids.length + 1}:]") end end end -- GitLab