From 81443ac092bf085acc86ca76b11270de60ebe61a Mon Sep 17 00:00:00 2001 From: Ivan Sebastian Date: Mon, 25 Mar 2024 23:58:26 +0700 Subject: [PATCH 1/4] Fix absolute_image_urls for wiki path --- lib/gitlab/hook_data/base_builder.rb | 1 + lib/gitlab/hook_data/wiki_page_builder.rb | 2 +- spec/models/wiki_page_spec.rb | 10 +++++++--- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/gitlab/hook_data/base_builder.rb b/lib/gitlab/hook_data/base_builder.rb index 4a81f6b8a0efeb..f02678b4244cdd 100644 --- a/lib/gitlab/hook_data/base_builder.rb +++ b/lib/gitlab/hook_data/base_builder.rb @@ -43,6 +43,7 @@ def absolute_image_urls(markdown_text) if match[:image] && !match[:https] url = match[:url] url = "#{uploads_prefix}#{url}" if url.start_with?('/uploads') + url = "#{uploads_prefix}/#{url}" if url.start_with?('uploads') # wiki doesn't have a leading slash url = "/#{url}" unless url.start_with?('/') "![#{match[:title]}](#{Gitlab.config.gitlab.url}#{url})" diff --git a/lib/gitlab/hook_data/wiki_page_builder.rb b/lib/gitlab/hook_data/wiki_page_builder.rb index 78ae896a3d8663..6f1a9fbea1eec0 100644 --- a/lib/gitlab/hook_data/wiki_page_builder.rb +++ b/lib/gitlab/hook_data/wiki_page_builder.rb @@ -14,7 +14,7 @@ def build end def uploads_prefix - '' + wiki_page.wiki.wiki_base_path end end end diff --git a/spec/models/wiki_page_spec.rb b/spec/models/wiki_page_spec.rb index 1c55eb3c38acdb..bbe8547479be7a 100644 --- a/spec/models/wiki_page_spec.rb +++ b/spec/models/wiki_page_spec.rb @@ -1063,12 +1063,16 @@ def force_wiki_change_branch end describe '#hook_attrs' do - subject { build_wiki_page(container) } + let_it_be(:namespace) { create(:group, path: 'namespace-a') } + let_it_be(:project) { create(:project, path: 'project-a', namespace: namespace) } + + subject { build_wiki_page(project) } it 'adds absolute urls for images in the content' do - subject.attributes[:content] = 'test![WikiPage_Image](/uploads/abc/WikiPage_Image.png)' + subject.attributes[:content] = 'test![WikiPage_Image](uploads/abc/WikiPage_Image.png)' - expect(subject.hook_attrs['content']).to eq("test![WikiPage_Image](#{Settings.gitlab.url}/uploads/abc/WikiPage_Image.png)") + expected_path = "#{Settings.gitlab.url}/namespace-a/project-a/-/wikis/uploads/abc/WikiPage_Image.png)" + expect(subject.hook_attrs['content']).to eq("test![WikiPage_Image](#{expected_path}") end end -- GitLab From 48292987fd48482ebba1c74bf4d1b6ae242bf89b Mon Sep 17 00:00:00 2001 From: Rodrigo Tomonari Date: Thu, 28 Mar 2024 15:31:58 -0300 Subject: [PATCH 2/4] User mapping POC --- app/models/bulk_imports/configuration.rb | 4 + app/models/concerns/has_user_type.rb | 5 +- app/models/concerns/importable.rb | 4 + app/models/event.rb | 1 + app/models/import/detail.rb | 28 ++++++ app/models/import/source_user.rb | 36 ++++++++ app/models/label_link.rb | 2 +- app/models/merge_request/metrics.rb | 2 + app/models/snippet.rb | 1 + app/models/user.rb | 3 + .../wip/improved_user_mapping.yml | 8 ++ db/docs/import_details.yml | 19 +++++ db/docs/import_source_users.yml | 19 +++++ ...240321222143_create_import_source_users.rb | 27 ++++++ db/migrate/20240325183812_import_details.rb | 50 +++++++++++ db/schema_migrations/20240321222143 | 1 + db/schema_migrations/20240325183812 | 1 + db/structure.sql | 82 +++++++++++++++++- .../approval_group_rules_user.rb | 1 + ee/app/models/board_assignee.rb | 2 + ee/app/models/epic_issue.rb | 1 + .../deploy_access_level.rb | 1 + .../common/graphql/get_members_query.rb | 1 + .../member_attributes_transformer.rb | 47 +++++++++- lib/bulk_imports/ndjson_pipeline.rb | 48 +++++++++++ lib/bulk_imports/users_mapper.rb | 41 ++++++++- lib/gitlab/hook_data/base_builder.rb | 1 - lib/gitlab/hook_data/wiki_page_builder.rb | 2 +- lib/gitlab/import/create_placeholder.rb | 52 ++++++++++++ .../import/reassign_imported_contributions.rb | 42 +++++++++ lib/gitlab/import/source_user_mapper.rb | 85 +++++++++++++++++++ .../import_export/base/relation_factory.rb | 51 +++++++++++ .../import_export/group/import_export.yml | 36 +++++++- .../group/relation_tree_restorer.rb | 4 +- spec/factories/bulk_import.rb | 4 + .../member_attributes_transformer_spec.rb | 12 +-- spec/models/wiki_page_spec.rb | 10 +-- 37 files changed, 708 insertions(+), 26 deletions(-) create mode 100644 app/models/import/detail.rb create mode 100644 app/models/import/source_user.rb create mode 100644 config/feature_flags/wip/improved_user_mapping.yml create mode 100644 db/docs/import_details.yml create mode 100644 db/docs/import_source_users.yml create mode 100644 db/migrate/20240321222143_create_import_source_users.rb create mode 100644 db/migrate/20240325183812_import_details.rb create mode 100644 db/schema_migrations/20240321222143 create mode 100644 db/schema_migrations/20240325183812 create mode 100644 lib/gitlab/import/create_placeholder.rb create mode 100644 lib/gitlab/import/reassign_imported_contributions.rb create mode 100644 lib/gitlab/import/source_user_mapper.rb diff --git a/app/models/bulk_imports/configuration.rb b/app/models/bulk_imports/configuration.rb index 6d9f598583ea05..2c4aa6c14b0e3f 100644 --- a/app/models/bulk_imports/configuration.rb +++ b/app/models/bulk_imports/configuration.rb @@ -19,4 +19,8 @@ class BulkImports::Configuration < ApplicationRecord key: Settings.attr_encrypted_db_key_base_32, mode: :per_attribute_iv, algorithm: 'aes-256-gcm' + + def source_hostname + URI.parse(url).host + end end diff --git a/app/models/concerns/has_user_type.rb b/app/models/concerns/has_user_type.rb index 871ffda3d90e32..fd15797383d0ea 100644 --- a/app/models/concerns/has_user_type.rb +++ b/app/models/concerns/has_user_type.rb @@ -18,7 +18,8 @@ module HasUserType admin_bot: 11, suggested_reviewers_bot: 12, service_account: 13, - llm_bot: 14 + llm_bot: 14, + placeholder: 15 }.with_indifferent_access.freeze BOT_USER_TYPES = %w[ @@ -47,7 +48,7 @@ module HasUserType scope :bots, -> { where(user_type: BOT_USER_TYPES) } scope :without_bots, -> { where(user_type: USER_TYPES.keys - BOT_USER_TYPES) } scope :non_internal, -> { where(user_type: NON_INTERNAL_USER_TYPES) } - scope :without_ghosts, -> { where(user_type: USER_TYPES.keys - ['ghost']) } + scope :without_ghosts, -> { where(user_type: USER_TYPES.keys - %w[ghost placeholder]) } scope :without_project_bot, -> { where(user_type: USER_TYPES.keys - ['project_bot']) } scope :human_or_service_user, -> { where(user_type: %i[human service_user]) } diff --git a/app/models/concerns/importable.rb b/app/models/concerns/importable.rb index 4d2707b08abdda..29c08bbb2bb1c5 100644 --- a/app/models/concerns/importable.rb +++ b/app/models/concerns/importable.rb @@ -8,4 +8,8 @@ module Importable attr_accessor :imported alias_method :imported?, :imported + + included do + has_one :import_detail, as: :importable, class_name: "Import::Detail" + end end diff --git a/app/models/event.rb b/app/models/event.rb index 1e2704f2c68df1..940e18e40115f1 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -10,6 +10,7 @@ class Event < ApplicationRecord include UsageStatistics include ShaAttribute include EachBatch + include Importable ACTIONS = HashWithIndifferentAccess.new( created: 1, diff --git a/app/models/import/detail.rb b/app/models/import/detail.rb new file mode 100644 index 00000000000000..d5e073671e4f2e --- /dev/null +++ b/app/models/import/detail.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +module Import + class Detail < ApplicationRecord + belongs_to :namespace + belongs_to :project + + # Using polymorphic association to avoid creating ~30 tables + belongs_to :importable, polymorphic: true # rubocop:disable Cop/PolymorphicAssociations -- Conscious exception + + self.table_name = "import_details" + + USER_REFERENCES = %w[ + author_id + assignee_id + updated_by_id + merged_by_id + latest_closed_by_id + user_id + created_by_id + last_edited_by_id + merge_user_id + resolved_by_id + closed_by_id + owner_id + ].freeze + end +end diff --git a/app/models/import/source_user.rb b/app/models/import/source_user.rb new file mode 100644 index 00000000000000..c87d9e08b8fb4f --- /dev/null +++ b/app/models/import/source_user.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +module Import + class SourceUser < ApplicationRecord + self.table_name = 'import_source_users' + + belongs_to :placeholder_user, class_name: 'User', optional: true + belongs_to :assignee_user, class_name: 'User', optional: true + belongs_to :namespace + + validates :namespace, presence: true + validates :import_type, presence: true + + validates :source_name, presence: true + validates :source_username, presence: true + validates :source_user_identifier, presence: true + validates :source_hostname, presence: true + + def self.find_source_user(namespace:, import_type:, source_hostname:, source_user_identifier:) + find_by( + namespace_id: namespace.id, + import_type: import_type, + source_hostname: source_hostname, + source_user_identifier: source_user_identifier + ) + end + + def assigned? + !!assigned_user_id + end + + def user + assignee_user || placeholder_user + end + end +end diff --git a/app/models/label_link.rb b/app/models/label_link.rb index 0c2d205c641312..67abb04605f27a 100644 --- a/app/models/label_link.rb +++ b/app/models/label_link.rb @@ -1,8 +1,8 @@ # frozen_string_literal: true class LabelLink < ApplicationRecord - include BulkInsertSafe include Importable + include BulkInsertSafe belongs_to :target, polymorphic: true, inverse_of: :label_links # rubocop:disable Cop/PolymorphicAssociations belongs_to :label diff --git a/app/models/merge_request/metrics.rb b/app/models/merge_request/metrics.rb index 300152c57e01f7..ce1b0e1f99bd6b 100644 --- a/app/models/merge_request/metrics.rb +++ b/app/models/merge_request/metrics.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true class MergeRequest::Metrics < ApplicationRecord + include Importable + belongs_to :merge_request, inverse_of: :metrics belongs_to :pipeline, class_name: 'Ci::Pipeline', foreign_key: :pipeline_id belongs_to :latest_closed_by, class_name: 'User' diff --git a/app/models/snippet.rb b/app/models/snippet.rb index 3e075fdaa9eb97..fad9cf477af325 100644 --- a/app/models/snippet.rb +++ b/app/models/snippet.rb @@ -20,6 +20,7 @@ class Snippet < ApplicationRecord extend ::Gitlab::Utils::Override include CreatedAtFilterable include EachBatch + include Importable MAX_FILE_COUNT = 10 diff --git a/app/models/user.rb b/app/models/user.rb index 5a02c3e58224cd..91255bad01251f 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -308,6 +308,9 @@ def update_tracked_fields!(request) has_many :requested_member_approvals, class_name: 'Members::MemberApproval', foreign_key: 'requested_by_id' has_many :reviewed_member_approvals, class_name: 'Members::MemberApproval', foreign_key: 'reviewed_by_id' + # Import placeholder user + has_one :placeholder_detail, class_name: 'Import::SourceUser', foreign_key: 'placeholder_user_id', inverse_of: :placeholder_user + # # Validations # diff --git a/config/feature_flags/wip/improved_user_mapping.yml b/config/feature_flags/wip/improved_user_mapping.yml new file mode 100644 index 00000000000000..84cca48f0efc1a --- /dev/null +++ b/config/feature_flags/wip/improved_user_mapping.yml @@ -0,0 +1,8 @@ +--- +name: improved_user_mapping +introduced_by_url: +rollout_issue_url: +milestone: '16.11' +type: wip +group: group::import and integrate +default_enabled: false diff --git a/db/docs/import_details.yml b/db/docs/import_details.yml new file mode 100644 index 00000000000000..00b49a3dccaf71 --- /dev/null +++ b/db/docs/import_details.yml @@ -0,0 +1,19 @@ +--- +table_name: import_details +classes: +- Import::Detail +feature_categories: +- importers +description: +introduced_by_url: +milestone: '16.11' +gitlab_schema: gitlab_main_cell +allow_cross_joins: +- gitlab_main_clusterwide +allow_cross_transactions: +- gitlab_main_clusterwide +allow_cross_foreign_keys: +- gitlab_main_clusterwide +sharding_key: + project_id: projects + namespace_id: namespaces diff --git a/db/docs/import_source_users.yml b/db/docs/import_source_users.yml new file mode 100644 index 00000000000000..485d8c4a254e1c --- /dev/null +++ b/db/docs/import_source_users.yml @@ -0,0 +1,19 @@ +--- +table_name: import_source_users +classes: +- Import::SourceUser +feature_categories: +- importers +description: Used to map imported user attributes to internal placeholder users or + real users +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/147488 +milestone: '16.11' +gitlab_schema: gitlab_main_cell +allow_cross_joins: +- gitlab_main_clusterwide +allow_cross_transactions: +- gitlab_main_clusterwide +allow_cross_foreign_keys: +- gitlab_main_clusterwide +sharding_key: + namespace_id: namespaces diff --git a/db/migrate/20240321222143_create_import_source_users.rb b/db/migrate/20240321222143_create_import_source_users.rb new file mode 100644 index 00000000000000..2a0a374c221474 --- /dev/null +++ b/db/migrate/20240321222143_create_import_source_users.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +class CreateImportSourceUsers < Gitlab::Database::Migration[2.2] + milestone '16.11' + + INDEX_NAME = 'index_users_on_source_by_namespace' + + def change + create_table :import_source_users do |t| + t.bigint :placeholder_user_id, null: true + t.bigint :assigned_user_id, null: true + t.bigint :namespace_id + t.text :source_username, limit: 255 + t.text :source_name, limit: 255 + t.text :source_user_identifier, null: false, limit: 255 + t.text :source_hostname, null: false, limit: 255 + t.text :import_type, null: false, limit: 255 + + t.timestamps_with_timezone null: false + + t.index [:namespace_id, :source_user_identifier, :source_hostname, :import_type], unique: true, name: INDEX_NAME + + t.index :placeholder_user_id + t.index :assigned_user_id + end + end +end diff --git a/db/migrate/20240325183812_import_details.rb b/db/migrate/20240325183812_import_details.rb new file mode 100644 index 00000000000000..272a892ef5bef6 --- /dev/null +++ b/db/migrate/20240325183812_import_details.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +# See https://docs.gitlab.com/ee/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class ImportDetails < Gitlab::Database::Migration[2.2] + # When using the methods "add_concurrent_index" or "remove_concurrent_index" + # you must disable the use of transactions + # as these methods can not run in an existing transaction. + # When using "add_concurrent_index" or "remove_concurrent_index" methods make sure + # that either of them is the _only_ method called in the migration, + # any other changes should go in a separate migration. + # This ensures that upon failure _only_ the index creation or removing fails + # and can be retried or reverted easily. + # + # To disable transactions uncomment the following line and remove these + # comments: + # disable_ddl_transaction! + # + # Configure the `gitlab_schema` to perform data manipulation (DML). + # Visit: https://docs.gitlab.com/ee/development/database/migrations_for_multiple_databases.html + # restrict_gitlab_migration gitlab_schema: :gitlab_main + + # Add dependent 'batched_background_migrations.queued_migration_version' values. + # DEPENDENT_BATCHED_BACKGROUND_MIGRATIONS = [] + milestone '16.11' + + def change + create_table :import_details do |t| + t.bigint :namespace_id + t.bigint :project_id + t.text :importable_type, null: false, limit: 255 + t.bigint :importable_id + t.bigint :assignee_id + t.bigint :author_id + t.bigint :closed_by_id + t.bigint :created_by_id + t.bigint :last_edited_by_id + t.bigint :latest_closed_by_id + t.bigint :merge_user_id + t.bigint :merged_by_id + t.bigint :owner_id + t.bigint :resolved_by_id + t.bigint :updated_by_id + t.bigint :user_id + + t.timestamps_with_timezone + end + end +end diff --git a/db/schema_migrations/20240321222143 b/db/schema_migrations/20240321222143 new file mode 100644 index 00000000000000..c0666fdd69fdff --- /dev/null +++ b/db/schema_migrations/20240321222143 @@ -0,0 +1 @@ +c8785de4ec91041c6dddbaf8ae0dbf2ec7ddfd46b84229f33539e19b8bebea79 \ No newline at end of file diff --git a/db/schema_migrations/20240325183812 b/db/schema_migrations/20240325183812 new file mode 100644 index 00000000000000..b093b028fed18e --- /dev/null +++ b/db/schema_migrations/20240325183812 @@ -0,0 +1 @@ +69eb1d2f81ff1c8d9dfbfd1dce1b97a33d8146c79195890d072f0c3d492d201b \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index b89005da409282..c2fddc44bdff8c 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -8644,6 +8644,7 @@ CREATE TABLE epics ( total_opened_issue_count integer DEFAULT 0 NOT NULL, total_closed_issue_count integer DEFAULT 0 NOT NULL, issue_id integer, + imported boolean DEFAULT false NOT NULL, CONSTRAINT check_ca608c40b3 CHECK ((char_length(color) <= 7)), CONSTRAINT check_fcfb4a93ff CHECK ((lock_version IS NOT NULL)) ); @@ -9665,6 +9666,38 @@ CREATE SEQUENCE identities_id_seq ALTER SEQUENCE identities_id_seq OWNED BY identities.id; +CREATE TABLE import_details ( + id bigint NOT NULL, + namespace_id bigint, + project_id bigint, + importable_type text NOT NULL, + importable_id bigint, + assignee_id bigint, + author_id bigint, + closed_by_id bigint, + created_by_id bigint, + last_edited_by_id bigint, + latest_closed_by_id bigint, + merge_user_id bigint, + merged_by_id bigint, + owner_id bigint, + resolved_by_id bigint, + updated_by_id bigint, + user_id bigint, + created_at timestamp with time zone NOT NULL, + updated_at timestamp with time zone NOT NULL, + CONSTRAINT check_affa74cf4f CHECK ((char_length(importable_type) <= 255)) +); + +CREATE SEQUENCE import_details_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + +ALTER SEQUENCE import_details_id_seq OWNED BY import_details.id; + CREATE TABLE import_export_uploads ( id integer NOT NULL, updated_at timestamp with time zone NOT NULL, @@ -9710,6 +9743,34 @@ CREATE SEQUENCE import_failures_id_seq ALTER SEQUENCE import_failures_id_seq OWNED BY import_failures.id; +CREATE TABLE import_source_users ( + id bigint NOT NULL, + placeholder_user_id bigint, + assigned_user_id bigint, + namespace_id bigint, + source_username text, + source_name text, + source_user_identifier text NOT NULL, + source_hostname text NOT NULL, + import_type text NOT NULL, + created_at timestamp with time zone NOT NULL, + updated_at timestamp with time zone NOT NULL, + CONSTRAINT check_0d7295a307 CHECK ((char_length(import_type) <= 255)), + CONSTRAINT check_199c28ec54 CHECK ((char_length(source_username) <= 255)), + CONSTRAINT check_562655155f CHECK ((char_length(source_name) <= 255)), + CONSTRAINT check_cc9d4093b5 CHECK ((char_length(source_user_identifier) <= 255)), + CONSTRAINT check_e2039840c5 CHECK ((char_length(source_hostname) <= 255)) +); + +CREATE SEQUENCE import_source_users_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + +ALTER SEQUENCE import_source_users_id_seq OWNED BY import_source_users.id; + CREATE TABLE incident_management_escalation_policies ( id bigint NOT NULL, project_id bigint NOT NULL, @@ -10332,6 +10393,7 @@ CREATE TABLE issues ( namespace_id bigint, start_date date, tmp_epic_id bigint, + imported boolean DEFAULT false NOT NULL, CONSTRAINT check_2addf801cd CHECK ((work_item_type_id IS NOT NULL)), CONSTRAINT check_c33362cd43 CHECK ((namespace_id IS NOT NULL)), CONSTRAINT check_fba63f706d CHECK ((lock_version IS NOT NULL)) @@ -11230,6 +11292,7 @@ CREATE TABLE merge_requests ( draft boolean DEFAULT false NOT NULL, prepared_at timestamp with time zone, merged_commit_sha bytea, + imported boolean DEFAULT false NOT NULL, CONSTRAINT check_970d272570 CHECK ((lock_version IS NOT NULL)) ); @@ -11914,7 +11977,8 @@ CREATE TABLE notes ( last_edited_at timestamp with time zone, internal boolean DEFAULT false NOT NULL, id bigint NOT NULL, - namespace_id bigint + namespace_id bigint, + imported boolean DEFAULT false NOT NULL ); CREATE SEQUENCE notes_id_seq @@ -19073,10 +19137,14 @@ ALTER TABLE ONLY historical_data ALTER COLUMN id SET DEFAULT nextval('historical ALTER TABLE ONLY identities ALTER COLUMN id SET DEFAULT nextval('identities_id_seq'::regclass); +ALTER TABLE ONLY import_details ALTER COLUMN id SET DEFAULT nextval('import_details_id_seq'::regclass); + ALTER TABLE ONLY import_export_uploads ALTER COLUMN id SET DEFAULT nextval('import_export_uploads_id_seq'::regclass); ALTER TABLE ONLY import_failures ALTER COLUMN id SET DEFAULT nextval('import_failures_id_seq'::regclass); +ALTER TABLE ONLY import_source_users ALTER COLUMN id SET DEFAULT nextval('import_source_users_id_seq'::regclass); + ALTER TABLE ONLY incident_management_escalation_policies ALTER COLUMN id SET DEFAULT nextval('incident_management_escalation_policies_id_seq'::regclass); ALTER TABLE ONLY incident_management_escalation_rules ALTER COLUMN id SET DEFAULT nextval('incident_management_escalation_rules_id_seq'::regclass); @@ -21224,12 +21292,18 @@ ALTER TABLE ONLY historical_data ALTER TABLE ONLY identities ADD CONSTRAINT identities_pkey PRIMARY KEY (id); +ALTER TABLE ONLY import_details + ADD CONSTRAINT import_details_pkey PRIMARY KEY (id); + ALTER TABLE ONLY import_export_uploads ADD CONSTRAINT import_export_uploads_pkey PRIMARY KEY (id); ALTER TABLE ONLY import_failures ADD CONSTRAINT import_failures_pkey PRIMARY KEY (id); +ALTER TABLE ONLY import_source_users + ADD CONSTRAINT import_source_users_pkey PRIMARY KEY (id); + ALTER TABLE ONLY incident_management_oncall_shifts ADD CONSTRAINT inc_mgmnt_no_overlapping_oncall_shifts EXCLUDE USING gist (rotation_id WITH =, tstzrange(starts_at, ends_at, '[)'::text) WITH &&); @@ -25474,6 +25548,10 @@ CREATE INDEX index_import_failures_on_project_id_not_null ON import_failures USI CREATE INDEX index_import_failures_on_user_id_not_null ON import_failures USING btree (user_id) WHERE (user_id IS NOT NULL); +CREATE INDEX index_import_source_users_on_assigned_user_id ON import_source_users USING btree (assigned_user_id); + +CREATE INDEX index_import_source_users_on_placeholder_user_id ON import_source_users USING btree (placeholder_user_id); + CREATE INDEX index_imported_projects_on_import_type_creator_id_created_at ON projects USING btree (import_type, creator_id, created_at) WHERE (import_type IS NOT NULL); CREATE INDEX index_imported_projects_on_import_type_id ON projects USING btree (import_type, id) WHERE (import_type IS NOT NULL); @@ -27342,6 +27420,8 @@ CREATE INDEX index_users_on_public_email_excluding_null_and_empty ON users USING CREATE UNIQUE INDEX index_users_on_reset_password_token ON users USING btree (reset_password_token); +CREATE UNIQUE INDEX index_users_on_source_by_namespace ON import_source_users USING btree (namespace_id, source_user_identifier, source_hostname, import_type); + CREATE INDEX index_users_on_state_and_user_type ON users USING btree (state, user_type); CREATE UNIQUE INDEX index_users_on_static_object_token ON users USING btree (static_object_token); diff --git a/ee/app/models/approval_rules/approval_group_rules_user.rb b/ee/app/models/approval_rules/approval_group_rules_user.rb index 5b5da9fa6df3f6..908105e2e29d11 100644 --- a/ee/app/models/approval_rules/approval_group_rules_user.rb +++ b/ee/app/models/approval_rules/approval_group_rules_user.rb @@ -5,6 +5,7 @@ module ApprovalRules class ApprovalGroupRulesUser < ApplicationRecord include ApprovalRuleUserLike + include Importable belongs_to :user belongs_to :approval_group_rule, class_name: 'ApprovalRules::ApprovalGroupRule' diff --git a/ee/app/models/board_assignee.rb b/ee/app/models/board_assignee.rb index d18d0a4678298c..5b58befe9fe735 100644 --- a/ee/app/models/board_assignee.rb +++ b/ee/app/models/board_assignee.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true class BoardAssignee < ApplicationRecord + include Importable + belongs_to :board belongs_to :assignee, class_name: 'User' diff --git a/ee/app/models/epic_issue.rb b/ee/app/models/epic_issue.rb index 57ca1652dee599..e70344274de398 100644 --- a/ee/app/models/epic_issue.rb +++ b/ee/app/models/epic_issue.rb @@ -5,6 +5,7 @@ class EpicIssue < ApplicationRecord include EachBatch include AfterCommitQueue include Epics::MetadataCacheUpdate + include Importable validates :epic, :issue, presence: true validates :issue, uniqueness: true diff --git a/ee/app/models/protected_environments/deploy_access_level.rb b/ee/app/models/protected_environments/deploy_access_level.rb index 49d2e7008dba97..b1a111577839f9 100644 --- a/ee/app/models/protected_environments/deploy_access_level.rb +++ b/ee/app/models/protected_environments/deploy_access_level.rb @@ -3,6 +3,7 @@ module ProtectedEnvironments class DeployAccessLevel < ApplicationRecord include Authorizable + include Importable self.table_name = 'protected_environment_deploy_access_levels' diff --git a/lib/bulk_imports/common/graphql/get_members_query.rb b/lib/bulk_imports/common/graphql/get_members_query.rb index 4f7533ee25f1ef..9fcd1da7013dab 100644 --- a/lib/bulk_imports/common/graphql/get_members_query.rb +++ b/lib/bulk_imports/common/graphql/get_members_query.rb @@ -30,6 +30,7 @@ def to_s user_gid: id public_email: publicEmail username: username + name: name } } } diff --git a/lib/bulk_imports/common/transformers/member_attributes_transformer.rb b/lib/bulk_imports/common/transformers/member_attributes_transformer.rb index 4cd87ec2b59a4d..50112f8b41f48b 100644 --- a/lib/bulk_imports/common/transformers/member_attributes_transformer.rb +++ b/lib/bulk_imports/common/transformers/member_attributes_transformer.rb @@ -5,6 +5,21 @@ module Common module Transformers class MemberAttributesTransformer def transform(context, data) + # Creates source users ahead of time to support previous GitLab versions that do not export user information + # (such as id, name, username). + # + # This method relies on the members GraphQL request to identify users and create corresponding source users. + # However, it's important to note that this approach may not be perfect, as the GraphQL request may not return + # all users who have contributed. For instance, users may not be returned if they have been removed from the + # members list or for public projects. + # + # For users who are not returned by the GraphQL request, import source users can still be created and + # associated with the Import User. However, owners may find it more challenging to identify these users, as + # they will only have the source_user_identifier available. To address this issue, an alternative approach is + # to create an additional pipeline that backfills any missing information by making an extra GraphQL request + # to the users endpoint. + find_or_create_source_users(context, data) if Feature.enabled?(:improved_user_mapping) + user = find_user(data&.dig('user', 'public_email')) access_level = data&.dig('access_level', 'integer_value') @@ -46,12 +61,42 @@ def cache_source_user_data(data, user, context) mapper = ::BulkImports::UsersMapper.new(context: context) - mapper.cache_source_user_id(source_user_id, user.id) + mapper.cache_source_user_id(source_user_id, user.id) if Feature.disabled?(:improved_user_mapping) + return unless source_username return if source_username == user.username mapper.cache_source_username(source_username, user.username) end + + def find_or_create_source_users(context, data) + gid = data&.dig('user', 'user_gid') + + source_user_id = GlobalID.parse(gid).model_id + source_name = data.dig('user', 'name') + source_username = data.dig('user', 'username') + + Logger.info(source_user_id: source_user_id, source_name: source_name, source_username: source_username) + + return if source_user_id.blank? || source_name.blank? || source_username.blank? + + source_user = source_user_mapper(context).find_or_create( + source_user_identifier: source_user_id, + source_name: source_name, + source_username: source_username + ) + + mapper = ::BulkImports::UsersMapper.new(context: context) + mapper.cache_import_source_user(source_user) + end + + def source_user_mapper(context) + Gitlab::Import::SourceUserMapper.new( + import_type: 'gitlab_migration', + namespace: context.portable.root_ancestor, + source_hostname: context.configuration.source_hostname + ) + end end end end diff --git a/lib/bulk_imports/ndjson_pipeline.rb b/lib/bulk_imports/ndjson_pipeline.rb index e817bfe80345d8..c9433c560dd12e 100644 --- a/lib/bulk_imports/ndjson_pipeline.rb +++ b/lib/bulk_imports/ndjson_pipeline.rb @@ -84,6 +84,8 @@ def deep_transform_relation!(relation_hash, relation_key, relation_definition, & end end + cache_import_source_user(relation_key, relation_hash) if Feature.enabled?(:improved_user_mapping) + yield(relation_key, relation_hash) end @@ -121,6 +123,52 @@ def members_mapper @members_mapper ||= BulkImports::UsersMapper.new(context: context) end + def source_user_mapper + @source_user_mapper ||= Gitlab::Import::SourceUserMapper.new( + import_type: 'gitlab_migration', + namespace: context.portable.root_ancestor, + source_hostname: context.configuration.source_hostname + ) + end + + # This method create a import source user for every contribution that has a user reference + # + # This approach rely on the import_export.yml to include the user reference in the relation_definition similar to + # the example below: + # + # { + # "title": "Nemo beatae aut quaerat est numquam dolorem.", + # "closed_by_id": null, + # "assignee_id": null, + # "updated_by_id": 1, + # "updated_by": { + # "id": 1, + # "name": "User 1", + # "username": "user1" + # }, + # "author_id": 12, + # "author": { + # "id": 12, + # "name": "User 12", + # "username": "user2" + # } + # } + # + # With this approach we can create a import source user for contributions from member and non member, but requires + # changing the export payload and would only work for new GitLab verions + # + def cache_import_source_user(relation_key, relation_hash) + return unless relation_factory::USER_NAME_REFERENCES.include?(relation_key) && relation_hash['id'] + + source_user = source_user_mapper(context).find_or_create( + source_user_identifier: relation_hash['id'], + source_name: relation_hash['name'], + source_username: relation_hash['username'] + ) + + members_mapper.cache_import_source_user(source_user) + end + def portable_class_sym portable.class.to_s.downcase.to_sym end diff --git a/lib/bulk_imports/users_mapper.rb b/lib/bulk_imports/users_mapper.rb index af002c6736718b..3ca37a99d481d6 100644 --- a/lib/bulk_imports/users_mapper.rb +++ b/lib/bulk_imports/users_mapper.rb @@ -4,14 +4,17 @@ module BulkImports class UsersMapper include Gitlab::Utils::StrongMemoize - SOURCE_USER_IDS_CACHE_KEY = 'bulk_imports/%{bulk_import}/%{entity}/source_user_ids' + SOURCE_USER_IDS_CACHE_KEY = 'bulk_imports/%{bulk_import}/source_user_ids' - SOURCE_USERNAMES_CACHE_KEY = 'bulk_imports/%{bulk_import}/%{entity}/source_usernames' + SOURCE_USERNAMES_CACHE_KEY = 'bulk_imports/%{bulk_import}/source_usernames' + + IMPORT_SOURCE_USER_IDS_CACHE_KEY = 'bulk_imports/%{bulk_import}/import_source_user_ids' def initialize(context:) @context = context @user_ids_cache_key = generate_cache_key(SOURCE_USER_IDS_CACHE_KEY) @usernames_cache_key = generate_cache_key(SOURCE_USERNAMES_CACHE_KEY) + @import_source_user_ids_cache_key = generate_cache_key(IMPORT_SOURCE_USER_IDS_CACHE_KEY) end def map @@ -26,6 +29,18 @@ def map end end + def map_import_source_users + strong_memoize(:map_import_source_users) do + map = Hash.new { default_user_id } + + cached_import_source_user_ids.each_pair do |source_id, import_source_user_id| + map[source_id.to_i] = import_source_user_id.to_i + end + + map + end + end + def map_usernames strong_memoize(:map_usernames) do map = {} @@ -54,6 +69,24 @@ def cache_source_username(source_username, destination_username) ::Gitlab::Cache::Import::Caching.hash_add(@usernames_cache_key, source_username, destination_username) end + # This method stores user mapping IDs which later is used by RelationFactory + # + # On the SOURCE_USER_IDS_CACHE_KEY cache is stored a map between source_user_identifier and a User#id. The User#id + # may be from a Placeholder user, Import User or a real user. This cache is later used when RelationFactory replaces + # the source user ids from the exported JSON objects with the corresponding destination user id. + # + # On the IMPORT_SOURCE_USER_IDS_CACHE_KEY cache is stored a map between source_user_identifier and an + # Import::SourceUser#id. This cache is later used by RelationFactory when building the `import_detail` records. + def cache_import_source_user(import_source_user) + cache_source_user_id(import_source_user.source_user_identifier, import_source_user.user.id) + + ::Gitlab::Cache::Import::Caching.hash_add( + @import_source_user_ids_cache_key, + import_source_user.source_user_identifier, + import_source_user.id + ) + end + private def generate_cache_key(pattern) @@ -70,5 +103,9 @@ def cached_source_user_ids def cached_source_usernames ::Gitlab::Cache::Import::Caching.values_from_hash(@usernames_cache_key) end + + def cached_import_source_user_ids + ::Gitlab::Cache::Import::Caching.values_from_hash(@import_source_user_ids_cache_key) + end end end diff --git a/lib/gitlab/hook_data/base_builder.rb b/lib/gitlab/hook_data/base_builder.rb index f02678b4244cdd..4a81f6b8a0efeb 100644 --- a/lib/gitlab/hook_data/base_builder.rb +++ b/lib/gitlab/hook_data/base_builder.rb @@ -43,7 +43,6 @@ def absolute_image_urls(markdown_text) if match[:image] && !match[:https] url = match[:url] url = "#{uploads_prefix}#{url}" if url.start_with?('/uploads') - url = "#{uploads_prefix}/#{url}" if url.start_with?('uploads') # wiki doesn't have a leading slash url = "/#{url}" unless url.start_with?('/') "![#{match[:title]}](#{Gitlab.config.gitlab.url}#{url})" diff --git a/lib/gitlab/hook_data/wiki_page_builder.rb b/lib/gitlab/hook_data/wiki_page_builder.rb index 6f1a9fbea1eec0..78ae896a3d8663 100644 --- a/lib/gitlab/hook_data/wiki_page_builder.rb +++ b/lib/gitlab/hook_data/wiki_page_builder.rb @@ -14,7 +14,7 @@ def build end def uploads_prefix - wiki_page.wiki.wiki_base_path + '' end end end diff --git a/lib/gitlab/import/create_placeholder.rb b/lib/gitlab/import/create_placeholder.rb new file mode 100644 index 00000000000000..59b1b07ebe9774 --- /dev/null +++ b/lib/gitlab/import/create_placeholder.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +# TODO: This class needs lof of refactoring +# +# * We should use a better strategy to generate unique username +# * We should handle ActiveRecord::RecordNotUnique exception +# * What should happen if the placeholder user cannot be created. Ideally, the import process should retry +# * What should we do if the source_username is too long? +# * We should handle missing params attributes + +module Gitlab + module Import + class CreatePlaceholder + def initialize( + namespace:, source_name:, source_username:, source_user_identifier:, + source_hostname:, import_type: + ) + @namespace = namespace + @source_name = source_name + @source_username = source_username + @source_user_identifier = source_user_identifier + @source_hostname = source_hostname + @import_type = import_type + end + + def execute + username = "#{source_username}_placeholder_#{SecureRandom.uuid}" + name = "Placeholder #{source_name}" + email = "#{username}@#{Settings.gitlab.host}" + + user = User.new(username: username, email: email, name: name, user_type: :placeholder) + user.assign_personal_namespace + user.build_placeholder_detail( + namespace: namespace, + import_type: import_type, + source_name: source_name, + source_username: source_username, + source_user_identifier: source_user_identifier, + source_hostname: source_hostname + ) + + user.save! + + user + end + + private + + attr_reader :namespace, :source_name, :source_username, :source_user_identifier, :source_hostname, :import_type + end + end +end diff --git a/lib/gitlab/import/reassign_imported_contributions.rb b/lib/gitlab/import/reassign_imported_contributions.rb new file mode 100644 index 00000000000000..19c28b6718f92d --- /dev/null +++ b/lib/gitlab/import/reassign_imported_contributions.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +module Gitlab + module Import + class ReassignImportedContributions + def initialize(import_source_user) + @import_source_user = import_source_user + end + + # rubocop: disable CodeReuse/ActiveRecord -- POC + # rubocop: disable Cop/InBatches -- POC + def execute + # The process may differ depending on whether we choose to create an import detail record for all the imported + # resources. If import_details are created for all records, we can use it to figure out all resources that we + # should reassign the user. If import_details are created only when an Import Bot is used then the we need to + # distinct approaches + + ::Import::Detail::USER_REFERENCES.each do |reference| + import_details = ::Import::Detail.where( + reference => import_source_user.id + ) + + importable_types = import_details.pluck(:importable_type) + importable_types.each do |importable_type| + import_details.where(importable_type: importable_type).in_batches.each do |batch| + importable_type.constantize.where( + id: batch.pluck(:importable_id), + reference => import_source_user.placeholder_user_id + ).update_all(reference => import_source_user.assigned_user_id) + end + end + end + end + # rubocop: enable Cop/InBatches + # rubocop: enable CodeReuse/ActiveRecord + + private + + attr_reader :import_source_user + end + end +end diff --git a/lib/gitlab/import/source_user_mapper.rb b/lib/gitlab/import/source_user_mapper.rb new file mode 100644 index 00000000000000..c24705356669cd --- /dev/null +++ b/lib/gitlab/import/source_user_mapper.rb @@ -0,0 +1,85 @@ +# frozen_string_literal: true + +module Gitlab + module Import + # Responsible for managing import source user records within GitLab's database. + # + # An import source user represents an external user entity and can be associated with different user types within + # GitLab, such as placeholder users, import user, or regular users. + # + # This class provides functionality to find existing import source users or create new ones as needed. It can also + # be extend to limit the number of placeholder users that can be created within a top-level group. For reference, + # if the number of placeholder users is reached, new import source users should be associated with an import user + # instead. + # + # The `find_or_create` method is the main entry point, allowing the retrieval or creation of import source user + # records. To prevent race conditions during creation, an exclusive lock mechanism (`in_lock`) is employed, + # ensuring that multiple import jobs do not attempt to create duplicate records simultaneously. + # + # Example usage: + # + # ```ruby + # mapper = SourceUserMapper.new(namespace: my_namespace, import_type: 'github', source_hostname: 'github.com') + # mapper.find_or_create(source_user_identifier: '123', source_name: 'John Doe', source_username: 'johndoe') + # ``` + # + class SourceUserMapper + include Gitlab::ExclusiveLeaseHelpers + + def initialize(namespace:, import_type:, source_hostname:) + @namespace = namespace + @import_type = import_type + @source_hostname = source_hostname + end + + def find_or_create(source_user_identifier:, source_name:, source_username:) + source_user = find_source_user(source_user_identifier) + + return source_user if source_user + + in_lock(lock_key(source_user_identifier), sleep_sec: 2.seconds) do |retried| + if retried + source_user = find_source_user(source_user_identifier) + + next source_user if source_user + end + + Logger.info( + message: 'Creating placeholder user', + source_user_identifier: source_user_identifier, + source_name: source_name, + source_username: source_username + ) + + user = CreatePlaceholder.new( + namespace: namespace, + import_type: import_type, + source_hostname: source_hostname, + source_name: source_name, + source_user_identifier: source_user_identifier, + source_username: source_username + ).execute + + user.placeholder_detail + end + end + + private + + attr_reader :namespace, :import_type, :source_hostname + + def find_source_user(source_user_identifier) + ::Import::SourceUser.find_source_user( + namespace: namespace, + import_type: import_type, + source_hostname: source_hostname, + source_user_identifier: source_user_identifier + ) + end + + def lock_key(source_user_identifier) + "import:source_user_mapper:#{namespace.id}:#{import_type}:#{source_hostname}:#{source_user_identifier}" + end + end + end +end diff --git a/lib/gitlab/import_export/base/relation_factory.rb b/lib/gitlab/import_export/base/relation_factory.rb index c3021f034cd578..7ee2fa1073a0ee 100644 --- a/lib/gitlab/import_export/base/relation_factory.rb +++ b/lib/gitlab/import_export/base/relation_factory.rb @@ -29,6 +29,21 @@ class RelationFactory owner_id ].freeze + USER_NAME_REFERENCES = %w[ + author + assignee + updated_by + merged_by + latest_closed_by + user + created_by + last_edited_by + merge_user + resolved_by + closed_by + owner + ].freeze + TOKEN_RESET_MODELS = %i[Project Namespace Group Ci::Trigger Ci::Build Ci::Runner ProjectHook ErrorTracking::ProjectErrorTrackingSetting].freeze attr_reader :relation_name, :importable @@ -78,6 +93,8 @@ def create setup_base_models setup_models + remove_user_name_references + generate_imported_object end @@ -128,6 +145,13 @@ def update_user_references end end + # Remove user details that were added to the export files + def remove_user_name_references + self.class::USER_NAME_REFERENCES.each do |reference| + @relation_hash.delete(reference) + end + end + # When an assignee (or any other listed association) did not exist in the members mapper, the importer is # assigned. We only need to assign each user once. def remove_duplicate_assignees @@ -180,6 +204,10 @@ def importable_class_name def imported_object if existing_or_new_object.respond_to?(:importing) existing_or_new_object.importing = true + + if Feature.enabled?(:improved_user_mapping) && @original_user.present? && @members_mapper.is_a?(::BulkImports::UsersMapper) + map_import_source_users(existing_or_new_object) + end end existing_or_new_object @@ -190,6 +218,29 @@ def imported_object retry if @imported_object_retries < IMPORTED_OBJECT_MAX_RETRIES end + # This method replaces the original_user hash values with the mapped import_source_users + # + # For example, original_user may contain a hash mapping association keys to a source user id + # + # { author_id: 1, closed_by_id: 2, updated_by_id: 3 } + # + # Therefore, this method transform this hash with the corresponding representation of the external user in + # GitLab database, in other words to the corresponding ImportSourceUser. Then the transformed hash is saved + # in the import_details + def map_import_source_users(existing_or_new_object) + import_detail = existing_or_new_object.import_detail || existing_or_new_object.build_import_detail + + mapped_source_users = @original_user.transform_values { |source_id| @members_mapper.map_import_source_users[source_id] } + + import_detail.assign_attributes(mapped_source_users) + + if importable.is_a?(Group) + import_detail.namespace_id = importable.id + else + import_detail.project_id = importable.id + end + end + def parsed_relation_hash strong_memoize(:parsed_relation_hash) do if use_attributes_permitter? && attributes_permitter.permitted_attributes_defined?(@relation_sym) diff --git a/lib/gitlab/import_export/group/import_export.yml b/lib/gitlab/import_export/group/import_export.yml index 7a91cfb340a764..7343fdbc754d4c 100644 --- a/lib/gitlab/import_export/group/import_export.yml +++ b/lib/gitlab/import_export/group/import_export.yml @@ -12,6 +12,7 @@ tree: - lists: - :label - :board + - :user - members: - :user - :namespace_settings @@ -21,8 +22,23 @@ included_attributes: - :id - :public_email - :username + - :name author: + - :id - :name + - :username + assignee: + - :id + - :name + - :username + updated_by: + - :id + - :name + - :username + closed_by: + - :id + - :name + - :username namespace_settings: - :prevent_sharing_groups_outside_hierarchy iterations_cadence: &iterations_cadence_definition @@ -108,26 +124,38 @@ ee: tree: group: - epics: + - :assignee + - :updated_by + - :closed_by - :parent - - :award_emoji + - award_emoji: + - :user - events: + - :author - :push_event_payload - label_links: - :label - notes: - :author - - :award_emoji + - :updated_by + - :resolved_by + - award_emoji: + - :user - events: + - :author - :push_event_payload - :system_note_metadata - - :resource_state_events + - resource_state_events: + - :user - boards: - - :board_assignee + - board_assignee: + - :assignee - :milestone - :labels - lists: - milestone: - events: + - :author - :push_event_payload - iterations_cadences: - :iterations diff --git a/lib/gitlab/import_export/group/relation_tree_restorer.rb b/lib/gitlab/import_export/group/relation_tree_restorer.rb index ae47c95036ea50..90a67b1781c167 100644 --- a/lib/gitlab/import_export/group/relation_tree_restorer.rb +++ b/lib/gitlab/import_export/group/relation_tree_restorer.rb @@ -187,7 +187,9 @@ def build_relations(relation_key, relation_definition, relation_index, data_hash def build_relation(relation_key, relation_definition, relation_index, data_hash) # TODO: This is hack to not create relation for the author # Rather make `RelationFactory#set_note_author` to take care of that - return data_hash if relation_key == 'author' || already_restored?(data_hash) + if @relation_factory::USER_NAME_REFERENCES.include?(relation_key) || already_restored?(data_hash) + return data_hash + end # create relation objects recursively for all sub-objects relation_definition.each do |sub_relation_key, sub_relation_definition| diff --git a/spec/factories/bulk_import.rb b/spec/factories/bulk_import.rb index 097bec43543e10..832b4a993a774e 100644 --- a/spec/factories/bulk_import.rb +++ b/spec/factories/bulk_import.rb @@ -26,5 +26,9 @@ trait :timeout do status { 3 } end + + trait :with_configuration do + association :configuration, factory: :bulk_import_configuration + end end end diff --git a/spec/lib/bulk_imports/common/transformers/member_attributes_transformer_spec.rb b/spec/lib/bulk_imports/common/transformers/member_attributes_transformer_spec.rb index 4565de32c7010d..1867514d29f47b 100644 --- a/spec/lib/bulk_imports/common/transformers/member_attributes_transformer_spec.rb +++ b/spec/lib/bulk_imports/common/transformers/member_attributes_transformer_spec.rb @@ -5,7 +5,7 @@ RSpec.describe BulkImports::Common::Transformers::MemberAttributesTransformer, feature_category: :importers do let_it_be(:user) { create(:user) } let_it_be(:secondary_email) { 'secondary@email.com' } - let_it_be(:bulk_import) { create(:bulk_import, user: user) } + let_it_be(:bulk_import) { create(:bulk_import, :with_configuration, user: user) } shared_examples 'members attribute transformer' do let_it_be(:tracker) { create(:bulk_import_tracker, entity: entity) } @@ -143,15 +143,15 @@ end context 'with a project' do - let_it_be(:entity) { create(:bulk_import_entity, bulk_import: bulk_import, project: project) } let_it_be(:project) { create(:project) } + let_it_be(:entity) { create(:bulk_import_entity, :project_entity, bulk_import: bulk_import, project: project) } include_examples 'members attribute transformer' end context 'with a group' do - let_it_be(:entity) { create(:bulk_import_entity, bulk_import: bulk_import, group: group) } let_it_be(:group) { create(:group) } + let_it_be(:entity) { create(:bulk_import_entity, :group_entity, bulk_import: bulk_import, group: group) } include_examples 'members attribute transformer' end @@ -167,7 +167,8 @@ def member_data(email: '', gid: nil, access_level: 30) 'user' => { 'user_gid' => gid, 'public_email' => email, - 'username' => 'source_username' + 'username' => 'source_username', + 'name' => 'source_name' } } end @@ -183,7 +184,8 @@ def nil_username_member_data(email: '', gid: nil, access_level: 30) 'user' => { 'user_gid' => gid, 'public_email' => email, - 'username' => nil + 'username' => nil, + 'name' => 'source_name' } } end diff --git a/spec/models/wiki_page_spec.rb b/spec/models/wiki_page_spec.rb index bbe8547479be7a..1c55eb3c38acdb 100644 --- a/spec/models/wiki_page_spec.rb +++ b/spec/models/wiki_page_spec.rb @@ -1063,16 +1063,12 @@ def force_wiki_change_branch end describe '#hook_attrs' do - let_it_be(:namespace) { create(:group, path: 'namespace-a') } - let_it_be(:project) { create(:project, path: 'project-a', namespace: namespace) } - - subject { build_wiki_page(project) } + subject { build_wiki_page(container) } it 'adds absolute urls for images in the content' do - subject.attributes[:content] = 'test![WikiPage_Image](uploads/abc/WikiPage_Image.png)' + subject.attributes[:content] = 'test![WikiPage_Image](/uploads/abc/WikiPage_Image.png)' - expected_path = "#{Settings.gitlab.url}/namespace-a/project-a/-/wikis/uploads/abc/WikiPage_Image.png)" - expect(subject.hook_attrs['content']).to eq("test![WikiPage_Image](#{expected_path}") + expect(subject.hook_attrs['content']).to eq("test![WikiPage_Image](#{Settings.gitlab.url}/uploads/abc/WikiPage_Image.png)") end end -- GitLab From 642cfb9368ad7d6ba705f3f23157297509d0d5e1 Mon Sep 17 00:00:00 2001 From: Rodrigo Tomonari Date: Mon, 1 Apr 2024 23:41:33 -0300 Subject: [PATCH 3/4] Improved class docs --- .../member_attributes_transformer.rb | 16 ++++++++-------- lib/bulk_imports/ndjson_pipeline.rb | 6 ++++-- .../import/reassign_imported_contributions.rb | 4 ++-- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/lib/bulk_imports/common/transformers/member_attributes_transformer.rb b/lib/bulk_imports/common/transformers/member_attributes_transformer.rb index 50112f8b41f48b..44ae342f5f148e 100644 --- a/lib/bulk_imports/common/transformers/member_attributes_transformer.rb +++ b/lib/bulk_imports/common/transformers/member_attributes_transformer.rb @@ -6,18 +6,18 @@ module Transformers class MemberAttributesTransformer def transform(context, data) # Creates source users ahead of time to support previous GitLab versions that do not export user information - # (such as id, name, username). + # (such as id, name, username). For new GitLab versions, this is not necessary since we will start to export + # the information. # - # This method relies on the members GraphQL request to identify users and create corresponding source users. - # However, it's important to note that this approach may not be perfect, as the GraphQL request may not return + # This method relies on the members GraphQL request to get extra required information about the users + # (name and username) to create the corresponding import source users. + # However, it's important to note that this approach is not be perfect, as the GraphQL request may not return # all users who have contributed. For instance, users may not be returned if they have been removed from the # members list or for public projects. # - # For users who are not returned by the GraphQL request, import source users can still be created and - # associated with the Import User. However, owners may find it more challenging to identify these users, as - # they will only have the source_user_identifier available. To address this issue, an alternative approach is - # to create an additional pipeline that backfills any missing information by making an extra GraphQL request - # to the users endpoint. + # So to import non-members, we could still create and import source users with just the source_user_identifier + # set and map the contribution to the Import User. Then create an additional pipeline that populate the + # import source user name and username by making an extra GraphQL request to the users endpoint. find_or_create_source_users(context, data) if Feature.enabled?(:improved_user_mapping) user = find_user(data&.dig('user', 'public_email')) diff --git a/lib/bulk_imports/ndjson_pipeline.rb b/lib/bulk_imports/ndjson_pipeline.rb index c9433c560dd12e..c242740bf43df3 100644 --- a/lib/bulk_imports/ndjson_pipeline.rb +++ b/lib/bulk_imports/ndjson_pipeline.rb @@ -154,9 +154,11 @@ def source_user_mapper # } # } # - # With this approach we can create a import source user for contributions from member and non member, but requires - # changing the export payload and would only work for new GitLab verions + # With this approach, we can create a import source user for contributions from member and non member, but + # requires changing the export payload and would only work for new GitLab versions # + # Another approach, and probably the best one is to not change how TREE relations are exported, and instead + # create a new export file that contains information about all users that contributed. def cache_import_source_user(relation_key, relation_hash) return unless relation_factory::USER_NAME_REFERENCES.include?(relation_key) && relation_hash['id'] diff --git a/lib/gitlab/import/reassign_imported_contributions.rb b/lib/gitlab/import/reassign_imported_contributions.rb index 19c28b6718f92d..1f5b4748c8423d 100644 --- a/lib/gitlab/import/reassign_imported_contributions.rb +++ b/lib/gitlab/import/reassign_imported_contributions.rb @@ -12,8 +12,8 @@ def initialize(import_source_user) def execute # The process may differ depending on whether we choose to create an import detail record for all the imported # resources. If import_details are created for all records, we can use it to figure out all resources that we - # should reassign the user. If import_details are created only when an Import Bot is used then the we need to - # distinct approaches + # should reassign the user. If import_details are created only when an Import User is used, then the we need to + # have distinct approaches. ::Import::Detail::USER_REFERENCES.each do |reference| import_details = ::Import::Detail.where( -- GitLab From f842075ea2e715b846e34f665a251ef6d4ea37f2 Mon Sep 17 00:00:00 2001 From: Rodrigo Tomonari Date: Tue, 2 Apr 2024 16:39:03 -0300 Subject: [PATCH 4/4] Renamed field and fix issues --- app/models/merge_request_assignee.rb | 1 + app/models/merge_request_reviewer.rb | 1 + ...240321222143_create_import_source_users.rb | 4 ++-- db/structure.sql | 2 +- lib/bulk_imports/ndjson_pipeline.rb | 2 +- lib/bulk_imports/users_mapper.rb | 24 ++++++++----------- .../import/reassign_imported_contributions.rb | 2 +- 7 files changed, 17 insertions(+), 19 deletions(-) diff --git a/app/models/merge_request_assignee.rb b/app/models/merge_request_assignee.rb index 3e4a35805c9e74..22e7edf70259dc 100644 --- a/app/models/merge_request_assignee.rb +++ b/app/models/merge_request_assignee.rb @@ -2,6 +2,7 @@ class MergeRequestAssignee < ApplicationRecord include EachBatch + include Importable belongs_to :merge_request, touch: true belongs_to :assignee, class_name: "User", foreign_key: :user_id, inverse_of: :merge_request_assignees diff --git a/app/models/merge_request_reviewer.rb b/app/models/merge_request_reviewer.rb index e1e2805cd8f349..5fdec994b152c7 100644 --- a/app/models/merge_request_reviewer.rb +++ b/app/models/merge_request_reviewer.rb @@ -2,6 +2,7 @@ class MergeRequestReviewer < ApplicationRecord include MergeRequestReviewerState + include Importable include BulkInsertSafe # must be included _last_ i.e. after any other concerns belongs_to :merge_request diff --git a/db/migrate/20240321222143_create_import_source_users.rb b/db/migrate/20240321222143_create_import_source_users.rb index 2a0a374c221474..1165afb576c572 100644 --- a/db/migrate/20240321222143_create_import_source_users.rb +++ b/db/migrate/20240321222143_create_import_source_users.rb @@ -8,7 +8,7 @@ class CreateImportSourceUsers < Gitlab::Database::Migration[2.2] def change create_table :import_source_users do |t| t.bigint :placeholder_user_id, null: true - t.bigint :assigned_user_id, null: true + t.bigint :assignee_user_id, null: true t.bigint :namespace_id t.text :source_username, limit: 255 t.text :source_name, limit: 255 @@ -21,7 +21,7 @@ def change t.index [:namespace_id, :source_user_identifier, :source_hostname, :import_type], unique: true, name: INDEX_NAME t.index :placeholder_user_id - t.index :assigned_user_id + t.index :assignee_user_id end end end diff --git a/db/structure.sql b/db/structure.sql index c2fddc44bdff8c..720a948dcee47c 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -9746,7 +9746,7 @@ ALTER SEQUENCE import_failures_id_seq OWNED BY import_failures.id; CREATE TABLE import_source_users ( id bigint NOT NULL, placeholder_user_id bigint, - assigned_user_id bigint, + assignee_user_id bigint, namespace_id bigint, source_username text, source_name text, diff --git a/lib/bulk_imports/ndjson_pipeline.rb b/lib/bulk_imports/ndjson_pipeline.rb index c242740bf43df3..57f595c48f2d0e 100644 --- a/lib/bulk_imports/ndjson_pipeline.rb +++ b/lib/bulk_imports/ndjson_pipeline.rb @@ -123,7 +123,7 @@ def members_mapper @members_mapper ||= BulkImports::UsersMapper.new(context: context) end - def source_user_mapper + def source_user_mapper(context) @source_user_mapper ||= Gitlab::Import::SourceUserMapper.new( import_type: 'gitlab_migration', namespace: context.portable.root_ancestor, diff --git a/lib/bulk_imports/users_mapper.rb b/lib/bulk_imports/users_mapper.rb index 3ca37a99d481d6..4d82be5f0a9960 100644 --- a/lib/bulk_imports/users_mapper.rb +++ b/lib/bulk_imports/users_mapper.rb @@ -18,27 +18,23 @@ def initialize(context:) end def map - strong_memoize(:map) do - map = Hash.new { default_user_id } + map = Hash.new { default_user_id } - cached_source_user_ids.each_pair do |source_id, destination_id| - map[source_id.to_i] = destination_id.to_i - end - - map + cached_source_user_ids.each_pair do |source_id, destination_id| + map[source_id.to_i] = destination_id.to_i end + + map end def map_import_source_users - strong_memoize(:map_import_source_users) do - map = Hash.new { default_user_id } + map = Hash.new { default_user_id } - cached_import_source_user_ids.each_pair do |source_id, import_source_user_id| - map[source_id.to_i] = import_source_user_id.to_i - end - - map + cached_import_source_user_ids.each_pair do |source_id, import_source_user_id| + map[source_id.to_i] = import_source_user_id.to_i end + + map end def map_usernames diff --git a/lib/gitlab/import/reassign_imported_contributions.rb b/lib/gitlab/import/reassign_imported_contributions.rb index 1f5b4748c8423d..3b80ea9958b247 100644 --- a/lib/gitlab/import/reassign_imported_contributions.rb +++ b/lib/gitlab/import/reassign_imported_contributions.rb @@ -26,7 +26,7 @@ def execute importable_type.constantize.where( id: batch.pluck(:importable_id), reference => import_source_user.placeholder_user_id - ).update_all(reference => import_source_user.assigned_user_id) + ).update_all(reference => import_source_user.assignee_user_id) end end end -- GitLab