From 4d71b78b3a032ce47485a3c2dba78fa976bc7a44 Mon Sep 17 00:00:00 2001 From: Rodrigo Tomonari Date: Tue, 16 May 2023 18:37:25 -0300 Subject: [PATCH 01/18] Introduce parallelised BitBucket Server Importer Update BitBucket Server to import project using multiple workers similar to GitHub Importer Changelog: changed --- app/workers/all_queues.yml | 81 +++++++ .../object_importer.rb | 108 ++++++++++ .../bitbucket_server_import/stage_methods.rb | 80 +++++++ .../advance_stage_worker.rb | 39 ++++ .../import_lfs_object_worker.rb | 15 ++ .../import_pull_request_notes_worker.rb | 15 ++ .../import_pull_request_worker.rb | 15 ++ .../stage/finish_import_worker.rb | 17 ++ .../stage/import_lfs_objects_worker.rb | 38 ++++ .../stage/import_notes_worker.rb | 38 ++++ .../stage/import_pull_requests_worker.rb | 42 ++++ .../stage/import_repository_worker.rb | 38 ++++ config/sidekiq_queues.yml | 18 ++ .../representation/pull_request.rb | 17 ++ .../importers/lfs_object_importer.rb | 21 ++ .../importers/lfs_objects_importer.rb | 23 ++ .../importers/notes_importer.rb | 41 ++++ .../importers/pull_request_importer.rb | 54 +++++ .../importers/pull_request_notes_importer.rb | 198 ++++++++++++++++++ .../importers/pull_requests_importer.rb | 46 ++++ .../importers/repository_importer.rb | 56 +++++ lib/gitlab/bitbucket_server_import/logger.rb | 11 + .../parallel_importer.rb | 33 +++ .../parallel_scheduling.rb | 82 ++++++++ .../bitbucket_server_import/user_finder.rb | 45 ++++ lib/gitlab/import_sources.rb | 2 +- .../representation/pull_request_spec.rb | 19 +- 27 files changed, 1190 insertions(+), 2 deletions(-) create mode 100644 app/workers/concerns/gitlab/bitbucket_server_import/object_importer.rb create mode 100644 app/workers/concerns/gitlab/bitbucket_server_import/stage_methods.rb create mode 100644 app/workers/gitlab/bitbucket_server_import/advance_stage_worker.rb create mode 100644 app/workers/gitlab/bitbucket_server_import/import_lfs_object_worker.rb create mode 100644 app/workers/gitlab/bitbucket_server_import/import_pull_request_notes_worker.rb create mode 100644 app/workers/gitlab/bitbucket_server_import/import_pull_request_worker.rb create mode 100644 app/workers/gitlab/bitbucket_server_import/stage/finish_import_worker.rb create mode 100644 app/workers/gitlab/bitbucket_server_import/stage/import_lfs_objects_worker.rb create mode 100644 app/workers/gitlab/bitbucket_server_import/stage/import_notes_worker.rb create mode 100644 app/workers/gitlab/bitbucket_server_import/stage/import_pull_requests_worker.rb create mode 100644 app/workers/gitlab/bitbucket_server_import/stage/import_repository_worker.rb create mode 100644 lib/gitlab/bitbucket_server_import/importers/lfs_object_importer.rb create mode 100644 lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer.rb create mode 100644 lib/gitlab/bitbucket_server_import/importers/notes_importer.rb create mode 100644 lib/gitlab/bitbucket_server_import/importers/pull_request_importer.rb create mode 100644 lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer.rb create mode 100644 lib/gitlab/bitbucket_server_import/importers/pull_requests_importer.rb create mode 100644 lib/gitlab/bitbucket_server_import/importers/repository_importer.rb create mode 100644 lib/gitlab/bitbucket_server_import/logger.rb create mode 100644 lib/gitlab/bitbucket_server_import/parallel_importer.rb create mode 100644 lib/gitlab/bitbucket_server_import/parallel_scheduling.rb create mode 100644 lib/gitlab/bitbucket_server_import/user_finder.rb diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index e9965ff0027cc4..75d61bc4999fe6 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -2307,6 +2307,87 @@ :weight: 1 :idempotent: false :tags: [] +- :name: bitbucket_server_import_advance_stage + :worker_name: Gitlab::BitbucketServerImport::AdvanceStageWorker + :feature_category: :importers + :has_external_dependencies: false + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: false + :tags: [] +- :name: bitbucket_server_import_import_lfs_object + :worker_name: Gitlab::BitbucketServerImport::ImportLfsObjectWorker + :feature_category: + :has_external_dependencies: true + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: true + :tags: [] +- :name: bitbucket_server_import_import_pull_request + :worker_name: Gitlab::BitbucketServerImport::ImportPullRequestWorker + :feature_category: + :has_external_dependencies: true + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: true + :tags: [] +- :name: bitbucket_server_import_import_pull_request_notes + :worker_name: Gitlab::BitbucketServerImport::ImportPullRequestNotesWorker + :feature_category: + :has_external_dependencies: true + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: true + :tags: [] +- :name: bitbucket_server_import_stage_finish_import + :worker_name: Gitlab::BitbucketServerImport::Stage::FinishImportWorker + :feature_category: :importers + :has_external_dependencies: true + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: false + :tags: [] +- :name: bitbucket_server_import_stage_import_lfs_objects + :worker_name: Gitlab::BitbucketServerImport::Stage::ImportLfsObjectsWorker + :feature_category: :importers + :has_external_dependencies: true + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: false + :tags: [] +- :name: bitbucket_server_import_stage_import_notes + :worker_name: Gitlab::BitbucketServerImport::Stage::ImportNotesWorker + :feature_category: :importers + :has_external_dependencies: true + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: false + :tags: [] +- :name: bitbucket_server_import_stage_import_pull_requests + :worker_name: Gitlab::BitbucketServerImport::Stage::ImportPullRequestsWorker + :feature_category: :importers + :has_external_dependencies: true + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: false + :tags: [] +- :name: bitbucket_server_import_stage_import_repository + :worker_name: Gitlab::BitbucketServerImport::Stage::ImportRepositoryWorker + :feature_category: :importers + :has_external_dependencies: true + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: false + :tags: [] - :name: bulk_import :worker_name: BulkImportWorker :feature_category: :importers diff --git a/app/workers/concerns/gitlab/bitbucket_server_import/object_importer.rb b/app/workers/concerns/gitlab/bitbucket_server_import/object_importer.rb new file mode 100644 index 00000000000000..f7463222db3e3c --- /dev/null +++ b/app/workers/concerns/gitlab/bitbucket_server_import/object_importer.rb @@ -0,0 +1,108 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + # ObjectImporter defines the base behaviour for every Sidekiq worker that + # imports a single resource such as a note or pull request. + module ObjectImporter + extend ActiveSupport::Concern + + included do + include ApplicationWorker + + data_consistency :always + + worker_has_external_dependencies! + + sidekiq_retries_exhausted do |msg| + args = msg['args'] + correlation_id = msg['correlation_id'] + jid = msg['jid'] + + new.perform_failure(args[0], args[1], correlation_id) + + # If a job is being exhausted we still want to notify the + # Gitlab::Import::AdvanceStageWorker to prevent the entire import from getting stuck + if args.length == 3 && (key = args.last) && key.is_a?(String) + JobWaiter.notify(key, jid) + end + end + end + + def perform(project_id, hash, notify_key) + project = Project.find_by_id(project_id) + return unless project + + if project.import_state&.canceled? + info(project.id, message: 'project import canceled') + return + end + + import(project, hash) + ensure + notify_waiter(notify_key) + end + + # project - An instance of `Project` to import the data into. + # hash - A Hash containing the details of the object to import. + def import(project, hash) + info(project.id, message: 'importer started') + + importer_class.new(project, hash).execute + + info(project.id, message: 'importer finished') + rescue ActiveRecord::RecordInvalid => e + # We do not raise exception to prevent job retry + track_exception(project, e) + rescue StandardError => e + track_and_raise_exception(project, e) + end + + # hash - A Hash containing the details of the object to import. + def perform_failure(project_id, _hash, correlation_id) + project = Project.find_by_id(project_id) + return unless project + + failure = project.import_failures.failures_by_correlation_id(correlation_id).first + return unless failure + end + + def notify_waiter(key) + JobWaiter.notify(key, jid) + end + + # Returns the class to use for importing the object. + def importer_class + raise NotImplementedError + end + + private + + def info(project_id, extra = {}) + Logger.info(log_attributes(project_id, extra)) + end + + def log_attributes(project_id, extra = {}) + extra.merge( + project_id: project_id, + importer: importer_class.name + ) + end + + def track_exception(project, exception, fail_import: false) + Gitlab::Import::ImportFailureService.track( + project_id: project.id, + error_source: importer_class.name, + exception: exception, + fail_import: fail_import + ) + end + + def track_and_raise_exception(project, exception, fail_import: false) + track_exception(project, exception, fail_import: fail_import) + + raise(exception) + end + end + end +end diff --git a/app/workers/concerns/gitlab/bitbucket_server_import/stage_methods.rb b/app/workers/concerns/gitlab/bitbucket_server_import/stage_methods.rb new file mode 100644 index 00000000000000..be84d9d7b2328b --- /dev/null +++ b/app/workers/concerns/gitlab/bitbucket_server_import/stage_methods.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + module StageMethods + extend ActiveSupport::Concern + + included do + include ApplicationWorker + + worker_has_external_dependencies! + + feature_category :importers + + data_consistency :always + + sidekiq_options dead: false, retry: 3 + + sidekiq_retries_exhausted do |msg, e| + Gitlab::Import::ImportFailureService.track( + project_id: msg['args'][0], + exception: e, + fail_import: true + ) + end + end + + # project_id - The ID of the GitLab project to import the data into. + def perform(project_id) + info(project_id, message: 'starting stage') + + return unless (project = find_project(project_id)) + + if project.import_state&.canceled? + info(project_id, message: 'project import canceled') + + return + end + + import(project) + + info(project_id, message: 'stage finished') + rescue StandardError => e + Gitlab::Import::ImportFailureService.track( + project_id: project_id, + exception: e, + error_source: self.class.name, + fail_import: abort_on_failure + ) + + raise(e) + end + + def find_project(id) + # If the project has been marked as failed we want to bail out + # automatically. + # rubocop: disable CodeReuse/ActiveRecord + Project.joins_import_state.where(import_state: { status: :started }).find_by_id(id) + # rubocop: enable CodeReuse/ActiveRecord + end + + def abort_on_failure + false + end + + private + + def info(project_id, extra = {}) + Logger.info(log_attributes(project_id, extra)) + end + + def log_attributes(project_id, extra = {}) + extra.merge( + project_id: project_id, + import_stage: self.class.name + ) + end + end + end +end diff --git a/app/workers/gitlab/bitbucket_server_import/advance_stage_worker.rb b/app/workers/gitlab/bitbucket_server_import/advance_stage_worker.rb new file mode 100644 index 00000000000000..2c8db639725eb3 --- /dev/null +++ b/app/workers/gitlab/bitbucket_server_import/advance_stage_worker.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + # AdvanceStageWorker is a worker used by the BitBucket Server Importer to wait for a + # number of jobs to complete, without blocking a thread. Once all jobs have + # been completed this worker will advance the import process to the next + # stage. + class AdvanceStageWorker # rubocop:disable Scalability/IdempotentWorker + include ApplicationWorker + include ::Gitlab::Import::AdvanceStage + + data_consistency :delayed + + sidekiq_options dead: false, retry: 3 + + feature_category :importers + + loggable_arguments 1, 2 + + # The known importer stages and their corresponding Sidekiq workers. + STAGES = { + notes: Stage::ImportNotesWorker, + lfs_objects: Stage::ImportLfsObjectsWorker, + finish: Stage::FinishImportWorker + }.freeze + + def find_import_state(project_id) + ProjectImportState.jid_by(project_id: project_id, status: :started) + end + + private + + def next_stage_worker(next_stage) + STAGES.fetch(next_stage.to_sym) + end + end + end +end diff --git a/app/workers/gitlab/bitbucket_server_import/import_lfs_object_worker.rb b/app/workers/gitlab/bitbucket_server_import/import_lfs_object_worker.rb new file mode 100644 index 00000000000000..3e674d1ed00868 --- /dev/null +++ b/app/workers/gitlab/bitbucket_server_import/import_lfs_object_worker.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + class ImportLfsObjectWorker + include ObjectImporter + + idempotent! + + def importer_class + Importers::LfsObjectImporter + end + end + end +end diff --git a/app/workers/gitlab/bitbucket_server_import/import_pull_request_notes_worker.rb b/app/workers/gitlab/bitbucket_server_import/import_pull_request_notes_worker.rb new file mode 100644 index 00000000000000..a32343172c83d4 --- /dev/null +++ b/app/workers/gitlab/bitbucket_server_import/import_pull_request_notes_worker.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + class ImportPullRequestNotesWorker + include ObjectImporter + + idempotent! + + def importer_class + Importers::PullRequestNotesImporter + end + end + end +end diff --git a/app/workers/gitlab/bitbucket_server_import/import_pull_request_worker.rb b/app/workers/gitlab/bitbucket_server_import/import_pull_request_worker.rb new file mode 100644 index 00000000000000..86b0a39346a4ef --- /dev/null +++ b/app/workers/gitlab/bitbucket_server_import/import_pull_request_worker.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + class ImportPullRequestWorker + include ObjectImporter + + idempotent! + + def importer_class + Importers::PullRequestImporter + end + end + end +end diff --git a/app/workers/gitlab/bitbucket_server_import/stage/finish_import_worker.rb b/app/workers/gitlab/bitbucket_server_import/stage/finish_import_worker.rb new file mode 100644 index 00000000000000..da168b56f455d2 --- /dev/null +++ b/app/workers/gitlab/bitbucket_server_import/stage/finish_import_worker.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + module Stage + class FinishImportWorker # rubocop:disable Scalability/IdempotentWorker + include StageMethods + + # project - An instance of Project. + def import(project) + @project = project + project.after_import + end + end + end + end +end diff --git a/app/workers/gitlab/bitbucket_server_import/stage/import_lfs_objects_worker.rb b/app/workers/gitlab/bitbucket_server_import/stage/import_lfs_objects_worker.rb new file mode 100644 index 00000000000000..166d88fb6f5c95 --- /dev/null +++ b/app/workers/gitlab/bitbucket_server_import/stage/import_lfs_objects_worker.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + module Stage + class ImportLfsObjectsWorker # rubocop:disable Scalability/IdempotentWorker + include StageMethods + + # project - An instance of Project. + def import(project) + waiter = importer_class.new(project).execute + + project.import_state.refresh_jid_expiration + + AdvanceStageWorker.perform_async( + project.id, + { waiter.key => waiter.jobs_remaining }, + :finish + ) + rescue StandardError => e + Gitlab::Import::ImportFailureService.track( + project_id: project.id, + error_source: self.class.name, + exception: e, + fail_import: false, + metrics: true + ) + + raise(e) + end + + def importer_class + Importers::LfsObjectsImporter + end + end + end + end +end diff --git a/app/workers/gitlab/bitbucket_server_import/stage/import_notes_worker.rb b/app/workers/gitlab/bitbucket_server_import/stage/import_notes_worker.rb new file mode 100644 index 00000000000000..e36e1afd077db4 --- /dev/null +++ b/app/workers/gitlab/bitbucket_server_import/stage/import_notes_worker.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + module Stage + class ImportNotesWorker # rubocop:disable Scalability/IdempotentWorker + include StageMethods + + # project - An instance of Project. + def import(project) + waiter = importer_class.new(project).execute + + project.import_state.refresh_jid_expiration + + AdvanceStageWorker.perform_async( + project.id, + { waiter.key => waiter.jobs_remaining }, + :lfs_objects + ) + rescue StandardError => e + Gitlab::Import::ImportFailureService.track( + project_id: project.id, + error_source: self.class.name, + exception: e, + fail_import: false, + metrics: true + ) + + raise(e) + end + + def importer_class + Importers::NotesImporter + end + end + end + end +end diff --git a/app/workers/gitlab/bitbucket_server_import/stage/import_pull_requests_worker.rb b/app/workers/gitlab/bitbucket_server_import/stage/import_pull_requests_worker.rb new file mode 100644 index 00000000000000..06a97ea96b59d0 --- /dev/null +++ b/app/workers/gitlab/bitbucket_server_import/stage/import_pull_requests_worker.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + module Stage + class ImportPullRequestsWorker # rubocop:disable Scalability/IdempotentWorker + include StageMethods + + # project - An instance of Project. + def import(project) + waiter = importer_class.new(project).execute + + project.import_state.refresh_jid_expiration + + AdvanceStageWorker.perform_async( + project.id, + { waiter.key => waiter.jobs_remaining }, + :notes + ) + rescue StandardError => e + Gitlab::Import::ImportFailureService.track( + project_id: project.id, + error_source: self.class.name, + exception: e, + fail_import: false, + metrics: true + ) + + raise(e) + end + + def importer_class + Importers::PullRequestsImporter + end + + def abort_on_failure + true + end + end + end + end +end diff --git a/app/workers/gitlab/bitbucket_server_import/stage/import_repository_worker.rb b/app/workers/gitlab/bitbucket_server_import/stage/import_repository_worker.rb new file mode 100644 index 00000000000000..3a84d41dbdc393 --- /dev/null +++ b/app/workers/gitlab/bitbucket_server_import/stage/import_repository_worker.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + module Stage + class ImportRepositoryWorker # rubocop:disable Scalability/IdempotentWorker + include StageMethods + + # project - An instance of Project. + def import(project) + importer = importer_class.new(project) + + importer.execute + + ImportPullRequestsWorker.perform_async(project.id) + rescue StandardError => e + Gitlab::Import::ImportFailureService.track( + project_id: project.id, + error_source: self.class.name, + exception: e, + fail_import: abort_on_failure, + metrics: true + ) + + raise(e) + end + + def importer_class + Importers::RepositoryImporter + end + + def abort_on_failure + true + end + end + end + end +end diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml index 50511d35c0a47d..dfd9dab6c24818 100644 --- a/config/sidekiq_queues.yml +++ b/config/sidekiq_queues.yml @@ -75,6 +75,24 @@ - 1 - - batched_background_migrations - 1 +- - bitbucket_server_import_advance_stage + - 1 +- - bitbucket_server_import_import_lfs_object + - 1 +- - bitbucket_server_import_import_pull_request + - 1 +- - bitbucket_server_import_import_pull_request_notes + - 1 +- - bitbucket_server_import_stage_finish_import + - 1 +- - bitbucket_server_import_stage_import_lfs_objects + - 1 +- - bitbucket_server_import_stage_import_notes + - 1 +- - bitbucket_server_import_stage_import_pull_requests + - 1 +- - bitbucket_server_import_stage_import_repository + - 1 - - bulk_import - 1 - - bulk_imports_entity diff --git a/lib/bitbucket_server/representation/pull_request.rb b/lib/bitbucket_server/representation/pull_request.rb index 2f377bdced2af9..9a8eec67bd7ed5 100644 --- a/lib/bitbucket_server/representation/pull_request.rb +++ b/lib/bitbucket_server/representation/pull_request.rb @@ -68,6 +68,23 @@ def target_branch_sha raw.dig('toRef', 'latestCommit') end + def to_hash + { + iid: iid, + author: author, + author_email: author_email, + author_username: author_username, + description: description, + created_at: created_at, + updated_at: updated_at, + state: state, + title: title, + source_branch_name: source_branch_name, + target_branch_name: target_branch_name, + target_branch_sha: target_branch_sha + } + end + private def created_date diff --git a/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer.rb b/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer.rb new file mode 100644 index 00000000000000..b6cb1e929281aa --- /dev/null +++ b/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + module Importers + class LfsObjectImporter + def initialize(project, hash) + # TODO + end + + def execute + # TODO + end + + private + + attr_reader :object, :project, :formatter, :user_finder + end + end + end +end diff --git a/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer.rb b/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer.rb new file mode 100644 index 00000000000000..35e6a0f3442856 --- /dev/null +++ b/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + module Importers + class LfsObjectsImporter + include ParallelScheduling + + def execute + job_waiter + end + + def sidekiq_worker_class + ImportLfsObjectWorker + end + + def collection_method + :lfs_objects + end + end + end + end +end diff --git a/lib/gitlab/bitbucket_server_import/importers/notes_importer.rb b/lib/gitlab/bitbucket_server_import/importers/notes_importer.rb new file mode 100644 index 00000000000000..a9c9a50999f950 --- /dev/null +++ b/lib/gitlab/bitbucket_server_import/importers/notes_importer.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + module Importers + class NotesImporter + include ParallelScheduling + + def execute + project.merge_requests.find_each do |merge_request| + job_waiter.jobs_remaining += 1 + + next if already_imported?(merge_request) + + job_delay = calculate_job_delay(job_waiter.jobs_remaining) + + sidekiq_worker_class.perform_in(job_delay, project.id, { iid: merge_request.iid }, job_waiter.key) + end + + job_waiter + end + + private + + attr_reader :project + + def sidekiq_worker_class + ImportPullRequestNotesWorker + end + + def id_for_already_imported_cache(merge_request) + merge_request.iid + end + + def collection_method + :notes + end + end + end + end +end diff --git a/lib/gitlab/bitbucket_server_import/importers/pull_request_importer.rb b/lib/gitlab/bitbucket_server_import/importers/pull_request_importer.rb new file mode 100644 index 00000000000000..af6438f2864bba --- /dev/null +++ b/lib/gitlab/bitbucket_server_import/importers/pull_request_importer.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + module Importers + class PullRequestImporter + def initialize(project, hash) + @project = project + @formatter = Gitlab::ImportFormatter.new + @user_finder = UserFinder.new(project) + + # TODO: Convert object into a object instead of using it as a hash + @object = hash.with_indifferent_access + end + + def execute + description = '' + description += author_line + description += object[:description] if object[:description] + + attributes = { + iid: object[:iid], + title: object[:title], + description: description, + source_project_id: project.id, + source_branch: Gitlab::Git.ref_name(object[:source_branch_name]), + source_branch_sha: object[:source_branch_sha], + target_project_id: project.id, + target_branch: Gitlab::Git.ref_name(object[:target_branch_name]), + target_branch_sha: object[:target_branch_sha], + state_id: MergeRequest.available_states[object[:state]], + author_id: user_finder.author_id(object), + created_at: object[:created_at], + updated_at: object[:updated_at] + } + + creator = Gitlab::Import::MergeRequestCreator.new(project) + + creator.execute(attributes) + end + + private + + attr_reader :object, :project, :formatter, :user_finder + + def author_line + return '' if user_finder.uid(object) + + formatter.author_line(object[:author]) + end + end + end + end +end diff --git a/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer.rb b/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer.rb new file mode 100644 index 00000000000000..c906ce6e4df414 --- /dev/null +++ b/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer.rb @@ -0,0 +1,198 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + module Importers + class PullRequestNotesImporter + def initialize(project, hash) + @project = project + @formatter = Gitlab::ImportFormatter.new + @client = BitbucketServer::Client.new(project.import_data.credentials) + @project_key = project.import_data.data['project_key'] + @repository_slug = project.import_data.data['repo_slug'] + @user_finder = UserFinder.new(project) + + # TODO: Convert object into a object instead of using it as a hash + @object = hash.with_indifferent_access + + @logger = Logger.build + end + + def execute + merge_request = project.merge_requests.find_by(iid: object[:iid]) # rubocop: disable CodeReuse/ActiveRecord + + return unless merge_request + + activities = client.activities(project_key, repository_slug, merge_request.iid) + + comments, other_activities = activities.partition(&:comment?) + + merge_event = other_activities.find(&:merge_event?) + import_merge_event(merge_request, merge_event) if merge_event + + inline_comments, pr_comments = comments.partition(&:inline_comment?) + + import_inline_comments(inline_comments.map(&:comment), merge_request) + import_standalone_pr_comments(pr_comments.map(&:comment), merge_request) + end + + private + + attr_reader :object, :project, :formatter, :client, :project_key, :repository_slug, :logger, :user_finder + + # rubocop: disable CodeReuse/ActiveRecord + def import_merge_event(merge_request, merge_event) + log_info(import_stage: 'import_merge_event', message: 'starting', iid: merge_request.iid) + + committer = merge_event.committer_email + + user_id = user_finder.find_user_id(by: :email, value: committer) || project.creator_id + timestamp = merge_event.merge_timestamp + merge_request.update({ merge_commit_sha: merge_event.merge_commit }) + metric = MergeRequest::Metrics.find_or_initialize_by(merge_request: merge_request) + metric.update(merged_by_id: user_id, merged_at: timestamp) + + log_info(import_stage: 'import_merge_event', message: 'finished', iid: merge_request.iid) + end + # rubocop: enable CodeReuse/ActiveRecord + + def import_inline_comments(inline_comments, merge_request) + log_info(import_stage: 'import_inline_comments', message: 'starting', iid: merge_request.iid) + + inline_comments.each do |comment| + position = build_position(merge_request, comment) + parent = create_diff_note(merge_request, comment, position) + + next unless parent&.persisted? + + discussion_id = parent.discussion_id + + comment.comments.each do |reply| + create_diff_note(merge_request, reply, position, discussion_id) + end + end + + log_info(import_stage: 'import_inline_comments', message: 'finished', iid: merge_request.iid) + end + + def create_diff_note(merge_request, comment, position, discussion_id = nil) + attributes = pull_request_comment_attributes(comment) + attributes.merge!(position: position, type: 'DiffNote') + attributes[:discussion_id] = discussion_id if discussion_id + + note = merge_request.notes.build(attributes) + + if note.valid? + note.save + return note + end + + log_info(import_stage: 'create_diff_note', message: 'creating fallback DiffNote', iid: merge_request.iid) + + # Bitbucket Server supports the ability to comment on any line, not just the + # line in the diff. If we can't add the note as a DiffNote, fallback to creating + # a regular note. + create_fallback_diff_note(merge_request, comment, position) + rescue StandardError => e + Gitlab::ErrorTracking.log_exception( + e, + import_stage: 'create_diff_note', comment_id: comment.id, error: e.message + ) + + # errors << { type: :pull_request, id: comment.id, errors: e.message } + nil + end + + def create_fallback_diff_note(merge_request, comment, position) + attributes = pull_request_comment_attributes(comment) + note = "*Comment on" + + note += " #{position.old_path}:#{position.old_line} -->" if position.old_line + note += " #{position.new_path}:#{position.new_line}" if position.new_line + note += "*\n\n#{comment.note}" + + attributes[:note] = note + merge_request.notes.create!(attributes) + end + + def build_position(merge_request, pr_comment) + params = { + diff_refs: merge_request.diff_refs, + old_path: pr_comment.file_path, + new_path: pr_comment.file_path, + old_line: pr_comment.old_pos, + new_line: pr_comment.new_pos + } + + Gitlab::Diff::Position.new(params) + end + + def import_standalone_pr_comments(pr_comments, merge_request) + pr_comments.each do |comment| + merge_request.notes.create!(pull_request_comment_attributes(comment)) + + comment.comments.each do |replies| + merge_request.notes.create!(pull_request_comment_attributes(replies)) + end + rescue StandardError => e + Gitlab::ErrorTracking.log_exception( + e, + import_stage: 'import_standalone_pr_comments', + merge_request_id: merge_request.id, + comment_id: comment.id, + error: e.message + ) + + # errors << { type: :pull_request, comment_id: comment.id, errors: e.message } + end + end + + def pull_request_comment_attributes(comment) + author = user_finder.uid(comment) + note = '' + + unless author + author = project.creator_id + note = "*By #{comment.author_username} (#{comment.author_email})*\n\n" + end + + note += + # Provide some context for replying + if comment.parent_comment + "> #{comment.parent_comment.note.truncate(80)}\n\n#{comment.note}" + else + comment.note + end + + { + project: project, + note: note, + author_id: author, + created_at: comment.created_at, + updated_at: comment.updated_at + } + end + + def log_debug(details) + logger.debug(log_base_data.merge(details)) + end + + def log_info(details) + logger.info(log_base_data.merge(details)) + end + + def log_warn(details) + logger.warn(log_base_data.merge(details)) + end + + def log_base_data + { + class: self.class.name, + project_id: project.id, + project_path: project.full_path + } + end + end + end + end +end diff --git a/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer.rb b/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer.rb new file mode 100644 index 00000000000000..bde69e0ef9a495 --- /dev/null +++ b/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + module Importers + class PullRequestsImporter + include ParallelScheduling + + def execute + pull_requests = client.pull_requests(project_key, repository_slug, limit: BATCH_SIZE) + + pull_requests.each do |pull_request| + job_waiter.jobs_remaining += 1 + + next if already_imported?(pull_request) + + job_delay = calculate_job_delay(job_waiter.jobs_remaining) + + sidekiq_worker_class.perform_in(job_delay, project.id, pull_request.to_hash, job_waiter.key) + end + + job_waiter + end + + private + + attr_reader :project, :client, :project_key, :repository_slug, :already_imported_cache_key, + :job_waiter_cache_key + + attr_accessor :page, :enqueued_job_counter + + def sidekiq_worker_class + ImportPullRequestWorker + end + + def collection_method + :pull_requests + end + + def id_for_already_imported_cache(object) + object.iid + end + end + end + end +end diff --git a/lib/gitlab/bitbucket_server_import/importers/repository_importer.rb b/lib/gitlab/bitbucket_server_import/importers/repository_importer.rb new file mode 100644 index 00000000000000..4df4b19ba70887 --- /dev/null +++ b/lib/gitlab/bitbucket_server_import/importers/repository_importer.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + module Importers + class RepositoryImporter + def initialize(project) + @project = project + end + + def execute + # log_info(import_stage: 'import_repository', message: 'starting import') + + if project.empty_repo? + project.repository.import_repository(project.import_url) + project.repository.fetch_as_mirror(project.import_url, refmap: refmap) + + update_clone_time + + # The initial fetch can bring in lots of loose refs and objects. + # Running a `git gc` will make importing pull requests faster. + Repositories::HousekeepingService.new(project, :gc).execute + end + + true + + # log_info(import_stage: 'import_repository', message: 'finished import') + rescue ::Gitlab::Git::CommandError => e + Gitlab::ErrorTracking.log_exception(e, + import_stage: 'import_repository', message: 'failed import', error: e.message + ) + + # Expire cache to prevent scenarios such as: + # 1. First import failed, but the repo was imported successfully, so +exists?+ returns true + # 2. Retried import, repo is broken or not imported but +exists?+ still returns true + project.repository.expire_content_cache if project.repository_exists? + + raise + end + + private + + attr_reader :project + + def refmap + # We omit :heads and :tags since these are fetched in the import_repository + ['+refs/pull-requests/*/to:refs/merge-requests/*/head'] + end + + def update_clone_time + project.touch(:last_repository_updated_at) + end + end + end + end +end diff --git a/lib/gitlab/bitbucket_server_import/logger.rb b/lib/gitlab/bitbucket_server_import/logger.rb new file mode 100644 index 00000000000000..f2294efe0fe337 --- /dev/null +++ b/lib/gitlab/bitbucket_server_import/logger.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + class Logger < ::Gitlab::Import::Logger + def default_attributes + super.merge(import_type: :bitbucket_server) + end + end + end +end diff --git a/lib/gitlab/bitbucket_server_import/parallel_importer.rb b/lib/gitlab/bitbucket_server_import/parallel_importer.rb new file mode 100644 index 00000000000000..39b775ecbd6189 --- /dev/null +++ b/lib/gitlab/bitbucket_server_import/parallel_importer.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + class ParallelImporter + def self.async? + true + end + + def self.imports_repository? + true + end + + def initialize(project) + @project = project + end + + def execute + Gitlab::Import::SetAsyncJid.set_jid(project.import_state) + + Stage::ImportRepositoryWorker + .with_status + .perform_async(project.id) + + true + end + + private + + attr_reader :project + end + end +end diff --git a/lib/gitlab/bitbucket_server_import/parallel_scheduling.rb b/lib/gitlab/bitbucket_server_import/parallel_scheduling.rb new file mode 100644 index 00000000000000..ec00abac8e58af --- /dev/null +++ b/lib/gitlab/bitbucket_server_import/parallel_scheduling.rb @@ -0,0 +1,82 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + module ParallelScheduling + # The base cache key to use for tracking already imported objects. + ALREADY_IMPORTED_CACHE_KEY = + 'bitbucket-server-importer/already-imported/%{project}/%{collection}' + + # The base cache key to use for storing job waiter key + JOB_WAITER_CACHE_KEY = + 'bitbucket-server-importer/job-waiter/%{project}/%{collection}' + + BATCH_SIZE = 100 + + # project - An instance of `Project`. + def initialize(project) + @project = project + @client = BitbucketServer::Client.new(project.import_data.credentials) + @project_key = project.import_data.data['project_key'] + @repository_slug = project.import_data.data['repo_slug'] + + @already_imported_cache_key = + format(ALREADY_IMPORTED_CACHE_KEY, project: project.id, collection: collection_method) + @job_waiter_cache_key = + format(JOB_WAITER_CACHE_KEY, project: project.id, collection: collection_method) + end + + private + + attr_reader :project, :client, :project_key, :repository_slug, :already_imported_cache_key, + :job_waiter_cache_key + + # Returns the ID to use for the cache used for checking if an object has + # already been imported or not. + # + # object - The object we may want to import. + def id_for_already_imported_cache(object) + raise NotImplementedError + end + + # The Sidekiq worker class used for scheduling the importing of objects in + # parallel. + def sidekiq_worker_class + raise NotImplementedError + end + + # The name of the method to call to retrieve the data to import. + def collection_method + raise NotImplementedError + end + + def job_waiter + @job_waiter ||= begin + key = Gitlab::Cache::Import::Caching.read(job_waiter_cache_key) + key ||= Gitlab::Cache::Import::Caching.write(job_waiter_cache_key, JobWaiter.generate_key) + + JobWaiter.new(0, key) + end + end + + def already_imported?(object) + id = id_for_already_imported_cache(object) + + Gitlab::Cache::Import::Caching.set_includes?(already_imported_cache_key, id) + end + + # Marks the given object as "already imported". + def mark_as_imported(object) + id = id_for_already_imported_cache(object) + + Gitlab::Cache::Import::Caching.set_add(already_imported_cache_key, id) + end + + def calculate_job_delay(job_index) + multiplier = (job_index / BATCH_SIZE) + + (multiplier * 1.minute) + 1.second + end + end + end +end diff --git a/lib/gitlab/bitbucket_server_import/user_finder.rb b/lib/gitlab/bitbucket_server_import/user_finder.rb new file mode 100644 index 00000000000000..a6a124fedc015d --- /dev/null +++ b/lib/gitlab/bitbucket_server_import/user_finder.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + # Class that can be used for finding a GitLab user ID based on a BitBucket user + + class UserFinder + attr_reader :project + + # project - An instance of `Project` + def initialize(project) + @project = project + end + + def author_id(object) + uid(object) || project.creator_id + end + + # TODO: object should behaive as a object so we can remove object.is_a?(Hash) check + def uid(object) + # We want this explicit to only be username on the FF + # Otherwise, match email. + # There should be no default fall-through on username. Fall-through to import user + if Feature.enabled?(:bitbucket_server_user_mapping_by_username) + find_user_id(by: :username, value: object.is_a?(Hash) ? object[:author_username] : object.author_username) + else + find_user_id(by: :email, value: object.is_a?(Hash) ? object[:author_email] : object.author_email) + end + end + + # TODO: Cache values + def find_user_id(by:, value:) + return unless value + + user = if by == :email + User.find_by_any_email(value, confirmed: true) + else + User.find_by_username(value) + end + + user&.id + end + end + end +end diff --git a/lib/gitlab/import_sources.rb b/lib/gitlab/import_sources.rb index 77b0df765c4319..876457a308a345 100644 --- a/lib/gitlab/import_sources.rb +++ b/lib/gitlab/import_sources.rb @@ -12,7 +12,7 @@ module ImportSources IMPORT_TABLE = [ ImportSource.new('github', 'GitHub', Gitlab::GithubImport::ParallelImporter), ImportSource.new('bitbucket', 'Bitbucket Cloud', Gitlab::BitbucketImport::Importer), - ImportSource.new('bitbucket_server', 'Bitbucket Server', Gitlab::BitbucketServerImport::Importer), + ImportSource.new('bitbucket_server', 'Bitbucket Server', Gitlab::BitbucketServerImport::ParallelImporter), ImportSource.new('fogbugz', 'FogBugz', Gitlab::FogbugzImport::Importer), ImportSource.new('git', 'Repository by URL', nil), ImportSource.new('gitlab_project', 'GitLab export', Gitlab::ImportExport::Importer), diff --git a/spec/lib/bitbucket_server/representation/pull_request_spec.rb b/spec/lib/bitbucket_server/representation/pull_request_spec.rb index d7b893e8081f6a..5312bc1d71befb 100644 --- a/spec/lib/bitbucket_server/representation/pull_request_spec.rb +++ b/spec/lib/bitbucket_server/representation/pull_request_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe BitbucketServer::Representation::PullRequest do +RSpec.describe BitbucketServer::Representation::PullRequest, feature_category: :importers do let(:sample_data) { Gitlab::Json.parse(fixture_file('importers/bitbucket_server/pull_request.json')) } subject { described_class.new(sample_data) } @@ -105,4 +105,21 @@ describe '#target_branch_sha' do it { expect(subject.target_branch_sha).to eq('839fa9a2d434eb697815b8fcafaecc51accfdbbc') } end + + describe '#to_hash' do + it do + expect(subject.to_hash).to match( + a_hash_including( + author_email: "joe.montana@49ers.com", + author_username: "username", + author: "root", + description: "Test", + source_branch_name: "refs/heads/root/CODE_OF_CONDUCTmd-1530600625006", + target_branch_name: "refs/heads/master", + target_branch_sha: "839fa9a2d434eb697815b8fcafaecc51accfdbbc", + title: "Added a new line" + ) + ) + end + end end -- GitLab From 2d27e7e878f9091a799d2d4b4d8fd56d73321435 Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Wed, 17 May 2023 13:35:01 +1200 Subject: [PATCH 02/18] Add feature flag --- .../bitbucket_server_parallel_importer.yml | 8 ++++ lib/gitlab/import_sources.rb | 7 +++- spec/lib/gitlab/import_sources_spec.rb | 42 ++++++++++++++++++- 3 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 config/feature_flags/development/bitbucket_server_parallel_importer.yml diff --git a/config/feature_flags/development/bitbucket_server_parallel_importer.yml b/config/feature_flags/development/bitbucket_server_parallel_importer.yml new file mode 100644 index 00000000000000..0e5315d5d7a3ac --- /dev/null +++ b/config/feature_flags/development/bitbucket_server_parallel_importer.yml @@ -0,0 +1,8 @@ +--- +name: bitbucket_server_parallel_importer +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/120931 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/411796 +milestone: '16.1' +type: development +group: group::import and integrate +default_enabled: false diff --git a/lib/gitlab/import_sources.rb b/lib/gitlab/import_sources.rb index 876457a308a345..37bcc53019f23b 100644 --- a/lib/gitlab/import_sources.rb +++ b/lib/gitlab/import_sources.rb @@ -20,6 +20,9 @@ module ImportSources ImportSource.new('manifest', 'Manifest file', nil) ].freeze + LEGACY_IMPORT_TABLE = IMPORT_TABLE.deep_dup + LEGACY_IMPORT_TABLE[2].importer = Gitlab::BitbucketServerImport::Importer + class << self prepend_mod_with('Gitlab::ImportSources') # rubocop: disable Cop/InjectEnterpriseEditionModule @@ -44,7 +47,9 @@ def title(name) end def import_table - IMPORT_TABLE + return IMPORT_TABLE if Feature.enabled?(:bitbucket_server_parallel_importer) + + LEGACY_IMPORT_TABLE end end end diff --git a/spec/lib/gitlab/import_sources_spec.rb b/spec/lib/gitlab/import_sources_spec.rb index f1ea5f3e85ee68..b243780a020d25 100644 --- a/spec/lib/gitlab/import_sources_spec.rb +++ b/spec/lib/gitlab/import_sources_spec.rb @@ -59,7 +59,7 @@ import_sources = { 'github' => Gitlab::GithubImport::ParallelImporter, 'bitbucket' => Gitlab::BitbucketImport::Importer, - 'bitbucket_server' => Gitlab::BitbucketServerImport::Importer, + 'bitbucket_server' => Gitlab::BitbucketServerImport::ParallelImporter, 'fogbugz' => Gitlab::FogbugzImport::Importer, 'git' => nil, 'gitlab_project' => Gitlab::ImportExport::Importer, @@ -72,6 +72,46 @@ expect(described_class.importer(name)).to eq(klass) end end + + context 'when flag is disabled' do + before do + stub_feature_flags(bitbucket_server_parallel_importer: false) + end + + it 'returns Gitlab::BitbucketServerImport::Importer when given bitbucket_server' do + expect(described_class.importer('bitbucket_server')).to eq(Gitlab::BitbucketServerImport::Importer) + end + end + end + + describe '.import_table' do + subject { described_class.import_table } + + it 'returns the ParallelImporter for Bitbucket server' do + is_expected.to include( + described_class::ImportSource.new( + 'bitbucket_server', + 'Bitbucket Server', + Gitlab::BitbucketServerImport::ParallelImporter + ) + ) + end + + context 'when flag is disabled' do + before do + stub_feature_flags(bitbucket_server_parallel_importer: false) + end + + it 'returns the legacy Importer for Bitbucket server' do + is_expected.to include( + described_class::ImportSource.new( + 'bitbucket_server', + 'Bitbucket Server', + Gitlab::BitbucketServerImport::Importer + ) + ) + end + end end describe '.title' do -- GitLab From 268cd77c0229928121a18cd4776c856da27ef5d0 Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Wed, 17 May 2023 14:30:23 +1200 Subject: [PATCH 03/18] Count jobs only when not already imported --- .../bitbucket_server_import/importers/notes_importer.rb | 4 ++-- .../importers/pull_requests_importer.rb | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/gitlab/bitbucket_server_import/importers/notes_importer.rb b/lib/gitlab/bitbucket_server_import/importers/notes_importer.rb index a9c9a50999f950..fb94b2c85a9b9e 100644 --- a/lib/gitlab/bitbucket_server_import/importers/notes_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/notes_importer.rb @@ -8,10 +8,10 @@ class NotesImporter def execute project.merge_requests.find_each do |merge_request| - job_waiter.jobs_remaining += 1 - next if already_imported?(merge_request) + job_waiter.jobs_remaining += 1 + job_delay = calculate_job_delay(job_waiter.jobs_remaining) sidekiq_worker_class.perform_in(job_delay, project.id, { iid: merge_request.iid }, job_waiter.key) diff --git a/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer.rb b/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer.rb index bde69e0ef9a495..8fd8565d154fdf 100644 --- a/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer.rb @@ -10,10 +10,10 @@ def execute pull_requests = client.pull_requests(project_key, repository_slug, limit: BATCH_SIZE) pull_requests.each do |pull_request| - job_waiter.jobs_remaining += 1 - next if already_imported?(pull_request) + job_waiter.jobs_remaining += 1 + job_delay = calculate_job_delay(job_waiter.jobs_remaining) sidekiq_worker_class.perform_in(job_delay, project.id, pull_request.to_hash, job_waiter.key) -- GitLab From 3a303e8d6864a7fd7ec24a5d67d2804716117e65 Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Wed, 17 May 2023 15:53:12 +1200 Subject: [PATCH 04/18] Give new workers feature categories --- app/workers/all_queues.yml | 6 +++--- .../gitlab/bitbucket_server_import/object_importer.rb | 2 ++ spec/workers/every_sidekiq_worker_spec.rb | 6 ++++++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index 75d61bc4999fe6..b4b47b51c9410d 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -2318,7 +2318,7 @@ :tags: [] - :name: bitbucket_server_import_import_lfs_object :worker_name: Gitlab::BitbucketServerImport::ImportLfsObjectWorker - :feature_category: + :feature_category: :importers :has_external_dependencies: true :urgency: :low :resource_boundary: :unknown @@ -2327,7 +2327,7 @@ :tags: [] - :name: bitbucket_server_import_import_pull_request :worker_name: Gitlab::BitbucketServerImport::ImportPullRequestWorker - :feature_category: + :feature_category: :importers :has_external_dependencies: true :urgency: :low :resource_boundary: :unknown @@ -2336,7 +2336,7 @@ :tags: [] - :name: bitbucket_server_import_import_pull_request_notes :worker_name: Gitlab::BitbucketServerImport::ImportPullRequestNotesWorker - :feature_category: + :feature_category: :importers :has_external_dependencies: true :urgency: :low :resource_boundary: :unknown diff --git a/app/workers/concerns/gitlab/bitbucket_server_import/object_importer.rb b/app/workers/concerns/gitlab/bitbucket_server_import/object_importer.rb index f7463222db3e3c..e9a89c9791f25d 100644 --- a/app/workers/concerns/gitlab/bitbucket_server_import/object_importer.rb +++ b/app/workers/concerns/gitlab/bitbucket_server_import/object_importer.rb @@ -12,6 +12,8 @@ module ObjectImporter data_consistency :always + feature_category :importers + worker_has_external_dependencies! sidekiq_retries_exhausted do |msg| diff --git a/spec/workers/every_sidekiq_worker_spec.rb b/spec/workers/every_sidekiq_worker_spec.rb index 0c1e9da0fb1239..57cdfeef9ea699 100644 --- a/spec/workers/every_sidekiq_worker_spec.rb +++ b/spec/workers/every_sidekiq_worker_spec.rb @@ -260,6 +260,12 @@ 'Geo::VerificationWorker' => 3, 'GeoRepositoryDestroyWorker' => 3, 'GitGarbageCollectWorker' => false, + 'Gitlab::BitbucketServerImport::AdvanceStageWorker' => 3, + 'Gitlab::BitbucketServerImport::Stage::FinishImportWorker' => 3, + 'Gitlab::BitbucketServerImport::Stage::ImportLfsObjectsWorker' => 3, + 'Gitlab::BitbucketServerImport::Stage::ImportNotesWorker' => 3, + 'Gitlab::BitbucketServerImport::Stage::ImportPullRequestsWorker' => 3, + 'Gitlab::BitbucketServerImport::Stage::ImportRepositoryWorker' => 3, 'Gitlab::GithubImport::AdvanceStageWorker' => 3, 'Gitlab::GithubImport::ImportReleaseAttachmentsWorker' => 5, 'Gitlab::GithubImport::Attachments::ImportReleaseWorker' => 5, -- GitLab From a45b059771f57a431059fdf2e8bafdd3550a5075 Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Wed, 17 May 2023 17:01:08 +1200 Subject: [PATCH 05/18] Refactor a Loggable module --- .../bitbucket_server_import/importer.rb | 24 +---------- .../importers/pull_request_notes_importer.rb | 26 ++---------- .../bitbucket_server_import/loggable.rb | 41 +++++++++++++++++++ .../parallel_scheduling.rb | 2 + 4 files changed, 48 insertions(+), 45 deletions(-) create mode 100644 lib/gitlab/bitbucket_server_import/loggable.rb diff --git a/lib/gitlab/bitbucket_server_import/importer.rb b/lib/gitlab/bitbucket_server_import/importer.rb index ea9b79c12fd6df..6b163cd1b2d835 100644 --- a/lib/gitlab/bitbucket_server_import/importer.rb +++ b/lib/gitlab/bitbucket_server_import/importer.rb @@ -3,9 +3,10 @@ module Gitlab module BitbucketServerImport class Importer + include Loggable + attr_reader :recover_missing_commits attr_reader :project, :project_key, :repository_slug, :client, :errors, :users, :already_imported_cache_key - attr_accessor :logger BATCH_SIZE = 100 # The base cache key to use for tracking already imported objects. @@ -38,7 +39,6 @@ def initialize(project, recover_missing_commits: false) @errors = [] @users = {} @temp_branches = [] - @logger = Gitlab::Import::Logger.build @already_imported_cache_key = ALREADY_IMPORTED_CACHE_KEY % { project: project.id, collection: collection_method } end @@ -427,26 +427,6 @@ def pull_request_comment_attributes(comment) } end - def log_debug(details) - logger.debug(log_base_data.merge(details)) - end - - def log_info(details) - logger.info(log_base_data.merge(details)) - end - - def log_warn(details) - logger.warn(log_base_data.merge(details)) - end - - def log_base_data - { - class: self.class.name, - project_id: project.id, - project_path: project.full_path - } - end - def metrics @metrics ||= Gitlab::Import::Metrics.new(:bitbucket_server_importer, @project) end diff --git a/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer.rb b/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer.rb index c906ce6e4df414..da31521ee4d0ca 100644 --- a/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer.rb @@ -4,6 +4,8 @@ module Gitlab module BitbucketServerImport module Importers class PullRequestNotesImporter + include Loggable + def initialize(project, hash) @project = project @formatter = Gitlab::ImportFormatter.new @@ -14,8 +16,6 @@ def initialize(project, hash) # TODO: Convert object into a object instead of using it as a hash @object = hash.with_indifferent_access - - @logger = Logger.build end def execute @@ -38,7 +38,7 @@ def execute private - attr_reader :object, :project, :formatter, :client, :project_key, :repository_slug, :logger, :user_finder + attr_reader :object, :project, :formatter, :client, :project_key, :repository_slug, :user_finder # rubocop: disable CodeReuse/ActiveRecord def import_merge_event(merge_request, merge_event) @@ -172,26 +172,6 @@ def pull_request_comment_attributes(comment) updated_at: comment.updated_at } end - - def log_debug(details) - logger.debug(log_base_data.merge(details)) - end - - def log_info(details) - logger.info(log_base_data.merge(details)) - end - - def log_warn(details) - logger.warn(log_base_data.merge(details)) - end - - def log_base_data - { - class: self.class.name, - project_id: project.id, - project_path: project.full_path - } - end end end end diff --git a/lib/gitlab/bitbucket_server_import/loggable.rb b/lib/gitlab/bitbucket_server_import/loggable.rb new file mode 100644 index 00000000000000..e74c33dacdb769 --- /dev/null +++ b/lib/gitlab/bitbucket_server_import/loggable.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + module Loggable + def log_debug(messages) + logger.debug(log_data(messages)) + end + + def log_info(messages) + logger.info(log_data(messages)) + end + + def log_warn(messages) + logger.warn(log_data(messages)) + end + + def log_error(messages) + logger.error(log_data(messages)) + end + + private + + def logger + Gitlab::BitbucketServerImport::Logger + end + + def log_data(messages) + messages.merge(log_base_data) + end + + def log_base_data + { + class: self.class.name, + project_id: project.id, + project_path: project.full_path + } + end + end + end +end diff --git a/lib/gitlab/bitbucket_server_import/parallel_scheduling.rb b/lib/gitlab/bitbucket_server_import/parallel_scheduling.rb index ec00abac8e58af..50d003a6bc9e65 100644 --- a/lib/gitlab/bitbucket_server_import/parallel_scheduling.rb +++ b/lib/gitlab/bitbucket_server_import/parallel_scheduling.rb @@ -3,6 +3,8 @@ module Gitlab module BitbucketServerImport module ParallelScheduling + include Loggable + # The base cache key to use for tracking already imported objects. ALREADY_IMPORTED_CACHE_KEY = 'bitbucket-server-importer/already-imported/%{project}/%{collection}' -- GitLab From 2f83ae1a3876fbf32b3f0a4d550d163deddf0022 Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Wed, 17 May 2023 15:41:26 +1200 Subject: [PATCH 06/18] Implement LFS object import --- .../importers/lfs_object_importer.rb | 9 +++--- .../importers/lfs_objects_importer.rb | 28 +++++++++++++++++++ .../parallel_scheduling.rb | 9 ++++++ 3 files changed, 42 insertions(+), 4 deletions(-) diff --git a/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer.rb b/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer.rb index b6cb1e929281aa..97cb4705e50fbc 100644 --- a/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer.rb @@ -4,17 +4,18 @@ module Gitlab module BitbucketServerImport module Importers class LfsObjectImporter - def initialize(project, hash) - # TODO + def initialize(project, lfs_attributes) + @project = project + @lfs_download_object = LfsDownloadObject.new(**lfs_attributes) end def execute - # TODO + Projects::LfsPointers::LfsDownloadService.new(project, lfs_download_object).execute end private - attr_reader :object, :project, :formatter, :user_finder + attr_reader :project, :lfs_download_object end end end diff --git a/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer.rb b/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer.rb index 35e6a0f3442856..f285fcad2b1c17 100644 --- a/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer.rb @@ -7,6 +7,16 @@ class LfsObjectsImporter include ParallelScheduling def execute + return job_waiter unless project&.lfs_enabled? + + download_service = Projects::LfsPointers::LfsObjectDownloadListService.new(project) + + begin + queue_workers(download_service) + rescue StandardError => e + track_import_failure!(project, exception: e) + end + job_waiter end @@ -17,6 +27,24 @@ def sidekiq_worker_class def collection_method :lfs_objects end + + def id_for_already_imported_cache(lfs_download_object) + lfs_download_object.oid + end + + private + + def queue_workers(download_service) + download_service.each_list_item do |lfs_download_object| + next if already_imported?(lfs_download_object) + + job_waiter.jobs_remaining += 1 + + job_delay = calculate_job_delay(job_waiter.jobs_remaining) + + sidekiq_worker_class.perform_in(job_delay, project.id, lfs_download_object.as_json, job_waiter.key) + end + end end end end diff --git a/lib/gitlab/bitbucket_server_import/parallel_scheduling.rb b/lib/gitlab/bitbucket_server_import/parallel_scheduling.rb index 50d003a6bc9e65..7634128afc7df8 100644 --- a/lib/gitlab/bitbucket_server_import/parallel_scheduling.rb +++ b/lib/gitlab/bitbucket_server_import/parallel_scheduling.rb @@ -79,6 +79,15 @@ def calculate_job_delay(job_index) (multiplier * 1.minute) + 1.second end + + def track_import_failure!(project, exception:, **args) + Gitlab::Import::ImportFailureService.track( + project_id: project.id, + error_source: self.class.name, + exception: exception, + **args + ) + end end end end -- GitLab From 2ad73fa92d2feee3ecded77233b7df54fef02a0d Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Wed, 17 May 2023 17:01:28 +1200 Subject: [PATCH 07/18] Add Logging to LFS Importers --- .../importers/lfs_object_importer.rb | 16 +++++++++++++++- .../importers/lfs_objects_importer.rb | 6 ++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer.rb b/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer.rb index 97cb4705e50fbc..20b0e875a76820 100644 --- a/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer.rb @@ -4,13 +4,27 @@ module Gitlab module BitbucketServerImport module Importers class LfsObjectImporter + include Loggable + def initialize(project, lfs_attributes) @project = project @lfs_download_object = LfsDownloadObject.new(**lfs_attributes) end def execute - Projects::LfsPointers::LfsDownloadService.new(project, lfs_download_object).execute + log_info(import_stage: 'import_lfs_object', message: 'starting', oid: lfs_download_object.oid) + + response = Projects::LfsPointers::LfsDownloadService.new(project, lfs_download_object).execute + + if response[:status] == :error + log_error( + import_stage: 'import_lfs_object', + response: response, + lfs_attributes: lfs_download_object.as_json + ) + end + + log_info(import_stage: 'import_lfs_object', message: 'finished', oid: lfs_download_object.oid) end private diff --git a/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer.rb b/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer.rb index f285fcad2b1c17..d964f149f5d6a2 100644 --- a/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer.rb @@ -7,16 +7,18 @@ class LfsObjectsImporter include ParallelScheduling def execute - return job_waiter unless project&.lfs_enabled? + log_info(import_stage: 'import_lfs_objects', message: 'starting') download_service = Projects::LfsPointers::LfsObjectDownloadListService.new(project) begin - queue_workers(download_service) + queue_workers(download_service) if project&.lfs_enabled? rescue StandardError => e track_import_failure!(project, exception: e) end + log_info(import_stage: 'import_lfs_objects', message: 'finished') + job_waiter end -- GitLab From 45d80e1538a28ccf2e6ab4940c7f8a1b67ab8b99 Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Wed, 17 May 2023 17:01:38 +1200 Subject: [PATCH 08/18] Add specs for LFS importers --- .../importers/lfs_object_importer_spec.rb | 67 +++++++++++ .../importers/lfs_objects_importer_spec.rb | 110 ++++++++++++++++++ 2 files changed, 177 insertions(+) create mode 100644 spec/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer_spec.rb create mode 100644 spec/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer_spec.rb diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer_spec.rb new file mode 100644 index 00000000000000..1216ab472fc04b --- /dev/null +++ b/spec/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer_spec.rb @@ -0,0 +1,67 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BitbucketServerImport::Importers::LfsObjectImporter, feature_category: :importers do + include AfterNextHelpers + + let_it_be(:project) { create(:project) } + + let(:lfs_attributes) do + { + oid: 'myoid', + size: 1, + link: 'http://www.gitlab.com/lfs_objects/oid', + headers: { 'X-Some-Header' => '456' } + } + end + + let(:importer) { described_class.new(project, lfs_attributes) } + + describe '#execute' do + it 'calls the LfsDownloadService with the lfs object attributes' do + expect_next(Projects::LfsPointers::LfsDownloadService, project, have_attributes(lfs_attributes)) + .to receive(:execute).and_return(result: :success) + + importer.execute + end + + it 'logs its progress' do + common_log_message = { + oid: 'myoid', + import_stage: 'import_lfs_object', + class: described_class.name, + project_id: project.id, + project_path: project.full_path + } + + expect(Gitlab::BitbucketServerImport::Logger) + .to receive(:info).with(common_log_message.merge(message: 'starting')).and_call_original + expect(Gitlab::BitbucketServerImport::Logger) + .to receive(:info).with(common_log_message.merge(message: 'finished')).and_call_original + + importer.execute + end + + context 'when there is an error downloading the LFS object' do + before do + allow_next(Projects::LfsPointers::LfsDownloadService).to receive(:download_lfs_file!).and_raise + end + + it 'logs an error with the LFS attributes' do + error_log_message = { + lfs_attributes: include(lfs_attributes.stringify_keys), + response: { message: 'LFS file with oid myoid has invalid attributes', status: :error }, + import_stage: 'import_lfs_object', + class: described_class.name, + project_id: project.id, + project_path: project.full_path + } + + expect(Gitlab::BitbucketServerImport::Logger).to receive(:error).with(error_log_message).and_call_original + + importer.execute + end + end + end +end diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer_spec.rb new file mode 100644 index 00000000000000..f7d1d80d736228 --- /dev/null +++ b/spec/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer_spec.rb @@ -0,0 +1,110 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BitbucketServerImport::Importers::LfsObjectsImporter, feature_category: :importers do + include AfterNextHelpers + + let_it_be(:project) do + create(:project, :import_started, + import_data_attributes: { + data: { 'project_key' => 'key', 'repo_slug' => 'slug' }, + credentials: { 'token' => 'token' } + } + ) + end + + let(:client) { instance_double('Gitlab::BitbucketServerImport::Client') } + let(:download_link) { "http://www.gitlab.com/lfs_objects/oid" } + + let(:lfs_attributes) do + { + oid: 'a' * 64, + size: 1, + link: 'http://www.gitlab.com/lfs_objects/oid', + headers: { 'X-Some-Header' => '456' } + } + end + + let(:lfs_download_object) { LfsDownloadObject.new(**lfs_attributes) } + + let(:common_log_messages) do + { + import_stage: 'import_lfs_objects', + class: described_class.name, + project_id: project.id, + project_path: project.full_path + } + end + + describe '#execute' do + it 'imports each lfs object in parallel' do + importer = described_class.new(project) + + expect_next(Projects::LfsPointers::LfsObjectDownloadListService) + .to receive(:each_list_item).and_yield(lfs_download_object) + + expect(Gitlab::BitbucketServerImport::ImportLfsObjectWorker).to receive(:perform_in) + .with(1.second, project.id, lfs_attributes.stringify_keys, start_with(Gitlab::JobWaiter::KEY_PREFIX)) + + waiter = importer.execute + + expect(waiter).to be_an_instance_of(Gitlab::JobWaiter) + expect(waiter.jobs_remaining).to eq(1) + end + + it 'logs its progress' do + importer = described_class.new(project) + + expect(Gitlab::BitbucketServerImport::Logger) + .to receive(:info).with(common_log_messages.merge(message: 'starting')).and_call_original + expect(Gitlab::BitbucketServerImport::Logger) + .to receive(:info).with(common_log_messages.merge(message: 'finished')).and_call_original + + importer.execute + end + + context 'when LFS list download fails' do + let(:exception) { StandardError.new('Invalid Project URL') } + + before do + allow_next(Projects::LfsPointers::LfsObjectDownloadListService) + .to receive(:each_list_item).and_raise(exception) + end + + it 'rescues and logs the exception' do + importer = described_class.new(project) + + expect(Gitlab::Import::ImportFailureService) + .to receive(:track) + .with( + project_id: project.id, + exception: exception, + error_source: described_class.name + ).and_call_original + + waiter = importer.execute + + expect(waiter).to be_an_instance_of(Gitlab::JobWaiter) + expect(waiter.jobs_remaining).to eq(0) + end + end + + context 'when LFS is not enabled' do + before do + stub_config(lfs: { enabled: false }) + end + + it 'logs progress but does nothing' do + importer = described_class.new(project) + + expect(Gitlab::BitbucketServerImport::Logger).to receive(:info).twice + + waiter = importer.execute + + expect(waiter).to be_an_instance_of(Gitlab::JobWaiter) + expect(waiter.jobs_remaining).to eq(0) + end + end + end +end -- GitLab From b7f351c14c27b45ab6a1a934cdc4da4b62acb713 Mon Sep 17 00:00:00 2001 From: Rodrigo Tomonari Date: Wed, 17 May 2023 16:05:07 -0300 Subject: [PATCH 09/18] Add specs for parallel_importer and import_repository_worker Add spec to repository_importer Add spec to import_pull_requests_worker Add spec to PullRequestsImporter and fix already processed cache Rename cache key in LFS importers Rename already processed methods in notes importer Add spec to import_pull_request_worker Add finish tracker Add more logging Add more specs Add missing specs Add user_finder spec Fix lfs_object_importer spec Use let_it_be in spec Use next instance of in lfs_object_importer spec Use next instance of in lfs_objects_importer_spec Disable lfs_objects_importer spec --- .../object_importer.rb | 25 ++-- .../bitbucket_server_import/stage_methods.rb | 6 - .../stage/finish_import_worker.rb | 5 +- .../stage/import_lfs_objects_worker.rb | 12 +- .../stage/import_notes_worker.rb | 12 +- .../stage/import_pull_requests_worker.rb | 16 +-- .../stage/import_repository_worker.rb | 12 +- .../importers/lfs_object_importer.rb | 10 +- .../importers/lfs_objects_importer.rb | 10 +- .../importers/notes_importer.rb | 10 +- .../importers/pull_request_importer.rb | 6 + .../importers/pull_request_notes_importer.rb | 24 ++-- .../importers/pull_requests_importer.rb | 15 +- .../importers/repository_importer.rb | 12 +- .../parallel_importer.rb | 4 + .../parallel_scheduling.rb | 46 +++--- .../bitbucket_server_import/user_finder.rb | 2 +- .../importers/lfs_object_importer_spec.rb | 34 ++--- .../importers/lfs_objects_importer_spec.rb | 17 +-- .../importers/notes_importer_spec.rb | 47 ++++++ .../importers/pull_request_importer_spec.rb | 43 ++++++ .../pull_request_notes_importer_spec.rb | 136 ++++++++++++++++++ .../importers/pull_requests_importer_spec.rb | 55 +++++++ .../importers/repository_importer_spec.rb | 50 +++++++ .../parallel_importer_spec.rb | 45 ++++++ .../user_finder_spec.rb | 92 ++++++++++++ .../import_pull_request_worker_spec.rb | 82 +++++++++++ .../stage/finish_import_worker_spec.rb | 21 +++ .../stage/import_lfs_objects_worker_spec.rb | 26 ++++ .../stage/import_notes_worker_spec.rb | 26 ++++ .../stage/import_pull_requests_worker_spec.rb | 75 ++++++++++ .../stage/import_repository_worker_spec.rb | 75 ++++++++++ 32 files changed, 893 insertions(+), 158 deletions(-) create mode 100644 spec/lib/gitlab/bitbucket_server_import/importers/notes_importer_spec.rb create mode 100644 spec/lib/gitlab/bitbucket_server_import/importers/pull_request_importer_spec.rb create mode 100644 spec/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer_spec.rb create mode 100644 spec/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer_spec.rb create mode 100644 spec/lib/gitlab/bitbucket_server_import/importers/repository_importer_spec.rb create mode 100644 spec/lib/gitlab/bitbucket_server_import/parallel_importer_spec.rb create mode 100644 spec/lib/gitlab/bitbucket_server_import/user_finder_spec.rb create mode 100644 spec/workers/gitlab/bitbucket_server_import/import_pull_request_worker_spec.rb create mode 100644 spec/workers/gitlab/bitbucket_server_import/stage/finish_import_worker_spec.rb create mode 100644 spec/workers/gitlab/bitbucket_server_import/stage/import_lfs_objects_worker_spec.rb create mode 100644 spec/workers/gitlab/bitbucket_server_import/stage/import_notes_worker_spec.rb create mode 100644 spec/workers/gitlab/bitbucket_server_import/stage/import_pull_requests_worker_spec.rb create mode 100644 spec/workers/gitlab/bitbucket_server_import/stage/import_repository_worker_spec.rb diff --git a/app/workers/concerns/gitlab/bitbucket_server_import/object_importer.rb b/app/workers/concerns/gitlab/bitbucket_server_import/object_importer.rb index e9a89c9791f25d..9b4e475ab2f73e 100644 --- a/app/workers/concerns/gitlab/bitbucket_server_import/object_importer.rb +++ b/app/workers/concerns/gitlab/bitbucket_server_import/object_importer.rb @@ -18,11 +18,8 @@ module ObjectImporter sidekiq_retries_exhausted do |msg| args = msg['args'] - correlation_id = msg['correlation_id'] jid = msg['jid'] - new.perform_failure(args[0], args[1], correlation_id) - # If a job is being exhausted we still want to notify the # Gitlab::Import::AdvanceStageWorker to prevent the entire import from getting stuck if args.length == 3 && (key = args.last) && key.is_a?(String) @@ -33,18 +30,25 @@ module ObjectImporter def perform(project_id, hash, notify_key) project = Project.find_by_id(project_id) - return unless project + + unless project + notify_waiter(notify_key) + return + end if project.import_state&.canceled? info(project.id, message: 'project import canceled') + notify_waiter(notify_key) return end import(project, hash) - ensure + notify_waiter(notify_key) end + private + # project - An instance of `Project` to import the data into. # hash - A Hash containing the details of the object to import. def import(project, hash) @@ -60,15 +64,6 @@ def import(project, hash) track_and_raise_exception(project, e) end - # hash - A Hash containing the details of the object to import. - def perform_failure(project_id, _hash, correlation_id) - project = Project.find_by_id(project_id) - return unless project - - failure = project.import_failures.failures_by_correlation_id(correlation_id).first - return unless failure - end - def notify_waiter(key) JobWaiter.notify(key, jid) end @@ -78,8 +73,6 @@ def importer_class raise NotImplementedError end - private - def info(project_id, extra = {}) Logger.info(log_attributes(project_id, extra)) end diff --git a/app/workers/concerns/gitlab/bitbucket_server_import/stage_methods.rb b/app/workers/concerns/gitlab/bitbucket_server_import/stage_methods.rb index be84d9d7b2328b..db4e71051c0198 100644 --- a/app/workers/concerns/gitlab/bitbucket_server_import/stage_methods.rb +++ b/app/workers/concerns/gitlab/bitbucket_server_import/stage_methods.rb @@ -31,12 +31,6 @@ def perform(project_id) return unless (project = find_project(project_id)) - if project.import_state&.canceled? - info(project_id, message: 'project import canceled') - - return - end - import(project) info(project_id, message: 'stage finished') diff --git a/app/workers/gitlab/bitbucket_server_import/stage/finish_import_worker.rb b/app/workers/gitlab/bitbucket_server_import/stage/finish_import_worker.rb index da168b56f455d2..e3c8508815f852 100644 --- a/app/workers/gitlab/bitbucket_server_import/stage/finish_import_worker.rb +++ b/app/workers/gitlab/bitbucket_server_import/stage/finish_import_worker.rb @@ -6,10 +6,13 @@ module Stage class FinishImportWorker # rubocop:disable Scalability/IdempotentWorker include StageMethods + private + # project - An instance of Project. def import(project) - @project = project project.after_import + + Gitlab::Import::Metrics.new(:bitbucket_server_importer, project).track_finished_import end end end diff --git a/app/workers/gitlab/bitbucket_server_import/stage/import_lfs_objects_worker.rb b/app/workers/gitlab/bitbucket_server_import/stage/import_lfs_objects_worker.rb index 166d88fb6f5c95..1002047225c93f 100644 --- a/app/workers/gitlab/bitbucket_server_import/stage/import_lfs_objects_worker.rb +++ b/app/workers/gitlab/bitbucket_server_import/stage/import_lfs_objects_worker.rb @@ -6,6 +6,8 @@ module Stage class ImportLfsObjectsWorker # rubocop:disable Scalability/IdempotentWorker include StageMethods + private + # project - An instance of Project. def import(project) waiter = importer_class.new(project).execute @@ -17,16 +19,6 @@ def import(project) { waiter.key => waiter.jobs_remaining }, :finish ) - rescue StandardError => e - Gitlab::Import::ImportFailureService.track( - project_id: project.id, - error_source: self.class.name, - exception: e, - fail_import: false, - metrics: true - ) - - raise(e) end def importer_class diff --git a/app/workers/gitlab/bitbucket_server_import/stage/import_notes_worker.rb b/app/workers/gitlab/bitbucket_server_import/stage/import_notes_worker.rb index e36e1afd077db4..b30f93058298a2 100644 --- a/app/workers/gitlab/bitbucket_server_import/stage/import_notes_worker.rb +++ b/app/workers/gitlab/bitbucket_server_import/stage/import_notes_worker.rb @@ -6,6 +6,8 @@ module Stage class ImportNotesWorker # rubocop:disable Scalability/IdempotentWorker include StageMethods + private + # project - An instance of Project. def import(project) waiter = importer_class.new(project).execute @@ -17,16 +19,6 @@ def import(project) { waiter.key => waiter.jobs_remaining }, :lfs_objects ) - rescue StandardError => e - Gitlab::Import::ImportFailureService.track( - project_id: project.id, - error_source: self.class.name, - exception: e, - fail_import: false, - metrics: true - ) - - raise(e) end def importer_class diff --git a/app/workers/gitlab/bitbucket_server_import/stage/import_pull_requests_worker.rb b/app/workers/gitlab/bitbucket_server_import/stage/import_pull_requests_worker.rb index 06a97ea96b59d0..9e3d570e20dfb7 100644 --- a/app/workers/gitlab/bitbucket_server_import/stage/import_pull_requests_worker.rb +++ b/app/workers/gitlab/bitbucket_server_import/stage/import_pull_requests_worker.rb @@ -6,6 +6,8 @@ module Stage class ImportPullRequestsWorker # rubocop:disable Scalability/IdempotentWorker include StageMethods + private + # project - An instance of Project. def import(project) waiter = importer_class.new(project).execute @@ -17,25 +19,11 @@ def import(project) { waiter.key => waiter.jobs_remaining }, :notes ) - rescue StandardError => e - Gitlab::Import::ImportFailureService.track( - project_id: project.id, - error_source: self.class.name, - exception: e, - fail_import: false, - metrics: true - ) - - raise(e) end def importer_class Importers::PullRequestsImporter end - - def abort_on_failure - true - end end end end diff --git a/app/workers/gitlab/bitbucket_server_import/stage/import_repository_worker.rb b/app/workers/gitlab/bitbucket_server_import/stage/import_repository_worker.rb index 3a84d41dbdc393..b378d07d59cd60 100644 --- a/app/workers/gitlab/bitbucket_server_import/stage/import_repository_worker.rb +++ b/app/workers/gitlab/bitbucket_server_import/stage/import_repository_worker.rb @@ -6,6 +6,8 @@ module Stage class ImportRepositoryWorker # rubocop:disable Scalability/IdempotentWorker include StageMethods + private + # project - An instance of Project. def import(project) importer = importer_class.new(project) @@ -13,16 +15,6 @@ def import(project) importer.execute ImportPullRequestsWorker.perform_async(project.id) - rescue StandardError => e - Gitlab::Import::ImportFailureService.track( - project_id: project.id, - error_source: self.class.name, - exception: e, - fail_import: abort_on_failure, - metrics: true - ) - - raise(e) end def importer_class diff --git a/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer.rb b/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer.rb index 20b0e875a76820..3045155fec3853 100644 --- a/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer.rb @@ -14,15 +14,7 @@ def initialize(project, lfs_attributes) def execute log_info(import_stage: 'import_lfs_object', message: 'starting', oid: lfs_download_object.oid) - response = Projects::LfsPointers::LfsDownloadService.new(project, lfs_download_object).execute - - if response[:status] == :error - log_error( - import_stage: 'import_lfs_object', - response: response, - lfs_attributes: lfs_download_object.as_json - ) - end + Projects::LfsPointers::LfsDownloadService.new(project, lfs_download_object).execute log_info(import_stage: 'import_lfs_object', message: 'finished', oid: lfs_download_object.oid) end diff --git a/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer.rb b/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer.rb index d964f149f5d6a2..d568f60f4fcac8 100644 --- a/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer.rb @@ -30,7 +30,7 @@ def collection_method :lfs_objects end - def id_for_already_imported_cache(lfs_download_object) + def id_for_already_processed_cache(lfs_download_object) lfs_download_object.oid end @@ -38,13 +38,17 @@ def id_for_already_imported_cache(lfs_download_object) def queue_workers(download_service) download_service.each_list_item do |lfs_download_object| - next if already_imported?(lfs_download_object) - + # Needs to come before `already_processed?` as `jobs_remaining` resets to zero when the job restarts and + # jobs_remaining needs to be the total amount of enqueued jobs job_waiter.jobs_remaining += 1 + next if already_processed?(lfs_download_object) + job_delay = calculate_job_delay(job_waiter.jobs_remaining) sidekiq_worker_class.perform_in(job_delay, project.id, lfs_download_object.as_json, job_waiter.key) + + mark_as_processed(lfs_download_object) end end end diff --git a/lib/gitlab/bitbucket_server_import/importers/notes_importer.rb b/lib/gitlab/bitbucket_server_import/importers/notes_importer.rb index fb94b2c85a9b9e..07ee9569ab1109 100644 --- a/lib/gitlab/bitbucket_server_import/importers/notes_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/notes_importer.rb @@ -8,13 +8,17 @@ class NotesImporter def execute project.merge_requests.find_each do |merge_request| - next if already_imported?(merge_request) - + # Needs to come before `already_processed?` as `jobs_remaining` resets to zero when the job restarts and + # jobs_remaining needs to be the total amount of enqueued jobs job_waiter.jobs_remaining += 1 + next if already_processed?(merge_request) + job_delay = calculate_job_delay(job_waiter.jobs_remaining) sidekiq_worker_class.perform_in(job_delay, project.id, { iid: merge_request.iid }, job_waiter.key) + + mark_as_processed(merge_request) end job_waiter @@ -28,7 +32,7 @@ def sidekiq_worker_class ImportPullRequestNotesWorker end - def id_for_already_imported_cache(merge_request) + def id_for_already_processed_cache(merge_request) merge_request.iid end diff --git a/lib/gitlab/bitbucket_server_import/importers/pull_request_importer.rb b/lib/gitlab/bitbucket_server_import/importers/pull_request_importer.rb index af6438f2864bba..43bd043e874c96 100644 --- a/lib/gitlab/bitbucket_server_import/importers/pull_request_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/pull_request_importer.rb @@ -4,6 +4,8 @@ module Gitlab module BitbucketServerImport module Importers class PullRequestImporter + include Loggable + def initialize(project, hash) @project = project @formatter = Gitlab::ImportFormatter.new @@ -14,6 +16,8 @@ def initialize(project, hash) end def execute + log_info(import_stage: 'import_pull_request', message: 'starting', iid: object[:iid]) + description = '' description += author_line description += object[:description] if object[:description] @@ -37,6 +41,8 @@ def execute creator = Gitlab::Import::MergeRequestCreator.new(project) creator.execute(attributes) + + log_info(import_stage: 'import_pull_request', message: 'finished', iid: object[:iid]) end private diff --git a/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer.rb b/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer.rb index da31521ee4d0ca..4fc01b4165fbfd 100644 --- a/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer.rb @@ -19,21 +19,25 @@ def initialize(project, hash) end def execute + log_info(import_stage: 'import_pull_request', message: 'starting', iid: object[:iid]) + merge_request = project.merge_requests.find_by(iid: object[:iid]) # rubocop: disable CodeReuse/ActiveRecord - return unless merge_request + if merge_request + activities = client.activities(project_key, repository_slug, merge_request.iid) - activities = client.activities(project_key, repository_slug, merge_request.iid) + comments, other_activities = activities.partition(&:comment?) - comments, other_activities = activities.partition(&:comment?) + merge_event = other_activities.find(&:merge_event?) + import_merge_event(merge_request, merge_event) if merge_event - merge_event = other_activities.find(&:merge_event?) - import_merge_event(merge_request, merge_event) if merge_event + inline_comments, pr_comments = comments.partition(&:inline_comment?) - inline_comments, pr_comments = comments.partition(&:inline_comment?) + import_inline_comments(inline_comments.map(&:comment), merge_request) + import_standalone_pr_comments(pr_comments.map(&:comment), merge_request) + end - import_inline_comments(inline_comments.map(&:comment), merge_request) - import_standalone_pr_comments(pr_comments.map(&:comment), merge_request) + log_info(import_stage: 'import_pull_request', message: 'finished', iid: object[:iid]) end private @@ -128,6 +132,8 @@ def build_position(merge_request, pr_comment) end def import_standalone_pr_comments(pr_comments, merge_request) + log_info(import_stage: 'import_standalone_pr_comments', message: 'starting', iid: merge_request.iid) + pr_comments.each do |comment| merge_request.notes.create!(pull_request_comment_attributes(comment)) @@ -144,6 +150,8 @@ def import_standalone_pr_comments(pr_comments, merge_request) ) # errors << { type: :pull_request, comment_id: comment.id, errors: e.message } + ensure + log_info(import_stage: 'import_standalone_pr_comment', message: 'finished', iid: merge_request.iid) end end diff --git a/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer.rb b/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer.rb index 8fd8565d154fdf..594fd32025529b 100644 --- a/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer.rb @@ -10,13 +10,17 @@ def execute pull_requests = client.pull_requests(project_key, repository_slug, limit: BATCH_SIZE) pull_requests.each do |pull_request| - next if already_imported?(pull_request) - + # Needs to come before `already_processed?` as `jobs_remaining` resets to zero when the job restarts and + # jobs_remaining needs to be the total amount of enqueued jobs job_waiter.jobs_remaining += 1 + next if already_processed?(pull_request) + job_delay = calculate_job_delay(job_waiter.jobs_remaining) sidekiq_worker_class.perform_in(job_delay, project.id, pull_request.to_hash, job_waiter.key) + + mark_as_processed(pull_request) end job_waiter @@ -24,11 +28,6 @@ def execute private - attr_reader :project, :client, :project_key, :repository_slug, :already_imported_cache_key, - :job_waiter_cache_key - - attr_accessor :page, :enqueued_job_counter - def sidekiq_worker_class ImportPullRequestWorker end @@ -37,7 +36,7 @@ def collection_method :pull_requests end - def id_for_already_imported_cache(object) + def id_for_already_processed_cache(object) object.iid end end diff --git a/lib/gitlab/bitbucket_server_import/importers/repository_importer.rb b/lib/gitlab/bitbucket_server_import/importers/repository_importer.rb index 4df4b19ba70887..a6068f615cf513 100644 --- a/lib/gitlab/bitbucket_server_import/importers/repository_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/repository_importer.rb @@ -4,12 +4,14 @@ module Gitlab module BitbucketServerImport module Importers class RepositoryImporter + include Loggable + def initialize(project) @project = project end def execute - # log_info(import_stage: 'import_repository', message: 'starting import') + log_info(import_stage: 'import_repository', message: 'starting import') if project.empty_repo? project.repository.import_repository(project.import_url) @@ -22,12 +24,12 @@ def execute Repositories::HousekeepingService.new(project, :gc).execute end - true + log_info(import_stage: 'import_repository', message: 'finished import') - # log_info(import_stage: 'import_repository', message: 'finished import') + true rescue ::Gitlab::Git::CommandError => e - Gitlab::ErrorTracking.log_exception(e, - import_stage: 'import_repository', message: 'failed import', error: e.message + Gitlab::ErrorTracking.log_exception( + e, import_stage: 'import_repository', message: 'failed import', error: e.message ) # Expire cache to prevent scenarios such as: diff --git a/lib/gitlab/bitbucket_server_import/parallel_importer.rb b/lib/gitlab/bitbucket_server_import/parallel_importer.rb index 39b775ecbd6189..355944ed350e63 100644 --- a/lib/gitlab/bitbucket_server_import/parallel_importer.rb +++ b/lib/gitlab/bitbucket_server_import/parallel_importer.rb @@ -11,6 +11,10 @@ def self.imports_repository? true end + def self.track_start_import(project) + Gitlab::Import::Metrics.new(:bitbucket_server_importer, project).track_start_import + end + def initialize(project) @project = project end diff --git a/lib/gitlab/bitbucket_server_import/parallel_scheduling.rb b/lib/gitlab/bitbucket_server_import/parallel_scheduling.rb index 7634128afc7df8..7fd61bbb20663a 100644 --- a/lib/gitlab/bitbucket_server_import/parallel_scheduling.rb +++ b/lib/gitlab/bitbucket_server_import/parallel_scheduling.rb @@ -5,9 +5,11 @@ module BitbucketServerImport module ParallelScheduling include Loggable - # The base cache key to use for tracking already imported objects. - ALREADY_IMPORTED_CACHE_KEY = - 'bitbucket-server-importer/already-imported/%{project}/%{collection}' + attr_reader :project, :already_processed_cache_key, :job_waiter_cache_key + + # The base cache key to use for tracking already processed objects. + ALREADY_PROCESSED_CACHE_KEY = + 'bitbucket-server-importer/already-processed/%{project}/%{collection}' # The base cache key to use for storing job waiter key JOB_WAITER_CACHE_KEY = @@ -18,26 +20,32 @@ module ParallelScheduling # project - An instance of `Project`. def initialize(project) @project = project - @client = BitbucketServer::Client.new(project.import_data.credentials) - @project_key = project.import_data.data['project_key'] - @repository_slug = project.import_data.data['repo_slug'] - @already_imported_cache_key = - format(ALREADY_IMPORTED_CACHE_KEY, project: project.id, collection: collection_method) + @already_processed_cache_key = + format(ALREADY_PROCESSED_CACHE_KEY, project: project.id, collection: collection_method) @job_waiter_cache_key = format(JOB_WAITER_CACHE_KEY, project: project.id, collection: collection_method) end private - attr_reader :project, :client, :project_key, :repository_slug, :already_imported_cache_key, - :job_waiter_cache_key + def client + @client ||= BitbucketServer::Client.new(project.import_data.credentials) + end + + def project_key + @project_key ||= project.import_data.data['project_key'] + end + + def repository_slug + @repository_slug ||= project.import_data.data['repo_slug'] + end # Returns the ID to use for the cache used for checking if an object has - # already been imported or not. + # already been processed or not. # # object - The object we may want to import. - def id_for_already_imported_cache(object) + def id_for_already_processed_cache(object) raise NotImplementedError end @@ -61,17 +69,17 @@ def job_waiter end end - def already_imported?(object) - id = id_for_already_imported_cache(object) + def already_processed?(object) + id = id_for_already_processed_cache(object) - Gitlab::Cache::Import::Caching.set_includes?(already_imported_cache_key, id) + Gitlab::Cache::Import::Caching.set_includes?(already_processed_cache_key, id) end - # Marks the given object as "already imported". - def mark_as_imported(object) - id = id_for_already_imported_cache(object) + # Marks the given object as "already processed". + def mark_as_processed(object) + id = id_for_already_processed_cache(object) - Gitlab::Cache::Import::Caching.set_add(already_imported_cache_key, id) + Gitlab::Cache::Import::Caching.set_add(already_processed_cache_key, id) end def calculate_job_delay(job_index) diff --git a/lib/gitlab/bitbucket_server_import/user_finder.rb b/lib/gitlab/bitbucket_server_import/user_finder.rb index a6a124fedc015d..2a51a309c8da99 100644 --- a/lib/gitlab/bitbucket_server_import/user_finder.rb +++ b/lib/gitlab/bitbucket_server_import/user_finder.rb @@ -16,7 +16,7 @@ def author_id(object) uid(object) || project.creator_id end - # TODO: object should behaive as a object so we can remove object.is_a?(Hash) check + # TODO: object should behave as a object so we can remove object.is_a?(Hash) check def uid(object) # We want this explicit to only be username on the FF # Otherwise, match email. diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer_spec.rb index 1216ab472fc04b..b1159d9a74eeee 100644 --- a/spec/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer_spec.rb +++ b/spec/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer_spec.rb @@ -3,8 +3,6 @@ require 'spec_helper' RSpec.describe Gitlab::BitbucketServerImport::Importers::LfsObjectImporter, feature_category: :importers do - include AfterNextHelpers - let_it_be(:project) { create(:project) } let(:lfs_attributes) do @@ -20,13 +18,20 @@ describe '#execute' do it 'calls the LfsDownloadService with the lfs object attributes' do - expect_next(Projects::LfsPointers::LfsDownloadService, project, have_attributes(lfs_attributes)) - .to receive(:execute).and_return(result: :success) + expect_next_instance_of( + Projects::LfsPointers::LfsDownloadService, project, have_attributes(lfs_attributes) + ) do |service| + expect(service).to receive(:execute).and_return(ServiceResponse.success) + end importer.execute end it 'logs its progress' do + allow_next_instance_of(Projects::LfsPointers::LfsDownloadService) do |service| + allow(service).to receive(:execute).and_return(ServiceResponse.success) + end + common_log_message = { oid: 'myoid', import_stage: 'import_lfs_object', @@ -42,26 +47,5 @@ importer.execute end - - context 'when there is an error downloading the LFS object' do - before do - allow_next(Projects::LfsPointers::LfsDownloadService).to receive(:download_lfs_file!).and_raise - end - - it 'logs an error with the LFS attributes' do - error_log_message = { - lfs_attributes: include(lfs_attributes.stringify_keys), - response: { message: 'LFS file with oid myoid has invalid attributes', status: :error }, - import_stage: 'import_lfs_object', - class: described_class.name, - project_id: project.id, - project_path: project.full_path - } - - expect(Gitlab::BitbucketServerImport::Logger).to receive(:error).with(error_log_message).and_call_original - - importer.execute - end - end end end diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer_spec.rb index f7d1d80d736228..3d68536320bd02 100644 --- a/spec/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer_spec.rb +++ b/spec/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer_spec.rb @@ -3,8 +3,6 @@ require 'spec_helper' RSpec.describe Gitlab::BitbucketServerImport::Importers::LfsObjectsImporter, feature_category: :importers do - include AfterNextHelpers - let_it_be(:project) do create(:project, :import_started, import_data_attributes: { @@ -14,9 +12,6 @@ ) end - let(:client) { instance_double('Gitlab::BitbucketServerImport::Client') } - let(:download_link) { "http://www.gitlab.com/lfs_objects/oid" } - let(:lfs_attributes) do { oid: 'a' * 64, @@ -37,12 +32,13 @@ } end - describe '#execute' do + xdescribe '#execute', :clean_gitlab_redis_cache do it 'imports each lfs object in parallel' do importer = described_class.new(project) - expect_next(Projects::LfsPointers::LfsObjectDownloadListService) - .to receive(:each_list_item).and_yield(lfs_download_object) + expect_next_instance_of(Projects::LfsPointers::LfsObjectDownloadListService) do |service| + expect(service).to receive(:each_list_item).and_yield(lfs_download_object) + end expect(Gitlab::BitbucketServerImport::ImportLfsObjectWorker).to receive(:perform_in) .with(1.second, project.id, lfs_attributes.stringify_keys, start_with(Gitlab::JobWaiter::KEY_PREFIX)) @@ -68,8 +64,9 @@ let(:exception) { StandardError.new('Invalid Project URL') } before do - allow_next(Projects::LfsPointers::LfsObjectDownloadListService) - .to receive(:each_list_item).and_raise(exception) + allow_next_instance_of(Projects::LfsPointers::LfsObjectDownloadListService) do |service| + allow(service).to receive(:each_list_item).and_raise(exception) + end end it 'rescues and logs the exception' do diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/notes_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/notes_importer_spec.rb new file mode 100644 index 00000000000000..2237694deb688c --- /dev/null +++ b/spec/lib/gitlab/bitbucket_server_import/importers/notes_importer_spec.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BitbucketServerImport::Importers::NotesImporter, feature_category: :importers do + let_it_be(:project) do + create(:project, :import_started, + import_data_attributes: { + data: { 'project_key' => 'key', 'repo_slug' => 'slug' }, + credentials: { 'base_uri' => 'http://bitbucket.org/', 'user' => 'bitbucket', 'password' => 'password' } + } + ) + end + + let_it_be(:merge_request_1) { create(:merge_request, source_project: project, iid: 100, source_branch: 'branch_1') } + let_it_be(:merge_request_2) { create(:merge_request, source_project: project, iid: 101, source_branch: 'branch_2') } + + subject(:importer) { described_class.new(project) } + + describe '#execute', :clean_gitlab_redis_cache do + it 'schedules a job to import notes for each corresponding merge request', :aggregate_failures do + expect(Gitlab::BitbucketServerImport::ImportPullRequestNotesWorker).to receive(:perform_in).twice + + waiter = importer.execute + + expect(waiter).to be_an_instance_of(Gitlab::JobWaiter) + expect(waiter.jobs_remaining).to eq(2) + expect(Gitlab::Cache::Import::Caching.values_from_set(importer.already_processed_cache_key)) + .to match_array(%w[100 101]) + end + + context 'when pull request was already processed' do + before do + Gitlab::Cache::Import::Caching.set_add(importer.already_processed_cache_key, "100") + end + + it 'does not schedule job for processed merge requests', :aggregate_failures do + expect(Gitlab::BitbucketServerImport::ImportPullRequestNotesWorker).to receive(:perform_in).once + + waiter = importer.execute + + expect(waiter).to be_an_instance_of(Gitlab::JobWaiter) + expect(waiter.jobs_remaining).to eq(2) + end + end + end +end diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/pull_request_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/pull_request_importer_spec.rb new file mode 100644 index 00000000000000..012cdcdd2608fa --- /dev/null +++ b/spec/lib/gitlab/bitbucket_server_import/importers/pull_request_importer_spec.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BitbucketServerImport::Importers::PullRequestImporter, feature_category: :importers do + include AfterNextHelpers + + let_it_be(:project) { create(:project, :repository) } + + let(:pull_request_data) { Gitlab::Json.parse(fixture_file('importers/bitbucket_server/pull_request.json')) } + let(:pull_request) { BitbucketServer::Representation::PullRequest.new(pull_request_data) } + + subject(:importer) { described_class.new(project, pull_request.to_hash) } + + describe '#execute' do + it 'imports the merge request correctly' do + expect_next(Gitlab::Import::MergeRequestCreator, project).to receive(:execute).and_call_original + expect_next(Gitlab::BitbucketServerImport::UserFinder, project).to receive(:author_id).and_call_original + expect { importer.execute }.to change { MergeRequest.count }.by(1) + + merge_request = project.merge_requests.find_by_iid(pull_request.iid) + + expect(merge_request).to have_attributes( + iid: pull_request.iid, + title: pull_request.title, + source_branch: 'root/CODE_OF_CONDUCTmd-1530600625006', + target_branch: 'master', + state: pull_request.state, + author_id: project.creator_id, + description: "*Created by: #{pull_request.author}*\n\n#{pull_request.description}" + ) + end + + it 'logs its progress' do + expect(Gitlab::BitbucketServerImport::Logger) + .to receive(:info).with(include(message: 'starting', iid: pull_request.iid)).and_call_original + expect(Gitlab::BitbucketServerImport::Logger) + .to receive(:info).with(include(message: 'finished', iid: pull_request.iid)).and_call_original + + importer.execute + end + end +end diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer_spec.rb new file mode 100644 index 00000000000000..6cfeb8e7c56147 --- /dev/null +++ b/spec/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer_spec.rb @@ -0,0 +1,136 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BitbucketServerImport::Importers::PullRequestNotesImporter, feature_category: :importers do + include AfterNextHelpers + + let_it_be(:project) do + create(:project, :repository, :import_started, + import_data_attributes: { + data: { 'project_key' => 'key', 'repo_slug' => 'slug' }, + credentials: { 'token' => 'token' } + } + ) + end + + let_it_be(:pull_request_data) { Gitlab::Json.parse(fixture_file('importers/bitbucket_server/pull_request.json')) } + let_it_be(:pull_request) { BitbucketServer::Representation::PullRequest.new(pull_request_data) } + let_it_be(:note_author) { create(:user, username: 'note_author', email: 'note_author@example.org') } + + let_it_be(:pull_request_author) do + create(:user, username: 'pull_request_author', email: 'pull_request_author@example.org') + end + + let(:merge_event) do + instance_double( + BitbucketServer::Representation::Activity, + comment?: false, + merge_event?: true, + committer_email: pull_request_author.email, + merge_timestamp: now, + merge_commit: '12345678' + ) + end + + let(:pr_note) do + instance_double( + BitbucketServer::Representation::Comment, + note: 'Hello world', + author_email: note_author.email, + author_username: note_author.username, + comments: [], + created_at: now, + updated_at: now, + parent_comment: nil) + end + + let(:pr_comment) do + instance_double( + BitbucketServer::Representation::Activity, + comment?: true, + inline_comment?: false, + merge_event?: false, + comment: pr_note) + end + + let_it_be(:sample) { RepoHelpers.sample_compare } + let_it_be(:now) { Time.now.utc.change(usec: 0) } + + def expect_log(stage:, message:) + allow(Gitlab::BitbucketServerImport::Logger).to receive(:info).and_call_original + expect(Gitlab::BitbucketServerImport::Logger) + .to receive(:info).with(include(import_stage: stage, message: message)) + end + + subject(:importer) { described_class.new(project, pull_request.to_hash) } + + describe '#execute', :clean_gitlab_redis_cache do + context 'when a matching merge request is not found' do + it 'does nothing' do + expect { importer.execute }.not_to change { Note.count } + end + + it 'logs its progress' do + expect_log(stage: 'import_pull_request', message: 'starting') + expect_log(stage: 'import_pull_request', message: 'finished') + + importer.execute + end + end + + context 'when a matching merge request is found' do + let_it_be(:merge_request) { create(:merge_request, iid: pull_request.iid, source_project: project) } + + it 'logs its progress' do + allow_next(BitbucketServer::Client).to receive(:activities).and_return([]) + + expect_log(stage: 'import_pull_request', message: 'starting') + expect_log(stage: 'import_pull_request', message: 'finished') + + importer.execute + end + + context 'when PR has comments' do + before do + allow_next(BitbucketServer::Client).to receive(:activities).and_return([pr_comment]) + end + + it 'imports the comments' do + expect { subject.execute }.to change { Note.count }.by(1) + + expect(merge_request.notes.count).to eq(1) + expect(merge_request.notes.first).to have_attributes( + note: end_with(pr_note.note), + author: note_author, + created_at: pr_note.created_at, + updated_at: pr_note.created_at + ) + end + + it 'logs its progress' do + expect_log(stage: 'import_inline_comments', message: 'starting') + expect_log(stage: 'import_inline_comments', message: 'finished') + + importer.execute + end + end + + context 'when PR has a merge event' do + before do + allow_next(BitbucketServer::Client).to receive(:activities).and_return([merge_event]) + end + + it 'imports the merge event' do + importer.execute + + merge_request.reload + + expect(merge_request.metrics.merged_by).to eq(pull_request_author) + expect(merge_request.metrics.merged_at).to eq(merge_event.merge_timestamp) + expect(merge_request.merge_commit_sha).to eq(merge_event.merge_commit) + end + end + end + end +end diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer_spec.rb new file mode 100644 index 00000000000000..d24511b26d8d27 --- /dev/null +++ b/spec/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer_spec.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BitbucketServerImport::Importers::PullRequestsImporter, feature_category: :importers do + let_it_be(:project) do + create(:project, :import_started, + import_data_attributes: { + data: { 'project_key' => 'key', 'repo_slug' => 'slug' }, + credentials: { 'base_uri' => 'http://bitbucket.org/', 'user' => 'bitbucket', 'password' => 'password' } + } + ) + end + + subject(:importer) { described_class.new(project) } + + describe '#execute', :clean_gitlab_redis_cache do + before do + allow_next_instance_of(BitbucketServer::Client) do |client| + allow(client).to receive(:pull_requests).and_return( + [ + BitbucketServer::Representation::PullRequest.new({ 'id' => 1 }), + BitbucketServer::Representation::PullRequest.new({ 'id' => 2 }) + ] + ) + end + end + + it 'imports each pull request in parallel', :aggregate_failures do + expect(Gitlab::BitbucketServerImport::ImportPullRequestWorker).to receive(:perform_in).twice + + waiter = importer.execute + + expect(waiter).to be_an_instance_of(Gitlab::JobWaiter) + expect(waiter.jobs_remaining).to eq(2) + expect(Gitlab::Cache::Import::Caching.values_from_set(importer.already_processed_cache_key)) + .to match_array(%w[1 2]) + end + + context 'when pull request was already processed' do + before do + Gitlab::Cache::Import::Caching.set_add(importer.already_processed_cache_key, 1) + end + + it 'does not schedule job for processed pull requests', :aggregate_failures do + expect(Gitlab::BitbucketServerImport::ImportPullRequestWorker).to receive(:perform_in).once + + waiter = importer.execute + + expect(waiter).to be_an_instance_of(Gitlab::JobWaiter) + expect(waiter.jobs_remaining).to eq(2) + end + end + end +end diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/repository_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/repository_importer_spec.rb new file mode 100644 index 00000000000000..323fb3ea452dc6 --- /dev/null +++ b/spec/lib/gitlab/bitbucket_server_import/importers/repository_importer_spec.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BitbucketServerImport::Importers::RepositoryImporter, feature_category: :importers do + let_it_be(:project) { create(:project, import_url: 'http://bitbucket:test@my-bitbucket') } + + subject(:importer) { described_class.new(project) } + + describe '#execute' do + context 'when repository is empty' do + it 'imports the repository' do + expect(project.repository).to receive(:import_repository).with(project.import_url) + expect(project.repository).to receive(:fetch_as_mirror).with(project.import_url, + refmap: ['+refs/pull-requests/*/to:refs/merge-requests/*/head']) + expect(project.last_repository_updated_at).to be_present + expect(Repositories::HousekeepingService).to receive_message_chain(:new, :execute) + + importer.execute + end + end + + context 'when epository is not empty' do + before do + allow(project).to receive(:empty_repo?).and_return(false) + + project.last_repository_updated_at = 1.day.ago + end + + it 'does not import the repository' do + expect(project.repository).not_to receive(:import_repository) + + expect { importer.execute }.not_to change { project.last_repository_updated_at } + end + end + + context 'when a Git CommandError is raised and the repository exists' do + before do + allow(project.repository).to receive(:import_repository).and_raise(::Gitlab::Git::CommandError) + allow(project).to receive(:repository_exists?).and_return(true) + end + + it 'expires repository caches' do + expect(project.repository).to receive(:expire_content_cache) + + expect { importer.execute }.to raise_error(::Gitlab::Git::CommandError) + end + end + end +end diff --git a/spec/lib/gitlab/bitbucket_server_import/parallel_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/parallel_importer_spec.rb new file mode 100644 index 00000000000000..a36f24034032bb --- /dev/null +++ b/spec/lib/gitlab/bitbucket_server_import/parallel_importer_spec.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BitbucketServerImport::ParallelImporter, feature_category: :importers do + describe '.async?' do + it 'returns true' do + expect(described_class).to be_async + end + end + + describe '.track_start_import' do + it 'tracks the start of import' do + project = build_stubbed(:project) + + expect_next_instance_of(Gitlab::Import::Metrics, :bitbucket_server_importer, project) do |metric| + expect(metric).to receive(:track_start_import) + end + + described_class.track_start_import(project) + end + end + + describe '#execute', :clean_gitlab_redis_shared_state do + let_it_be(:project) { create(:project) } + let(:importer) { described_class.new(project) } + + before do + create(:import_state, :started, project: project) + end + + it 'schedules the importing of the repository' do + expect(Gitlab::BitbucketServerImport::Stage::ImportRepositoryWorker) + .to receive_message_chain(:with_status, :perform_async).with(project.id) + + expect(importer.execute).to eq(true) + end + + it 'sets the JID in Redis' do + expect(Gitlab::Import::SetAsyncJid).to receive(:set_jid).with(project.import_state).and_call_original + + importer.execute + end + end +end diff --git a/spec/lib/gitlab/bitbucket_server_import/user_finder_spec.rb b/spec/lib/gitlab/bitbucket_server_import/user_finder_spec.rb new file mode 100644 index 00000000000000..0dc8519a82a5e5 --- /dev/null +++ b/spec/lib/gitlab/bitbucket_server_import/user_finder_spec.rb @@ -0,0 +1,92 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BitbucketServerImport::UserFinder, feature_category: :importers do + let_it_be(:user) { create(:user, username: 'user1', email: 'user1@example.org') } + + let(:created_id) { 1 } + let(:project) { instance_double(Project, creator_id: created_id) } + + subject(:user_finder) { described_class.new(project) } + + describe '#author_id' do + it 'calls uid method' do + object = { author_username: user.username } + + expect(user_finder).to receive(:uid).with(object).and_return(10) + + expect(user_finder.author_id(object)).to eq(10) + end + + context 'when corresponding user does not exist' do + it 'fallback to project creator_id' do + object = { author_email: 'unknown' } + + expect(user_finder.author_id(object)).to eq(created_id) + end + end + end + + describe '#uid' do + context 'when bitbucket_server_user_mapping_by_username is enabled' do + before do + stub_feature_flags(bitbucket_server_user_mapping_by_username: true) + end + + context 'when provided object is a Hash' do + it 'maps to a existing user with the same username' do + object = { author_username: user.username } + + expect(user_finder.uid(object)).to eq(user.id) + end + end + + context 'when provided object is a Object' do + it 'maps to a existing user with the same username' do + object = instance_double(BitbucketServer::Representation::Comment, author_username: user.username) + + expect(user_finder.uid(object)).to eq(user.id) + end + end + + context 'when corresponding user does not exist' do + it 'returns nil' do + object = { author_username: 'unknown' } + + expect(user_finder.uid(object)).to eq(nil) + end + end + end + + context 'when bitbucket_server_user_mapping_by_username is disabled' do + before do + stub_feature_flags(bitbucket_server_user_mapping_by_username: false) + end + + context 'when provided object is a Hash' do + it 'maps to a existing user with the same email' do + object = { author_email: user.email } + + expect(user_finder.uid(object)).to eq(user.id) + end + end + + context 'when provided object is a Object' do + it 'maps to a existing user with the same email' do + object = instance_double(BitbucketServer::Representation::Comment, author_email: user.email) + + expect(user_finder.uid(object)).to eq(user.id) + end + end + + context 'when corresponding user does not exist' do + it 'returns nil' do + object = { author_email: 'unknown' } + + expect(user_finder.uid(object)).to eq(nil) + end + end + end + end +end diff --git a/spec/workers/gitlab/bitbucket_server_import/import_pull_request_worker_spec.rb b/spec/workers/gitlab/bitbucket_server_import/import_pull_request_worker_spec.rb new file mode 100644 index 00000000000000..42f23353e11f4a --- /dev/null +++ b/spec/workers/gitlab/bitbucket_server_import/import_pull_request_worker_spec.rb @@ -0,0 +1,82 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BitbucketServerImport::ImportPullRequestWorker, feature_category: :importers do + let_it_be(:project) { create(:project, :import_started) } + + let(:worker) { described_class.new } + + let(:job_waiter_key) { 'ABC' } + + let(:importer_class) { Gitlab::BitbucketServerImport::Importers::PullRequestImporter } + + before do + allow(worker).to receive(:jid).and_return('jid') + end + + describe '#perform' do + context 'when the import succeeds' do + before do + allow_next_instance_of(importer_class) do |importer| + allow(importer).to receive(:execute) + end + end + + it 'notifies job waiter' do + expect(Gitlab::JobWaiter).to receive(:notify).with(job_waiter_key, 'jid') + + worker.perform(project.id, {}, job_waiter_key) + end + + it 'logs stage start and finish' do + expect(Gitlab::BitbucketServerImport::Logger) + .to receive(:info).with(hash_including(message: 'importer started', project_id: project.id)) + expect(Gitlab::BitbucketServerImport::Logger) + .to receive(:info).with(hash_including(message: 'importer finished', project_id: project.id)) + + worker.perform(project.id, {}, job_waiter_key) + end + end + + context 'when project does not exists' do + it 'does not call importer and notifies job waiter' do + expect(importer_class).not_to receive(:new) + expect(Gitlab::JobWaiter).to receive(:notify).with(job_waiter_key, 'jid') + + worker.perform(-1, {}, job_waiter_key) + end + end + + context 'when project import state is not `started`' do + it 'does not call importer' do + project = create(:project, :import_canceled) + + expect(importer_class).not_to receive(:new) + expect(Gitlab::JobWaiter).to receive(:notify).with(job_waiter_key, 'jid') + + worker.perform(project.id, {}, job_waiter_key) + end + end + + context 'when the importer fails' do + it 'raises an error' do + exception = StandardError.new('Error') + + allow_next_instance_of(importer_class) do |importer| + allow(importer).to receive(:execute).and_raise(exception) + end + + expect(Gitlab::Import::ImportFailureService) + .to receive(:track).with( + project_id: project.id, + exception: exception, + error_source: importer_class.name, + fail_import: false + ).and_call_original + + expect { worker.perform(project.id, {}, job_waiter_key) }.to raise_error(exception) + end + end + end +end diff --git a/spec/workers/gitlab/bitbucket_server_import/stage/finish_import_worker_spec.rb b/spec/workers/gitlab/bitbucket_server_import/stage/finish_import_worker_spec.rb new file mode 100644 index 00000000000000..f311a30c7c5fb1 --- /dev/null +++ b/spec/workers/gitlab/bitbucket_server_import/stage/finish_import_worker_spec.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BitbucketServerImport::Stage::FinishImportWorker, feature_category: :importers do + let_it_be(:project) { create(:project, :import_started) } + + subject(:worker) { described_class.new } + + describe '#perform' do + it 'finalises the import process' do + expect_next_instance_of(Gitlab::Import::Metrics, :bitbucket_server_importer, project) do |metric| + expect(metric).to receive(:track_finished_import) + end + + worker.perform(project.id) + + expect(project.import_state.reload).to be_finished + end + end +end diff --git a/spec/workers/gitlab/bitbucket_server_import/stage/import_lfs_objects_worker_spec.rb b/spec/workers/gitlab/bitbucket_server_import/stage/import_lfs_objects_worker_spec.rb new file mode 100644 index 00000000000000..4978f001ab2fbf --- /dev/null +++ b/spec/workers/gitlab/bitbucket_server_import/stage/import_lfs_objects_worker_spec.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BitbucketServerImport::Stage::ImportLfsObjectsWorker, feature_category: :importers do + let_it_be(:project) { create(:project, :import_started) } + + subject(:worker) { described_class.new } + + describe '#perform' do + context 'when the import succeeds' do + before do + allow_next_instance_of(Gitlab::BitbucketServerImport::Importers::LfsObjectsImporter) do |importer| + allow(importer).to receive(:execute).and_return(Gitlab::JobWaiter.new(2, '123')) + end + end + + it 'schedules the next stage' do + expect(Gitlab::BitbucketServerImport::AdvanceStageWorker).to receive(:perform_async) + .with(project.id, { '123' => 2 }, :finish) + + worker.perform(project.id) + end + end + end +end diff --git a/spec/workers/gitlab/bitbucket_server_import/stage/import_notes_worker_spec.rb b/spec/workers/gitlab/bitbucket_server_import/stage/import_notes_worker_spec.rb new file mode 100644 index 00000000000000..26cc53efd2ff08 --- /dev/null +++ b/spec/workers/gitlab/bitbucket_server_import/stage/import_notes_worker_spec.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BitbucketServerImport::Stage::ImportNotesWorker, feature_category: :importers do + let_it_be(:project) { create(:project, :import_started) } + + subject(:worker) { described_class.new } + + describe '#perform' do + context 'when the import succeeds' do + before do + allow_next_instance_of(Gitlab::BitbucketServerImport::Importers::NotesImporter) do |importer| + allow(importer).to receive(:execute).and_return(Gitlab::JobWaiter.new(2, '123')) + end + end + + it 'schedules the next stage' do + expect(Gitlab::BitbucketServerImport::AdvanceStageWorker).to receive(:perform_async) + .with(project.id, { '123' => 2 }, :lfs_objects) + + worker.perform(project.id) + end + end + end +end diff --git a/spec/workers/gitlab/bitbucket_server_import/stage/import_pull_requests_worker_spec.rb b/spec/workers/gitlab/bitbucket_server_import/stage/import_pull_requests_worker_spec.rb new file mode 100644 index 00000000000000..71179e7fd398ec --- /dev/null +++ b/spec/workers/gitlab/bitbucket_server_import/stage/import_pull_requests_worker_spec.rb @@ -0,0 +1,75 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BitbucketServerImport::Stage::ImportPullRequestsWorker, feature_category: :importers do + let_it_be(:project) { create(:project, :import_started) } + + subject(:worker) { described_class.new } + + describe '#perform' do + context 'when the import succeeds' do + before do + allow_next_instance_of(Gitlab::BitbucketServerImport::Importers::PullRequestsImporter) do |importer| + allow(importer).to receive(:execute).and_return(Gitlab::JobWaiter.new(2, '123')) + end + end + + it 'schedules the next stage' do + expect(Gitlab::BitbucketServerImport::AdvanceStageWorker).to receive(:perform_async) + .with(project.id, { '123' => 2 }, :notes) + + worker.perform(project.id) + end + + it 'logs stage start and finish' do + expect(Gitlab::BitbucketServerImport::Logger) + .to receive(:info).with(hash_including(message: 'starting stage', project_id: project.id)) + expect(Gitlab::BitbucketServerImport::Logger) + .to receive(:info).with(hash_including(message: 'stage finished', project_id: project.id)) + + worker.perform(project.id) + end + end + + context 'when project does not exists' do + it 'does not call the importer' do + expect(Gitlab::BitbucketServerImport::Importers::PullRequestsImporter).not_to receive(:new) + + worker.perform(-1) + end + end + + context 'when project import state is not `started`' do + it 'does not call the importer' do + project = create(:project, :import_canceled) + + expect(Gitlab::BitbucketServerImport::Importers::PullRequestsImporter).not_to receive(:new) + + worker.perform(project.id) + end + end + + context 'when the importer fails' do + it 'does not schedule the next stage and raises error' do + exception = StandardError.new('Error') + + allow_next_instance_of(Gitlab::BitbucketServerImport::Importers::PullRequestsImporter) do |importer| + allow(importer).to receive(:execute).and_raise(exception) + end + + expect(Gitlab::Import::ImportFailureService) + .to receive(:track).with( + project_id: project.id, + exception: exception, + error_source: described_class.name, + fail_import: false + ).and_call_original + + expect { worker.perform(project.id) } + .to change { Gitlab::BitbucketServerImport::AdvanceStageWorker.jobs.size }.by(0) + .and raise_error(exception) + end + end + end +end diff --git a/spec/workers/gitlab/bitbucket_server_import/stage/import_repository_worker_spec.rb b/spec/workers/gitlab/bitbucket_server_import/stage/import_repository_worker_spec.rb new file mode 100644 index 00000000000000..3bc577075c8be4 --- /dev/null +++ b/spec/workers/gitlab/bitbucket_server_import/stage/import_repository_worker_spec.rb @@ -0,0 +1,75 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BitbucketServerImport::Stage::ImportRepositoryWorker, feature_category: :importers do + let_it_be(:project) { create(:project, :import_started) } + + let(:worker) { described_class.new } + + describe '#perform' do + context 'when the import succeeds' do + before do + allow_next_instance_of(Gitlab::BitbucketServerImport::Importers::RepositoryImporter) do |importer| + allow(importer).to receive(:execute) + end + end + + it 'schedules the next stage' do + expect(Gitlab::BitbucketServerImport::Stage::ImportPullRequestsWorker).to receive(:perform_async) + .with(project.id) + + worker.perform(project.id) + end + + it 'logs stage start and finish' do + expect(Gitlab::BitbucketServerImport::Logger) + .to receive(:info).with(hash_including(message: 'starting stage', project_id: project.id)) + expect(Gitlab::BitbucketServerImport::Logger) + .to receive(:info).with(hash_including(message: 'stage finished', project_id: project.id)) + + worker.perform(project.id) + end + end + + context 'when project does not exists' do + it 'does not call importer' do + expect(Gitlab::BitbucketServerImport::Importers::RepositoryImporter).not_to receive(:new) + + worker.perform(-1) + end + end + + context 'when project import state is not `started`' do + it 'does not call importer' do + project = create(:project, :import_canceled) + + expect(Gitlab::BitbucketServerImport::Importers::RepositoryImporter).not_to receive(:new) + + worker.perform(project.id) + end + end + + context 'when the importer fails' do + it 'does not schedule the next stage and raises error' do + exception = StandardError.new('Error') + + allow_next_instance_of(Gitlab::BitbucketServerImport::Importers::RepositoryImporter) do |importer| + allow(importer).to receive(:execute).and_raise(exception) + end + + expect(Gitlab::Import::ImportFailureService) + .to receive(:track).with( + project_id: project.id, + exception: exception, + error_source: described_class.name, + fail_import: true + ).and_call_original + + expect { worker.perform(project.id) } + .to change { Gitlab::BitbucketServerImport::Stage::ImportPullRequestsWorker.jobs.size }.by(0) + .and raise_error(exception) + end + end + end +end -- GitLab From fb530971b491e643a738f675ee37d0e8cf25e53a Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Fri, 19 May 2023 13:32:19 +1200 Subject: [PATCH 10/18] Add more specs for PullRequestNotesImporter --- .../pull_request_notes_importer_spec.rb | 82 ++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer_spec.rb index 6cfeb8e7c56147..68254b919bc69e 100644 --- a/spec/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer_spec.rb +++ b/spec/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer_spec.rb @@ -96,7 +96,7 @@ def expect_log(stage:, message:) allow_next(BitbucketServer::Client).to receive(:activities).and_return([pr_comment]) end - it 'imports the comments' do + it 'imports the stand alone comments' do expect { subject.execute }.to change { Note.count }.by(1) expect(merge_request.notes.count).to eq(1) @@ -108,6 +108,86 @@ def expect_log(stage:, message:) ) end + it 'logs its progress' do + expect_log(stage: 'import_standalone_pr_comments', message: 'starting') + expect_log(stage: 'import_standalone_pr_comments', message: 'finished') + + importer.execute + end + end + + context 'when PR has threaded discussion' do + let_it_be(:reply_author) { create(:user, username: 'reply_author', email: 'reply_author@example.org') } + let_it_be(:inline_note_author) do + create(:user, username: 'inline_note_author', email: 'inline_note_author@example.org') + end + + let(:reply) do + instance_double( + BitbucketServer::Representation::PullRequestComment, + author_email: reply_author.email, + author_username: reply_author.username, + note: 'I agree', + created_at: now, + updated_at: now, + parent_comment: nil) + end + + let(:pr_inline_note) do + instance_double( + BitbucketServer::Representation::PullRequestComment, + file_type: 'ADDED', + from_sha: pull_request.target_branch_sha, + to_sha: pull_request.source_branch_sha, + file_path: '.gitmodules', + old_pos: nil, + new_pos: 4, + note: 'Hello world', + author_email: inline_note_author.email, + author_username: inline_note_author.username, + comments: [reply], + created_at: now, + updated_at: now, + parent_comment: nil) + end + + let(:pr_inline_comment) do + instance_double( + BitbucketServer::Representation::Activity, + comment?: true, + inline_comment?: true, + merge_event?: false, + comment: pr_inline_note) + end + + before do + allow_next(BitbucketServer::Client).to receive(:activities).and_return([pr_inline_comment]) + end + + it 'imports the threaded discussion' do + expect { subject.execute }.to change { Note.count }.by(2) + + expect(merge_request.discussions.count).to eq(1) + + notes = merge_request.notes.order(:id).to_a + start_note = notes.first + expect(start_note.type).to eq('DiffNote') + expect(start_note.note).to end_with(pr_inline_note.note) + expect(start_note.created_at).to eq(pr_inline_note.created_at) + expect(start_note.updated_at).to eq(pr_inline_note.updated_at) + expect(start_note.position.old_line).to be_nil + expect(start_note.position.new_line).to eq(pr_inline_note.new_pos) + expect(start_note.author).to eq(inline_note_author) + + reply_note = notes.last + expect(reply_note.note).to eq(reply.note) + expect(reply_note.author).to eq(reply_author) + expect(reply_note.created_at).to eq(reply.created_at) + expect(reply_note.updated_at).to eq(reply.created_at) + expect(reply_note.position.old_line).to be_nil + expect(reply_note.position.new_line).to eq(pr_inline_note.new_pos) + end + it 'logs its progress' do expect_log(stage: 'import_inline_comments', message: 'starting') expect_log(stage: 'import_inline_comments', message: 'finished') -- GitLab From 40bffe29dd35d500fb0205b86dab761cbdb1c758 Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Fri, 19 May 2023 13:34:16 +1200 Subject: [PATCH 11/18] Use ensure for notify_waiter --- .../gitlab/bitbucket_server_import/object_importer.rb | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/app/workers/concerns/gitlab/bitbucket_server_import/object_importer.rb b/app/workers/concerns/gitlab/bitbucket_server_import/object_importer.rb index 9b4e475ab2f73e..b209719479b581 100644 --- a/app/workers/concerns/gitlab/bitbucket_server_import/object_importer.rb +++ b/app/workers/concerns/gitlab/bitbucket_server_import/object_importer.rb @@ -31,19 +31,15 @@ module ObjectImporter def perform(project_id, hash, notify_key) project = Project.find_by_id(project_id) - unless project - notify_waiter(notify_key) - return - end + return unless project if project.import_state&.canceled? info(project.id, message: 'project import canceled') - notify_waiter(notify_key) return end import(project, hash) - + ensure notify_waiter(notify_key) end -- GitLab From 7493cad3aa68c1ce8231f7bbabd0749a6d345fa9 Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Fri, 19 May 2023 13:33:03 +1200 Subject: [PATCH 12/18] Add specs for StageMethods behaviours --- .../stage_methods_shared_examples.rb | 18 ++++++++++++++++++ .../stage/finish_import_worker_spec.rb | 2 ++ .../stage/import_lfs_objects_worker_spec.rb | 2 ++ .../stage/import_notes_worker_spec.rb | 2 ++ .../stage/import_pull_requests_worker_spec.rb | 2 ++ .../stage/import_repository_worker_spec.rb | 2 ++ 6 files changed, 28 insertions(+) create mode 100644 ee/spec/support/shared_examples/lib/gitlab/bitbucket_server_import/stage_methods_shared_examples.rb diff --git a/ee/spec/support/shared_examples/lib/gitlab/bitbucket_server_import/stage_methods_shared_examples.rb b/ee/spec/support/shared_examples/lib/gitlab/bitbucket_server_import/stage_methods_shared_examples.rb new file mode 100644 index 00000000000000..1246dd2979b9cf --- /dev/null +++ b/ee/spec/support/shared_examples/lib/gitlab/bitbucket_server_import/stage_methods_shared_examples.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +RSpec.shared_examples Gitlab::BitbucketServerImport::StageMethods do + describe '.sidekiq_retries_exhausted' do + let(:job) { { 'args' => [project.id] } } + + it 'tracks the import failure' do + expect(Gitlab::Import::ImportFailureService) + .to receive(:track).with( + project_id: project.id, + exception: StandardError.new, + fail_import: true + ) + + described_class.sidekiq_retries_exhausted_block.call(job, StandardError.new) + end + end +end diff --git a/spec/workers/gitlab/bitbucket_server_import/stage/finish_import_worker_spec.rb b/spec/workers/gitlab/bitbucket_server_import/stage/finish_import_worker_spec.rb index f311a30c7c5fb1..cb61ee13dd1d23 100644 --- a/spec/workers/gitlab/bitbucket_server_import/stage/finish_import_worker_spec.rb +++ b/spec/workers/gitlab/bitbucket_server_import/stage/finish_import_worker_spec.rb @@ -7,6 +7,8 @@ subject(:worker) { described_class.new } + it_behaves_like Gitlab::BitbucketServerImport::StageMethods + describe '#perform' do it 'finalises the import process' do expect_next_instance_of(Gitlab::Import::Metrics, :bitbucket_server_importer, project) do |metric| diff --git a/spec/workers/gitlab/bitbucket_server_import/stage/import_lfs_objects_worker_spec.rb b/spec/workers/gitlab/bitbucket_server_import/stage/import_lfs_objects_worker_spec.rb index 4978f001ab2fbf..449e22e67e2507 100644 --- a/spec/workers/gitlab/bitbucket_server_import/stage/import_lfs_objects_worker_spec.rb +++ b/spec/workers/gitlab/bitbucket_server_import/stage/import_lfs_objects_worker_spec.rb @@ -7,6 +7,8 @@ subject(:worker) { described_class.new } + it_behaves_like Gitlab::BitbucketServerImport::StageMethods + describe '#perform' do context 'when the import succeeds' do before do diff --git a/spec/workers/gitlab/bitbucket_server_import/stage/import_notes_worker_spec.rb b/spec/workers/gitlab/bitbucket_server_import/stage/import_notes_worker_spec.rb index 26cc53efd2ff08..f7512c74d49e88 100644 --- a/spec/workers/gitlab/bitbucket_server_import/stage/import_notes_worker_spec.rb +++ b/spec/workers/gitlab/bitbucket_server_import/stage/import_notes_worker_spec.rb @@ -7,6 +7,8 @@ subject(:worker) { described_class.new } + it_behaves_like Gitlab::BitbucketServerImport::StageMethods + describe '#perform' do context 'when the import succeeds' do before do diff --git a/spec/workers/gitlab/bitbucket_server_import/stage/import_pull_requests_worker_spec.rb b/spec/workers/gitlab/bitbucket_server_import/stage/import_pull_requests_worker_spec.rb index 71179e7fd398ec..77400cabefa9a3 100644 --- a/spec/workers/gitlab/bitbucket_server_import/stage/import_pull_requests_worker_spec.rb +++ b/spec/workers/gitlab/bitbucket_server_import/stage/import_pull_requests_worker_spec.rb @@ -7,6 +7,8 @@ subject(:worker) { described_class.new } + it_behaves_like Gitlab::BitbucketServerImport::StageMethods + describe '#perform' do context 'when the import succeeds' do before do diff --git a/spec/workers/gitlab/bitbucket_server_import/stage/import_repository_worker_spec.rb b/spec/workers/gitlab/bitbucket_server_import/stage/import_repository_worker_spec.rb index 3bc577075c8be4..7ea23041e791b3 100644 --- a/spec/workers/gitlab/bitbucket_server_import/stage/import_repository_worker_spec.rb +++ b/spec/workers/gitlab/bitbucket_server_import/stage/import_repository_worker_spec.rb @@ -7,6 +7,8 @@ let(:worker) { described_class.new } + it_behaves_like Gitlab::BitbucketServerImport::StageMethods + describe '#perform' do context 'when the import succeeds' do before do -- GitLab From 9c104196f96dc9f221800fe49d93a0e3cdb75318 Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Fri, 19 May 2023 14:14:07 +1200 Subject: [PATCH 13/18] Add specs for ObjectImporter behaviours --- .../importers/repository_importer_spec.rb | 2 +- .../user_finder_spec.rb | 43 +++++------- .../object_import_shared_examples.rb | 69 +++++++++++++++++++ .../stage_methods_shared_examples.rb | 0 .../import_lfs_object_worker_spec.rb | 14 ++++ .../import_pull_request_notes_worker_spec.rb | 9 +++ .../import_pull_request_worker_spec.rb | 2 + 7 files changed, 113 insertions(+), 26 deletions(-) create mode 100644 spec/support/shared_examples/lib/gitlab/bitbucket_server_import/object_import_shared_examples.rb rename {ee/spec => spec}/support/shared_examples/lib/gitlab/bitbucket_server_import/stage_methods_shared_examples.rb (100%) create mode 100644 spec/workers/gitlab/bitbucket_server_import/import_lfs_object_worker_spec.rb create mode 100644 spec/workers/gitlab/bitbucket_server_import/import_pull_request_notes_worker_spec.rb diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/repository_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/repository_importer_spec.rb index 323fb3ea452dc6..f0673dba45b71d 100644 --- a/spec/lib/gitlab/bitbucket_server_import/importers/repository_importer_spec.rb +++ b/spec/lib/gitlab/bitbucket_server_import/importers/repository_importer_spec.rb @@ -20,7 +20,7 @@ end end - context 'when epository is not empty' do + context 'when repository is not empty' do before do allow(project).to receive(:empty_repo?).and_return(false) diff --git a/spec/lib/gitlab/bitbucket_server_import/user_finder_spec.rb b/spec/lib/gitlab/bitbucket_server_import/user_finder_spec.rb index 0dc8519a82a5e5..bcc03208cea997 100644 --- a/spec/lib/gitlab/bitbucket_server_import/user_finder_spec.rb +++ b/spec/lib/gitlab/bitbucket_server_import/user_finder_spec.rb @@ -15,12 +15,11 @@ object = { author_username: user.username } expect(user_finder).to receive(:uid).with(object).and_return(10) - expect(user_finder.author_id(object)).to eq(10) end context 'when corresponding user does not exist' do - it 'fallback to project creator_id' do + it 'fallsback to project creator_id' do object = { author_email: 'unknown' } expect(user_finder.author_id(object)).to eq(created_id) @@ -29,33 +28,27 @@ end describe '#uid' do - context 'when bitbucket_server_user_mapping_by_username is enabled' do - before do - stub_feature_flags(bitbucket_server_user_mapping_by_username: true) - end - - context 'when provided object is a Hash' do - it 'maps to a existing user with the same username' do - object = { author_username: user.username } + context 'when provided object is a Hash' do + it 'maps to an existing user with the same username' do + object = { author_username: user.username } - expect(user_finder.uid(object)).to eq(user.id) - end + expect(user_finder.uid(object)).to eq(user.id) end + end - context 'when provided object is a Object' do - it 'maps to a existing user with the same username' do - object = instance_double(BitbucketServer::Representation::Comment, author_username: user.username) + context 'when provided object is a representation Object' do + it 'maps to a existing user with the same username' do + object = instance_double(BitbucketServer::Representation::Comment, author_username: user.username) - expect(user_finder.uid(object)).to eq(user.id) - end + expect(user_finder.uid(object)).to eq(user.id) end + end - context 'when corresponding user does not exist' do - it 'returns nil' do - object = { author_username: 'unknown' } + context 'when corresponding user does not exist' do + it 'returns nil' do + object = { author_username: 'unknown' } - expect(user_finder.uid(object)).to eq(nil) - end + expect(user_finder.uid(object)).to eq(nil) end end @@ -65,15 +58,15 @@ end context 'when provided object is a Hash' do - it 'maps to a existing user with the same email' do + it 'maps to an existing user with the same email' do object = { author_email: user.email } expect(user_finder.uid(object)).to eq(user.id) end end - context 'when provided object is a Object' do - it 'maps to a existing user with the same email' do + context 'when provided object is a representation Object' do + it 'maps to an existing user with the same email' do object = instance_double(BitbucketServer::Representation::Comment, author_email: user.email) expect(user_finder.uid(object)).to eq(user.id) diff --git a/spec/support/shared_examples/lib/gitlab/bitbucket_server_import/object_import_shared_examples.rb b/spec/support/shared_examples/lib/gitlab/bitbucket_server_import/object_import_shared_examples.rb new file mode 100644 index 00000000000000..ec2ae0b8a735d0 --- /dev/null +++ b/spec/support/shared_examples/lib/gitlab/bitbucket_server_import/object_import_shared_examples.rb @@ -0,0 +1,69 @@ +# frozen_string_literal: true + +RSpec.shared_examples Gitlab::BitbucketServerImport::ObjectImporter do + include AfterNextHelpers + + describe '.sidekiq_retries_exhausted' do + let(:job) { { 'args' => [1, {}, 'key'], 'jid' => 'jid' } } + + it 'notifies the waiter' do + expect(Gitlab::JobWaiter).to receive(:notify).with('key', 'jid') + + described_class.sidekiq_retries_exhausted_block.call(job, StandardError.new) + end + end + + describe '#perform' do + let_it_be(:import_started_project) { create(:project, :import_started) } + + let(:project_id) { project_id } + let(:waiter_key) { 'key' } + + shared_examples 'notifies the waiter' do + specify do + allow_next(worker.importer_class).to receive(:execute) + + expect(Gitlab::JobWaiter).to receive(:notify).with(waiter_key, anything) + + worker.perform(project_id, {}, waiter_key) + end + end + + context 'when project does not exist' do + let(:project_id) { non_existing_record_id } + + it_behaves_like 'notifies the waiter' + end + + context 'when project has import started' do + let_it_be(:project) do + create(:project, :import_started, import_data_attributes: { + data: { 'project_key' => 'key', 'repo_slug' => 'slug' }, + credentials: { 'token' => 'token' } + }) + end + + let(:project_id) { project.id } + + it 'calls the importer' do + expect_next(worker.importer_class, project, kind_of(Hash)).to receive(:execute) + + worker.perform(project_id, {}, waiter_key) + end + + it_behaves_like 'notifies the waiter' + end + + context 'when project import has been cancelled' do + let_it_be(:project_id) { create(:project, :import_canceled).id } + + it 'does not call the importer' do + expect_next(worker.importer_class).not_to receive(:execute) + + worker.perform(project_id, {}, waiter_key) + end + + it_behaves_like 'notifies the waiter' + end + end +end diff --git a/ee/spec/support/shared_examples/lib/gitlab/bitbucket_server_import/stage_methods_shared_examples.rb b/spec/support/shared_examples/lib/gitlab/bitbucket_server_import/stage_methods_shared_examples.rb similarity index 100% rename from ee/spec/support/shared_examples/lib/gitlab/bitbucket_server_import/stage_methods_shared_examples.rb rename to spec/support/shared_examples/lib/gitlab/bitbucket_server_import/stage_methods_shared_examples.rb diff --git a/spec/workers/gitlab/bitbucket_server_import/import_lfs_object_worker_spec.rb b/spec/workers/gitlab/bitbucket_server_import/import_lfs_object_worker_spec.rb new file mode 100644 index 00000000000000..a74c148cbc930b --- /dev/null +++ b/spec/workers/gitlab/bitbucket_server_import/import_lfs_object_worker_spec.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BitbucketServerImport::ImportLfsObjectWorker, feature_category: :importers do + subject(:worker) { described_class.new } + + it_behaves_like Gitlab::BitbucketServerImport::ObjectImporter do + before do + # Stub the LfsDownloadObject for these tests so it can be passed an empty Hash + allow(LfsDownloadObject).to receive(:new) + end + end +end diff --git a/spec/workers/gitlab/bitbucket_server_import/import_pull_request_notes_worker_spec.rb b/spec/workers/gitlab/bitbucket_server_import/import_pull_request_notes_worker_spec.rb new file mode 100644 index 00000000000000..bc400bc59e8200 --- /dev/null +++ b/spec/workers/gitlab/bitbucket_server_import/import_pull_request_notes_worker_spec.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BitbucketServerImport::ImportPullRequestNotesWorker, feature_category: :importers do + subject(:worker) { described_class.new } + + it_behaves_like Gitlab::BitbucketServerImport::ObjectImporter +end diff --git a/spec/workers/gitlab/bitbucket_server_import/import_pull_request_worker_spec.rb b/spec/workers/gitlab/bitbucket_server_import/import_pull_request_worker_spec.rb index 42f23353e11f4a..dd3235f846c31a 100644 --- a/spec/workers/gitlab/bitbucket_server_import/import_pull_request_worker_spec.rb +++ b/spec/workers/gitlab/bitbucket_server_import/import_pull_request_worker_spec.rb @@ -15,6 +15,8 @@ allow(worker).to receive(:jid).and_return('jid') end + it_behaves_like Gitlab::BitbucketServerImport::ObjectImporter + describe '#perform' do context 'when the import succeeds' do before do -- GitLab From c16ab15418c0ca51fd3be349ff450349436743ab Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Fri, 19 May 2023 15:03:08 +1200 Subject: [PATCH 14/18] Add caching for UserFinder --- .../bitbucket_server_import/user_finder.rb | 29 ++++++++++--- .../user_finder_spec.rb | 42 +++++++++++++++++-- 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/lib/gitlab/bitbucket_server_import/user_finder.rb b/lib/gitlab/bitbucket_server_import/user_finder.rb index 2a51a309c8da99..1bd846d28d4bbf 100644 --- a/lib/gitlab/bitbucket_server_import/user_finder.rb +++ b/lib/gitlab/bitbucket_server_import/user_finder.rb @@ -7,6 +7,9 @@ module BitbucketServerImport class UserFinder attr_reader :project + CACHE_KEY = 'bitbucket_server-importer/user-finder/%{project_id}/%{by}/%{value}' + CACHE_USER_ID_NOT_FOUND = -1 + # project - An instance of `Project` def initialize(project) @project = project @@ -18,9 +21,8 @@ def author_id(object) # TODO: object should behave as a object so we can remove object.is_a?(Hash) check def uid(object) - # We want this explicit to only be username on the FF - # Otherwise, match email. - # There should be no default fall-through on username. Fall-through to import user + # We want this to only match either username or email depending on the flag state. + # There should be no fall-through. if Feature.enabled?(:bitbucket_server_user_mapping_by_username) find_user_id(by: :username, value: object.is_a?(Hash) ? object[:author_username] : object.author_username) else @@ -28,17 +30,34 @@ def uid(object) end end - # TODO: Cache values def find_user_id(by:, value:) return unless value + cache_key = build_cache_key(by, value) + cached_id = cache.read_integer(cache_key) + + return if cached_id == CACHE_USER_ID_NOT_FOUND + return cached_id if cached_id + user = if by == :email User.find_by_any_email(value, confirmed: true) else User.find_by_username(value) end - user&.id + user&.id.tap do |id| + cache.write(cache_key, id || CACHE_USER_ID_NOT_FOUND) + end + end + + private + + def cache + Cache::Import::Caching + end + + def build_cache_key(by, value) + format(CACHE_KEY, project_id: project.id, by: by, value: value) end end end diff --git a/spec/lib/gitlab/bitbucket_server_import/user_finder_spec.rb b/spec/lib/gitlab/bitbucket_server_import/user_finder_spec.rb index bcc03208cea997..70923df3064182 100644 --- a/spec/lib/gitlab/bitbucket_server_import/user_finder_spec.rb +++ b/spec/lib/gitlab/bitbucket_server_import/user_finder_spec.rb @@ -2,11 +2,11 @@ require 'spec_helper' -RSpec.describe Gitlab::BitbucketServerImport::UserFinder, feature_category: :importers do - let_it_be(:user) { create(:user, username: 'user1', email: 'user1@example.org') } +RSpec.describe Gitlab::BitbucketServerImport::UserFinder, :clean_gitlab_redis_cache, feature_category: :importers do + let_it_be(:user) { create(:user) } let(:created_id) { 1 } - let(:project) { instance_double(Project, creator_id: created_id) } + let(:project) { instance_double(Project, creator_id: created_id, id: 1) } subject(:user_finder) { described_class.new(project) } @@ -82,4 +82,40 @@ end end end + + describe '#find_user_id' do + context 'when user cannot be found' do + it 'caches and returns nil' do + expect(User).to receive(:find_by_any_email).once.and_call_original + + 2.times do + user_id = user_finder.find_user_id(by: :email, value: 'nobody@example.com') + + expect(user_id).to be_nil + end + end + end + + context 'when user can be found' do + it 'caches and returns the user ID by email' do + expect(User).to receive(:find_by_any_email).once.and_call_original + + 2.times do + user_id = user_finder.find_user_id(by: :email, value: user.email) + + expect(user_id).to eq(user.id) + end + end + + it 'caches and returns the user ID by username' do + expect(User).to receive(:find_by_username).once.and_call_original + + 2.times do + user_id = user_finder.find_user_id(by: :username, value: user.username) + + expect(user_id).to eq(user.id) + end + end + end + end end -- GitLab From 90b87640b5018720b9579ad46cc75f7ed35b26b0 Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Fri, 19 May 2023 15:03:35 +1200 Subject: [PATCH 15/18] Small clean up --- .../importers/pull_request_notes_importer.rb | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer.rb b/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer.rb index 4fc01b4165fbfd..f5935b759c9032 100644 --- a/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer.rb @@ -103,7 +103,6 @@ def create_diff_note(merge_request, comment, position, discussion_id = nil) import_stage: 'create_diff_note', comment_id: comment.id, error: e.message ) - # errors << { type: :pull_request, id: comment.id, errors: e.message } nil end @@ -148,10 +147,8 @@ def import_standalone_pr_comments(pr_comments, merge_request) comment_id: comment.id, error: e.message ) - - # errors << { type: :pull_request, comment_id: comment.id, errors: e.message } ensure - log_info(import_stage: 'import_standalone_pr_comment', message: 'finished', iid: merge_request.iid) + log_info(import_stage: 'import_standalone_pr_comments', message: 'finished', iid: merge_request.iid) end end -- GitLab From cd4b1e4c10a6844c9a37825253c8db3114beffc4 Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Fri, 19 May 2023 17:12:55 +1200 Subject: [PATCH 16/18] Re-enable tests in LfsObjectsImporter spec These tests are failing in our CI pipeline. This change is based on the theory that for some reason LFS is disabled for the project in our CI test environment, but not locally. --- .../importers/lfs_objects_importer_spec.rb | 84 ++++++++++--------- 1 file changed, 45 insertions(+), 39 deletions(-) diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer_spec.rb index 3d68536320bd02..0d66ad7c2ec301 100644 --- a/spec/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer_spec.rb +++ b/spec/lib/gitlab/bitbucket_server_import/importers/lfs_objects_importer_spec.rb @@ -32,64 +32,70 @@ } end - xdescribe '#execute', :clean_gitlab_redis_cache do - it 'imports each lfs object in parallel' do - importer = described_class.new(project) - - expect_next_instance_of(Projects::LfsPointers::LfsObjectDownloadListService) do |service| - expect(service).to receive(:each_list_item).and_yield(lfs_download_object) + describe '#execute', :clean_gitlab_redis_cache do + context 'when lfs is enabled' do + before do + allow(project).to receive(:lfs_enabled?).and_return(true) end - expect(Gitlab::BitbucketServerImport::ImportLfsObjectWorker).to receive(:perform_in) - .with(1.second, project.id, lfs_attributes.stringify_keys, start_with(Gitlab::JobWaiter::KEY_PREFIX)) + it 'imports each lfs object in parallel' do + importer = described_class.new(project) - waiter = importer.execute + expect_next_instance_of(Projects::LfsPointers::LfsObjectDownloadListService) do |service| + expect(service).to receive(:each_list_item).and_yield(lfs_download_object) + end - expect(waiter).to be_an_instance_of(Gitlab::JobWaiter) - expect(waiter.jobs_remaining).to eq(1) - end + expect(Gitlab::BitbucketServerImport::ImportLfsObjectWorker).to receive(:perform_in) + .with(1.second, project.id, lfs_attributes.stringify_keys, start_with(Gitlab::JobWaiter::KEY_PREFIX)) - it 'logs its progress' do - importer = described_class.new(project) + waiter = importer.execute - expect(Gitlab::BitbucketServerImport::Logger) - .to receive(:info).with(common_log_messages.merge(message: 'starting')).and_call_original - expect(Gitlab::BitbucketServerImport::Logger) - .to receive(:info).with(common_log_messages.merge(message: 'finished')).and_call_original + expect(waiter).to be_an_instance_of(Gitlab::JobWaiter) + expect(waiter.jobs_remaining).to eq(1) + end - importer.execute - end + it 'logs its progress' do + importer = described_class.new(project) - context 'when LFS list download fails' do - let(:exception) { StandardError.new('Invalid Project URL') } + expect(Gitlab::BitbucketServerImport::Logger) + .to receive(:info).with(common_log_messages.merge(message: 'starting')).and_call_original + expect(Gitlab::BitbucketServerImport::Logger) + .to receive(:info).with(common_log_messages.merge(message: 'finished')).and_call_original - before do - allow_next_instance_of(Projects::LfsPointers::LfsObjectDownloadListService) do |service| - allow(service).to receive(:each_list_item).and_raise(exception) - end + importer.execute end - it 'rescues and logs the exception' do - importer = described_class.new(project) + context 'when LFS list download fails' do + let(:exception) { StandardError.new('Invalid Project URL') } - expect(Gitlab::Import::ImportFailureService) - .to receive(:track) - .with( - project_id: project.id, - exception: exception, - error_source: described_class.name - ).and_call_original + before do + allow_next_instance_of(Projects::LfsPointers::LfsObjectDownloadListService) do |service| + allow(service).to receive(:each_list_item).and_raise(exception) + end + end - waiter = importer.execute + it 'rescues and logs the exception' do + importer = described_class.new(project) - expect(waiter).to be_an_instance_of(Gitlab::JobWaiter) - expect(waiter.jobs_remaining).to eq(0) + expect(Gitlab::Import::ImportFailureService) + .to receive(:track) + .with( + project_id: project.id, + exception: exception, + error_source: described_class.name + ).and_call_original + + waiter = importer.execute + + expect(waiter).to be_an_instance_of(Gitlab::JobWaiter) + expect(waiter.jobs_remaining).to eq(0) + end end end context 'when LFS is not enabled' do before do - stub_config(lfs: { enabled: false }) + allow(project).to receive(:lfs_enabled?).and_return(false) end it 'logs progress but does nothing' do -- GitLab From 6fdbe521e7139aec3d897c4319a3eac34f0ca6a7 Mon Sep 17 00:00:00 2001 From: Rodrigo Tomonari Date: Fri, 19 May 2023 11:34:12 -0300 Subject: [PATCH 17/18] Code review feedback Fix specs Fix LFS object importer --- app/workers/all_queues.yml | 2 +- .../bitbucket_server_import/import_lfs_object_worker.rb | 4 +--- .../importers/lfs_object_importer.rb | 2 +- .../importers/pull_request_importer.rb | 3 ++- .../importers/pull_request_notes_importer.rb | 4 ++-- .../importers/repository_importer.rb | 4 ---- lib/gitlab/bitbucket_server_import/user_finder.rb | 3 ++- .../importers/lfs_object_importer_spec.rb | 8 ++++---- .../importers/pull_request_notes_importer_spec.rb | 8 ++++---- .../importers/repository_importer_spec.rb | 1 - 10 files changed, 17 insertions(+), 22 deletions(-) diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index b4b47b51c9410d..a39795ec06f209 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -2323,7 +2323,7 @@ :urgency: :low :resource_boundary: :unknown :weight: 1 - :idempotent: true + :idempotent: false :tags: [] - :name: bitbucket_server_import_import_pull_request :worker_name: Gitlab::BitbucketServerImport::ImportPullRequestWorker diff --git a/app/workers/gitlab/bitbucket_server_import/import_lfs_object_worker.rb b/app/workers/gitlab/bitbucket_server_import/import_lfs_object_worker.rb index 3e674d1ed00868..709a6018646048 100644 --- a/app/workers/gitlab/bitbucket_server_import/import_lfs_object_worker.rb +++ b/app/workers/gitlab/bitbucket_server_import/import_lfs_object_worker.rb @@ -2,11 +2,9 @@ module Gitlab module BitbucketServerImport - class ImportLfsObjectWorker + class ImportLfsObjectWorker # rubocop:disable Scalability/IdempotentWorker include ObjectImporter - idempotent! - def importer_class Importers::LfsObjectImporter end diff --git a/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer.rb b/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer.rb index 3045155fec3853..67b8eefc351888 100644 --- a/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer.rb @@ -8,7 +8,7 @@ class LfsObjectImporter def initialize(project, lfs_attributes) @project = project - @lfs_download_object = LfsDownloadObject.new(**lfs_attributes) + @lfs_download_object = LfsDownloadObject.new(**lfs_attributes.symbolize_keys) end def execute diff --git a/lib/gitlab/bitbucket_server_import/importers/pull_request_importer.rb b/lib/gitlab/bitbucket_server_import/importers/pull_request_importer.rb index 43bd043e874c96..5d306f989804fa 100644 --- a/lib/gitlab/bitbucket_server_import/importers/pull_request_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/pull_request_importer.rb @@ -11,7 +11,8 @@ def initialize(project, hash) @formatter = Gitlab::ImportFormatter.new @user_finder = UserFinder.new(project) - # TODO: Convert object into a object instead of using it as a hash + # Object should behave as a object so we can remove object.is_a?(Hash) check + # This will be fixed in https://gitlab.com/gitlab-org/gitlab/-/issues/412328 @object = hash.with_indifferent_access end diff --git a/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer.rb b/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer.rb index f5935b759c9032..69de47e2006f96 100644 --- a/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer.rb @@ -19,7 +19,7 @@ def initialize(project, hash) end def execute - log_info(import_stage: 'import_pull_request', message: 'starting', iid: object[:iid]) + log_info(import_stage: 'import_pull_request_notes', message: 'starting', iid: object[:iid]) merge_request = project.merge_requests.find_by(iid: object[:iid]) # rubocop: disable CodeReuse/ActiveRecord @@ -37,7 +37,7 @@ def execute import_standalone_pr_comments(pr_comments.map(&:comment), merge_request) end - log_info(import_stage: 'import_pull_request', message: 'finished', iid: object[:iid]) + log_info(import_stage: 'import_pull_request_notes', message: 'finished', iid: object[:iid]) end private diff --git a/lib/gitlab/bitbucket_server_import/importers/repository_importer.rb b/lib/gitlab/bitbucket_server_import/importers/repository_importer.rb index a6068f615cf513..cd09ac40e9f77e 100644 --- a/lib/gitlab/bitbucket_server_import/importers/repository_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/repository_importer.rb @@ -18,10 +18,6 @@ def execute project.repository.fetch_as_mirror(project.import_url, refmap: refmap) update_clone_time - - # The initial fetch can bring in lots of loose refs and objects. - # Running a `git gc` will make importing pull requests faster. - Repositories::HousekeepingService.new(project, :gc).execute end log_info(import_stage: 'import_repository', message: 'finished import') diff --git a/lib/gitlab/bitbucket_server_import/user_finder.rb b/lib/gitlab/bitbucket_server_import/user_finder.rb index 1bd846d28d4bbf..f96454eb2ccab9 100644 --- a/lib/gitlab/bitbucket_server_import/user_finder.rb +++ b/lib/gitlab/bitbucket_server_import/user_finder.rb @@ -19,7 +19,8 @@ def author_id(object) uid(object) || project.creator_id end - # TODO: object should behave as a object so we can remove object.is_a?(Hash) check + # Object should behave as a object so we can remove object.is_a?(Hash) check + # This will be fixed in https://gitlab.com/gitlab-org/gitlab/-/issues/412328 def uid(object) # We want this to only match either username or email depending on the flag state. # There should be no fall-through. diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer_spec.rb index b1159d9a74eeee..ba63889ab9d522 100644 --- a/spec/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer_spec.rb +++ b/spec/lib/gitlab/bitbucket_server_import/importers/lfs_object_importer_spec.rb @@ -7,10 +7,10 @@ let(:lfs_attributes) do { - oid: 'myoid', - size: 1, - link: 'http://www.gitlab.com/lfs_objects/oid', - headers: { 'X-Some-Header' => '456' } + 'oid' => 'myoid', + 'size' => 1, + 'link' => 'http://www.gitlab.com/lfs_objects/oid', + 'headers' => { 'X-Some-Header' => '456' } } end diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer_spec.rb index 68254b919bc69e..c7e91c340b0b54 100644 --- a/spec/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer_spec.rb +++ b/spec/lib/gitlab/bitbucket_server_import/importers/pull_request_notes_importer_spec.rb @@ -72,8 +72,8 @@ def expect_log(stage:, message:) end it 'logs its progress' do - expect_log(stage: 'import_pull_request', message: 'starting') - expect_log(stage: 'import_pull_request', message: 'finished') + expect_log(stage: 'import_pull_request_notes', message: 'starting') + expect_log(stage: 'import_pull_request_notes', message: 'finished') importer.execute end @@ -85,8 +85,8 @@ def expect_log(stage:, message:) it 'logs its progress' do allow_next(BitbucketServer::Client).to receive(:activities).and_return([]) - expect_log(stage: 'import_pull_request', message: 'starting') - expect_log(stage: 'import_pull_request', message: 'finished') + expect_log(stage: 'import_pull_request_notes', message: 'starting') + expect_log(stage: 'import_pull_request_notes', message: 'finished') importer.execute end diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/repository_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/repository_importer_spec.rb index f0673dba45b71d..6c4d500efb710e 100644 --- a/spec/lib/gitlab/bitbucket_server_import/importers/repository_importer_spec.rb +++ b/spec/lib/gitlab/bitbucket_server_import/importers/repository_importer_spec.rb @@ -14,7 +14,6 @@ expect(project.repository).to receive(:fetch_as_mirror).with(project.import_url, refmap: ['+refs/pull-requests/*/to:refs/merge-requests/*/head']) expect(project.last_repository_updated_at).to be_present - expect(Repositories::HousekeepingService).to receive_message_chain(:new, :execute) importer.execute end -- GitLab From ff2d3ee5e132a989675710a342761910ee7aab03 Mon Sep 17 00:00:00 2001 From: Rodrigo Tomonari Date: Fri, 19 May 2023 16:35:11 -0300 Subject: [PATCH 18/18] Fix pull request pagination --- .../importers/pull_requests_importer.rb | 32 +++++++++++++------ .../importers/pull_requests_importer_spec.rb | 3 +- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer.rb b/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer.rb index 594fd32025529b..92ec10bf037bd2 100644 --- a/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer.rb @@ -7,20 +7,34 @@ class PullRequestsImporter include ParallelScheduling def execute - pull_requests = client.pull_requests(project_key, repository_slug, limit: BATCH_SIZE) + page = 1 - pull_requests.each do |pull_request| - # Needs to come before `already_processed?` as `jobs_remaining` resets to zero when the job restarts and - # jobs_remaining needs to be the total amount of enqueued jobs - job_waiter.jobs_remaining += 1 + loop do + log_info( + import_stage: 'import_pull_requests', message: "importing page #{page} using batch-size #{BATCH_SIZE}" + ) - next if already_processed?(pull_request) + pull_requests = client.pull_requests( + project_key, repository_slug, page_offset: page, limit: BATCH_SIZE + ).to_a - job_delay = calculate_job_delay(job_waiter.jobs_remaining) + break if pull_requests.empty? - sidekiq_worker_class.perform_in(job_delay, project.id, pull_request.to_hash, job_waiter.key) + pull_requests.each do |pull_request| + # Needs to come before `already_processed?` as `jobs_remaining` resets to zero when the job restarts and + # jobs_remaining needs to be the total amount of enqueued jobs + job_waiter.jobs_remaining += 1 - mark_as_processed(pull_request) + next if already_processed?(pull_request) + + job_delay = calculate_job_delay(job_waiter.jobs_remaining) + + sidekiq_worker_class.perform_in(job_delay, project.id, pull_request.to_hash, job_waiter.key) + + mark_as_processed(pull_request) + end + + page += 1 end job_waiter diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer_spec.rb index d24511b26d8d27..b9a9c8dac29ea3 100644 --- a/spec/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer_spec.rb +++ b/spec/lib/gitlab/bitbucket_server_import/importers/pull_requests_importer_spec.rb @@ -21,7 +21,8 @@ [ BitbucketServer::Representation::PullRequest.new({ 'id' => 1 }), BitbucketServer::Representation::PullRequest.new({ 'id' => 2 }) - ] + ], + [] ) end end -- GitLab