From 84f301eb94d215236e465753a6b4d564fe95fdfd Mon Sep 17 00:00:00 2001 From: maddievn Date: Thu, 7 Dec 2023 15:25:16 +0200 Subject: [PATCH 01/11] Cache users from Bitbucket Server Changelog: added --- app/workers/all_queues.yml | 9 +++ .../stage/import_repository_worker.rb | 6 +- .../stage/import_users_worker.rb | 25 ++++++ .../bitbucket_server_cache_users.yml | 8 ++ config/sidekiq_queues.yml | 2 + lib/bitbucket_server/client.rb | 5 ++ lib/bitbucket_server/representation/user.rb | 21 +++++ lib/bitbucket_server/users_mapper.rb | 29 +++++++ .../importers/users_importer.rb | 51 ++++++++++++ spec/lib/bitbucket_server/client_spec.rb | 10 +++ .../representation/user_spec.rb | 19 +++++ .../lib/bitbucket_server/users_mapper_spec.rb | 38 +++++++++ .../importers/users_importer_spec.rb | 50 ++++++++++++ spec/workers/every_sidekiq_worker_spec.rb | 1 + .../stage/import_repository_worker_spec.rb | 15 +++- .../stage/import_users_worker_spec.rb | 77 +++++++++++++++++++ 16 files changed, 364 insertions(+), 2 deletions(-) create mode 100644 app/workers/gitlab/bitbucket_server_import/stage/import_users_worker.rb create mode 100644 config/feature_flags/development/bitbucket_server_cache_users.yml create mode 100644 lib/bitbucket_server/representation/user.rb create mode 100644 lib/bitbucket_server/users_mapper.rb create mode 100644 lib/gitlab/bitbucket_server_import/importers/users_importer.rb create mode 100644 spec/lib/bitbucket_server/representation/user_spec.rb create mode 100644 spec/lib/bitbucket_server/users_mapper_spec.rb create mode 100644 spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb create mode 100644 spec/workers/gitlab/bitbucket_server_import/stage/import_users_worker_spec.rb diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index 3d8c349910ee96..bfbadc16960da3 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -2577,6 +2577,15 @@ :weight: 1 :idempotent: false :tags: [] +- :name: bitbucket_server_import_stage_import_users + :worker_name: Gitlab::BitbucketServerImport::Stage::ImportUsersWorker + :feature_category: :importers + :has_external_dependencies: true + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: false + :tags: [] - :name: bulk_import :worker_name: BulkImportWorker :feature_category: :importers diff --git a/app/workers/gitlab/bitbucket_server_import/stage/import_repository_worker.rb b/app/workers/gitlab/bitbucket_server_import/stage/import_repository_worker.rb index b378d07d59cd60..113f984d573fb4 100644 --- a/app/workers/gitlab/bitbucket_server_import/stage/import_repository_worker.rb +++ b/app/workers/gitlab/bitbucket_server_import/stage/import_repository_worker.rb @@ -14,7 +14,11 @@ def import(project) importer.execute - ImportPullRequestsWorker.perform_async(project.id) + if Feature.enabled?(:bitbucket_server_cache_users, project.creator) + ImportUsersWorker.perform_async(project.id) + else + ImportPullRequestsWorker.perform_async(project.id) + end end def importer_class diff --git a/app/workers/gitlab/bitbucket_server_import/stage/import_users_worker.rb b/app/workers/gitlab/bitbucket_server_import/stage/import_users_worker.rb new file mode 100644 index 00000000000000..203ca50ea0f884 --- /dev/null +++ b/app/workers/gitlab/bitbucket_server_import/stage/import_users_worker.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + module Stage + class ImportUsersWorker # rubocop:disable Scalability/IdempotentWorker -- users should be cached once only + include StageMethods + + private + + def import(project) + importer = importer_class.new(project) + + importer.execute + + ImportPullRequestsWorker.perform_async(project.id) + end + + def importer_class + Importers::UsersImporter + end + end + end + end +end diff --git a/config/feature_flags/development/bitbucket_server_cache_users.yml b/config/feature_flags/development/bitbucket_server_cache_users.yml new file mode 100644 index 00000000000000..1458521151c219 --- /dev/null +++ b/config/feature_flags/development/bitbucket_server_cache_users.yml @@ -0,0 +1,8 @@ +--- +name: bitbucket_server_cache_users +introduced_by_url: +rollout_issue_url: +milestone: '16.7' +type: development +group: group::import and integrate +default_enabled: false diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml index 77503814158d3b..cb45c09560dd49 100644 --- a/config/sidekiq_queues.yml +++ b/config/sidekiq_queues.yml @@ -127,6 +127,8 @@ - 1 - - bitbucket_server_import_stage_import_repository - 1 +- - bitbucket_server_import_stage_import_users + - 1 - - bulk_import - 1 - - bulk_imports_entity diff --git a/lib/bitbucket_server/client.rb b/lib/bitbucket_server/client.rb index 8e84afe51d771a..5f09f029d18f71 100644 --- a/lib/bitbucket_server/client.rb +++ b/lib/bitbucket_server/client.rb @@ -29,6 +29,11 @@ def repos(page_offset: 0, limit: nil, filter: nil) get_collection(path, :repo, page_offset: page_offset, limit: limit) end + def users(project) + path = "/projects/#{project}/permissions/users" + get_collection(path, :user) + end + def create_branch(project_key, repo, branch_name, sha) payload = { name: branch_name, diff --git a/lib/bitbucket_server/representation/user.rb b/lib/bitbucket_server/representation/user.rb new file mode 100644 index 00000000000000..433baec1c42f96 --- /dev/null +++ b/lib/bitbucket_server/representation/user.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +module BitbucketServer + module Representation + class User < Representation::Base + def email + user['emailAddress'] + end + + def username + user['slug'] + end + + private + + def user + raw['user'] + end + end + end +end diff --git a/lib/bitbucket_server/users_mapper.rb b/lib/bitbucket_server/users_mapper.rb new file mode 100644 index 00000000000000..f24d4465692500 --- /dev/null +++ b/lib/bitbucket_server/users_mapper.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +module BitbucketServer + class UsersMapper + include Gitlab::Utils::StrongMemoize + + SOURCE_USER_CACHE_KEY = 'bitbucket_server/project/%s/source/username/%s' + + def initialize(project) + @project_id = project.id + end + + attr_reader :project_id + + def write_source_email_to_cache(username, email) + ::Gitlab::Cache::Import::Caching.write(cache_key(username), email) + end + + def read_source_email_from_cache(username) + ::Gitlab::Cache::Import::Caching.read(cache_key(username)) + end + + private + + def cache_key(username) + format(SOURCE_USER_CACHE_KEY, project_id, username) + end + end +end diff --git a/lib/gitlab/bitbucket_server_import/importers/users_importer.rb b/lib/gitlab/bitbucket_server_import/importers/users_importer.rb new file mode 100644 index 00000000000000..777b677449d9a4 --- /dev/null +++ b/lib/gitlab/bitbucket_server_import/importers/users_importer.rb @@ -0,0 +1,51 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + module Importers + class UsersImporter + include Loggable + + BATCH_SIZE = 100 + + def initialize(project) + @project = project + end + + attr_reader :project + + def execute + log_info(import_stage: 'import_users', message: 'starting') + + users = client.users(project_key) + + users.each_slice(BATCH_SIZE) do |batch| + log_info(import_stage: 'import_users', message: "caching batch of #{batch.count} users") + + batch.each { |user| cache_source_user(user) } + end + + log_info(import_stage: 'import_users', message: 'finished') + end + + private + + def cache_source_user(user) + users_mapper.write_source_email_to_cache(user.username, user.email) + end + + def users_mapper + @users_mapper ||= BitbucketServer::UsersMapper.new(project) + end + + def client + BitbucketServer::Client.new(project.import_data.credentials) + end + + def project_key + project.import_data.data['project_key'] + end + end + end + end +end diff --git a/spec/lib/bitbucket_server/client_spec.rb b/spec/lib/bitbucket_server/client_spec.rb index 1d6e8492760129..b11367ae01ccb4 100644 --- a/spec/lib/bitbucket_server/client_spec.rb +++ b/spec/lib/bitbucket_server/client_spec.rb @@ -80,6 +80,16 @@ end end + describe '#users' do + let(:path) { "/projects/#{project}/permissions/users" } + + it 'requests a collection' do + expect(BitbucketServer::Paginator).to receive(:new).with(anything, path, :user, page_offset: 0, limit: nil) + + subject.users(project) + end + end + describe '#create_branch' do let(:branch) { 'test-branch' } let(:sha) { '12345678' } diff --git a/spec/lib/bitbucket_server/representation/user_spec.rb b/spec/lib/bitbucket_server/representation/user_spec.rb new file mode 100644 index 00000000000000..32470e3a12ffc2 --- /dev/null +++ b/spec/lib/bitbucket_server/representation/user_spec.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe BitbucketServer::Representation::User, feature_category: :importers do + let(:email) { 'test@email.com' } + let(:username) { 'test_user' } + let(:sample_data) { { 'user' => { 'emailAddress' => email, 'slug' => username } } } + + subject(:user) { described_class.new(sample_data) } + + describe '#email' do + it { expect(user.email).to eq(email) } + end + + describe '#username' do + it { expect(user.username).to eq(username) } + end +end diff --git a/spec/lib/bitbucket_server/users_mapper_spec.rb b/spec/lib/bitbucket_server/users_mapper_spec.rb new file mode 100644 index 00000000000000..79bb3f343d7089 --- /dev/null +++ b/spec/lib/bitbucket_server/users_mapper_spec.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe BitbucketServer::UsersMapper, feature_category: :importers do + let_it_be(:project) { create(:project) } + let(:username) { 'test_user' } + let(:email) { 'test@email.com' } + + subject(:users_mapper) { described_class.new(project) } + + describe '#write_source_email_to_cache' do + it 'caches provided source username and email' do + expect(Gitlab::Cache::Import::Caching).to receive(:write) + .with("bitbucket_server/project/#{project.id}/source/username/#{username}", email) + + users_mapper.write_source_email_to_cache(username, email) + end + end + + describe '#read_source_email_from_cache', :clean_gitlab_redis_cache do + context 'when it has been cached' do + before do + users_mapper.write_source_email_to_cache(username, email) + end + + it 'returns the matching email' do + expect(users_mapper.read_source_email_from_cache(username)).to eq(email) + end + end + + context 'when it has not been cached' do + it 'returns the matching email' do + expect(users_mapper.read_source_email_from_cache(username)).to be_nil + end + end + end +end diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb new file mode 100644 index 00000000000000..33803718ffd6bb --- /dev/null +++ b/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BitbucketServerImport::Importers::UsersImporter, feature_category: :importers do + let(:users_mapper) { BitbucketServer::UsersMapper.new(project) } + + let_it_be(:project) do + create(:project, :with_import_url, :import_started, :empty_repo, + import_data_attributes: { + data: { 'project_key' => 'key', 'repo_slug' => 'slug' }, + credentials: { 'base_uri' => 'http://bitbucket.org/', 'user' => 'bitbucket', 'password' => 'password' } + } + ) + end + + before do + allow(BitbucketServer::UsersMapper).to receive(:new).and_return(users_mapper) + stub_const("#{described_class}::BATCH_SIZE", 1) + allow(Gitlab::BitbucketServerImport::Logger).to receive(:info).and_call_original + + allow_next_instance_of(BitbucketServer::Client) do |client| + allow(client).to receive(:users).with('key').and_return( + [ + BitbucketServer::Representation::User.new( + { 'user' => { 'emailAddress' => 'email1', 'slug' => 'username1' } } + ), + BitbucketServer::Representation::User.new( + { 'user' => { 'emailAddress' => 'email2', 'slug' => 'username2' } } + ) + ] + ) + end + end + + subject(:importer) { described_class.new(project) } + + describe '#execute' do + it 'writes the username and email to cache for every user in batches' do + expect(Gitlab::BitbucketServerImport::Logger).to receive(:info).exactly(4).times + + expect(users_mapper).to receive(:write_source_email_to_cache).and_call_original.twice + + importer.execute + + expect(users_mapper.read_source_email_from_cache('username1')).to eq('email1') + expect(users_mapper.read_source_email_from_cache('username2')).to eq('email2') + end + end +end diff --git a/spec/workers/every_sidekiq_worker_spec.rb b/spec/workers/every_sidekiq_worker_spec.rb index 136fa6b4aa0f6c..ecc2774afc4df9 100644 --- a/spec/workers/every_sidekiq_worker_spec.rb +++ b/spec/workers/every_sidekiq_worker_spec.rb @@ -262,6 +262,7 @@ 'Gitlab::BitbucketServerImport::Stage::ImportNotesWorker' => 3, 'Gitlab::BitbucketServerImport::Stage::ImportPullRequestsWorker' => 3, 'Gitlab::BitbucketServerImport::Stage::ImportRepositoryWorker' => 3, + 'Gitlab::BitbucketServerImport::Stage::ImportUsersWorker' => 3, 'Gitlab::GithubImport::AdvanceStageWorker' => 3, 'Gitlab::GithubImport::Attachments::ImportReleaseWorker' => 5, 'Gitlab::GithubImport::Attachments::ImportNoteWorker' => 5, diff --git a/spec/workers/gitlab/bitbucket_server_import/stage/import_repository_worker_spec.rb b/spec/workers/gitlab/bitbucket_server_import/stage/import_repository_worker_spec.rb index 7ea23041e791b3..619202a1c46f75 100644 --- a/spec/workers/gitlab/bitbucket_server_import/stage/import_repository_worker_spec.rb +++ b/spec/workers/gitlab/bitbucket_server_import/stage/import_repository_worker_spec.rb @@ -18,12 +18,25 @@ end it 'schedules the next stage' do - expect(Gitlab::BitbucketServerImport::Stage::ImportPullRequestsWorker).to receive(:perform_async) + expect(Gitlab::BitbucketServerImport::Stage::ImportUsersWorker).to receive(:perform_async) .with(project.id) worker.perform(project.id) end + context 'when the bitbucket_server_cache_users flag is disabled' do + before do + stub_feature_flags(bitbucket_server_cache_users: false) + end + + it 'skips the user import and schedules the next stage' do + expect(Gitlab::BitbucketServerImport::Stage::ImportPullRequestsWorker).to receive(:perform_async) + .with(project.id) + + worker.perform(project.id) + end + end + it 'logs stage start and finish' do expect(Gitlab::BitbucketServerImport::Logger) .to receive(:info).with(hash_including(message: 'starting stage', project_id: project.id)) diff --git a/spec/workers/gitlab/bitbucket_server_import/stage/import_users_worker_spec.rb b/spec/workers/gitlab/bitbucket_server_import/stage/import_users_worker_spec.rb new file mode 100644 index 00000000000000..d4cd1b82349529 --- /dev/null +++ b/spec/workers/gitlab/bitbucket_server_import/stage/import_users_worker_spec.rb @@ -0,0 +1,77 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BitbucketServerImport::Stage::ImportUsersWorker, feature_category: :importers do + let_it_be(:project) { create(:project, :import_started) } + + let(:worker) { described_class.new } + + it_behaves_like Gitlab::BitbucketServerImport::StageMethods + + describe '#perform' do + context 'when the import succeeds' do + before do + allow_next_instance_of(Gitlab::BitbucketServerImport::Importers::UsersImporter) do |importer| + allow(importer).to receive(:execute) + end + end + + it 'schedules the next stage' do + expect(Gitlab::BitbucketServerImport::Stage::ImportPullRequestsWorker).to receive(:perform_async) + .with(project.id) + + worker.perform(project.id) + end + + it 'logs stage start and finish' do + expect(Gitlab::BitbucketServerImport::Logger) + .to receive(:info).with(hash_including(message: 'starting stage', project_id: project.id)) + expect(Gitlab::BitbucketServerImport::Logger) + .to receive(:info).with(hash_including(message: 'stage finished', project_id: project.id)) + + worker.perform(project.id) + end + end + + context 'when project does not exists' do + it 'does not call importer' do + expect(Gitlab::BitbucketServerImport::Importers::UsersImporter).not_to receive(:new) + + worker.perform(-1) + end + end + + context 'when project import state is not `started`' do + it 'does not call importer' do + project = create(:project, :import_canceled) + + expect(Gitlab::BitbucketServerImport::Importers::UsersImporter).not_to receive(:new) + + worker.perform(project.id) + end + end + + context 'when the importer fails' do + it 'does not schedule the next stage and raises error' do + exception = StandardError.new('Error') + + allow_next_instance_of(Gitlab::BitbucketServerImport::Importers::UsersImporter) do |importer| + allow(importer).to receive(:execute).and_raise(exception) + end + + expect(Gitlab::Import::ImportFailureService) + .to receive(:track).with( + project_id: project.id, + exception: exception, + error_source: described_class.name, + fail_import: false + ).and_call_original + + expect { worker.perform(project.id) } + .to change { Gitlab::BitbucketServerImport::Stage::ImportUsersWorker.jobs.size }.by(0) + .and raise_error(exception) + end + end + end +end -- GitLab From 42703afe242fe6cca57da47f78587ec646a4fb73 Mon Sep 17 00:00:00 2001 From: maddievn Date: Fri, 8 Dec 2023 10:13:08 +0200 Subject: [PATCH 02/11] Add details for feature flag --- .../stage/import_repository_worker.rb | 2 +- .../development/bitbucket_server_cache_users.yml | 8 -------- .../bitbucket_server_convert_mentions_to_users.yml | 8 ++++++++ .../stage/import_repository_worker_spec.rb | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) delete mode 100644 config/feature_flags/development/bitbucket_server_cache_users.yml create mode 100644 config/feature_flags/development/bitbucket_server_convert_mentions_to_users.yml diff --git a/app/workers/gitlab/bitbucket_server_import/stage/import_repository_worker.rb b/app/workers/gitlab/bitbucket_server_import/stage/import_repository_worker.rb index 113f984d573fb4..573c73cd7dfe66 100644 --- a/app/workers/gitlab/bitbucket_server_import/stage/import_repository_worker.rb +++ b/app/workers/gitlab/bitbucket_server_import/stage/import_repository_worker.rb @@ -14,7 +14,7 @@ def import(project) importer.execute - if Feature.enabled?(:bitbucket_server_cache_users, project.creator) + if Feature.enabled?(:bitbucket_server_convert_mentions_to_users, project.creator) ImportUsersWorker.perform_async(project.id) else ImportPullRequestsWorker.perform_async(project.id) diff --git a/config/feature_flags/development/bitbucket_server_cache_users.yml b/config/feature_flags/development/bitbucket_server_cache_users.yml deleted file mode 100644 index 1458521151c219..00000000000000 --- a/config/feature_flags/development/bitbucket_server_cache_users.yml +++ /dev/null @@ -1,8 +0,0 @@ ---- -name: bitbucket_server_cache_users -introduced_by_url: -rollout_issue_url: -milestone: '16.7' -type: development -group: group::import and integrate -default_enabled: false diff --git a/config/feature_flags/development/bitbucket_server_convert_mentions_to_users.yml b/config/feature_flags/development/bitbucket_server_convert_mentions_to_users.yml new file mode 100644 index 00000000000000..90ea8d56c0acca --- /dev/null +++ b/config/feature_flags/development/bitbucket_server_convert_mentions_to_users.yml @@ -0,0 +1,8 @@ +--- +name: bitbucket_server_convert_mentions_to_users +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/139097 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/434453 +milestone: '16.7' +type: development +group: group::import and integrate +default_enabled: false diff --git a/spec/workers/gitlab/bitbucket_server_import/stage/import_repository_worker_spec.rb b/spec/workers/gitlab/bitbucket_server_import/stage/import_repository_worker_spec.rb index 619202a1c46f75..1531b30089c9c2 100644 --- a/spec/workers/gitlab/bitbucket_server_import/stage/import_repository_worker_spec.rb +++ b/spec/workers/gitlab/bitbucket_server_import/stage/import_repository_worker_spec.rb @@ -24,9 +24,9 @@ worker.perform(project.id) end - context 'when the bitbucket_server_cache_users flag is disabled' do + context 'when the bitbucket_server_convert_mentions_to_users flag is disabled' do before do - stub_feature_flags(bitbucket_server_cache_users: false) + stub_feature_flags(bitbucket_server_convert_mentions_to_users: false) end it 'skips the user import and schedules the next stage' do -- GitLab From 7c6fe4831c2fc24253b82ef37160b2c42c9b5e69 Mon Sep 17 00:00:00 2001 From: maddievn Date: Fri, 8 Dec 2023 16:17:08 +0200 Subject: [PATCH 03/11] Add a limit --- lib/bitbucket_server/client.rb | 2 +- spec/lib/bitbucket_server/client_spec.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/bitbucket_server/client.rb b/lib/bitbucket_server/client.rb index 5f09f029d18f71..a7f30eed8a6a7d 100644 --- a/lib/bitbucket_server/client.rb +++ b/lib/bitbucket_server/client.rb @@ -31,7 +31,7 @@ def repos(page_offset: 0, limit: nil, filter: nil) def users(project) path = "/projects/#{project}/permissions/users" - get_collection(path, :user) + get_collection(path, :user, limit: 10_000) end def create_branch(project_key, repo, branch_name, sha) diff --git a/spec/lib/bitbucket_server/client_spec.rb b/spec/lib/bitbucket_server/client_spec.rb index b11367ae01ccb4..f345718b830a1f 100644 --- a/spec/lib/bitbucket_server/client_spec.rb +++ b/spec/lib/bitbucket_server/client_spec.rb @@ -84,7 +84,7 @@ let(:path) { "/projects/#{project}/permissions/users" } it 'requests a collection' do - expect(BitbucketServer::Paginator).to receive(:new).with(anything, path, :user, page_offset: 0, limit: nil) + expect(BitbucketServer::Paginator).to receive(:new).with(anything, path, :user, page_offset: 0, limit: 10_000) subject.users(project) end -- GitLab From 807216954431643481d8ed5f0fc9698e2ef179a8 Mon Sep 17 00:00:00 2001 From: maddievn Date: Mon, 11 Dec 2023 09:54:19 +0200 Subject: [PATCH 04/11] Loop through all users --- lib/bitbucket_server/client.rb | 4 +-- .../importers/users_importer.rb | 17 +++++++--- spec/lib/bitbucket_server/client_spec.rb | 11 ++++++- .../importers/users_importer_spec.rb | 33 ++++++++++++------- 4 files changed, 46 insertions(+), 19 deletions(-) diff --git a/lib/bitbucket_server/client.rb b/lib/bitbucket_server/client.rb index a7f30eed8a6a7d..d27719390ea432 100644 --- a/lib/bitbucket_server/client.rb +++ b/lib/bitbucket_server/client.rb @@ -29,9 +29,9 @@ def repos(page_offset: 0, limit: nil, filter: nil) get_collection(path, :repo, page_offset: page_offset, limit: limit) end - def users(project) + def users(project, page_offset: 0, limit: nil) path = "/projects/#{project}/permissions/users" - get_collection(path, :user, limit: 10_000) + get_collection(path, :user, page_offset: page_offset, limit: limit) end def create_branch(project_key, repo, branch_name, sha) diff --git a/lib/gitlab/bitbucket_server_import/importers/users_importer.rb b/lib/gitlab/bitbucket_server_import/importers/users_importer.rb index 777b677449d9a4..6298151e91b5ef 100644 --- a/lib/gitlab/bitbucket_server_import/importers/users_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/users_importer.rb @@ -17,12 +17,21 @@ def initialize(project) def execute log_info(import_stage: 'import_users', message: 'starting') - users = client.users(project_key) + page = 1 - users.each_slice(BATCH_SIZE) do |batch| - log_info(import_stage: 'import_users', message: "caching batch of #{batch.count} users") + loop do + log_info( + import_stage: 'import_users', + message: "importing page #{page} using batch size #{BATCH_SIZE}" + ) - batch.each { |user| cache_source_user(user) } + users = client.users(project_key, page_offset: page, limit: BATCH_SIZE).to_a + + break if users.empty? + + users.each { |user| cache_source_user(user) } + + page += 1 end log_info(import_stage: 'import_users', message: 'finished') diff --git a/spec/lib/bitbucket_server/client_spec.rb b/spec/lib/bitbucket_server/client_spec.rb index f345718b830a1f..0d027234a0d393 100644 --- a/spec/lib/bitbucket_server/client_spec.rb +++ b/spec/lib/bitbucket_server/client_spec.rb @@ -84,10 +84,19 @@ let(:path) { "/projects/#{project}/permissions/users" } it 'requests a collection' do - expect(BitbucketServer::Paginator).to receive(:new).with(anything, path, :user, page_offset: 0, limit: 10_000) + expect(BitbucketServer::Paginator).to receive(:new).with(anything, path, :user, page_offset: 0, limit: nil) subject.users(project) end + + it 'requests a collection with offset and limit' do + offset = 10 + limit = 100 + + expect(BitbucketServer::Paginator).to receive(:new).with(anything, path, :user, page_offset: offset, limit: limit) + + subject.users(project, page_offset: offset, limit: limit) + end end describe '#create_branch' do diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb index 33803718ffd6bb..6bee47ea819a19 100644 --- a/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb +++ b/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb @@ -4,6 +4,7 @@ RSpec.describe Gitlab::BitbucketServerImport::Importers::UsersImporter, feature_category: :importers do let(:users_mapper) { BitbucketServer::UsersMapper.new(project) } + let(:logger) { Gitlab::BitbucketServerImport::Logger } let_it_be(:project) do create(:project, :with_import_url, :import_started, :empty_repo, @@ -14,22 +15,26 @@ ) end + let(:user_1) do + BitbucketServer::Representation::User.new( + { 'user' => { 'emailAddress' => 'email1', 'slug' => 'username1' } } + ) + end + + let(:user_2) do + BitbucketServer::Representation::User.new( + { 'user' => { 'emailAddress' => 'email2', 'slug' => 'username2' } } + ) + end + before do allow(BitbucketServer::UsersMapper).to receive(:new).and_return(users_mapper) stub_const("#{described_class}::BATCH_SIZE", 1) - allow(Gitlab::BitbucketServerImport::Logger).to receive(:info).and_call_original allow_next_instance_of(BitbucketServer::Client) do |client| - allow(client).to receive(:users).with('key').and_return( - [ - BitbucketServer::Representation::User.new( - { 'user' => { 'emailAddress' => 'email1', 'slug' => 'username1' } } - ), - BitbucketServer::Representation::User.new( - { 'user' => { 'emailAddress' => 'email2', 'slug' => 'username2' } } - ) - ] - ) + allow(client).to receive(:users).with('key', limit: 1, page_offset: 1).and_return([user_1]) + allow(client).to receive(:users).with('key', limit: 1, page_offset: 2).and_return([user_2]) + allow(client).to receive(:users).with('key', limit: 1, page_offset: 3).and_return([]) end end @@ -37,7 +42,11 @@ describe '#execute' do it 'writes the username and email to cache for every user in batches' do - expect(Gitlab::BitbucketServerImport::Logger).to receive(:info).exactly(4).times + expect(logger).to receive(:info).with(hash_including(message: 'starting')) + expect(logger).to receive(:info).with(hash_including(message: 'importing page 1 using batch size 1')) + expect(logger).to receive(:info).with(hash_including(message: 'importing page 2 using batch size 1')) + expect(logger).to receive(:info).with(hash_including(message: 'importing page 3 using batch size 1')) + expect(logger).to receive(:info).with(hash_including(message: 'finished')) expect(users_mapper).to receive(:write_source_email_to_cache).and_call_original.twice -- GitLab From b56fec5d50601b693d0a500b95f4c17907631179 Mon Sep 17 00:00:00 2001 From: maddievn Date: Tue, 12 Dec 2023 12:19:06 +0200 Subject: [PATCH 05/11] Use write_multiple instead of writing each user --- .../stage/import_users_worker.rb | 2 +- lib/bitbucket_server/client.rb | 4 +- lib/bitbucket_server/users_mapper.rb | 29 -------------- .../importers/users_importer.rb | 17 +++++---- .../bitbucket_server_import/user_caching.rb | 13 +++++++ .../lib/bitbucket_server/users_mapper_spec.rb | 38 ------------------- .../importers/users_importer_spec.rb | 30 +++++++++------ 7 files changed, 44 insertions(+), 89 deletions(-) delete mode 100644 lib/bitbucket_server/users_mapper.rb create mode 100644 lib/gitlab/bitbucket_server_import/user_caching.rb delete mode 100644 spec/lib/bitbucket_server/users_mapper_spec.rb diff --git a/app/workers/gitlab/bitbucket_server_import/stage/import_users_worker.rb b/app/workers/gitlab/bitbucket_server_import/stage/import_users_worker.rb index 203ca50ea0f884..dd18139fc9e3dc 100644 --- a/app/workers/gitlab/bitbucket_server_import/stage/import_users_worker.rb +++ b/app/workers/gitlab/bitbucket_server_import/stage/import_users_worker.rb @@ -3,7 +3,7 @@ module Gitlab module BitbucketServerImport module Stage - class ImportUsersWorker # rubocop:disable Scalability/IdempotentWorker -- users should be cached once only + class ImportUsersWorker # rubocop:disable Scalability/IdempotentWorker -- ImportPullRequestsWorker is not idempotent include StageMethods private diff --git a/lib/bitbucket_server/client.rb b/lib/bitbucket_server/client.rb index d27719390ea432..432928b0591c4f 100644 --- a/lib/bitbucket_server/client.rb +++ b/lib/bitbucket_server/client.rb @@ -29,8 +29,8 @@ def repos(page_offset: 0, limit: nil, filter: nil) get_collection(path, :repo, page_offset: page_offset, limit: limit) end - def users(project, page_offset: 0, limit: nil) - path = "/projects/#{project}/permissions/users" + def users(project_key, page_offset: 0, limit: nil) + path = "/projects/#{project_key}/permissions/users" get_collection(path, :user, page_offset: page_offset, limit: limit) end diff --git a/lib/bitbucket_server/users_mapper.rb b/lib/bitbucket_server/users_mapper.rb deleted file mode 100644 index f24d4465692500..00000000000000 --- a/lib/bitbucket_server/users_mapper.rb +++ /dev/null @@ -1,29 +0,0 @@ -# frozen_string_literal: true - -module BitbucketServer - class UsersMapper - include Gitlab::Utils::StrongMemoize - - SOURCE_USER_CACHE_KEY = 'bitbucket_server/project/%s/source/username/%s' - - def initialize(project) - @project_id = project.id - end - - attr_reader :project_id - - def write_source_email_to_cache(username, email) - ::Gitlab::Cache::Import::Caching.write(cache_key(username), email) - end - - def read_source_email_from_cache(username) - ::Gitlab::Cache::Import::Caching.read(cache_key(username)) - end - - private - - def cache_key(username) - format(SOURCE_USER_CACHE_KEY, project_id, username) - end - end -end diff --git a/lib/gitlab/bitbucket_server_import/importers/users_importer.rb b/lib/gitlab/bitbucket_server_import/importers/users_importer.rb index 6298151e91b5ef..bd7e84a3b0c28e 100644 --- a/lib/gitlab/bitbucket_server_import/importers/users_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/users_importer.rb @@ -5,14 +5,16 @@ module BitbucketServerImport module Importers class UsersImporter include Loggable + include UserCaching BATCH_SIZE = 100 def initialize(project) @project = project + @project_id = project.id end - attr_reader :project + attr_reader :project, :project_id def execute log_info(import_stage: 'import_users', message: 'starting') @@ -29,7 +31,7 @@ def execute break if users.empty? - users.each { |user| cache_source_user(user) } + cache_users(users) page += 1 end @@ -39,12 +41,13 @@ def execute private - def cache_source_user(user) - users_mapper.write_source_email_to_cache(user.username, user.email) - end + def cache_users(users) + users_hash = users.each_with_object({}) do |user, hash| + cache_key = source_user_cache_key(project_id, user.username) + hash[cache_key] = user.email + end - def users_mapper - @users_mapper ||= BitbucketServer::UsersMapper.new(project) + ::Gitlab::Cache::Import::Caching.write_multiple(users_hash) end def client diff --git a/lib/gitlab/bitbucket_server_import/user_caching.rb b/lib/gitlab/bitbucket_server_import/user_caching.rb new file mode 100644 index 00000000000000..0f0169122c513d --- /dev/null +++ b/lib/gitlab/bitbucket_server_import/user_caching.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +module Gitlab + module BitbucketServerImport + module UserCaching + SOURCE_USER_CACHE_KEY = 'bitbucket_server/project/%s/source/username/%s' + + def source_user_cache_key(project_id, username) + format(SOURCE_USER_CACHE_KEY, project_id, username) + end + end + end +end diff --git a/spec/lib/bitbucket_server/users_mapper_spec.rb b/spec/lib/bitbucket_server/users_mapper_spec.rb deleted file mode 100644 index 79bb3f343d7089..00000000000000 --- a/spec/lib/bitbucket_server/users_mapper_spec.rb +++ /dev/null @@ -1,38 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe BitbucketServer::UsersMapper, feature_category: :importers do - let_it_be(:project) { create(:project) } - let(:username) { 'test_user' } - let(:email) { 'test@email.com' } - - subject(:users_mapper) { described_class.new(project) } - - describe '#write_source_email_to_cache' do - it 'caches provided source username and email' do - expect(Gitlab::Cache::Import::Caching).to receive(:write) - .with("bitbucket_server/project/#{project.id}/source/username/#{username}", email) - - users_mapper.write_source_email_to_cache(username, email) - end - end - - describe '#read_source_email_from_cache', :clean_gitlab_redis_cache do - context 'when it has been cached' do - before do - users_mapper.write_source_email_to_cache(username, email) - end - - it 'returns the matching email' do - expect(users_mapper.read_source_email_from_cache(username)).to eq(email) - end - end - - context 'when it has not been cached' do - it 'returns the matching email' do - expect(users_mapper.read_source_email_from_cache(username)).to be_nil - end - end - end -end diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb index 6bee47ea819a19..33d6ab94513640 100644 --- a/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb +++ b/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb @@ -3,7 +3,6 @@ require 'spec_helper' RSpec.describe Gitlab::BitbucketServerImport::Importers::UsersImporter, feature_category: :importers do - let(:users_mapper) { BitbucketServer::UsersMapper.new(project) } let(:logger) { Gitlab::BitbucketServerImport::Logger } let_it_be(:project) do @@ -27,14 +26,19 @@ ) end + let(:user_3) do + BitbucketServer::Representation::User.new( + { 'user' => { 'emailAddress' => 'email3', 'slug' => 'username3' } } + ) + end + before do - allow(BitbucketServer::UsersMapper).to receive(:new).and_return(users_mapper) - stub_const("#{described_class}::BATCH_SIZE", 1) + stub_const("#{described_class}::BATCH_SIZE", 2) allow_next_instance_of(BitbucketServer::Client) do |client| - allow(client).to receive(:users).with('key', limit: 1, page_offset: 1).and_return([user_1]) - allow(client).to receive(:users).with('key', limit: 1, page_offset: 2).and_return([user_2]) - allow(client).to receive(:users).with('key', limit: 1, page_offset: 3).and_return([]) + allow(client).to receive(:users).with('key', limit: 2, page_offset: 1).and_return([user_1, user_2]) + allow(client).to receive(:users).with('key', limit: 2, page_offset: 2).and_return([user_3]) + allow(client).to receive(:users).with('key', limit: 2, page_offset: 3).and_return([]) end end @@ -43,17 +47,19 @@ describe '#execute' do it 'writes the username and email to cache for every user in batches' do expect(logger).to receive(:info).with(hash_including(message: 'starting')) - expect(logger).to receive(:info).with(hash_including(message: 'importing page 1 using batch size 1')) - expect(logger).to receive(:info).with(hash_including(message: 'importing page 2 using batch size 1')) - expect(logger).to receive(:info).with(hash_including(message: 'importing page 3 using batch size 1')) + expect(logger).to receive(:info).with(hash_including(message: 'importing page 1 using batch size 2')) + expect(logger).to receive(:info).with(hash_including(message: 'importing page 2 using batch size 2')) + expect(logger).to receive(:info).with(hash_including(message: 'importing page 3 using batch size 2')) expect(logger).to receive(:info).with(hash_including(message: 'finished')) - expect(users_mapper).to receive(:write_source_email_to_cache).and_call_original.twice + expect(Gitlab::Cache::Import::Caching).to receive(:write_multiple).and_call_original.twice importer.execute - expect(users_mapper.read_source_email_from_cache('username1')).to eq('email1') - expect(users_mapper.read_source_email_from_cache('username2')).to eq('email2') + cache_key_prefix = "bitbucket_server/project/#{project.id}/source/username" + expect(Gitlab::Cache::Import::Caching.read("#{cache_key_prefix}/username1")).to eq('email1') + expect(Gitlab::Cache::Import::Caching.read("#{cache_key_prefix}/username2")).to eq('email2') + expect(Gitlab::Cache::Import::Caching.read("#{cache_key_prefix}/username3")).to eq('email3') end end end -- GitLab From 140d96d87cc9608416a7c03b16e41dbc02d9ff06 Mon Sep 17 00:00:00 2001 From: maddievn Date: Thu, 14 Dec 2023 12:34:26 +0200 Subject: [PATCH 06/11] Memoize client --- lib/gitlab/bitbucket_server_import/importers/users_importer.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/gitlab/bitbucket_server_import/importers/users_importer.rb b/lib/gitlab/bitbucket_server_import/importers/users_importer.rb index bd7e84a3b0c28e..f8d0521afb2611 100644 --- a/lib/gitlab/bitbucket_server_import/importers/users_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/users_importer.rb @@ -51,7 +51,7 @@ def cache_users(users) end def client - BitbucketServer::Client.new(project.import_data.credentials) + @client ||= BitbucketServer::Client.new(project.import_data.credentials) end def project_key -- GitLab From ed697bcbe6acb9654ba653fc34cc9e9806e32c44 Mon Sep 17 00:00:00 2001 From: maddievn Date: Thu, 14 Dec 2023 13:35:30 +0200 Subject: [PATCH 07/11] Refactor PageCounter from GithubImport to Import Changelog: changed --- .rubocop_todo/rspec/verified_doubles.yml | 2 +- .../gitlab/github_gists_import/start_import_worker.rb | 2 +- lib/gitlab/github_gists_import/importer/gists_importer.rb | 2 +- .../importer/pull_requests/reviews_importer.rb | 2 +- lib/gitlab/github_import/parallel_scheduling.rb | 2 +- lib/gitlab/github_import/single_endpoint_notes_importing.rb | 2 +- lib/gitlab/{github_import => import}/page_counter.rb | 4 ++-- .../github_gists_import/importer/gists_importer_spec.rb | 4 ++-- .../importer/protected_branches_importer_spec.rb | 2 +- .../importer/pull_requests/reviews_importer_spec.rb | 2 +- .../importer/single_endpoint_diff_notes_importer_spec.rb | 2 +- .../importer/single_endpoint_issue_events_importer_spec.rb | 6 +++--- .../importer/single_endpoint_issue_notes_importer_spec.rb | 2 +- .../single_endpoint_merge_request_notes_importer_spec.rb | 2 +- .../gitlab/{github_import => import}/page_counter_spec.rb | 2 +- spec/support/rspec_order_todo.yml | 2 +- 16 files changed, 20 insertions(+), 20 deletions(-) rename lib/gitlab/{github_import => import}/page_counter.rb (88%) rename spec/lib/gitlab/{github_import => import}/page_counter_spec.rb (93%) diff --git a/.rubocop_todo/rspec/verified_doubles.yml b/.rubocop_todo/rspec/verified_doubles.yml index 24b70629ae4f21..b79312bd84f89a 100644 --- a/.rubocop_todo/rspec/verified_doubles.yml +++ b/.rubocop_todo/rspec/verified_doubles.yml @@ -537,7 +537,6 @@ RSpec/VerifiedDoubles: - 'spec/lib/gitlab/github_import/markdown_text_spec.rb' - 'spec/lib/gitlab/github_import/milestone_finder_spec.rb' - 'spec/lib/gitlab/github_import/object_counter_spec.rb' - - 'spec/lib/gitlab/github_import/page_counter_spec.rb' - 'spec/lib/gitlab/github_import/parallel_importer_spec.rb' - 'spec/lib/gitlab/github_import/parallel_scheduling_spec.rb' - 'spec/lib/gitlab/github_import/representation/diff_note_spec.rb' @@ -566,6 +565,7 @@ RSpec/VerifiedDoubles: - 'spec/lib/gitlab/hotlinking_detector_spec.rb' - 'spec/lib/gitlab/import/import_failure_service_spec.rb' - 'spec/lib/gitlab/import/metrics_spec.rb' + - 'spec/lib/gitlab/import/page_counter_spec.rb' - 'spec/lib/gitlab/import_export/attribute_cleaner_spec.rb' - 'spec/lib/gitlab/import_export/base/relation_factory_spec.rb' - 'spec/lib/gitlab/import_export/decompressed_archive_size_validator_spec.rb' diff --git a/app/workers/gitlab/github_gists_import/start_import_worker.rb b/app/workers/gitlab/github_gists_import/start_import_worker.rb index f7d3eb1d759f91..d6c637f6d49375 100644 --- a/app/workers/gitlab/github_gists_import/start_import_worker.rb +++ b/app/workers/gitlab/github_gists_import/start_import_worker.rb @@ -17,7 +17,7 @@ class StartImportWorker # rubocop:disable Scalability/IdempotentWorker Gitlab::GithubGistsImport::Status.new(msg['args'][0]).fail! user = User.find(msg['args'][0]) - Gitlab::GithubImport::PageCounter.new(user, :gists, 'github-gists-importer').expire! + Gitlab::Import::PageCounter.new(user, :gists, 'github-gists-importer').expire! end def perform(user_id, encrypted_token) diff --git a/lib/gitlab/github_gists_import/importer/gists_importer.rb b/lib/gitlab/github_gists_import/importer/gists_importer.rb index 08744dbaf5f626..4ed6b2db5f8fae 100644 --- a/lib/gitlab/github_gists_import/importer/gists_importer.rb +++ b/lib/gitlab/github_gists_import/importer/gists_importer.rb @@ -39,7 +39,7 @@ def spread_parallel_import end def fetch_gists_to_import - page_counter = Gitlab::GithubImport::PageCounter.new(user, :gists, 'github-gists-importer') + page_counter = Gitlab::Import::PageCounter.new(user, :gists, 'github-gists-importer') collection = [] client.each_page(:gists, nil, page: page_counter.current) do |page| diff --git a/lib/gitlab/github_import/importer/pull_requests/reviews_importer.rb b/lib/gitlab/github_import/importer/pull_requests/reviews_importer.rb index 347423b0e21c54..62c9e6469d74f0 100644 --- a/lib/gitlab/github_import/importer/pull_requests/reviews_importer.rb +++ b/lib/gitlab/github_import/importer/pull_requests/reviews_importer.rb @@ -72,7 +72,7 @@ def each_review_page merge_requests_to_import.find_each do |merge_request| # The page counter needs to be scoped by merge request to avoid skipping # pages of reviews from already imported merge requests. - page_counter = PageCounter.new(project, page_counter_id(merge_request)) + page_counter = Gitlab::Import::PageCounter.new(project, page_counter_id(merge_request)) repo = project.import_source options = collection_options.merge(page: page_counter.current) diff --git a/lib/gitlab/github_import/parallel_scheduling.rb b/lib/gitlab/github_import/parallel_scheduling.rb index ce93b5203df6b3..da685d41140b5f 100644 --- a/lib/gitlab/github_import/parallel_scheduling.rb +++ b/lib/gitlab/github_import/parallel_scheduling.rb @@ -25,7 +25,7 @@ def initialize(project, client, parallel: true) @project = project @client = client @parallel = parallel - @page_counter = PageCounter.new(project, collection_method) + @page_counter = Gitlab::Import::PageCounter.new(project, collection_method) @already_imported_cache_key = format(ALREADY_IMPORTED_CACHE_KEY, project: project.id, collection: collection_method) @job_waiter_cache_key = format(JOB_WAITER_CACHE_KEY, project: project.id, collection: collection_method) diff --git a/lib/gitlab/github_import/single_endpoint_notes_importing.rb b/lib/gitlab/github_import/single_endpoint_notes_importing.rb index 3584288da57310..e2eb2671636ac3 100644 --- a/lib/gitlab/github_import/single_endpoint_notes_importing.rb +++ b/lib/gitlab/github_import/single_endpoint_notes_importing.rb @@ -75,7 +75,7 @@ def process_batch(batch) batch.each do |parent_record| # The page counter needs to be scoped by parent_record to avoid skipping # pages of notes from already imported parent_record. - page_counter = PageCounter.new(project, page_counter_id(parent_record)) + page_counter = Gitlab::Import::PageCounter.new(project, page_counter_id(parent_record)) repo = project.import_source options = collection_options.merge(page: page_counter.current) diff --git a/lib/gitlab/github_import/page_counter.rb b/lib/gitlab/import/page_counter.rb similarity index 88% rename from lib/gitlab/github_import/page_counter.rb rename to lib/gitlab/import/page_counter.rb index c238ccb893214a..44ab9c256d796d 100644 --- a/lib/gitlab/github_import/page_counter.rb +++ b/lib/gitlab/import/page_counter.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true module Gitlab - module GithubImport + module Import # PageCounter can be used to keep track of the last imported page of a # collection, allowing workers to resume where they left off in the event of # an error. @@ -12,7 +12,7 @@ class PageCounter CACHE_KEY = '%{import_type}/page-counter/%{object}/%{collection}' def initialize(object, collection, import_type = 'github-importer') - @cache_key = CACHE_KEY % { import_type: import_type, object: object.id, collection: collection } + @cache_key = format(CACHE_KEY, import_type: import_type, object: object.id, collection: collection) end # Sets the page number to the given value. diff --git a/spec/lib/gitlab/github_gists_import/importer/gists_importer_spec.rb b/spec/lib/gitlab/github_gists_import/importer/gists_importer_spec.rb index d555a847ea54a3..b6a57ef8b57ca0 100644 --- a/spec/lib/gitlab/github_gists_import/importer/gists_importer_spec.rb +++ b/spec/lib/gitlab/github_gists_import/importer/gists_importer_spec.rb @@ -8,7 +8,7 @@ let_it_be(:user) { create(:user) } let(:client) { instance_double('Gitlab::GithubImport::Client', rate_limit_resets_in: 5) } let(:token) { 'token' } - let(:page_counter) { instance_double('Gitlab::GithubImport::PageCounter', current: 1, set: true, expire!: true) } + let(:page_counter) { instance_double('Gitlab::Import::PageCounter', current: 1, set: true, expire!: true) } let(:page) { instance_double('Gitlab::GithubImport::Client::Page', objects: [gist], number: 1) } let(:url) { 'https://gist.github.com/foo/bar.git' } let(:waiter) { Gitlab::JobWaiter.new(0, 'some-job-key') } @@ -62,7 +62,7 @@ .with(token, parallel: true) .and_return(client) - allow(Gitlab::GithubImport::PageCounter) + allow(Gitlab::Import::PageCounter) .to receive(:new) .with(user, :gists, 'github-gists-importer') .and_return(page_counter) diff --git a/spec/lib/gitlab/github_import/importer/protected_branches_importer_spec.rb b/spec/lib/gitlab/github_import/importer/protected_branches_importer_spec.rb index e30f3f34de1671..7aa72e6cef1237 100644 --- a/spec/lib/gitlab/github_import/importer/protected_branches_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/protected_branches_importer_spec.rb @@ -165,7 +165,7 @@ # when user has no admin rights on repo let(:unknown_protection_branch) { branch_struct.new(name: 'development', protection: nil) } - let(:page_counter) { instance_double(Gitlab::GithubImport::PageCounter) } + let(:page_counter) { instance_double(Gitlab::Import::PageCounter) } before do allow(client).to receive(:branches).with(project.import_source) diff --git a/spec/lib/gitlab/github_import/importer/pull_requests/reviews_importer_spec.rb b/spec/lib/gitlab/github_import/importer/pull_requests/reviews_importer_spec.rb index 4321997815a333..12a172288d481e 100644 --- a/spec/lib/gitlab/github_import/importer/pull_requests/reviews_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/pull_requests/reviews_importer_spec.rb @@ -65,7 +65,7 @@ end it 'skips cached pages' do - Gitlab::GithubImport::PageCounter + Gitlab::Import::PageCounter .new(project, "merge_request/#{merge_request.id}/pull_request_reviews") .set(2) diff --git a/spec/lib/gitlab/github_import/importer/single_endpoint_diff_notes_importer_spec.rb b/spec/lib/gitlab/github_import/importer/single_endpoint_diff_notes_importer_spec.rb index 081d08edfb3ff2..1d309db3beece4 100644 --- a/spec/lib/gitlab/github_import/importer/single_endpoint_diff_notes_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/single_endpoint_diff_notes_importer_spec.rb @@ -49,7 +49,7 @@ end it 'skips cached pages' do - Gitlab::GithubImport::PageCounter + Gitlab::Import::PageCounter .new(project, "merge_request/#{merge_request.id}/pull_request_comments") .set(2) diff --git a/spec/lib/gitlab/github_import/importer/single_endpoint_issue_events_importer_spec.rb b/spec/lib/gitlab/github_import/importer/single_endpoint_issue_events_importer_spec.rb index 8f4d62e53d6a33..d8fefab2bf0c50 100644 --- a/spec/lib/gitlab/github_import/importer/single_endpoint_issue_events_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/single_endpoint_issue_events_importer_spec.rb @@ -98,7 +98,7 @@ ) end - let(:page_counter) { instance_double(Gitlab::GithubImport::PageCounter) } + let(:page_counter) { instance_double(Gitlab::Import::PageCounter) } before do allow(client).to receive(:each_page) @@ -156,7 +156,7 @@ end it 'triggers page number increment' do - expect(Gitlab::GithubImport::PageCounter) + expect(Gitlab::Import::PageCounter) .to receive(:new).with(project, 'issues/1/issue_timeline') .and_return(page_counter) expect(page_counter).to receive(:current).and_return(1) @@ -170,7 +170,7 @@ context 'when page is already processed' do before do - page_counter = Gitlab::GithubImport::PageCounter.new( + page_counter = Gitlab::Import::PageCounter.new( project, subject.page_counter_id(issuable) ) page_counter.set(page.number) diff --git a/spec/lib/gitlab/github_import/importer/single_endpoint_issue_notes_importer_spec.rb b/spec/lib/gitlab/github_import/importer/single_endpoint_issue_notes_importer_spec.rb index e1f65546e1d517..1032f9a61fd688 100644 --- a/spec/lib/gitlab/github_import/importer/single_endpoint_issue_notes_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/single_endpoint_issue_notes_importer_spec.rb @@ -48,7 +48,7 @@ end it 'skips cached pages' do - Gitlab::GithubImport::PageCounter + Gitlab::Import::PageCounter .new(project, "issue/#{issue.id}/issue_comments") .set(2) diff --git a/spec/lib/gitlab/github_import/importer/single_endpoint_merge_request_notes_importer_spec.rb b/spec/lib/gitlab/github_import/importer/single_endpoint_merge_request_notes_importer_spec.rb index 5523b97acc3fea..b9e9e9efcb5ff5 100644 --- a/spec/lib/gitlab/github_import/importer/single_endpoint_merge_request_notes_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/single_endpoint_merge_request_notes_importer_spec.rb @@ -49,7 +49,7 @@ end it 'skips cached pages' do - Gitlab::GithubImport::PageCounter + Gitlab::Import::PageCounter .new(project, "merge_request/#{merge_request.id}/issue_comments") .set(2) diff --git a/spec/lib/gitlab/github_import/page_counter_spec.rb b/spec/lib/gitlab/import/page_counter_spec.rb similarity index 93% rename from spec/lib/gitlab/github_import/page_counter_spec.rb rename to spec/lib/gitlab/import/page_counter_spec.rb index ddb62cc8fad7c0..cebbeb2350c444 100644 --- a/spec/lib/gitlab/github_import/page_counter_spec.rb +++ b/spec/lib/gitlab/import/page_counter_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe Gitlab::GithubImport::PageCounter, :clean_gitlab_redis_cache, feature_category: :importers do +RSpec.describe Gitlab::Import::PageCounter, :clean_gitlab_redis_cache, feature_category: :importers do let(:project) { double(:project, id: 1) } let(:counter) { described_class.new(project, :issues) } diff --git a/spec/support/rspec_order_todo.yml b/spec/support/rspec_order_todo.yml index 015f63cbf45bdf..63f11a851242ef 100644 --- a/spec/support/rspec_order_todo.yml +++ b/spec/support/rspec_order_todo.yml @@ -6131,7 +6131,6 @@ - './spec/lib/gitlab/github_import/markdown_text_spec.rb' - './spec/lib/gitlab/github_import/milestone_finder_spec.rb' - './spec/lib/gitlab/github_import/object_counter_spec.rb' -- './spec/lib/gitlab/github_import/page_counter_spec.rb' - './spec/lib/gitlab/github_import/parallel_importer_spec.rb' - './spec/lib/gitlab/github_import/parallel_scheduling_spec.rb' - './spec/lib/gitlab/github_import/representation/diff_note_spec.rb' @@ -6259,6 +6258,7 @@ - './spec/lib/gitlab/i18n/translation_entry_spec.rb' - './spec/lib/gitlab/identifier_spec.rb' - './spec/lib/gitlab/import/database_helpers_spec.rb' +- './spec/lib/gitlab/import/page_counter_spec.rb' - './spec/lib/gitlab/import_export/after_export_strategies/base_after_export_strategy_spec.rb' - './spec/lib/gitlab/import_export/after_export_strategies/web_upload_strategy_spec.rb' - './spec/lib/gitlab/import_export/after_export_strategy_builder_spec.rb' -- GitLab From fe2ceff6b911ee7181d32ba78287ede8b1c409d1 Mon Sep 17 00:00:00 2001 From: maddievn Date: Mon, 18 Dec 2023 11:51:20 +0200 Subject: [PATCH 08/11] Add PageCounter for UsersImporter --- .../importers/users_importer.rb | 16 +++++++++++----- lib/gitlab/import/page_counter.rb | 4 ++++ .../importers/users_importer_spec.rb | 6 ++++++ 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/lib/gitlab/bitbucket_server_import/importers/users_importer.rb b/lib/gitlab/bitbucket_server_import/importers/users_importer.rb index f8d0521afb2611..2f53faa3e72f6e 100644 --- a/lib/gitlab/bitbucket_server_import/importers/users_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/users_importer.rb @@ -19,23 +19,25 @@ def initialize(project) def execute log_info(import_stage: 'import_users', message: 'starting') - page = 1 - loop do + current = page_counter.current + log_info( import_stage: 'import_users', - message: "importing page #{page} using batch size #{BATCH_SIZE}" + message: "importing page #{current} using batch size #{BATCH_SIZE}" ) - users = client.users(project_key, page_offset: page, limit: BATCH_SIZE).to_a + users = client.users(project_key, page_offset: current, limit: BATCH_SIZE).to_a break if users.empty? cache_users(users) - page += 1 + page_counter.increment!(current) end + page_counter.expire! + log_info(import_stage: 'import_users', message: 'finished') end @@ -57,6 +59,10 @@ def client def project_key project.import_data.data['project_key'] end + + def page_counter + @page_counter ||= Gitlab::Import::PageCounter.new(project, :users, 'bitbucket-server-importer') + end end end end diff --git a/lib/gitlab/import/page_counter.rb b/lib/gitlab/import/page_counter.rb index 44ab9c256d796d..c9c2a4cd7dac29 100644 --- a/lib/gitlab/import/page_counter.rb +++ b/lib/gitlab/import/page_counter.rb @@ -30,6 +30,10 @@ def current def expire! Gitlab::Cache::Import::Caching.expire(cache_key, 0) end + + def increment!(value) + Gitlab::Cache::Import::Caching.write(cache_key, value + 1) + end end end end diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb index 33d6ab94513640..52c6f8bfb302cd 100644 --- a/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb +++ b/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb @@ -52,6 +52,12 @@ expect(logger).to receive(:info).with(hash_including(message: 'importing page 3 using batch size 2')) expect(logger).to receive(:info).with(hash_including(message: 'finished')) + expect_next_instance_of(Gitlab::Import::PageCounter) do |page_counter| + expect(page_counter).to receive(:current).and_call_original.exactly(3).times + expect(page_counter).to receive(:increment!).and_call_original.twice + expect(page_counter).to receive(:expire!).and_call_original.once + end + expect(Gitlab::Cache::Import::Caching).to receive(:write_multiple).and_call_original.twice importer.execute -- GitLab From a636e8098b7e8d1badf83d40d518d691e1b9cdeb Mon Sep 17 00:00:00 2001 From: maddievn Date: Mon, 18 Dec 2023 12:39:17 +0200 Subject: [PATCH 09/11] Make worker idempotent --- app/workers/all_queues.yml | 2 +- .../stage/import_users_worker.rb | 4 +++- .../stage/import_users_worker_spec.rb | 15 ++++++++++++++- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index 63cc855e4bd9ad..a07e05f5d8e76a 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -2584,7 +2584,7 @@ :urgency: :low :resource_boundary: :unknown :weight: 1 - :idempotent: false + :idempotent: true :tags: [] - :name: bulk_import :worker_name: BulkImportWorker diff --git a/app/workers/gitlab/bitbucket_server_import/stage/import_users_worker.rb b/app/workers/gitlab/bitbucket_server_import/stage/import_users_worker.rb index dd18139fc9e3dc..4c323a11755687 100644 --- a/app/workers/gitlab/bitbucket_server_import/stage/import_users_worker.rb +++ b/app/workers/gitlab/bitbucket_server_import/stage/import_users_worker.rb @@ -3,9 +3,11 @@ module Gitlab module BitbucketServerImport module Stage - class ImportUsersWorker # rubocop:disable Scalability/IdempotentWorker -- ImportPullRequestsWorker is not idempotent + class ImportUsersWorker include StageMethods + idempotent! + private def import(project) diff --git a/spec/workers/gitlab/bitbucket_server_import/stage/import_users_worker_spec.rb b/spec/workers/gitlab/bitbucket_server_import/stage/import_users_worker_spec.rb index d4cd1b82349529..1141d08729d356 100644 --- a/spec/workers/gitlab/bitbucket_server_import/stage/import_users_worker_spec.rb +++ b/spec/workers/gitlab/bitbucket_server_import/stage/import_users_worker_spec.rb @@ -3,7 +3,14 @@ require 'spec_helper' RSpec.describe Gitlab::BitbucketServerImport::Stage::ImportUsersWorker, feature_category: :importers do - let_it_be(:project) { create(:project, :import_started) } + let_it_be(:project) do + create(:project, :import_started, + import_data_attributes: { + data: { 'project_key' => 'key', 'repo_slug' => 'slug' }, + credentials: { 'base_uri' => 'http://bitbucket.org/', 'user' => 'bitbucket', 'password' => 'password' } + } + ) + end let(:worker) { described_class.new } @@ -15,6 +22,12 @@ allow_next_instance_of(Gitlab::BitbucketServerImport::Importers::UsersImporter) do |importer| allow(importer).to receive(:execute) end + + allow(Gitlab::BitbucketServerImport::Stage::ImportPullRequestsWorker).to receive(:perform_async).and_return(nil) + end + + it_behaves_like 'an idempotent worker' do + let(:job_args) { [project.id] } end it 'schedules the next stage' do -- GitLab From 0e95e7c9890e4d8288e62ba069b28fa1725c7fbc Mon Sep 17 00:00:00 2001 From: maddievn Date: Tue, 19 Dec 2023 10:16:15 +0200 Subject: [PATCH 10/11] Apply reviewer comments --- .rubocop_todo/rspec/verified_doubles.yml | 1 - .../importers/users_importer.rb | 2 +- lib/gitlab/import/page_counter.rb | 4 +-- .../importers/users_importer_spec.rb | 2 +- spec/lib/gitlab/import/page_counter_spec.rb | 25 ++++++++++++++++++- 5 files changed, 28 insertions(+), 6 deletions(-) diff --git a/.rubocop_todo/rspec/verified_doubles.yml b/.rubocop_todo/rspec/verified_doubles.yml index 9e933d9c70b104..4824f2007d3200 100644 --- a/.rubocop_todo/rspec/verified_doubles.yml +++ b/.rubocop_todo/rspec/verified_doubles.yml @@ -564,7 +564,6 @@ RSpec/VerifiedDoubles: - 'spec/lib/gitlab/hotlinking_detector_spec.rb' - 'spec/lib/gitlab/import/import_failure_service_spec.rb' - 'spec/lib/gitlab/import/metrics_spec.rb' - - 'spec/lib/gitlab/import/page_counter_spec.rb' - 'spec/lib/gitlab/import_export/attribute_cleaner_spec.rb' - 'spec/lib/gitlab/import_export/base/relation_factory_spec.rb' - 'spec/lib/gitlab/import_export/decompressed_archive_size_validator_spec.rb' diff --git a/lib/gitlab/bitbucket_server_import/importers/users_importer.rb b/lib/gitlab/bitbucket_server_import/importers/users_importer.rb index 2f53faa3e72f6e..87db0b3793a0c8 100644 --- a/lib/gitlab/bitbucket_server_import/importers/users_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/users_importer.rb @@ -33,7 +33,7 @@ def execute cache_users(users) - page_counter.increment!(current) + page_counter.increment! end page_counter.expire! diff --git a/lib/gitlab/import/page_counter.rb b/lib/gitlab/import/page_counter.rb index c9c2a4cd7dac29..b6e18926681ea2 100644 --- a/lib/gitlab/import/page_counter.rb +++ b/lib/gitlab/import/page_counter.rb @@ -31,8 +31,8 @@ def expire! Gitlab::Cache::Import::Caching.expire(cache_key, 0) end - def increment!(value) - Gitlab::Cache::Import::Caching.write(cache_key, value + 1) + def increment! + set(current + 1) end end end diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb index 52c6f8bfb302cd..2614f7e2453d7c 100644 --- a/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb +++ b/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb @@ -53,7 +53,7 @@ expect(logger).to receive(:info).with(hash_including(message: 'finished')) expect_next_instance_of(Gitlab::Import::PageCounter) do |page_counter| - expect(page_counter).to receive(:current).and_call_original.exactly(3).times + expect(page_counter).to receive(:current).and_call_original.exactly(5).times expect(page_counter).to receive(:increment!).and_call_original.twice expect(page_counter).to receive(:expire!).and_call_original.once end diff --git a/spec/lib/gitlab/import/page_counter_spec.rb b/spec/lib/gitlab/import/page_counter_spec.rb index cebbeb2350c444..332fd4461cc54c 100644 --- a/spec/lib/gitlab/import/page_counter_spec.rb +++ b/spec/lib/gitlab/import/page_counter_spec.rb @@ -3,7 +3,7 @@ require 'spec_helper' RSpec.describe Gitlab::Import::PageCounter, :clean_gitlab_redis_cache, feature_category: :importers do - let(:project) { double(:project, id: 1) } + let(:project) { instance_double(Project, id: 1) } let(:counter) { described_class.new(project, :issues) } describe '#initialize' do @@ -52,4 +52,27 @@ expect(counter.current).to eq(1) end end + + describe '#increment!' do + it 'expires the current page counter' do + counter.set(2) + + counter.increment! + + expect(Gitlab::Cache::Import::Caching.read_integer(counter.cache_key)).to eq(3) + expect(counter.current).to eq(3) + end + + context 'when the cache is empty at first' do + it 'increments from 1 to 2' do + expect(Gitlab::Cache::Import::Caching.read_integer(counter.cache_key)).to be_nil + expect(counter.current).to eq(1) + + counter.increment! + + expect(Gitlab::Cache::Import::Caching.read_integer(counter.cache_key)).to eq(2) + expect(counter.current).to eq(2) + end + end + end end -- GitLab From 36e181685021263597804981a7c8c1a0a1003e88 Mon Sep 17 00:00:00 2001 From: maddievn Date: Wed, 20 Dec 2023 10:02:43 +0200 Subject: [PATCH 11/11] Apply reviewer feedback --- .../importers/users_importer.rb | 7 +++--- lib/gitlab/import/page_counter.rb | 4 ---- .../importers/users_importer_spec.rb | 5 ++-- spec/lib/gitlab/import/page_counter_spec.rb | 23 ------------------- 4 files changed, 7 insertions(+), 32 deletions(-) diff --git a/lib/gitlab/bitbucket_server_import/importers/users_importer.rb b/lib/gitlab/bitbucket_server_import/importers/users_importer.rb index 87db0b3793a0c8..fc96f894a998bd 100644 --- a/lib/gitlab/bitbucket_server_import/importers/users_importer.rb +++ b/lib/gitlab/bitbucket_server_import/importers/users_importer.rb @@ -19,9 +19,9 @@ def initialize(project) def execute log_info(import_stage: 'import_users', message: 'starting') - loop do - current = page_counter.current + current = page_counter.current + loop do log_info( import_stage: 'import_users', message: "importing page #{current} using batch size #{BATCH_SIZE}" @@ -33,7 +33,8 @@ def execute cache_users(users) - page_counter.increment! + current += 1 + page_counter.set(current) end page_counter.expire! diff --git a/lib/gitlab/import/page_counter.rb b/lib/gitlab/import/page_counter.rb index b6e18926681ea2..44ab9c256d796d 100644 --- a/lib/gitlab/import/page_counter.rb +++ b/lib/gitlab/import/page_counter.rb @@ -30,10 +30,6 @@ def current def expire! Gitlab::Cache::Import::Caching.expire(cache_key, 0) end - - def increment! - set(current + 1) - end end end end diff --git a/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb b/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb index 2614f7e2453d7c..79010390628b67 100644 --- a/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb +++ b/spec/lib/gitlab/bitbucket_server_import/importers/users_importer_spec.rb @@ -53,8 +53,9 @@ expect(logger).to receive(:info).with(hash_including(message: 'finished')) expect_next_instance_of(Gitlab::Import::PageCounter) do |page_counter| - expect(page_counter).to receive(:current).and_call_original.exactly(5).times - expect(page_counter).to receive(:increment!).and_call_original.twice + expect(page_counter).to receive(:current).and_call_original.once + expect(page_counter).to receive(:set).with(2).and_call_original.once + expect(page_counter).to receive(:set).with(3).and_call_original.once expect(page_counter).to receive(:expire!).and_call_original.once end diff --git a/spec/lib/gitlab/import/page_counter_spec.rb b/spec/lib/gitlab/import/page_counter_spec.rb index 332fd4461cc54c..a7a4e301aa30c8 100644 --- a/spec/lib/gitlab/import/page_counter_spec.rb +++ b/spec/lib/gitlab/import/page_counter_spec.rb @@ -52,27 +52,4 @@ expect(counter.current).to eq(1) end end - - describe '#increment!' do - it 'expires the current page counter' do - counter.set(2) - - counter.increment! - - expect(Gitlab::Cache::Import::Caching.read_integer(counter.cache_key)).to eq(3) - expect(counter.current).to eq(3) - end - - context 'when the cache is empty at first' do - it 'increments from 1 to 2' do - expect(Gitlab::Cache::Import::Caching.read_integer(counter.cache_key)).to be_nil - expect(counter.current).to eq(1) - - counter.increment! - - expect(Gitlab::Cache::Import::Caching.read_integer(counter.cache_key)).to eq(2) - expect(counter.current).to eq(2) - end - end - end end -- GitLab