From 26ae1a7237657e5774d7894392ffca7b32cb1365 Mon Sep 17 00:00:00 2001 From: Kassio Borges Date: Thu, 29 Apr 2021 21:19:12 +0100 Subject: [PATCH] Github Importer: Add Cache to Pull Request Reviews importer Github importer have a mechanism to re-schedule an Importer job if it gets throttled. But, repositories with large number of pull requests, were in a _throttling loop_ for the Pull Request Reviews Importer. In other words, when re-scheduled, the job was re-starting importing reviews to all imported Pull Requests, regardless if already imported or not, causing another throttling. To fix this, already imported Pull Requests ids are being cached and skipped, this way, when the job re-start, we don't do requests related to already imported Pull Requests. The same is applied to the Pull Request MergedBy importer. Related to: https://gitlab.com/gitlab-org/gitlab/-/issues/329552 Changelog: changed MR: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/60668 --- ...b-importer-add-missing-importes-caches.yml | 5 +++++ lib/gitlab/github_import/client.rb | 2 +- .../pull_requests_merged_by_importer.rb | 8 ++++++-- .../pull_requests_reviews_importer.rb | 19 ++++++++++++------- spec/lib/gitlab/github_import/client_spec.rb | 5 +++-- .../pull_requests_merged_by_importer_spec.rb | 13 +++++++++---- .../pull_requests_reviews_importer_spec.rb | 18 ++++++++++++++---- 7 files changed, 50 insertions(+), 20 deletions(-) create mode 100644 changelogs/unreleased/kassio-github-importer-add-missing-importes-caches.yml diff --git a/changelogs/unreleased/kassio-github-importer-add-missing-importes-caches.yml b/changelogs/unreleased/kassio-github-importer-add-missing-importes-caches.yml new file mode 100644 index 00000000000000..d949cba7a2d65e --- /dev/null +++ b/changelogs/unreleased/kassio-github-importer-add-missing-importes-caches.yml @@ -0,0 +1,5 @@ +--- +title: 'Github Importer: Add Cache to Pull Request Reviews importer' +merge_request: 60668 +author: +type: changed diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb index 328f1f742c51d9..138716b1b53bf4 100644 --- a/lib/gitlab/github_import/client.rb +++ b/lib/gitlab/github_import/client.rb @@ -70,7 +70,7 @@ def user(username) end def pull_request_reviews(repo_name, iid) - with_rate_limit { octokit.pull_request_reviews(repo_name, iid) } + each_object(:pull_request_reviews, repo_name, iid) end # Returns the details of a GitHub repository. diff --git a/lib/gitlab/github_import/importer/pull_requests_merged_by_importer.rb b/lib/gitlab/github_import/importer/pull_requests_merged_by_importer.rb index 466288fde4c88c..94472cd341eb2a 100644 --- a/lib/gitlab/github_import/importer/pull_requests_merged_by_importer.rb +++ b/lib/gitlab/github_import/importer/pull_requests_merged_by_importer.rb @@ -22,14 +22,18 @@ def collection_method :pull_requests_merged_by end - def id_for_already_imported_cache(pr) - pr.number + def id_for_already_imported_cache(merge_request) + merge_request.id end def each_object_to_import project.merge_requests.with_state(:merged).find_each do |merge_request| + next if already_imported?(merge_request) + pull_request = client.pull_request(project.import_source, merge_request.iid) yield(pull_request) + + mark_as_imported(merge_request) end end end diff --git a/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb b/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb index 6d1b588f0e01b7..827027203ff40f 100644 --- a/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb +++ b/lib/gitlab/github_import/importer/pull_requests_reviews_importer.rb @@ -22,17 +22,22 @@ def collection_method :pull_request_reviews end - def id_for_already_imported_cache(review) - review.github_id + def id_for_already_imported_cache(merge_request) + merge_request.id end def each_object_to_import project.merge_requests.find_each do |merge_request| - reviews = client.pull_request_reviews(project.import_source, merge_request.iid) - reviews.each do |review| - review.merge_request_id = merge_request.id - yield(review) - end + next if already_imported?(merge_request) + + client + .pull_request_reviews(project.import_source, merge_request.iid) + .each do |review| + review.merge_request_id = merge_request.id + yield(review) + end + + mark_as_imported(merge_request) end end end diff --git a/spec/lib/gitlab/github_import/client_spec.rb b/spec/lib/gitlab/github_import/client_spec.rb index 4000e0b2611514..194dfb228ee464 100644 --- a/spec/lib/gitlab/github_import/client_spec.rb +++ b/spec/lib/gitlab/github_import/client_spec.rb @@ -32,8 +32,9 @@ it 'returns the pull request reviews' do client = described_class.new('foo') - expect(client.octokit).to receive(:pull_request_reviews).with('foo/bar', 999) - expect(client).to receive(:with_rate_limit).and_yield + expect(client) + .to receive(:each_object) + .with(:pull_request_reviews, 'foo/bar', 999) client.pull_request_reviews('foo/bar', 999) end diff --git a/spec/lib/gitlab/github_import/importer/pull_requests_merged_by_importer_spec.rb b/spec/lib/gitlab/github_import/importer/pull_requests_merged_by_importer_spec.rb index b859cc727a668b..4a47d103cde507 100644 --- a/spec/lib/gitlab/github_import/importer/pull_requests_merged_by_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/pull_requests_merged_by_importer_spec.rb @@ -23,12 +23,11 @@ end describe '#id_for_already_imported_cache' do - it { expect(subject.id_for_already_imported_cache(double(number: 1))).to eq(1) } + it { expect(subject.id_for_already_imported_cache(double(id: 1))).to eq(1) } end - describe '#each_object_to_import' do + describe '#each_object_to_import', :clean_gitlab_redis_cache do it 'fetchs the merged pull requests data' do - pull_request = double create( :merged_merge_request, iid: 999, @@ -36,12 +35,18 @@ target_project: project ) + pull_request = double + allow(client) .to receive(:pull_request) + .exactly(:once) # ensure to be cached on the second call .with('http://somegithub.com', 999) .and_return(pull_request) - expect { |b| subject.each_object_to_import(&b) }.to yield_with_args(pull_request) + expect { |b| subject.each_object_to_import(&b) } + .to yield_with_args(pull_request) + + subject.each_object_to_import {} end end end diff --git a/spec/lib/gitlab/github_import/importer/pull_requests_reviews_importer_spec.rb b/spec/lib/gitlab/github_import/importer/pull_requests_reviews_importer_spec.rb index 5e2302f96625ca..f18064f10aab7d 100644 --- a/spec/lib/gitlab/github_import/importer/pull_requests_reviews_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/pull_requests_reviews_importer_spec.rb @@ -23,12 +23,18 @@ end describe '#id_for_already_imported_cache' do - it { expect(subject.id_for_already_imported_cache(double(github_id: 1))).to eq(1) } + it { expect(subject.id_for_already_imported_cache(double(id: 1))).to eq(1) } end - describe '#each_object_to_import' do + describe '#each_object_to_import', :clean_gitlab_redis_cache do it 'fetchs the merged pull requests data' do - merge_request = create(:merge_request, source_project: project) + merge_request = create( + :merged_merge_request, + iid: 999, + source_project: project, + target_project: project + ) + review = double expect(review) @@ -37,10 +43,14 @@ allow(client) .to receive(:pull_request_reviews) + .exactly(:once) # ensure to be cached on the second call .with('github/repo', merge_request.iid) .and_return([review]) - expect { |b| subject.each_object_to_import(&b) }.to yield_with_args(review) + expect { |b| subject.each_object_to_import(&b) } + .to yield_with_args(review) + + subject.each_object_to_import {} end end end -- GitLab