From 3c61372ce52f7c630cc6028d97d614892c0b599a Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Tue, 24 Oct 2023 14:45:02 +1300 Subject: [PATCH 1/4] Increase interruption retries for GitHub workers https://gitlab.com/gitlab-org/gitlab/-/issues/416777 When the Sidekiq process is asked to stop it will wait for a period of time for a running job to finish, before forcefully terminating and requeuing the job if it does not finish within this time. Normally Sidekiq will do this over and over for a job indefinitely. The `reliable-fetcher` gem puts a limit on the number of times a job will be terminated and requeued to 3 times. After this the job will not be requeued when terminated. This offers protection from jobs that can never complete in time between Sidekiq restarts (for example, the time between deploys). Many of our GitHub importer "stage" workers: - can work for a very long time as they fetch all the data from GitHub to be imported - pick up from where they left off when interrupted For these "stage" workers we're increasing the number of times they will be requeued after being terminated. https://gitlab.com/gitlab-org/gitlab/-/issues/416777 --- .../gitlab/github_import/stage_methods.rb | 21 +++++++++++++++++++ .../stage/import_attachments_worker.rb | 2 ++ .../stage/import_issue_events_worker.rb | 2 ++ .../import_issues_and_diff_notes_worker.rb | 2 ++ .../stage/import_lfs_objects_worker.rb | 5 +++++ .../stage/import_notes_worker.rb | 2 ++ .../import_pull_requests_merged_by_worker.rb | 2 ++ ...rt_pull_requests_review_requests_worker.rb | 2 ++ .../import_pull_requests_reviews_worker.rb | 2 ++ .../stage/import_pull_requests_worker.rb | 2 ++ .../lib/sidekiq/base_reliable_fetch.rb | 2 +- .../spec/base_reliable_fetch_spec.rb | 13 ++++++++++++ 12 files changed, 56 insertions(+), 1 deletion(-) diff --git a/app/workers/concerns/gitlab/github_import/stage_methods.rb b/app/workers/concerns/gitlab/github_import/stage_methods.rb index 1414ff8d6bdc88..e1fa5bf1054e55 100644 --- a/app/workers/concerns/gitlab/github_import/stage_methods.rb +++ b/app/workers/concerns/gitlab/github_import/stage_methods.rb @@ -18,6 +18,27 @@ module StageMethods end end + class_methods do + # We can increase the number of times a GitHubImport::Stage worker is retried + # after being interrupted if the importer it executes can restart exactly + # from where it left off. + # + # It is not safe to call this method if the importer loops over its data from + # the beginning when restarted, even if it skips data that is already imported + # inside the loop, as there is a possibility the importer will never reach + # the end of the loop. + # + # Examples of stage workers that call this method are ones that execute services that: + # + # - Continue paging an endpoint from where it left off: + # https://gitlab.com/gitlab-org/gitlab/-/blob/487521cc/lib/gitlab/github_import/parallel_scheduling.rb#L114-117 + # - Continue their loop from where it left off: + # https://gitlab.com/gitlab-org/gitlab/-/blob/024235ec/lib/gitlab/github_import/importer/pull_requests/review_requests_importer.rb#L15 + def resumes_work_when_interrupted! + sidekiq_options max_retries_after_interruption: 20 + end + end + # project_id - The ID of the GitLab project to import the data into. def perform(project_id) info(project_id, message: 'starting stage') diff --git a/app/workers/gitlab/github_import/stage/import_attachments_worker.rb b/app/workers/gitlab/github_import/stage/import_attachments_worker.rb index f9952f04e998e9..6f8190fc7e69d2 100644 --- a/app/workers/gitlab/github_import/stage/import_attachments_worker.rb +++ b/app/workers/gitlab/github_import/stage/import_attachments_worker.rb @@ -11,6 +11,8 @@ class ImportAttachmentsWorker # rubocop:disable Scalability/IdempotentWorker include GithubImport::Queue include StageMethods + resumes_work_when_interrupted! + # client - An instance of Gitlab::GithubImport::Client. # project - An instance of Project. def import(client, project) diff --git a/app/workers/gitlab/github_import/stage/import_issue_events_worker.rb b/app/workers/gitlab/github_import/stage/import_issue_events_worker.rb index c80412d941b89a..77d286dc466b9e 100644 --- a/app/workers/gitlab/github_import/stage/import_issue_events_worker.rb +++ b/app/workers/gitlab/github_import/stage/import_issue_events_worker.rb @@ -11,6 +11,8 @@ class ImportIssueEventsWorker # rubocop:disable Scalability/IdempotentWorker include GithubImport::Queue include StageMethods + resumes_work_when_interrupted! + # client - An instance of Gitlab::GithubImport::Client. # project - An instance of Project. def import(client, project) diff --git a/app/workers/gitlab/github_import/stage/import_issues_and_diff_notes_worker.rb b/app/workers/gitlab/github_import/stage/import_issues_and_diff_notes_worker.rb index 592b789cc946df..e70f10e3ce9f31 100644 --- a/app/workers/gitlab/github_import/stage/import_issues_and_diff_notes_worker.rb +++ b/app/workers/gitlab/github_import/stage/import_issues_and_diff_notes_worker.rb @@ -11,6 +11,8 @@ class ImportIssuesAndDiffNotesWorker # rubocop:disable Scalability/IdempotentWor include GithubImport::Queue include StageMethods + resumes_work_when_interrupted! + # client - An instance of Gitlab::GithubImport::Client. # project - An instance of Project. def import(client, project) diff --git a/app/workers/gitlab/github_import/stage/import_lfs_objects_worker.rb b/app/workers/gitlab/github_import/stage/import_lfs_objects_worker.rb index e89a850c991bb7..700c04f392f237 100644 --- a/app/workers/gitlab/github_import/stage/import_lfs_objects_worker.rb +++ b/app/workers/gitlab/github_import/stage/import_lfs_objects_worker.rb @@ -11,6 +11,11 @@ class ImportLfsObjectsWorker # rubocop:disable Scalability/IdempotentWorker include GithubImport::Queue include StageMethods + # Importer::LfsObjectsImporter can resume work when interrupted as + # it uses Projects::LfsPointers::LfsObjectDownloadListService + # https://gitlab.com/gitlab-org/gitlab/-/blob/eabf0800/app/services/projects/lfs_pointers/lfs_object_download_list_service.rb#L69-71 + resumes_work_when_interrupted! + def perform(project_id) return unless (project = find_project(project_id)) diff --git a/app/workers/gitlab/github_import/stage/import_notes_worker.rb b/app/workers/gitlab/github_import/stage/import_notes_worker.rb index c1fdb76d03eb2d..8e88034ba15d20 100644 --- a/app/workers/gitlab/github_import/stage/import_notes_worker.rb +++ b/app/workers/gitlab/github_import/stage/import_notes_worker.rb @@ -11,6 +11,8 @@ class ImportNotesWorker # rubocop:disable Scalability/IdempotentWorker include GithubImport::Queue include StageMethods + resumes_work_when_interrupted! + # client - An instance of Gitlab::GithubImport::Client. # project - An instance of Project. def import(client, project) diff --git a/app/workers/gitlab/github_import/stage/import_pull_requests_merged_by_worker.rb b/app/workers/gitlab/github_import/stage/import_pull_requests_merged_by_worker.rb index 889f938318f2e6..376581c633f889 100644 --- a/app/workers/gitlab/github_import/stage/import_pull_requests_merged_by_worker.rb +++ b/app/workers/gitlab/github_import/stage/import_pull_requests_merged_by_worker.rb @@ -11,6 +11,8 @@ class ImportPullRequestsMergedByWorker # rubocop:disable Scalability/IdempotentW include GithubImport::Queue include StageMethods + resumes_work_when_interrupted! + # client - An instance of Gitlab::GithubImport::Client. # project - An instance of Project. def import(client, project) diff --git a/app/workers/gitlab/github_import/stage/import_pull_requests_review_requests_worker.rb b/app/workers/gitlab/github_import/stage/import_pull_requests_review_requests_worker.rb index 44cd7cdb9d152f..f2907006d9cddc 100644 --- a/app/workers/gitlab/github_import/stage/import_pull_requests_review_requests_worker.rb +++ b/app/workers/gitlab/github_import/stage/import_pull_requests_review_requests_worker.rb @@ -11,6 +11,8 @@ class ImportPullRequestsReviewRequestsWorker # rubocop:disable Scalability/Idemp include GithubImport::Queue include StageMethods + resumes_work_when_interrupted! + # client - An instance of Gitlab::GithubImport::Client. # project - An instance of Project. def import(client, project) diff --git a/app/workers/gitlab/github_import/stage/import_pull_requests_reviews_worker.rb b/app/workers/gitlab/github_import/stage/import_pull_requests_reviews_worker.rb index 9947a89b92ce9f..5c516555387e62 100644 --- a/app/workers/gitlab/github_import/stage/import_pull_requests_reviews_worker.rb +++ b/app/workers/gitlab/github_import/stage/import_pull_requests_reviews_worker.rb @@ -11,6 +11,8 @@ class ImportPullRequestsReviewsWorker # rubocop:disable Scalability/IdempotentWo include GithubImport::Queue include StageMethods + resumes_work_when_interrupted! + # client - An instance of Gitlab::GithubImport::Client. # project - An instance of Project. def import(client, project) diff --git a/app/workers/gitlab/github_import/stage/import_pull_requests_worker.rb b/app/workers/gitlab/github_import/stage/import_pull_requests_worker.rb index 9bdf3a31776475..50527dfa2d84ce 100644 --- a/app/workers/gitlab/github_import/stage/import_pull_requests_worker.rb +++ b/app/workers/gitlab/github_import/stage/import_pull_requests_worker.rb @@ -11,6 +11,8 @@ class ImportPullRequestsWorker # rubocop:disable Scalability/IdempotentWorker include GithubImport::Queue include StageMethods + resumes_work_when_interrupted! + # client - An instance of Gitlab::GithubImport::Client. # project - An instance of Project. def import(client, project) diff --git a/vendor/gems/sidekiq-reliable-fetch/lib/sidekiq/base_reliable_fetch.rb b/vendor/gems/sidekiq-reliable-fetch/lib/sidekiq/base_reliable_fetch.rb index 39b98a0109ff11..006aad87abefb9 100644 --- a/vendor/gems/sidekiq-reliable-fetch/lib/sidekiq/base_reliable_fetch.rb +++ b/vendor/gems/sidekiq-reliable-fetch/lib/sidekiq/base_reliable_fetch.rb @@ -230,7 +230,7 @@ def max_retries_after_interruption(worker_class) max_retries_after_interruption = nil max_retries_after_interruption ||= begin - Object.const_get(worker_class).sidekiq_options[:max_retries_after_interruption] + Object.const_get(worker_class).sidekiq_options['max_retries_after_interruption'] rescue NameError end diff --git a/vendor/gems/sidekiq-reliable-fetch/spec/base_reliable_fetch_spec.rb b/vendor/gems/sidekiq-reliable-fetch/spec/base_reliable_fetch_spec.rb index cdc4409f0d566f..32e62925aafd36 100644 --- a/vendor/gems/sidekiq-reliable-fetch/spec/base_reliable_fetch_spec.rb +++ b/vendor/gems/sidekiq-reliable-fetch/spec/base_reliable_fetch_spec.rb @@ -76,6 +76,19 @@ expect(queue2.size).to eq 1 expect(Sidekiq::InterruptedSet.new.size).to eq 0 end + + it 'does not put jobs into interrupted queue if it is disabled on the worker' do + stub_const('Bob', double(sidekiq_options: { 'max_retries_after_interruption' => -1 })) + + uow = described_class::UnitOfWork + interrupted_job = Sidekiq.dump_json(class: 'Bob', args: [1, 2, 'foo'], interrupted_count: 3) + jobs = [ uow.new('queue:foo', interrupted_job), uow.new('queue:foo', job), uow.new('queue:bar', job) ] + described_class.new(options).bulk_requeue(jobs, nil) + + expect(queue1.size).to eq 2 + expect(queue2.size).to eq 1 + expect(Sidekiq::InterruptedSet.new.size).to eq 0 + end end it 'sets heartbeat' do -- GitLab From 97724f5c0ba98c924716d17c9d1654054d0ae7ee Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Wed, 25 Oct 2023 13:00:52 +1300 Subject: [PATCH 2/4] Add feature flag --- .../concerns/gitlab/github_import/stage_methods.rb | 2 ++ .../github_importer_raise_max_interruptions.yml | 8 ++++++++ 2 files changed, 10 insertions(+) create mode 100644 config/feature_flags/development/github_importer_raise_max_interruptions.yml diff --git a/app/workers/concerns/gitlab/github_import/stage_methods.rb b/app/workers/concerns/gitlab/github_import/stage_methods.rb index e1fa5bf1054e55..548a2d6321770e 100644 --- a/app/workers/concerns/gitlab/github_import/stage_methods.rb +++ b/app/workers/concerns/gitlab/github_import/stage_methods.rb @@ -35,6 +35,8 @@ module StageMethods # - Continue their loop from where it left off: # https://gitlab.com/gitlab-org/gitlab/-/blob/024235ec/lib/gitlab/github_import/importer/pull_requests/review_requests_importer.rb#L15 def resumes_work_when_interrupted! + return unless Feature.enabled?(:github_importer_raise_max_interruptions) + sidekiq_options max_retries_after_interruption: 20 end end diff --git a/config/feature_flags/development/github_importer_raise_max_interruptions.yml b/config/feature_flags/development/github_importer_raise_max_interruptions.yml new file mode 100644 index 00000000000000..3cbcc10865fd4a --- /dev/null +++ b/config/feature_flags/development/github_importer_raise_max_interruptions.yml @@ -0,0 +1,8 @@ +--- +name: github_importer_raise_max_interruptions +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/134949 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/429306 +milestone: '16.6' +type: development +group: group::import and integrate +default_enabled: false -- GitLab From 18a75c93c41b661e490eaf148a81bada05cf2ad9 Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Wed, 25 Oct 2023 16:30:22 +1300 Subject: [PATCH 3/4] Add spec coverage --- .../github_import/stage_methods_spec.rb | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/spec/workers/concerns/gitlab/github_import/stage_methods_spec.rb b/spec/workers/concerns/gitlab/github_import/stage_methods_spec.rb index 898606845a2985..fa782967441b70 100644 --- a/spec/workers/concerns/gitlab/github_import/stage_methods_spec.rb +++ b/spec/workers/concerns/gitlab/github_import/stage_methods_spec.rb @@ -185,4 +185,30 @@ def self.name expect(worker.find_project(-1)).to be_nil end end + + describe '.resumes_work_when_interrupted!' do + subject(:sidekiq_options) { worker.class.sidekiq_options } + + it 'does not set the `max_retries_after_interruption` if not called' do + is_expected.not_to have_key('max_retries_after_interruption') + end + + it 'sets the `max_retries_after_interruption`' do + worker.class.resumes_work_when_interrupted! + + is_expected.to include('max_retries_after_interruption' => 20) + end + + context 'when the flag is disabled' do + before do + stub_feature_flags(github_importer_raise_max_interruptions: false) + end + + it 'does not set `max_retries_after_interruption`' do + worker.class.resumes_work_when_interrupted! + + is_expected.not_to have_key('max_retries_after_interruption') + end + end + end end -- GitLab From 462938d5f4f6f252cb8dc9c944a5aef6472a982c Mon Sep 17 00:00:00 2001 From: Madelein van Niekerk Date: Wed, 25 Oct 2023 20:34:45 +0000 Subject: [PATCH 4/4] Add review feedback --- Gemfile.lock | 2 +- app/workers/concerns/gitlab/github_import/stage_methods.rb | 4 +++- .../gitlab/github_import/stage/import_lfs_objects_worker.rb | 2 +- vendor/gems/sidekiq-reliable-fetch/Gemfile.lock | 2 +- .../sidekiq-reliable-fetch/gitlab-sidekiq-fetcher.gemspec | 2 +- 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 729408d5850095..6e266fd285a23a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -152,7 +152,7 @@ PATH PATH remote: vendor/gems/sidekiq-reliable-fetch specs: - gitlab-sidekiq-fetcher (0.9.0) + gitlab-sidekiq-fetcher (0.10.0) json (>= 2.5) sidekiq (~> 6.1) diff --git a/app/workers/concerns/gitlab/github_import/stage_methods.rb b/app/workers/concerns/gitlab/github_import/stage_methods.rb index 548a2d6321770e..5c63c667a03ff6 100644 --- a/app/workers/concerns/gitlab/github_import/stage_methods.rb +++ b/app/workers/concerns/gitlab/github_import/stage_methods.rb @@ -5,6 +5,8 @@ module GithubImport module StageMethods extend ActiveSupport::Concern + MAX_RETRIES_AFTER_INTERRUPTION = 20 + included do include ApplicationWorker @@ -37,7 +39,7 @@ module StageMethods def resumes_work_when_interrupted! return unless Feature.enabled?(:github_importer_raise_max_interruptions) - sidekiq_options max_retries_after_interruption: 20 + sidekiq_options max_retries_after_interruption: MAX_RETRIES_AFTER_INTERRUPTION end end diff --git a/app/workers/gitlab/github_import/stage/import_lfs_objects_worker.rb b/app/workers/gitlab/github_import/stage/import_lfs_objects_worker.rb index 700c04f392f237..9db72de59b751a 100644 --- a/app/workers/gitlab/github_import/stage/import_lfs_objects_worker.rb +++ b/app/workers/gitlab/github_import/stage/import_lfs_objects_worker.rb @@ -12,7 +12,7 @@ class ImportLfsObjectsWorker # rubocop:disable Scalability/IdempotentWorker include StageMethods # Importer::LfsObjectsImporter can resume work when interrupted as - # it uses Projects::LfsPointers::LfsObjectDownloadListService + # it uses Projects::LfsPointers::LfsObjectDownloadListService which excludes LFS objects that already exist. # https://gitlab.com/gitlab-org/gitlab/-/blob/eabf0800/app/services/projects/lfs_pointers/lfs_object_download_list_service.rb#L69-71 resumes_work_when_interrupted! diff --git a/vendor/gems/sidekiq-reliable-fetch/Gemfile.lock b/vendor/gems/sidekiq-reliable-fetch/Gemfile.lock index 57767ee8c3bfb1..aeb163db018b28 100644 --- a/vendor/gems/sidekiq-reliable-fetch/Gemfile.lock +++ b/vendor/gems/sidekiq-reliable-fetch/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . specs: - gitlab-sidekiq-fetcher (0.9.0) + gitlab-sidekiq-fetcher (0.10.0) json (>= 2.5) sidekiq (~> 6.1) diff --git a/vendor/gems/sidekiq-reliable-fetch/gitlab-sidekiq-fetcher.gemspec b/vendor/gems/sidekiq-reliable-fetch/gitlab-sidekiq-fetcher.gemspec index 0d0e5e3f6fa198..b656267003aa45 100644 --- a/vendor/gems/sidekiq-reliable-fetch/gitlab-sidekiq-fetcher.gemspec +++ b/vendor/gems/sidekiq-reliable-fetch/gitlab-sidekiq-fetcher.gemspec @@ -1,6 +1,6 @@ Gem::Specification.new do |s| s.name = 'gitlab-sidekiq-fetcher' - s.version = '0.9.0' + s.version = '0.10.0' s.authors = ['TEA', 'GitLab'] s.email = 'valery@gitlab.com' s.license = 'LGPL-3.0' -- GitLab