From 3b6f1ab7098d4d0c4f40c900729bd26918c4a289 Mon Sep 17 00:00:00 2001 From: Keeyan Nejad Date: Tue, 8 Oct 2024 17:40:03 +0100 Subject: [PATCH 01/17] Add limit to concurrent batch exports Allows administrators to configure a maximum number of simultaneous exports (default 25) to prevent exhausting resources. Changelog: added --- app/helpers/application_settings_helper.rb | 1 + app/models/application_setting.rb | 1 + app/models/bulk_imports/export_batch.rb | 7 +++++-- .../relation_batch_export_worker.rb | 13 +++++++++++++ .../concurrent_relation_batch_export_limit.yml | 12 ++++++++++++ ...tch_export_limit_to_application_settings.rb | 9 +++++++++ db/schema_migrations/20241008161532 | 1 + db/structure.sql | 1 + doc/api/settings.md | 3 +++ lib/api/settings.rb | 1 + spec/factories/bulk_import/export_batches.rb | 6 +++++- .../relation_batch_export_worker_spec.rb | 18 ++++++++++++++++++ 12 files changed, 70 insertions(+), 3 deletions(-) create mode 100644 config/application_setting_columns/concurrent_relation_batch_export_limit.yml create mode 100644 db/migrate/20241008161532_add_concurrent_relation_batch_export_limit_to_application_settings.rb create mode 100644 db/schema_migrations/20241008161532 diff --git a/app/helpers/application_settings_helper.rb b/app/helpers/application_settings_helper.rb index 25ea827fb03666..3bacd9e93f0255 100644 --- a/app/helpers/application_settings_helper.rb +++ b/app/helpers/application_settings_helper.rb @@ -501,6 +501,7 @@ def visible_attributes :invitation_flow_enforcement, :can_create_group, :bulk_import_concurrent_pipeline_batch_limit, + :concurrent_relation_batch_export_limit, :bulk_import_enabled, :bulk_import_max_download_file_size, :silent_admin_exports_enabled, diff --git a/app/models/application_setting.rb b/app/models/application_setting.rb index 83d4e16fae5f6b..39ed46622ded1c 100644 --- a/app/models/application_setting.rb +++ b/app/models/application_setting.rb @@ -527,6 +527,7 @@ def self.kroki_formats_attributes with_options(numericality: { only_integer: true, greater_than: 0 }) do validates :ai_action_api_rate_limit, :bulk_import_concurrent_pipeline_batch_limit, + :concurrent_relation_batch_export_limit, :code_suggestions_api_rate_limit, :concurrent_bitbucket_import_jobs_limit, :concurrent_bitbucket_server_import_jobs_limit, diff --git a/app/models/bulk_imports/export_batch.rb b/app/models/bulk_imports/export_batch.rb index 9d34dae12d03d8..3b2d8e91c75e3a 100644 --- a/app/models/bulk_imports/export_batch.rb +++ b/app/models/bulk_imports/export_batch.rb @@ -6,13 +6,16 @@ class ExportBatch < ApplicationRecord BATCH_SIZE = 1000 + scope :started, -> { with_status(:started) } + belongs_to :export, class_name: 'BulkImports::Export' has_one :upload, class_name: 'BulkImports::ExportUpload', foreign_key: :batch_id, inverse_of: :batch validates :batch_number, presence: true, uniqueness: { scope: :export_id } - state_machine :status, initial: :started do - state :started, value: 0 + state_machine :status, initial: :created do + state :created, value: 0 + state :started, value: 2 state :finished, value: 1 state :failed, value: -1 diff --git a/app/workers/bulk_imports/relation_batch_export_worker.rb b/app/workers/bulk_imports/relation_batch_export_worker.rb index 08c5fb81460227..ae6c749540066f 100644 --- a/app/workers/bulk_imports/relation_batch_export_worker.rb +++ b/app/workers/bulk_imports/relation_batch_export_worker.rb @@ -3,6 +3,9 @@ module BulkImports class RelationBatchExportWorker include ApplicationWorker + include Gitlab::Utils::StrongMemoize + + PERFORM_DELAY = 1.minute idempotent! data_consistency :always # rubocop:disable SidekiqLoadBalancing/WorkerDataConsistency @@ -23,10 +26,20 @@ def perform(user_id, batch_id) @user = User.find(user_id) @batch = BulkImports::ExportBatch.find(batch_id) + return self.class.perform_in(PERFORM_DELAY, user_id, batch_id) if max_exports_already_running? + log_extra_metadata_on_done(:relation, @batch.export.relation) log_extra_metadata_on_done(:objects_count, @batch.objects_count) RelationBatchExportService.new(@user, @batch).execute end + + def max_exports_already_running? + BulkImports::ExportBatch.started.limit(max_exports).count == max_exports + end + + strong_memoize_attr def max_exports + ::Gitlab::CurrentSettings.concurrent_relation_batch_export_limit + end end end diff --git a/config/application_setting_columns/concurrent_relation_batch_export_limit.yml b/config/application_setting_columns/concurrent_relation_batch_export_limit.yml new file mode 100644 index 00000000000000..69cf8ec4520365 --- /dev/null +++ b/config/application_setting_columns/concurrent_relation_batch_export_limit.yml @@ -0,0 +1,12 @@ +--- +api_type: integer +attr: concurrent_relation_batch_export_limit +clusterwide: false +column: concurrent_relation_batch_export_limit +db_type: smallint +default: '25' +description: Maximum simultaneous relation exports to process. +encrypted: false +gitlab_com_different_than_default: false +jihu: false +not_null: true diff --git a/db/migrate/20241008161532_add_concurrent_relation_batch_export_limit_to_application_settings.rb b/db/migrate/20241008161532_add_concurrent_relation_batch_export_limit_to_application_settings.rb new file mode 100644 index 00000000000000..2e9f065c1cb0d8 --- /dev/null +++ b/db/migrate/20241008161532_add_concurrent_relation_batch_export_limit_to_application_settings.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +class AddConcurrentRelationBatchExportLimitToApplicationSettings < Gitlab::Database::Migration[2.2] + milestone '17.6' + + def change + add_column :application_settings, :concurrent_relation_batch_export_limit, :smallint, default: 25, null: false + end +end diff --git a/db/schema_migrations/20241008161532 b/db/schema_migrations/20241008161532 new file mode 100644 index 00000000000000..3ac5c0c9cc0281 --- /dev/null +++ b/db/schema_migrations/20241008161532 @@ -0,0 +1 @@ +fe6bdba721e7ef0f886edc7f27f77b1279e1a093f2daef0c44afb24b46f5c030 \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index 7c709957b1b8c2..ac8f16f184bfce 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -6541,6 +6541,7 @@ CREATE TABLE application_settings ( enforce_ci_inbound_job_token_scope_enabled boolean DEFAULT false NOT NULL, allow_top_level_group_owners_to_create_service_accounts boolean DEFAULT false NOT NULL, sign_in_restrictions jsonb DEFAULT '{}'::jsonb NOT NULL, + concurrent_relation_batch_export_limit smallint DEFAULT 25 NOT NULL, CONSTRAINT app_settings_container_reg_cleanup_tags_max_list_size_positive CHECK ((container_registry_cleanup_tags_service_max_list_size >= 0)), CONSTRAINT app_settings_dep_proxy_ttl_policies_worker_capacity_positive CHECK ((dependency_proxy_ttl_group_policy_worker_capacity >= 0)), CONSTRAINT app_settings_ext_pipeline_validation_service_url_text_limit CHECK ((char_length(external_pipeline_validation_service_url) <= 255)), diff --git a/doc/api/settings.md b/doc/api/settings.md index ba8df1e5f37406..3458bf224f3c87 100644 --- a/doc/api/settings.md +++ b/doc/api/settings.md @@ -147,6 +147,7 @@ Example response: "project_jobs_api_rate_limit": 600, "security_txt_content": null, "bulk_import_concurrent_pipeline_batch_limit": 25, + "concurrent_relation_batch_export_limit": 25, "concurrent_github_import_jobs_limit": 1000, "concurrent_bitbucket_import_jobs_limit": 100, "concurrent_bitbucket_server_import_jobs_limit": 100, @@ -322,6 +323,7 @@ Example response: "project_jobs_api_rate_limit": 600, "security_txt_content": null, "bulk_import_concurrent_pipeline_batch_limit": 25, + "concurrent_relation_batch_export_limit": 25, "downstream_pipeline_trigger_limit_per_project_user_sha": 0, "concurrent_github_import_jobs_limit": 1000, "concurrent_bitbucket_import_jobs_limit": 100, @@ -699,6 +701,7 @@ listed in the descriptions of the relevant settings. | `whats_new_variant` | string | no | What's new variant, possible values: `all_tiers`, `current_tier`, and `disabled`. | | `wiki_page_max_content_bytes` | integer | no | Maximum wiki page content size in **bytes**. Default: 52428800 Bytes (50 MB). The minimum value is 1024 bytes. | | `bulk_import_concurrent_pipeline_batch_limit` | integer | no | Maximum simultaneous Direct Transfer batches to process. | +| `concurrent_relation_batch_export_limit` | integer | no | Maximum simultaneous relation exports to process. | | `asciidoc_max_includes` | integer | no | Maximum limit of AsciiDoc include directives being processed in any one document. Default: 32. Maximum: 64. | | `duo_features_enabled` | boolean | no | Indicates whether GitLab Duo features are enabled for this instance. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/144931) in GitLab 16.10. Self-managed, Premium and Ultimate only. | | `lock_duo_features_enabled` | boolean | no | Indicates whether the GitLab Duo features enabled setting is enforced for all subgroups. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/144931) in GitLab 16.10. Self-managed, Premium and Ultimate only. | diff --git a/lib/api/settings.rb b/lib/api/settings.rb index c65369c76cc200..ef9f4c980e3249 100644 --- a/lib/api/settings.rb +++ b/lib/api/settings.rb @@ -215,6 +215,7 @@ def filter_attributes_using_license(attrs) optional :jira_connect_public_key_storage_enabled, type: Boolean, desc: 'Enable public key storage for the GitLab for Jira Cloud app.' optional :jira_connect_proxy_url, type: String, desc: "URL of the GitLab instance used as a proxy for the GitLab for Jira Cloud app." optional :bulk_import_concurrent_pipeline_batch_limit, type: Integer, desc: 'Maximum simultaneous Direct Transfer pipeline batches to process' + optional :concurrent_relation_batch_export_limit, type: Integer, desc: 'Maximum simultaneous relation exports to process.' optional :bulk_import_enabled, type: Boolean, desc: 'Enable migrating GitLab groups and projects by direct transfer' optional :bulk_import_max_download_file, type: Integer, desc: 'Maximum download file size in MB when importing from source GitLab instances by direct transfer' optional :concurrent_github_import_jobs_limit, type: Integer, desc: 'Github Importer maximum number of simultaneous import jobs' diff --git a/spec/factories/bulk_import/export_batches.rb b/spec/factories/bulk_import/export_batches.rb index f5f12696f5f13d..529a165a3e1735 100644 --- a/spec/factories/bulk_import/export_batches.rb +++ b/spec/factories/bulk_import/export_batches.rb @@ -9,7 +9,7 @@ status { 0 } batch_number { 1 } - trait :started do + trait :created do status { 0 } end @@ -17,6 +17,10 @@ status { 1 } end + trait :started do + status { 2 } + end + trait :failed do status { -1 } end diff --git a/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb b/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb index 8ee55d64a1b6b7..e19fcbe40e86a2 100644 --- a/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb +++ b/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb @@ -9,6 +9,8 @@ let(:job_args) { [user.id, batch.id] } describe '#perform' do + subject(:perform) { described_class.new.perform(user.id, batch.id) } + include_examples 'an idempotent worker' do it 'executes RelationBatchExportService' do service = instance_double(BulkImports::RelationBatchExportService) @@ -22,6 +24,22 @@ perform_multiple(job_args) end end + + context 'when the max number of exports have already started' do + let!(:existing_export) { create(:bulk_import_export_batch, :started) } + + before do + allow(::Gitlab::CurrentSettings).to receive(:concurrent_relation_batch_export_limit).and_return(1) + end + + it 'does not start the export and schedules it for later' do + expect(described_class).to receive(:perform_in).with(described_class::PERFORM_DELAY, user.id, batch.id) + + expect(BulkImports::RelationBatchExportService).not_to receive(:new) + + perform + end + end end describe '.sidekiq_retries_exhausted' do -- GitLab From 7ecb1d817ecc625fd48233e37203d237f58087a8 Mon Sep 17 00:00:00 2001 From: Keeyan Nejad Date: Wed, 16 Oct 2024 12:07:42 +0100 Subject: [PATCH 02/17] Use let_it_be instead of let! --- spec/workers/bulk_imports/relation_batch_export_worker_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb b/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb index e19fcbe40e86a2..46e41fffac250c 100644 --- a/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb +++ b/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb @@ -26,7 +26,7 @@ end context 'when the max number of exports have already started' do - let!(:existing_export) { create(:bulk_import_export_batch, :started) } + let_it_be(:existing_export) { create(:bulk_import_export_batch, :started) } before do allow(::Gitlab::CurrentSettings).to receive(:concurrent_relation_batch_export_limit).and_return(1) -- GitLab From b104bd175ba087e1980d4065fa6fd3a97c85d92a Mon Sep 17 00:00:00 2001 From: Keeyan Nejad Date: Fri, 18 Oct 2024 17:19:23 +0100 Subject: [PATCH 03/17] Reset cache timeout when re-enqueuing job --- .../bulk_imports/relation_batch_export_worker.rb | 14 +++++++++++++- .../relation_batch_export_worker_spec.rb | 14 ++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/app/workers/bulk_imports/relation_batch_export_worker.rb b/app/workers/bulk_imports/relation_batch_export_worker.rb index ae6c749540066f..f4c6564d9d6ef2 100644 --- a/app/workers/bulk_imports/relation_batch_export_worker.rb +++ b/app/workers/bulk_imports/relation_batch_export_worker.rb @@ -26,7 +26,7 @@ def perform(user_id, batch_id) @user = User.find(user_id) @batch = BulkImports::ExportBatch.find(batch_id) - return self.class.perform_in(PERFORM_DELAY, user_id, batch_id) if max_exports_already_running? + return re_enqueue_job(@user, @batch) if max_exports_already_running? log_extra_metadata_on_done(:relation, @batch.export.relation) log_extra_metadata_on_done(:objects_count, @batch.objects_count) @@ -41,5 +41,17 @@ def max_exports_already_running? strong_memoize_attr def max_exports ::Gitlab::CurrentSettings.concurrent_relation_batch_export_limit end + + def re_enqueue_job(user, batch) + reset_cache_timeout(batch) + + self.class.perform_in(PERFORM_DELAY, user.id, batch.id) + end + + def reset_cache_timeout(batch) + cache_service = BulkImports::BatchedRelationExportService + cache_key = cache_service.cache_key(batch.export_id, batch.id) + Gitlab::Cache::Import::Caching.expire(cache_key, cache_service::CACHE_DURATION.from_now) + end end end diff --git a/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb b/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb index 46e41fffac250c..3ea70b1028fd44 100644 --- a/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb +++ b/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb @@ -39,6 +39,20 @@ perform end + + it 'resets the expiration date for the cache key' do + cache_key = BulkImports::BatchedRelationExportService.cache_key(batch.export_id, batch.id) + Gitlab::Cache::Import::Caching.write(cache_key, "test", timeout: 1.hour.from_now) + + perform + + expires_at = Gitlab::Cache::Import::Caching.with_redis do |redis| + ttl = redis.ttl(Gitlab::Cache::Import::Caching.cache_key_for(cache_key)) + Time.zone.at(ttl) + end + + expect(expires_at).to be_within(1.minute).of(BulkImports::BatchedRelationExportService::CACHE_DURATION.from_now) + end end end -- GitLab From bfe8c59ed1ecbbb43819591dd771a68136236ff8 Mon Sep 17 00:00:00 2001 From: Keeyan Nejad Date: Tue, 22 Oct 2024 11:33:22 +0100 Subject: [PATCH 04/17] Update export updated_at so the timeout resets --- app/services/bulk_imports/relation_batch_export_service.rb | 1 + .../bulk_imports/relation_batch_export_service_spec.rb | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/app/services/bulk_imports/relation_batch_export_service.rb b/app/services/bulk_imports/relation_batch_export_service.rb index 3f98d49aa1b15c..dc97bb58a61ebd 100644 --- a/app/services/bulk_imports/relation_batch_export_service.rb +++ b/app/services/bulk_imports/relation_batch_export_service.rb @@ -17,6 +17,7 @@ def execute ensure_export_file_exists! compress_exported_relation upload_compressed_file + export.touch finish_batch! ensure diff --git a/spec/services/bulk_imports/relation_batch_export_service_spec.rb b/spec/services/bulk_imports/relation_batch_export_service_spec.rb index a18099a418974e..b3085e7b2040e7 100644 --- a/spec/services/bulk_imports/relation_batch_export_service_spec.rb +++ b/spec/services/bulk_imports/relation_batch_export_service_spec.rb @@ -42,6 +42,10 @@ service.execute end + it 'updates export updated_at so the timeout resets' do + expect { service.execute }.to change { export.reload.updated_at } + end + context 'when relation is empty and there is nothing to export' do let_it_be(:export) { create(:bulk_import_export, :batched, project: project, relation: 'milestones') } let_it_be(:batch) { create(:bulk_import_export_batch, export: export) } -- GitLab From 149a175440cb9a8d3a81607c48c46913bf2aa06e Mon Sep 17 00:00:00 2001 From: Keeyan Nejad Date: Tue, 22 Oct 2024 13:58:59 +0100 Subject: [PATCH 05/17] Allow export to continue if batch has timed out --- app/models/bulk_imports/export_batch.rb | 3 ++- .../bulk_imports/relation_batch_export_worker.rb | 2 +- spec/models/bulk_imports/export_batch_spec.rb | 14 ++++++++++++++ .../relation_batch_export_worker_spec.rb | 16 ++++++++++++++++ 4 files changed, 33 insertions(+), 2 deletions(-) diff --git a/app/models/bulk_imports/export_batch.rb b/app/models/bulk_imports/export_batch.rb index 3b2d8e91c75e3a..b4cac733262029 100644 --- a/app/models/bulk_imports/export_batch.rb +++ b/app/models/bulk_imports/export_batch.rb @@ -5,8 +5,9 @@ class ExportBatch < ApplicationRecord self.table_name = 'bulk_import_export_batches' BATCH_SIZE = 1000 + TIMEOUT_AFTER_START = 1.hour - scope :started, -> { with_status(:started) } + scope :started_and_not_timed_out, -> { with_status(:started).where(updated_at: TIMEOUT_AFTER_START.ago...) } belongs_to :export, class_name: 'BulkImports::Export' has_one :upload, class_name: 'BulkImports::ExportUpload', foreign_key: :batch_id, inverse_of: :batch diff --git a/app/workers/bulk_imports/relation_batch_export_worker.rb b/app/workers/bulk_imports/relation_batch_export_worker.rb index f4c6564d9d6ef2..1411ee506cae8d 100644 --- a/app/workers/bulk_imports/relation_batch_export_worker.rb +++ b/app/workers/bulk_imports/relation_batch_export_worker.rb @@ -35,7 +35,7 @@ def perform(user_id, batch_id) end def max_exports_already_running? - BulkImports::ExportBatch.started.limit(max_exports).count == max_exports + BulkImports::ExportBatch.started_and_not_timed_out.limit(max_exports).count == max_exports end strong_memoize_attr def max_exports diff --git a/spec/models/bulk_imports/export_batch_spec.rb b/spec/models/bulk_imports/export_batch_spec.rb index 43209921b9c257..ff1c1c3c2e03d8 100644 --- a/spec/models/bulk_imports/export_batch_spec.rb +++ b/spec/models/bulk_imports/export_batch_spec.rb @@ -14,4 +14,18 @@ it { is_expected.to validate_presence_of(:batch_number) } it { is_expected.to validate_uniqueness_of(:batch_number).scoped_to(:export_id) } end + + describe '.started_and_not_timed_out' do + subject(:started_and_not_timed_out) { described_class.started_and_not_timed_out } + + let_it_be(:recently_started_export_batch) { create(:bulk_import_export_batch, :started, updated_at: 1.minute.ago) } + let_it_be(:old_started_export_batch) { create(:bulk_import_export_batch, :started, updated_at: 2.hours.ago) } + let_it_be(:recently_finished_export_batch) do + create(:bulk_import_export_batch, :finished, updated_at: 1.minute.ago) + end + + it 'returns records with status started, which were last updated less that 1 hour ago' do + is_expected.to contain_exactly(recently_started_export_batch) + end + end end diff --git a/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb b/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb index 3ea70b1028fd44..7d5fe5b0da0552 100644 --- a/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb +++ b/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb @@ -53,6 +53,22 @@ expect(expires_at).to be_within(1.minute).of(BulkImports::BatchedRelationExportService::CACHE_DURATION.from_now) end + + context 'when the export batch started longer ago than the timeout time' do + before do + existing_export.update!(updated_at: (BulkImports::ExportBatch::TIMEOUT_AFTER_START + 1.minute).ago) + end + + it 'starts the export and does not schedule it for later' do + expect(described_class).not_to receive(:perform_in).with(described_class::PERFORM_DELAY, user.id, batch.id) + + expect_next_instance_of(BulkImports::RelationBatchExportService) do |instance| + expect(instance).to receive(:execute) + end + + perform + end + end end end -- GitLab From cb47c47e0b3677ec5d98040ad9cd1984940f7da4 Mon Sep 17 00:00:00 2001 From: Keeyan Nejad Date: Tue, 22 Oct 2024 16:05:46 +0100 Subject: [PATCH 06/17] Move batch export limit setting to rate_limits --- app/models/application_setting.rb | 3 ++- .../json_schemas/application_setting_rate_limits.json | 5 +++++ ...elation_batch_export_limit_to_application_settings.rb | 9 --------- db/schema_migrations/20241008161532 | 1 - db/structure.sql | 1 - doc/api/settings.md | 2 +- .../bulk_imports/relation_batch_export_worker_spec.rb | 2 +- 7 files changed, 9 insertions(+), 14 deletions(-) delete mode 100644 db/migrate/20241008161532_add_concurrent_relation_batch_export_limit_to_application_settings.rb delete mode 100644 db/schema_migrations/20241008161532 diff --git a/app/models/application_setting.rb b/app/models/application_setting.rb index 39ed46622ded1c..8c04a1f288463c 100644 --- a/app/models/application_setting.rb +++ b/app/models/application_setting.rb @@ -527,11 +527,11 @@ def self.kroki_formats_attributes with_options(numericality: { only_integer: true, greater_than: 0 }) do validates :ai_action_api_rate_limit, :bulk_import_concurrent_pipeline_batch_limit, - :concurrent_relation_batch_export_limit, :code_suggestions_api_rate_limit, :concurrent_bitbucket_import_jobs_limit, :concurrent_bitbucket_server_import_jobs_limit, :concurrent_github_import_jobs_limit, + :concurrent_relation_batch_export_limit, :container_registry_token_expire_delay, :housekeeping_optimize_repository_period, :inactive_projects_delete_after_months, @@ -625,6 +625,7 @@ def self.kroki_formats_attributes concurrent_bitbucket_import_jobs_limit: [:integer, { default: 100 }], concurrent_bitbucket_server_import_jobs_limit: [:integer, { default: 100 }], concurrent_github_import_jobs_limit: [:integer, { default: 1000 }], + concurrent_relation_batch_export_limit: [:integer, { default: 8 }], downstream_pipeline_trigger_limit_per_project_user_sha: [:integer, { default: 0 }], group_api_limit: [:integer, { default: 400 }], group_invited_groups_api_limit: [:integer, { default: 60 }], diff --git a/app/validators/json_schemas/application_setting_rate_limits.json b/app/validators/json_schemas/application_setting_rate_limits.json index 24c27737cd6bc5..3f563180b1a960 100644 --- a/app/validators/json_schemas/application_setting_rate_limits.json +++ b/app/validators/json_schemas/application_setting_rate_limits.json @@ -19,6 +19,11 @@ "minimum": 1, "description": "Maximum number of simultaneous import jobs for GitHub importer" }, + "concurrent_relation_batch_export_limit": { + "type": "integer", + "minimum": 1, + "description": "Maximum simultaneous relation exports to process" + }, "create_organization_api_limit": { "type": "integer", "minimum": 0, diff --git a/db/migrate/20241008161532_add_concurrent_relation_batch_export_limit_to_application_settings.rb b/db/migrate/20241008161532_add_concurrent_relation_batch_export_limit_to_application_settings.rb deleted file mode 100644 index 2e9f065c1cb0d8..00000000000000 --- a/db/migrate/20241008161532_add_concurrent_relation_batch_export_limit_to_application_settings.rb +++ /dev/null @@ -1,9 +0,0 @@ -# frozen_string_literal: true - -class AddConcurrentRelationBatchExportLimitToApplicationSettings < Gitlab::Database::Migration[2.2] - milestone '17.6' - - def change - add_column :application_settings, :concurrent_relation_batch_export_limit, :smallint, default: 25, null: false - end -end diff --git a/db/schema_migrations/20241008161532 b/db/schema_migrations/20241008161532 deleted file mode 100644 index 3ac5c0c9cc0281..00000000000000 --- a/db/schema_migrations/20241008161532 +++ /dev/null @@ -1 +0,0 @@ -fe6bdba721e7ef0f886edc7f27f77b1279e1a093f2daef0c44afb24b46f5c030 \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index ac8f16f184bfce..7c709957b1b8c2 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -6541,7 +6541,6 @@ CREATE TABLE application_settings ( enforce_ci_inbound_job_token_scope_enabled boolean DEFAULT false NOT NULL, allow_top_level_group_owners_to_create_service_accounts boolean DEFAULT false NOT NULL, sign_in_restrictions jsonb DEFAULT '{}'::jsonb NOT NULL, - concurrent_relation_batch_export_limit smallint DEFAULT 25 NOT NULL, CONSTRAINT app_settings_container_reg_cleanup_tags_max_list_size_positive CHECK ((container_registry_cleanup_tags_service_max_list_size >= 0)), CONSTRAINT app_settings_dep_proxy_ttl_policies_worker_capacity_positive CHECK ((dependency_proxy_ttl_group_policy_worker_capacity >= 0)), CONSTRAINT app_settings_ext_pipeline_validation_service_url_text_limit CHECK ((char_length(external_pipeline_validation_service_url) <= 255)), diff --git a/doc/api/settings.md b/doc/api/settings.md index 3458bf224f3c87..0b13f7e70a1a69 100644 --- a/doc/api/settings.md +++ b/doc/api/settings.md @@ -701,7 +701,7 @@ listed in the descriptions of the relevant settings. | `whats_new_variant` | string | no | What's new variant, possible values: `all_tiers`, `current_tier`, and `disabled`. | | `wiki_page_max_content_bytes` | integer | no | Maximum wiki page content size in **bytes**. Default: 52428800 Bytes (50 MB). The minimum value is 1024 bytes. | | `bulk_import_concurrent_pipeline_batch_limit` | integer | no | Maximum simultaneous Direct Transfer batches to process. | -| `concurrent_relation_batch_export_limit` | integer | no | Maximum simultaneous relation exports to process. | +| `concurrent_relation_batch_export_limit` | integer | no | Maximum simultaneous relation exports to process. | | `asciidoc_max_includes` | integer | no | Maximum limit of AsciiDoc include directives being processed in any one document. Default: 32. Maximum: 64. | | `duo_features_enabled` | boolean | no | Indicates whether GitLab Duo features are enabled for this instance. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/144931) in GitLab 16.10. Self-managed, Premium and Ultimate only. | | `lock_duo_features_enabled` | boolean | no | Indicates whether the GitLab Duo features enabled setting is enforced for all subgroups. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/144931) in GitLab 16.10. Self-managed, Premium and Ultimate only. | diff --git a/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb b/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb index 7d5fe5b0da0552..7c1f4c249aba95 100644 --- a/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb +++ b/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb @@ -29,7 +29,7 @@ let_it_be(:existing_export) { create(:bulk_import_export_batch, :started) } before do - allow(::Gitlab::CurrentSettings).to receive(:concurrent_relation_batch_export_limit).and_return(1) + stub_application_setting(concurrent_relation_batch_export_limit: 1) end it 'does not start the export and schedules it for later' do -- GitLab From 85c880e2289f3132f0dd8fcece11fbae672f3c5b Mon Sep 17 00:00:00 2001 From: Keeyan Nejad Date: Tue, 22 Oct 2024 17:14:58 +0100 Subject: [PATCH 07/17] Update docs for batch export limit --- ...concurrent_relation_batch_export_limit.yml | 12 ----------- .../concurrent_relation_batch_export_limit.md | 21 +++++++++++++++++++ 2 files changed, 21 insertions(+), 12 deletions(-) delete mode 100644 config/application_setting_columns/concurrent_relation_batch_export_limit.yml create mode 100644 doc/administration/settings/concurrent_relation_batch_export_limit.md diff --git a/config/application_setting_columns/concurrent_relation_batch_export_limit.yml b/config/application_setting_columns/concurrent_relation_batch_export_limit.yml deleted file mode 100644 index 69cf8ec4520365..00000000000000 --- a/config/application_setting_columns/concurrent_relation_batch_export_limit.yml +++ /dev/null @@ -1,12 +0,0 @@ ---- -api_type: integer -attr: concurrent_relation_batch_export_limit -clusterwide: false -column: concurrent_relation_batch_export_limit -db_type: smallint -default: '25' -description: Maximum simultaneous relation exports to process. -encrypted: false -gitlab_com_different_than_default: false -jihu: false -not_null: true diff --git a/doc/administration/settings/concurrent_relation_batch_export_limit.md b/doc/administration/settings/concurrent_relation_batch_export_limit.md new file mode 100644 index 00000000000000..35b59e17bd067c --- /dev/null +++ b/doc/administration/settings/concurrent_relation_batch_export_limit.md @@ -0,0 +1,21 @@ +--- +stage: Foundations +group: Import and Integrate +info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://handbook.gitlab.com/handbook/product/ux/technical-writing/#assignments +--- + +# Limit number of concurrent batch export workers + +DETAILS: +**Tier:** Free, Premium, Ultimate +**Offering:** Self-managed + +> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/169122) for self-managed in GitLab 17.6. + +Exports for Direct Transfer can consume a lot of resources, to prevent exhausting either the database or the Sidekiq processes, administrators can configure the `concurrent_relation_batch_export_limit` setting. + +The default limit of 8 should be appropriate for [the 2k user reference architecture](../../administration/reference_architectures/2k_users.md). + +If you are experiencing `ActiveRecord::QueryCanceled PG::QueryCanceled: ERROR: canceling statement due to statement timeout` errors, or jobs getting interrupted due to Sidekiq Memory Killer restarting multiple times relating to the batch exports, you may want to consider lowering this value. + +On the other hand, if you have more resources available, you can increase this value to allow more concurrent export jobs to process. -- GitLab From dde953e6729feda8f8d6f9190caf10806bdb933b Mon Sep 17 00:00:00 2001 From: Keeyan Nejad Date: Tue, 22 Oct 2024 19:29:59 +0100 Subject: [PATCH 08/17] Set a higher limit for batch export limit on .com The limit of 8 is better suited for self hosted instances. As we haven't seen any errors on .com we can put an arbitrarily high value. --- ..._concurrent_relation_batch_export_limit.rb | 27 ++++++++++ db/schema_migrations/20241022175028 | 1 + ...urrent_relation_batch_export_limit_spec.rb | 53 +++++++++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 db/migrate/20241022175028_increase_gitlab_com_concurrent_relation_batch_export_limit.rb create mode 100644 db/schema_migrations/20241022175028 create mode 100644 spec/migrations/increase_gitlab_com_concurrent_relation_batch_export_limit_spec.rb diff --git a/db/migrate/20241022175028_increase_gitlab_com_concurrent_relation_batch_export_limit.rb b/db/migrate/20241022175028_increase_gitlab_com_concurrent_relation_batch_export_limit.rb new file mode 100644 index 00000000000000..9d602aa62b2e5b --- /dev/null +++ b/db/migrate/20241022175028_increase_gitlab_com_concurrent_relation_batch_export_limit.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +class IncreaseGitlabComConcurrentRelationBatchExportLimit < Gitlab::Database::Migration[2.2] + milestone '17.6' + + restrict_gitlab_migration gitlab_schema: :gitlab_main + + class ApplicationSetting < MigrationRecord; end + + def up + return unless Gitlab.com? + + application_setting = ApplicationSetting.last + + application_setting.rate_limits['concurrent_relation_batch_export_limit'] = 10_000 + application_setting.save! + end + + def down + return unless Gitlab.com? + + application_setting = ApplicationSetting.last + + application_setting.rate_limits['concurrent_relation_batch_export_limit'] = 8 + application_setting.save! + end +end diff --git a/db/schema_migrations/20241022175028 b/db/schema_migrations/20241022175028 new file mode 100644 index 00000000000000..0c4f88b815457d --- /dev/null +++ b/db/schema_migrations/20241022175028 @@ -0,0 +1 @@ +bf1cc7f9d4dbc3f83919c59db50e1f8a2e01eaa9e8d52511e8f8302ba5374e38 \ No newline at end of file diff --git a/spec/migrations/increase_gitlab_com_concurrent_relation_batch_export_limit_spec.rb b/spec/migrations/increase_gitlab_com_concurrent_relation_batch_export_limit_spec.rb new file mode 100644 index 00000000000000..e283f77d10d801 --- /dev/null +++ b/spec/migrations/increase_gitlab_com_concurrent_relation_batch_export_limit_spec.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +require 'spec_helper' + +require_migration! + +RSpec.describe IncreaseGitlabComConcurrentRelationBatchExportLimit, feature_category: :database do + let(:application_settings_table) { table(:application_settings) } + + let!(:application_settings) do + application_settings_table.create!(rate_limits: { 'concurrent_relation_batch_export_limit' => 8 }) + end + + context 'when Gitlab.com? is false' do + before do + allow(Gitlab).to receive(:com?).and_return(false) + end + + it 'does not change the rate limit concurrent_relation_batch_export_limit' do + disable_migrations_output do + reversible_migration do |migration| + migration.before -> { + expect(application_settings.reload.rate_limits['concurrent_relation_batch_export_limit']).to eq(8) + } + + migration.after -> { + expect(application_settings.reload.rate_limits['concurrent_relation_batch_export_limit']).to eq(8) + } + end + end + end + end + + context 'when Gitlab.com? is true' do + before do + allow(Gitlab).to receive(:com?).and_return(true) + end + + it 'sets the rate_limits concurrent_relation_batch_export_limit to 10k' do + disable_migrations_output do + reversible_migration do |migration| + migration.before -> { + expect(application_settings.reload.rate_limits['concurrent_relation_batch_export_limit']).to eq(8) + } + + migration.after -> { + expect(application_settings.reload.rate_limits['concurrent_relation_batch_export_limit']).to eq(10_000) + } + end + end + end + end +end -- GitLab From ed52b5ba7a25bf04b182e8c6b676c93635db3192 Mon Sep 17 00:00:00 2001 From: Keeyan Nejad Date: Wed, 23 Oct 2024 17:32:54 +0100 Subject: [PATCH 09/17] Update migration to delete export limit on down Also return early if there are no application settings --- ..._concurrent_relation_batch_export_limit.rb | 4 +++- ...urrent_relation_batch_export_limit_spec.rb | 20 +++++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/db/migrate/20241022175028_increase_gitlab_com_concurrent_relation_batch_export_limit.rb b/db/migrate/20241022175028_increase_gitlab_com_concurrent_relation_batch_export_limit.rb index 9d602aa62b2e5b..dc544dfccf5aff 100644 --- a/db/migrate/20241022175028_increase_gitlab_com_concurrent_relation_batch_export_limit.rb +++ b/db/migrate/20241022175028_increase_gitlab_com_concurrent_relation_batch_export_limit.rb @@ -11,6 +11,7 @@ def up return unless Gitlab.com? application_setting = ApplicationSetting.last + return if application_setting.nil? application_setting.rate_limits['concurrent_relation_batch_export_limit'] = 10_000 application_setting.save! @@ -20,8 +21,9 @@ def down return unless Gitlab.com? application_setting = ApplicationSetting.last + return if application_setting.nil? - application_setting.rate_limits['concurrent_relation_batch_export_limit'] = 8 + application_setting.rate_limits.delete('concurrent_relation_batch_export_limit') application_setting.save! end end diff --git a/spec/migrations/increase_gitlab_com_concurrent_relation_batch_export_limit_spec.rb b/spec/migrations/increase_gitlab_com_concurrent_relation_batch_export_limit_spec.rb index e283f77d10d801..99d3ef35be9707 100644 --- a/spec/migrations/increase_gitlab_com_concurrent_relation_batch_export_limit_spec.rb +++ b/spec/migrations/increase_gitlab_com_concurrent_relation_batch_export_limit_spec.rb @@ -7,9 +7,7 @@ RSpec.describe IncreaseGitlabComConcurrentRelationBatchExportLimit, feature_category: :database do let(:application_settings_table) { table(:application_settings) } - let!(:application_settings) do - application_settings_table.create!(rate_limits: { 'concurrent_relation_batch_export_limit' => 8 }) - end + let!(:application_settings) { application_settings_table.create! } context 'when Gitlab.com? is false' do before do @@ -20,11 +18,11 @@ disable_migrations_output do reversible_migration do |migration| migration.before -> { - expect(application_settings.reload.rate_limits['concurrent_relation_batch_export_limit']).to eq(8) + expect(application_settings.reload.rate_limits).not_to have_key('concurrent_relation_batch_export_limit') } migration.after -> { - expect(application_settings.reload.rate_limits['concurrent_relation_batch_export_limit']).to eq(8) + expect(application_settings.reload.rate_limits).not_to have_key('concurrent_relation_batch_export_limit') } end end @@ -40,7 +38,7 @@ disable_migrations_output do reversible_migration do |migration| migration.before -> { - expect(application_settings.reload.rate_limits['concurrent_relation_batch_export_limit']).to eq(8) + expect(application_settings.reload.rate_limits).not_to have_key('concurrent_relation_batch_export_limit') } migration.after -> { @@ -49,5 +47,15 @@ end end end + + context 'when there is no application setting' do + let!(:application_settings) { nil } + + it 'does not fail' do + disable_migrations_output do + expect { migrate! }.not_to raise_exception + end + end + end end end -- GitLab From cd8ded33c119de3f44f10226cc225ca15cffb913 Mon Sep 17 00:00:00 2001 From: Keeyan Nejad Date: Mon, 28 Oct 2024 14:57:43 +0000 Subject: [PATCH 10/17] Fix Redis expire to pass int instead of datetime --- app/workers/bulk_imports/relation_batch_export_worker.rb | 2 +- .../bulk_imports/relation_batch_export_worker_spec.rb | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/app/workers/bulk_imports/relation_batch_export_worker.rb b/app/workers/bulk_imports/relation_batch_export_worker.rb index 1411ee506cae8d..501f30ae991afb 100644 --- a/app/workers/bulk_imports/relation_batch_export_worker.rb +++ b/app/workers/bulk_imports/relation_batch_export_worker.rb @@ -51,7 +51,7 @@ def re_enqueue_job(user, batch) def reset_cache_timeout(batch) cache_service = BulkImports::BatchedRelationExportService cache_key = cache_service.cache_key(batch.export_id, batch.id) - Gitlab::Cache::Import::Caching.expire(cache_key, cache_service::CACHE_DURATION.from_now) + Gitlab::Cache::Import::Caching.expire(cache_key, cache_service::CACHE_DURATION.to_i) end end end diff --git a/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb b/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb index 7c1f4c249aba95..9dcab862b2d2fb 100644 --- a/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb +++ b/spec/workers/bulk_imports/relation_batch_export_worker_spec.rb @@ -42,16 +42,15 @@ it 'resets the expiration date for the cache key' do cache_key = BulkImports::BatchedRelationExportService.cache_key(batch.export_id, batch.id) - Gitlab::Cache::Import::Caching.write(cache_key, "test", timeout: 1.hour.from_now) + Gitlab::Cache::Import::Caching.write(cache_key, "test", timeout: 1.hour.to_i) perform - expires_at = Gitlab::Cache::Import::Caching.with_redis do |redis| - ttl = redis.ttl(Gitlab::Cache::Import::Caching.cache_key_for(cache_key)) - Time.zone.at(ttl) + expires_in_seconds = Gitlab::Cache::Import::Caching.with_redis do |redis| + redis.ttl(Gitlab::Cache::Import::Caching.cache_key_for(cache_key)) end - expect(expires_at).to be_within(1.minute).of(BulkImports::BatchedRelationExportService::CACHE_DURATION.from_now) + expect(expires_in_seconds).to be_within(10).of(BulkImports::BatchedRelationExportService::CACHE_DURATION.to_i) end context 'when the export batch started longer ago than the timeout time' do -- GitLab From efe81ffdb4599650ec6dd9aeb8fb3c8cb1e7a5ad Mon Sep 17 00:00:00 2001 From: Keeyan Nejad Date: Mon, 28 Oct 2024 15:43:54 +0000 Subject: [PATCH 11/17] Log objects_count and batch number --- app/workers/bulk_imports/relation_batch_export_worker.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/app/workers/bulk_imports/relation_batch_export_worker.rb b/app/workers/bulk_imports/relation_batch_export_worker.rb index 501f30ae991afb..6e014c22f8c25c 100644 --- a/app/workers/bulk_imports/relation_batch_export_worker.rb +++ b/app/workers/bulk_imports/relation_batch_export_worker.rb @@ -29,9 +29,11 @@ def perform(user_id, batch_id) return re_enqueue_job(@user, @batch) if max_exports_already_running? log_extra_metadata_on_done(:relation, @batch.export.relation) - log_extra_metadata_on_done(:objects_count, @batch.objects_count) + log_extra_metadata_on_done(:relation, @batch.batch_number) RelationBatchExportService.new(@user, @batch).execute + + log_extra_metadata_on_done(:objects_count, @batch.objects_count) end def max_exports_already_running? -- GitLab From dd4e8b0396722c6104ccb70e771b1827f2eb3422 Mon Sep 17 00:00:00 2001 From: Keeyan Nejad Date: Mon, 28 Oct 2024 15:44:36 +0000 Subject: [PATCH 12/17] Log if job was re enqueued --- app/workers/bulk_imports/relation_batch_export_worker.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/app/workers/bulk_imports/relation_batch_export_worker.rb b/app/workers/bulk_imports/relation_batch_export_worker.rb index 6e014c22f8c25c..95e9f444874a2b 100644 --- a/app/workers/bulk_imports/relation_batch_export_worker.rb +++ b/app/workers/bulk_imports/relation_batch_export_worker.rb @@ -46,6 +46,7 @@ def max_exports_already_running? def re_enqueue_job(user, batch) reset_cache_timeout(batch) + log_extra_metadata_on_done(:re_enqueue, true) self.class.perform_in(PERFORM_DELAY, user.id, batch.id) end -- GitLab From 86c65da219a558d2e14f484a40586d899ce711a9 Mon Sep 17 00:00:00 2001 From: Keeyan Nejad Date: Tue, 29 Oct 2024 11:33:57 +0000 Subject: [PATCH 13/17] Reenqueue finish worker on created state --- app/models/bulk_imports/export_batch.rb | 3 +++ .../finish_batched_relation_export_worker.rb | 2 +- spec/models/bulk_imports/export_batch_spec.rb | 13 ++++++++++++ ...ish_batched_relation_export_worker_spec.rb | 20 ++++++++++++++++--- 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/app/models/bulk_imports/export_batch.rb b/app/models/bulk_imports/export_batch.rb index b4cac733262029..79e0970836fc3a 100644 --- a/app/models/bulk_imports/export_batch.rb +++ b/app/models/bulk_imports/export_batch.rb @@ -6,6 +6,7 @@ class ExportBatch < ApplicationRecord BATCH_SIZE = 1000 TIMEOUT_AFTER_START = 1.hour + IN_PROGRESS_STATES = %i[created started].freeze scope :started_and_not_timed_out, -> { with_status(:started).where(updated_at: TIMEOUT_AFTER_START.ago...) } @@ -14,6 +15,8 @@ class ExportBatch < ApplicationRecord validates :batch_number, presence: true, uniqueness: { scope: :export_id } + scope :in_progress, -> { with_status(IN_PROGRESS_STATES) } + state_machine :status, initial: :created do state :created, value: 0 state :started, value: 2 diff --git a/app/workers/bulk_imports/finish_batched_relation_export_worker.rb b/app/workers/bulk_imports/finish_batched_relation_export_worker.rb index 5e2777673d8d9d..f5c5da0d9c4e28 100644 --- a/app/workers/bulk_imports/finish_batched_relation_export_worker.rb +++ b/app/workers/bulk_imports/finish_batched_relation_export_worker.rb @@ -42,7 +42,7 @@ def export_timeout? end def export_in_progress? - export.batches.any?(&:started?) + export.batches.in_progress.any? end def finish_export! diff --git a/spec/models/bulk_imports/export_batch_spec.rb b/spec/models/bulk_imports/export_batch_spec.rb index ff1c1c3c2e03d8..f90745c01d875c 100644 --- a/spec/models/bulk_imports/export_batch_spec.rb +++ b/spec/models/bulk_imports/export_batch_spec.rb @@ -15,6 +15,19 @@ it { is_expected.to validate_uniqueness_of(:batch_number).scoped_to(:export_id) } end + describe 'scopes' do + describe '.in_progress' do + it 'returns only batches that are in progress' do + created = create(:bulk_import_export_batch, :created) + started = create(:bulk_import_export_batch, :started) + create(:bulk_import_export_batch, :finished) + create(:bulk_import_export_batch, :failed) + + expect(described_class.in_progress).to contain_exactly(created, started) + end + end + end + describe '.started_and_not_timed_out' do subject(:started_and_not_timed_out) { described_class.started_and_not_timed_out } diff --git a/spec/workers/bulk_imports/finish_batched_relation_export_worker_spec.rb b/spec/workers/bulk_imports/finish_batched_relation_export_worker_spec.rb index 82416ada952c09..c6649d7db42bd0 100644 --- a/spec/workers/bulk_imports/finish_batched_relation_export_worker_spec.rb +++ b/spec/workers/bulk_imports/finish_batched_relation_export_worker_spec.rb @@ -40,10 +40,8 @@ end end - context 'when export is in progress' do + shared_examples 'reenqueues itself' do it 'reenqueues itself' do - create(:bulk_import_export_batch, :started, export: export) - expect(described_class).to receive(:perform_in).twice.with(described_class::REENQUEUE_DELAY, export.id) perform_multiple(job_args) @@ -52,6 +50,22 @@ end end + context 'when export has started' do + before do + create(:bulk_import_export_batch, :started, export: export) + end + + it_behaves_like 'reenqueues itself' + end + + context 'when export has been created' do + before do + create(:bulk_import_export_batch, :created, export: export) + end + + it_behaves_like 'reenqueues itself' + end + context 'when export timed out' do it 'marks export as failed' do expect(export.reload.failed?).to eq(false) -- GitLab From 2d70c823bb5e8aee86433c7b14a79b767ef7ccbf Mon Sep 17 00:00:00 2001 From: Keeyan Nejad Date: Tue, 29 Oct 2024 14:50:44 +0000 Subject: [PATCH 14/17] Move batch export limit docs to import_and_integrate_settings --- .../concurrent_relation_batch_export_limit.md | 21 ------------------- .../settings/import_and_export_settings.md | 19 +++++++++++++++++ 2 files changed, 19 insertions(+), 21 deletions(-) delete mode 100644 doc/administration/settings/concurrent_relation_batch_export_limit.md diff --git a/doc/administration/settings/concurrent_relation_batch_export_limit.md b/doc/administration/settings/concurrent_relation_batch_export_limit.md deleted file mode 100644 index 35b59e17bd067c..00000000000000 --- a/doc/administration/settings/concurrent_relation_batch_export_limit.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -stage: Foundations -group: Import and Integrate -info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://handbook.gitlab.com/handbook/product/ux/technical-writing/#assignments ---- - -# Limit number of concurrent batch export workers - -DETAILS: -**Tier:** Free, Premium, Ultimate -**Offering:** Self-managed - -> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/169122) for self-managed in GitLab 17.6. - -Exports for Direct Transfer can consume a lot of resources, to prevent exhausting either the database or the Sidekiq processes, administrators can configure the `concurrent_relation_batch_export_limit` setting. - -The default limit of 8 should be appropriate for [the 2k user reference architecture](../../administration/reference_architectures/2k_users.md). - -If you are experiencing `ActiveRecord::QueryCanceled PG::QueryCanceled: ERROR: canceling statement due to statement timeout` errors, or jobs getting interrupted due to Sidekiq Memory Killer restarting multiple times relating to the batch exports, you may want to consider lowering this value. - -On the other hand, if you have more resources available, you can increase this value to allow more concurrent export jobs to process. diff --git a/doc/administration/settings/import_and_export_settings.md b/doc/administration/settings/import_and_export_settings.md index 43a1d4067a8603..bbacc17212615c 100644 --- a/doc/administration/settings/import_and_export_settings.md +++ b/doc/administration/settings/import_and_export_settings.md @@ -213,6 +213,25 @@ To modify this setting: 1. Expand **Import and export settings**. 1. Set another value for **Maximum number of simultaneous import jobs** for the desired importer. +## Maximum number of simultaneous batch export jobs + +> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/169122) for self-managed in GitLab 17.6. + +Exports for Direct Transfer can consume a lot of resources, to prevent exhausting either the database or the Sidekiq processes, +administrators can configure the `concurrent_relation_batch_export_limit` setting. + +The default limit of 8 should be appropriate for [the 2k user reference architecture](../../administration/reference_architectures/2k_users.md). + +If you are experiencing `ActiveRecord::QueryCanceled PG::QueryCanceled: ERROR: canceling statement due to statement timeout` errors, +or jobs getting interrupted due to Sidekiq Memory Killer restarting multiple times relating to the batch exports, +you may want to consider lowering this value. + +On the other hand, if you have more resources available, you can increase this value to allow more concurrent export jobs to process. + +To modify this setting: + +1. Make an API request to `/api/v4/application/settings` as described in [Application settings API](../../api/settings.md) with the `concurrent_relation_batch_export_limit` key. + ## Troubleshooting ## Error: `Help page documentation base url is blocked: execution expired` -- GitLab From a67bba5b9a222522b67545290236e1386ea12e51 Mon Sep 17 00:00:00 2001 From: Keeyan Nejad Date: Wed, 30 Oct 2024 16:53:12 +0000 Subject: [PATCH 15/17] Apply various documentation changes --- .../settings/import_and_export_settings.md | 23 +++++++++---------- doc/api/settings.md | 2 +- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/doc/administration/settings/import_and_export_settings.md b/doc/administration/settings/import_and_export_settings.md index bbacc17212615c..8830d1d127f818 100644 --- a/doc/administration/settings/import_and_export_settings.md +++ b/doc/administration/settings/import_and_export_settings.md @@ -215,22 +215,21 @@ To modify this setting: ## Maximum number of simultaneous batch export jobs -> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/169122) for self-managed in GitLab 17.6. +> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/169122) in GitLab 17.6. -Exports for Direct Transfer can consume a lot of resources, to prevent exhausting either the database or the Sidekiq processes, +Direct transfer exports can consume a significant amount of resources. +To prevent using up the database or Sidekiq processes, administrators can configure the `concurrent_relation_batch_export_limit` setting. -The default limit of 8 should be appropriate for [the 2k user reference architecture](../../administration/reference_architectures/2k_users.md). +The default value is `8` jobs, which corresponds to a +[reference architecture for up to 40 RPS or 2,000 users](../../administration/reference_architectures/2k_users.md). +If you encounter `PG::QueryCanceled: ERROR: canceling statement due to statement timeout` errors +or jobs getting interrupted due to Sidekiq memory limits, you might want to reduce this number. +If you have enough resources, you can increase this number to process more concurrent export jobs. -If you are experiencing `ActiveRecord::QueryCanceled PG::QueryCanceled: ERROR: canceling statement due to statement timeout` errors, -or jobs getting interrupted due to Sidekiq Memory Killer restarting multiple times relating to the batch exports, -you may want to consider lowering this value. - -On the other hand, if you have more resources available, you can increase this value to allow more concurrent export jobs to process. - -To modify this setting: - -1. Make an API request to `/api/v4/application/settings` as described in [Application settings API](../../api/settings.md) with the `concurrent_relation_batch_export_limit` key. +To modify this setting, send an API request to `/api/v4/application/settings` +with `concurrent_relation_batch_export_limit`. +For more information, see [application settings API](../../api/settings.md). ## Troubleshooting diff --git a/doc/api/settings.md b/doc/api/settings.md index 0b13f7e70a1a69..42eb4a481785c5 100644 --- a/doc/api/settings.md +++ b/doc/api/settings.md @@ -701,7 +701,7 @@ listed in the descriptions of the relevant settings. | `whats_new_variant` | string | no | What's new variant, possible values: `all_tiers`, `current_tier`, and `disabled`. | | `wiki_page_max_content_bytes` | integer | no | Maximum wiki page content size in **bytes**. Default: 52428800 Bytes (50 MB). The minimum value is 1024 bytes. | | `bulk_import_concurrent_pipeline_batch_limit` | integer | no | Maximum simultaneous Direct Transfer batches to process. | -| `concurrent_relation_batch_export_limit` | integer | no | Maximum simultaneous relation exports to process. | +| `concurrent_relation_batch_export_limit` | integer | no | Maximum number of simultaneous bulk export jobs to process. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/169122) in GitLab 17.6. | | `asciidoc_max_includes` | integer | no | Maximum limit of AsciiDoc include directives being processed in any one document. Default: 32. Maximum: 64. | | `duo_features_enabled` | boolean | no | Indicates whether GitLab Duo features are enabled for this instance. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/144931) in GitLab 16.10. Self-managed, Premium and Ultimate only. | | `lock_duo_features_enabled` | boolean | no | Indicates whether the GitLab Duo features enabled setting is enforced for all subgroups. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/144931) in GitLab 16.10. Self-managed, Premium and Ultimate only. | -- GitLab From 89cf9be056ffd1ec62ac55590c53366150c77cd3 Mon Sep 17 00:00:00 2001 From: Keeyan Nejad Date: Wed, 30 Oct 2024 16:54:21 +0000 Subject: [PATCH 16/17] Fix logging batch number --- app/workers/bulk_imports/relation_batch_export_worker.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/workers/bulk_imports/relation_batch_export_worker.rb b/app/workers/bulk_imports/relation_batch_export_worker.rb index 95e9f444874a2b..d682a818b9ec45 100644 --- a/app/workers/bulk_imports/relation_batch_export_worker.rb +++ b/app/workers/bulk_imports/relation_batch_export_worker.rb @@ -29,7 +29,7 @@ def perform(user_id, batch_id) return re_enqueue_job(@user, @batch) if max_exports_already_running? log_extra_metadata_on_done(:relation, @batch.export.relation) - log_extra_metadata_on_done(:relation, @batch.batch_number) + log_extra_metadata_on_done(:batch_number, @batch.batch_number) RelationBatchExportService.new(@user, @batch).execute -- GitLab From 9992a950f024af75bb7cfb46d06a5924b1c7a8c9 Mon Sep 17 00:00:00 2001 From: Keeyan Nejad Date: Thu, 31 Oct 2024 09:47:00 +0000 Subject: [PATCH 17/17] Use batch phrasing in docs consistently --- .../json_schemas/application_setting_rate_limits.json | 2 +- doc/api/settings.md | 4 ++-- lib/api/settings.rb | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/app/validators/json_schemas/application_setting_rate_limits.json b/app/validators/json_schemas/application_setting_rate_limits.json index 3f563180b1a960..e97eee3185df3d 100644 --- a/app/validators/json_schemas/application_setting_rate_limits.json +++ b/app/validators/json_schemas/application_setting_rate_limits.json @@ -22,7 +22,7 @@ "concurrent_relation_batch_export_limit": { "type": "integer", "minimum": 1, - "description": "Maximum simultaneous relation exports to process" + "description": "Maximum number of simultaneous batch export jobs to process" }, "create_organization_api_limit": { "type": "integer", diff --git a/doc/api/settings.md b/doc/api/settings.md index 42eb4a481785c5..f0359c162bc5a6 100644 --- a/doc/api/settings.md +++ b/doc/api/settings.md @@ -700,8 +700,8 @@ listed in the descriptions of the relevant settings. | `valid_runner_registrars` | array of strings | no | List of types which are allowed to register a GitLab Runner. Can be `[]`, `['group']`, `['project']` or `['group', 'project']`. | | `whats_new_variant` | string | no | What's new variant, possible values: `all_tiers`, `current_tier`, and `disabled`. | | `wiki_page_max_content_bytes` | integer | no | Maximum wiki page content size in **bytes**. Default: 52428800 Bytes (50 MB). The minimum value is 1024 bytes. | -| `bulk_import_concurrent_pipeline_batch_limit` | integer | no | Maximum simultaneous Direct Transfer batches to process. | -| `concurrent_relation_batch_export_limit` | integer | no | Maximum number of simultaneous bulk export jobs to process. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/169122) in GitLab 17.6. | +| `bulk_import_concurrent_pipeline_batch_limit` | integer | no | Maximum simultaneous direct transfer batch exports to process. | +| `concurrent_relation_batch_export_limit` | integer | no | Maximum number of simultaneous batch export jobs to process. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/169122) in GitLab 17.6. | | `asciidoc_max_includes` | integer | no | Maximum limit of AsciiDoc include directives being processed in any one document. Default: 32. Maximum: 64. | | `duo_features_enabled` | boolean | no | Indicates whether GitLab Duo features are enabled for this instance. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/144931) in GitLab 16.10. Self-managed, Premium and Ultimate only. | | `lock_duo_features_enabled` | boolean | no | Indicates whether the GitLab Duo features enabled setting is enforced for all subgroups. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/144931) in GitLab 16.10. Self-managed, Premium and Ultimate only. | diff --git a/lib/api/settings.rb b/lib/api/settings.rb index ef9f4c980e3249..d3afe73d512fd7 100644 --- a/lib/api/settings.rb +++ b/lib/api/settings.rb @@ -214,8 +214,8 @@ def filter_attributes_using_license(attrs) optional :jira_connect_application_key, type: String, desc: "ID of the OAuth application used to authenticate with the GitLab for Jira Cloud app." optional :jira_connect_public_key_storage_enabled, type: Boolean, desc: 'Enable public key storage for the GitLab for Jira Cloud app.' optional :jira_connect_proxy_url, type: String, desc: "URL of the GitLab instance used as a proxy for the GitLab for Jira Cloud app." - optional :bulk_import_concurrent_pipeline_batch_limit, type: Integer, desc: 'Maximum simultaneous Direct Transfer pipeline batches to process' - optional :concurrent_relation_batch_export_limit, type: Integer, desc: 'Maximum simultaneous relation exports to process.' + optional :bulk_import_concurrent_pipeline_batch_limit, type: Integer, desc: 'Maximum simultaneous direct transfer batch exports to process.' + optional :concurrent_relation_batch_export_limit, type: Integer, desc: 'Maximum number of simultaneous batch export jobs to process.' optional :bulk_import_enabled, type: Boolean, desc: 'Enable migrating GitLab groups and projects by direct transfer' optional :bulk_import_max_download_file, type: Integer, desc: 'Maximum download file size in MB when importing from source GitLab instances by direct transfer' optional :concurrent_github_import_jobs_limit, type: Integer, desc: 'Github Importer maximum number of simultaneous import jobs' -- GitLab