From d42f5077357a1e4dda89d208b68025c970dde1e1 Mon Sep 17 00:00:00 2001 From: James Nutt Date: Mon, 16 Jun 2025 10:28:37 +0100 Subject: [PATCH 1/3] Make relation export batch size configurable as an application setting This MR makes the batch size used by batched relation export configurable via application setting. This application setting can be updated via the REST API, and applies consistently across all batches for an export. We cache the batch size for the duration of an export so that the batch size is consistent for a given export regardless of job restarts or settings changes. Changelog: added --- app/helpers/application_settings_helper.rb | 1 + app/models/application_setting.rb | 4 ++- .../application_setting_implementation.rb | 1 + .../batched_relation_export_service.rb | 29 +++++++++++++++++-- .../application_setting_importers.json | 3 ++ .../finish_batched_relation_export_worker.rb | 5 ++++ spec/models/application_setting_spec.rb | 2 ++ .../batched_relation_export_service_spec.rb | 8 ++++- ...ish_batched_relation_export_worker_spec.rb | 10 +++++-- 9 files changed, 55 insertions(+), 8 deletions(-) diff --git a/app/helpers/application_settings_helper.rb b/app/helpers/application_settings_helper.rb index 83083f66450f8d..5825a7d1052f51 100644 --- a/app/helpers/application_settings_helper.rb +++ b/app/helpers/application_settings_helper.rb @@ -570,6 +570,7 @@ def visible_attributes :can_create_organization, :bulk_import_concurrent_pipeline_batch_limit, :concurrent_relation_batch_export_limit, + :relation_export_batch_size, :bulk_import_enabled, :bulk_import_max_download_file_size, :silent_admin_exports_enabled, diff --git a/app/models/application_setting.rb b/app/models/application_setting.rb index f4bd488d531750..e9af4f924001b8 100644 --- a/app/models/application_setting.rb +++ b/app/models/application_setting.rb @@ -598,6 +598,7 @@ def self.kroki_formats_attributes :max_yaml_size_bytes, :namespace_aggregation_schedule_lease_duration_in_seconds, 
:project_jobs_api_rate_limit, + :relation_export_batch_size, :session_expire_delay, :snippet_size_limit, :throttle_authenticated_api_period_in_seconds, @@ -725,7 +726,8 @@ def self.kroki_formats_attributes jsonb_accessor :importers, silent_admin_exports_enabled: [:boolean, { default: false }], allow_contribution_mapping_to_admins: [:boolean, { default: false }], - allow_bypass_placeholder_confirmation: [:boolean, { default: false }] + allow_bypass_placeholder_confirmation: [:boolean, { default: false }], + relation_export_batch_size: [:integer, { default: 50 }] jsonb_accessor :sign_in_restrictions, disable_password_authentication_for_users_with_sso_identities: [:boolean, { default: false }], diff --git a/app/models/application_setting_implementation.rb b/app/models/application_setting_implementation.rb index b955ff223730b3..10cf1150092f5f 100644 --- a/app/models/application_setting_implementation.rb +++ b/app/models/application_setting_implementation.rb @@ -180,6 +180,7 @@ def defaults # rubocop:disable Metrics/AbcSize raw_blob_request_limit: 300, recaptcha_enabled: false, receptive_cluster_agents_enabled: false, + relation_export_batch_size: 50, repository_checks_enabled: true, repository_storages_weighted: { 'default' => 100 }, require_admin_approval_after_user_signup: true, diff --git a/app/services/bulk_imports/batched_relation_export_service.rb b/app/services/bulk_imports/batched_relation_export_service.rb index 16cff6df2b9b7c..924063ad8e1b1e 100644 --- a/app/services/bulk_imports/batched_relation_export_service.rb +++ b/app/services/bulk_imports/batched_relation_export_service.rb @@ -4,14 +4,18 @@ module BulkImports class BatchedRelationExportService include Gitlab::Utils::StrongMemoize - BATCH_SIZE = 1000 BATCH_CACHE_KEY = 'bulk_imports/batched_relation_export/%{export_id}/%{batch_id}' + BATCH_SIZE_CACHE_KEY = 'bulk_imports/batched_relation_export/%{export_id}/batch_size' CACHE_DURATION = 4.hours def self.cache_key(export_id, batch_id) 
Kernel.format(BATCH_CACHE_KEY, export_id: export_id, batch_id: batch_id) end + def self.batch_size_cache_key(export_id) + Kernel.format(BATCH_SIZE_CACHE_KEY, export_id: export_id) + end + def initialize(user, portable, relation, jid) @user = user @portable = portable @@ -34,6 +38,25 @@ def execute attr_reader :user, :portable, :relation, :jid, :config, :resolved_relation + # Returns the batch size for processing relation exports. + # + # The batch size determines how many records are processed together in each batch + # during the export operation. We cache the batch size so that any retried workers + # for the same relation export use the same batch size. + # + # @return [Integer] The number of records to process per batch + def batch_size + key = self.class.batch_size_cache_key(export.id) + + Gitlab::Cache::Import::Caching.read_integer(key) || + Gitlab::Cache::Import::Caching.write( + key, + Gitlab::CurrentSettings.relation_export_batch_size, + timeout: CACHE_DURATION + ) + end + strong_memoize_attr :batch_size + def export # rubocop:disable Performance/ActiveRecordSubtransactionMethods -- This is only executed from within a worker @export ||= portable.bulk_import_exports.safe_find_or_create_by!(relation: relation, user: user) @@ -45,7 +68,7 @@ def objects_count end def batches_count - objects_count.fdiv(BATCH_SIZE).ceil + objects_count.fdiv(batch_size).ceil end def start_export! 
@@ -72,7 +95,7 @@ def update_export!(event) def enqueue_batch_exports batch_number = 0 - resolved_relation.in_batches(of: BATCH_SIZE) do |batch| + resolved_relation.in_batches(of: batch_size) do |batch| batch_number += 1 batch_id = find_or_create_batch(batch_number).id diff --git a/app/validators/json_schemas/application_setting_importers.json b/app/validators/json_schemas/application_setting_importers.json index d0d4f06adb132c..0969f91e77a0f4 100644 --- a/app/validators/json_schemas/application_setting_importers.json +++ b/app/validators/json_schemas/application_setting_importers.json @@ -11,6 +11,9 @@ }, "allow_bypass_placeholder_confirmation": { "type": "boolean" + }, + "relation_export_batch_size": { + "type": "integer" } }, "additionalProperties": false diff --git a/app/workers/bulk_imports/finish_batched_relation_export_worker.rb b/app/workers/bulk_imports/finish_batched_relation_export_worker.rb index 54fcaeb791d02e..628f216b3c1779 100644 --- a/app/workers/bulk_imports/finish_batched_relation_export_worker.rb +++ b/app/workers/bulk_imports/finish_batched_relation_export_worker.rb @@ -52,6 +52,11 @@ def finish_export! end def expire_cache! 
+ Gitlab::Cache::Import::Caching.expire( + BulkImports::BatchedRelationExportService.batch_size_cache_key(export.id), + 0 + ) + export.batches.each do |batch| key = BulkImports::BatchedRelationExportService.cache_key(export.id, batch.id) diff --git a/spec/models/application_setting_spec.rb b/spec/models/application_setting_spec.rb index 69f95999b46b9d..c3c1369772f755 100644 --- a/spec/models/application_setting_spec.rb +++ b/spec/models/application_setting_spec.rb @@ -213,6 +213,7 @@ recaptcha_enabled: false, reindexing_minimum_index_size: 1.gigabyte, reindexing_minimum_relative_bloat_size: 0.2, + relation_export_batch_size: 50, remember_me_enabled: true, repository_checks_enabled: true, repository_storages_weighted: { 'default' => 100 }, @@ -597,6 +598,7 @@ def many_usernames(num = 100) max_yaml_size_bytes namespace_aggregation_schedule_lease_duration_in_seconds project_jobs_api_rate_limit + relation_export_batch_size session_expire_delay snippet_size_limit throttle_authenticated_api_period_in_seconds diff --git a/spec/services/bulk_imports/batched_relation_export_service_spec.rb b/spec/services/bulk_imports/batched_relation_export_service_spec.rb index cb356b90c61375..37bff06c3622b7 100644 --- a/spec/services/bulk_imports/batched_relation_export_service_spec.rb +++ b/spec/services/bulk_imports/batched_relation_export_service_spec.rb @@ -46,7 +46,7 @@ context 'when there are multiple batches' do it 'creates a batch record for each batch of records' do - stub_const("#{described_class.name}::BATCH_SIZE", 1) + stub_application_setting(relation_export_batch_size: 1) create_list(:group_label, 10, group: portable) @@ -88,4 +88,10 @@ expect(described_class.cache_key(1, 1)).to eq('bulk_imports/batched_relation_export/1/1') end end + + describe '.batch_size_cache_key' do + it 'returns the cache key for the export batch size' do + expect(described_class.batch_size_cache_key(1)).to eq('bulk_imports/batched_relation_export/1/batch_size') + end + end end diff --git 
a/spec/workers/bulk_imports/finish_batched_relation_export_worker_spec.rb b/spec/workers/bulk_imports/finish_batched_relation_export_worker_spec.rb index c6649d7db42bd0..88474e865c848e 100644 --- a/spec/workers/bulk_imports/finish_batched_relation_export_worker_spec.rb +++ b/spec/workers/bulk_imports/finish_batched_relation_export_worker_spec.rb @@ -10,10 +10,14 @@ describe '#perform' do it_behaves_like 'an idempotent worker' do - it 'marks export as finished and expires batches cache' do - cache_key = BulkImports::BatchedRelationExportService.cache_key(export.id, batch.id) + it 'marks export as finished and expires batches cache', :aggregate_failures do + allow(Gitlab::Cache::Import::Caching).to receive(:expire) - expect(Gitlab::Cache::Import::Caching).to receive(:expire).with(cache_key, 0) + batch_cache_key = BulkImports::BatchedRelationExportService.cache_key(export.id, batch.id) + expect(Gitlab::Cache::Import::Caching).to receive(:expire).with(batch_cache_key, 0) + + batch_size_cache_key = BulkImports::BatchedRelationExportService.batch_size_cache_key(export.id) + expect(Gitlab::Cache::Import::Caching).to receive(:expire).with(batch_size_cache_key, 0) perform_multiple(job_args) -- GitLab From 97309864613454b40b362ab64aee0cf57e1e832f Mon Sep 17 00:00:00 2001 From: James Nutt Date: Mon, 16 Jun 2025 15:31:03 +0100 Subject: [PATCH 2/3] Documentation --- .../settings/import_and_export_settings.md | 13 +++++++++++++ doc/api/settings.md | 3 +++ 2 files changed, 16 insertions(+) diff --git a/doc/administration/settings/import_and_export_settings.md b/doc/administration/settings/import_and_export_settings.md index 9da46f2e7ebc59..5684d5484bc026 100644 --- a/doc/administration/settings/import_and_export_settings.md +++ b/doc/administration/settings/import_and_export_settings.md @@ -319,6 +319,19 @@ To modify this setting, send an API request to `/api/v4/application/settings` with `concurrent_relation_batch_export_limit`. 
For more information, see [application settings API](../../api/settings.md). +### Export batch size + +{{< history >}} + +- [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/194607) in GitLab 18.2. + +{{< /history >}} + +To further manage memory usage and database load, use the `relation_export_batch_size` setting to control the number of records processed in each batch during export operations. + +The default value is `50` records per batch. To modify this setting, send an API request to `/api/v4/application/settings` with `relation_export_batch_size`. +For more information, see [application settings API](../../api/settings.md). + ## Troubleshooting ## Error: `Help page documentation base url is blocked: execution expired` diff --git a/doc/api/settings.md b/doc/api/settings.md index 6e0dfe7d086797..a870dc7fb72a98 100644 --- a/doc/api/settings.md +++ b/doc/api/settings.md @@ -162,6 +162,7 @@ Example response: "security_txt_content": null, "bulk_import_concurrent_pipeline_batch_limit": 25, "concurrent_relation_batch_export_limit": 25, + "relation_export_batch_size": 50, "concurrent_github_import_jobs_limit": 1000, "concurrent_bitbucket_import_jobs_limit": 100, "concurrent_bitbucket_server_import_jobs_limit": 100, @@ -362,6 +363,7 @@ Example response: "security_txt_content": null, "bulk_import_concurrent_pipeline_batch_limit": 25, "concurrent_relation_batch_export_limit": 25, + "relation_export_batch_size": 50, "downstream_pipeline_trigger_limit_per_project_user_sha": 0, "concurrent_github_import_jobs_limit": 1000, "concurrent_bitbucket_import_jobs_limit": 100, @@ -690,6 +692,7 @@ to configure other related settings. These requirements are | `recaptcha_site_key` | string | required by: `recaptcha_enabled` | Site key for reCAPTCHA. | | `receptive_cluster_agents_enabled` | boolean | no | Enable receptive mode for GitLab Agents for Kubernetes. | | `receive_max_input_size` | integer | no | Maximum push size (MB). 
| +| `relation_export_batch_size` | integer | no | The size of each batch when exporting batched relations. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/194607) in GitLab 18.2. | | `remember_me_enabled` | boolean | no | Enable [**Remember me** setting](../administration/settings/account_and_limit_settings.md#configure-the-remember-me-option). [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/369133) in GitLab 16.0. | | `repository_checks_enabled` | boolean | no | GitLab periodically runs `git fsck` in all project and wiki repositories to look for silent disk corruption issues. | | `repository_size_limit` | integer | no | Size limit per repository (MB). Premium and Ultimate only. | -- GitLab From 5e3a3ac11c39a27a32ea0fdab497458cfdf5b427 Mon Sep 17 00:00:00 2001 From: James Nutt Date: Tue, 17 Jun 2025 10:30:17 +0100 Subject: [PATCH 3/3] Add a spec for the batch size cache --- .../batched_relation_export_service_spec.rb | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/spec/services/bulk_imports/batched_relation_export_service_spec.rb b/spec/services/bulk_imports/batched_relation_export_service_spec.rb index 37bff06c3622b7..9d2f12b581c263 100644 --- a/spec/services/bulk_imports/batched_relation_export_service_spec.rb +++ b/spec/services/bulk_imports/batched_relation_export_service_spec.rb @@ -45,13 +45,27 @@ end context 'when there are multiple batches' do - it 'creates a batch record for each batch of records' do + before do stub_application_setting(relation_export_batch_size: 1) - create_list(:group_label, 10, group: portable) + end + + it 'creates a batch record for each batch of records' do + service.execute + + export = portable.bulk_import_exports.first + + expect(export.batches.count).to eq(11) + end + it 'caches the batch size for the export' do + # Execute once to set the cache service.execute + # Run a new instance of the export service for the same relation with + # a different batch size + 
stub_application_setting(relation_export_batch_size: 2) + described_class.new(user, portable, relation, jid).execute export = portable.bulk_import_exports.first expect(export.batches.count).to eq(11) -- GitLab