diff --git a/app/helpers/application_settings_helper.rb b/app/helpers/application_settings_helper.rb index 83083f66450f8d0e40a42d4ea15fbf46654f37f0..5825a7d1052f51fe9401181f87d2fbfb42a10046 100644 --- a/app/helpers/application_settings_helper.rb +++ b/app/helpers/application_settings_helper.rb @@ -570,6 +570,7 @@ def visible_attributes :can_create_organization, :bulk_import_concurrent_pipeline_batch_limit, :concurrent_relation_batch_export_limit, + :relation_export_batch_size, :bulk_import_enabled, :bulk_import_max_download_file_size, :silent_admin_exports_enabled, diff --git a/app/models/application_setting.rb b/app/models/application_setting.rb index f4bd488d53175065bf80aacd3ec30765adcfe58e..e9af4f924001b84fce29c981b5c4a7d6d753f3ad 100644 --- a/app/models/application_setting.rb +++ b/app/models/application_setting.rb @@ -598,6 +598,7 @@ def self.kroki_formats_attributes :max_yaml_size_bytes, :namespace_aggregation_schedule_lease_duration_in_seconds, :project_jobs_api_rate_limit, + :relation_export_batch_size, :session_expire_delay, :snippet_size_limit, :throttle_authenticated_api_period_in_seconds, @@ -725,7 +726,8 @@ def self.kroki_formats_attributes jsonb_accessor :importers, silent_admin_exports_enabled: [:boolean, { default: false }], allow_contribution_mapping_to_admins: [:boolean, { default: false }], - allow_bypass_placeholder_confirmation: [:boolean, { default: false }] + allow_bypass_placeholder_confirmation: [:boolean, { default: false }], + relation_export_batch_size: [:integer, { default: 50 }] jsonb_accessor :sign_in_restrictions, disable_password_authentication_for_users_with_sso_identities: [:boolean, { default: false }], diff --git a/app/models/application_setting_implementation.rb b/app/models/application_setting_implementation.rb index b955ff223730b36c6c632fa763ae5a9b7fcf0f05..10cf1150092f5f273f5c7d4b72938078781f60d8 100644 --- a/app/models/application_setting_implementation.rb +++ b/app/models/application_setting_implementation.rb @@ -180,6 +180,7 @@ def defaults # rubocop:disable Metrics/AbcSize raw_blob_request_limit: 300, recaptcha_enabled: false, receptive_cluster_agents_enabled: false, + relation_export_batch_size: 50, repository_checks_enabled: true, repository_storages_weighted: { 'default' => 100 }, require_admin_approval_after_user_signup: true, diff --git a/app/services/bulk_imports/batched_relation_export_service.rb b/app/services/bulk_imports/batched_relation_export_service.rb index 16cff6df2b9b7cb0260ddb58807b4ac1f9ed095d..924063ad8e1b1ee6a94f4b8204b4962e8e5ca668 100644 --- a/app/services/bulk_imports/batched_relation_export_service.rb +++ b/app/services/bulk_imports/batched_relation_export_service.rb @@ -4,14 +4,18 @@ module BulkImports class BatchedRelationExportService include Gitlab::Utils::StrongMemoize - BATCH_SIZE = 1000 BATCH_CACHE_KEY = 'bulk_imports/batched_relation_export/%{export_id}/%{batch_id}' + BATCH_SIZE_CACHE_KEY = 'bulk_imports/batched_relation_export/%{export_id}/batch_size' CACHE_DURATION = 4.hours def self.cache_key(export_id, batch_id) Kernel.format(BATCH_CACHE_KEY, export_id: export_id, batch_id: batch_id) end + def self.batch_size_cache_key(export_id) + Kernel.format(BATCH_SIZE_CACHE_KEY, export_id: export_id) + end + def initialize(user, portable, relation, jid) @user = user @portable = portable @@ -34,6 +38,25 @@ def execute attr_reader :user, :portable, :relation, :jid, :config, :resolved_relation + # Returns the batch size for processing relation exports. + # + # The batch size determines how many records are processed together in each batch + # during the export operation. We cache the batch size so that any retried workers + # for the same relation export use the same batch size. + # + # @return [Integer] The number of records to process per batch + def batch_size + key = self.class.batch_size_cache_key(export.id) + + Gitlab::Cache::Import::Caching.read_integer(key) || + Gitlab::Cache::Import::Caching.write( + key, + Gitlab::CurrentSettings.relation_export_batch_size, + timeout: CACHE_DURATION + ) + end + strong_memoize_attr :batch_size + def export # rubocop:disable Performance/ActiveRecordSubtransactionMethods -- This is only executed from within a worker @export ||= portable.bulk_import_exports.safe_find_or_create_by!(relation: relation, user: user) @@ -45,7 +68,7 @@ def objects_count end def batches_count - objects_count.fdiv(BATCH_SIZE).ceil + objects_count.fdiv(batch_size).ceil end def start_export! @@ -72,7 +95,7 @@ def update_export!(event) def enqueue_batch_exports batch_number = 0 - resolved_relation.in_batches(of: BATCH_SIZE) do |batch| + resolved_relation.in_batches(of: batch_size) do |batch| batch_number += 1 batch_id = find_or_create_batch(batch_number).id diff --git a/app/validators/json_schemas/application_setting_importers.json b/app/validators/json_schemas/application_setting_importers.json index d0d4f06adb132cd9a6491606b596e57ae8fffcae..0969f91e77a0f4836794ee2fe033805d723e7cc1 100644 --- a/app/validators/json_schemas/application_setting_importers.json +++ b/app/validators/json_schemas/application_setting_importers.json @@ -11,6 +11,9 @@ }, "allow_bypass_placeholder_confirmation": { "type": "boolean" + }, + "relation_export_batch_size": { + "type": "integer" } }, "additionalProperties": false diff --git a/app/workers/bulk_imports/finish_batched_relation_export_worker.rb b/app/workers/bulk_imports/finish_batched_relation_export_worker.rb index 54fcaeb791d02e0a1ad0254f1e716473a3c1bf94..628f216b3c1779b444febbfa940fd6c6a32e3438 100644 --- a/app/workers/bulk_imports/finish_batched_relation_export_worker.rb +++ b/app/workers/bulk_imports/finish_batched_relation_export_worker.rb @@ -52,6 +52,11 @@ def finish_export! end def expire_cache! + Gitlab::Cache::Import::Caching.expire( + BulkImports::BatchedRelationExportService.batch_size_cache_key(export.id), + 0 + ) + export.batches.each do |batch| key = BulkImports::BatchedRelationExportService.cache_key(export.id, batch.id) diff --git a/doc/administration/settings/import_and_export_settings.md b/doc/administration/settings/import_and_export_settings.md index 9da46f2e7ebc599273db502bbaaf33303080d6d3..5684d5484bc0265c5c46b802825a9278bc2e9d2e 100644 --- a/doc/administration/settings/import_and_export_settings.md +++ b/doc/administration/settings/import_and_export_settings.md @@ -319,6 +319,19 @@ To modify this setting, send an API request to `/api/v4/application/settings` with `concurrent_relation_batch_export_limit`. For more information, see [application settings API](../../api/settings.md). +### Export batch size + +{{< history >}} + +- [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/194607) in GitLab 18.2. + +{{< /history >}} + +To further manage memory usage and database load, use the `relation_export_batch_size` setting to control the number of records processed in each batch during export operations. + +The default value is `50` records per batch. To modify this setting, send an API request to `/api/v4/application/settings` with `relation_export_batch_size`. +For more information, see [application settings API](../../api/settings.md). + ## Troubleshooting ## Error: `Help page documentation base url is blocked: execution expired` diff --git a/doc/api/settings.md b/doc/api/settings.md index 6e0dfe7d0867979de83b8da1ac71f0215dd91573..a870dc7fb72a9839f95edc833fc0d3d8550e1a39 100644 --- a/doc/api/settings.md +++ b/doc/api/settings.md @@ -162,6 +162,7 @@ Example response: "security_txt_content": null, "bulk_import_concurrent_pipeline_batch_limit": 25, "concurrent_relation_batch_export_limit": 25, + "relation_export_batch_size": 50, "concurrent_github_import_jobs_limit": 1000, "concurrent_bitbucket_import_jobs_limit": 100, "concurrent_bitbucket_server_import_jobs_limit": 100, @@ -362,6 +363,7 @@ Example response: "security_txt_content": null, "bulk_import_concurrent_pipeline_batch_limit": 25, "concurrent_relation_batch_export_limit": 25, + "relation_export_batch_size": 50, "downstream_pipeline_trigger_limit_per_project_user_sha": 0, "concurrent_github_import_jobs_limit": 1000, "concurrent_bitbucket_import_jobs_limit": 100, @@ -690,6 +692,7 @@ to configure other related settings. These requirements are | `recaptcha_site_key` | string | required by: `recaptcha_enabled` | Site key for reCAPTCHA. | | `receptive_cluster_agents_enabled` | boolean | no | Enable receptive mode for GitLab Agents for Kubernetes. | | `receive_max_input_size` | integer | no | Maximum push size (MB). | +| `relation_export_batch_size` | integer | no | The size of each batch when exporting batched relations. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/194607) in GitLab 18.2. | | `remember_me_enabled` | boolean | no | Enable [**Remember me** setting](../administration/settings/account_and_limit_settings.md#configure-the-remember-me-option). [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/369133) in GitLab 16.0. | | `repository_checks_enabled` | boolean | no | GitLab periodically runs `git fsck` in all project and wiki repositories to look for silent disk corruption issues. | | `repository_size_limit` | integer | no | Size limit per repository (MB). Premium and Ultimate only. | diff --git a/spec/models/application_setting_spec.rb b/spec/models/application_setting_spec.rb index 69f95999b46b9df574fa89f698a2b3b5cf581820..c3c1369772f755994603f77cc56d1c0ac9b25efd 100644 --- a/spec/models/application_setting_spec.rb +++ b/spec/models/application_setting_spec.rb @@ -213,6 +213,7 @@ recaptcha_enabled: false, reindexing_minimum_index_size: 1.gigabyte, reindexing_minimum_relative_bloat_size: 0.2, + relation_export_batch_size: 50, remember_me_enabled: true, repository_checks_enabled: true, repository_storages_weighted: { 'default' => 100 }, @@ -597,6 +598,7 @@ def many_usernames(num = 100) max_yaml_size_bytes namespace_aggregation_schedule_lease_duration_in_seconds project_jobs_api_rate_limit + relation_export_batch_size session_expire_delay snippet_size_limit throttle_authenticated_api_period_in_seconds diff --git a/spec/services/bulk_imports/batched_relation_export_service_spec.rb b/spec/services/bulk_imports/batched_relation_export_service_spec.rb index cb356b90c613757f4813816f613d881928411c6f..9d2f12b581c263c228120558594c5735bd40dd45 100644 --- a/spec/services/bulk_imports/batched_relation_export_service_spec.rb +++ b/spec/services/bulk_imports/batched_relation_export_service_spec.rb @@ -45,13 +45,27 @@ end context 'when there are multiple batches' do + before do + stub_application_setting(relation_export_batch_size: 1) + create_list(:group_label, 10, group: portable) + end + it 'creates a batch record for each batch of records' do - stub_const("#{described_class.name}::BATCH_SIZE", 1) + service.execute - create_list(:group_label, 10, group: portable) + export = portable.bulk_import_exports.first + expect(export.batches.count).to eq(11) + end + + it 'caches the batch size for the export' do + # Execute once to set the cache service.execute + # Run a new instance of the export service for the same relation with + # a different batch size + stub_application_setting(relation_export_batch_size: 2) + described_class.new(user, portable, relation, jid).execute export = portable.bulk_import_exports.first expect(export.batches.count).to eq(11) @@ -88,4 +102,10 @@ expect(described_class.cache_key(1, 1)).to eq('bulk_imports/batched_relation_export/1/1') end end + + describe '.batch_size_cache_key' do + it 'returns the cache key for the export batch size' do + expect(described_class.batch_size_cache_key(1)).to eq('bulk_imports/batched_relation_export/1/batch_size') + end + end end diff --git a/spec/workers/bulk_imports/finish_batched_relation_export_worker_spec.rb b/spec/workers/bulk_imports/finish_batched_relation_export_worker_spec.rb index c6649d7db42bd08e44ddbf6e3af665b98a334cf2..88474e865c848e70cd3e5640da3ce5d6528d2bb9 100644 --- a/spec/workers/bulk_imports/finish_batched_relation_export_worker_spec.rb +++ b/spec/workers/bulk_imports/finish_batched_relation_export_worker_spec.rb @@ -10,10 +10,14 @@ describe '#perform' do it_behaves_like 'an idempotent worker' do - it 'marks export as finished and expires batches cache' do - cache_key = BulkImports::BatchedRelationExportService.cache_key(export.id, batch.id) + it 'marks export as finished and expires batches cache', :aggregate_failures do + allow(Gitlab::Cache::Import::Caching).to receive(:expire) - expect(Gitlab::Cache::Import::Caching).to receive(:expire).with(cache_key, 0) + batch_cache_key = BulkImports::BatchedRelationExportService.cache_key(export.id, batch.id) + expect(Gitlab::Cache::Import::Caching).to receive(:expire).with(batch_cache_key, 0) + + batch_size_cache_key = BulkImports::BatchedRelationExportService.batch_size_cache_key(export.id) + expect(Gitlab::Cache::Import::Caching).to receive(:expire).with(batch_size_cache_key, 0) perform_multiple(job_args)