diff --git a/doc/raketasks/backup_restore.md b/doc/raketasks/backup_restore.md index 4befb9f62d4d5435763ac559b27d9f162f4a11f4..7ff03989c61ac5596e4440adc3df60169b10e2f7 100644 --- a/doc/raketasks/backup_restore.md +++ b/doc/raketasks/backup_restore.md @@ -405,6 +405,27 @@ Incremental backups can also be created from [an untarred backup](#skipping-tar- sudo gitlab-backup create INCREMENTAL=yes SKIP=tar ``` +#### Back up specific repository storages + +> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/86896) in GitLab 15.0. + +When using [multiple repository storages](../administration/repository_storage_paths.md), +repositories from specific repository storages can be backed up separately +using the `REPOSITORIES_STORAGES` option. The option accepts a comma-separated list of +storage names. + +For example, for Omnibus GitLab installations: + +```shell +sudo gitlab-backup create REPOSITORIES_STORAGES=storage1,storage2 +``` + +For example, for installations from source: + +```shell +sudo -u git -H bundle exec rake gitlab:backup:create REPOSITORIES_STORAGES=storage1,storage2 +``` + #### Uploading backups to a remote (cloud) storage You can let the backup script upload (using the [Fog library](http://fog.io/)) @@ -1217,6 +1238,27 @@ For installations from source: sudo -u git -H bundle exec rake gitlab:backup:restore BACKUP=timestamp_of_backup SKIP=db,uploads RAILS_ENV=production ``` +#### Restore specific repository storages + +> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/86896) in GitLab 15.0. + +When using [multiple repository storages](../administration/repository_storage_paths.md), +repositories from specific repository storages can be restored separately +using the `REPOSITORIES_STORAGES` option. The option accepts a comma-separated list of +storage names. + +For example, for Omnibus GitLab installations: + +```shell +sudo gitlab-backup restore BACKUP=timestamp_of_backup REPOSITORIES_STORAGES=storage1,storage2 +``` + +For example, for installations from source: + +```shell +sudo -u git -H bundle exec rake gitlab:backup:restore BACKUP=timestamp_of_backup REPOSITORIES_STORAGES=storage1,storage2 +``` + ## Alternative backup strategies If your GitLab instance contains a lot of Git repository data, you may find the diff --git a/ee/lib/ee/backup/repositories.rb b/ee/lib/ee/backup/repositories.rb index 0b25accc1769d59ffbb53537923b5de04f571675..871b7a53e3031f496244055f4a8e1c54ae990052 100644 --- a/ee/lib/ee/backup/repositories.rb +++ b/ee/lib/ee/backup/repositories.rb @@ -8,7 +8,9 @@ module Repositories private def group_relation - ::Group.includes(:route, :owners, group_wiki_repository: :shard) # rubocop: disable CodeReuse/ActiveRecord + scope = ::Group.includes(:route, :owners, group_wiki_repository: :shard) # rubocop: disable CodeReuse/ActiveRecord + scope = scope.id_in(GroupWikiRepository.for_repository_storage(storages).select(:group_id)) if storages.any? + scope end def find_groups_in_batches(&block) diff --git a/ee/spec/lib/ee/backup/repositories_spec.rb b/ee/spec/lib/ee/backup/repositories_spec.rb index 87c4d8f950b884f83dd5377ae6ede84f38fcadc6..363c279a8fff24dce8a403477339a234f0ceb667 100644 --- a/ee/spec/lib/ee/backup/repositories_spec.rb +++ b/ee/spec/lib/ee/backup/repositories_spec.rb @@ -5,10 +5,11 @@ RSpec.describe Backup::Repositories do let(:progress) { spy(:stdout) } let(:strategy) { spy(:strategy) } + let(:storages) { [] } let(:destination) { 'repositories' } let(:backup_id) { 'backup_id' } - subject { described_class.new(progress, strategy: strategy) } + subject { described_class.new(progress, strategy: strategy, storages: storages) } describe '#dump' do let_it_be(:project) { create(:project, :repository) } @@ -50,11 +51,37 @@ subject.dump(destination, backup_id) end.not_to exceed_query_limit(control_count) end + + context 'storages' do + let(:storages) { %w{default} } + + before do + stub_storage_settings('test_second_storage' => { + 'gitaly_address' => Gitlab.config.repositories.storages.default.gitaly_address, + 'path' => TestEnv::SECOND_STORAGE_PATH + }) + end + + it 'calls enqueue for all repositories on the specified storage', :aggregate_failures do + excluded_group = create(:group, :wiki_repo) + excluded_group.group_wiki_repository.update!(shard_name: 'test_second_storage') + + subject.dump(destination, backup_id) + + expect(strategy).to have_received(:start).with(:create, destination, backup_id: backup_id) + expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::PROJECT) + expect(strategy).not_to have_received(:enqueue).with(excluded_group, Gitlab::GlRepository::WIKI) + groups.each do |group| + expect(strategy).to have_received(:enqueue).with(group, Gitlab::GlRepository::WIKI) + end + expect(strategy).to have_received(:finish!) + end + end end describe '#restore' do - let_it_be(:project) { create(:project) } - let_it_be(:group) { create(:group) } + let_it_be(:project) { create(:project, :repository) } + let_it_be(:group) { create(:group, :wiki_repo) } it 'calls enqueue for each repository type', :aggregate_failures do subject.restore(destination) @@ -64,5 +91,29 @@ expect(strategy).to have_received(:enqueue).with(group, Gitlab::GlRepository::WIKI) expect(strategy).to have_received(:finish!) end + + context 'storages' do + let(:storages) { %w{default} } + + before do + stub_storage_settings('test_second_storage' => { + 'gitaly_address' => Gitlab.config.repositories.storages.default.gitaly_address, + 'path' => TestEnv::SECOND_STORAGE_PATH + }) + end + + it 'calls enqueue for all repositories on the specified storage', :aggregate_failures do + excluded_group = create(:group, :wiki_repo) + excluded_group.group_wiki_repository.update!(shard_name: 'test_second_storage') + + subject.restore(destination) + + expect(strategy).to have_received(:start).with(:restore, destination) + expect(strategy).not_to have_received(:enqueue).with(excluded_group, Gitlab::GlRepository::WIKI) + expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::PROJECT) + expect(strategy).to have_received(:enqueue).with(group, Gitlab::GlRepository::WIKI) + expect(strategy).to have_received(:finish!) + end + end end end diff --git a/lib/backup/manager.rb b/lib/backup/manager.rb index baff68d503fae51ed37614c76dab0c7100c27356..0991177d044a779cae6be3029721d53bb49aa392 100644 --- a/lib/backup/manager.rb +++ b/lib/backup/manager.rb @@ -9,6 +9,11 @@ class Manager # if some of these files are still there, we don't need them in the backup LEGACY_PAGES_TMP_PATH = '@pages.tmp' + LIST_ENVS = { + skipped: 'SKIP', + repositories_storages: 'REPOSITORIES_STORAGES' + }.freeze + TaskDefinition = Struct.new( :enabled, # `true` if the task can be used. Treated as `true` when not specified. :human_name, # Name of the task used for logging. @@ -32,7 +37,7 @@ def initialize(progress, definitions: nil) Feature.enabled?(:incremental_repository_backup) && Gitlab::Utils.to_boolean(ENV['INCREMENTAL'], default: false) - @definitions = definitions || build_definitions + @definitions = definitions end def create @@ -43,7 +48,9 @@ def create update_backup_information end - @definitions.keys.each do |task_name| + build_backup_information + + definitions.keys.each do |task_name| run_create_task(task_name) end @@ -65,10 +72,10 @@ def create end def run_create_task(task_name) - definition = @definitions[task_name] - build_backup_information + definition = definitions[task_name] + unless definition.enabled? puts_time "Dumping #{definition.human_name} ... ".color(:blue) + "[DISABLED]".color(:cyan) return @@ -92,7 +99,7 @@ def restore read_backup_information verify_backup_version - @definitions.keys.each do |task_name| + definitions.keys.each do |task_name| run_restore_task(task_name) if !skipped?(task_name) && enabled_task?(task_name) end @@ -111,7 +118,9 @@ def restore end def run_restore_task(task_name) - definition = @definitions[task_name] + read_backup_information + + definition = definitions[task_name] unless definition.enabled? puts_time "Restoring #{definition.human_name} ... ".color(:blue) + "[DISABLED]".color(:cyan) @@ -143,6 +152,10 @@ def run_restore_task(task_name) private + def definitions + @definitions ||= build_definitions + end + def build_definitions { 'db' => TaskDefinition.new( @@ -212,7 +225,7 @@ def build_repositories_task max_storage_concurrency = ENV['GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY'].presence strategy = Backup::GitalyBackup.new(progress, incremental: incremental?, max_parallelism: max_concurrency, storage_parallelism: max_storage_concurrency) - Repositories.new(progress, strategy: strategy) + Repositories.new(progress, strategy: strategy, storages: repositories_storages) end def build_files_task(app_files_dir, excludes: []) @@ -245,7 +258,8 @@ def build_backup_information gitlab_version: Gitlab::VERSION, tar_version: tar_version, installation_type: Gitlab::INSTALLATION_TYPE, - skipped: ENV["SKIP"] + skipped: ENV['SKIP'], + repositories_storages: ENV['REPOSITORIES_STORAGES'] } end @@ -256,7 +270,9 @@ def update_backup_information backup_created_at: Time.zone.now, gitlab_version: Gitlab::VERSION, tar_version: tar_version, - installation_type: Gitlab::INSTALLATION_TYPE + installation_type: Gitlab::INSTALLATION_TYPE, + skipped: list_env(:skipped).join(','), + repositories_storages: list_env(:repositories_storages).join(',') ) end @@ -309,7 +325,7 @@ def cleanup puts_time "Deleting tar staging files ... ".color(:blue) remove_backup_path(MANIFEST_NAME) - @definitions.each do |_, definition| + definitions.each do |_, definition| remove_backup_path(definition.cleanup_path || definition.destination_path) end @@ -443,12 +459,26 @@ def tar_version end def skipped?(item) - ENV.fetch('SKIP', '').include?(item) || - backup_information[:skipped] && backup_information[:skipped].include?(item) + skipped.include?(item) + end + + def skipped + @skipped ||= list_env(:skipped) + end + + def repositories_storages + @repositories_storages ||= list_env(:repositories_storages) + end + + def list_env(name) + list = ENV.fetch(LIST_ENVS[name], '').split(',') + list += backup_information[name].split(',') if backup_information[name] + list.uniq! + list end def enabled_task?(task_name) - @definitions[task_name].enabled? + definitions[task_name].enabled? end def backup_file?(file) @@ -503,7 +533,7 @@ def remote_target end def backup_contents - [MANIFEST_NAME] + @definitions.reject do |name, definition| + [MANIFEST_NAME] + definitions.reject do |name, definition| skipped?(name) || !enabled_task?(name) || (definition.destination_optional && !File.exist?(File.join(backup_path, definition.destination_path))) end.values.map(&:destination_path) diff --git a/lib/backup/repositories.rb b/lib/backup/repositories.rb index 11bed84e356f8da528b81d597a77e7e4c56508f6..4a31e87b9698f2e1bcfc9cd00ff22d7525681cc5 100644 --- a/lib/backup/repositories.rb +++ b/lib/backup/repositories.rb @@ -6,10 +6,11 @@ module Backup class Repositories < Task extend ::Gitlab::Utils::Override - def initialize(progress, strategy:) + def initialize(progress, strategy:, storages: []) super(progress) @strategy = strategy + @storages = storages end override :dump @@ -35,7 +36,7 @@ def restore(path) private - attr_reader :strategy + attr_reader :strategy, :storages def enqueue_consecutive enqueue_consecutive_projects @@ -49,7 +50,7 @@ def enqueue_consecutive_projects end def enqueue_consecutive_snippets - Snippet.find_each(batch_size: 1000) { |snippet| enqueue_snippet(snippet) } + snippet_relation.find_each(batch_size: 1000) { |snippet| enqueue_snippet(snippet) } end def enqueue_project(project) @@ -63,7 +64,15 @@ def enqueue_snippet(snippet) end def project_relation - Project.includes(:route, :group, namespace: :owner) + scope = Project.includes(:route, :group, namespace: :owner) + scope = scope.id_in(ProjectRepository.for_repository_storage(storages).select(:project_id)) if storages.any? + scope + end + + def snippet_relation + scope = Snippet.all + scope = scope.id_in(SnippetRepository.for_repository_storage(storages).select(:snippet_id)) if storages.any? + scope end def restore_object_pools @@ -88,7 +97,7 @@ def restore_object_pools def cleanup_snippets_without_repositories invalid_snippets = [] - Snippet.find_each(batch_size: 1000).each do |snippet| + snippet_relation.find_each(batch_size: 1000).each do |snippet| response = Snippets::RepositoryValidationService.new(nil, snippet).execute next if response.success? diff --git a/spec/lib/backup/manager_spec.rb b/spec/lib/backup/manager_spec.rb index 81573b6140d25a80d5433229ef78a2ac230d7297..a2477834dde3eba8ee5c8a11ed88aa738f5089af 100644 --- a/spec/lib/backup/manager_spec.rb +++ b/spec/lib/backup/manager_spec.rb @@ -853,6 +853,7 @@ ] ) allow(File).to receive(:exist?).with(File.join(Gitlab.config.backup.path, 'backup_information.yml')).and_return(true) + stub_env('SKIP', 'something') end after do @@ -872,7 +873,7 @@ backup_created_at: backup_time, full_backup_id: full_backup_id, gitlab_version: Gitlab::VERSION, - skipped: 'tar' + skipped: 'something,tar' ) end diff --git a/spec/lib/backup/repositories_spec.rb b/spec/lib/backup/repositories_spec.rb index c6f611e727cdd855f1e5ef737a6e82347670bcbc..1581e4793e3ea290aebd50b571287e91bbbfc853 100644 --- a/spec/lib/backup/repositories_spec.rb +++ b/spec/lib/backup/repositories_spec.rb @@ -5,13 +5,15 @@ RSpec.describe Backup::Repositories do let(:progress) { spy(:stdout) } let(:strategy) { spy(:strategy) } + let(:storages) { [] } let(:destination) { 'repositories' } let(:backup_id) { 'backup_id' } subject do described_class.new( progress, - strategy: strategy + strategy: strategy, + storages: storages ) end @@ -67,17 +69,50 @@ end.count create_list(:project, 2, :repository) + create_list(:snippet, 2, :repository) expect do subject.dump(destination, backup_id) end.not_to exceed_query_limit(control_count) end + + describe 'storages' do + let(:storages) { %w{default} } + + let_it_be(:project) { create(:project, :repository) } + + before do + stub_storage_settings('test_second_storage' => { + 'gitaly_address' => Gitlab.config.repositories.storages.default.gitaly_address, + 'path' => TestEnv::SECOND_STORAGE_PATH + }) + end + + it 'calls enqueue for all repositories on the specified storage', :aggregate_failures do + excluded_project = create(:project, :repository, repository_storage: 'test_second_storage') + excluded_project_snippet = create(:project_snippet, :repository, project: excluded_project) + excluded_project_snippet.track_snippet_repository('test_second_storage') + excluded_personal_snippet = create(:personal_snippet, :repository, author: excluded_project.first_owner) + excluded_personal_snippet.track_snippet_repository('test_second_storage') + + subject.dump(destination, backup_id) + + expect(strategy).to have_received(:start).with(:create, destination, backup_id: backup_id) + expect(strategy).not_to have_received(:enqueue).with(excluded_project, Gitlab::GlRepository::PROJECT) + expect(strategy).not_to have_received(:enqueue).with(excluded_project_snippet, Gitlab::GlRepository::SNIPPET) + expect(strategy).not_to have_received(:enqueue).with(excluded_personal_snippet, Gitlab::GlRepository::SNIPPET) + expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::PROJECT) + expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::WIKI) + expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::DESIGN) + expect(strategy).to have_received(:finish!) + end + end end describe '#restore' do - let_it_be(:project) { create(:project) } - let_it_be(:personal_snippet) { create(:personal_snippet, author: project.first_owner) } - let_it_be(:project_snippet) { create(:project_snippet, project: project, author: project.first_owner) } + let_it_be(:project) { create(:project, :repository) } + let_it_be(:personal_snippet) { create(:personal_snippet, :repository, author: project.first_owner) } + let_it_be(:project_snippet) { create(:project_snippet, :repository, project: project, author: project.first_owner) } it 'calls enqueue for each repository type', :aggregate_failures do subject.restore(destination) @@ -116,9 +151,6 @@ context 'cleanup snippets' do before do - create(:snippet_repository, snippet: personal_snippet) - create(:snippet_repository, snippet: project_snippet) - error_response = ServiceResponse.error(message: "Repository has more than one branch") allow(Snippets::RepositoryValidationService).to receive_message_chain(:new, :execute).and_return(error_response) end @@ -146,5 +178,35 @@ expect(gitlab_shell.repository_exists?(shard_name, path)).to eq false end end + + context 'storages' do + let(:storages) { %w{default} } + + before do + stub_storage_settings('test_second_storage' => { + 'gitaly_address' => Gitlab.config.repositories.storages.default.gitaly_address, + 'path' => TestEnv::SECOND_STORAGE_PATH + }) + end + + it 'calls enqueue for all repositories on the specified storage', :aggregate_failures do + excluded_project = create(:project, :repository, repository_storage: 'test_second_storage') + excluded_project_snippet = create(:project_snippet, :repository, project: excluded_project) + excluded_project_snippet.track_snippet_repository('test_second_storage') + excluded_personal_snippet = create(:personal_snippet, :repository, author: excluded_project.first_owner) + excluded_personal_snippet.track_snippet_repository('test_second_storage') + + subject.restore(destination) + + expect(strategy).to have_received(:start).with(:restore, destination) + expect(strategy).not_to have_received(:enqueue).with(excluded_project, Gitlab::GlRepository::PROJECT) + expect(strategy).not_to have_received(:enqueue).with(excluded_project_snippet, Gitlab::GlRepository::SNIPPET) + expect(strategy).not_to have_received(:enqueue).with(excluded_personal_snippet, Gitlab::GlRepository::SNIPPET) + expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::PROJECT) + expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::WIKI) + expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::DESIGN) + expect(strategy).to have_received(:finish!) + end + end end end diff --git a/spec/tasks/gitlab/backup_rake_spec.rb b/spec/tasks/gitlab/backup_rake_spec.rb index 6080948403df3956695aeec3a67082b1f0f749d5..52a0a9a73854737a4f7919a10c5b4c35612067b0 100644 --- a/spec/tasks/gitlab/backup_rake_spec.rb +++ b/spec/tasks/gitlab/backup_rake_spec.rb @@ -377,21 +377,6 @@ def reenable_backup_sub_tasks expect(tar_lines).to include(a_string_matching(repo_name)) end end - - def move_repository_to_secondary(record) - Gitlab::GitalyClient::StorageSettings.allow_disk_access do - default_shard_legacy_path = Gitlab.config.repositories.storages.default.legacy_disk_path - secondary_legacy_path = Gitlab.config.repositories.storages[second_storage_name].legacy_disk_path - dst_dir = File.join(secondary_legacy_path, File.dirname(record.disk_path)) - - FileUtils.mkdir_p(dst_dir) unless Dir.exist?(dst_dir) - - FileUtils.mv( - File.join(default_shard_legacy_path, record.disk_path + '.git'), - File.join(secondary_legacy_path, record.disk_path + '.git') - ) - end - end end context 'no concurrency' do @@ -405,6 +390,66 @@ def move_repository_to_secondary(record) it_behaves_like 'includes repositories in all repository storages' end + + context 'REPOSITORIES_STORAGES set' do + before do + stub_env('REPOSITORIES_STORAGES', default_storage_name) + end + + it 'includes repositories in default repository storage', :aggregate_failures do + project_a = create(:project, :repository) + project_snippet_a = create(:project_snippet, :repository, project: project_a, author: project_a.first_owner) + project_b = create(:project, :repository, repository_storage: second_storage_name) + project_snippet_b = create(:project_snippet, :repository, project: project_b, author: project_b.first_owner) + project_snippet_b.snippet_repository.update!(shard: project_b.project_repository.shard) + create(:wiki_page, container: project_a) + create(:design, :with_file, issue: create(:issue, project: project_a)) + + move_repository_to_secondary(project_b) + move_repository_to_secondary(project_snippet_b) + + expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process + + tar_contents, exit_status = Gitlab::Popen.popen( + %W{tar -tvf #{backup_tar} repositories} + ) + + tar_lines = tar_contents.lines.grep(/\.bundle/) + + expect(exit_status).to eq(0) + + [ + "#{project_a.disk_path}/.+/001.bundle", + "#{project_a.disk_path}.wiki/.+/001.bundle", + "#{project_a.disk_path}.design/.+/001.bundle", + "#{project_snippet_a.disk_path}/.+/001.bundle" + ].each do |repo_name| + expect(tar_lines).to include(a_string_matching(repo_name)) + end + + [ + "#{project_b.disk_path}/.+/001.bundle", + "#{project_snippet_b.disk_path}/.+/001.bundle" + ].each do |repo_name| + expect(tar_lines).not_to include(a_string_matching(repo_name)) + end + end + end + + def move_repository_to_secondary(record) + Gitlab::GitalyClient::StorageSettings.allow_disk_access do + default_shard_legacy_path = Gitlab.config.repositories.storages.default.legacy_disk_path + secondary_legacy_path = Gitlab.config.repositories.storages[second_storage_name].legacy_disk_path + dst_dir = File.join(secondary_legacy_path, File.dirname(record.disk_path)) + + FileUtils.mkdir_p(dst_dir) unless Dir.exist?(dst_dir) + + FileUtils.mv( + File.join(default_shard_legacy_path, record.disk_path + '.git'), + File.join(secondary_legacy_path, record.disk_path + '.git') + ) + end + end end context 'concurrency settings' do @@ -420,7 +465,7 @@ def move_repository_to_secondary(record) stub_env('GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY', 2) expect(::Backup::Repositories).to receive(:new) - .with(anything, strategy: anything) + .with(anything, strategy: anything, storages: []) .and_call_original expect(::Backup::GitalyBackup).to receive(:new).with(anything, max_parallelism: 5, storage_parallelism: 2, incremental: false).and_call_original