diff --git a/doc/raketasks/backup_restore.md b/doc/raketasks/backup_restore.md index ffad274c85487ab8a148cd0570b8caab2148938d..561ad808c0653414920f23003acf3035c0ebcd5c 100644 --- a/doc/raketasks/backup_restore.md +++ b/doc/raketasks/backup_restore.md @@ -426,6 +426,25 @@ For example, for installations from source: sudo -u git -H bundle exec rake gitlab:backup:create REPOSITORIES_STORAGES=storage1,storage2 ``` +#### Back up specific project repositories + +> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/88094) in GitLab 15.1. + +You can back up a specific project or list of projects using the `REPOSITORIES_PATHS` option. The option accepts a comma-separated list of +project paths. For example: + +- Omnibus GitLab installations: + + ```shell + sudo gitlab-backup create REPOSITORIES_PATHS=gitlab-org/gitlab,gitlab-org/gitaly + ``` + +- Installations from source: + + ```shell + sudo -u git -H bundle exec rake gitlab:backup:create REPOSITORIES_PATHS=gitlab-org/gitlab,gitlab-org/gitaly + ``` + #### Uploading backups to a remote (cloud) storage You can let the backup script upload (using the [Fog library](http://fog.io/)) @@ -1259,6 +1278,25 @@ For example, for installations from source: sudo -u git -H bundle exec rake gitlab:backup:restore BACKUP=timestamp_of_backup REPOSITORIES_STORAGES=storage1,storage2 ``` +#### Restore specific project repositories + +> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/88094) in GitLab 15.1. + +You can restore a specific project or list of projects using the `REPOSITORIES_PATHS` option. These projects must exist within the +specified backup. The option accepts a comma-separated list of project paths. 
For example: + +- Omnibus GitLab installations: + + ```shell + sudo gitlab-backup restore BACKUP=timestamp_of_backup REPOSITORIES_PATHS=gitlab-org/gitlab,gitlab-org/gitaly + ``` + +- Installations from source: + + ```shell + sudo -u git -H bundle exec rake gitlab:backup:restore BACKUP=timestamp_of_backup REPOSITORIES_PATHS=gitlab-org/gitlab,gitlab-org/gitaly + ``` + ## Alternative backup strategies If your GitLab instance contains a lot of Git repository data, you may find the diff --git a/ee/lib/ee/backup/repositories.rb b/ee/lib/ee/backup/repositories.rb index 871b7a53e3031f496244055f4a8e1c54ae990052..6dcd88b40c2de0228e75f9a6ff979a35c7bdfe75 100644 --- a/ee/lib/ee/backup/repositories.rb +++ b/ee/lib/ee/backup/repositories.rb @@ -10,6 +10,7 @@ module Repositories def group_relation scope = ::Group.includes(:route, :owners, group_wiki_repository: :shard) # rubocop: disable CodeReuse/ActiveRecord scope = scope.id_in(GroupWikiRepository.for_repository_storage(storages).select(:group_id)) if storages.any? + scope = scope.none if paths.any? 
scope end diff --git a/lib/backup/manager.rb b/lib/backup/manager.rb index f249becb8f9f43cb9ef2cb0af6b108cb4e69f882..16b8f21c9e949a0b5a21245dfbe494889838944b 100644 --- a/lib/backup/manager.rb +++ b/lib/backup/manager.rb @@ -11,7 +11,8 @@ class Manager LIST_ENVS = { skipped: 'SKIP', - repositories_storages: 'REPOSITORIES_STORAGES' + repositories_storages: 'REPOSITORIES_STORAGES', + repositories_paths: 'REPOSITORIES_PATHS' }.freeze TaskDefinition = Struct.new( @@ -190,7 +191,11 @@ def build_repositories_task max_storage_concurrency = ENV['GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY'].presence strategy = Backup::GitalyBackup.new(progress, incremental: incremental?, max_parallelism: max_concurrency, storage_parallelism: max_storage_concurrency) - Repositories.new(progress, strategy: strategy, storages: repositories_storages) + Repositories.new(progress, + strategy: strategy, + storages: list_env(:repositories_storages), + paths: list_env(:repositories_paths) + ) end def build_files_task(app_files_dir, excludes: []) @@ -266,7 +271,8 @@ def build_backup_information tar_version: tar_version, installation_type: Gitlab::INSTALLATION_TYPE, skipped: ENV['SKIP'], - repositories_storages: ENV['REPOSITORIES_STORAGES'] + repositories_storages: ENV['REPOSITORIES_STORAGES'], + repositories_paths: ENV['REPOSITORIES_PATHS'] } end @@ -279,7 +285,8 @@ def update_backup_information tar_version: tar_version, installation_type: Gitlab::INSTALLATION_TYPE, skipped: list_env(:skipped).join(','), - repositories_storages: list_env(:repositories_storages).join(',') + repositories_storages: list_env(:repositories_storages).join(','), + repositories_paths: list_env(:repositories_paths).join(',') ) end @@ -472,10 +479,6 @@ def skipped @skipped ||= list_env(:skipped) end - def repositories_storages - @repositories_storages ||= list_env(:repositories_storages) - end - def list_env(name) list = ENV.fetch(LIST_ENVS[name], '').split(',') list += backup_information[name].split(',') if 
backup_information[name] diff --git a/lib/backup/repositories.rb b/lib/backup/repositories.rb index 4a31e87b9698f2e1bcfc9cd00ff22d7525681cc5..29b20bd2e73bab9e0346e6be9cc78543f5b5468d 100644 --- a/lib/backup/repositories.rb +++ b/lib/backup/repositories.rb @@ -3,19 +3,25 @@ require 'yaml' module Backup + # Backs up and restores repositories by querying the database class Repositories < Task extend ::Gitlab::Utils::Override - def initialize(progress, strategy:, storages: []) + # @param [IO] progress IO interface to output progress + # @param [Object] :strategy Fetches backups from gitaly + # @param [Array] :storages Filter by specified storage names. Empty means all storages. + # @param [Array] :paths Filter by specified project paths. Empty means all projects, groups and snippets. + def initialize(progress, strategy:, storages: [], paths: []) super(progress) @strategy = strategy @storages = storages + @paths = paths end override :dump - def dump(path, backup_id) - strategy.start(:create, path, backup_id: backup_id) + def dump(destination_path, backup_id) + strategy.start(:create, destination_path, backup_id: backup_id) enqueue_consecutive ensure @@ -23,8 +29,8 @@ def dump(path, backup_id) end override :restore - def restore(path) - strategy.start(:restore, path) + def restore(destination_path) + strategy.start(:restore, destination_path) enqueue_consecutive ensure @@ -36,7 +42,7 @@ def restore(path) private - attr_reader :strategy, :storages + attr_reader :strategy, :storages, :paths def enqueue_consecutive enqueue_consecutive_projects @@ -66,12 +72,14 @@ def enqueue_snippet(snippet) def project_relation scope = Project.includes(:route, :group, namespace: :owner) scope = scope.id_in(ProjectRepository.for_repository_storage(storages).select(:project_id)) if storages.any? + scope = scope.where_full_path_in(paths) if paths.any? 
scope end def snippet_relation scope = Snippet.all scope = scope.id_in(SnippetRepository.for_repository_storage(storages).select(:snippet_id)) if storages.any? + scope = scope.joins(:project).merge(Project.where_full_path_in(paths)) if paths.any? scope end diff --git a/spec/lib/backup/repositories_spec.rb b/spec/lib/backup/repositories_spec.rb index 1581e4793e3ea290aebd50b571287e91bbbfc853..211b0d91f9fbb4e4ebf6d36208fa17484cd6d357 100644 --- a/spec/lib/backup/repositories_spec.rb +++ b/spec/lib/backup/repositories_spec.rb @@ -6,6 +6,7 @@ let(:progress) { spy(:stdout) } let(:strategy) { spy(:strategy) } let(:storages) { [] } + let(:paths) { [] } let(:destination) { 'repositories' } let(:backup_id) { 'backup_id' } @@ -13,7 +14,8 @@ described_class.new( progress, strategy: strategy, - storages: storages + storages: storages, + paths: paths ) end @@ -107,6 +109,29 @@ expect(strategy).to have_received(:finish!) end end + + describe 'paths' do + let_it_be(:project) { create(:project, :repository) } + + let(:paths) { [project.full_path] } + + it 'calls enqueue for all repositories on the specified project', :aggregate_failures do + excluded_project = create(:project, :repository) + excluded_project_snippet = create(:project_snippet, :repository, project: excluded_project) + excluded_personal_snippet = create(:personal_snippet, :repository, author: excluded_project.first_owner) + + subject.dump(destination, backup_id) + + expect(strategy).to have_received(:start).with(:create, destination, backup_id: backup_id) + expect(strategy).not_to have_received(:enqueue).with(excluded_project, Gitlab::GlRepository::PROJECT) + expect(strategy).not_to have_received(:enqueue).with(excluded_project_snippet, Gitlab::GlRepository::SNIPPET) + expect(strategy).not_to have_received(:enqueue).with(excluded_personal_snippet, Gitlab::GlRepository::SNIPPET) + expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::PROJECT) + expect(strategy).to 
have_received(:enqueue).with(project, Gitlab::GlRepository::WIKI) + expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::DESIGN) + expect(strategy).to have_received(:finish!) + end + end end describe '#restore' do @@ -208,5 +233,26 @@ expect(strategy).to have_received(:finish!) end end + + context 'paths' do + let(:paths) { [project.full_path] } + + it 'calls enqueue for all repositories on the specified project', :aggregate_failures do + excluded_project = create(:project, :repository) + excluded_project_snippet = create(:project_snippet, :repository, project: excluded_project) + excluded_personal_snippet = create(:personal_snippet, :repository, author: excluded_project.first_owner) + + subject.restore(destination) + + expect(strategy).to have_received(:start).with(:restore, destination) + expect(strategy).not_to have_received(:enqueue).with(excluded_project, Gitlab::GlRepository::PROJECT) + expect(strategy).not_to have_received(:enqueue).with(excluded_project_snippet, Gitlab::GlRepository::SNIPPET) + expect(strategy).not_to have_received(:enqueue).with(excluded_personal_snippet, Gitlab::GlRepository::SNIPPET) + expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::PROJECT) + expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::WIKI) + expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::DESIGN) + expect(strategy).to have_received(:finish!) 
+ end + end end end diff --git a/spec/tasks/gitlab/backup_rake_spec.rb b/spec/tasks/gitlab/backup_rake_spec.rb index 52a0a9a73854737a4f7919a10c5b4c35612067b0..4a3b81a072fbbf23613866f0d62df837c85ce048 100644 --- a/spec/tasks/gitlab/backup_rake_spec.rb +++ b/spec/tasks/gitlab/backup_rake_spec.rb @@ -465,7 +465,7 @@ def move_repository_to_secondary(record) stub_env('GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY', 2) expect(::Backup::Repositories).to receive(:new) - .with(anything, strategy: anything, storages: []) + .with(anything, strategy: anything, storages: [], paths: []) .and_call_original expect(::Backup::GitalyBackup).to receive(:new).with(anything, max_parallelism: 5, storage_parallelism: 2, incremental: false).and_call_original