From 01c7df1b9abeb0e81f32fa15efd39eec21c89f5c Mon Sep 17 00:00:00 2001 From: James Fargher Date: Wed, 25 May 2022 12:17:35 +1200 Subject: [PATCH 1/2] Remove Backup::Manager.repositories_storages This method was caching the result, but the result was only used in one place that was already cached. --- lib/backup/manager.rb | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/backup/manager.rb b/lib/backup/manager.rb index f249becb8f9f43..eed1c6f7286149 100644 --- a/lib/backup/manager.rb +++ b/lib/backup/manager.rb @@ -190,7 +190,10 @@ def build_repositories_task max_storage_concurrency = ENV['GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY'].presence strategy = Backup::GitalyBackup.new(progress, incremental: incremental?, max_parallelism: max_concurrency, storage_parallelism: max_storage_concurrency) - Repositories.new(progress, strategy: strategy, storages: repositories_storages) + Repositories.new(progress, + strategy: strategy, + storages: list_env(:repositories_storages), + ) end def build_files_task(app_files_dir, excludes: []) @@ -472,10 +475,6 @@ def skipped @skipped ||= list_env(:skipped) end - def repositories_storages - @repositories_storages ||= list_env(:repositories_storages) - end - def list_env(name) list = ENV.fetch(LIST_ENVS[name], '').split(',') list += backup_information[name].split(',') if backup_information[name] -- GitLab From 28ad9885964359b4ecca98dd6139b333733c7de5 Mon Sep 17 00:00:00 2001 From: James Fargher Date: Fri, 20 May 2022 12:20:23 +1200 Subject: [PATCH 2/2] Add backup option to filter for specific projects The repositories for specific projects can be backed up or restored using the env var REPOSITORIES_PATHS which takes a comma-separated list of project paths. In the future this feature may be expanded to back up all the repositories for groups.
Changelog: added --- doc/raketasks/backup_restore.md | 38 +++++++++++++++++++++ ee/lib/ee/backup/repositories.rb | 1 + lib/backup/manager.rb | 10 ++++-- lib/backup/repositories.rb | 20 +++++++---- spec/lib/backup/repositories_spec.rb | 48 ++++++++++++++++++++++++++- spec/tasks/gitlab/backup_rake_spec.rb | 2 +- 6 files changed, 108 insertions(+), 11 deletions(-) diff --git a/doc/raketasks/backup_restore.md b/doc/raketasks/backup_restore.md index ffad274c85487a..561ad808c06534 100644 --- a/doc/raketasks/backup_restore.md +++ b/doc/raketasks/backup_restore.md @@ -426,6 +426,25 @@ For example, for installations from source: sudo -u git -H bundle exec rake gitlab:backup:create REPOSITORIES_STORAGES=storage1,storage2 ``` +#### Back up specific project repositories + +> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/88094) in GitLab 15.1. + +You can back up a specific project or list of projects using the `REPOSITORIES_PATHS` option. The option accepts a comma-separated list of +project paths. For example: + +- Omnibus GitLab installations: + + ```shell + sudo gitlab-backup create REPOSITORIES_PATHS=gitlab-org/gitlab,gitlab-org/gitaly + ``` + +- Installations from source: + + ```shell + sudo -u git -H bundle exec rake gitlab:backup:create REPOSITORIES_PATHS=gitlab-org/gitlab,gitlab-org/gitaly + ``` + #### Uploading backups to a remote (cloud) storage You can let the backup script upload (using the [Fog library](http://fog.io/)) @@ -1259,6 +1278,25 @@ For example, for installations from source: sudo -u git -H bundle exec rake gitlab:backup:restore BACKUP=timestamp_of_backup REPOSITORIES_STORAGES=storage1,storage2 ``` +#### Restore specific project repositories + +> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/88094) in GitLab 15.1. + +You can restore a specific project or list of projects using the `REPOSITORIES_PATHS` option. These projects must exist within the +specified backup. 
The option accepts a comma-separated list of project paths. For example: + +- Omnibus GitLab installations: + + ```shell + sudo gitlab-backup restore BACKUP=timestamp_of_backup REPOSITORIES_PATHS=gitlab-org/gitlab,gitlab-org/gitaly + ``` + +- Installations from source: + + ```shell + sudo -u git -H bundle exec rake gitlab:backup:restore BACKUP=timestamp_of_backup REPOSITORIES_PATHS=gitlab-org/gitlab,gitlab-org/gitaly + ``` + ## Alternative backup strategies If your GitLab instance contains a lot of Git repository data, you may find the diff --git a/ee/lib/ee/backup/repositories.rb b/ee/lib/ee/backup/repositories.rb index 871b7a53e3031f..6dcd88b40c2de0 100644 --- a/ee/lib/ee/backup/repositories.rb +++ b/ee/lib/ee/backup/repositories.rb @@ -10,6 +10,7 @@ module Repositories def group_relation scope = ::Group.includes(:route, :owners, group_wiki_repository: :shard) # rubocop: disable CodeReuse/ActiveRecord scope = scope.id_in(GroupWikiRepository.for_repository_storage(storages).select(:group_id)) if storages.any? + scope = scope.none if paths.any? 
scope end diff --git a/lib/backup/manager.rb b/lib/backup/manager.rb index eed1c6f7286149..16b8f21c9e949a 100644 --- a/lib/backup/manager.rb +++ b/lib/backup/manager.rb @@ -11,7 +11,8 @@ class Manager LIST_ENVS = { skipped: 'SKIP', - repositories_storages: 'REPOSITORIES_STORAGES' + repositories_storages: 'REPOSITORIES_STORAGES', + repositories_paths: 'REPOSITORIES_PATHS' }.freeze TaskDefinition = Struct.new( @@ -193,6 +194,7 @@ def build_repositories_task Repositories.new(progress, strategy: strategy, storages: list_env(:repositories_storages), + paths: list_env(:repositories_paths) ) end @@ -269,7 +271,8 @@ def build_backup_information tar_version: tar_version, installation_type: Gitlab::INSTALLATION_TYPE, skipped: ENV['SKIP'], - repositories_storages: ENV['REPOSITORIES_STORAGES'] + repositories_storages: ENV['REPOSITORIES_STORAGES'], + repositories_paths: ENV['REPOSITORIES_PATHS'] } end @@ -282,7 +285,8 @@ def update_backup_information tar_version: tar_version, installation_type: Gitlab::INSTALLATION_TYPE, skipped: list_env(:skipped).join(','), - repositories_storages: list_env(:repositories_storages).join(',') + repositories_storages: list_env(:repositories_storages).join(','), + repositories_paths: list_env(:repositories_paths).join(',') ) end diff --git a/lib/backup/repositories.rb b/lib/backup/repositories.rb index 4a31e87b9698f2..29b20bd2e73bab 100644 --- a/lib/backup/repositories.rb +++ b/lib/backup/repositories.rb @@ -3,19 +3,25 @@ require 'yaml' module Backup + # Backs up and restores repositories by querying the database class Repositories < Task extend ::Gitlab::Utils::Override - def initialize(progress, strategy:, storages: []) + # @param [IO] progress IO interface to output progress + # @param [Object] :strategy Fetches backups from gitaly + # @param [Array] :storages Filter by specified storage names. Empty means all storages. + # @param [Array] :paths Filter by specified project paths. Empty means all projects, groups and snippets.
+ def initialize(progress, strategy:, storages: [], paths: []) super(progress) @strategy = strategy @storages = storages + @paths = paths end override :dump - def dump(path, backup_id) - strategy.start(:create, path, backup_id: backup_id) + def dump(destination_path, backup_id) + strategy.start(:create, destination_path, backup_id: backup_id) enqueue_consecutive ensure @@ -23,8 +29,8 @@ def dump(path, backup_id) end override :restore - def restore(path) - strategy.start(:restore, path) + def restore(destination_path) + strategy.start(:restore, destination_path) enqueue_consecutive ensure @@ -36,7 +42,7 @@ def restore(path) private - attr_reader :strategy, :storages + attr_reader :strategy, :storages, :paths def enqueue_consecutive enqueue_consecutive_projects @@ -66,12 +72,14 @@ def enqueue_snippet(snippet) def project_relation scope = Project.includes(:route, :group, namespace: :owner) scope = scope.id_in(ProjectRepository.for_repository_storage(storages).select(:project_id)) if storages.any? + scope = scope.where_full_path_in(paths) if paths.any? scope end def snippet_relation scope = Snippet.all scope = scope.id_in(SnippetRepository.for_repository_storage(storages).select(:snippet_id)) if storages.any? + scope = scope.joins(:project).merge(Project.where_full_path_in(paths)) if paths.any? scope end diff --git a/spec/lib/backup/repositories_spec.rb b/spec/lib/backup/repositories_spec.rb index 1581e4793e3ea2..211b0d91f9fbb4 100644 --- a/spec/lib/backup/repositories_spec.rb +++ b/spec/lib/backup/repositories_spec.rb @@ -6,6 +6,7 @@ let(:progress) { spy(:stdout) } let(:strategy) { spy(:strategy) } let(:storages) { [] } + let(:paths) { [] } let(:destination) { 'repositories' } let(:backup_id) { 'backup_id' } @@ -13,7 +14,8 @@ described_class.new( progress, strategy: strategy, - storages: storages + storages: storages, + paths: paths ) end @@ -107,6 +109,29 @@ expect(strategy).to have_received(:finish!) 
end end + + describe 'paths' do + let_it_be(:project) { create(:project, :repository) } + + let(:paths) { [project.full_path] } + + it 'calls enqueue for all repositories on the specified project', :aggregate_failures do + excluded_project = create(:project, :repository) + excluded_project_snippet = create(:project_snippet, :repository, project: excluded_project) + excluded_personal_snippet = create(:personal_snippet, :repository, author: excluded_project.first_owner) + + subject.dump(destination, backup_id) + + expect(strategy).to have_received(:start).with(:create, destination, backup_id: backup_id) + expect(strategy).not_to have_received(:enqueue).with(excluded_project, Gitlab::GlRepository::PROJECT) + expect(strategy).not_to have_received(:enqueue).with(excluded_project_snippet, Gitlab::GlRepository::SNIPPET) + expect(strategy).not_to have_received(:enqueue).with(excluded_personal_snippet, Gitlab::GlRepository::SNIPPET) + expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::PROJECT) + expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::WIKI) + expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::DESIGN) + expect(strategy).to have_received(:finish!) + end + end end describe '#restore' do @@ -208,5 +233,26 @@ expect(strategy).to have_received(:finish!) 
end end + + context 'paths' do + let(:paths) { [project.full_path] } + + it 'calls enqueue for all repositories on the specified project', :aggregate_failures do + excluded_project = create(:project, :repository) + excluded_project_snippet = create(:project_snippet, :repository, project: excluded_project) + excluded_personal_snippet = create(:personal_snippet, :repository, author: excluded_project.first_owner) + + subject.restore(destination) + + expect(strategy).to have_received(:start).with(:restore, destination) + expect(strategy).not_to have_received(:enqueue).with(excluded_project, Gitlab::GlRepository::PROJECT) + expect(strategy).not_to have_received(:enqueue).with(excluded_project_snippet, Gitlab::GlRepository::SNIPPET) + expect(strategy).not_to have_received(:enqueue).with(excluded_personal_snippet, Gitlab::GlRepository::SNIPPET) + expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::PROJECT) + expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::WIKI) + expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::DESIGN) + expect(strategy).to have_received(:finish!) + end + end end end diff --git a/spec/tasks/gitlab/backup_rake_spec.rb b/spec/tasks/gitlab/backup_rake_spec.rb index 52a0a9a7385473..4a3b81a072fbbf 100644 --- a/spec/tasks/gitlab/backup_rake_spec.rb +++ b/spec/tasks/gitlab/backup_rake_spec.rb @@ -465,7 +465,7 @@ def move_repository_to_secondary(record) stub_env('GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY', 2) expect(::Backup::Repositories).to receive(:new) - .with(anything, strategy: anything, storages: []) + .with(anything, strategy: anything, storages: [], paths: []) .and_call_original expect(::Backup::GitalyBackup).to receive(:new).with(anything, max_parallelism: 5, storage_parallelism: 2, incremental: false).and_call_original -- GitLab