From 49dad2037d6a0905a9ed8e7802d43212c1c2c3d8 Mon Sep 17 00:00:00 2001 From: James Liu Date: Tue, 9 Jan 2024 15:07:31 +1100 Subject: [PATCH 1/2] backup: Fix dangling repo check Addresses a few issues with the prior implementation that checked for "dangling" repositories after a restore. A dangling repo is one that was created after a backup which is being restored was made, and should be deleted to keep Rails and Gitaly in sync. Modifies the `processedRepos` inner map so it's keyed by a string instead of a pointer value. Comparing repository objects via their pointers would not work correctly. Also fixes how we're checking if a key exists in the nested inner map. Fixes the existing tests so that all repositories are created with `gittest.CreateRepository` and are thus visible when we later query `ListRepositories` in the restore logic. --- internal/backup/pipeline.go | 17 ++++++--- internal/backup/pipeline_test.go | 25 ++++++++----- internal/cli/gitalybackup/restore.go | 2 +- internal/cli/gitalybackup/restore_test.go | 45 +++++++++++++++-------- 4 files changed, 57 insertions(+), 32 deletions(-) diff --git a/internal/backup/pipeline.go b/internal/backup/pipeline.go index 3eb91de919..7769bab78d 100644 --- a/internal/backup/pipeline.go +++ b/internal/backup/pipeline.go @@ -188,7 +188,7 @@ type Pipeline struct { pipelineError error cmdErrors *commandErrors - processedRepos map[string]map[*gitalypb.Repository]struct{} + processedRepos map[string]map[repositoryKey]struct{} processedReposMu sync.Mutex } @@ -204,7 +204,7 @@ func NewPipeline(log log.Logger, opts ...PipelineOption) (*Pipeline, error) { done: make(chan struct{}), workersByStorage: make(map[string]chan *contextCommand), cmdErrors: &commandErrors{}, - processedRepos: make(map[string]map[*gitalypb.Repository]struct{}), + processedRepos: make(map[string]map[repositoryKey]struct{}), } for _, opt := range opts { @@ -262,7 +262,7 @@ func (p *Pipeline) Handle(ctx context.Context, cmd Command) { } // Done waits for any in progress jobs to complete then reports any accumulated errors -func (p *Pipeline) Done() (processedRepos map[string]map[*gitalypb.Repository]struct{}, err error) { +func (p *Pipeline) Done() (processedRepos map[string]map[repositoryKey]struct{}, err error) { close(p.done) p.workerWg.Wait() @@ -338,9 +338,9 @@ func (p *Pipeline) processCommand(ctx context.Context, cmd Command) { storageName := cmd.Repository().StorageName p.processedReposMu.Lock() if _, ok := p.processedRepos[storageName]; !ok { - p.processedRepos[storageName] = make(map[*gitalypb.Repository]struct{}) + p.processedRepos[storageName] = make(map[repositoryKey]struct{}) } - p.processedRepos[storageName][cmd.Repository()] = struct{}{} + p.processedRepos[storageName][NewRepositoryKey(cmd.Repository())] = struct{}{} p.processedReposMu.Unlock() log.Info(fmt.Sprintf("completed %s", cmd.Name())) @@ -381,3 +381,10 @@ func (p *Pipeline) releaseWorkerSlot() { } <-p.totalWorkers } + +type repositoryKey string + +// NewRepositoryKey returns a unique identifier for the provided repo. +func NewRepositoryKey(repo *gitalypb.Repository) repositoryKey { + return repositoryKey(repo.StorageName + "-" + repo.RelativePath) +} diff --git a/internal/backup/pipeline_test.go b/internal/backup/pipeline_test.go index 37f7c2e5b2..9d05eae743 100644 --- a/internal/backup/pipeline_test.go +++ b/internal/backup/pipeline_test.go @@ -323,16 +323,23 @@ func TestPipelineError(t *testing.T) { func TestPipelineProcessedRepos(t *testing.T) { strategy := MockStrategy{} - repos := map[string]map[*gitalypb.Repository]struct{}{ + repos := []*gitalypb.Repository{ + {RelativePath: "a.git", StorageName: "storage1"}, + {RelativePath: "b.git", StorageName: "storage1"}, + {RelativePath: "c.git", StorageName: "storage2"}, + {RelativePath: "d.git", StorageName: "storage3"}, + } + + expectedProcessedRepos := map[string]map[repositoryKey]struct{}{ "storage1": { - &gitalypb.Repository{RelativePath: "a.git", StorageName: "storage1"}: struct{}{}, - &gitalypb.Repository{RelativePath: "b.git", StorageName: "storage1"}: struct{}{}, + "storage1-a.git": {}, + "storage1-b.git": {}, }, "storage2": { - &gitalypb.Repository{RelativePath: "c.git", StorageName: "storage2"}: struct{}{}, + "storage2-c.git": {}, }, "storage3": { - &gitalypb.Repository{RelativePath: "d.git", StorageName: "storage3"}: struct{}{}, + "storage3-d.git": {}, }, } @@ -340,13 +347,11 @@ func TestPipelineProcessedRepos(t *testing.T) { require.NoError(t, err) ctx := testhelper.Context(t) - for _, v := range repos { - for repo := range v { - p.Handle(ctx, NewRestoreCommand(strategy, RestoreRequest{Repository: repo})) - } + for _, repo := range repos { + p.Handle(ctx, NewRestoreCommand(strategy, RestoreRequest{Repository: repo})) } processedRepos, err := p.Done() require.NoError(t, err) - require.EqualValues(t, repos, processedRepos) + require.EqualValues(t, expectedProcessedRepos, processedRepos) } diff --git a/internal/cli/gitalybackup/restore.go b/internal/cli/gitalybackup/restore.go index defffa2231..7a3833f80b 100644 --- a/internal/cli/gitalybackup/restore.go +++ b/internal/cli/gitalybackup/restore.go @@ -189,7 +189,7 @@ func (cmd *restoreSubcommand) run(ctx context.Context, logger log.Logger, stdin var removalErrors []error for storageName, repos := range existingRepos { for _, repo := range repos { - if dangling := restoredRepos[storageName][repo]; dangling == struct{}{} { + if _, ok := restoredRepos[storageName][backup.NewRepositoryKey(repo)]; !ok { // If we have dangling repos (those which exist in the storage but // weren't part of the restore), they need to be deleted so the // state of repos in Gitaly matches that in the Rails DB. diff --git a/internal/cli/gitalybackup/restore_test.go b/internal/cli/gitalybackup/restore_test.go index 6d235525c4..033420d8f6 100644 --- a/internal/cli/gitalybackup/restore_test.go +++ b/internal/cli/gitalybackup/restore_test.go @@ -40,24 +40,32 @@ Issue: https://gitlab.com/gitlab-org/gitaly/-/issues/5269`) cfg.SocketPath = testserver.RunGitalyServer(t, cfg, setup.RegisterAll) + // This is an example of a "dangling" repository (one that was created after a backup was taken) that should be + // removed after the backup is restored. existingRepo, existRepoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ RelativePath: "existing_repo", }) gittest.WriteCommit(t, cfg, existRepoPath, gittest.WithBranch(git.DefaultBranch)) - path := testhelper.TempDir(t) - existingRepoBundlePath := filepath.Join(path, existingRepo.RelativePath+".bundle") - existingRepoRefPath := filepath.Join(path, existingRepo.RelativePath+".refs") + // The backupDir contains the artifacts that would've been created as part of a backup. + backupDir := testhelper.TempDir(t) + existingRepoBundlePath := filepath.Join(backupDir, existingRepo.RelativePath+".bundle") + existingRepoRefPath := filepath.Join(backupDir, existingRepo.RelativePath+".refs") gittest.Exec(t, cfg, "-C", existRepoPath, "bundle", "create", existingRepoBundlePath, "--all") require.NoError(t, os.WriteFile(existingRepoRefPath, gittest.Exec(t, cfg, "-C", existRepoPath, "show-ref"), perm.SharedFile)) + // These repos are the ones being restored, and should exist after the restore. var repos []*gitalypb.Repository for i := 0; i < 2; i++ { - repo := gittest.InitRepoDir(t, cfg.Storages[0].Path, fmt.Sprintf("repo-%d", i)) - repoBundlePath := filepath.Join(path, repo.RelativePath+".bundle") - repoRefPath := filepath.Join(path, repo.RelativePath+".refs") + repo, _ := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ + RelativePath: fmt.Sprintf("repo-%d", i), + Storage: cfg.Storages[0], + }) + + repoBundlePath := filepath.Join(backupDir, repo.RelativePath+".bundle") testhelper.CopyFile(t, existingRepoBundlePath, repoBundlePath) + repoRefPath := filepath.Join(backupDir, repo.RelativePath+".refs") testhelper.CopyFile(t, existingRepoRefPath, repoRefPath) repos = append(repos, repo) } @@ -81,7 +89,7 @@ Issue: https://gitlab.com/gitlab-org/gitaly/-/issues/5269`) progname, "restore", "--path", - path, + backupDir, "--parallel", strconv.Itoa(runtime.NumCPU()), "--parallel-storage", @@ -101,9 +109,10 @@ Issue: https://gitlab.com/gitlab-org/gitaly/-/issues/5269`) require.NoDirExists(t, existRepoPath) + // Ensure the repos were restored correctly. for _, repo := range repos { repoPath := filepath.Join(cfg.Storages[0].Path, gittest.GetReplicaPath(t, ctx, cfg, repo)) - bundlePath := filepath.Join(path, repo.RelativePath+".bundle") + bundlePath := filepath.Join(backupDir, repo.RelativePath+".bundle") output := gittest.Exec(t, cfg, "-C", repoPath, "bundle", "verify", bundlePath) require.Contains(t, string(output), "The bundle records a complete history") @@ -122,8 +131,8 @@ Issue: https://gitlab.com/gitlab-org/gitaly/-/issues/5269`) ctx := testhelper.Context(t) - path := testhelper.TempDir(t) - backupSink, err := backup.ResolveSink(ctx, path) + backupDir := testhelper.TempDir(t) + backupSink, err := backup.ResolveSink(ctx, backupDir) require.NoError(t, err) backupLocator, err := backup.ResolveLocator("pointer", backupSink) @@ -142,18 +151,22 @@ Issue: https://gitlab.com/gitlab-org/gitaly/-/issues/5269`) }) gittest.WriteCommit(t, cfg, existRepoPath, gittest.WithBranch(git.DefaultBranch)) - existingRepoBundlePath := filepath.Join(path, existingRepo.RelativePath+".bundle") - existingRepoRefPath := filepath.Join(path, existingRepo.RelativePath+".refs") + existingRepoBundlePath := filepath.Join(backupDir, existingRepo.RelativePath+".bundle") + existingRepoRefPath := filepath.Join(backupDir, existingRepo.RelativePath+".refs") gittest.Exec(t, cfg, "-C", existRepoPath, "bundle", "create", existingRepoBundlePath, "--all") require.NoError(t, os.WriteFile(existingRepoRefPath, gittest.Exec(t, cfg, "-C", existRepoPath, "show-ref"), perm.SharedFile)) var repos []*gitalypb.Repository for i := 0; i < 2; i++ { - repo := gittest.InitRepoDir(t, cfg.Storages[0].Path, fmt.Sprintf("repo-%d", i)) - repoBundlePath := filepath.Join(path, repo.RelativePath+".bundle") - repoRefPath := filepath.Join(path, repo.RelativePath+".refs") + repo, _ := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ + RelativePath: fmt.Sprintf("repo-%d", i), + Storage: cfg.Storages[0], + }) + + repoBundlePath := filepath.Join(backupDir, repo.RelativePath+".bundle") testhelper.CopyFile(t, existingRepoBundlePath, repoBundlePath) + repoRefPath := filepath.Join(backupDir, repo.RelativePath+".refs") testhelper.CopyFile(t, existingRepoRefPath, repoRefPath) repos = append(repos, repo) } @@ -199,7 +212,7 @@ Issue: https://gitlab.com/gitlab-org/gitaly/-/issues/5269`) for _, repo := range repos { repoPath := filepath.Join(cfg.Storages[0].Path, gittest.GetReplicaPath(t, ctx, cfg, repo)) - bundlePath := filepath.Join(path, repo.RelativePath+".bundle") + bundlePath := filepath.Join(backupDir, repo.RelativePath+".bundle") output := gittest.Exec(t, cfg, "-C", repoPath, "bundle", "verify", bundlePath) require.Contains(t, string(output), "The bundle records a complete history") -- GitLab From f8ff3324c74148328148f3c8d9600a9d95656df1 Mon Sep 17 00:00:00 2001 From: James Liu Date: Wed, 10 Jan 2024 13:18:57 +1100 Subject: [PATCH 2/2] backup: Remove test exceptions for WAL Now that backups no longer invoke the RemoveAll RPC, we can remove the exemption for these tests when the WAL is enabled. --- internal/cli/gitalybackup/restore_test.go | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/internal/cli/gitalybackup/restore_test.go b/internal/cli/gitalybackup/restore_test.go index 033420d8f6..4b1eee9aa0 100644 --- a/internal/cli/gitalybackup/restore_test.go +++ b/internal/cli/gitalybackup/restore_test.go @@ -26,13 +26,6 @@ import ( func TestRestoreSubcommand(t *testing.T) { gittest.SkipWithSHA256(t) - testhelper.SkipWithWAL(t, ` -RemoveAll is removing the entire content of the storage. This would also remove the database's and -the transaction manager's disk state. The RPC needs to be updated to shut down all partitions and -the database and only then perform the removal. - -Issue: https://gitlab.com/gitlab-org/gitaly/-/issues/5269`) - ctx := testhelper.Context(t) cfg := testcfg.Build(t) @@ -122,13 +115,6 @@ Issue: https://gitlab.com/gitlab-org/gitaly/-/issues/5269`) func TestRestoreSubcommand_serverSide(t *testing.T) { gittest.SkipWithSHA256(t) - testhelper.SkipWithWAL(t, ` -RemoveAll is removing the entire content of the storage. This would also remove the database's and -the transaction manager's disk state. The RPC needs to be updated to shut down all partitions and -the database and only then perform the removal. - -Issue: https://gitlab.com/gitlab-org/gitaly/-/issues/5269`) - ctx := testhelper.Context(t) backupDir := testhelper.TempDir(t) -- GitLab