diff --git a/internal/cli/gitaly/serve.go b/internal/cli/gitaly/serve.go index 60a72ebae15a81b27fc047429ddc1f6a605155e6..00e7b507fc14ab36e9ced28480499b8d432dfe45 100644 --- a/internal/cli/gitaly/serve.go +++ b/internal/cli/gitaly/serve.go @@ -372,7 +372,9 @@ func run(appCtx *cli.Command, cfg config.Cfg, logger log.Logger) error { reftableMigratorMetrics := reftable.NewMetrics() prometheus.MustRegister(housekeepingMetrics, storageMetrics, partitionMetrics, migrationMetrics, raftMetrics, reftableMigratorMetrics) - migrations := []migration.Migration{} + migrations := []migration.Migration{ + migration.NewLeftoverFileMigration(localrepoFactory, migration.DeleteDirectly), + } var txMiddleware server.TransactionMiddleware var node storage.Node diff --git a/internal/featureflag/ff_leftover_migration.go b/internal/featureflag/ff_leftover_migration.go new file mode 100644 index 0000000000000000000000000000000000000000..0df51a4a2b3e03c472873e0b1c3e150c0d6aed84 --- /dev/null +++ b/internal/featureflag/ff_leftover_migration.go @@ -0,0 +1,9 @@ +package featureflag + +// LeftoverMigration enables the feature to delete garbage files in repositories that Gitaly doesn't use. +var LeftoverMigration = NewFeatureFlag( + "leftover_migration", + "v17.8.0", + "https://gitlab.com/gitlab-org/gitaly/-/issues/5737", + false, +) diff --git a/internal/featureflag/ff_snapshot_filter.go b/internal/featureflag/ff_snapshot_filter.go new file mode 100644 index 0000000000000000000000000000000000000000..49da87721e17b8e00ce8774adeec732ae6e8c2ba --- /dev/null +++ b/internal/featureflag/ff_snapshot_filter.go @@ -0,0 +1,9 @@ +package featureflag + +// SnapshotFilter enables snapshot filter feature. +var SnapshotFilter = NewFeatureFlag( + "snapshot_filter", + "v18.1.0", + "https://gitlab.com/gitlab-org/gitaly/-/issues/5737", + false, +) diff --git a/internal/gitaly/service/operations/user_delete_branch_test.go b/internal/gitaly/service/operations/user_delete_branch_test.go index ae1fceb17552206c2c44bc9f3826e5d6146a41cd..578cf3322382755e4f14e77770e23fa4bb732527 100644 --- a/internal/gitaly/service/operations/user_delete_branch_test.go +++ b/internal/gitaly/service/operations/user_delete_branch_test.go @@ -346,10 +346,14 @@ func TestUserDeleteBranch_allowed(t *testing.T) { } func TestUserDeleteBranch_concurrentUpdate(t *testing.T) { + // In the context of transaction management, refs do not require a .lock file for concurrency control. + // This is because transaction manager ensures that no two transactions can operate + // on the same ref concurrently and tracks all ongoing ref transactions enforces mutual exclusion. + testhelper.SkipWithWAL(t, "snapshot ignore ref .lock file") + t.Parallel() ctx := testhelper.Context(t) - ctx, cfg, client := setupOperationsService(t, ctx) repo, repoPath := gittest.CreateRepository(t, ctx, cfg) diff --git a/internal/gitaly/service/ref/delete_refs_test.go b/internal/gitaly/service/ref/delete_refs_test.go index 2cf7c803a5aed4c87163df7c3403d82ce6a8c74a..a7a0d2237e4534775e014f87cac6791e505c8767 100644 --- a/internal/gitaly/service/ref/delete_refs_test.go +++ b/internal/gitaly/service/ref/delete_refs_test.go @@ -183,6 +183,10 @@ func TestDeleteRefs_invalidRefFormat(t *testing.T) { } func TestDeleteRefs_refLocked(t *testing.T) { + // In the context of transaction management, refs do not require a .lock file for concurrency control. + // This is because transaction manager ensures that no two transactions can operate + // on the same ref concurrently and tracks all ongoing ref transactions enforces mutual exclusion. + testhelper.SkipWithWAL(t, "snapshot ignore ref .lock file") t.Parallel() ctx := testhelper.Context(t) diff --git a/internal/gitaly/service/ref/update_references_test.go b/internal/gitaly/service/ref/update_references_test.go index 8d4768000fcfeb93988e10f3bd19877852224889..bf5d20b232f4a1bff8d36c36157e6d4f8c95bf2c 100644 --- a/internal/gitaly/service/ref/update_references_test.go +++ b/internal/gitaly/service/ref/update_references_test.go @@ -296,6 +296,10 @@ func TestUpdateReferences(t *testing.T) { { desc: "locked reference", setup: func(t *testing.T) setupData { + // In the context of transaction management, refs do not require a .lock file for concurrency control. + // This is because transaction manager ensures that no two transactions can operate + // on the same ref concurrently and tracks all ongoing ref transactions enforces mutual exclusion. + testhelper.SkipWithWAL(t, "snapshot ignore ref .lock file") repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg) repo := localrepo.NewTestRepo(t, cfg, repoProto) diff --git a/internal/gitaly/service/repository/repository_info.go b/internal/gitaly/service/repository/repository_info.go index b3eff25e63493395e15cc49348eda11436f07269..cc7a07cde8a81023896430329c9a84c2918cf7a6 100644 --- a/internal/gitaly/service/repository/repository_info.go +++ b/internal/gitaly/service/repository/repository_info.go @@ -26,7 +26,7 @@ func (s *server) RepositoryInfo( return nil, err } - repoSize, err := dirSizeInBytes(repoPath) + repoSize, err := dirSizeInBytes(ctx, repoPath, s.cfg.Transactions.Enabled, true) if err != nil { return nil, fmt.Errorf("calculating repository size: %w", err) } diff --git a/internal/gitaly/service/repository/repository_info_test.go b/internal/gitaly/service/repository/repository_info_test.go index abf6af78b8f1623740d6775b10d721e9b419a002..ead46c77f24169e833eafc73f6311bd22f83d177 100644 --- a/internal/gitaly/service/repository/repository_info_test.go +++ b/internal/gitaly/service/repository/repository_info_test.go @@ -38,7 +38,9 @@ func TestRepositoryInfo(t *testing.T) { emptyRepoSize := func() uint64 { _, repoPath := gittest.CreateRepository(t, ctx, cfg) - size, err := dirSizeInBytes(repoPath) + + size, err := dirSizeInBytes(ctx, repoPath, testhelper.IsWALEnabled(), true) + require.NoError(t, err) return uint64(size) }() @@ -71,8 +73,10 @@ func TestRepositoryInfo(t *testing.T) { } for _, tc := range []struct { - desc string - setup func(t *testing.T) setupData + desc string + setup func(t *testing.T) setupData + ignore bool + ignoreReason string }{ { desc: "unset repository", @@ -338,6 +342,8 @@ func TestRepositoryInfo(t *testing.T) { }, } }, + ignore: testhelper.IsWALEnabled(), + ignoreReason: "transaction snapshot exclude .mtime file in objects/pack", }, { desc: "repository with keep pack", @@ -370,6 +376,8 @@ func TestRepositoryInfo(t *testing.T) { }, } }, + ignore: testhelper.IsWALEnabled(), + ignoreReason: "transaction snapshots exclude .keep file in objects/pack", }, { desc: "repository with different pack types", @@ -409,6 +417,8 @@ func TestRepositoryInfo(t *testing.T) { }, } }, + ignore: testhelper.IsWALEnabled(), + ignoreReason: "transaction snapshot exclude .keep and .mtime file in objects/pack", }, { desc: "repository with commit-graph", @@ -430,7 +440,8 @@ func TestRepositoryInfo(t *testing.T) { repoStats, err := stats.RepositoryInfoForRepository(ctx, localrepo.NewTestRepo(t, cfg, repo)) require.NoError(t, err) - repoSize, err := dirSizeInBytes(repoPath) + repoSize, err := dirSizeInBytes(ctx, repoPath, testhelper.IsWALEnabled(), true) + require.NoError(t, err) return setupData{ @@ -478,7 +489,8 @@ func TestRepositoryInfo(t *testing.T) { repoStats, err := stats.RepositoryInfoForRepository(ctx, localrepo.NewTestRepo(t, cfg, repo)) require.NoError(t, err) - repoSize, err := dirSizeInBytes(repoPath) + repoSize, err := dirSizeInBytes(ctx, repoPath, testhelper.IsWALEnabled(), true) + require.NoError(t, err) return setupData{ @@ -531,7 +543,8 @@ func TestRepositoryInfo(t *testing.T) { repoStats, err := stats.RepositoryInfoForRepository(ctx, localrepo.NewTestRepo(t, cfg, repo)) require.NoError(t, err) - repoSize, err := dirSizeInBytes(repoPath) + repoSize, err := dirSizeInBytes(ctx, repoPath, testhelper.IsWALEnabled(), true) + require.NoError(t, err) return setupData{ @@ -581,7 +594,8 @@ func TestRepositoryInfo(t *testing.T) { repoStats, err := stats.RepositoryInfoForRepository(ctx, localrepo.NewTestRepo(t, cfg, repo)) require.NoError(t, err) - repoSize, err := dirSizeInBytes(repoPath) + repoSize, err := dirSizeInBytes(ctx, repoPath, testhelper.IsWALEnabled(), true) + require.NoError(t, err) return setupData{ @@ -628,7 +642,8 @@ func TestRepositoryInfo(t *testing.T) { repoStats, err := stats.RepositoryInfoForRepository(ctx, localrepo.NewTestRepo(t, cfg, repo)) require.NoError(t, err) - repoSize, err := dirSizeInBytes(repoPath) + repoSize, err := dirSizeInBytes(ctx, repoPath, testhelper.IsWALEnabled(), true) + require.NoError(t, err) return setupData{ @@ -683,7 +698,8 @@ func TestRepositoryInfo(t *testing.T) { require.NoError(t, err) } - repoSize, err := dirSizeInBytes(repoPath) + repoSize, err := dirSizeInBytes(ctx, repoPath, testhelper.IsWALEnabled(), true) + require.NoError(t, err) return setupData{ @@ -710,6 +726,10 @@ func TestRepositoryInfo(t *testing.T) { }, } { t.Run(tc.desc, func(t *testing.T) { + if tc.ignore { + t.Skip(tc.ignoreReason) + } + t.Parallel() setup := tc.setup(t) diff --git a/internal/gitaly/service/repository/size.go b/internal/gitaly/service/repository/size.go index 94a2c01865796a1afbd7bae78c31bd9c12c459c0..1a341d88c8ae4b734893a6103dac2b4e6aab693b 100644 --- a/internal/gitaly/service/repository/size.go +++ b/internal/gitaly/service/repository/size.go @@ -8,6 +8,8 @@ import ( "os" "path/filepath" + "gitlab.com/gitlab-org/gitaly/v16/internal/featureflag" + "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage/storagemgr/partition/snapshot" "gitlab.com/gitlab-org/gitaly/v16/internal/structerr" "gitlab.com/gitlab-org/gitaly/v16/proto/go/gitalypb" ) @@ -26,7 +28,7 @@ func (s *server) RepositorySize(ctx context.Context, in *gitalypb.RepositorySize return nil, err } - sizeInBytes, err := dirSizeInBytes(path) + sizeInBytes, err := dirSizeInBytes(ctx, path, s.cfg.Transactions.Enabled, true) if err != nil { return nil, fmt.Errorf("calculating directory size: %w", err) } @@ -45,8 +47,8 @@ func (s *server) GetObjectDirectorySize(ctx context.Context, in *gitalypb.GetObj if err != nil { return nil, err } - - sizeInBytes, err := dirSizeInBytes(path) + // path is the objects directory path, not repo's path + sizeInBytes, err := dirSizeInBytes(ctx, path, s.cfg.Transactions.Enabled, false) if err != nil { return nil, fmt.Errorf("calculating directory size: %w", err) } @@ -54,10 +56,10 @@ func (s *server) GetObjectDirectorySize(ctx context.Context, in *gitalypb.GetObj return &gitalypb.GetObjectDirectorySizeResponse{Size: sizeInBytes / 1024}, nil } -func dirSizeInBytes(path string) (int64, error) { +func dirSizeInBytes(ctx context.Context, dirPath string, transactionEnabled, isRepoPath bool) (int64, error) { var totalSize int64 - if err := filepath.WalkDir(path, func(path string, d fs.DirEntry, err error) error { + if err := filepath.WalkDir(dirPath, func(path string, d fs.DirEntry, err error) error { if err != nil { // It can happen that we try to walk a directory like the object shards or // an empty reference directory that gets deleted concurrently. This is fine @@ -73,6 +75,17 @@ func dirSizeInBytes(path string) (int64, error) { return nil } + relPath, err := filepath.Rel(dirPath, path) + if err != nil { + return fmt.Errorf("calculating path relative to repo root: %w", err) + } + if transactionEnabled && isRepoPath && relPath != "." { + matcher := snapshot.NewSnapshotFilter(featureflag.SnapshotFilter.IsDisabled(ctx)) + if ok := matcher.Matches(relPath, d.IsDir()); !ok { + return nil + } + } + fi, err := d.Info() if err != nil { // The file may have been concurrently removed. diff --git a/internal/gitaly/service/repository/size_test.go b/internal/gitaly/service/repository/size_test.go index 4ae283c167b3c23d9ee35e3d9a70ef5aaecf3957..40db03077f8f5e60d4813a9dff9fcbf87b15f8bc 100644 --- a/internal/gitaly/service/repository/size_test.go +++ b/internal/gitaly/service/repository/size_test.go @@ -99,10 +99,16 @@ func TestRepositorySize_normalRepository(t *testing.T) { invalidateSnapshot() requireRepositorySize(t, ctx, client, repo, 16) - // Even garbage should increase the size. require.NoError(t, os.WriteFile(filepath.Join(repoPath, "garbage"), incompressibleData(5*1024), mode.File)) invalidateSnapshot() - requireRepositorySize(t, ctx, client, repo, 21) + if testhelper.IsWALEnabled() { + // when snapshot filter is enabled in transaction manager + // garbage is not included in object directory's size + requireRepositorySize(t, ctx, client, repo, 16) + } else { + // Otherwise, even garbage should increase the size. + requireRepositorySize(t, ctx, client, repo, 21) + } } func TestRepositorySize_failure(t *testing.T) { diff --git a/internal/gitaly/service/repository/snapshot_test.go b/internal/gitaly/service/repository/snapshot_test.go index 4c4c226f8c7957ebe8730871914d57af47373d18..3bac7f6743695071f910c4a4b2ddec2e943bef00 100644 --- a/internal/gitaly/service/repository/snapshot_test.go +++ b/internal/gitaly/service/repository/snapshot_test.go @@ -136,8 +136,10 @@ func TestGetSnapshot(t *testing.T) { // Unreachable objects should also be included in the snapshot. unreachableCommitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithMessage("unreachable")) - // The shallow file, used if the repository is a shallow clone, is also included in snapshots. - require.NoError(t, os.WriteFile(filepath.Join(repoPath, "shallow"), nil, mode.File)) + if !testhelper.IsWALEnabled() { + // The shallow file, used if the repository is a shallow clone, is also included in snapshots. + require.NoError(t, os.WriteFile(filepath.Join(repoPath, "shallow"), nil, mode.File)) + } // Custom Git hooks are not included in snapshots. require.NoError(t, os.MkdirAll(filepath.Join(repoPath, "hooks"), mode.Directory)) @@ -149,16 +151,24 @@ func TestGetSnapshot(t *testing.T) { mode.File, )) - expected := testhelper.DirectoryState{ - "HEAD": { - Mode: archive.TarFileMode, - ParseContent: ignoreContent, - }, - "shallow": { + expected := func() testhelper.DirectoryState { + info := testhelper.DirectoryEntry{ Mode: archive.TarFileMode, ParseContent: ignoreContent, - }, - } + } + if testhelper.IsWALEnabled() { + // Only HEAD is included in snapshots + // when snapshot filter is enabled in transaction manager. + return testhelper.DirectoryState{ + "HEAD": info, + } + } + return testhelper.DirectoryState{ + "HEAD": info, + "shallow": info, + } + }() + expected[fmt.Sprintf("objects/%s/%s", treeID[0:2], treeID[2:])] = testhelper.DirectoryEntry{ Mode: archive.TarFileMode, ParseContent: ignoreContent, diff --git a/internal/gitaly/service/repository/write_ref_test.go b/internal/gitaly/service/repository/write_ref_test.go index 08c5f417687314d3f408931775ae7f4a7906168d..0b9a5c664d48da4e1bbc34fead44f28df816caaf 100644 --- a/internal/gitaly/service/repository/write_ref_test.go +++ b/internal/gitaly/service/repository/write_ref_test.go @@ -393,6 +393,10 @@ func TestWriteRef(t *testing.T) { } func TestWriteRef_locked(t *testing.T) { + // In the context of transaction management, refs do not require a .lock file for concurrency control. + // This is because transaction manager ensures that no two transactions can operate + // on the same ref concurrently and tracks all ongoing ref transactions enforces mutual exclusion. + testhelper.SkipWithWAL(t, "snapshots ignore ref .lock files") t.Parallel() ctx := testhelper.Context(t) diff --git a/internal/gitaly/storage/storagemgr/partition/migration/leftover_file_migration.go b/internal/gitaly/storage/storagemgr/partition/migration/leftover_file_migration.go new file mode 100644 index 0000000000000000000000000000000000000000..8fade36596b79b79866f5068152bc9b3301e52ff --- /dev/null +++ b/internal/gitaly/storage/storagemgr/partition/migration/leftover_file_migration.go @@ -0,0 +1,112 @@ +package migration + +import ( + "context" + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + + "gitlab.com/gitlab-org/gitaly/v16/internal/featureflag" + "gitlab.com/gitlab-org/gitaly/v16/internal/git/localrepo" + "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage" + "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage/storagemgr/partition/snapshot" + "gitlab.com/gitlab-org/gitaly/v16/proto/go/gitalypb" +) + +// leftoverProcessingFn is a function we use to remove leftover file +type leftoverProcessingFn func(storage.FS, string) error + +// NewLeftoverFileMigration returns a migration which deletes left-over files in the repository. +func NewLeftoverFileMigration(localRepoFactory localrepo.Factory, fn leftoverProcessingFn) Migration { + return Migration{ + ID: 2, + Name: "leftover files removal", + IsDisabled: func(ctx context.Context) bool { + return featureflag.LeftoverMigration.IsDisabled(ctx) + }, + Fn: func(ctx context.Context, tx storage.Transaction, storageName string, relativePath string) error { + if featureflag.SnapshotFilter.IsEnabled(ctx) { + return fmt.Errorf("leftover migration should disable snapshot_filter") + } + + originalRepo := &gitalypb.Repository{ + StorageName: storageName, + RelativePath: relativePath, + } + originalRepo = tx.OriginalRepository(originalRepo) + scopedFactory, err := localRepoFactory.ScopeByStorage(ctx, storageName) + if err != nil { + return fmt.Errorf("creating storage scoped factory: %w", err) + } + repo := scopedFactory.Build(originalRepo.GetRelativePath()) + repoPath, err := repo.Path(ctx) + if err != nil { + return fmt.Errorf("original repo path: %w", err) + } + + snapshotFilter := snapshot.NewSnapshotFilter(false) + if err := filepath.WalkDir(repoPath, func(path string, d fs.DirEntry, err error) error { + if err != nil { + // It can happen that we try to walk a directory like the object shards or + // an empty reference directory that gets deleted concurrently. This is fine + // and expected to happen, so let's ignore any such errors. + if errors.Is(err, os.ErrNotExist) { + return nil + } + + return err + } + + if d.IsDir() { + return nil + } + + relPath, err := filepath.Rel(repoPath, path) + if err != nil { + return fmt.Errorf("calculating path relative to repo root: %w", err) + } + if relPath != "." { + if ok := snapshotFilter.Matches(relPath, d.IsDir()); !ok { + if err := fn(tx.FS(), relPath); err != nil { + return fmt.Errorf("processing lefover file: %w", err) + } + } + } + + return nil + }); err != nil { + return fmt.Errorf("walking directory: %w", err) + } + + return nil + }, + } +} + +// DeleteDirectly just delete the file and error in case of failure. +func DeleteDirectly(fs storage.FS, file string) error { + if err := os.RemoveAll(file); err != nil { + return fmt.Errorf("removing file: %w", err) + } + return nil +} + +// MoveToGarbageFolder moves the file to the transaction_garbage folder. +func MoveToGarbageFolder(fs storage.FS, file string) error { + targetPath := filepath.Join("transaction_garbage", file) + err := storage.MkdirAll(fs, filepath.Dir(targetPath)) + if err != nil { + return fmt.Errorf("make dir: %w", err) + } + err = storage.Link(fs, file, targetPath) + if err != nil { + return fmt.Errorf("move file to transaction_garbage: %w", err) + } + err = storage.RecordDirectoryRemoval(fs, fs.Root(), file) + if err != nil { + return fmt.Errorf("remove file: %w", err) + } + return nil +} diff --git a/internal/gitaly/storage/storagemgr/partition/migration/leftover_file_migration_test.go b/internal/gitaly/storage/storagemgr/partition/migration/leftover_file_migration_test.go new file mode 100644 index 0000000000000000000000000000000000000000..bd152a9ab0b27d8a279bf9b97a2f6008ee6d653d --- /dev/null +++ b/internal/gitaly/storage/storagemgr/partition/migration/leftover_file_migration_test.go @@ -0,0 +1,230 @@ +package migration + +import ( + "context" + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + "strconv" + "testing" + + "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/v16/internal/git" + "gitlab.com/gitlab-org/gitaly/v16/internal/git/catfile" + "gitlab.com/gitlab-org/gitaly/v16/internal/git/gitcmd" + "gitlab.com/gitlab-org/gitaly/v16/internal/git/gittest" + "gitlab.com/gitlab-org/gitaly/v16/internal/git/housekeeping" + "gitlab.com/gitlab-org/gitaly/v16/internal/git/localrepo" + "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config" + "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage" + "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage/keyvalue" + "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage/keyvalue/databasemgr" + "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage/mode" + nodeimpl "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage/node" + "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage/raftmgr" + "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage/storagemgr" + "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage/storagemgr/partition" + "gitlab.com/gitlab-org/gitaly/v16/internal/helper" + "gitlab.com/gitlab-org/gitaly/v16/internal/log" + "gitlab.com/gitlab-org/gitaly/v16/internal/testhelper" + "gitlab.com/gitlab-org/gitaly/v16/internal/testhelper/testcfg" + "gitlab.com/gitlab-org/gitaly/v16/proto/go/gitalypb" +) + +func TestNewLeftoverFileMigration(t *testing.T) { + // setup node + logger := testhelper.SharedLogger(t) + cfg := testcfg.Build(t) + ctx := testhelper.Context(t) + cmdFactory := gittest.NewCommandFactory(t, cfg) + catfileCache := catfile.NewCache(cfg) + t.Cleanup(catfileCache.Stop) + localRepoFactory := localrepo.NewFactory(logger, config.NewLocator(cfg), cmdFactory, catfileCache) + // leftoverFileMigration := NewLeftoverFileMigration(localRepoFactory, DeleteDirectly) + leftoverFileMigration := NewLeftoverFileMigration(localRepoFactory, MoveToGarbageFolder) + node := setupNodeForTransaction(t, ctx, cfg, logger, cmdFactory, localRepoFactory, []Migration{leftoverFileMigration}) + t.Cleanup(node.Close) + + // setup repo + setup := setupRepoForLeftoverMigration(t, ctx, cfg) + + // manually create transaction + originalRepo := &gitalypb.Repository{ + StorageName: setup.repo.GetStorageName(), + RelativePath: setup.repo.GetRelativePath(), + } + if tx := storage.ExtractTransaction(ctx); tx != nil { + originalRepo = tx.OriginalRepository(originalRepo) + } + storageHandle, err := node.GetStorage(setup.repo.GetStorageName()) + require.NoError(t, err) + + tx, err := storageHandle.Begin(ctx, storage.TransactionOptions{ + ReadOnly: false, + RelativePath: originalRepo.GetRelativePath(), + }) + require.NoError(t, err) + err = tx.Commit(ctx) + require.NoError(t, err) + + applyWAL(t, ctx, setup.repo, node) + + // check all files the left are what we expect them to be + acttualReftableFileCount := 0 + expectedReftableFileCount := 0 + if testhelper.IsReftableEnabled() { + expectedReftableFileCount = 2 + } + err = filepath.WalkDir(setup.repoPath, func(path string, d fs.DirEntry, err error) error { + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil + } + return err + } + + if d.IsDir() { + return nil + } + if !testhelper.IsReftableEnabled() { + require.Contains(t, setup.filesToStay, path) + return nil + } + + if filepath.Dir(path) == filepath.Join(setup.repoPath, "reftable") && filepath.Ext(path) == ".ref" { + acttualReftableFileCount++ + } else { + require.Contains(t, setup.filesToStay, path) + } + + return nil + }) + require.NoError(t, err) + require.Equal(t, expectedReftableFileCount, acttualReftableFileCount) +} + +type leftoverMigrationSetup struct { + repoPath string + filesToStay []string + repo *gitalypb.Repository +} + +func setupRepoForLeftoverMigration(t *testing.T, ctx context.Context, cfg config.Cfg) leftoverMigrationSetup { + t.Helper() + + repo, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ + SkipCreationViaService: true, + }) + + blobNum := 3 + refNum := 4 + filesToStay := make([]string, 0) + treeEntries := make([]gittest.TreeEntry, 0) + + err := os.WriteFile(filepath.Join(repoPath, "description"), []byte{}, mode.File) + require.NoError(t, err) + + for i := 0; i < blobNum; i++ { + blob := gittest.WriteBlob(t, cfg, repoPath, []byte(strconv.Itoa(i))) + filesToStay = append(filesToStay, filepath.Join(repoPath, "objects", blob.String()[0:2], blob.String()[2:])) + treeEntries = append(treeEntries, gittest.TreeEntry{Path: fmt.Sprintf("file%d", i), Mode: "100644", OID: blob}) + + err := os.WriteFile(filepath.Join(repoPath, "objects", "pack", fmt.Sprintf("pack-%d.keep", i)), []byte{}, mode.File) + require.NoError(t, err) + } + + tree := gittest.WriteTree(t, cfg, repoPath, treeEntries) + filesToStay = append(filesToStay, filepath.Join(repoPath, "objects", tree.String()[0:2], tree.String()[2:])) + + for i := 0; i < refNum; i++ { + branch := fmt.Sprintf("test%d", i) + commit := gittest.WriteCommit(t, cfg, repoPath, gittest.WithTree(tree), gittest.WithBranch(branch)) + gittest.WriteRef(t, cfg, repoPath, git.ReferenceName(fmt.Sprintf("refs/heads/%d", i)), commit) + + filesToStay = append(filesToStay, filepath.Join(repoPath, "objects", commit.String()[0:2], commit.String()[2:])) + + if !testhelper.IsReftableEnabled() { + filesToStay = append(filesToStay, filepath.Join(repoPath, "refs", "heads", branch)) + filesToStay = append(filesToStay, filepath.Join(repoPath, "refs", "heads", fmt.Sprintf("%d", i))) + + err := os.WriteFile(filepath.Join(repoPath, "refs", "heads", fmt.Sprintf("%d.lock", i)), []byte{}, mode.File) + require.NoError(t, err) + } + + } + if testhelper.IsReftableEnabled() { + filesToStay = append(filesToStay, filepath.Join(repoPath, "refs", "heads")) + filesToStay = append(filesToStay, filepath.Join(repoPath, "reftable", "tables.list")) + } + + filesToStay = append(filesToStay, filepath.Join(repoPath, "config"), filepath.Join(repoPath, "HEAD")) + + return leftoverMigrationSetup{ + repoPath: repoPath, + filesToStay: filesToStay, + repo: repo, + } +} + +func setupNodeForTransaction(t *testing.T, ctx context.Context, cfg config.Cfg, + logger log.Logger, cmdFactory gitcmd.CommandFactory, + localRepoFactory localrepo.Factory, migrations []Migration, +) *nodeimpl.Manager { + dbMgr, err := databasemgr.NewDBManager( + ctx, + cfg.Storages, + keyvalue.NewBadgerStore, + helper.NewNullTickerFactory(), + logger, + ) + require.NoError(t, err) + t.Cleanup(dbMgr.Close) + + raftNode, err := raftmgr.NewNode(cfg, logger, dbMgr, nil) + require.NoError(t, err) + + raftFactory := raftmgr.DefaultFactoryWithNode(cfg.Raft, raftNode) + + partitionFactoryOptions := []partition.FactoryOption{ + partition.WithCmdFactory(cmdFactory), + partition.WithRepoFactory(localRepoFactory), + partition.WithMetrics(partition.NewMetrics(housekeeping.NewMetrics(cfg.Prometheus))), + partition.WithRaftConfig(cfg.Raft), + partition.WithRaftFactory(raftFactory), + } + + node, err := nodeimpl.NewManager( + cfg.Storages, + storagemgr.NewFactory( + logger, + dbMgr, + NewFactory( + partition.NewFactory(partitionFactoryOptions...), + NewMetrics(), + migrations, + ), + config.DefaultMaxInactivePartitions, + storagemgr.NewMetrics(cfg.Prometheus), + ), + ) + require.NoError(t, err) + return node +} + +func applyWAL(t *testing.T, ctx context.Context, repo *gitalypb.Repository, node storage.Node) { + nodeStorage, err := node.GetStorage(repo.GetStorageName()) + require.NoError(t, err) + + // Start a transaction to ensure the WAL is fully applied. This test is still + // accessing the repository directly in the storage. + tx, err := nodeStorage.Begin(ctx, storage.TransactionOptions{ + ReadOnly: true, + RelativePath: repo.GetRelativePath(), + }) + require.NoError(t, err) + defer func() { + require.NoError(t, tx.Rollback(ctx)) + }() +} diff --git a/internal/gitaly/storage/storagemgr/partition/snapshot/snapshot.go b/internal/gitaly/storage/storagemgr/partition/snapshot/snapshot.go index 132c50ee11a7f84ddb78fc8d92c4fcc220826f7b..c8d22c6787693d2f9ebf51967bb3ca1e0cb1890c 100644 --- a/internal/gitaly/storage/storagemgr/partition/snapshot/snapshot.go +++ b/internal/gitaly/storage/storagemgr/partition/snapshot/snapshot.go @@ -10,7 +10,7 @@ import ( "strings" "time" - "gitlab.com/gitlab-org/gitaly/v16/internal/git/housekeeping" + "gitlab.com/gitlab-org/gitaly/v16/internal/featureflag" "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage" "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage/gitstorage" "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage/mode" @@ -242,13 +242,9 @@ func createParentDirectories(storageRoot, snapshotRoot, relativePath string, sta // are shared between the snapshot and the repository, they must not be modified. Git doesn't modify // existing files but writes new ones so this property is upheld. func createRepositorySnapshot(ctx context.Context, storageRoot, snapshotRoot, relativePath string, stats *snapshotStatistics) error { - if err := createDirectorySnapshot(ctx, filepath.Join(storageRoot, relativePath), filepath.Join(snapshotRoot, relativePath), map[string]struct{}{ - // Don't include worktrees in the snapshot. All of the worktrees in the repository should be leftover - // state from before transaction management was introduced as the transactions would create their - // worktrees in the snapshot. - housekeeping.WorktreesPrefix: {}, - housekeeping.GitlabWorktreePrefix: {}, - }, stats); err != nil { + if err := createDirectorySnapshot(ctx, filepath.Join(storageRoot, relativePath), + filepath.Join(snapshotRoot, relativePath), + NewSnapshotFilter(featureflag.SnapshotFilter.IsDisabled(ctx)), stats); err != nil { return fmt.Errorf("create directory snapshot: %w", err) } @@ -258,8 +254,8 @@ func createRepositorySnapshot(ctx context.Context, storageRoot, snapshotRoot, re // createDirectorySnapshot recursively recreates the directory structure from originalDirectory into // snapshotDirectory and hard links files into the same locations in snapshotDirectory. // -// skipRelativePaths can be provided to skip certain entries from being included in the snapshot. -func createDirectorySnapshot(ctx context.Context, originalDirectory, snapshotDirectory string, skipRelativePaths map[string]struct{}, stats *snapshotStatistics) error { +// includeRelativePaths is needed to track which paths we want to include in the snapshot. +func createDirectorySnapshot(ctx context.Context, originalDirectory, snapshotDirectory string, matcher Filter, stats *snapshotStatistics) error { if err := filepath.Walk(originalDirectory, func(oldPath string, info fs.FileInfo, err error) error { if err != nil { if errors.Is(err, fs.ErrNotExist) && oldPath == originalDirectory { @@ -276,8 +272,24 @@ func createDirectorySnapshot(ctx context.Context, originalDirectory, snapshotDir return fmt.Errorf("rel: %w", err) } - if _, ok := skipRelativePaths[relativePath]; ok { - return fs.SkipDir + //// TODO relativePath is when oldPath == originalDirectory. explain it + //if relativePath != "." { + // if ok := matcher.Matches(ctx, relativePath, info.IsDir()); !ok { + // if info.IsDir() { + // // TODO verify this, description file will cause whole dir be ignored + // // if we dont't have this conditionl + // return fs.SkipDir + // } else { + // return nil + // } + // } + //} + if ok := matcher.Matches(relativePath, info.IsDir()); !ok { + if info.IsDir() { + return fs.SkipDir + } else { + return nil + } } newPath := filepath.Join(snapshotDirectory, relativePath) diff --git a/internal/gitaly/storage/storagemgr/partition/snapshot/snapshot_filter.go b/internal/gitaly/storage/storagemgr/partition/snapshot/snapshot_filter.go new file mode 100644 index 0000000000000000000000000000000000000000..df51226571a4323fc67e10e55705cb3be6894ab6 --- /dev/null +++ b/internal/gitaly/storage/storagemgr/partition/snapshot/snapshot_filter.go @@ -0,0 +1,177 @@ +package snapshot + +import ( + "fmt" + "os" + "path/filepath" + "strings" +) + +var ( + // GitalySnapshotFileIncludePatterns defines the list of include patterns for selecting + // files during a repository snapshot. There are four types of supported patterns: + // + // 1. Exact file: Matches a specific file path. + // Example: "objects/info/commit-graph" matches only that exact file. + // + // 2. Prefix pattern: Matches all files and directories under a given path prefix. + // The '*' wildcard is required. + // Example: "refs/*" matches all files and directories under the "refs/" directory. + // + // 3. Postfix pattern: Matches files with a specific suffix within a given directory. + // The '*' wildcard is required and must follow the format: