diff --git a/internal/gitaly/storage/storagemgr/partition/transaction_manager_housekeeping.go b/internal/gitaly/storage/storagemgr/partition/transaction_manager_housekeeping.go index d9ee19b46f757981d96add6749f329bdc99421f9..af0c3750abd28b6f606221872d8a89dd4805961d 100644 --- a/internal/gitaly/storage/storagemgr/partition/transaction_manager_housekeeping.go +++ b/internal/gitaly/storage/storagemgr/partition/transaction_manager_housekeeping.go @@ -694,6 +694,11 @@ func (mgr *TransactionManager) verifyPackRefsReftable(transaction *Transaction) // command runs and the time this transaction is logged. Thus, we need to verify if the transaction conflicts with the // current state of the repository. // +// After packing references, the function uses a post-order directory traversal to identify and remove +// empty directories. Directories are considered empty if they contain no entries or if all entries are +// already scheduled for deletion. This ensures proper cleanup of nested empty directories that may +// result from reference packing. +// // There are three cases when a reference is modified: // - Reference creation: this is the easiest case. The new reference exists as a loose reference on disk and shadows the // one in the packed-ref. @@ -755,45 +760,83 @@ func (mgr *TransactionManager) verifyPackRefsFiles(ctx context.Context, transact // path that became empty as a result of removing the references. As we're operating on the real // repository here, we can't actually perform deletions in it. We instead keep track of the // files and directories we've deleted in-memory to ensure we only delete empty directories. + // First pass: Mark references for deletion but don't make decisions about directories yet deletedPaths := make(map[string]struct{}, len(packRefs.PrunedRefs)) if err := prunedRefs.WalkPostOrder(func(path string, isDir bool) error { relativePath := filepath.Join(transaction.relativePath, path) + + // Don't touch protected directories if _, ok := directoriesToKeep[relativePath]; ok { return nil } - if isDir { - // If this is a directory, we need to ensure it is actually empty before removing - // it. Check if we find any directory entries we haven't yet deleted. - entries, err := os.ReadDir(mgr.getAbsolutePath(relativePath)) - if err != nil { - return fmt.Errorf("read dir: %w", err) - } + if !isDir { + deletedPaths[relativePath] = struct{}{} + transaction.walEntry.RemoveDirectoryEntry(relativePath) + return nil + } - for _, entry := range entries { - if _, ok := deletedPaths[filepath.Join(relativePath, entry.Name())]; ok { - // This path was already deleted. Don't consider it to exist. - continue - } + return nil + }); err != nil { + return nil, fmt.Errorf("walk post order for references: %w", err) + } - // This directory was not empty because someone concurrently wrote - // a reference into it. Keep it in place. - directoriesToKeep[relativePath] = struct{}{} - return nil - } + // Second pass: Process directories separately with a true post-order traversal + // This ensures all files are processed first, then directories are processed bottom-up + if err := mgr.forEachEmptyDir(filepath.Join(transaction.relativePath, "refs"), deletedPaths, func(path string) error { + // Skip if it is a protected directory or already deleted + if _, ok := deletedPaths[path]; ok { + return nil + } + + if _, ok := directoriesToKeep[path]; ok { + return nil } - deletedPaths[relativePath] = struct{}{} - transaction.walEntry.RemoveDirectoryEntry(relativePath) + deletedPaths[path] = struct{}{} + transaction.walEntry.RemoveDirectoryEntry(path) return nil }); err != nil { - return nil, fmt.Errorf("walk post order: %w", err) + return nil, fmt.Errorf("for each empty dir: %w", err) } return &gitalypb.LogEntry_Housekeeping_PackRefs{}, nil } +// forEachEmptyDir marks directories for deletion recursively in a post-order traversal, +// ensuring that parent directories are only processed after all their children have been processed. +func (mgr *TransactionManager) forEachEmptyDir(path string, deletedPaths map[string]struct{}, fn func(string) error) error { + // We need to check the actual storage path to make sure + // someone did not concurrently wrote a reference into it. + entries, err := os.ReadDir(mgr.getAbsolutePath(path)) + if err != nil { + return fmt.Errorf("read dir: %w", err) + } + + var hasFiles bool + for _, entry := range entries { + if entry.IsDir() { + childPath := filepath.Join(path, entry.Name()) + if err := mgr.forEachEmptyDir(childPath, deletedPaths, fn); err != nil { + return err + } + } else { + if _, ok := deletedPaths[filepath.Join(path, entry.Name())]; ok { + // This path was already deleted. Don't consider it to exist. + continue + } + hasFiles = true + } + } + + if hasFiles { + return nil + } + + return fn(path) +} + // prepareOffloading implements the core offloading logic within the transaction manager. // It must be called inside a snapshot repository and performs these steps: // diff --git a/internal/gitaly/storage/storagemgr/partition/transaction_manager_housekeeping_test.go b/internal/gitaly/storage/storagemgr/partition/transaction_manager_housekeeping_test.go index 1df0c368986b893f6c7c0ce090e37f187f272e89..fbd0b4efb861d70b59aec851a7b45b1293be282a 100644 --- a/internal/gitaly/storage/storagemgr/partition/transaction_manager_housekeeping_test.go +++ b/internal/gitaly/storage/storagemgr/partition/transaction_manager_housekeeping_test.go @@ -1384,6 +1384,56 @@ func generateHousekeepingPackRefsTests(t *testing.T, ctx context.Context, testPa }, }, }, + { + desc: "empty directories without references are pruned during pack-refs", + skip: func(t *testing.T) { + testhelper.SkipWithReftable(t, "we don't deal with directories for reftable") + }, + steps: steps{ + StartManager{ + ModifyStorage: func(tb testing.TB, cfg config.Cfg, storagePath string) { + repoPath := filepath.Join(storagePath, setup.RelativePath) + + // Create nested empty directories + nestedDirPath1 := filepath.Join(repoPath, "refs", "heads", "first-branch", "empty-dir", "another-empty-dir") + require.NoError(tb, os.MkdirAll(nestedDirPath1, mode.Directory)) + nestedDirPath2 := filepath.Join(repoPath, "refs", "heads", "second-branch", "empty-dir") + require.NoError(tb, os.MkdirAll(nestedDirPath2, mode.Directory)) + + // Add a reference that would be packed + gittest.WriteRef(tb, cfg, repoPath, "refs/heads/first-branch/someRef", setup.Commits.First.OID) + }, + }, + Begin{ + TransactionID: 1, + RelativePaths: []string{setup.RelativePath}, + }, + RunPackRefs{ + TransactionID: 1, + }, + Commit{ + TransactionID: 1, + }, + }, + expectedState: StateAssertion{ + Database: DatabaseState{ + string(keyAppliedLSN): storage.LSN(1).ToProto(), + }, + Repositories: RepositoryStates{ + relativePath: { + DefaultBranch: "refs/heads/main", + References: &ReferencesState{ + FilesBackend: &FilesBackendState{ + PackedReferences: map[git.ReferenceName]git.ObjectID{ + "refs/heads/first-branch/someRef": setup.Commits.First.OID, + }, + LooseReferences: map[git.ReferenceName]git.ObjectID{}, + }, + }, + }, + }, + }, + }, { desc: "housekeeping fails in read-only transaction", customSetup: customSetup,