diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000000000000000000000000000000000000..98c116c452c9e63688e36137d8834393506394b6 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "go.testEnvVars": { + "TEST_TMP_DIR": "/mnt/git-repositories/tmp_sv" + }, + "go.testFlags": ["-v"] +} diff --git a/go.mod b/go.mod index 8854dc3f851b2db810469c76cb90f4985766acc0..66ade46969b5e66511432509646f0649e29ddf63 100644 --- a/go.mod +++ b/go.mod @@ -86,6 +86,7 @@ require ( github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.48.1 // indirect github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.48.1 // indirect github.com/HdrHistogram/hdrhistogram-go v1.1.2 // indirect + github.com/KarpelesLab/reflink v1.0.2 // indirect github.com/Microsoft/go-winio v0.6.1 // indirect github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74 // indirect github.com/avast/retry-go v3.0.0+incompatible // indirect diff --git a/go.sum b/go.sum index 2dd632e8c4fe304aac2bbc675057aa8a8c63c56f..21ee872e9e57dd72b8face53350d91000ea06754 100644 --- a/go.sum +++ b/go.sum @@ -108,6 +108,8 @@ github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapp github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.48.1/go.mod h1:viRWSEhtMZqz1rhwmOVKkWl6SwmVowfL9O2YR5gI2PE= github.com/HdrHistogram/hdrhistogram-go v1.1.2 h1:5IcZpTvzydCQeHzK4Ef/D5rrSqwxob0t8PQPMybUNFM= github.com/HdrHistogram/hdrhistogram-go v1.1.2/go.mod h1:yDgFjdqOqDEKOvasDdhWNXYg9BVp4O+o5f6V/ehm6Oo= +github.com/KarpelesLab/reflink v1.0.2 h1:hQ1aM3TmjU2kTNUx5p/HaobDoADYk+a6AuEinG4Cv88= +github.com/KarpelesLab/reflink v1.0.2/go.mod h1:WGkTOKNjd1FsJKBw3mu4JvrPEDJyJJ+JPtxBkbPoCok= github.com/Microsoft/go-winio v0.5.2/go.mod h1:WpS1mjBmmwHBEWmogvA2mj8546UReBk4v8QkMxJ6pZY= github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= diff --git a/internal/git/objectpool/disconnect.go b/internal/git/objectpool/disconnect.go index 8f776ed38b8853bd476ea4f2a20afb7382276f0a..63f046d2e098ac813584df7a8c7f8a8146193435 100644 --- a/internal/git/objectpool/disconnect.go +++ b/internal/git/objectpool/disconnect.go @@ -16,6 +16,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v18/internal/git/stats" "gitlab.com/gitlab-org/gitaly/v18/internal/gitaly/storage" "gitlab.com/gitlab-org/gitaly/v18/internal/gitaly/transaction" + "gitlab.com/gitlab-org/gitaly/v18/internal/helper" "gitlab.com/gitlab-org/gitaly/v18/internal/helper/text" "gitlab.com/gitlab-org/gitaly/v18/internal/log" "gitlab.com/gitlab-org/gitaly/v18/internal/transaction/voting" @@ -209,7 +210,7 @@ func removeAlternatesIfOk(ctx context.Context, repo *localrepo.Repo, altFile, ba return fmt.Errorf("preparatory vote for disconnecting alternate: %w", err) } - if err := os.Rename(altFile, backupFile); err != nil { + if err := helper.RenameDirectoryWithReflink(altFile, backupFile); err != nil { return err } @@ -225,12 +226,12 @@ func removeAlternatesIfOk(ctx context.Context, repo *localrepo.Repo, altFile, ba // preserved for possible forensic use. tmp := backupFile + ".2" - if err := os.Link(backupFile, tmp); err != nil { + if err := helper.ReflinkWithPermissions(backupFile, tmp); err != nil { logger.WithError(err).ErrorContext(ctx, "copy backup alternates file") return } - if err := os.Rename(tmp, altFile); err != nil { + if err := helper.RenameDirectoryWithReflink(tmp, altFile); err != nil { logger.WithError(err).ErrorContext(ctx, "restore backup alternates file") } }() diff --git a/internal/git/objectpool/link.go b/internal/git/objectpool/link.go index e284493196ef4319670efd8722915c76a9b9925b..5924277a3ebea84def3f76f47d6ec5cd25435a60 100644 --- a/internal/git/objectpool/link.go +++ b/internal/git/objectpool/link.go @@ -76,7 +76,7 @@ func Link(ctx context.Context, pool, repo *localrepo.Repo, txManager transaction } if err := tx.FS().RecordFile(alternatesRelativePath); err != nil { - return fmt.Errorf("record alternates file") + return fmt.Errorf("record alternates file: %w", err) } } diff --git a/internal/git/quarantine/quarantine.go b/internal/git/quarantine/quarantine.go index 23c7354eefb6ff4806c18c78c5eaa1a97fb2ec9b..de8e41f6a804cb144a592bafca2b06c8b5ffc85b 100644 --- a/internal/git/quarantine/quarantine.go +++ b/internal/git/quarantine/quarantine.go @@ -11,6 +11,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v18/internal/gitaly/storage" "gitlab.com/gitlab-org/gitaly/v18/internal/gitaly/storage/mode" + "gitlab.com/gitlab-org/gitaly/v18/internal/helper" "gitlab.com/gitlab-org/gitaly/v18/internal/log" "gitlab.com/gitlab-org/gitaly/v18/internal/safe" "gitlab.com/gitlab-org/gitaly/v18/internal/structerr" @@ -178,12 +179,12 @@ func migrate(ctx context.Context, sourcePath, targetPath string) error { func finalizeObjectFile(sourcePath, targetPath string) error { // We first try to link the file via a hardlink. The benefit compared to doing a rename is // that in case of a collision, we do not replace the target. - err := os.Link(sourcePath, targetPath) + err := helper.ReflinkWithPermissions(sourcePath, targetPath) // In case the hardlink failed, we fall back to a rename. renamed := false if err != nil && !errors.Is(err, os.ErrExist) { - err = os.Rename(sourcePath, targetPath) + err = helper.RenameDirectoryWithReflink(sourcePath, targetPath) renamed = err == nil } diff --git a/internal/gitaly/repoutil/custom_hooks.go b/internal/gitaly/repoutil/custom_hooks.go index 9e7955ff499f9c0c5fcfe2ca83ebc0482823e005..93e76384a0cd03751c2b74cdfb4a43232f05eddb 100644 --- a/internal/gitaly/repoutil/custom_hooks.go +++ b/internal/gitaly/repoutil/custom_hooks.go @@ -17,6 +17,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v18/internal/gitaly/storage" "gitlab.com/gitlab-org/gitaly/v18/internal/gitaly/storage/mode" "gitlab.com/gitlab-org/gitaly/v18/internal/gitaly/transaction" + "gitlab.com/gitlab-org/gitaly/v18/internal/helper" "gitlab.com/gitlab-org/gitaly/v18/internal/log" "gitlab.com/gitlab-org/gitaly/v18/internal/safe" "gitlab.com/gitlab-org/gitaly/v18/internal/structerr" @@ -107,7 +108,7 @@ func SetCustomHooks( ) error { repoPath, err := locator.GetRepoPath(ctx, repo) if err != nil { - return fmt.Errorf("getting repo path: %w", err) + return fmt.Errorf("getting repo path: %s:: %w", repo.GetRelativePath(), err) } var originalCustomHooksRelativePath string @@ -192,24 +193,26 @@ func SetCustomHooks( // If the `custom_hooks` directory exists in the repository, move the // current hooks to `previous_hooks` in the temporary directory. - if err := os.Rename(repoHooksPath, prevHooksPath); err != nil && !errors.Is(err, fs.ErrNotExist) { + if err := helper.RenameDirectoryWithReflink(repoHooksPath, prevHooksPath); err != nil && !errors.Is(err, fs.ErrNotExist) { return fmt.Errorf("moving current hooks to temp: %w", err) } syncer := safe.NewSyncer() + // Ensure parent directories exist before syncing + if err := os.MkdirAll(filepath.Dir(tempHooksPath), mode.Directory); err != nil { + return fmt.Errorf("making repo hooks parent directories: %w", err) + } + if storage.NeedsSync(ctx) { - // Sync the custom hooks in the temporary directory before being moved into - // the repository. This makes the move atomic as there is no state where the - // move succeeds, but the hook files themselves are not yet on the disk, or - // are partially written. + // Then sync all the hook files recursively if err := syncer.SyncRecursive(ctx, tempHooksPath); err != nil { return fmt.Errorf("syncing extracted custom hooks: %w", err) } } // Move `custom_hooks` from the temporary directory to the repository. - if err := os.Rename(tempHooksPath, repoHooksPath); err != nil { + if err := helper.RenameDirectoryWithReflink(tempHooksPath, repoHooksPath); err != nil { return fmt.Errorf("moving new hooks to repo: %w", err) } diff --git a/internal/gitaly/repoutil/remove.go b/internal/gitaly/repoutil/remove.go index fca1f80a27cdba36ed8a3427ceeb8bb4fda6b12b..b7a0fc8263248e1158801b3ec742f3ab1535e91a 100644 --- a/internal/gitaly/repoutil/remove.go +++ b/internal/gitaly/repoutil/remove.go @@ -11,6 +11,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v18/internal/gitaly/storage/counter" "gitlab.com/gitlab-org/gitaly/v18/internal/gitaly/storage/mode" "gitlab.com/gitlab-org/gitaly/v18/internal/gitaly/transaction" + "gitlab.com/gitlab-org/gitaly/v18/internal/helper" "gitlab.com/gitlab-org/gitaly/v18/internal/log" "gitlab.com/gitlab-org/gitaly/v18/internal/safe" "gitlab.com/gitlab-org/gitaly/v18/internal/structerr" @@ -126,7 +127,7 @@ func remove( // We move the repository into our temporary directory first before we start to // delete it. This is done such that we don't leave behind a partially-removed and // thus likely corrupt repository. - if err := os.Rename(path, filepath.Join(destDir, "repo")); err != nil { + if err := helper.RenameDirectoryWithReflink(path, filepath.Join(destDir, "repo")); err != nil { return structerr.NewInternal("staging repository for removal: %w", err) } diff --git a/internal/gitaly/service/repository/replicate.go b/internal/gitaly/service/repository/replicate.go index e9310c0a833aad33723a764b59e5aa89dd6fb6a2..2a4156535517393b5463e76a17724f810d003157 100644 --- a/internal/gitaly/service/repository/replicate.go +++ b/internal/gitaly/service/repository/replicate.go @@ -24,6 +24,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v18/internal/gitaly/transaction" "gitlab.com/gitlab-org/gitaly/v18/internal/grpc/client" "gitlab.com/gitlab-org/gitaly/v18/internal/grpc/metadata" + "gitlab.com/gitlab-org/gitaly/v18/internal/helper" "gitlab.com/gitlab-org/gitaly/v18/internal/safe" "gitlab.com/gitlab-org/gitaly/v18/internal/structerr" "gitlab.com/gitlab-org/gitaly/v18/internal/tempdir" @@ -406,7 +407,7 @@ func (s *server) extractTarToDirectory(ctx context.Context, reader io.Reader, ta return fmt.Errorf("removing existing file for hard link %s: %w", targetPath, err) } - if err := os.Link(linkTarget, targetPath); err != nil { + if err := helper.ReflinkWithPermissions(linkTarget, targetPath); err != nil { return fmt.Errorf("creating hard link %s -> %s: %w", targetPath, linkTarget, err) } diff --git a/internal/gitaly/storage/fs.go b/internal/gitaly/storage/fs.go index 691ec47801f6ac52c1f7307e100ef66d8061bd6f..a2fca8eea3a39aca7a3f63405a65b4127b72e845 100644 --- a/internal/gitaly/storage/fs.go +++ b/internal/gitaly/storage/fs.go @@ -9,6 +9,7 @@ import ( "strings" "gitlab.com/gitlab-org/gitaly/v18/internal/gitaly/storage/mode" + "gitlab.com/gitlab-org/gitaly/v18/internal/helper" "gitlab.com/gitlab-org/gitaly/v18/internal/structerr" ) @@ -40,8 +41,8 @@ func newTargetIsFileError(path string) error { // Link creates a hard link from source to destination and records the operation. func Link(f FS, source, destination string) error { - if err := os.Link(filepath.Join(f.Root(), source), filepath.Join(f.Root(), destination)); err != nil { - return fmt.Errorf("link: %w", err) + if err := helper.ReflinkWithPermissions(filepath.Join(f.Root(), source), filepath.Join(f.Root(), destination)); err != nil { + return fmt.Errorf("reflink auto: %w", err) } if err := f.RecordLink(source, destination); err != nil { diff --git a/internal/gitaly/storage/storagemgr/partition/apply_operations.go b/internal/gitaly/storage/storagemgr/partition/apply_operations.go index c5ba4c0d8e5ac18f5d033e011bc2455b5f654829..694eef774b340815dbca1ee092a779cdec423ca7 100644 --- a/internal/gitaly/storage/storagemgr/partition/apply_operations.go +++ b/internal/gitaly/storage/storagemgr/partition/apply_operations.go @@ -7,8 +7,11 @@ import ( "io/fs" "os" "path/filepath" + "syscall" "gitlab.com/gitlab-org/gitaly/v18/internal/gitaly/storage/keyvalue" + + "gitlab.com/gitlab-org/gitaly/v18/internal/helper" "gitlab.com/gitlab-org/gitaly/v18/proto/go/gitalypb" ) @@ -34,11 +37,11 @@ func applyOperations(ctx context.Context, sync func(context.Context, string) err } destinationPath := string(op.GetDestinationPath()) - if err := os.Link( + if err := LinkOrReflink( filepath.Join(basePath, string(op.GetSourcePath())), filepath.Join(storageRoot, destinationPath), ); err != nil && !errors.Is(err, fs.ErrExist) { - return fmt.Errorf("link: %w", err) + return fmt.Errorf("linkOrReflink: %w", err) } // Sync the parent directory of the newly created directory entry. @@ -47,6 +50,7 @@ func applyOperations(ctx context.Context, sync func(context.Context, string) err op := wrapper.CreateDirectory path := string(op.GetPath()) + if err := os.Mkdir(filepath.Join(storageRoot, path), fs.FileMode(op.GetMode())); err != nil && !errors.Is(err, fs.ErrExist) { return fmt.Errorf("mkdir: %w", err) } @@ -57,8 +61,8 @@ func applyOperations(ctx context.Context, sync func(context.Context, string) err dirtyDirectories[filepath.Dir(path)] = struct{}{} case *gitalypb.LogEntry_Operation_RemoveDirectoryEntry_: op := wrapper.RemoveDirectoryEntry - path := string(op.GetPath()) + if err := os.Remove(filepath.Join(storageRoot, path)); err != nil && !errors.Is(err, fs.ErrNotExist) { return fmt.Errorf("remove: %w", err) } @@ -87,10 +91,31 @@ func applyOperations(ctx context.Context, sync func(context.Context, string) err // Sync all the dirty directories. for relativePath := range dirtyDirectories { - if err := sync(ctx, filepath.Join(storageRoot, relativePath)); err != nil { + absPath := filepath.Join(storageRoot, relativePath) + + if err := sync(ctx, absPath); err != nil { return fmt.Errorf("sync: %w", err) } } return nil } + +// LinkOrReflink attempts to create a hard link, and if that fails due to cross-device, +// falls back to creating a reflink (COW copy) on btrfs, or a regular copy as last resort. +func LinkOrReflink(src, dst string) error { + // Try hard link first (fast path for same subvolume) + if err := os.Link(src, dst); err == nil { + return nil + } else if !errors.Is(err, syscall.EXDEV) && !errors.Is(err, fs.ErrExist) { + // Some error other than cross-device or already exists + return fmt.Errorf("link: %w", err) + } + + // Hard link failed due to cross-device - use reflink + if err := helper.ReflinkWithPermissions(src, dst); err != nil { + return fmt.Errorf("reflink: %w", err) + } + + return nil +} diff --git a/internal/gitaly/storage/storagemgr/partition/conflict/fshistory/errors.go b/internal/gitaly/storage/storagemgr/partition/conflict/fshistory/errors.go index 5de3b1cfdaf43e29e889fa44ee9a8ed2b41aade1..212efd9aa0a7bf9645bb8a43ee7116ea675b99f2 100644 --- a/internal/gitaly/storage/storagemgr/partition/conflict/fshistory/errors.go +++ b/internal/gitaly/storage/storagemgr/partition/conflict/fshistory/errors.go @@ -1,6 +1,8 @@ package fshistory import ( + "fmt" + "gitlab.com/gitlab-org/gitaly/v18/internal/gitaly/storage" "gitlab.com/gitlab-org/gitaly/v18/internal/structerr" ) @@ -14,8 +16,8 @@ type ReadWriteConflictError struct { } // Error returns the error message. -func (ReadWriteConflictError) Error() string { - return "path was modified after read" +func (r ReadWriteConflictError) Error() string { + return fmt.Sprintf("path %s was modified after read at lsn %d and write lsn %d\n", r.Path, r.ReadLSN.ToProto().Value, r.WriteLSN.ToProto().Value) } // NewReadWriteConflictError returns an error detailing a conflicting read. diff --git a/internal/gitaly/storage/storagemgr/partition/snapshot/snapshot.go b/internal/gitaly/storage/storagemgr/partition/snapshot/snapshot.go index d7377a29e48601ca23d58059b5d0b2b5a5170f4b..87b5ed48cbfcf0a8b10e7db50b37c872d0842906 100644 --- a/internal/gitaly/storage/storagemgr/partition/snapshot/snapshot.go +++ b/internal/gitaly/storage/storagemgr/partition/snapshot/snapshot.go @@ -6,8 +6,11 @@ import ( "fmt" "io/fs" "os" + "os/exec" "path/filepath" "strings" + "sync" + "syscall" "time" "gitlab.com/gitlab-org/gitaly/v18/internal/gitaly/storage" @@ -56,6 +59,9 @@ type snapshot struct { readOnly bool // stats contains statistics related to the snapshot. stats snapshotStatistics + // repositoryPaths contains the paths of individual repository subvolumes + // that need to be cleaned up. The root itself is just a regular directory. + repositoryPaths []string } // Root returns the root of the snapshot's file system. @@ -74,17 +80,56 @@ func (s *snapshot) RelativePath(relativePath string) string { return filepath.Join(s.prefix, relativePath) } -// Closes removes the snapshot. func (s *snapshot) Close() error { + var errs []error if s.readOnly { - // Make the directories writable again so we can remove the snapshot. - if err := s.setDirectoryMode(mode.Directory); err != nil { - return fmt.Errorf("make writable: %w", err) + // First, make all btrfs subvolumes writable + for _, repoPath := range s.repositoryPaths { + // Make the subvolume writable + _ = setBtrfsReadOnly(context.Background(), repoPath, false) + // Restore write permissions to directories inside the subvolume + // so we can delete files + _ = storage.SetDirectoryMode(repoPath, mode.Directory) } } + // Restore write permissions to parent directories + _ = os.Chmod(s.root, mode.Directory) + for _, repoPath := range s.repositoryPaths { + dir := filepath.Dir(repoPath) + for dir != s.root && dir != "." && dir != "/" { + _ = os.Chmod(dir, mode.Directory) + dir = filepath.Dir(dir) + } + } + + // Use a WaitGroup for concurrency + var wg sync.WaitGroup + errChan := make(chan error, len(s.repositoryPaths)) + + // Now delete individual repository subvolumes concurrently + for _, repoPath := range s.repositoryPaths { + wg.Add(1) + go func(path string) { + defer wg.Done() + // Use the btrfs subvolume delete command directly, it is designed for this + if err := deleteBtrfsSnapshot(context.Background(), path, false); err != nil { + errChan <- fmt.Errorf("delete repository snapshot %s: %w", path, err) + } + }(repoPath) + } + wg.Wait() + close(errChan) + for err := range errChan { + errs = append(errs, err) + } + + // Finally delete the root if err := os.RemoveAll(s.root); err != nil { - return fmt.Errorf("remove all: %w", err) + errs = append(errs, fmt.Errorf("delete snapshot root %s: %w", s.root, err)) + } + if len(errs) > 0 { + return errors.Join(errs...) } return nil @@ -120,15 +165,16 @@ func newSnapshot(ctx context.Context, storageRoot, snapshotRoot string, relative } }() - if err := createRepositorySnapshots(ctx, storageRoot, snapshotRoot, relativePaths, snapshotFilter, &s.stats); err != nil { + // Create all repository snapshots (they handle their own read-only conversion internally) + if err := createRepositorySnapshots(ctx, storageRoot, snapshotRoot, relativePaths, snapshotFilter, readOnly, s); err != nil { return nil, fmt.Errorf("create repository snapshots: %w", err) } if readOnly { // Now that we've finished creating the snapshot, change the directory permissions to read-only // to prevent writing in the snapshot. - if err := s.setDirectoryMode(ModeReadOnlyDirectory); err != nil { - return nil, fmt.Errorf("make read-only: %w", err) + if err := setParentDirectoriesReadOnly(s); err != nil { + return nil, fmt.Errorf("make parent directories read-only: %w", err) } } @@ -136,14 +182,48 @@ func newSnapshot(ctx context.Context, storageRoot, snapshotRoot string, relative return s, nil } -// createRepositorySnapshots creates a snapshot of the partition containing all repositories at the given relative paths -// and their alternates. +// setParentDirectoriesReadOnly makes all parent directories (non-subvolume directories) read-only. +// This is called after all repository snapshots have been created. +func setParentDirectoriesReadOnly(s *snapshot) error { + // Walk the snapshot root and make directories read-only, but skip the repository subvolumes + // since they were already made read-only by createBtrfsSnapshotAndFilter + repoPathSet := make(map[string]struct{}) + for _, repoPath := range s.repositoryPaths { + repoPathSet[repoPath] = struct{}{} + } + + return filepath.Walk(s.root, func(path string, info fs.FileInfo, err error) error { + if err != nil { + return err + } + + if !info.IsDir() { + return nil + } + + // Skip repository subvolumes - they're already read-only + if _, isRepoPath := repoPathSet[path]; isRepoPath { + return fs.SkipDir + } + + // Make this parent directory read-only + if err := os.Chmod(path, ModeReadOnlyDirectory); err != nil { + return fmt.Errorf("chmod %s: %w", path, err) + } + + return nil + }) +} + +// createRepositorySnapshots creates btrfs snapshots of all repositories at the given relative paths +// and their alternates, then applies filtering to remove unwanted files. func createRepositorySnapshots(ctx context.Context, storageRoot, snapshotRoot string, relativePaths []string, - snapshotFilter Filter, stats *snapshotStatistics, + snapshotFilter Filter, readOnly bool, s *snapshot, ) error { // Create the root directory always to as the storage would also exist always. - stats.directoryCount++ - if err := os.Mkdir(snapshotRoot, mode.Directory); err != nil { + s.stats.directoryCount++ + + if err := ensureDir0700(snapshotRoot); err != nil { return fmt.Errorf("mkdir snapshot root: %w", err) } @@ -153,7 +233,7 @@ func createRepositorySnapshots(ctx context.Context, storageRoot, snapshotRoot st continue } - if err := createParentDirectories(storageRoot, snapshotRoot, relativePath, stats); err != nil { + if err := createParentDirectories(storageRoot, snapshotRoot, relativePath, &s.stats); err != nil { return fmt.Errorf("create parent directories: %w", err) } @@ -171,7 +251,7 @@ func createRepositorySnapshots(ctx context.Context, storageRoot, snapshotRoot st return fmt.Errorf("validate git directory: %w", err) } - if err := createRepositorySnapshot(ctx, storageRoot, snapshotRoot, relativePath, snapshotFilter, stats); err != nil { + if err := createRepositorySnapshot(ctx, storageRoot, snapshotRoot, relativePath, snapshotFilter, readOnly, s); err != nil { return fmt.Errorf("create snapshot: %w", err) } @@ -190,7 +270,7 @@ func createRepositorySnapshots(ctx context.Context, storageRoot, snapshotRoot st continue } - if err := createParentDirectories(storageRoot, snapshotRoot, alternateRelativePath, stats); err != nil { + if err := createParentDirectories(storageRoot, snapshotRoot, alternateRelativePath, &s.stats); err != nil { return fmt.Errorf("create parent directories: %w", err) } @@ -200,7 +280,8 @@ func createRepositorySnapshots(ctx context.Context, storageRoot, snapshotRoot st snapshotRoot, alternateRelativePath, snapshotFilter, - stats, + readOnly, + s, ); err != nil { return fmt.Errorf("create alternate snapshot: %w", err) } @@ -208,7 +289,6 @@ func createRepositorySnapshots(ctx context.Context, storageRoot, snapshotRoot st snapshottedRepositories[alternateRelativePath] = struct{}{} } } - return nil } @@ -254,68 +334,291 @@ func createParentDirectories(storageRoot, snapshotRoot, relativePath string, sta return nil } -// createRepositorySnapshot snapshots a repository's current state at snapshotPath. This is done by -// recreating the repository's directory structure and hard linking the repository's files in their -// correct locations there. This effectively does a copy-free clone of the repository. Since the files -// are shared between the snapshot and the repository, they must not be modified. Git doesn't modify -// existing files but writes new ones so this property is upheld. +// ensureDir0700 creates path (and parents) and enforces 0700 permissions. +// This guarantees the created directory has owner-only permissions regardless of umask. +func ensureDir0700(path string) error { + if err := os.MkdirAll(path, 0o700); err != nil { + return fmt.Errorf("create parent directories: %w", err) + } + // Explicitly set permissions to 0700 to avoid umask influence and ensure tests that + // assert specific directory modes are stable. + if err := os.Chmod(path, 0o700); err != nil { + return fmt.Errorf("set directory permissions: %w", err) + } + return nil +} + +// createRepositorySnapshot creates a snapshot of a repository using btrfs snapshots. +// The source repository is expected to be a btrfs subvolume for optimal performance. +// If the source is not a btrfs subvolume, it will be converted to one first. func createRepositorySnapshot(ctx context.Context, storageRoot, snapshotRoot, relativePath string, - snapshotFilter Filter, stats *snapshotStatistics, + snapshotFilter Filter, readOnly bool, s *snapshot, +) error { + originalPath := filepath.Join(storageRoot, relativePath) + snapshotPath := filepath.Join(snapshotRoot, relativePath) + + // Check if the original directory exists + if _, err := os.Stat(originalPath); err != nil { + if errors.Is(err, fs.ErrNotExist) { + return nil + } + return fmt.Errorf("stat original directory: %w", err) + } + + // Check if the source is a btrfs subvolume + isBtrfsSubvolume, err := isBtrfsSubvolume(originalPath) + if err != nil { + return fmt.Errorf("check if btrfs subvolume: %w", err) + } + + // If it's not a subvolume, convert it to one + if !isBtrfsSubvolume { + if err := convertToSubvolume(ctx, originalPath); err != nil { + return fmt.Errorf("convert to subvolume: %w", err) + } + } + + // Now create the btrfs snapshot and apply filtering + if err := createBtrfsSnapshotAndFilter(ctx, originalPath, snapshotPath, snapshotFilter, readOnly, s); err != nil { + return fmt.Errorf("create btrfs snapshot and filter: %w", err) + } + + // Track this repository subvolume for cleanup + s.repositoryPaths = append(s.repositoryPaths, snapshotPath) + + return nil +} + +// createBtrfsSnapshotAndFilter creates a btrfs snapshot and applies filtering. +func createBtrfsSnapshotAndFilter(ctx context.Context, originalPath, snapshotPath string, + snapshotFilter Filter, readOnly bool, s *snapshot, ) error { - if err := createDirectorySnapshot(ctx, filepath.Join(storageRoot, relativePath), - filepath.Join(snapshotRoot, relativePath), - snapshotFilter, stats); err != nil { - return fmt.Errorf("create directory snapshot: %w", err) + // Create btrfs snapshot - always create as writable initially + if err := createBtrfsSnapshot(ctx, originalPath, snapshotPath, false); err != nil { + return fmt.Errorf("create btrfs snapshot: %w", err) } + + // Apply the filter by removing unwanted files/directories from the snapshot + if err := applyFilterToSnapshot(ctx, snapshotPath, snapshotFilter, &s.stats); err != nil { + return fmt.Errorf("apply filter to snapshot: %w", err) + } + + // If we're making a read-only snapshot, make the btrfs subvolume read-only + if readOnly { + // First, set all directories within this subvolume to read-only permissions + // This changes the actual permission bits that will be visible to users + if err := storage.SetDirectoryMode(snapshotPath, ModeReadOnlyDirectory); err != nil { + return fmt.Errorf("set directory mode: %w", err) + } + // Make the btrfs subvolume read-only + // This protects the files without needing to change directory permissions + if err := setBtrfsReadOnly(ctx, snapshotPath, true); err != nil { + return fmt.Errorf("make snapshot read-only: %w", err) + } + } + return nil } -// createDirectorySnapshot recursively recreates the directory structure from originalDirectory into -// snapshotDirectory and hard links files into the same locations in snapshotDirectory. -// -// matcher is needed to track which paths we want to include in the snapshot. -func createDirectorySnapshot(ctx context.Context, originalDirectory, snapshotDirectory string, matcher Filter, stats *snapshotStatistics) error { - if err := filepath.Walk(originalDirectory, func(oldPath string, info fs.FileInfo, err error) error { +// applyFilterToSnapshot walks through the snapshot and removes files/directories that don't match the filter. +func applyFilterToSnapshot(ctx context.Context, snapshotPath string, matcher Filter, stats *snapshotStatistics) error { + // We need to walk the snapshot and remove files that don't match the filter + // We collect paths to remove first, then remove them in reverse order (deepest first) + var pathsToRemove []string + + err := filepath.Walk(snapshotPath, func(path string, info fs.FileInfo, err error) error { if err != nil { - if errors.Is(err, fs.ErrNotExist) && oldPath == originalDirectory { - // The directory being snapshotted does not exist. This is fine as the transaction - // may be about to create it. - return nil - } + return err + } + if err := ctx.Err(); err != nil { return err } - relativePath, err := filepath.Rel(originalDirectory, oldPath) + relativePath, err := filepath.Rel(snapshotPath, path) if err != nil { return fmt.Errorf("rel: %w", err) } + // Skip the root directory + if relativePath == "." { + return nil + } + if !matcher.Matches(relativePath) { + pathsToRemove = append(pathsToRemove, path) if info.IsDir() { return fs.SkipDir } - return nil + } else { + // Count the files and directories that we're keeping + if info.IsDir() { + stats.directoryCount++ + } else if info.Mode().IsRegular() { + stats.fileCount++ + } } - newPath := filepath.Join(snapshotDirectory, relativePath) - if info.IsDir() { - stats.directoryCount++ - if err := os.Mkdir(newPath, info.Mode().Perm()); err != nil { - return fmt.Errorf("create dir: %w", err) - } - } else if info.Mode().IsRegular() { - stats.fileCount++ - if err := os.Link(oldPath, newPath); err != nil { - return fmt.Errorf("link file: %w", err) - } + return nil + }) + if err != nil { + return fmt.Errorf("walk snapshot: %w", err) + } + + // Remove paths in reverse order (deepest first) + for i := len(pathsToRemove) - 1; i >= 0; i-- { + if err := os.RemoveAll(pathsToRemove[i]); err != nil { + return fmt.Errorf("remove filtered path %s: %w", pathsToRemove[i], err) + } + } + + return nil +} + +// createBtrfsSnapshot creates a btrfs subvolume snapshot from source to destination. +// If readOnly is true, the snapshot will be created as a read-only subvolume. +func createBtrfsSnapshot(ctx context.Context, source, destination string, readOnly bool) error { + // Build the btrfs subvolume snapshot command + args := []string{"subvolume", "snapshot"} + + // Add read-only flag if requested + if readOnly { + args = append(args, "-r") + } + + args = append(args, source, destination) + + cmd := exec.CommandContext(ctx, "btrfs", args...) + + if output, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("btrfs snapshot failed: %w, output: %s", err, string(output)) + } + + return nil +} + +// isBtrfsSubvolume checks if the given path is a btrfs subvolume. +func isBtrfsSubvolume(path string) (bool, error) { + var stat syscall.Stat_t + if err := syscall.Stat(path, &stat); err != nil { + if errors.Is(err, os.ErrNotExist) { + return false, nil + } + return false, fmt.Errorf("stat path: %w", err) + } + + // Btrfs subvolumes always have inode number 256 + return stat.Ino == 256, nil +} + +// convertToSubvolume converts a regular directory to a btrfs subvolume in place. +func convertToSubvolume(ctx context.Context, path string) error { + // fmt.Println("Converting to btrfs subvolume:", path) + // Acquire an exclusive lock on the repository + lockPath := path + ".lock" + lockFile, err := os.OpenFile(lockPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o644) + if err != nil { + return fmt.Errorf("acquire lock: %w", err) + } + defer func() { + lockFile.Close() + os.Remove(lockPath) + }() + + tempPath := path + ".subvol-temp" + + // Clean up any existing temp subvolume from previous failed attempts + if _, err := os.Stat(tempPath); err == nil { + if err := deleteBtrfsSnapshot(ctx, tempPath, false); err != nil { + return fmt.Errorf("cleanup existing temp subvolume: %w", err) + } + } + + // Create new subvolume with temporary name + cmd := exec.CommandContext(ctx, "btrfs", "subvolume", "create", tempPath) + if output, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("create subvolume: %w, output: %s", err, string(output)) + } + + // Ensure cleanup on any subsequent failure + var cleanupTemp bool = true + defer func() { + if cleanupTemp { + _ = deleteBtrfsSnapshot(ctx, tempPath, false) + } + }() + + // Copy contents from old directory to new subvolume + cmd = exec.CommandContext(ctx, "cp", "-a", path+"/.", tempPath+"/") + if output, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("copy contents: %w, output: %s", err, string(output)) + } + + // Atomic swap using rename + backupPath := path + ".old-backup" + + // Rename old to backup + if err := os.Rename(path, backupPath); err != nil { + return fmt.Errorf("backup old directory: %w", err) + } + + // Ensure cleanup of backup + defer func() { + if cleanupTemp { + // Restore backup if something failed + _ = os.Rename(backupPath, path) } else { - return fmt.Errorf("unsupported file mode: %q", info.Mode()) + // Remove backup on success + _ = os.RemoveAll(backupPath) } + }() - return nil - }); err != nil { - return fmt.Errorf("walk: %w", err) + // Rename subvolume to original path + if err := os.Rename(tempPath, path); err != nil { + return fmt.Errorf("rename subvolume: %w", err) + } + + // Success - don't clean up the temp path + cleanupTemp = false + return nil +} + +// setBtrfsReadOnly sets or unsets the read-only property of a btrfs subvolume. +func setBtrfsReadOnly(ctx context.Context, path string, readOnly bool) error { + value := "false" + if readOnly { + value = "true" + } + + cmd := exec.CommandContext(ctx, "btrfs", "property", "set", "-ts", path, "ro", value) + + if output, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("btrfs property set ro=%s failed: %w for path: %s, output: %s", value, err, path, string(output)) + } + + return nil +} + +// deleteBtrfsSnapshot deletes a btrfs subvolume snapshot. +// If the snapshot is read-only, it first changes it to read-write before deletion. +func deleteBtrfsSnapshot(ctx context.Context, snapshotPath string, wasReadOnly bool) error { + // If it was a read-only snapshot, ensure it's writable before deletion + if wasReadOnly { + // Try to make it writable (ignore errors - it might not be a subvolume) + _ = setBtrfsReadOnly(ctx, snapshotPath, false) + + // Also restore directory write permissions + // _ = storage.SetDirectoryMode(snapshotPath, mode.Directory) + } + + // Try to delete as a btrfs subvolume first + cmd := exec.CommandContext(ctx, "sudo", "btrfs", "subvolume", "delete", snapshotPath) + _, err := cmd.CombinedOutput() + if err != nil { + // Try regular removal + if err := os.RemoveAll(snapshotPath); err != nil { + return fmt.Errorf("failed to delete %s as subvolume or directory: %w", snapshotPath, err) + } } return nil diff --git a/internal/gitaly/storage/storagemgr/partition/transaction_manager.go b/internal/gitaly/storage/storagemgr/partition/transaction_manager.go index 8056d30a85b3ad095e1b12ff6636601d2a8a33af..cda591aa4962c611fdde6d9699fa218a70e8c84a 100644 --- a/internal/gitaly/storage/storagemgr/partition/transaction_manager.go +++ b/internal/gitaly/storage/storagemgr/partition/transaction_manager.go @@ -1572,6 +1572,7 @@ func (mgr *TransactionManager) preparePackRefsReftable(ctx context.Context, tran // https://gitlab.com/gitlab-org/git/-/issues/222 func (mgr *TransactionManager) preparePackRefsFiles(ctx context.Context, transaction *Transaction) error { runPackRefs := transaction.runHousekeeping.packRefs + for _, lock := range []string{".new", ".lock"} { lockRelativePath := filepath.Join(transaction.relativePath, "packed-refs"+lock) lockAbsolutePath := filepath.Join(transaction.snapshot.Root(), lockRelativePath) @@ -1595,7 +1596,6 @@ func (mgr *TransactionManager) preparePackRefsFiles(ctx context.Context, transac if err != nil { return err } - relPath, err := filepath.Rel(repoPath, path) if err != nil { return fmt.Errorf("extracting ref name: %w", err) @@ -1906,6 +1906,10 @@ func (mgr *TransactionManager) verifyFileSystemOperations(ctx context.Context, t isTablesList := func(path string) bool { return path == filepath.Join(tx.relativePath, "reftable", "tables.list") } + // // isPackedRefs returns true if this is the packed-ref file + // isPackedRefs := func(path string) bool { + // return path == filepath.Join(tx.relativePath, "packed-refs") + // } for _, op := range tx.walEntry.Operations() { switch op.GetOperation().(type) { @@ -1951,7 +1955,7 @@ func (mgr *TransactionManager) verifyFileSystemOperations(ctx context.Context, t // been resolved at this point. Don't conflict check it. if !isTablesList(path) { if err := fsTX.Read(path); err != nil { - return nil, fmt.Errorf("read: %w", err) + return nil, fmt.Errorf("read path %s: %w", path, err) } } @@ -2183,7 +2187,7 @@ func (mgr *TransactionManager) applyLogEntry(ctx context.Context, lsn storage.LS if err := mgr.db.Update(func(tx keyvalue.ReadWriter) error { if err := applyOperations(ctx, safe.NewSyncer().Sync, mgr.storagePath, mgr.logManager.GetEntryPath(lsn), manifest.GetOperations(), tx); err != nil { - return fmt.Errorf("apply operations: %w", err) + return fmt.Errorf("apply operations for lsn %d: %w", lsn, err) } return nil diff --git a/internal/gitaly/storage/wal/entry.go b/internal/gitaly/storage/wal/entry.go index f398447628d02b64aed44b028c0359010b01bb33..0fb9f3c80aec1797f2514a677d52fa9ad063a32b 100644 --- a/internal/gitaly/storage/wal/entry.go +++ b/internal/gitaly/storage/wal/entry.go @@ -1,13 +1,16 @@ package wal import ( + "errors" "fmt" "io/fs" "os" "path/filepath" "strconv" + "syscall" "gitlab.com/gitlab-org/gitaly/v18/internal/gitaly/storage/mode" + "gitlab.com/gitlab-org/gitaly/v18/internal/helper" "gitlab.com/gitlab-org/gitaly/v18/internal/structerr" "gitlab.com/gitlab-org/gitaly/v18/proto/go/gitalypb" ) @@ -76,8 +79,8 @@ func (e *Entry) stageFile(path string) (string, error) { // The file names within the log entry are not important as the manifest records the // actual name the file will be linked as. fileName := strconv.FormatUint(e.fileIDSequence, 36) - if err := os.Link(path, filepath.Join(e.stateDirectory, fileName)); err != nil { - return "", fmt.Errorf("link: %w", err) + if err := LinkOrReflink(path, filepath.Join(e.stateDirectory, fileName)); err != nil { + return "", fmt.Errorf("LinkOrReflink: %w", err) } return fileName, nil @@ -105,7 +108,6 @@ func (e *Entry) CreateFile(sourceAbsolutePath string, relativePath string) error if err != nil { return fmt.Errorf("stage file: %w", err) } - e.operations.createHardLink(stagedFile, relativePath, false) return nil } @@ -119,3 +121,29 @@ func (e *Entry) CreateLink(sourceRelativePath, destinationRelativePath string) { func (e *Entry) RemoveDirectoryEntry(relativePath string) { e.operations.removeDirectoryEntry(relativePath) } + +// LinkOrReflink attempts to create a hard link, and if that fails due to cross-device, +// falls back to creating a reflink (COW copy) on btrfs, or a regular copy as last resort. +func LinkOrReflink(src, dst string) error { + // Ensure the parent directory exists + dstDir := filepath.Dir(dst) + if _, err := os.Stat(dstDir); err != nil { + return fmt.Errorf("parent directory does not exist: %w", err) + } + + // Try hard link first (fast path for same subvolume) + if err := os.Link(src, dst); err == nil { + return nil + } else if !errors.Is(err, syscall.EXDEV) && !errors.Is(err, fs.ErrExist) { + // Some error other than cross-device or already exists + // fmt.Println("cross device error") + return fmt.Errorf("link: %w", err) + } + + // Hard link failed due to cross-device - use reflink + if err := helper.ReflinkWithPermissions(src, dst); err != nil { + return fmt.Errorf("reflink: %w", err) + } + + return nil +} diff --git a/internal/gitaly/storage/wal/reference_recorder.go b/internal/gitaly/storage/wal/reference_recorder.go index fede4d5e3ad6ba33e427883cc531351bb5ed9b68..b185c716a0719cc58874aee2e9493ed0faec6a65 100644 --- a/internal/gitaly/storage/wal/reference_recorder.go +++ b/internal/gitaly/storage/wal/reference_recorder.go @@ -1,9 +1,12 @@ package wal import ( + "bytes" "context" + "crypto/sha256" "errors" "fmt" + "io" "io/fs" "os" "path/filepath" @@ -11,6 +14,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v18/internal/git" "gitlab.com/gitlab-org/gitaly/v18/internal/gitaly/storage/wal/reftree" + "gitlab.com/gitlab-org/gitaly/v18/internal/helper" ) // ReferenceRecorder records the file system operations performed by reference transactions. @@ -65,7 +69,7 @@ func NewReferenceRecorder(tmpDir string, entry *Entry, snapshotRoot, relativePat preImagePackedRefsPath := filepath.Join(tmpDir, "packed-refs") postImagePackedRefsPath := filepath.Join(repoRoot, "packed-refs") - if err := os.Link(postImagePackedRefsPath, preImagePackedRefsPath); err != nil && !errors.Is(err, fs.ErrNotExist) { + if err := helper.ReflinkWithPermissions(postImagePackedRefsPath, preImagePackedRefsPath); err != nil && !errors.Is(err, fs.ErrNotExist) { return nil, fmt.Errorf("record pre-image packed-refs: %w", err) } @@ -261,10 +265,30 @@ func (r *ReferenceRecorder) StagePackedRefs() error { postImageInode, err := GetInode(r.postImagePackedRefsPath) if err != nil { - return fmt.Errorf("post-imaga inode: %w", err) + return fmt.Errorf("post-image inode: %w", err) } - if preImageInode == postImageInode { + // With BTRFS reflinks, inode numbers differ even for identical content. + // We need to compare content instead of just inodes. + unchanged := false + + if preImageInode == postImageInode && preImageInode > 0 { + // Same inode means definitely unchanged (works for hard links) + unchanged = true + } else if preImageInode > 0 && postImageInode > 0 { + // Different inodes - need to check if content actually changed + // This handles BTRFS reflinks where inodes differ but content may be same + contentEqual, err := filesHaveSameContent(r.preImagePackedRefsPath, r.postImagePackedRefsPath) + if err != nil { + return fmt.Errorf("compare file contents: %w", err) + } + unchanged = contentEqual + } else if preImageInode == 0 && postImageInode == 0 { + // Both don't exist + unchanged = true + } + + if unchanged { return nil } @@ -284,3 +308,32 @@ func (r *ReferenceRecorder) StagePackedRefs() error { return nil } + +func filesHaveSameContent(path1, path2 string) (bool, error) { + hash1, err := hashFile(path1) + if err != nil { + return false, err + } + + hash2, err := hashFile(path2) + if err != nil { + return false, err + } + + return bytes.Equal(hash1, hash2), nil +} + +func hashFile(path string) ([]byte, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + + h := sha256.New() + if _, err := io.Copy(h, f); err != nil { + return nil, err + } + + return h.Sum(nil), nil +} diff --git a/internal/helper/reflink.go b/internal/helper/reflink.go new file mode 100644 index 0000000000000000000000000000000000000000..7aff44234f7b8bfe53d888be7eca6320d31c02c5 --- /dev/null +++ b/internal/helper/reflink.go @@ -0,0 +1,98 @@ +package helper + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/KarpelesLab/reflink" +) + +// RenameDirectoryWithReflink recursively copies a directory or file using reflinks when possible +// then deletes the source (simulating a rename operation) +func RenameDirectoryWithReflink(src, dst string) error { + // Remove destination if it exists (not the source!) + if _, err := os.Lstat(dst); err == nil { + // fmt.Println("RenameDirectoryWithReflink: Destination exists, removing ", dst) + if err := os.RemoveAll(dst); err != nil { + return fmt.Errorf("remove existing destination: %w", err) + } + } else if !os.IsNotExist(err) { + return fmt.Errorf("stat destination: %w", err) + } + + // Copy source to destination + if err := copyWithReflink(src, dst); err != nil { + return err + } + + // Delete the source after successful copy + return os.RemoveAll(src) +} + +func copyWithReflink(src, dst string) error { + srcInfo, err := os.Stat(src) + if err != nil { + return fmt.Errorf("stat source: %w", err) + } + + // Handle directory + if srcInfo.IsDir() { + if err := os.MkdirAll(dst, srcInfo.Mode()); err != nil { + return fmt.Errorf("create destination: %w", err) + } + + entries, err := os.ReadDir(src) + if err != nil { + return fmt.Errorf("read directory: %w", err) + } + + for _, entry := range entries { + srcPath := filepath.Join(src, entry.Name()) + dstPath := filepath.Join(dst, entry.Name()) + + if err := copyWithReflink(srcPath, dstPath); err != nil { + return err + } + } + return nil + } + + // Handle symlink + if srcInfo.Mode()&os.ModeSymlink != 0 { + target, err := os.Readlink(src) + if err != nil { + return fmt.Errorf("read symlink: %w", err) + } + return os.Symlink(target, dst) + } + + // Handle regular file + if srcInfo.Mode().IsRegular() { + // cp to new loc on different subvolume with preserved permissions + err := ReflinkWithPermissions(src, dst) + if err != nil { + return fmt.Errorf("reflink with permissions failed: %w", err) + } + } + + // Skip other file types + return nil +} + +func ReflinkWithPermissions(src, dst string) error { + srcInfo, err := os.Stat(src) + if err != nil { + return fmt.Errorf("stat source: %w", err) + } + + if err := reflink.Always(src, dst); err != nil { + return fmt.Errorf("reflink copy: %w", err) + } + + if err := os.Chmod(dst, srcInfo.Mode()); err != nil { + return fmt.Errorf("chmod destination: %w", err) + } + + return nil +}