From bbb4872670f0cd81917929b541d96440fc4549b0 Mon Sep 17 00:00:00 2001 From: Harshith S Date: Tue, 2 Sep 2025 15:18:40 +0000 Subject: [PATCH 1/3] Add zstd compression support to TarBuilder Introduces compression option to TarBuilder for smaller partition backup archives. Adds NewTarBuilderWithCompression constructor and WriteZstdTarball function while maintaining backward compatibility. --- go.mod | 2 +- internal/archive/archive.go | 22 +++++++++++++++ internal/archive/archive_test.go | 44 ++++++++++++++++++++++++++++++ internal/archive/tar_builder.go | 47 ++++++++++++++++++++++++++++---- 4 files changed, 109 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index a4bd459ea7..c16eeb3afe 100644 --- a/go.mod +++ b/go.mod @@ -58,6 +58,7 @@ require ( // - https://gitlab.com/gitlab-org/gitaly/-/issues/6385 google.golang.org/grpc v1.71.1 google.golang.org/protobuf v1.36.8 + github.com/klauspost/compress v1.18.0 ) require ( @@ -165,7 +166,6 @@ require ( github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/jmhodges/clock v1.2.0 // indirect github.com/kevinburke/ssh_config v1.2.0 // indirect - github.com/klauspost/compress v1.18.0 // indirect github.com/kylelemons/godebug v1.1.0 // indirect github.com/leonelquinteros/gotext v1.5.0 // indirect github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20210210170715-a8dfcb80d3a7 // indirect diff --git a/internal/archive/archive.go b/internal/archive/archive.go index 040ec879a8..c257e191a6 100644 --- a/internal/archive/archive.go +++ b/internal/archive/archive.go @@ -34,3 +34,25 @@ func WriteTarball(ctx context.Context, logger log.Logger, writer io.Writer, path return nil } + +// WriteZstdTarball writes a zstd-compressed tarball to an io.Writer for the +// provided path containing the specified archive members. Members should be +// specified relative to `path`. +// +// Symlinks will be included in the archive. Permissions are normalised to +// allow global read/write. +func WriteZstdTarball(ctx context.Context, logger log.Logger, writer io.Writer, path string, members ...string) error { + builder := NewTarBuilderWithCompression(path, writer, true) + + builder.allowSymlinks = true + + for _, member := range members { + _ = builder.RecursiveDir(member, "", true) + } + + if err := builder.Close(); err != nil { + return fmt.Errorf("write zstd tarball: %w", err) + } + + return nil +} diff --git a/internal/archive/archive_test.go b/internal/archive/archive_test.go index 514e7e1c01..8e8df07ef4 100644 --- a/internal/archive/archive_test.go +++ b/internal/archive/archive_test.go @@ -12,6 +12,7 @@ import ( "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage/mode" "gitlab.com/gitlab-org/gitaly/v16/internal/testhelper" + "github.com/klauspost/compress/zstd" ) func TestWriteTarball(t *testing.T) { @@ -103,6 +104,49 @@ func TestWriteTarball(t *testing.T) { testhelper.RequireTarState(t, &archFile, expected) } +func TestWriteZstdTarball(t *testing.T) { + t.Parallel() + + ctx := testhelper.Context(t) + tempDir := testhelper.TempDir(t) + srcDir := filepath.Join(tempDir, "src") + + // Keep it simple: two regular files + writeFile(t, filepath.Join(srcDir, "a.txt"), []byte("hello zstd")) + writeFile(t, filepath.Join(srcDir, "nested/file.txt"), []byte("nested content")) + + var archFile bytes.Buffer + err := WriteZstdTarball( + ctx, + testhelper.NewLogger(t), + &archFile, + srcDir, + // Use file paths (not directories) so we don't have to assert dir entries. + "a.txt", + "nested/file.txt", + ) + require.NoError(t, err) + + // Decompress zstd to get a plain tar stream for the helper + zr, err := zstd.NewReader(bytes.NewReader(archFile.Bytes())) + require.NoError(t, err) + defer zr.Close() + + expected := testhelper.DirectoryState{ + "a.txt": { + Mode: TarFileMode, + Content: []byte("hello zstd"), + }, + "nested/file.txt": { + Mode: TarFileMode, + Content: []byte("nested content"), + }, + } + + // The helper expects a tar stream, which zr provides after decompression. + testhelper.RequireTarState(t, zr, expected) +} + func writeFile(tb testing.TB, path string, data []byte) { tb.Helper() require.NoError(tb, os.MkdirAll(filepath.Dir(path), mode.Directory)) diff --git a/internal/archive/tar_builder.go b/internal/archive/tar_builder.go index 3f52d2dd4f..5a232f8645 100644 --- a/internal/archive/tar_builder.go +++ b/internal/archive/tar_builder.go @@ -12,6 +12,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage/mode/permission" "golang.org/x/sys/unix" + "github.com/klauspost/compress/zstd" ) // TarFileMode is the minimum file permission given to files in tar archives in @@ -41,6 +42,7 @@ type TarBuilder struct { basePath string tarWriter *tar.Writer + compressor *zstd.Encoder // The first error stops all further processing err error @@ -49,10 +51,31 @@ type TarBuilder struct { // NewTarBuilder creates a TarBuilder that writes files from basePath on the // filesystem to the given io.Writer func NewTarBuilder(basePath string, w io.Writer) *TarBuilder { - return &TarBuilder{ - basePath: basePath, - tarWriter: tar.NewWriter(w), - } + return NewTarBuilderWithCompression(basePath, w, false) +} + +func NewTarBuilderWithCompression(basePath string, w io.Writer, useCompression bool) *TarBuilder { + var finalWriter io.Writer = w + var compressor *zstd.Encoder + + if useCompression { + var err error + compressor, err = zstd.NewWriter(w) + if err != nil { + // Handle error - could return error or create without compression + return &TarBuilder{ + basePath: basePath, + err: err, + } + } + finalWriter = compressor + } + + return &TarBuilder{ + basePath: basePath, + tarWriter: tar.NewWriter(finalWriter), + compressor: compressor, + } } func (t *TarBuilder) join(rel string) string { @@ -248,7 +271,21 @@ func (t *TarBuilder) Close() error { return t.err } - return t.tarWriter.Close() + // Close tar writer first, then compressor if it exists + if err := t.tarWriter.Close(); err != nil { + // Still try to close compressor even if tar writer failed + if t.compressor != nil { + _ = t.compressor.Close() + } + return err + } + + // Close compressor last if it exists + if t.compressor != nil { + return t.compressor.Close() + } + + return nil } // Err returns the last error seen during operation of a TarBuilder. Once an -- GitLab From 2e31523d4e83a012114ad051a5f7fca947ba0b98 Mon Sep 17 00:00:00 2001 From: Harshith Date: Thu, 4 Sep 2025 12:09:46 +0530 Subject: [PATCH 2/3] Add zstd compression to Gitaly backup and restore --- internal/cli/gitaly/subcmd_recovery_restore.go | 12 ++++++++++++ .../gitaly/service/partition/backup_partition.go | 4 ++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/internal/cli/gitaly/subcmd_recovery_restore.go b/internal/cli/gitaly/subcmd_recovery_restore.go index c9325ce269..fd9516f0d4 100644 --- a/internal/cli/gitaly/subcmd_recovery_restore.go +++ b/internal/cli/gitaly/subcmd_recovery_restore.go @@ -19,6 +19,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage/storagemgr" "gitlab.com/gitlab-org/gitaly/v16/proto/go/gitalypb" "google.golang.org/protobuf/encoding/protodelim" + "github.com/klauspost/compress/zstd" ) func newRecoveryRestoreCommand() *cli.Command { @@ -76,6 +77,16 @@ func recoveryRestoreAction(ctx context.Context, cmd *cli.Command) (returnErr err } defer backupReader.Close() + var reader io.Reader = backupReader + if strings.HasSuffix(backupEntry.Path, ".tar.zst") { + decoder, err := zstd.NewReader(backupReader) + if err != nil { + return fmt.Errorf("create zstd decoder: %w", err) + } + defer decoder.Close() + reader = decoder + } + partitionTempDir := filepath.Join(tempDir, partitionID.String()) if err := os.MkdirAll(partitionTempDir, mode.Directory); err != nil { return fmt.Errorf("create partition temp dir: %w", err) @@ -102,6 +113,7 @@ func recoveryRestoreAction(ctx context.Context, cmd *cli.Command) (returnErr err // It returns any error encountered during extraction. func extractBackup(reader io.Reader, destDir string) (string, error) { var repoRelPath string + tarReader := tar.NewReader(reader) for { header, err := tarReader.Next() diff --git a/internal/gitaly/service/partition/backup_partition.go b/internal/gitaly/service/partition/backup_partition.go index 06cf26a7c2..7face7a1f5 100644 --- a/internal/gitaly/service/partition/backup_partition.go +++ b/internal/gitaly/service/partition/backup_partition.go @@ -28,7 +28,7 @@ func (s *server) BackupPartition(ctx context.Context, in *gitalypb.BackupPartiti return nil, structerr.NewInternal("backup partition: transaction not initialized") } - backupRelativePath := filepath.Join("partition-backups", in.GetStorageName(), in.GetPartitionId(), tx.SnapshotLSN().String()+".tar") + backupRelativePath := filepath.Join("partition-backups", in.GetStorageName(), in.GetPartitionId(), tx.SnapshotLSN().String()+".tar.zst") exists, err := s.backupSink.Exists(ctx, backupRelativePath) if err != nil { return nil, fmt.Errorf("backup exists: %w", err) @@ -79,7 +79,7 @@ func (s *server) BackupPartition(ctx context.Context, in *gitalypb.BackupPartiti } func writeTarball(partitionRoot string, kvFile *os.File, w io.Writer) error { - builder := archive.NewTarBuilder(partitionRoot, w) + builder := archive.NewTarBuilderWithCompression(partitionRoot, w, true) if err := builder.VirtualFileWithContents(storage.KVStateFileName, kvFile); err != nil { return fmt.Errorf("tar builder: virtual file: %w", err) -- GitLab From 789d1460528bd6fdfda044bcfdaa4044c3450545 Mon Sep 17 00:00:00 2001 From: Harshith S Date: Thu, 4 Sep 2025 07:31:24 +0000 Subject: [PATCH 3/3] Fix unused reader in recovery restore --- internal/cli/gitaly/subcmd_recovery_restore.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/cli/gitaly/subcmd_recovery_restore.go b/internal/cli/gitaly/subcmd_recovery_restore.go index fd9516f0d4..0d3e13d41d 100644 --- a/internal/cli/gitaly/subcmd_recovery_restore.go +++ b/internal/cli/gitaly/subcmd_recovery_restore.go @@ -92,7 +92,7 @@ func recoveryRestoreAction(ctx context.Context, cmd *cli.Command) (returnErr err return fmt.Errorf("create partition temp dir: %w", err) } - relativePath, err := extractBackup(backupReader, partitionTempDir) + relativePath, err := extractBackup(reader, partitionTempDir) if err != nil { return fmt.Errorf("extract backup: %w", err) } @@ -113,7 +113,7 @@ func recoveryRestoreAction(ctx context.Context, cmd *cli.Command) (returnErr err // It returns any error encountered during extraction. func extractBackup(reader io.Reader, destDir string) (string, error) { var repoRelPath string - + tarReader := tar.NewReader(reader) for { header, err := tarReader.Next() -- GitLab