From 41cf4b0a0e54929c668d34d08bba59410dc524e3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 3 Jan 2023 12:24:58 +0100 Subject: [PATCH 1/5] proto: Add documentation for Fsck RPC Add missing documentation for the `Fsck` RPC and its related data types. --- proto/go/gitalypb/repository.pb.go | 8 ++++---- proto/go/gitalypb/repository_grpc.pb.go | 6 ++++-- proto/repository.proto | 11 ++++++----- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/proto/go/gitalypb/repository.pb.go b/proto/go/gitalypb/repository.pb.go index 82cc54b6d3..db4e1182bc 100644 --- a/proto/go/gitalypb/repository.pb.go +++ b/proto/go/gitalypb/repository.pb.go @@ -1897,13 +1897,13 @@ func (x *FetchSourceBranchResponse) GetResult() bool { return false } -// This comment is left unintentionally blank. +// FsckRequest is a request for the Fsck RPC. type FsckRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // This comment is left unintentionally blank. + // Repository is the repository that shall be checked for consistency. Repository *Repository `protobuf:"bytes,1,opt,name=repository,proto3" json:"repository,omitempty"` } @@ -1946,13 +1946,13 @@ func (x *FsckRequest) GetRepository() *Repository { return nil } -// This comment is left unintentionally blank. +// FsckResponse is a response for the Fsck RPC. type FsckResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // This comment is left unintentionally blank. + // Error contains both stdout and stderr of git-fsck(1) in case it returned an error. Error []byte `protobuf:"bytes,1,opt,name=error,proto3" json:"error,omitempty"` } diff --git a/proto/go/gitalypb/repository_grpc.pb.go b/proto/go/gitalypb/repository_grpc.pb.go index f6bf2b7ae6..84cd90f3f6 100644 --- a/proto/go/gitalypb/repository_grpc.pb.go +++ b/proto/go/gitalypb/repository_grpc.pb.go @@ -55,7 +55,8 @@ type RepositoryServiceClient interface { // FetchSourceBranch fetches a branch from a second (potentially remote) // repository into the given repository. FetchSourceBranch(ctx context.Context, in *FetchSourceBranchRequest, opts ...grpc.CallOption) (*FetchSourceBranchResponse, error) - // This comment is left unintentionally blank. + // Fsck checks the repository for consistency via git-fsck(1). This can be used to check for + // repository corruption. Fsck(ctx context.Context, in *FsckRequest, opts ...grpc.CallOption) (*FsckResponse, error) // This comment is left unintentionally blank. WriteRef(ctx context.Context, in *WriteRefRequest, opts ...grpc.CallOption) (*WriteRefResponse, error) @@ -875,7 +876,8 @@ type RepositoryServiceServer interface { // FetchSourceBranch fetches a branch from a second (potentially remote) // repository into the given repository. FetchSourceBranch(context.Context, *FetchSourceBranchRequest) (*FetchSourceBranchResponse, error) - // This comment is left unintentionally blank. + // Fsck checks the repository for consistency via git-fsck(1). This can be used to check for + // repository corruption. Fsck(context.Context, *FsckRequest) (*FsckResponse, error) // This comment is left unintentionally blank. WriteRef(context.Context, *WriteRefRequest) (*WriteRefResponse, error) diff --git a/proto/repository.proto b/proto/repository.proto index 33c040f245..0cda33a829 100644 --- a/proto/repository.proto +++ b/proto/repository.proto @@ -108,7 +108,8 @@ service RepositoryService { }; } - // This comment is left unintentionally blank. + // Fsck checks the repository for consistency via git-fsck(1). This can be used to check for + // repository corruption. rpc Fsck(FsckRequest) returns (FsckResponse) { option (op_type) = { op: ACCESSOR @@ -605,15 +606,15 @@ message FetchSourceBranchResponse { bool result = 1; } -// This comment is left unintentionally blank. +// FsckRequest is a request for the Fsck RPC. message FsckRequest { - // This comment is left unintentionally blank. + // Repository is the repository that shall be checked for consistency. Repository repository = 1 [(target_repository)=true]; } -// This comment is left unintentionally blank. +// FsckResponse is a response for the Fsck RPC. message FsckResponse { - // This comment is left unintentionally blank. + // Error contains both stdout and stderr of git-fsck(1) in case it returned an error. bytes error = 1; } -- GitLab From 33cf1476d920e6465924b4433aaa85c0c36981c6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 3 Jan 2023 12:43:38 +0100 Subject: [PATCH 2/5] repository: Modernize tests for `Fsck` RPC Modernize tests for the `Fsck` RPC to use table-driven tests. Stop using a seed repository and generate test data at runtime instead. --- .../gitaly/service/repository/fsck_test.go | 169 ++++++++++++------ 1 file changed, 112 insertions(+), 57 deletions(-) diff --git a/internal/gitaly/service/repository/fsck_test.go b/internal/gitaly/service/repository/fsck_test.go index 8fddbb1f5b..174aec129f 100644 --- a/internal/gitaly/service/repository/fsck_test.go +++ b/internal/gitaly/service/repository/fsck_test.go @@ -3,74 +3,129 @@ package repository import ( + "fmt" "os" "path/filepath" "strings" "testing" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest" + "gitlab.com/gitlab-org/gitaly/v15/internal/structerr" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" ) -func TestFsckSuccess(t *testing.T) { +func TestFsck(t *testing.T) { t.Parallel() - ctx := testhelper.Context(t) - - _, repo, _, client := setupRepositoryService(t, ctx) - - c, err := client.Fsck(ctx, &gitalypb.FsckRequest{Repository: repo}) - assert.NoError(t, err) - assert.NotNil(t, c) - assert.Empty(t, c.GetError()) -} -func TestFsckFailureSeverelyBrokenRepo(t *testing.T) { - t.Parallel() ctx := testhelper.Context(t) - - _, repo, repoPath, client := setupRepositoryService(t, ctx) - - // This makes the repo severely broken so that `git` does not identify it as a - // proper repo. - require.NoError(t, os.RemoveAll(filepath.Join(repoPath, "objects"))) - fd, err := os.Create(filepath.Join(repoPath, "objects")) - require.NoError(t, err) - require.NoError(t, fd.Close()) - - c, err := client.Fsck(ctx, &gitalypb.FsckRequest{Repository: repo}) - assert.NoError(t, err) - assert.NotNil(t, c) - assert.Contains(t, strings.ToLower(string(c.GetError())), "not a git repository") -} - -func TestFsckFailureSlightlyBrokenRepo(t *testing.T) { - t.Parallel() - ctx := testhelper.Context(t) - - _, repo, repoPath, client := setupRepositoryService(t, ctx) - - // This makes the repo slightly broken so that `git` still identify it as a - // proper repo, but `fsck` complains about broken refs... - require.NoError(t, os.RemoveAll(filepath.Join(repoPath, "objects", "pack"))) - - c, err := client.Fsck(ctx, &gitalypb.FsckRequest{Repository: repo}) - assert.NoError(t, err) - assert.NotNil(t, c) - assert.NotEmpty(t, string(c.GetError())) - assert.Contains(t, string(c.GetError()), "error: HEAD: invalid sha1 pointer") -} - -func TestFsck_validate(t *testing.T) { - t.Parallel() - ctx := testhelper.Context(t) - - _, client := setupRepositoryServiceWithoutRepo(t) - - _, err := client.Fsck(ctx, &gitalypb.FsckRequest{Repository: nil}) - msg := testhelper.GitalyOrPraefect("empty Repository", "repo scoped: empty Repository") - testhelper.RequireGrpcError(t, status.Error(codes.InvalidArgument, msg), err) + cfg, client := setupRepositoryServiceWithoutRepo(t) + + type setupData struct { + repo *gitalypb.Repository + expectedErr error + expectedResponse *gitalypb.FsckResponse + } + + for _, tc := range []struct { + desc string + setup func(t *testing.T) setupData + }{ + { + desc: "request is missing repository", + setup: func(t *testing.T) setupData { + return setupData{ + repo: nil, + expectedErr: structerr.NewInvalidArgument( + testhelper.GitalyOrPraefect( + "empty Repository", + "repo scoped: empty Repository", + ), + ), + } + }, + }, + { + desc: "empty repository", + setup: func(t *testing.T) setupData { + repo, _ := gittest.CreateRepository(t, ctx, cfg) + + return setupData{ + repo: repo, + expectedResponse: &gitalypb.FsckResponse{}, + } + }, + }, + { + desc: "repository with commit", + setup: func(t *testing.T) setupData { + repo, repoPath := gittest.CreateRepository(t, ctx, cfg) + gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("main")) + + return setupData{ + repo: repo, + expectedResponse: &gitalypb.FsckResponse{}, + } + }, + }, + { + desc: "invalid object directory", + setup: func(t *testing.T) setupData { + repo, repoPath := gittest.CreateRepository(t, ctx, cfg) + + // This makes the repo severely broken so that `git` does not + // identify it as a proper repository anymore. + require.NoError(t, os.RemoveAll(filepath.Join(repoPath, "objects"))) + require.NoError(t, os.WriteFile(filepath.Join(repoPath, "objects"), nil, 0o644)) + + return setupData{ + repo: repo, + expectedResponse: &gitalypb.FsckResponse{ + Error: []byte(fmt.Sprintf("fatal: not a git repository: '%s'\n", repoPath)), + }, + } + }, + }, + { + desc: "missing objects", + setup: func(t *testing.T) setupData { + repo, repoPath := gittest.CreateRepository(t, ctx, cfg) + + // We write a commit and repack it into a packfile... + commitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("main")) + gittest.Exec(t, cfg, "-C", repoPath, "repack", "-Ad") + // ... but then subsequently delete all the packfiles. This should + // lead to git-fsck(1) complaining about missing objects. + require.NoError(t, os.RemoveAll(filepath.Join(repoPath, "objects", "pack"))) + + expectedErr := strings.Join([]string{ + "error: refs/heads/main: invalid sha1 pointer " + commitID.String(), + "error: HEAD: invalid sha1 pointer " + commitID.String(), + "notice: No default references", + }, "\n") + "\n" + + return setupData{ + repo: repo, + expectedResponse: &gitalypb.FsckResponse{ + Error: []byte(expectedErr), + }, + } + }, + }, + } { + tc := tc + + t.Run(tc.desc, func(t *testing.T) { + t.Parallel() + + setupData := tc.setup(t) + + response, err := client.Fsck(ctx, &gitalypb.FsckRequest{ + Repository: setupData.repo, + }) + testhelper.RequireGrpcError(t, setupData.expectedErr, err) + testhelper.ProtoEqual(t, setupData.expectedResponse, response) + }) + } } -- GitLab From 7196b1cc17fa9e4b2673eaa9e7ef8d63dd2f66b3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 3 Jan 2023 12:55:56 +0100 Subject: [PATCH 3/5] repository: Add more tests for `Fsck` RPC Add some more tests for the `Fsck` RPC to more thoroughly assert its behaviour. --- .../gitaly/service/repository/fsck_test.go | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/internal/gitaly/service/repository/fsck_test.go b/internal/gitaly/service/repository/fsck_test.go index 174aec129f..932bac8450 100644 --- a/internal/gitaly/service/repository/fsck_test.go +++ b/internal/gitaly/service/repository/fsck_test.go @@ -105,6 +105,58 @@ func TestFsck(t *testing.T) { "notice: No default references", }, "\n") + "\n" + return setupData{ + repo: repo, + expectedResponse: &gitalypb.FsckResponse{ + Error: []byte(expectedErr), + }, + } + }, + }, + { + desc: "dangling blob", + setup: func(t *testing.T) setupData { + repo, repoPath := gittest.CreateRepository(t, ctx, cfg) + + // A dangling blob should not cause the consistency check to fail as + // it is totally expected that repositories accumulate unreachable + // objects. + gittest.WriteBlob(t, cfg, repoPath, []byte("content")) + + return setupData{ + repo: repo, + expectedResponse: &gitalypb.FsckResponse{}, + } + }, + }, + { + desc: "invalid tree object", + setup: func(t *testing.T) setupData { + repo, repoPath := gittest.CreateRepository(t, ctx, cfg) + + // Create the default branch so that git-fsck(1) doesn't complain + // about it missing. + gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("main")) + + // Write a tree object that has the same path twice. We use this as + // an example to verify that git-fsck(1) indeed also verifies + // objects as expected just because writing trees with two entries + // is so easy. + // + // Furthermore, the tree is also dangling on purpose to verify that + // we don't complain about it dangling. + treeID := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{ + {Path: "duplicate", Mode: "100644", Content: "foo"}, + {Path: "duplicate", Mode: "100644", Content: "bar"}, + }) + + expectedErr := strings.Join([]string{ + // This first error is a bug: we shouldn't complain about + // the dangling tree. + "dangling tree " + treeID.String(), + "error in tree " + treeID.String() + ": duplicateEntries: contains duplicate file entries", + }, "\n") + "\n" + return setupData{ repo: repo, expectedResponse: &gitalypb.FsckResponse{ -- GitLab From aa9f210c73eea36dba5561477932c08b7c855578 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 3 Jan 2023 12:57:46 +0100 Subject: [PATCH 4/5] repository: Fix intermingled stdout/stderr output for git-fsck(1) We use two different buffers when spawning git-fsck(1) for its stdout and stderr. This is inefficient as we now need to handle the overhead for two buffers and need to spawn two Goroutines for them. Furthermore, if git-fsck(1) was to print alternately to stdout and stderr, the order of these messages gets lost as we used to just concatenate stdout and stderr. Fix this by using a single buffer for both standard streams. While at it, convert the code to use a `strings.Builder` instead of a `bytes.Buffer`, which is more efficient. --- internal/gitaly/service/repository/fsck.go | 10 +++++----- internal/gitaly/service/repository/fsck_test.go | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/internal/gitaly/service/repository/fsck.go b/internal/gitaly/service/repository/fsck.go index 9b5f0a9e92..6ccd7499b4 100644 --- a/internal/gitaly/service/repository/fsck.go +++ b/internal/gitaly/service/repository/fsck.go @@ -1,8 +1,8 @@ package repository import ( - "bytes" "context" + "strings" "gitlab.com/gitlab-org/gitaly/v15/internal/git" "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/service" @@ -16,18 +16,18 @@ func (s *server) Fsck(ctx context.Context, req *gitalypb.FsckRequest) (*gitalypb return nil, structerr.NewInvalidArgument("%w", err) } - var stdout, stderr bytes.Buffer + var output strings.Builder cmd, err := s.gitCmdFactory.New(ctx, repository, git.Command{Name: "fsck"}, - git.WithStdout(&stdout), - git.WithStderr(&stderr), + git.WithStdout(&output), + git.WithStderr(&output), ) if err != nil { return nil, err } if err = cmd.Wait(); err != nil { - return &gitalypb.FsckResponse{Error: append(stdout.Bytes(), stderr.Bytes()...)}, nil + return &gitalypb.FsckResponse{Error: []byte(output.String())}, nil } return &gitalypb.FsckResponse{}, nil diff --git a/internal/gitaly/service/repository/fsck_test.go b/internal/gitaly/service/repository/fsck_test.go index 932bac8450..303ad89342 100644 --- a/internal/gitaly/service/repository/fsck_test.go +++ b/internal/gitaly/service/repository/fsck_test.go @@ -151,10 +151,10 @@ func TestFsck(t *testing.T) { }) expectedErr := strings.Join([]string{ - // This first error is a bug: we shouldn't complain about - // the dangling tree. - "dangling tree " + treeID.String(), "error in tree " + treeID.String() + ": duplicateEntries: contains duplicate file entries", + // This error is a bug: we shouldn't complain about the + // dangling tree. + "dangling tree " + treeID.String(), }, "\n") + "\n" return setupData{ -- GitLab From fb830f5f01311609c01590ddf04a22a3d33cd36d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 3 Jan 2023 13:00:21 +0100 Subject: [PATCH 5/5] repository: Disable misleading dangling checks in git-fsck(1) The `Fsck` RPC uses git-fsck(1) to inform the user about repository corruption. In case the Git command returns with a non-zero exit code the RPC will return both its stdout and stderr to the caller via the `Error` field that is part of the `FsckResponse`. By default, git-fsck(1) will not only print information about repository corruption though, but also about dangling objects that aren't reachable via any reference. But as it is totally expected that repositories have dangling objects, it is only distracting from the actual error messages that the repository owner would care about. Stop git-fsck(1) from checking for dangling objects to improve the signal to noise ratio. While at it, let's also explicitly ask it to not print any progress -- while it wouldn't do that anyway because it is not printing to a TTY, it stops the next reader of the code from wondering whether it would ever do that. Changelog: improved --- internal/gitaly/service/repository/fsck.go | 12 +++++++++++- internal/gitaly/service/repository/fsck_test.go | 7 +------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/internal/gitaly/service/repository/fsck.go b/internal/gitaly/service/repository/fsck.go index 6ccd7499b4..9561339a7f 100644 --- a/internal/gitaly/service/repository/fsck.go +++ b/internal/gitaly/service/repository/fsck.go @@ -18,7 +18,17 @@ func (s *server) Fsck(ctx context.Context, req *gitalypb.FsckRequest) (*gitalypb var output strings.Builder cmd, err := s.gitCmdFactory.New(ctx, repository, - git.Command{Name: "fsck"}, + git.Command{ + Name: "fsck", + Flags: []git.Option{ + // We don't care about any progress bars. + git.Flag{Name: "--no-progress"}, + // We don't want to get warning about dangling objects. It is + // expected that repositories have these and makes the signal to + // noise ratio a lot worse. + git.Flag{Name: "--no-dangling"}, + }, + }, git.WithStdout(&output), git.WithStderr(&output), ) diff --git a/internal/gitaly/service/repository/fsck_test.go b/internal/gitaly/service/repository/fsck_test.go index 303ad89342..6bf3373886 100644 --- a/internal/gitaly/service/repository/fsck_test.go +++ b/internal/gitaly/service/repository/fsck_test.go @@ -150,12 +150,7 @@ func TestFsck(t *testing.T) { {Path: "duplicate", Mode: "100644", Content: "bar"}, }) - expectedErr := strings.Join([]string{ - "error in tree " + treeID.String() + ": duplicateEntries: contains duplicate file entries", - // This error is a bug: we shouldn't complain about the - // dangling tree. - "dangling tree " + treeID.String(), - }, "\n") + "\n" + expectedErr := "error in tree " + treeID.String() + ": duplicateEntries: contains duplicate file entries\n" return setupData{ repo: repo, -- GitLab