From a35d007c05c6effe440c840a28808f894abf7ca8 Mon Sep 17 00:00:00 2001 From: John Cai Date: Wed, 23 Mar 2022 16:42:13 -0400 Subject: [PATCH 1/2] localrepo: Add Size() for repo Add a Size() method for repo that calculates disk usage of objects. The way we currently calculate repository size is to use du(1), which is a quick and dirty way to get the total storage of a repository. However, this ends up varying depending on the __how__ the objects are stored by Git, which is affected by repository maintenance strategies and can vary. A more consistent way to calculate storage is to get a total of number of reachable objects with git rev-list --all --objects --disk-usage --- internal/git/localrepo/repo.go | 29 ++++++ internal/git/localrepo/repo_test.go | 136 ++++++++++++++++++++++++++++ 2 files changed, 165 insertions(+) diff --git a/internal/git/localrepo/repo.go b/internal/git/localrepo/repo.go index 806b615530..7d7f3e4094 100644 --- a/internal/git/localrepo/repo.go +++ b/internal/git/localrepo/repo.go @@ -1,8 +1,11 @@ package localrepo import ( + "bytes" "context" "fmt" + "strconv" + "strings" "testing" "github.com/stretchr/testify/require" @@ -90,3 +93,29 @@ func errorWithStderr(err error, stderr []byte) error { } return fmt.Errorf("%w, stderr: %q", err, stderr) } + +// Size calculates the size of all reachable objects in bytes +func (repo *Repo) Size(ctx context.Context) (int64, error) { + var stdout bytes.Buffer + if err := repo.ExecAndWait(ctx, + git.SubCmd{ + Name: "rev-list", + Flags: []git.Option{ + git.Flag{Name: "--all"}, + git.Flag{Name: "--objects"}, + git.Flag{Name: "--use-bitmap-index"}, + git.Flag{Name: "--disk-usage"}, + }, + }, + git.WithStdout(&stdout), + ); err != nil { + return -1, err + } + + size, err := strconv.ParseInt(strings.TrimSuffix(stdout.String(), "\n"), 10, 64) + if err != nil { + return -1, err + } + + return size, nil +} diff --git a/internal/git/localrepo/repo_test.go b/internal/git/localrepo/repo_test.go index 356ce2ce53..c305519c85 100644 --- a/internal/git/localrepo/repo_test.go +++ b/internal/git/localrepo/repo_test.go @@ -1,13 +1,19 @@ package localrepo import ( + "bytes" "context" + "os" + "path/filepath" "testing" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v14/internal/git" "gitlab.com/gitlab-org/gitaly/v14/internal/git/catfile" "gitlab.com/gitlab-org/gitaly/v14/internal/git/gittest" "gitlab.com/gitlab-org/gitaly/v14/internal/gitaly/config" + "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper/testcfg" "gitlab.com/gitlab-org/gitaly/v14/proto/go/gitalypb" ) @@ -35,3 +41,133 @@ func TestRepo(t *testing.T) { return New(config.NewLocator(cfg), gitCmdFactory, catfileCache, pbRepo), repoPath }) } + +func TestSize(t *testing.T) { + cfg := testcfg.Build(t) + gitCmdFactory := gittest.NewCommandFactory(t, cfg) + catfileCache := catfile.NewCache(cfg) + t.Cleanup(catfileCache.Stop) + + testCases := []struct { + desc string + setup func(repoPath string, t *testing.T) + expectedSize int64 + }{ + { + desc: "empty repository", + expectedSize: 0, + }, + { + desc: "one committed file", + setup: func(repoPath string, t *testing.T) { + require.NoError(t, os.WriteFile( + filepath.Join(repoPath, "file"), + bytes.Repeat([]byte("a"), 1000), + 0o644, + )) + + cmd := gittest.NewCommand(t, cfg, "-C", repoPath, "add", "file") + require.NoError(t, cmd.Run()) + cmd = gittest.NewCommand(t, cfg, "-C", repoPath, "commit", "-m", "initial") + require.NoError(t, cmd.Run()) + }, + expectedSize: 202, + }, + { + desc: "one large loose blob", + setup: func(repoPath string, t *testing.T) { + require.NoError(t, os.WriteFile( + filepath.Join(repoPath, "file"), + bytes.Repeat([]byte("a"), 1000), + 0o644, + )) + + cmd := gittest.NewCommand(t, cfg, "-C", repoPath, "checkout", "-b", "branch-a") + require.NoError(t, cmd.Run()) + cmd = gittest.NewCommand(t, cfg, "-C", repoPath, "add", "file") + require.NoError(t, cmd.Run()) + cmd = gittest.NewCommand(t, cfg, "-C", repoPath, "commit", "-m", "initial") + require.NoError(t, cmd.Run()) + cmd = gittest.NewCommand(t, cfg, "-C", repoPath, "update-ref", "-d", "refs/heads/branch-a") + require.NoError(t, cmd.Run()) + }, + expectedSize: 0, + }, + { + desc: "modification to blob without repack", + setup: func(repoPath string, t *testing.T) { + require.NoError(t, os.WriteFile( + filepath.Join(repoPath, "file"), + bytes.Repeat([]byte("a"), 1000), + 0o644, + )) + + cmd := gittest.NewCommand(t, cfg, "-C", repoPath, "add", "file") + require.NoError(t, cmd.Run()) + cmd = gittest.NewCommand(t, cfg, "-C", repoPath, "commit", "-m", "initial") + require.NoError(t, cmd.Run()) + + f, err := os.OpenFile( + filepath.Join(repoPath, "file"), + os.O_APPEND|os.O_WRONLY, + 0o644) + require.NoError(t, err) + defer f.Close() + _, err = f.WriteString("a") + assert.NoError(t, err) + + cmd = gittest.NewCommand(t, cfg, "-C", repoPath, "commit", "-am", "modification") + require.NoError(t, cmd.Run()) + }, + expectedSize: 437, + }, + { + desc: "modification to blob after repack", + setup: func(repoPath string, t *testing.T) { + require.NoError(t, os.WriteFile( + filepath.Join(repoPath, "file"), + bytes.Repeat([]byte("a"), 1000), + 0o644, + )) + + cmd := gittest.NewCommand(t, cfg, "-C", repoPath, "add", "file") + require.NoError(t, cmd.Run()) + cmd = gittest.NewCommand(t, cfg, "-C", repoPath, "commit", "-m", "initial") + require.NoError(t, cmd.Run()) + + f, err := os.OpenFile( + filepath.Join(repoPath, "file"), + os.O_APPEND|os.O_WRONLY, + 0o644) + require.NoError(t, err) + defer f.Close() + _, err = f.WriteString("a") + assert.NoError(t, err) + + cmd = gittest.NewCommand(t, cfg, "-C", repoPath, "commit", "-am", "modification") + require.NoError(t, cmd.Run()) + + cmd = gittest.NewCommand(t, cfg, "-C", repoPath, "repack", "-a", "-d") + require.NoError(t, cmd.Run()) + }, + expectedSize: 391, + }, + } + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + pbRepo, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0], gittest.InitRepoOpts{ + WithWorktree: true, + }) + repo := New(config.NewLocator(cfg), gitCmdFactory, catfileCache, pbRepo) + if tc.setup != nil { + tc.setup(repoPath, t) + } + + ctx := testhelper.Context(t) + size, err := repo.Size(ctx) + require.NoError(t, err) + assert.Equal(t, tc.expectedSize, size) + }) + } +} -- GitLab From 0d28358d259724bc71b1833ffb877f73852b197c Mon Sep 17 00:00:00 2001 From: John Cai Date: Thu, 24 Mar 2022 16:11:47 -0400 Subject: [PATCH 2/2] repository: Use Size() to calculate repo size behind feature flag In the previous commit, we added a Size() method in localrepo that calls git rev-list --all --objects --disk-usage to calculate the size of the repository. Add a feature flag that, when toggled on, will cause RepositorySize to use this new way of calculating repository size. Changelog: added --- internal/gitaly/service/repository/size.go | 23 +++++++++++--- .../gitaly/service/repository/size_test.go | 31 ++++++++++++++----- .../featureflag/ff_repo_size_revlist.go | 5 +++ 3 files changed, 47 insertions(+), 12 deletions(-) create mode 100644 internal/metadata/featureflag/ff_repo_size_revlist.go diff --git a/internal/gitaly/service/repository/size.go b/internal/gitaly/service/repository/size.go index 261bb0417f..4d43dc2b19 100644 --- a/internal/gitaly/service/repository/size.go +++ b/internal/gitaly/service/repository/size.go @@ -10,16 +10,31 @@ import ( "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus" "gitlab.com/gitlab-org/gitaly/v14/internal/command" + "gitlab.com/gitlab-org/gitaly/v14/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/v14/proto/go/gitalypb" ) func (s *server) RepositorySize(ctx context.Context, in *gitalypb.RepositorySizeRequest) (*gitalypb.RepositorySizeResponse, error) { - path, err := s.locator.GetPath(in.Repository) - if err != nil { - return nil, err + repo := s.localrepo(in.GetRepository()) + var size int64 + var err error + + if featureflag.RevlistForRepoSize.IsEnabled(ctx) { + size, err = repo.Size(ctx) + if err != nil { + return nil, err + } + // return the size in kb to remain consistent + size = size / 1024 + } else { + path, err := repo.Path() + if err != nil { + return nil, err + } + size = getPathSize(ctx, path) } - return &gitalypb.RepositorySizeResponse{Size: getPathSize(ctx, path)}, nil + return &gitalypb.RepositorySizeResponse{Size: size}, nil } func (s *server) GetObjectDirectorySize(ctx context.Context, in *gitalypb.GetObjectDirectorySizeRequest) (*gitalypb.GetObjectDirectorySizeResponse, error) { diff --git a/internal/gitaly/service/repository/size_test.go b/internal/gitaly/service/repository/size_test.go index 5aa51da731..b9c7527239 100644 --- a/internal/gitaly/service/repository/size_test.go +++ b/internal/gitaly/service/repository/size_test.go @@ -1,12 +1,14 @@ package repository import ( + "context" "testing" "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v14/internal/git/gittest" "gitlab.com/gitlab-org/gitaly/v14/internal/git/quarantine" "gitlab.com/gitlab-org/gitaly/v14/internal/gitaly/config" + "gitlab.com/gitlab-org/gitaly/v14/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v14/proto/go/gitalypb" "google.golang.org/grpc/codes" @@ -17,10 +19,13 @@ import ( // repository, even in optimally packed state, is greater than this. const testRepoMinSizeKB = 10000 -func TestSuccessfulRepositorySizeRequest(t *testing.T) { +func TestRepositorySize_SuccessfulRequest(t *testing.T) { t.Parallel() + testhelper.NewFeatureSets(featureflag.RevlistForRepoSize). + Run(t, testSuccessfulRepositorySizeRequest) +} - ctx := testhelper.Context(t) +func testSuccessfulRepositorySizeRequest(t *testing.T, ctx context.Context) { _, repo, _, client := setupRepositoryService(ctx, t) request := &gitalypb.RepositorySizeRequest{Repository: repo} @@ -33,8 +38,13 @@ func TestSuccessfulRepositorySizeRequest(t *testing.T) { ) } -func TestFailedRepositorySizeRequest(t *testing.T) { +func TestRepositorySixe_FailedRequest(t *testing.T) { t.Parallel() + testhelper.NewFeatureSets(featureflag.RevlistForRepoSize). + Run(t, testFailedRepositorySizeRequest) +} + +func testFailedRepositorySizeRequest(t *testing.T, ctx context.Context) { _, client := setupRepositoryServiceWithoutRepo(t) testCases := []struct { @@ -52,17 +62,19 @@ func TestFailedRepositorySizeRequest(t *testing.T) { request := &gitalypb.RepositorySizeRequest{ Repository: testCase.repo, } - ctx := testhelper.Context(t) _, err := client.RepositorySize(ctx, request) testhelper.RequireGrpcCode(t, err, codes.InvalidArgument) }) } } -func TestSuccessfulGetObjectDirectorySizeRequest(t *testing.T) { +func TestRepositorySize_SuccessfulGetObjectDirectorySizeRequest(t *testing.T) { t.Parallel() + testhelper.NewFeatureSets(featureflag.RevlistForRepoSize). + Run(t, testSuccessfulGetObjectDirectorySizeRequest) +} - ctx := testhelper.Context(t) +func testSuccessfulGetObjectDirectorySizeRequest(t *testing.T, ctx context.Context) { _, repo, _, client := setupRepositoryService(ctx, t) repo.GitObjectDirectory = "objects/" @@ -76,12 +88,15 @@ func TestSuccessfulGetObjectDirectorySizeRequest(t *testing.T) { ) } -func TestGetObjectDirectorySize_quarantine(t *testing.T) { +func TestRepositorySize_GetObjectDirectorySize_quarantine(t *testing.T) { t.Parallel() + testhelper.NewFeatureSets(featureflag.RevlistForRepoSize). + Run(t, testGetObjectDirectorySizeQuarantine) +} +func testGetObjectDirectorySizeQuarantine(t *testing.T, ctx context.Context) { cfg, client := setupRepositoryServiceWithoutRepo(t) locator := config.NewLocator(cfg) - ctx := testhelper.Context(t) t.Run("quarantined repo", func(t *testing.T) { repo, _ := gittest.CreateRepository(ctx, t, cfg, gittest.CreateRepositoryConfig{ diff --git a/internal/metadata/featureflag/ff_repo_size_revlist.go b/internal/metadata/featureflag/ff_repo_size_revlist.go new file mode 100644 index 0000000000..8be87e482b --- /dev/null +++ b/internal/metadata/featureflag/ff_repo_size_revlist.go @@ -0,0 +1,5 @@ +package featureflag + +// RevlistForRepoSize enables the RepositorySize RPC to use git rev-list to +// calculate the disk usage of the repository. +var RevlistForRepoSize = NewFeatureFlag("revlist_for_repo_size", false) -- GitLab