From cbff66e68b7cfd4b916d40095b02ae613d31fa59 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Fri, 7 Jun 2019 18:46:10 +0200 Subject: [PATCH 1/5] Start writing test with dangling objects --- internal/git/objectpool/fetch_test.go | 59 +++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 internal/git/objectpool/fetch_test.go diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go new file mode 100644 index 0000000000..8c1afca158 --- /dev/null +++ b/internal/git/objectpool/fetch_test.go @@ -0,0 +1,59 @@ +package objectpool + +import ( + "crypto/rand" + "encoding/hex" + "fmt" + "io" + "strings" + "testing" + + "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/internal/testhelper" +) + +func TestFetchFromOriginDangling(t *testing.T) { + source, _, cleanup := testhelper.NewTestRepo(t) + defer cleanup() + + pool, err := NewObjectPool(source.StorageName, testhelper.NewTestObjectPoolName(t)) + require.NoError(t, err) + + ctx, cancel := testhelper.Context() + defer cancel() + + require.NoError(t, pool.FetchFromOrigin(ctx, source), "seed pool") + + const ( + existingTree = "07f8147e8e73aab6c935c296e8cdc5194dee729b" + existingCommit = "7975be0116940bf2ad4321f79d02a55c5f7779aa" + existingBlob = "c60514b6d3d6bf4bec1030f70026e34dfbd69ad5" + ) + + nonceBytes := make([]byte, 4) + _, err = io.ReadFull(rand.Reader, nonceBytes) + require.NoError(t, err) + nonce := hex.EncodeToString(nonceBytes) + + baseArgs := []string{"-C", pool.FullPath()} + newBlobArgs := append(baseArgs, "hash-object", "-t", "blob", "-w", "--stdin") + newBlob := string(testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newBlobArgs...)) + t.Log(newBlob) + + newTreeArgs := append(baseArgs, "mktree") + newTreeStdin := strings.NewReader(fmt.Sprintf("100644 blob %s %s\n", existingBlob, nonce)) + newTree := string(testhelper.MustRunCommand(t, newTreeStdin, "git", newTreeArgs...)) + t.Log(newTree) + + newCommitArgs := append(baseArgs, "commit-tree", existingTree) + newCommit := string(testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newCommitArgs...)) + t.Log(newCommit) + + newTagName := "tag-" + nonce + newTagArgs := append(baseArgs, "tag", "-m", "msg", "-a", newTagName, existingCommit) + testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newTagArgs...) + newTag := string(testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "rev-parse", newTagName)...)) + testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "update-ref", "-d", "refs/tags/"+newTagName)...) + + t.Log(newTag) +} -- GitLab From 84270a3bfcf16d9cf728a34438ce37fd1fa2ff4a Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Wed, 12 Jun 2019 18:53:01 +0200 Subject: [PATCH 2/5] Prevent dangling objects in object pool --- internal/git/objectpool/fetch.go | 54 +++++++++++++++++++++++++++ internal/git/objectpool/fetch_test.go | 33 ++++++++++++---- internal/git/updateref/updateref.go | 6 ++- 3 files changed, 84 insertions(+), 9 deletions(-) diff --git a/internal/git/objectpool/fetch.go b/internal/git/objectpool/fetch.go index 2b967ad705..88648a7cbc 100644 --- a/internal/git/objectpool/fetch.go +++ b/internal/git/objectpool/fetch.go @@ -3,10 +3,14 @@ package objectpool import ( "bufio" "context" + "fmt" + "strings" "gitlab.com/gitlab-org/gitaly-proto/go/gitalypb" "gitlab.com/gitlab-org/gitaly/internal/command" "gitlab.com/gitlab-org/gitaly/internal/git" + "gitlab.com/gitlab-org/gitaly/internal/git/repository" + "gitlab.com/gitlab-org/gitaly/internal/git/updateref" "gitlab.com/gitlab-org/gitaly/internal/helper" ) @@ -64,5 +68,55 @@ func (o *ObjectPool) FetchFromOrigin(ctx context.Context, origin *gitalypb.Repos if err := fetchCmd.Wait(); err != nil { return err } + + if err := rescueDanglingObjects(ctx, o); err != nil { + return err + } + + return nil +} + +const danglingObjectNamespace = "refs/dangling" + +func rescueDanglingObjects(ctx context.Context, repo repository.GitRepo) error { + fsck, err := git.Command(ctx, repo, "fsck", "--connectivity-only", "--dangling") + if err != nil { + return err + } + + updater, err := updateref.New(ctx, repo) + if err != nil { + return err + } + + scanner := bufio.NewScanner(fsck) + for scanner.Scan() { + split := strings.SplitN(scanner.Text(), " ", 3) + if len(split) != 3 { + continue + } + + if split[0] != "dangling" { + continue + } + + ref := danglingObjectNamespace + "/" + split[2] + if err := updater.Create(ref, split[2]); err != nil { + return err + } + } + + if err := scanner.Err(); err != nil { + return err + } + + if err := fsck.Wait(); err != nil { + return fmt.Errorf("git fsck: %v", err) + } + + if err := updater.Wait(); err != nil { + return err + } + return nil } diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go index 8c1afca158..91db52854b 100644 --- a/internal/git/objectpool/fetch_test.go +++ b/internal/git/objectpool/fetch_test.go @@ -9,6 +9,7 @@ import ( "testing" "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/internal/helper/text" "gitlab.com/gitlab-org/gitaly/internal/testhelper" ) @@ -37,23 +38,39 @@ func TestFetchFromOriginDangling(t *testing.T) { baseArgs := []string{"-C", pool.FullPath()} newBlobArgs := append(baseArgs, "hash-object", "-t", "blob", "-w", "--stdin") - newBlob := string(testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newBlobArgs...)) - t.Log(newBlob) + newBlob := text.ChompBytes(testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newBlobArgs...)) newTreeArgs := append(baseArgs, "mktree") newTreeStdin := strings.NewReader(fmt.Sprintf("100644 blob %s %s\n", existingBlob, nonce)) - newTree := string(testhelper.MustRunCommand(t, newTreeStdin, "git", newTreeArgs...)) - t.Log(newTree) + newTree := text.ChompBytes(testhelper.MustRunCommand(t, newTreeStdin, "git", newTreeArgs...)) newCommitArgs := append(baseArgs, "commit-tree", existingTree) - newCommit := string(testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newCommitArgs...)) - t.Log(newCommit) + newCommit := text.ChompBytes(testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newCommitArgs...)) newTagName := "tag-" + nonce newTagArgs := append(baseArgs, "tag", "-m", "msg", "-a", newTagName, existingCommit) testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newTagArgs...) - newTag := string(testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "rev-parse", newTagName)...)) + newTag := text.ChompBytes(testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "rev-parse", newTagName)...)) testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "update-ref", "-d", "refs/tags/"+newTagName)...) - t.Log(newTag) + fsckBefore := testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "fsck", "--connectivity-only", "--dangling")...) + fsckBeforeLines := strings.Split(string(fsckBefore), "\n") + + for _, l := range []string{ + fmt.Sprintf("dangling blob %s", newBlob), + fmt.Sprintf("dangling tree %s", newTree), + fmt.Sprintf("dangling commit %s", newCommit), + fmt.Sprintf("dangling tag %s", newTag), + } { + require.Contains(t, fsckBeforeLines, l) + } + + require.NoError(t, pool.FetchFromOrigin(ctx, source), "second fetch") + + refsArgs := append(baseArgs, "for-each-ref", "--format=%(refname) %(objectname)") + refsAfter := testhelper.MustRunCommand(t, nil, "git", refsArgs...) + refsAfterLines := strings.Split(string(refsAfter), "\n") + for _, id := range []string{newBlob, newTree, newCommit, newTag} { + require.Contains(t, refsAfterLines, fmt.Sprintf("refs/dangling/%s %s", id, id)) + } } diff --git a/internal/git/updateref/updateref.go b/internal/git/updateref/updateref.go index 01376be527..7df494b181 100644 --- a/internal/git/updateref/updateref.go +++ b/internal/git/updateref/updateref.go @@ -53,5 +53,9 @@ func (u *Updater) Delete(ref string) error { // Wait applies the commands specified in other calls to the Updater func (u *Updater) Wait() error { - return u.cmd.Wait() + if err := u.cmd.Wait(); err != nil { + return fmt.Errorf("git update-ref: %v", err) + } + + return nil } -- GitLab From ce56d7d08c247a0fe1adc5a82942dc8383367a76 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 13 Jun 2019 11:46:28 +0200 Subject: [PATCH 3/5] Add comment --- internal/git/objectpool/fetch.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/internal/git/objectpool/fetch.go b/internal/git/objectpool/fetch.go index 88648a7cbc..30df30ba1b 100644 --- a/internal/git/objectpool/fetch.go +++ b/internal/git/objectpool/fetch.go @@ -78,6 +78,14 @@ func (o *ObjectPool) FetchFromOrigin(ctx context.Context, origin *gitalypb.Repos const danglingObjectNamespace = "refs/dangling" +// rescueDanglingObjects creates refs for all dangling objects if finds +// with `git fsck`, which converts those objects from "dangling" to +// "not-dangling". This guards against any object ever being deleted from +// a pool repository. This is a defense in depth against accidental use +// of `git prune`, which could remove Git objects that a pool member +// relies on. There is currently no way for us to reliably determine if +// an object is still used anywhere, so the only safe thing to do is to +// assume that every object _is_ used. func rescueDanglingObjects(ctx context.Context, repo repository.GitRepo) error { fsck, err := git.Command(ctx, repo, "fsck", "--connectivity-only", "--dangling") if err != nil { -- GitLab From 2c2611995487bc1efa72c92007ebc9311776cde0 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 13 Jun 2019 11:47:17 +0200 Subject: [PATCH 4/5] changelog --- changelogs/unreleased/jv-rescue-dangling.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 changelogs/unreleased/jv-rescue-dangling.yml diff --git a/changelogs/unreleased/jv-rescue-dangling.yml b/changelogs/unreleased/jv-rescue-dangling.yml new file mode 100644 index 0000000000..b0c1e859e6 --- /dev/null +++ b/changelogs/unreleased/jv-rescue-dangling.yml @@ -0,0 +1,5 @@ +--- +title: Un-dangle dangling objects in object pools +merge_request: 1297 +author: +type: changed -- GitLab From b8533f80cbd123f2cb8375362d27211cbe7d4596 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 13 Jun 2019 11:54:56 +0200 Subject: [PATCH 5/5] Add comments to test --- internal/git/objectpool/fetch_test.go | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go index 91db52854b..85dfb144a3 100644 --- a/internal/git/objectpool/fetch_test.go +++ b/internal/git/objectpool/fetch_test.go @@ -31,26 +31,36 @@ func TestFetchFromOriginDangling(t *testing.T) { existingBlob = "c60514b6d3d6bf4bec1030f70026e34dfbd69ad5" ) + // We want to have some objects that are guaranteed to be dangling. Use + // random data to make each object unique. nonceBytes := make([]byte, 4) _, err = io.ReadFull(rand.Reader, nonceBytes) require.NoError(t, err) nonce := hex.EncodeToString(nonceBytes) baseArgs := []string{"-C", pool.FullPath()} + + // A blob with random contents should be unique. newBlobArgs := append(baseArgs, "hash-object", "-t", "blob", "-w", "--stdin") newBlob := text.ChompBytes(testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newBlobArgs...)) + // A tree with a randomly named blob entry should be unique. newTreeArgs := append(baseArgs, "mktree") newTreeStdin := strings.NewReader(fmt.Sprintf("100644 blob %s %s\n", existingBlob, nonce)) newTree := text.ChompBytes(testhelper.MustRunCommand(t, newTreeStdin, "git", newTreeArgs...)) + // A commit with a random message should be unique. newCommitArgs := append(baseArgs, "commit-tree", existingTree) newCommit := text.ChompBytes(testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newCommitArgs...)) + // A tag with random hex characters in its name should be unique. newTagName := "tag-" + nonce newTagArgs := append(baseArgs, "tag", "-m", "msg", "-a", newTagName, existingCommit) testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newTagArgs...) newTag := text.ChompBytes(testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "rev-parse", newTagName)...)) + + // `git tag` automatically creates a ref, so our new tag is not dangling. + // Deleting the ref should fix that. testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "update-ref", "-d", "refs/tags/"+newTagName)...) fsckBefore := testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "fsck", "--connectivity-only", "--dangling")...) @@ -62,9 +72,11 @@ func TestFetchFromOriginDangling(t *testing.T) { fmt.Sprintf("dangling commit %s", newCommit), fmt.Sprintf("dangling tag %s", newTag), } { - require.Contains(t, fsckBeforeLines, l) + require.Contains(t, fsckBeforeLines, l, "test setup sanity check") } + // We expect this second run to convert the dangling objects into + // non-dangling objects. require.NoError(t, pool.FetchFromOrigin(ctx, source), "second fetch") refsArgs := append(baseArgs, "for-each-ref", "--format=%(refname) %(objectname)") -- GitLab