diff --git a/internal/git/housekeeping/manager/optimize_repository.go b/internal/git/housekeeping/manager/optimize_repository.go index 0e433c329ecd551bad3c2cd6a26dac32f34e9716..6601e0b35791ebdbc5a38d3b802bc7f6c7ac0e00 100644 --- a/internal/git/housekeeping/manager/optimize_repository.go +++ b/internal/git/housekeeping/manager/optimize_repository.go @@ -13,6 +13,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v16/internal/git/localrepo" "gitlab.com/gitlab-org/gitaly/v16/internal/git/stats" "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage" + "gitlab.com/gitlab-org/gitaly/v16/internal/log" "gitlab.com/gitlab-org/gitaly/v16/internal/structerr" "gitlab.com/gitlab-org/gitaly/v16/internal/tracing" "gitlab.com/gitlab-org/gitaly/v16/proto/go/gitalypb" @@ -435,9 +436,32 @@ func (m *RepositoryManager) packRefsIfNeeded(ctx context.Context, repo *localrep return false, fmt.Errorf("packing refs: %w, stderr: %q", err, stderr.String()) } + if err := m.verifyRefs(ctx, repo); err != nil { + m.logger.WithFields(log.Fields{ + "storage": repo.GetStorageName(), + "relative_path": repo.GetRelativePath(), + }).WithError(err).WarnContext(ctx, "refs verification failed") + m.metrics.FailedVerifyRefs.WithLabelValues(repo.GetStorageName()).Inc() + } + return true, nil } +func (m *RepositoryManager) verifyRefs(ctx context.Context, repo *localrepo.Repo) error { + var stderr bytes.Buffer + if err := repo.ExecAndWait(ctx, gitcmd.Command{ + Name: "refs", + Action: "verify", + Flags: []gitcmd.Option{ + gitcmd.Flag{Name: "--strict"}, + }, + }, gitcmd.WithStderr(&stderr)); err != nil { + return fmt.Errorf("verify refs: %w, stderr: %q", err, stderr.String()) + } + + return nil +} + // CleanStaleData removes any stale data in the repository as per the provided configuration. func (m *RepositoryManager) CleanStaleData(ctx context.Context, repo *localrepo.Repo, cfg housekeeping.CleanStaleDataConfig) error { span, ctx := tracing.StartSpanIfHasParent(ctx, "housekeeping.CleanStaleData", nil) diff --git a/internal/git/housekeeping/manager/optimize_repository_test.go b/internal/git/housekeeping/manager/optimize_repository_test.go index 462e5a60d06940fbad6205860de57be3b354b549..a6b34201069e978732e5bea1e7d677b84467d1ef 100644 --- a/internal/git/housekeeping/manager/optimize_repository_test.go +++ b/internal/git/housekeeping/manager/optimize_repository_test.go @@ -421,6 +421,7 @@ func TestPackRefsIfNeeded(t *testing.T) { ctx := testhelper.Context(t) cfg := testcfg.Build(t) logger := testhelper.NewLogger(t) + logHook := testhelper.AddLoggerHook(logger) gitCmdFactory := blockingCommandFactory{ CommandFactory: gittest.NewCommandFactory(t, cfg), @@ -457,10 +458,75 @@ func TestPackRefsIfNeeded(t *testing.T) { require.FileExists(t, looseRefPath) } + testhelper.RequirePromMetrics(t, manager.metrics.FailedVerifyRefs, "") + + var logs []string + for _, entry := range logHook.AllEntries() { + logs = append(logs, entry.Message) + } + require.NotContains(t, logs, "refs verification failed") + _, ok := manager.repositoryStates.values[repositoryStatesKey(repo)] require.Equal(t, tc.repoStateExists, ok) }) } + + t.Run("failure in refs verification", func(t *testing.T) { + t.Parallel() + + ctx := testhelper.Context(t) + cfg := testcfg.Build(t) + logger := testhelper.NewLogger(t) + logHook := testhelper.AddLoggerHook(logger) + + gitCmdFactory := errorInjectingCommandFactory{ + CommandFactory: gittest.NewCommandFactory(t, cfg), + injectedErrors: map[string]error{ + "refs": assert.AnError, + }, + } + + repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ + SkipCreationViaService: true, + }) + repo := localrepo.New(logger, gitalycfg.NewLocator(cfg), &gitCmdFactory, nil, repoProto) + + // Write an empty commit such that we can create valid refs. + gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("main")) + + packedRefsPath := filepath.Join(repoPath, "packed-refs") + looseRefPath := filepath.Join(repoPath, "refs", "heads", "main") + + manager := New(gitalycfgprom.Config{}, logger, nil, nil) + + didRepack, err := manager.packRefsIfNeeded(ctx, repo, mockOptimizationStrategy{ + shouldRepackReferences: func(ctx context.Context) bool { return true }, + }) + + // The verification failure is localized to the method scope and do not propagate to the calling code. + require.NoError(t, err) + require.True(t, didRepack) + require.FileExists(t, packedRefsPath) + require.NoFileExists(t, looseRefPath) + + var buf bytes.Buffer + _, err = buf.WriteString("# HELP gitaly_housekeeping_failed_verify_refs Number of git refs verify failures during housekeeping\n") + require.NoError(t, err) + _, err = buf.WriteString("# TYPE gitaly_housekeeping_failed_verify_refs counter\n") + require.NoError(t, err) + _, err = buf.WriteString(fmt.Sprintf( + "gitaly_housekeeping_failed_verify_refs{storage=%q} 1\n", + repo.GetStorageName(), + )) + require.NoError(t, err) + testhelper.RequirePromMetrics(t, manager.metrics.FailedVerifyRefs, buf.String()) + + var logs []string + for _, entry := range logHook.AllEntries() { + logs = append(logs, entry.Message) + } + require.Contains(t, logs, "refs verification failed") + }) } func TestOptimizeRepository(t *testing.T) { diff --git a/internal/git/housekeeping/metrics.go b/internal/git/housekeeping/metrics.go index fb424af9759f5e6e3cb8a291d2d2c07703eacad4..222ee7cd569854412e7ea78b5dc910049ffbca54 100644 --- a/internal/git/housekeeping/metrics.go +++ b/internal/git/housekeeping/metrics.go @@ -18,6 +18,7 @@ type Metrics struct { PrunedFilesTotal *prometheus.CounterVec TasksLatency *prometheus.HistogramVec TasksTotal *prometheus.CounterVec + FailedVerifyRefs *prometheus.CounterVec } // NewMetrics returns a new metric wrapper object. @@ -45,6 +46,13 @@ func NewMetrics(promCfg gitalycfgprom.Config) *Metrics { }, []string{"filetype"}, ), + FailedVerifyRefs: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "gitaly_housekeeping_failed_verify_refs", + Help: "Number of git refs verify failures during housekeeping", + }, + []string{"storage"}, + ), DataStructureExistence: prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "gitaly_housekeeping_data_structure_existence_total", @@ -108,6 +116,7 @@ func (m *Metrics) Collect(metrics chan<- prometheus.Metric) { m.TasksTotal.Collect(metrics) m.TasksLatency.Collect(metrics) m.PrunedFilesTotal.Collect(metrics) + m.FailedVerifyRefs.Collect(metrics) m.DataStructureExistence.Collect(metrics) m.DataStructureCount.Collect(metrics) m.DataStructureSize.Collect(metrics)