From a431e07bb8fa67f9481228094c6898d8e6a6c47c Mon Sep 17 00:00:00 2001 From: Santhanu Vinod Date: Sun, 18 May 2025 20:13:23 +0530 Subject: [PATCH 1/5] housekeeping: Add git refs verify after reference repacking This commit implements the `git refs verify` command as part of the housekeeping process. The verification runs after successful reference repacking to detect potential reference corruption. The implementation: - Adds a new `verifyRefs` method that executes `git refs verify` - Calls this method from `packRefsIfNeeded` after successful repacking - Returns detailed errors including stderr output when verification fails - Logs verification failures with repository context and storage label - Does not propagate verification errors outside the `packRefsIfNeeded` method This provides an additional safety mechanism for detecting reference corruptions. Fixes: https://gitlab.com/gitlab-org/gitaly/-/issues/6531 Changelog: added Signed-off-by: Santhanu Vinod --- .../manager/optimize_repository.go | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/internal/git/housekeeping/manager/optimize_repository.go b/internal/git/housekeeping/manager/optimize_repository.go index 0e433c329e..a432fe0e65 100644 --- a/internal/git/housekeeping/manager/optimize_repository.go +++ b/internal/git/housekeeping/manager/optimize_repository.go @@ -13,6 +13,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v16/internal/git/localrepo" "gitlab.com/gitlab-org/gitaly/v16/internal/git/stats" "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage" + "gitlab.com/gitlab-org/gitaly/v16/internal/log" "gitlab.com/gitlab-org/gitaly/v16/internal/structerr" "gitlab.com/gitlab-org/gitaly/v16/internal/tracing" "gitlab.com/gitlab-org/gitaly/v16/proto/go/gitalypb" @@ -435,9 +436,31 @@ func (m *RepositoryManager) packRefsIfNeeded(ctx context.Context, repo *localrep return false, fmt.Errorf("packing refs: %w, stderr: %q", err, stderr.String()) } + if err := m.verifyRefs(ctx, repo); err != nil { + m.logger.WithFields(log.Fields{ + "storage": repo.GetStorageName(), + "relative_pathh": repo.GetRelativePath(), + }).WithError(err).WarnContext(ctx, "refs verification failed") + } + return true, nil } +func (m *RepositoryManager) verifyRefs(ctx context.Context, repo *localrepo.Repo) error { + var stderr bytes.Buffer + if err := repo.ExecAndWait(ctx, gitcmd.Command{ + Name: "refs", + Action: "verify", + Flags: []gitcmd.Option{ + gitcmd.Flag{Name: "--strict"}, + }, + }, gitcmd.WithStderr(&stderr)); err != nil { + return fmt.Errorf("verify refs: %w, stderr: %q", err, stderr.String()) + } + + return nil +} + // CleanStaleData removes any stale data in the repository as per the provided configuration. func (m *RepositoryManager) CleanStaleData(ctx context.Context, repo *localrepo.Repo, cfg housekeeping.CleanStaleDataConfig) error { span, ctx := tracing.StartSpanIfHasParent(ctx, "housekeeping.CleanStaleData", nil) -- GitLab From 2a692e4356ca473a8373f7250e593c29502e7224 Mon Sep 17 00:00:00 2001 From: Santhanu Vinod Date: Sun, 18 May 2025 20:29:41 +0530 Subject: [PATCH 2/5] housekeeping: Add Prometheus counter for failed ref verifications This commit adds a new Prometheus counter metric to track failed reference verifications during housekeeping. This metric is incremented whenever `git refs verify` returns an error, allowing for monitoring and altering on reference database corruption. The implementation: - Adds a new counter metric`gitaly_housekeeping_failed_verify_refs` - Increments the counter after logging verification errors in `packRefsIfNeeded` - Includes the storage name as a label for better observability This metric will help detect and alert on reference corruption issues. Fixes: https://gitlab.com/gitlab-org/gitaly/-/issues/6531 Changelog: added Signed-off-by: Santhanu Vinod --- internal/git/housekeeping/manager/optimize_repository.go | 1 + internal/git/housekeeping/metrics.go | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/internal/git/housekeeping/manager/optimize_repository.go b/internal/git/housekeeping/manager/optimize_repository.go index a432fe0e65..280ea34d68 100644 --- a/internal/git/housekeeping/manager/optimize_repository.go +++ b/internal/git/housekeeping/manager/optimize_repository.go @@ -441,6 +441,7 @@ func (m *RepositoryManager) packRefsIfNeeded(ctx context.Context, repo *localrep "storage": repo.GetStorageName(), "relative_pathh": repo.GetRelativePath(), }).WithError(err).WarnContext(ctx, "refs verification failed") + m.metrics.FailedVerifyRefs.WithLabelValues(repo.GetStorageName()).Inc() } return true, nil diff --git a/internal/git/housekeeping/metrics.go b/internal/git/housekeeping/metrics.go index fb424af975..222ee7cd56 100644 --- a/internal/git/housekeeping/metrics.go +++ b/internal/git/housekeeping/metrics.go @@ -18,6 +18,7 @@ type Metrics struct { PrunedFilesTotal *prometheus.CounterVec TasksLatency *prometheus.HistogramVec TasksTotal *prometheus.CounterVec + FailedVerifyRefs *prometheus.CounterVec } // NewMetrics returns a new metric wrapper object. @@ -45,6 +46,13 @@ func NewMetrics(promCfg gitalycfgprom.Config) *Metrics { }, []string{"filetype"}, ), + FailedVerifyRefs: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "gitaly_housekeeping_failed_verify_refs", + Help: "Number of git refs verify failures during housekeeping", + }, + []string{"storage"}, + ), DataStructureExistence: prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "gitaly_housekeeping_data_structure_existence_total", @@ -108,6 +116,7 @@ func (m *Metrics) Collect(metrics chan<- prometheus.Metric) { m.TasksTotal.Collect(metrics) m.TasksLatency.Collect(metrics) m.PrunedFilesTotal.Collect(metrics) + m.FailedVerifyRefs.Collect(metrics) m.DataStructureExistence.Collect(metrics) m.DataStructureCount.Collect(metrics) m.DataStructureSize.Collect(metrics) -- GitLab From 4a6001cea57dc63b00367033928230da3ba768b8 Mon Sep 17 00:00:00 2001 From: Santhanu Vinod Date: Sun, 18 May 2025 20:52:11 +0530 Subject: [PATCH 3/5] housekeeping: Add tests for git refs verify functionality This commit adds tests for the git refs verify functionality in the housekeeping process. The tests verify both success and failure scenarios for reference verification. The implementation: - Adds verification to existing tests to ensure no logs or metrics are generated when verification succeeds - Adds a dedicated test case for verification failures that checks: - Error handling behavior - Proper logging of verification failures - Correct incrementation of the Prometheus counter These tests ensure that the verification mechanism works as expected and properly reports issues when reference corruption is detected. Fixes: https://gitlab.com/gitlab-org/gitaly/-/issues/6531 Changelog: added Signed-off-by: Santhanu Vinod --- .../manager/optimize_repository_test.go | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/internal/git/housekeeping/manager/optimize_repository_test.go b/internal/git/housekeeping/manager/optimize_repository_test.go index 462e5a60d0..a6b3420106 100644 --- a/internal/git/housekeeping/manager/optimize_repository_test.go +++ b/internal/git/housekeeping/manager/optimize_repository_test.go @@ -421,6 +421,7 @@ func TestPackRefsIfNeeded(t *testing.T) { ctx := testhelper.Context(t) cfg := testcfg.Build(t) logger := testhelper.NewLogger(t) + logHook := testhelper.AddLoggerHook(logger) gitCmdFactory := blockingCommandFactory{ CommandFactory: gittest.NewCommandFactory(t, cfg), @@ -457,10 +458,75 @@ func TestPackRefsIfNeeded(t *testing.T) { require.FileExists(t, looseRefPath) } + testhelper.RequirePromMetrics(t, manager.metrics.FailedVerifyRefs, "") + + var logs []string + for _, entry := range logHook.AllEntries() { + logs = append(logs, entry.Message) + } + require.NotContains(t, logs, "refs verification failed") + _, ok := manager.repositoryStates.values[repositoryStatesKey(repo)] require.Equal(t, tc.repoStateExists, ok) }) } + + t.Run("failure in refs verification", func(t *testing.T) { + t.Parallel() + + ctx := testhelper.Context(t) + cfg := testcfg.Build(t) + logger := testhelper.NewLogger(t) + logHook := testhelper.AddLoggerHook(logger) + + gitCmdFactory := errorInjectingCommandFactory{ + CommandFactory: gittest.NewCommandFactory(t, cfg), + injectedErrors: map[string]error{ + "refs": assert.AnError, + }, + } + + repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ + SkipCreationViaService: true, + }) + repo := localrepo.New(logger, gitalycfg.NewLocator(cfg), &gitCmdFactory, nil, repoProto) + + // Write an empty commit such that we can create valid refs. + gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("main")) + + packedRefsPath := filepath.Join(repoPath, "packed-refs") + looseRefPath := filepath.Join(repoPath, "refs", "heads", "main") + + manager := New(gitalycfgprom.Config{}, logger, nil, nil) + + didRepack, err := manager.packRefsIfNeeded(ctx, repo, mockOptimizationStrategy{ + shouldRepackReferences: func(ctx context.Context) bool { return true }, + }) + + // The verification failure is localized to the method scope and do not propagate to the calling code. + require.NoError(t, err) + require.True(t, didRepack) + require.FileExists(t, packedRefsPath) + require.NoFileExists(t, looseRefPath) + + var buf bytes.Buffer + _, err = buf.WriteString("# HELP gitaly_housekeeping_failed_verify_refs Number of git refs verify failures during housekeeping\n") + require.NoError(t, err) + _, err = buf.WriteString("# TYPE gitaly_housekeeping_failed_verify_refs counter\n") + require.NoError(t, err) + _, err = buf.WriteString(fmt.Sprintf( + "gitaly_housekeeping_failed_verify_refs{storage=%q} 1\n", + repo.GetStorageName(), + )) + require.NoError(t, err) + testhelper.RequirePromMetrics(t, manager.metrics.FailedVerifyRefs, buf.String()) + + var logs []string + for _, entry := range logHook.AllEntries() { + logs = append(logs, entry.Message) + } + require.Contains(t, logs, "refs verification failed") + }) } func TestOptimizeRepository(t *testing.T) { -- GitLab From 073ea5a535372b2a11399bdd4f96c6eeb9bf022f Mon Sep 17 00:00:00 2001 From: Santhanu V Date: Sun, 18 May 2025 21:45:09 +0530 Subject: [PATCH 4/5] Apply 1 suggestion(s) to 1 file(s) Co-authored-by: GitLab Duo --- internal/git/housekeeping/manager/optimize_repository.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/git/housekeeping/manager/optimize_repository.go b/internal/git/housekeeping/manager/optimize_repository.go index 280ea34d68..ad5c424089 100644 --- a/internal/git/housekeeping/manager/optimize_repository.go +++ b/internal/git/housekeeping/manager/optimize_repository.go @@ -439,7 +439,7 @@ func (m *RepositoryManager) packRefsIfNeeded(ctx context.Context, repo *localrep if err := m.verifyRefs(ctx, repo); err != nil { m.logger.WithFields(log.Fields{ "storage": repo.GetStorageName(), - "relative_pathh": repo.GetRelativePath(), + "relative_path": repo.GetRelativePath(), }).WithError(err).WarnContext(ctx, "refs verification failed") m.metrics.FailedVerifyRefs.WithLabelValues(repo.GetStorageName()).Inc() } -- GitLab From 9a96cadc7703176f58170d9b36ccadd4b399b5c8 Mon Sep 17 00:00:00 2001 From: Santhanu Vinod Date: Sun, 18 May 2025 22:24:34 +0530 Subject: [PATCH 5/5] Fix linting issue in optimize_repository.go The linting error was related to improper formatting of the "storage" field declaration. --- internal/git/housekeeping/manager/optimize_repository.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/git/housekeeping/manager/optimize_repository.go b/internal/git/housekeeping/manager/optimize_repository.go index ad5c424089..6601e0b357 100644 --- a/internal/git/housekeeping/manager/optimize_repository.go +++ b/internal/git/housekeeping/manager/optimize_repository.go @@ -438,7 +438,7 @@ func (m *RepositoryManager) packRefsIfNeeded(ctx context.Context, repo *localrep if err := m.verifyRefs(ctx, repo); err != nil { m.logger.WithFields(log.Fields{ - "storage": repo.GetStorageName(), + "storage": repo.GetStorageName(), "relative_path": repo.GetRelativePath(), }).WithError(err).WarnContext(ctx, "refs verification failed") m.metrics.FailedVerifyRefs.WithLabelValues(repo.GetStorageName()).Inc() -- GitLab