From 4b96dcbb848f2df521692b2b2b2f5ed47e486cda Mon Sep 17 00:00:00 2001 From: James Liu Date: Mon, 26 May 2025 14:38:51 +1000 Subject: [PATCH] housekeeping: Use fresh context when executing job The housekeeping middleware introduced in [1] intercepts gRPC requests and decides whether an asynchronous housekeeping job should be scheduled. The job itself is executed in a goroutine by calling the `OptimizeRepository` function. The original implementation provided the active context as the first argument. This eventually causes OptimizeRepository to fail with a "context canceled" error because the active context will go out of scope and be cancelled once the current request finishes. Instead, we need to create a fresh context with a Done channel that lives independently from the current request context. We propagate the existing correlation ID to the new context if it exists. [1] https://gitlab.com/gitlab-org/gitaly/-/merge_requests/7881 --- .../grpc/middleware/housekeeping/middleware.go | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/internal/grpc/middleware/housekeeping/middleware.go b/internal/grpc/middleware/housekeeping/middleware.go index 2987615401..bb60e8e9a3 100644 --- a/internal/grpc/middleware/housekeeping/middleware.go +++ b/internal/grpc/middleware/housekeeping/middleware.go @@ -14,6 +14,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v16/internal/structerr" "gitlab.com/gitlab-org/gitaly/v16/middleware" "gitlab.com/gitlab-org/gitaly/v16/proto/go/gitalypb" + "gitlab.com/gitlab-org/labkit/correlation" "google.golang.org/grpc" "google.golang.org/protobuf/proto" ) @@ -239,20 +240,28 @@ func (m *Middleware) scheduleHousekeeping(ctx context.Context, repo *gitalypb.Re m.wg.Add(1) go func() { + // We need to call OptimizeRepository with a fresh context as we're executing it asynchronously. + // Providing the existing `ctx` would cause it to fail, since `ctx` would be cancelled when this + // request completes. + housekeepingCtx, housekeepingCancel := context.WithCancel(context.Background()) + // We should ensure the correlation ID is propagated across to the new context so we can effectively + // trace housekeeping jobs to their origin. + housekeepingCtx = correlation.ContextWithCorrelation(housekeepingCtx, correlation.ExtractFromContextOrGenerate(ctx)) + defer func() { m.markHousekeepingInactive(key) - - m.logger.InfoContext(ctx, "ended scheduled housekeeping") + m.logger.InfoContext(housekeepingCtx, "ended scheduled housekeeping") + housekeepingCancel() m.wg.Done() }() localRepo := m.localRepoFactory.Build(repo) - if err := m.manager.OptimizeRepository(ctx, localRepo, manager.WithOptimizationStrategyConstructor( + if err := m.manager.OptimizeRepository(housekeepingCtx, localRepo, manager.WithOptimizationStrategyConstructor( func(info stats.RepositoryInfo) housekeeping.OptimizationStrategy { return housekeeping.NewHeuristicalOptimizationStrategy(info) }, )); err != nil { - m.logger.WithError(err).ErrorContext(ctx, "failed scheduled housekeeping") + m.logger.WithError(err).ErrorContext(housekeepingCtx, "failed scheduled housekeeping") } }() } -- GitLab