diff --git a/internal/cli/gitaly/serve.go b/internal/cli/gitaly/serve.go index fd44702416f9731144756786ad40ed680ff86961..d90460910fe853e393b6d35f57e43f73be9b17f2 100644 --- a/internal/cli/gitaly/serve.go +++ b/internal/cli/gitaly/serve.go @@ -210,6 +210,15 @@ func run(cfg config.Cfg, logger logrus.FieldLogger) error { commandFactoryOpts = append(commandFactoryOpts, git.WithSkipHooks()) } + commandKillWait := (cfg.GracefulRestartTimeout.Duration() * 3) / 4 + commandFactoryOpts = append( + commandFactoryOpts, + git.WithKillFunc( + time.After(commandKillWait), + helper.NewTimerTicker(commandKillWait/5), + ), + ) + gitCmdFactory, cleanup, err := git.NewExecCommandFactory(cfg, logger, commandFactoryOpts...) if err != nil { return fmt.Errorf("creating Git command factory: %w", err) diff --git a/internal/command/command.go b/internal/command/command.go index 6f6d9738c84791a00705f31b078c1341e3c072fb..23fb4ee65882e2e9fbd5d37f9f8b7409292e4837 100644 --- a/internal/command/command.go +++ b/internal/command/command.go @@ -319,6 +319,10 @@ func New(ctx context.Context, nameAndArgs []string, opts ...Option) (*Command, e syscall.Kill(-cmd.Process.Pid, syscall.SIGTERM) } + if featureflag.KillGitProcessesOnShutdown.IsEnabled(ctx) && cfg.killFunc != nil { + cfg.killFunc(cmd.Process) + } + // We do not care for any potential error code, but just want to // make sure that the subprocess gets properly killed and processed. _ = command.Wait() diff --git a/internal/command/command_test.go b/internal/command/command_test.go index b037b22e85dd756cc273fb2ac2eb26fa528e8407..479758ab0d4af70dcae16eeabd9a94a1f7f1dfe0 100644 --- a/internal/command/command_test.go +++ b/internal/command/command_test.go @@ -5,6 +5,7 @@ import ( "context" "fmt" "io" + "os" "os/exec" "path/filepath" "runtime" @@ -19,6 +20,9 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v16/internal/cgroups" + "gitlab.com/gitlab-org/gitaly/v16/internal/featureflag" + "gitlab.com/gitlab-org/gitaly/v16/internal/helper" + "gitlab.com/gitlab-org/gitaly/v16/internal/helper/perm" "gitlab.com/gitlab-org/gitaly/v16/internal/structerr" "gitlab.com/gitlab-org/gitaly/v16/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v16/proto/go/gitalypb" @@ -227,6 +231,75 @@ func TestCommand_Wait_contextCancellationKillsCommand(t *testing.T) { require.ErrorIs(t, err, context.Canceled) } +// writeTrigger is a simple writer that will signal over a channel every time a +// Write happens. This will be used below in the case where we want to wait +// until a script is running before sending a sigterm. +type writeTrigger struct { + c chan struct{} +} + +func (w *writeTrigger) Write(p []byte) (int, error) { + w.c <- struct{}{} + + return len(p), nil +} + +func TestCommand_KillsCommand(t *testing.T) { + t.Parallel() + + tempDir := testhelper.TempDir(t) + // script traps SIGTERM + script := []byte(`#!/usr/bin/env bash +trap -- '' SIGTERM + +while true +do + echo hello + sleep 1 +done +`) + scriptPath := filepath.Join(tempDir, "trapSigTerm") + require.NoError(t, os.WriteFile(scriptPath, script, perm.SharedExecutable)) + + ctx, cancel := context.WithCancel( + featureflag.ContextWithFeatureFlag( + testhelper.Context(t), + featureflag.KillGitProcessesOnShutdown, + true, + ), + ) + + timeLimit := make(chan time.Time) + interval := helper.NewManualTicker() + + scriptCh := make(chan struct{}) + stdout := &writeTrigger{ + c: scriptCh, + } + cmd, err := New( + ctx, + []string{scriptPath}, + WithStdout(stdout), + WithKillFunc(func(p *os.Process) { + KillProcessEventually(p, timeLimit, interval) + }), + ) + require.NoError(t, err) + + go func() { + // first, make sure the script is running + <-scriptCh + cancel() + interval.Tick() + // sending a time onto the timeLimit channel will cause + // KillProcessEventually to send a sigkill to the process. + timeLimit <- time.Now() + }() + + err = cmd.Wait() + require.Equal(t, err, fmt.Errorf("signal: killed: %w", context.Canceled)) +} + func TestNew_setupStdin(t *testing.T) { t.Parallel() diff --git a/internal/command/option.go b/internal/command/option.go index 881f71fb7999a3edebd29f1e0714a0ba20031cc9..e34338673a1bc20f3f948cdd5b76d8bb6475f155 100644 --- a/internal/command/option.go +++ b/internal/command/option.go @@ -3,8 +3,12 @@ package command import ( "context" "io" + "os" + "syscall" + "time" "gitlab.com/gitlab-org/gitaly/v16/internal/cgroups" + "gitlab.com/gitlab-org/gitaly/v16/internal/helper" ) type config struct { @@ -23,6 +27,8 @@ type config struct { cgroupsManager cgroups.Manager cgroupsAddCommandOpts []cgroups.AddCommandOption spawnTokenManager *SpawnTokenManager + + killFunc func(process *os.Process) } // Option is an option that can be passed to `New()` for controlling how the command is being @@ -112,3 +118,29 @@ func WithFinalizer(finalizer func(context.Context, *Command)) Option { cfg.finalizers = append(cfg.finalizers, finalizer) } } + +// WithKillFunc provides the command with a function that will send a sigkill +// signal after a certain amount of time. +func WithKillFunc(f func(p *os.Process)) Option { + return func(cfg *config) { + cfg.killFunc = f + } +} + +// KillProcessEventually will wait a certain amount of time before sending a +// sigkill to the process +func KillProcessEventually(p *os.Process, timeLimit <-chan time.Time, interval helper.Ticker) { + for { + select { + case <-timeLimit: + //nolint:errcheck // TODO: do we want to report errors? + syscall.Kill(-p.Pid, syscall.SIGKILL) + interval.Stop() + return + case <-interval.C(): + if err := p.Signal(syscall.Signal(0)); err != nil { + return + } + } + } +} diff --git a/internal/featureflag/ff_kill_git_processes.go b/internal/featureflag/ff_kill_git_processes.go new file mode 100644 index 0000000000000000000000000000000000000000..2b355e0136d3345552cea5d1e9e87b5d183aa142 --- /dev/null +++ b/internal/featureflag/ff_kill_git_processes.go @@ -0,0 +1,9 @@ +package featureflag + +// KillGitProcessesOnShutdown enables the use of Git v2.42. +var KillGitProcessesOnShutdown = NewFeatureFlag( + "kill_git_processes_on_shutdown", + "v16.4.0", + "https://gitlab.com/gitlab-org/gitaly/-/issues/5547", + false, +) diff --git a/internal/git/command_factory.go b/internal/git/command_factory.go index bfb47203029e1376ca57d4d561bab7919078b058..8dd53d17f92bcea7899200ff89dfd61278d05e6e 100644 --- a/internal/git/command_factory.go +++ b/internal/git/command_factory.go @@ -9,6 +9,7 @@ import ( "path/filepath" "strings" "sync" + "time" "github.com/prometheus/client_golang/prometheus" "github.com/sirupsen/logrus" @@ -19,6 +20,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v16/internal/git/trace2hooks" "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config" "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage" + "gitlab.com/gitlab-org/gitaly/v16/internal/helper" "gitlab.com/gitlab-org/gitaly/v16/internal/log" "gitlab.com/gitlab-org/gitaly/v16/internal/tracing" "gitlab.com/gitlab-org/labkit/correlation" @@ -50,6 +52,7 @@ type execCommandFactoryConfig struct { cgroupsManager cgroups.Manager trace2Hooks []trace2.Hook execEnvConstructors []ExecutionEnvironmentConstructor + killFunc func(p *os.Process) } // ExecCommandFactoryOption is an option that can be passed to NewExecCommandFactory. @@ -111,6 +114,16 @@ func WithExecutionEnvironmentConstructors(constructors ...ExecutionEnvironmentCo } } +// WithKillFunc provides the command with a function that will send a sigkill +// signal after a certain amount of time. +func WithKillFunc(timeLimit <-chan time.Time, interval helper.Ticker) ExecCommandFactoryOption { + return func(cfg *execCommandFactoryConfig) { + cfg.killFunc = func(process *os.Process) { + command.KillProcessEventually(process, timeLimit, interval) + } + } +} + type hookDirectories struct { tempHooksPath string } @@ -133,6 +146,7 @@ type ExecCommandFactory struct { cachedGitVersionLock sync.RWMutex cachedGitVersionByBinary map[string]cachedGitVersion + killFunc func(p *os.Process) } // NewExecCommandFactory returns a new instance of initialized ExecCommandFactory. The returned @@ -189,6 +203,7 @@ func NewExecCommandFactory(cfg config.Cfg, logger logrus.FieldLogger, opts ...Ex ), hookDirs: hookDirectories, cachedGitVersionByBinary: make(map[string]cachedGitVersion), + killFunc: factoryCfg.killFunc, } return gitCmdFactory, runCleanups, nil @@ -488,6 +503,7 @@ func (cf *ExecCommandFactory) newCommand(ctx context.Context, repo storage.Repos command.WithCommandName("git", sc.Name), command.WithCgroup(cf.cgroupsManager, cgroupsAddCommandOpts...), command.WithCommandGitVersion(cmdGitVersion.String()), + command.WithKillFunc(cf.killFunc), ) command, err := command.New(ctx, append([]string{execEnv.BinaryPath}, args...), commandOpts...) if err != nil { diff --git a/internal/testhelper/testhelper.go b/internal/testhelper/testhelper.go index 69e2f72f69adb21b9b94b5c1363dde4c198dc770..d7abc50f5daeb0971d68ce75b8411d7ee22f28ae 100644 --- a/internal/testhelper/testhelper.go +++ b/internal/testhelper/testhelper.go @@ -237,6 +237,12 @@ func ContextWithoutCancel(opts ...ContextOpt) context.Context { // Randomly enable mailmap ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.MailmapOptions, rand.Int()%2 == 0) + // Randomly enable killing git processes on shutdown + ctx = featureflag.ContextWithFeatureFlag( + ctx, + featureflag.KillGitProcessesOnShutdown, + rand.Int()%2 == 0, + ) for _, opt := range opts { ctx = opt(ctx)