From d622d0f774ef651c0eff7c86ca79e92fcc2d8747 Mon Sep 17 00:00:00 2001 From: John Cai Date: Fri, 25 Aug 2023 14:58:57 -0400 Subject: [PATCH 1/2] command: Kill processes after a grace period We want to make sure that processes get killed when the context is cancelled. In order to keep the code testable, we will pass in a function down with options. --- internal/command/command.go | 4 + internal/command/command_test.go | 73 +++++++++++++++++++ internal/command/option.go | 32 ++++++++ internal/featureflag/ff_kill_git_processes.go | 9 +++ internal/git/command_factory.go | 16 ++++ internal/testhelper/testhelper.go | 6 ++ 6 files changed, 140 insertions(+) create mode 100644 internal/featureflag/ff_kill_git_processes.go diff --git a/internal/command/command.go b/internal/command/command.go index 6f6d9738c8..23fb4ee658 100644 --- a/internal/command/command.go +++ b/internal/command/command.go @@ -319,6 +319,10 @@ func New(ctx context.Context, nameAndArgs []string, opts ...Option) (*Command, e syscall.Kill(-cmd.Process.Pid, syscall.SIGTERM) } + if featureflag.KillGitProcessesOnShutdown.IsEnabled(ctx) && cfg.killFunc != nil { + cfg.killFunc(cmd.Process) + } + // We do not care for any potential error code, but just want to // make sure that the subprocess gets properly killed and processed. _ = command.Wait() diff --git a/internal/command/command_test.go b/internal/command/command_test.go index b037b22e85..479758ab0d 100644 --- a/internal/command/command_test.go +++ b/internal/command/command_test.go @@ -5,6 +5,7 @@ import ( "context" "fmt" "io" + "os" "os/exec" "path/filepath" "runtime" @@ -19,6 +20,9 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v16/internal/cgroups" + "gitlab.com/gitlab-org/gitaly/v16/internal/featureflag" + "gitlab.com/gitlab-org/gitaly/v16/internal/helper" + "gitlab.com/gitlab-org/gitaly/v16/internal/helper/perm" "gitlab.com/gitlab-org/gitaly/v16/internal/structerr" "gitlab.com/gitlab-org/gitaly/v16/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v16/proto/go/gitalypb" @@ -227,6 +231,75 @@ func TestCommand_Wait_contextCancellationKillsCommand(t *testing.T) { require.ErrorIs(t, err, context.Canceled) } +// writeTrigger is a simple writer that will signal over a channel every time a +// Write happens. This will be used below in the case where we want to wait +// until a script is running before sending a sigterm. +type writeTrigger struct { + c chan struct{} +} + +func (w *writeTrigger) Write(p []byte) (int, error) { + w.c <- struct{}{} + + return len(p), nil +} + +func TestCommand_KillsCommand(t *testing.T) { + t.Parallel() + + tempDir := testhelper.TempDir(t) + // script traps SIGTERM + script := []byte(`#!/usr/bin/env bash +trap -- '' SIGTERM + +while true +do + echo hello + sleep 1 +done +`) + scriptPath := filepath.Join(tempDir, "trapSigTerm") + require.NoError(t, os.WriteFile(scriptPath, script, perm.SharedExecutable)) + + ctx, cancel := context.WithCancel( + featureflag.ContextWithFeatureFlag( + testhelper.Context(t), + featureflag.KillGitProcessesOnShutdown, + true, + ), + ) + + timeLimit := make(chan time.Time) + interval := helper.NewManualTicker() + + scriptCh := make(chan struct{}) + stdout := &writeTrigger{ + c: scriptCh, + } + cmd, err := New( + ctx, + []string{scriptPath}, + WithStdout(stdout), + WithKillFunc(func(p *os.Process) { + KillProcessEventually(p, timeLimit, interval) + }), + ) + require.NoError(t, err) + + go func() { + // first, make sure the script is running + <-scriptCh + cancel() + interval.Tick() + // sending a time onto the timeLimit channel will cause + // KillProcessEventually to send a sigkill to the process. + timeLimit <- time.Now() + }() + + err = cmd.Wait() + require.Equal(t, err, fmt.Errorf("signal: killed: %w", context.Canceled)) +} + func TestNew_setupStdin(t *testing.T) { t.Parallel() diff --git a/internal/command/option.go b/internal/command/option.go index 881f71fb79..e34338673a 100644 --- a/internal/command/option.go +++ b/internal/command/option.go @@ -3,8 +3,12 @@ package command import ( "context" "io" + "os" + "syscall" + "time" "gitlab.com/gitlab-org/gitaly/v16/internal/cgroups" + "gitlab.com/gitlab-org/gitaly/v16/internal/helper" ) type config struct { @@ -23,6 +27,8 @@ type config struct { cgroupsManager cgroups.Manager cgroupsAddCommandOpts []cgroups.AddCommandOption spawnTokenManager *SpawnTokenManager + + killFunc func(process *os.Process) } // Option is an option that can be passed to `New()` for controlling how the command is being @@ -112,3 +118,29 @@ func WithFinalizer(finalizer func(context.Context, *Command)) Option { cfg.finalizers = append(cfg.finalizers, finalizer) } } + +// WithKillFunc provides the command with a function that will send a sigkill +// signal after a certain amount of time. +func WithKillFunc(f func(p *os.Process)) Option { + return func(cfg *config) { + cfg.killFunc = f + } +} + +// KillProcessEventually will wait a certain amount of time before sending a +// sigkill to the process +func KillProcessEventually(p *os.Process, timeLimit <-chan time.Time, interval helper.Ticker) { + for { + select { + case <-timeLimit: + //nolint:errcheck // TODO: do we want to report errors? + syscall.Kill(-p.Pid, syscall.SIGKILL) + interval.Stop() + return + case <-interval.C(): + if err := p.Signal(syscall.Signal(0)); err != nil { + return + } + } + } +} diff --git a/internal/featureflag/ff_kill_git_processes.go b/internal/featureflag/ff_kill_git_processes.go new file mode 100644 index 0000000000..2b355e0136 --- /dev/null +++ b/internal/featureflag/ff_kill_git_processes.go @@ -0,0 +1,9 @@ +package featureflag + +// KillGitProcessesOnShutdown enables the use of Git v2.42. +var KillGitProcessesOnShutdown = NewFeatureFlag( + "kill_git_processes_on_shutdown", + "v16.4.0", + "https://gitlab.com/gitlab-org/gitaly/-/issues/5547", + false, +) diff --git a/internal/git/command_factory.go b/internal/git/command_factory.go index bfb4720302..8dd53d17f9 100644 --- a/internal/git/command_factory.go +++ b/internal/git/command_factory.go @@ -9,6 +9,7 @@ import ( "path/filepath" "strings" "sync" + "time" "github.com/prometheus/client_golang/prometheus" "github.com/sirupsen/logrus" @@ -19,6 +20,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v16/internal/git/trace2hooks" "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config" "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage" + "gitlab.com/gitlab-org/gitaly/v16/internal/helper" "gitlab.com/gitlab-org/gitaly/v16/internal/log" "gitlab.com/gitlab-org/gitaly/v16/internal/tracing" "gitlab.com/gitlab-org/labkit/correlation" @@ -50,6 +52,7 @@ type execCommandFactoryConfig struct { cgroupsManager cgroups.Manager trace2Hooks []trace2.Hook execEnvConstructors []ExecutionEnvironmentConstructor + killFunc func(p *os.Process) } // ExecCommandFactoryOption is an option that can be passed to NewExecCommandFactory. @@ -111,6 +114,16 @@ func WithExecutionEnvironmentConstructors(constructors ...ExecutionEnvironmentCo } } +// WithKillFunc provides the command with a function that will send a sigkill +// signal after a certain amount of time. +func WithKillFunc(timeLimit <-chan time.Time, interval helper.Ticker) ExecCommandFactoryOption { + return func(cfg *execCommandFactoryConfig) { + cfg.killFunc = func(process *os.Process) { + command.KillProcessEventually(process, timeLimit, interval) + } + } +} + type hookDirectories struct { tempHooksPath string } @@ -133,6 +146,7 @@ type ExecCommandFactory struct { cachedGitVersionLock sync.RWMutex cachedGitVersionByBinary map[string]cachedGitVersion + killFunc func(p *os.Process) } // NewExecCommandFactory returns a new instance of initialized ExecCommandFactory. The returned @@ -189,6 +203,7 @@ func NewExecCommandFactory(cfg config.Cfg, logger logrus.FieldLogger, opts ...Ex ), hookDirs: hookDirectories, cachedGitVersionByBinary: make(map[string]cachedGitVersion), + killFunc: factoryCfg.killFunc, } return gitCmdFactory, runCleanups, nil @@ -488,6 +503,7 @@ func (cf *ExecCommandFactory) newCommand(ctx context.Context, repo storage.Repos command.WithCommandName("git", sc.Name), command.WithCgroup(cf.cgroupsManager, cgroupsAddCommandOpts...), command.WithCommandGitVersion(cmdGitVersion.String()), + command.WithKillFunc(cf.killFunc), ) command, err := command.New(ctx, append([]string{execEnv.BinaryPath}, args...), commandOpts...) if err != nil { diff --git a/internal/testhelper/testhelper.go b/internal/testhelper/testhelper.go index 69e2f72f69..d7abc50f5d 100644 --- a/internal/testhelper/testhelper.go +++ b/internal/testhelper/testhelper.go @@ -237,6 +237,12 @@ func ContextWithoutCancel(opts ...ContextOpt) context.Context { // Randomly enable mailmap ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.MailmapOptions, rand.Int()%2 == 0) + // Randomly enable killing git processes on shutdown + ctx = featureflag.ContextWithFeatureFlag( + ctx, + featureflag.KillGitProcessesOnShutdown, + rand.Int()%2 == 0, + ) for _, opt := range opts { ctx = opt(ctx) -- GitLab From 53dc3d8273cf7f2be813821f4e8c7f0f6c8a78b4 Mon Sep 17 00:00:00 2001 From: John Cai Date: Fri, 25 Aug 2023 17:48:13 -0400 Subject: [PATCH 2/2] gitaly: Set kill function for killing off processes that stay alive Now that we have a way to kill processes with sigkill after a determined amount of time, let's use it in the main command factory. --- internal/cli/gitaly/serve.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/internal/cli/gitaly/serve.go b/internal/cli/gitaly/serve.go index fd44702416..d90460910f 100644 --- a/internal/cli/gitaly/serve.go +++ b/internal/cli/gitaly/serve.go @@ -210,6 +210,15 @@ func run(cfg config.Cfg, logger logrus.FieldLogger) error { commandFactoryOpts = append(commandFactoryOpts, git.WithSkipHooks()) } + commandKillWait := (cfg.GracefulRestartTimeout.Duration() * 3) / 4 + commandFactoryOpts = append( + commandFactoryOpts, + git.WithKillFunc( + time.After(commandKillWait), + helper.NewTimerTicker(commandKillWait/5), + ), + ) + gitCmdFactory, cleanup, err := git.NewExecCommandFactory(cfg, logger, commandFactoryOpts...) if err != nil { return fmt.Errorf("creating Git command factory: %w", err) -- GitLab