From 74dd68d43e4a7f66cba80fed2866543427f9ed69 Mon Sep 17 00:00:00 2001 From: John Cai Date: Tue, 10 May 2022 15:09:53 -0400 Subject: [PATCH 1/3] cgroups: Add FallbackToOldVersion to support old cgroups config Once the new configuration rolls out, we need to support old configurations for the time being until 16.0 when we can deprecate the old cgroups config. Until then, allow for a translation layer that translates the old version to the new version. --- internal/gitaly/config/cgroups/cgroups.go | 16 ++ .../gitaly/config/cgroups/cgroups_test.go | 150 ++++++++++++++++++ internal/gitaly/config/config.go | 2 + internal/gitaly/config/config_test.go | 27 ---- 4 files changed, 168 insertions(+), 27 deletions(-) create mode 100644 internal/gitaly/config/cgroups/cgroups_test.go diff --git a/internal/gitaly/config/cgroups/cgroups.go b/internal/gitaly/config/cgroups/cgroups.go index 00fb9f45ef..935a1a5652 100644 --- a/internal/gitaly/config/cgroups/cgroups.go +++ b/internal/gitaly/config/cgroups/cgroups.go @@ -22,6 +22,22 @@ type Config struct { Memory Memory `toml:"memory"` } +// FallbackToOldVersion translates the old format of cgroups into the new +// format. +func (c *Config) FallbackToOldVersion() { + if c.Repositories.Count == 0 { + c.Repositories.Count = c.Count + + if c.Repositories.MemoryBytes == 0 && c.Memory.Enabled { + c.Repositories.MemoryBytes = c.Memory.Limit + } + + if c.Repositories.CPUShares == 0 && c.CPU.Enabled { + c.Repositories.CPUShares = c.CPU.Shares + } + } +} + // Repositories configures cgroups to be created that are isolated by repository. type Repositories struct { // Count is the number of cgroups that will be created for repository-level isolation diff --git a/internal/gitaly/config/cgroups/cgroups_test.go b/internal/gitaly/config/cgroups/cgroups_test.go new file mode 100644 index 0000000000..8b8bdf8cb5 --- /dev/null +++ b/internal/gitaly/config/cgroups/cgroups_test.go @@ -0,0 +1,150 @@ +package cgroups + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestFallbackToOldVersion(t *testing.T) { + testCases := []struct { + desc string + configBefore Config + configAfter Config + }{ + { + desc: "empty config", + configBefore: Config{}, + configAfter: Config{}, + }, + { + desc: "new format", + configBefore: Config{ + Mountpoint: "some/mountpoint", + HierarchyRoot: "gitaly", + Repositories: Repositories{ + Count: 100, + MemoryBytes: 1024, + CPUShares: 16, + }, + }, + configAfter: Config{ + Mountpoint: "some/mountpoint", + HierarchyRoot: "gitaly", + Repositories: Repositories{ + Count: 100, + MemoryBytes: 1024, + CPUShares: 16, + }, + }, + }, + { + desc: "old format", + configBefore: Config{ + Mountpoint: "some/mountpoint", + HierarchyRoot: "gitaly", + Count: 100, + Memory: Memory{ + Enabled: true, + Limit: 1024, + }, + CPU: CPU{ + Enabled: true, + Shares: 16, + }, + }, + configAfter: Config{ + Mountpoint: "some/mountpoint", + HierarchyRoot: "gitaly", + Count: 100, + Memory: Memory{ + Enabled: true, + Limit: 1024, + }, + CPU: CPU{ + Enabled: true, + Shares: 16, + }, + Repositories: Repositories{ + Count: 100, + MemoryBytes: 1024, + CPUShares: 16, + }, + }, + }, + { + desc: "old format, memory only", + configBefore: Config{ + Mountpoint: "some/mountpoint", + HierarchyRoot: "gitaly", + Count: 100, + Memory: Memory{ + Enabled: true, + Limit: 1024, + }, + CPU: CPU{ + Enabled: false, + Shares: 16, + }, + }, + configAfter: Config{ + Mountpoint: "some/mountpoint", + HierarchyRoot: "gitaly", + Count: 100, + Memory: Memory{ + Enabled: true, + Limit: 1024, + }, + CPU: CPU{ + Enabled: false, + Shares: 16, + }, + + Repositories: Repositories{ + Count: 100, + MemoryBytes: 1024, + }, + }, + }, + { + desc: "old format, cpu only", + configBefore: Config{ + Mountpoint: "some/mountpoint", + HierarchyRoot: "gitaly", + Count: 100, + Memory: Memory{ + Enabled: false, + Limit: 1024, + }, + CPU: CPU{ + Enabled: true, + Shares: 16, + }, + }, + configAfter: Config{ + Mountpoint: "some/mountpoint", + HierarchyRoot: "gitaly", + Count: 100, + Memory: Memory{ + Enabled: false, + Limit: 1024, + }, + CPU: CPU{ + Enabled: true, + Shares: 16, + }, + Repositories: Repositories{ + Count: 100, + CPUShares: 16, + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + tc.configBefore.FallbackToOldVersion() + assert.Equal(t, tc.configAfter, tc.configBefore) + }) + } +} diff --git a/internal/gitaly/config/config.go b/internal/gitaly/config/config.go index 505a0fb9db..bad2091fcc 100644 --- a/internal/gitaly/config/config.go +++ b/internal/gitaly/config/config.go @@ -291,6 +291,8 @@ func (cfg *Cfg) setDefaults() error { cfg.Cgroups.HierarchyRoot = "gitaly" } + cfg.Cgroups.FallbackToOldVersion() + return nil } diff --git a/internal/gitaly/config/config_test.go b/internal/gitaly/config/config_test.go index bbb348e601..a5d15003e6 100644 --- a/internal/gitaly/config/config_test.go +++ b/internal/gitaly/config/config_test.go @@ -931,33 +931,6 @@ func TestValidateCgroups(t *testing.T) { }, }, }, - { - name: "memory limit - negative", - rawCfg: `[cgroups] - count = 10 - mountpoint = "/sys/fs/cgroup" - hierarchy_root = "gitaly" - [cgroups.memory] - enabled = true - limit = -5 - [cgroups.cpu] - enabled = true - shares = 512 - `, - expect: cgroups.Config{ - Count: 10, - Mountpoint: "/sys/fs/cgroup", - HierarchyRoot: "gitaly", - Memory: cgroups.Memory{ - Enabled: true, - Limit: -5, - }, - CPU: cgroups.CPU{ - Enabled: true, - Shares: 512, - }, - }, - }, { name: "repositories - zero count", rawCfg: `[cgroups] -- GitLab From 07dc211c8d457e82cdc682bea35a4b3e7c2181e3 Mon Sep 17 00:00:00 2001 From: John Cai Date: Wed, 13 Apr 2022 15:25:06 -0400 Subject: [PATCH 2/3] cgroups: Repository isolated cgroups In be0ee06 (Introduce new Cgroups config 2022-04-17) we introduced a new Cgroups configuration that supports repository-isolation for command execution. In this change, we implement the repository-based isolation cgroups. Commands are placed into cgroups based on the repository path, so that all commands for a given repository will end up in the same cgroup. This helps to prevent one user from potentially starving the whole system of memory or CPU. Changelog: changed --- internal/cgroups/cgroups.go | 6 +- internal/cgroups/cgroups_linux_test.go | 2 +- internal/cgroups/mock_linux_test.go | 5 +- internal/cgroups/noop.go | 3 +- internal/cgroups/v1_linux.go | 148 ++++++++++++-------- internal/cgroups/v1_linux_test.go | 76 ++++++---- internal/git/command_factory.go | 2 +- internal/git/command_factory_cgroup_test.go | 7 +- internal/gitaly/config/config_test.go | 19 +++ 9 files changed, 169 insertions(+), 99 deletions(-) diff --git a/internal/cgroups/cgroups.go b/internal/cgroups/cgroups.go index dc0cf00663..55e603de95 100644 --- a/internal/cgroups/cgroups.go +++ b/internal/cgroups/cgroups.go @@ -3,6 +3,7 @@ package cgroups import ( "github.com/prometheus/client_golang/prometheus" "gitlab.com/gitlab-org/gitaly/v14/internal/command" + "gitlab.com/gitlab-org/gitaly/v14/internal/git/repository" "gitlab.com/gitlab-org/gitaly/v14/internal/gitaly/config/cgroups" ) @@ -13,7 +14,7 @@ type Manager interface { // instance of the Manager. Setup() error // AddCommand adds a Command to a cgroup - AddCommand(*command.Command) error + AddCommand(*command.Command, repository.GitRepo) error // Cleanup cleans up cgroups created in Setup. // It is expected to be called once at Gitaly shutdown from any // instance of the Manager. @@ -24,8 +25,7 @@ type Manager interface { // NewManager returns the appropriate Cgroups manager func NewManager(cfg cgroups.Config) Manager { - // nolint:staticcheck // we will deprecate the old cgroups config in 15.0 - if cfg.Count > 0 { + if cfg.Repositories.Count > 0 { return newV1Manager(cfg) } diff --git a/internal/cgroups/cgroups_linux_test.go b/internal/cgroups/cgroups_linux_test.go index 9d8267bffc..29246cd769 100644 --- a/internal/cgroups/cgroups_linux_test.go +++ b/internal/cgroups/cgroups_linux_test.go @@ -13,7 +13,7 @@ func TestMain(m *testing.M) { } func TestNewManager(t *testing.T) { - cfg := cgroups.Config{Count: 10} + cfg := cgroups.Config{Repositories: cgroups.Repositories{Count: 10}} require.IsType(t, &CGroupV1Manager{}, &CGroupV1Manager{cfg: cfg}) require.IsType(t, &NoopManager{}, NewManager(cgroups.Config{})) diff --git a/internal/cgroups/mock_linux_test.go b/internal/cgroups/mock_linux_test.go index 620ea06d5d..b941bda236 100644 --- a/internal/cgroups/mock_linux_test.go +++ b/internal/cgroups/mock_linux_test.go @@ -103,9 +103,8 @@ func (m *mockCgroup) setupMockCgroupFiles( require.NoError(t, os.WriteFile(controlFilePath, []byte(content), 0o644)) } - // nolint:staticcheck // we will deprecate the old cgroups config in 15.0 - for shard := uint(0); shard < manager.cfg.Count; shard++ { - shardPath := filepath.Join(cgroupPath, fmt.Sprintf("shard-%d", shard)) + for shard := uint(0); shard < manager.cfg.Repositories.Count; shard++ { + shardPath := filepath.Join(cgroupPath, fmt.Sprintf("repos-%d", shard)) require.NoError(t, os.MkdirAll(shardPath, 0o755)) for filename, content := range contentByFilename { diff --git a/internal/cgroups/noop.go b/internal/cgroups/noop.go index 57f5529021..ff4ca2e492 100644 --- a/internal/cgroups/noop.go +++ b/internal/cgroups/noop.go @@ -3,6 +3,7 @@ package cgroups import ( "github.com/prometheus/client_golang/prometheus" "gitlab.com/gitlab-org/gitaly/v14/internal/command" + "gitlab.com/gitlab-org/gitaly/v14/internal/git/repository" ) // NoopManager is a cgroups manager that does nothing @@ -14,7 +15,7 @@ func (cg *NoopManager) Setup() error { } //nolint: revive,stylecheck // This is unintentionally missing documentation. -func (cg *NoopManager) AddCommand(cmd *command.Command) error { +func (cg *NoopManager) AddCommand(cmd *command.Command, repo repository.GitRepo) error { return nil } diff --git a/internal/cgroups/v1_linux.go b/internal/cgroups/v1_linux.go index 75573e96a9..f36d461d27 100644 --- a/internal/cgroups/v1_linux.go +++ b/internal/cgroups/v1_linux.go @@ -10,6 +10,7 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/prometheus/client_golang/prometheus" "gitlab.com/gitlab-org/gitaly/v14/internal/command" + "gitlab.com/gitlab-org/gitaly/v14/internal/git/repository" cgroupscfg "gitlab.com/gitlab-org/gitaly/v14/internal/gitaly/config/cgroups" "gitlab.com/gitlab-org/gitaly/v14/internal/log" ) @@ -54,25 +55,41 @@ func newV1Manager(cfg cgroupscfg.Config) *CGroupV1Manager { //nolint: revive,stylecheck // This is unintentionally missing documentation. func (cg *CGroupV1Manager) Setup() error { - resources := &specs.LinuxResources{} + var parentResources specs.LinuxResources - if cg.cfg.CPU.Enabled { - resources.CPU = &specs.LinuxCPU{ - Shares: &cg.cfg.CPU.Shares, - } + if cg.cfg.CPUShares > 0 { + parentResources.CPU = &specs.LinuxCPU{Shares: &cg.cfg.CPUShares} } - if cg.cfg.Memory.Enabled { - resources.Memory = &specs.LinuxMemory{ - Limit: &cg.cfg.Memory.Limit, - } + if cg.cfg.MemoryBytes > 0 { + parentResources.Memory = &specs.LinuxMemory{Limit: &cg.cfg.MemoryBytes} } - // nolint:staticcheck // we will deprecate the old cgroups config in 15.0 - for i := 0; i < int(cg.cfg.Count); i++ { - _, err := cgroups.New(cg.hierarchy, cgroups.StaticPath(cg.cgroupPath(i)), resources) - if err != nil { - return fmt.Errorf("failed creating cgroup: %w", err) + if _, err := cgroups.New( + cg.hierarchy, + cgroups.StaticPath(cg.currentProcessCgroup()), + &parentResources, + ); err != nil { + return fmt.Errorf("failed creating parent cgroup: %w", err) + } + + var reposResources specs.LinuxResources + + if cg.cfg.Repositories.CPUShares > 0 { + reposResources.CPU = &specs.LinuxCPU{Shares: &cg.cfg.Repositories.CPUShares} + } + + if cg.cfg.Repositories.MemoryBytes > 0 { + reposResources.Memory = &specs.LinuxMemory{Limit: &cg.cfg.Repositories.MemoryBytes} + } + + for i := 0; i < int(cg.cfg.Repositories.Count); i++ { + if _, err := cgroups.New( + cg.hierarchy, + cgroups.StaticPath(cg.repoPath(i)), + &reposResources, + ); err != nil { + return fmt.Errorf("failed creating repository cgroup: %w", err) } } @@ -80,20 +97,30 @@ func (cg *CGroupV1Manager) Setup() error { } // AddCommand adds the given command to one of the CGroup's buckets. The bucket used for the command -// is determined by hashing the commands arguments. No error is returned if the command has already +// is determined by hashing the repository storage and path. No error is returned if the command has already // exited. -func (cg *CGroupV1Manager) AddCommand(cmd *command.Command) error { - checksum := crc32.ChecksumIEEE([]byte(strings.Join(cmd.Args(), ""))) - // nolint:staticcheck // we will deprecate the old cgroups config in 15.0 - groupID := uint(checksum) % cg.cfg.Count - cgroupPath := cg.cgroupPath(int(groupID)) +func (cg *CGroupV1Manager) AddCommand( + cmd *command.Command, + repo repository.GitRepo, +) error { + checksum := crc32.ChecksumIEEE( + []byte(strings.Join([]string{repo.GetStorageName(), repo.GetRelativePath()}, "")), + ) + groupID := uint(checksum) % cg.cfg.Repositories.Count + cgroupPath := cg.repoPath(int(groupID)) + + cmd.SetCgroupPath(cgroupPath) + return cg.addToCgroup(cmd.Pid(), cgroupPath) +} + +func (cg *CGroupV1Manager) addToCgroup(pid int, cgroupPath string) error { control, err := cgroups.Load(cg.hierarchy, cgroups.StaticPath(cgroupPath)) if err != nil { return fmt.Errorf("failed loading %s cgroup: %w", cgroupPath, err) } - if err := control.Add(cgroups.Process{Pid: cmd.Pid()}); err != nil { + if err := control.Add(cgroups.Process{Pid: pid}); err != nil { // Command could finish so quickly before we can add it to a cgroup, so // we don't consider it an error. if strings.Contains(err.Error(), "no such process") { @@ -102,52 +129,55 @@ func (cg *CGroupV1Manager) AddCommand(cmd *command.Command) error { return fmt.Errorf("failed adding process to cgroup: %w", err) } - cmd.SetCgroupPath(cgroupPath) - return nil } // Collect collects metrics from the cgroups controller func (cg *CGroupV1Manager) Collect(ch chan<- prometheus.Metric) { - path := cg.currentProcessCgroup() - logger := log.Default().WithField("cgroup_path", path) - control, err := cgroups.Load(cg.hierarchy, cgroups.StaticPath(path)) - if err != nil { - logger.WithError(err).Warn("unable to load cgroup controller") - return - } + for i := 0; i < int(cg.cfg.Repositories.Count); i++ { + repoPath := cg.repoPath(i) + logger := log.Default().WithField("cgroup_path", repoPath) + control, err := cgroups.Load( + cg.hierarchy, + cgroups.StaticPath(repoPath), + ) + if err != nil { + logger.WithError(err).Warn("unable to load cgroup controller") + return + } - if metrics, err := control.Stat(); err != nil { - logger.WithError(err).Warn("unable to get cgroup stats") - } else { - memoryMetric := cg.memoryFailedTotal.WithLabelValues(path) - memoryMetric.Set(float64(metrics.Memory.Usage.Failcnt)) - ch <- memoryMetric + if metrics, err := control.Stat(); err != nil { + logger.WithError(err).Warn("unable to get cgroup stats") + } else { + memoryMetric := cg.memoryFailedTotal.WithLabelValues(repoPath) + memoryMetric.Set(float64(metrics.Memory.Usage.Failcnt)) + ch <- memoryMetric - cpuUserMetric := cg.cpuUsage.WithLabelValues(path, "user") - cpuUserMetric.Set(float64(metrics.CPU.Usage.User)) - ch <- cpuUserMetric + cpuUserMetric := cg.cpuUsage.WithLabelValues(repoPath, "user") + cpuUserMetric.Set(float64(metrics.CPU.Usage.User)) + ch <- cpuUserMetric - cpuKernelMetric := cg.cpuUsage.WithLabelValues(path, "kernel") - cpuKernelMetric.Set(float64(metrics.CPU.Usage.Kernel)) - ch <- cpuKernelMetric - } + cpuKernelMetric := cg.cpuUsage.WithLabelValues(repoPath, "kernel") + cpuKernelMetric.Set(float64(metrics.CPU.Usage.Kernel)) + ch <- cpuKernelMetric + } - if subsystems, err := cg.hierarchy(); err != nil { - logger.WithError(err).Warn("unable to get cgroup hierarchy") - } else { - for _, subsystem := range subsystems { - processes, err := control.Processes(subsystem.Name(), true) - if err != nil { - logger.WithField("subsystem", subsystem.Name()). - WithError(err). - Warn("unable to get process list") - continue + if subsystems, err := cg.hierarchy(); err != nil { + logger.WithError(err).Warn("unable to get cgroup hierarchy") + } else { + for _, subsystem := range subsystems { + processes, err := control.Processes(subsystem.Name(), true) + if err != nil { + logger.WithField("subsystem", subsystem.Name()). + WithError(err). + Warn("unable to get process list") + continue + } + + procsMetric := cg.procs.WithLabelValues(repoPath, string(subsystem.Name())) + procsMetric.Set(float64(len(processes))) + ch <- procsMetric } - - procsMetric := cg.procs.WithLabelValues(path, string(subsystem.Name())) - procsMetric.Set(float64(len(processes))) - ch <- procsMetric } } } @@ -173,8 +203,8 @@ func (cg *CGroupV1Manager) Cleanup() error { return nil } -func (cg *CGroupV1Manager) cgroupPath(groupID int) string { - return fmt.Sprintf("/%s/shard-%d", cg.currentProcessCgroup(), groupID) +func (cg *CGroupV1Manager) repoPath(groupID int) string { + return fmt.Sprintf("%s/repos-%d", cg.currentProcessCgroup(), groupID) } func (cg *CGroupV1Manager) currentProcessCgroup() string { diff --git a/internal/cgroups/v1_linux_test.go b/internal/cgroups/v1_linux_test.go index 7de3d910d5..3479733740 100644 --- a/internal/cgroups/v1_linux_test.go +++ b/internal/cgroups/v1_linux_test.go @@ -20,19 +20,16 @@ import ( "gitlab.com/gitlab-org/gitaly/v14/internal/command" "gitlab.com/gitlab-org/gitaly/v14/internal/gitaly/config/cgroups" "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper" + "gitlab.com/gitlab-org/gitaly/v14/proto/go/gitalypb" ) func defaultCgroupsConfig() cgroups.Config { return cgroups.Config{ - Count: 3, HierarchyRoot: "gitaly", - CPU: cgroups.CPU{ - Enabled: true, - Shares: 256, - }, - Memory: cgroups.Memory{ - Enabled: true, - Limit: 1024000, + Repositories: cgroups.Repositories{ + Count: 3, + MemoryBytes: 1024000, + CPUShares: 256, }, } } @@ -48,14 +45,14 @@ func TestSetup(t *testing.T) { for i := 0; i < 3; i++ { memoryPath := filepath.Join( - mock.root, "memory", "gitaly", fmt.Sprintf("gitaly-%d", os.Getpid()), fmt.Sprintf("shard-%d", i), "memory.limit_in_bytes", + mock.root, "memory", "gitaly", fmt.Sprintf("gitaly-%d", os.Getpid()), fmt.Sprintf("repos-%d", i), "memory.limit_in_bytes", ) memoryContent := readCgroupFile(t, memoryPath) require.Equal(t, string(memoryContent), "1024000") cpuPath := filepath.Join( - mock.root, "cpu", "gitaly", fmt.Sprintf("gitaly-%d", os.Getpid()), fmt.Sprintf("shard-%d", i), "cpu.shares", + mock.root, "cpu", "gitaly", fmt.Sprintf("gitaly-%d", os.Getpid()), fmt.Sprintf("repos-%d", i), "cpu.shares", ) cpuContent := readCgroupFile(t, cpuPath) @@ -66,7 +63,16 @@ func TestSetup(t *testing.T) { func TestAddCommand(t *testing.T) { mock := newMock(t) + repo := &gitalypb.Repository{ + StorageName: "default", + RelativePath: "path/to/repo.git", + } + config := defaultCgroupsConfig() + config.Repositories.Count = 1 + config.Repositories.MemoryBytes = 1024 + config.Repositories.CPUShares = 16 + v1Manager1 := &CGroupV1Manager{ cfg: config, hierarchy: mock.hierarchy, @@ -83,14 +89,15 @@ func TestAddCommand(t *testing.T) { cfg: config, hierarchy: mock.hierarchy, } - require.NoError(t, v1Manager2.AddCommand(cmd2)) - checksum := crc32.ChecksumIEEE([]byte(strings.Join(cmd2.Args(), ""))) - // nolint:staticcheck // we will deprecate the old cgroups config in 15.0 - groupID := uint(checksum) % config.Count + require.NoError(t, v1Manager2.AddCommand(cmd2, repo)) + + checksum := crc32.ChecksumIEEE([]byte(strings.Join([]string{"default", "path/to/repo.git"}, ""))) + groupID := uint(checksum) % config.Repositories.Count for _, s := range mock.subsystems { - path := filepath.Join(mock.root, string(s.Name()), "gitaly", fmt.Sprintf("gitaly-%d", os.Getpid()), fmt.Sprintf("shard-%d", groupID), "cgroup.procs") + path := filepath.Join(mock.root, string(s.Name()), "gitaly", + fmt.Sprintf("gitaly-%d", os.Getpid()), fmt.Sprintf("repos-%d", groupID), "cgroup.procs") content := readCgroupFile(t, path) pid, err := strconv.Atoi(string(content)) @@ -111,8 +118,8 @@ func TestCleanup(t *testing.T) { require.NoError(t, v1Manager.Cleanup()) for i := 0; i < 3; i++ { - memoryPath := filepath.Join(mock.root, "memory", "gitaly", fmt.Sprintf("gitaly-%d", os.Getpid()), fmt.Sprintf("shard-%d", i)) - cpuPath := filepath.Join(mock.root, "cpu", "gitaly", fmt.Sprintf("gitaly-%d", os.Getpid()), fmt.Sprintf("shard-%d", i)) + memoryPath := filepath.Join(mock.root, "memory", "gitaly", fmt.Sprintf("gitaly-%d", os.Getpid()), fmt.Sprintf("repos-%d", i)) + cpuPath := filepath.Join(mock.root, "cpu", "gitaly", fmt.Sprintf("gitaly-%d", os.Getpid()), fmt.Sprintf("repos-%d", i)) require.NoDirExists(t, memoryPath) require.NoDirExists(t, cpuPath) @@ -121,8 +128,16 @@ func TestCleanup(t *testing.T) { func TestMetrics(t *testing.T) { mock := newMock(t) + repo := &gitalypb.Repository{ + StorageName: "default", + RelativePath: "path/to/repo.git", + } config := defaultCgroupsConfig() + config.Repositories.Count = 1 + config.Repositories.MemoryBytes = 1048576 + config.Repositories.CPUShares = 16 + v1Manager1 := newV1Manager(config) v1Manager1.hierarchy = mock.hierarchy @@ -135,14 +150,20 @@ func TestMetrics(t *testing.T) { logger.SetLevel(logrus.DebugLevel) ctx = ctxlogrus.ToContext(ctx, logrus.NewEntry(logger)) - cmd1 := exec.Command("ls", "-hal", ".") - cmd2, err := command.New(ctx, cmd1, nil, nil, nil) + cmd, err := command.New(ctx, exec.Command("ls", "-hal", "."), nil, nil, nil) + require.NoError(t, err) + gitCmd1, err := command.New(ctx, exec.Command("ls", "-hal", "."), nil, nil, nil) + require.NoError(t, err) + gitCmd2, err := command.New(ctx, exec.Command("ls", "-hal", "."), nil, nil, nil) require.NoError(t, err) - require.NoError(t, v1Manager1.AddCommand(cmd2)) - require.NoError(t, cmd2.Wait()) + require.NoError(t, v1Manager1.AddCommand(cmd, repo)) + require.NoError(t, v1Manager1.AddCommand(gitCmd1, repo)) + require.NoError(t, v1Manager1.AddCommand(gitCmd2, repo)) + require.NoError(t, cmd.Wait()) + require.NoError(t, gitCmd1.Wait()) - processCgroupPath := v1Manager1.currentProcessCgroup() + repoCgroupPath := filepath.Join(v1Manager1.currentProcessCgroup(), "repos-0") expected := bytes.NewBufferString(fmt.Sprintf(`# HELP gitaly_cgroup_cpu_usage CPU Usage of Cgroup # TYPE gitaly_cgroup_cpu_usage gauge @@ -153,21 +174,18 @@ gitaly_cgroup_cpu_usage{path="%s",type="user"} 0 gitaly_cgroup_memory_failed_total{path="%s"} 2 # HELP gitaly_cgroup_procs_total Total number of procs # TYPE gitaly_cgroup_procs_total gauge -gitaly_cgroup_procs_total{path="%s",subsystem="memory"} 1 gitaly_cgroup_procs_total{path="%s",subsystem="cpu"} 1 -`, processCgroupPath, processCgroupPath, processCgroupPath, processCgroupPath, processCgroupPath)) +gitaly_cgroup_procs_total{path="%s",subsystem="memory"} 1 +`, repoCgroupPath, repoCgroupPath, repoCgroupPath, repoCgroupPath, repoCgroupPath)) assert.NoError(t, testutil.CollectAndCompare( v1Manager1, - expected, - "gitaly_cgroup_memory_failed_total", - "gitaly_cgroup_cpu_usage", - "gitaly_cgroup_procs_total")) + expected)) logEntry := hook.LastEntry() assert.Contains( t, logEntry.Data["command.cgroup_path"], - processCgroupPath, + repoCgroupPath, "log field includes a cgroup path that is a subdirectory of the current process' cgroup path", ) } diff --git a/internal/git/command_factory.go b/internal/git/command_factory.go index a9544a46ca..211f40dbde 100644 --- a/internal/git/command_factory.go +++ b/internal/git/command_factory.go @@ -378,7 +378,7 @@ func (cf *ExecCommandFactory) newCommand(ctx context.Context, repo repository.Gi command.SetMetricsSubCmd(sc.Subcommand()) if featureflag.RunCommandsInCGroup.IsEnabled(ctx) { - if err := cf.cgroupsManager.AddCommand(command); err != nil { + if err := cf.cgroupsManager.AddCommand(command, repo); err != nil { return nil, err } } diff --git a/internal/git/command_factory_cgroup_test.go b/internal/git/command_factory_cgroup_test.go index 922e3d12ab..813961586b 100644 --- a/internal/git/command_factory_cgroup_test.go +++ b/internal/git/command_factory_cgroup_test.go @@ -9,6 +9,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v14/internal/command" + "gitlab.com/gitlab-org/gitaly/v14/internal/git/repository" "gitlab.com/gitlab-org/gitaly/v14/internal/gitaly/config" "gitlab.com/gitlab-org/gitaly/v14/internal/gitaly/config/cgroups" "gitlab.com/gitlab-org/gitaly/v14/internal/metadata/featureflag" @@ -23,7 +24,7 @@ func (m *mockCgroupsManager) Setup() error { return nil } -func (m *mockCgroupsManager) AddCommand(c *command.Command) error { +func (m *mockCgroupsManager) AddCommand(c *command.Command, repo repository.GitRepo) error { m.commands = append(m.commands, c) return nil } @@ -41,7 +42,9 @@ func TestNewCommandAddsToCgroup(t *testing.T) { cfg := config.Cfg{ SocketPath: "/path/to/socket", Cgroups: cgroups.Config{ - Count: 1, + Repositories: cgroups.Repositories{ + Count: 1, + }, }, Storages: []config.Storage{{ Name: "storage-1", diff --git a/internal/gitaly/config/config_test.go b/internal/gitaly/config/config_test.go index a5d15003e6..2bc2995065 100644 --- a/internal/gitaly/config/config_test.go +++ b/internal/gitaly/config/config_test.go @@ -851,6 +851,11 @@ func TestValidateCgroups(t *testing.T) { Enabled: true, Shares: 512, }, + Repositories: cgroups.Repositories{ + Count: 10, + MemoryBytes: 1024, + CPUShares: 512, + }, }, }, { @@ -864,6 +869,9 @@ func TestValidateCgroups(t *testing.T) { Count: 10, Mountpoint: "/sys/fs/cgroup", HierarchyRoot: "baz", + Repositories: cgroups.Repositories{ + Count: 10, + }, }, }, { @@ -876,6 +884,9 @@ func TestValidateCgroups(t *testing.T) { Count: 10, Mountpoint: "/sys/fs/cgroup", HierarchyRoot: "gitaly", + Repositories: cgroups.Repositories{ + Count: 10, + }, }, }, { @@ -902,6 +913,10 @@ func TestValidateCgroups(t *testing.T) { Enabled: true, Shares: 0, }, + Repositories: cgroups.Repositories{ + Count: 10, + MemoryBytes: 1024, + }, }, }, { @@ -929,6 +944,10 @@ func TestValidateCgroups(t *testing.T) { Enabled: true, Shares: 512, }, + Repositories: cgroups.Repositories{ + Count: 10, + CPUShares: 512, + }, }, }, { -- GitLab From 56db80e0e0ca58df83fbc9b97b51773835692ea3 Mon Sep 17 00:00:00 2001 From: John Cai Date: Fri, 13 May 2022 17:18:00 -0400 Subject: [PATCH 3/3] cgroups: Add delimiter for key The cgroups key did not have a delimeter. This can potentially cause key collisions between two different sets of storage/repository path. Add a "/" as a delimeter in the key that gets hashed. --- internal/cgroups/v1_linux.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/cgroups/v1_linux.go b/internal/cgroups/v1_linux.go index f36d461d27..49802bcb89 100644 --- a/internal/cgroups/v1_linux.go +++ b/internal/cgroups/v1_linux.go @@ -104,7 +104,7 @@ func (cg *CGroupV1Manager) AddCommand( repo repository.GitRepo, ) error { checksum := crc32.ChecksumIEEE( - []byte(strings.Join([]string{repo.GetStorageName(), repo.GetRelativePath()}, "")), + []byte(repo.GetStorageName() + "/" + repo.GetRelativePath()), ) groupID := uint(checksum) % cg.cfg.Repositories.Count cgroupPath := cg.repoPath(int(groupID)) -- GitLab