From 9dfca6bdbad32b91678d54e69ace1774c4d07182 Mon Sep 17 00:00:00 2001 From: Bilal Akhtar Date: Wed, 2 Dec 2020 13:41:39 -0500 Subject: [PATCH] cli: Set GOMAXPROCS on start if in a CPU-limited cgroup Having a GOMAXPROCS value defaulting to the Go default (numCPUs) is inefficient if there's a lower CPU limit set for the cgroup the cockroach process is in. The Go runtime could be scheduling multiple OS-level threads, not all of which would get to run concurrently. This change sees if the GOMAXPROCS env variable was set, denoting an overridden value. If it isn't set, and we're inside a cgroup, the value of GOMAXPROCS is now lowered to the maximum number of CPUs allowed by the cgroup's CPU limit, rounded up. This is an implementation of part of the advice given in https://github.com/cockroachdb/docs/issues/9001 . The CPU requests/shares part of that equation cannot be implemented in Cockroach, so the docs issue remains unchanged. Release note (general change): Run fewer threads in parallel if running inside a container with a CPU limit. --- pkg/cli/BUILD.bazel | 1 + pkg/cli/start.go | 6 ++++++ pkg/testutils/lint/lint_test.go | 1 + pkg/util/cgroups/BUILD.bazel | 5 ++++- pkg/util/cgroups/cgroups.go | 19 +++++++++++++++++++ 5 files changed, 31 insertions(+), 1 deletion(-) diff --git a/pkg/cli/BUILD.bazel b/pkg/cli/BUILD.bazel index b4529767da28..9a23d01143a2 100644 --- a/pkg/cli/BUILD.bazel +++ b/pkg/cli/BUILD.bazel @@ -136,6 +136,7 @@ go_library( "//pkg/storage/enginepb", "//pkg/ts/tspb", "//pkg/util", + "//pkg/util/cgroups", "//pkg/util/contextutil", "//pkg/util/encoding", "//pkg/util/encoding/csv", diff --git a/pkg/cli/start.go b/pkg/cli/start.go index 41bf6bdd20c8..80311007eb38 100644 --- a/pkg/cli/start.go +++ b/pkg/cli/start.go @@ -39,6 +39,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/storage" "github.com/cockroachdb/cockroach/pkg/storage/enginepb" + "github.com/cockroachdb/cockroach/pkg/util/cgroups" "github.com/cockroachdb/cockroach/pkg/util/envutil" 
"github.com/cockroachdb/cockroach/pkg/util/grpcutil" "github.com/cockroachdb/cockroach/pkg/util/hlc" @@ -359,6 +360,11 @@ func runStart(cmd *cobra.Command, args []string, startSingleNode bool) (returnEr // but when actually starting a server, we enable them. grpcutil.LowerSeverity(severity.WARNING) + // Tweak GOMAXPROCS if we're in a cgroup / container that has cpu limits set. + // The Go default for GOMAXPROCS is runtime.NumCPU(), however this is less + // than ideal if the cgroup is limited to a number lower than that. + cgroups.AdjustMaxProcs(ctx) + // Check the --join flag. if !flagSetForCmd(cmd).Lookup(cliflags.Join.Name).Changed { err := errors.WithHint( diff --git a/pkg/testutils/lint/lint_test.go b/pkg/testutils/lint/lint_test.go index c8638cff79b6..8432e765f221 100644 --- a/pkg/testutils/lint/lint_test.go +++ b/pkg/testutils/lint/lint_test.go @@ -446,6 +446,7 @@ func TestLint(t *testing.T) { ":!ccl/workloadccl/fixture_test.go", ":!internal/gopath/gopath.go", ":!cmd", + ":!util/cgroups/cgroups.go", ":!nightly", ":!testutils/lint", ":!util/envutil/env.go", diff --git a/pkg/util/cgroups/BUILD.bazel b/pkg/util/cgroups/BUILD.bazel index 05d93030bd27..097c152962d0 100644 --- a/pkg/util/cgroups/BUILD.bazel +++ b/pkg/util/cgroups/BUILD.bazel @@ -5,7 +5,10 @@ go_library( srcs = ["cgroups.go"], importpath = "github.com/cockroachdb/cockroach/pkg/util/cgroups", visibility = ["//visibility:public"], - deps = ["//vendor/github.com/cockroachdb/errors"], + deps = [ + "//pkg/util/log", + "//vendor/github.com/cockroachdb/errors", + ], ) go_test( diff --git a/pkg/util/cgroups/cgroups.go b/pkg/util/cgroups/cgroups.go index 5ccb8cd858f5..37dc6fe4cc97 100644 --- a/pkg/util/cgroups/cgroups.go +++ b/pkg/util/cgroups/cgroups.go @@ -13,6 +13,7 @@ package cgroups import ( "bufio" "bytes" + "context" "fmt" "io/ioutil" "math" @@ -22,6 +23,7 @@ import ( "strconv" "strings" + "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/errors" ) @@ -444,3 +446,20 @@ func 
getCgroupCPU(root string) (CPUUsage, error) { return res, nil } + +// AdjustMaxProcs sets GOMAXPROCS (unless the GOMAXPROCS env variable is set) +// to the CPU limit of the current cgroup, rounded up to a whole number, when +// that limit is positive and lower than runtime.NumCPU(). If the cgroup CPU +// info cannot be read, GOMAXPROCS is left unchanged. Otherwise the Go default, +// runtime.NumCPU(), would run more OS threads than can be scheduled at once. +func AdjustMaxProcs(ctx context.Context) { + if _, set := os.LookupEnv("GOMAXPROCS"); !set { + if cpuInfo, err := GetCgroupCPU(); err == nil { + numCPUToUse := int(math.Ceil(cpuInfo.CPUShares())) + if numCPUToUse < runtime.NumCPU() && numCPUToUse > 0 { + log.Infof(ctx, "running in a container; setting GOMAXPROCS to %d", numCPUToUse) + runtime.GOMAXPROCS(numCPUToUse) + } + } + } +}