Skip to content

Commit

Permalink
util, server: Add GetCgroupCPU, use it to reflect cpu usage
Browse files Browse the repository at this point in the history
Enhances the util/cgroups package to also support getting
CPU limits from the process' current cgroup. The cpu limit / share
for the current cgroup is denoted in two separate variables:
the cpu period, and the cpu quota. When (quota / period) = numCPUs,
this cgroup has access to all the CPUs on the system.

This PR also updates SampleEnvironment to call this method
and adjust the cpu usage % accordingly.

Release note (bug fix): Improve accuracy of reported CPU usage
when running in containers.
  • Loading branch information
itsbilal committed Nov 17, 2020
1 parent 4e6e745 commit 3a5c37c
Show file tree
Hide file tree
Showing 3 changed files with 467 additions and 17 deletions.
5 changes: 4 additions & 1 deletion pkg/server/status/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"time"

"github.com/cockroachdb/cockroach/pkg/build"
"github.com/cockroachdb/cockroach/pkg/util/cgroups"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/metric"
Expand Down Expand Up @@ -440,6 +441,8 @@ func (rsr *RuntimeStatSampler) SampleEnvironment(
if err := cpuTime.Get(pid); err != nil {
log.Errorf(ctx, "unable to get cpu usage: %v", err)
}
cgroupCPU, _ := cgroups.GetCgroupCPU()
cpuShare := cgroupCPU.CPUShares()

fds := gosigar.ProcFDUsage{}
if err := fds.Get(pid); err != nil {
Expand Down Expand Up @@ -500,7 +503,7 @@ func (rsr *RuntimeStatSampler) SampleEnvironment(
stime := int64(cpuTime.Sys) * 1e6
uPerc := float64(utime-rsr.last.utime) / dur
sPerc := float64(stime-rsr.last.stime) / dur
combinedNormalizedPerc := (sPerc + uPerc) / float64(runtime.NumCPU())
combinedNormalizedPerc := (sPerc + uPerc) / cpuShare
gcPausePercent := float64(uint64(gc.PauseTotal)-rsr.last.gcPauseTime) / dur
rsr.last.now = now
rsr.last.utime = utime
Expand Down
244 changes: 228 additions & 16 deletions pkg/util/cgroups/cgroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,22 @@ import (
"math"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"

"github.com/cockroachdb/errors"
)

const (
cgroupV1MemLimitFilename = "memory.stat"
cgroupV2MemLimitFilename = "memory.max"
cgroupV1MemLimitFilename = "memory.stat"
cgroupV2MemLimitFilename = "memory.max"
cgroupV1CPUQuotaFilename = "cpu.cfs_quota_us"
cgroupV1CPUPeriodFilename = "cpu.cfs_period_us"
cgroupV1CPUSysUsageFilename = "cpuacct.usage_sys"
cgroupV1CPUUserUsageFilename = "cpuacct.usage_user"
cgroupV2CPUMaxFilename = "cpu.max"
cgroupV2CPUStatFilename = "cpu.stat"
)

// GetMemoryLimit attempts to retrieve the cgroup memory limit for the current
Expand All @@ -38,7 +46,7 @@ func GetMemoryLimit() (limit int64, warnings string, err error) {
// cgroup memory limit detection path implemented here as
// /proc/self/cgroup file -> /proc/self/mountinfo mounts -> cgroup version -> version specific limit check
func getCgroupMem(root string) (limit int64, warnings string, err error) {
path, err := detectMemCntrlPath(filepath.Join(root, "/proc/self/cgroup"))
path, err := detectCntrlPath(filepath.Join(root, "/proc/self/cgroup"), "memory")
if err != nil {
return 0, "", err
}
Expand All @@ -48,25 +56,153 @@ func getCgroupMem(root string) (limit int64, warnings string, err error) {
return 0, "no cgroup memory controller detected", nil
}

mount, ver, err := getCgroupDetails(filepath.Join(root, "/proc/self/mountinfo"), path)
mount, ver, err := getCgroupDetails(filepath.Join(root, "/proc/self/mountinfo"), path, "memory")
if err != nil {
return 0, "", err
}

switch ver {
case 1:
limit, warnings, err = detectLimitInV1(filepath.Join(root, mount))
limit, warnings, err = detectMemLimitInV1(filepath.Join(root, mount))
case 2:
limit, warnings, err = detectLimitInV2(filepath.Join(root, mount, path))
limit, warnings, err = detectMemLimitInV2(filepath.Join(root, mount, path))
default:
limit, err = 0, fmt.Errorf("detected unknown cgroup version index: %d", ver)
}

return limit, warnings, err
}

func readFile(filepath string) (res []byte, err error) {
var f *os.File
f, err = os.Open(filepath)
if err != nil {
return nil, err
}
defer func() {
err = errors.CombineErrors(err, f.Close())
}()
res, err = ioutil.ReadAll(f)
return res, err
}

func cgroupFileToUint64(filepath, desc string) (res uint64, err error) {
contents, err := readFile(filepath)
if err != nil {
return 0, errors.Wrapf(err, "error when reading %s from cgroup v1 at %s", desc, filepath)
}
res, err = strconv.ParseUint(string(bytes.TrimSpace(contents)), 10, 64)
if err != nil {
return 0, errors.Wrapf(err, "error when parsing %s from cgroup v1 at %s", desc, filepath)
}
return res, err
}

func cgroupFileToInt64(filepath, desc string) (res int64, err error) {
contents, err := readFile(filepath)
if err != nil {
return 0, errors.Wrapf(err, "error when reading %s from cgroup v1 at %s", desc, filepath)
}
res, err = strconv.ParseInt(string(bytes.TrimSpace(contents)), 10, 64)
if err != nil {
return 0, errors.Wrapf(err, "error when parsing %s from cgroup v1 at %s", desc, filepath)
}
return res, nil
}

func detectCPUQuotaInV1(cRoot string) (period, quota int64, err error) {
quotaFilePath := filepath.Join(cRoot, cgroupV1CPUQuotaFilename)
periodFilePath := filepath.Join(cRoot, cgroupV1CPUPeriodFilename)
quota, err = cgroupFileToInt64(quotaFilePath, "cpu quota")
if err != nil {
return 0, 0, err
}
period, err = cgroupFileToInt64(periodFilePath, "cpu period")
if err != nil {
return 0, 0, err
}

return period, quota, err
}

func detectCPUUsageInV1(cRoot string) (stime, utime uint64, err error) {
sysFilePath := filepath.Join(cRoot, cgroupV1CPUSysUsageFilename)
userFilePath := filepath.Join(cRoot, cgroupV1CPUUserUsageFilename)
stime, err = cgroupFileToUint64(sysFilePath, "cpu system time")
if err != nil {
return 0, 0, err
}
utime, err = cgroupFileToUint64(userFilePath, "cpu user time")
if err != nil {
return 0, 0, err
}

return stime, utime, err
}

func detectCPUQuotaInV2(cRoot string) (period, quota int64, err error) {
maxFilePath := filepath.Join(cRoot, cgroupV2CPUMaxFilename)
contents, err := readFile(maxFilePath)
if err != nil {
return 0, 0, errors.Wrapf(err, "error when read cpu quota from cgroup v2 at %s", maxFilePath)
}
fields := strings.Fields(string(contents))
if len(fields) > 2 || len(fields) == 0 {
return 0, 0, errors.Errorf("unexpected format when reading cpu quota from cgroup v2 at %s: %s", maxFilePath, contents)
}
if fields[0] == "max" {
// Negative quota denotes no limit.
quota = -1
} else {
quota, err = strconv.ParseInt(fields[0], 10, 64)
if err != nil {
return 0, 0, errors.Wrapf(err, "error when reading cpu quota from cgroup v2 at %s", maxFilePath)
}
}
if len(fields) == 2 {
period, err = strconv.ParseInt(fields[1], 10, 64)
if err != nil {
return 0, 0, errors.Wrapf(err, "error when reading cpu period from cgroup v2 at %s", maxFilePath)
}
}
return period, quota, nil
}

func detectCPUUsageInV2(cRoot string) (stime, utime uint64, err error) {
statFilePath := filepath.Join(cRoot, cgroupV2CPUStatFilename)
var stat *os.File
stat, err = os.Open(statFilePath)
if err != nil {
return 0, 0, errors.Wrapf(err, "can't read cpu usage from cgroup v2 at %s", statFilePath)
}
defer func() {
err = errors.CombineErrors(err, stat.Close())
}()

scanner := bufio.NewScanner(stat)
for scanner.Scan() {
fields := bytes.Fields(scanner.Bytes())
if len(fields) != 2 || (string(fields[0]) != "user_usec" && string(fields[0]) != "system_usec") {
continue
}
keyField := string(fields[0])

trimmed := string(bytes.TrimSpace(fields[1]))
usageVar := &stime
if keyField == "user_usec" {
usageVar = &utime
}
*usageVar, err = strconv.ParseUint(trimmed, 10, 64)
if err != nil {
return 0, 0, errors.Wrapf(err, "can't read cpu usage %s from cgroup v1 at %s", keyField, statFilePath)
}
}

return stime, utime, err
}

// Finds memory limit for cgroup V1 via looking in [contoller mount path]/memory.stat
func detectLimitInV1(cRoot string) (limit int64, warnings string, err error) {
func detectMemLimitInV1(cRoot string) (limit int64, warnings string, err error) {
statFilePath := filepath.Join(cRoot, cgroupV1MemLimitFilename)
stat, err := os.Open(statFilePath)
if err != nil {
Expand Down Expand Up @@ -98,7 +234,7 @@ func detectLimitInV1(cRoot string) (limit int64, warnings string, err error) {
// Finds memory limit for cgroup V2 via looking into [controller mount path]/[leaf path]/memory.max
// TODO(vladdy): this implementation was based on podman+criu environment. It may cover not
// all the cases when v2 becomes more widely used in container world.
func detectLimitInV2(cRoot string) (limit int64, warnings string, err error) {
func detectMemLimitInV2(cRoot string) (limit int64, warnings string, err error) {
limitFilePath := filepath.Join(cRoot, cgroupV2MemLimitFilename)

var buf []byte
Expand All @@ -120,10 +256,10 @@ func detectLimitInV2(cRoot string) (limit int64, warnings string, err error) {

// The controller is defined via either type `memory` for cgroup v1 or via empty type for cgroup v2,
// where the type is the second field in /proc/[pid]/cgroup file
func detectMemCntrlPath(cgroupFilePath string) (string, error) {
func detectCntrlPath(cgroupFilePath string, controller string) (string, error) {
cgroup, err := os.Open(cgroupFilePath)
if err != nil {
return "", errors.Wrapf(err, "failed to read memory cgroup from cgroups file: %s", cgroupFilePath)
return "", errors.Wrapf(err, "failed to read %s cgroup from cgroups file: %s", controller, cgroupFilePath)
}
defer func() { _ = cgroup.Close() }()

Expand All @@ -142,7 +278,7 @@ func detectMemCntrlPath(cgroupFilePath string) (string, error) {
// but no known container solutions support it afaik
if f0 == "0" && f1 == "" {
unifiedPathIfFound = string(fields[2])
} else if f1 == "memory" {
} else if f1 == controller {
return string(fields[2]), nil
}
}
Expand All @@ -152,7 +288,7 @@ func detectMemCntrlPath(cgroupFilePath string) (string, error) {

// Reads /proc/[pid]/mountinfo for cgoup or cgroup2 mount which defines the used version.
// See http://man7.org/linux/man-pages/man5/proc.5.html for `mountinfo` format.
func getCgroupDetails(mountinfoPath string, cRoot string) (string, int, error) {
func getCgroupDetails(mountinfoPath string, cRoot string, controller string) (string, int, error) {
info, err := os.Open(mountinfoPath)
if err != nil {
return "", 0, errors.Wrapf(err, "failed to read mounts info from file: %s", mountinfoPath)
Expand All @@ -168,7 +304,7 @@ func getCgroupDetails(mountinfoPath string, cRoot string) (string, int, error) {
continue
}

ver, ok := detectCgroupVersion(fields)
ver, ok := detectCgroupVersion(fields, controller)
if ok && (ver == 1 && string(fields[3]) == cRoot) || ver == 2 {
return string(fields[4]), ver, nil
}
Expand All @@ -178,7 +314,7 @@ func getCgroupDetails(mountinfoPath string, cRoot string) (string, int, error) {
}

// Return version of cgroup mount for memory controller if found
func detectCgroupVersion(fields [][]byte) (_ int, found bool) {
func detectCgroupVersion(fields [][]byte, controller string) (_ int, found bool) {
if len(fields) < 10 {
return 0, false
}
Expand All @@ -201,13 +337,89 @@ func detectCgroupVersion(fields [][]byte) (_ int, found bool) {

pos++

// Check for memory controller specifically in cgroup v1 (it is listed in super options field),
// Check for controller specifically in cgroup v1 (it is listed in super options field),
// as the limit can't be found if it is not enforced
if bytes.Equal(fields[pos], []byte("cgroup")) && bytes.Contains(fields[pos+2], []byte("memory")) {
if bytes.Equal(fields[pos], []byte("cgroup")) && bytes.Contains(fields[pos+2], []byte(controller)) {
return 1, true
} else if bytes.Equal(fields[pos], []byte("cgroup2")) {
return 2, true
}

return 0, false
}

// CPUUsage returns CPU usage and quotas for an entire cgroup.
type CPUUsage struct {
// System time and user time taken by this cgroup or process. In nanoseconds.
Stime, Utime uint64
// CPU period and quota for this process, in microseconds. This cgroup has
// access to up to (quota/period) proportion of CPU resources on the system.
// For instance, if there are 4 CPUs, quota = 150000, period = 100000,
// this cgroup can use around ~1.5 CPUs, or 37.5% of total scheduler time.
// If quota is -1, it's unlimited.
Period, Quota int64
// NumCPUs is the number of CPUs in the system. Always returned even if
// not called from a cgroup.
NumCPU int
}

// CPUShares returns the number of CPUs this cgroup can be expected to
// max out. If there's no limit, NumCPU is returned.
func (c CPUUsage) CPUShares() float64 {
if c.Period <= 0 || c.Quota <= 0 {
return float64(c.NumCPU)
}
return float64(c.Quota) / float64(c.Period)
}

// GetCgroupCPU returns the CPU usage and quota for the current cgroup.
func GetCgroupCPU() (CPUUsage, error) {
cpuusage, err := getCgroupCPU("/")
cpuusage.NumCPU = runtime.NumCPU()
return cpuusage, err
}

// Helper function for getCgroupCPU. Root is always "/", except in tests.
func getCgroupCPU(root string) (CPUUsage, error) {
path, err := detectCntrlPath(filepath.Join(root, "/proc/self/cgroup"), "cpu,cpuacct")
if err != nil {
return CPUUsage{}, err
}

// No CPU controller detected
if path == "" {
return CPUUsage{}, errors.New("no cpu controller detected")
}

mount, ver, err := getCgroupDetails(filepath.Join(root, "/proc/self/mountinfo"), path, "cpu,cpuacct")
if err != nil {
return CPUUsage{}, err
}

var res CPUUsage

switch ver {
case 1:
res.Period, res.Quota, err = detectCPUQuotaInV1(filepath.Join(root, mount))
if err != nil {
return res, err
}
res.Stime, res.Utime, err = detectCPUUsageInV1(filepath.Join(root, mount))
if err != nil {
return res, err
}
case 2:
res.Period, res.Quota, err = detectCPUQuotaInV2(filepath.Join(root, mount, path))
if err != nil {
return res, err
}
res.Stime, res.Utime, err = detectCPUUsageInV2(filepath.Join(root, mount, path))
if err != nil {
return res, err
}
default:
return CPUUsage{}, fmt.Errorf("detected unknown cgroup version index: %d", ver)
}

return res, nil
}
Loading

0 comments on commit 3a5c37c

Please sign in to comment.