Skip to content

Commit

Permalink
Merge pull request #7132 from cockroachdb/marc/rework_mem_stats
Browse files Browse the repository at this point in the history
Rework memory stats logging and metrics.
  • Loading branch information
mberhault authored Jun 9, 2016
2 parents c17e77f + 2a4d67e commit c17928f
Show file tree
Hide file tree
Showing 11 changed files with 147 additions and 41 deletions.
58 changes: 42 additions & 16 deletions server/status/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,10 @@ import (
const (
nameCgoCalls = "cgocalls"
nameGoroutines = "goroutines"
nameAllocBytes = "allocbytes"
nameSysBytes = "sysbytes"
nameGoAllocBytes = "go.allocbytes"
nameGoTotalBytes = "go.totalbytes"
nameCgoAllocBytes = "cgo.allocbytes"
nameCgoTotalBytes = "cgo.totalbytes"
nameGCCount = "gc.count"
nameGCPauseNS = "gc.pause.ns"
nameGCPausePercent = "gc.pause.percent"
Expand All @@ -44,9 +46,13 @@ const (
nameRSS = "rss"
)

// logBuildStats is a function that logs build-specific stats. We will not necessarily
// have implementations for all builds.
var logBuildStats func()
// getCgoMemStats is a hook that fetches memory stats for the C++ portion of
// the code. Not every build provides an implementation, so callers must check
// it for nil before calling it.
// It returns, in order:
//   allocated uint64: bytes allocated by the application
//   total uint64: total bytes requested from the system
//   error: any issue encountered while fetching the stats. Callers should
//          treat it as a warning only.
var getCgoMemStats func() (uint64, uint64, error)

// RuntimeStatSampler is used to periodically sample the runtime environment
// for useful statistics, performing some rudimentary calculations and storing
Expand All @@ -68,8 +74,10 @@ type RuntimeStatSampler struct {
// Metric gauges maintained by the sampler.
cgoCalls *metric.Gauge
goroutines *metric.Gauge
allocBytes *metric.Gauge
sysBytes *metric.Gauge
goAllocBytes *metric.Gauge
goTotalBytes *metric.Gauge
cgoAllocBytes *metric.Gauge
cgoTotalBytes *metric.Gauge
gcCount *metric.Gauge
gcPauseNS *metric.Gauge
gcPausePercent *metric.GaugeFloat64
Expand All @@ -88,8 +96,10 @@ func MakeRuntimeStatSampler(clock *hlc.Clock) RuntimeStatSampler {
clock: clock,
cgoCalls: reg.Gauge(nameCgoCalls),
goroutines: reg.Gauge(nameGoroutines),
allocBytes: reg.Gauge(nameAllocBytes),
sysBytes: reg.Gauge(nameSysBytes),
goAllocBytes: reg.Gauge(nameGoAllocBytes),
goTotalBytes: reg.Gauge(nameGoTotalBytes),
cgoAllocBytes: reg.Gauge(nameCgoAllocBytes),
cgoTotalBytes: reg.Gauge(nameCgoTotalBytes),
gcCount: reg.Gauge(nameGCCount),
gcPauseNS: reg.Gauge(nameGCPauseNS),
gcPausePercent: reg.GaugeFloat64(nameGCPausePercent),
Expand Down Expand Up @@ -123,6 +133,9 @@ func (rsr *RuntimeStatSampler) SampleEnvironment() {

// It might be useful to call ReadMemStats() more often, but it stops the
// world while collecting stats so shouldn't be called too often.
// NOTE: the MemStats fields do not get decremented when memory is released.
// To get accurate numbers, be sure to subtract the released amount,
// e.g. ms.Sys - ms.HeapReleased for the memory currently reserved.
ms := runtime.MemStats{}
runtime.ReadMemStats(&ms)

Expand Down Expand Up @@ -153,24 +166,37 @@ func (rsr *RuntimeStatSampler) SampleEnvironment() {
rsr.lastStime = newStime
rsr.lastPauseTime = ms.PauseTotalNs

var cgoAllocated, cgoTotal uint64
if getCgoMemStats != nil {
var err error
cgoAllocated, cgoTotal, err = getCgoMemStats()
if err != nil {
log.Warningf("problem fetching CGO memory stats: %s, CGO stats will be empty.", err)
}
}

goAllocated := ms.Alloc
goTotal := ms.Sys - ms.HeapReleased

// Log summary of statistics to console.
cgoRate := float64((numCgoCall-rsr.lastCgoCall)*int64(time.Second)) / dur
log.Infof("runtime stats: %s RSS, %d goroutines, %s active, %.2fcgo/sec, %.2f/%.2f %%(u/s)time, %.2f %%gc (%dx)",
humanize.IBytes(mem.Resident), numGoroutine, humanize.IBytes(ms.Alloc),
log.Infof("runtime stats: %s RSS, %d goroutines, %s/%s/%s GO alloc/idle/total, %s/%s CGO alloc/total, %.2fcgo/sec, %.2f/%.2f %%(u/s)time, %.2f %%gc (%dx)",
humanize.IBytes(mem.Resident), numGoroutine,
humanize.IBytes(goAllocated), humanize.IBytes(ms.HeapIdle-ms.HeapReleased), humanize.IBytes(goTotal),
humanize.IBytes(cgoAllocated), humanize.IBytes(cgoTotal),
cgoRate, uPerc, sPerc, pausePerc, ms.NumGC-rsr.lastNumGC)
if log.V(2) {
log.Infof("memstats: %+v", ms)
}
if logBuildStats != nil {
logBuildStats()
}
rsr.lastCgoCall = numCgoCall
rsr.lastNumGC = ms.NumGC

rsr.cgoCalls.Update(numCgoCall)
rsr.goroutines.Update(int64(numGoroutine))
rsr.allocBytes.Update(int64(ms.Alloc))
rsr.sysBytes.Update(int64(ms.Sys))
rsr.goAllocBytes.Update(int64(goAllocated))
rsr.goTotalBytes.Update(int64(goTotal))
rsr.cgoAllocBytes.Update(int64(cgoAllocated))
rsr.cgoTotalBytes.Update(int64(cgoTotal))
rsr.gcCount.Update(int64(ms.NumGC))
rsr.gcPauseNS.Update(int64(ms.PauseTotalNs))
rsr.gcPausePercent.Update(pausePerc)
Expand Down
72 changes: 68 additions & 4 deletions server/status/runtime_jemalloc.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,24 +24,88 @@ package status
// #cgo linux LDFLAGS: -Wl,-unresolved-symbols=ignore-all
//
// #include <jemalloc/jemalloc.h>
//
// // JemallocStats is a snapshot of jemalloc's global statistics. Each field
// // receives the value of the mallctl statistic named "stats.<field>".
// // See field definitions at:
// // http://www.canonware.com/download/jemalloc/jemalloc-latest/doc/jemalloc.html#stats.allocated
// typedef struct {
// size_t allocated;
// size_t active;
// size_t metadata;
// size_t resident;
// size_t mapped;
// // NOTE: retained is declared but not filled in by jemalloc_get_stats
// // (the corresponding mallctl read is commented out there).
// size_t retained;
// } JemallocStats;
//
// // jemalloc_get_stats refreshes jemalloc's cached statistics and copies the
// // allocated, active, metadata, resident and mapped counters into *stats.
// // Returns 0 on success, or the non-zero code of the first mallctl read that
// // failed; in that case the fields after the failing one are not written.
// // stats->retained is never written by this function.
// int jemalloc_get_stats(JemallocStats *stats) {
// // Update the statistics cached by mallctl.
// // NOTE: the return value of this refresh call is not checked.
// uint64_t epoch = 1;
// size_t sz = sizeof(epoch);
// mallctl("epoch", &epoch, &sz, &epoch, sz);
//
// // Every statistic read below is a size_t, so the same length variable is
// // passed to each mallctl call.
// sz = sizeof(size_t);
// int err = mallctl("stats.allocated", &stats->allocated, &sz, NULL, 0);
// if (err != 0) {
// return err;
// }
// err = mallctl("stats.active", &stats->active, &sz, NULL, 0);
// if (err != 0) {
// return err;
// }
// err = mallctl("stats.metadata", &stats->metadata, &sz, NULL, 0);
// if (err != 0) {
// return err;
// }
// err = mallctl("stats.resident", &stats->resident, &sz, NULL, 0);
// if (err != 0) {
// return err;
// }
// err = mallctl("stats.mapped", &stats->mapped, &sz, NULL, 0);
// if (err != 0) {
// return err;
// }
// // stats.retained is introduced in 4.2.0.
// // err = mallctl("stats.retained", &stats->retained, &sz, NULL, 0);
// // err holds the result of the last read (stats.mapped) at this point.
// return err;
// }
import "C"

import (
"fmt"

// This is explicit because this Go library does not export any Go symbols.
_ "github.com/cockroachdb/c-jemalloc"

"github.com/cockroachdb/cockroach/util/log"

"github.com/dustin/go-humanize"
)

func init() {
if logBuildStats != nil {
panic("logBuildStats is already set")
if getCgoMemStats != nil {
panic("getCgoMemStats is already set")
}
logBuildStats = logJemallocStats
getCgoMemStats = getJemallocStats
}

func logJemallocStats() {
// getJemallocStats reads the current jemalloc statistics through the cgo
// helper jemalloc_get_stats. On success it returns the "allocated" and
// "resident" counters, in that order, with a nil error. If the helper reports
// a non-zero code, it returns zeros and an error carrying that code.
//
// At verbosity 2 a one-line summary of the counters is logged; at verbosity 3
// jemalloc's own detailed report is printed as well.
func getJemallocStats() (uint64, uint64, error) {
	var stats C.JemallocStats
	// TODO(marc): should we panic here? Failure on fetching the stats may be a problem.
	errCode := C.jemalloc_get_stats(&stats)
	if errCode != 0 {
		return 0, 0, fmt.Errorf("error code %d", errCode)
	}

	// Convert once up front; both values are logged and returned.
	allocated := uint64(stats.allocated)
	resident := uint64(stats.resident)

	if log.V(2) {
		// Summary of jemalloc stats:
		log.Infof("jemalloc stats: allocated: %s, active: %s, metadata: %s, resident: %s, mapped: %s",
			humanize.IBytes(allocated),
			humanize.IBytes(uint64(stats.active)),
			humanize.IBytes(uint64(stats.metadata)),
			humanize.IBytes(resident),
			humanize.IBytes(uint64(stats.mapped)))
	}

	if log.V(3) {
		// Detailed jemalloc stats (very verbose, includes per-arena stats).
		C.malloc_stats_print(nil, nil, nil)
	}

	return allocated, resident, nil
}
2 changes: 1 addition & 1 deletion server/status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ func TestMetricsRecording(t *testing.T) {
if err := checkTimeSeriesKey(now, "cr.store.livebytes.1"); err != nil {
return err
}
if err := checkTimeSeriesKey(now, "cr.node.sys.allocbytes.1"); err != nil {
if err := checkTimeSeriesKey(now, "cr.node.sys.go.allocbytes.1"); err != nil {
return err
}
return nil
Expand Down
2 changes: 1 addition & 1 deletion ui/embedded.go

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions ui/next/app/containers/clusterOverview.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,9 @@ class ClusterMain extends React.Component<ClusterMainProps, {}> {
</StackedAreaGraph>

<LineGraph title="Memory Usage"
tooltip="The average memory in use across all nodes.">
tooltip="The memory in use across all nodes.">
<Axis format={ Bytes }>
<Metric name="cr.node.sys.allocbytes" title="Memory" />
<Metric name="cr.node.sys.rss" title="Memory" />
</Axis>
</LineGraph>
</GraphGroup>
Expand Down
6 changes: 4 additions & 2 deletions ui/next/app/containers/nodeGraphs.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,10 @@ export default class extends React.Component<RouteComponentProps<any, any>, {}>

<LineGraph title="Memory Usage" sources={sources}>
<Axis format={ Bytes }>
<Metric name="cr.node.sys.allocbytes" title="Go In Use" />
<Metric name="cr.node.sys.sysbytes" title="Go Sys" />
<Metric name="cr.node.sys.go.allocbytes" title="Go Allocated" />
<Metric name="cr.node.sys.go.totalbytes" title="Go Total" />
<Metric name="cr.node.sys.cgo.allocbytes" title="Cgo Allocated" />
<Metric name="cr.node.sys.cgo.totalbytes" title="Cgo Total" />
<Metric name="cr.node.sys.rss" title="RSS" />
</Axis>
</LineGraph>
Expand Down
6 changes: 4 additions & 2 deletions ui/next/app/containers/nodesGraphs.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,10 @@ export default class extends React.Component<{}, {}> {

<LineGraph title="Memory Usage">
<Axis format={ Bytes }>
<Metric name="cr.node.sys.allocbytes" title="Go In Use" />
<Metric name="cr.node.sys.sysbytes" title="Go Sys" />
<Metric name="cr.node.sys.go.allocbytes" title="Go Allocated" />
<Metric name="cr.node.sys.go.totalbytes" title="Go Total" />
<Metric name="cr.node.sys.cgo.allocbytes" title="Cgo Allocated" />
<Metric name="cr.node.sys.cgo.totalbytes" title="Cgo Total" />
<Metric name="cr.node.sys.rss" title="RSS" />
</Axis>
</LineGraph>
Expand Down
2 changes: 1 addition & 1 deletion ui/next/app/util/proto.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ export namespace MetricConstants {
// Node level metrics.
export var userCPUPercent: string = "sys.cpu.user.percent";
export var sysCPUPercent: string = "sys.cpu.sys.percent";
export var allocBytes: string = "sys.allocbytes";
export var allocBytes: string = "sys.go.allocbytes";
export var sqlConns: string = "sql.conns";
export var rss: string = "sys.rss";
}
Expand Down
5 changes: 4 additions & 1 deletion ui/ts/models/proto.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,10 @@ module Models {
// Node level metrics.
export var userCPUPercent: string = "sys.cpu.user.percent";
export var sysCPUPercent: string = "sys.cpu.sys.percent";
export var allocBytes: string = "sys.allocbytes";
export var goAllocBytes: string = "sys.go.allocbytes";
export var goTotalBytes: string = "sys.go.totalbytes";
export var cgoAllocBytes: string = "sys.cgo.allocbytes";
export var cgoTotalBytes: string = "sys.cgo.totalbytes";
export var sqlConns: string = "sql.conns";
export var rss: string = "sys.rss";
}
Expand Down
5 changes: 2 additions & 3 deletions ui/ts/pages/cluster.ts
Original file line number Diff line number Diff line change
Expand Up @@ -153,13 +153,12 @@ module AdminViews {
.tooltip("The percentage of CPU used by CockroachDB (User %) and system-level operations (Sys %) across all nodes.")
);

// TODO: get total/average memory from all machines
this._addChartSmall(
Metrics.NewAxis(
Metrics.Select.Avg(_sysMetric("allocbytes"))
Metrics.Select.Avg(_sysMetric("rss"))
.title("Memory")
).format(Utils.Format.Bytes).title("Memory Usage")
.tooltip("The average memory in use across all nodes.")
.tooltip("The memory in use across all nodes.")

);

Expand Down
26 changes: 18 additions & 8 deletions ui/ts/pages/nodes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -337,10 +337,14 @@ module AdminViews {
this._addChart(
this.systemAxes,
Metrics.NewAxis(
Metrics.Select.Avg(_sysMetric("allocbytes"))
.title("Go In Use"),
Metrics.Select.Avg(_sysMetric("sysbytes"))
.title("Go Sys"),
Metrics.Select.Avg(_sysMetric("go.allocbytes"))
.title("Go Allocated"),
Metrics.Select.Avg(_sysMetric("go.totalbytes"))
.title("Go Total"),
Metrics.Select.Avg(_sysMetric("cgo.allocbytes"))
.title("Cgo Allocated"),
Metrics.Select.Avg(_sysMetric("cgo.totalbytes"))
.title("Cgo Total"),
Metrics.Select.Avg(_sysMetric("rss"))
.title("RSS")
).format(Utils.Format.Bytes).title("Memory Usage")
Expand Down Expand Up @@ -905,12 +909,18 @@ module AdminViews {
this._addChart(
this.systemAxes,
Metrics.NewAxis(
Metrics.Select.Avg(_sysMetric("allocbytes"))
Metrics.Select.Avg(_sysMetric("go.allocbytes"))
.sources([this._nodeId])
.title("Go In Use"),
Metrics.Select.Avg(_sysMetric("sysbytes"))
.title("Go Allocated"),
Metrics.Select.Avg(_sysMetric("go.totalbytes"))
.sources([this._nodeId])
.title("Go Sys"),
.title("Go Total"),
Metrics.Select.Avg(_sysMetric("cgo.allocbytes"))
.sources([this._nodeId])
.title("Cgo Allocated"),
Metrics.Select.Avg(_sysMetric("cgo.totalbytes"))
.sources([this._nodeId])
.title("Cgo Total"),
Metrics.Select.Avg(_sysMetric("rss"))
.sources([this._nodeId])
.title("RSS")
Expand Down

0 comments on commit c17928f

Please sign in to comment.