Skip to content

Commit

Permalink
ui: add an Overload dashboard
Browse files Browse the repository at this point in the history
This commit adds an Overload dashboard in the metrics view. This is
intended to be a convenient way to monitor admission control.

The dashboard contains:
 - CPU Percent
 - Runnable Goroutines per CPU
 - L0 Sublevels and Files

Release note (ui change): a new Overload dashboard groups metrics that
are useful for admission control.
  • Loading branch information
RaduBerinde committed Jun 29, 2021
1 parent 3e90db4 commit 92bc49c
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 1 deletion.
20 changes: 19 additions & 1 deletion pkg/kv/kvserver/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,18 @@ var (
Measurement: "Storage",
Unit: metric.Unit_BYTES,
}
metaRdbL0Sublevels = metric.Metadata{
Name: "storage.l0-sublevels",
Help: "Number of Level 0 sublevels",
Measurement: "Storage",
Unit: metric.Unit_COUNT,
}
metaRdbL0NumFiles = metric.Metadata{
Name: "storage.l0-num-files",
Help: "Number of Level 0 files",
Measurement: "Storage",
Unit: metric.Unit_COUNT,
}

// Disk health metrics.
metaDiskSlow = metric.Metadata{
Expand Down Expand Up @@ -1145,6 +1157,8 @@ type StoreMetrics struct {
RdbReadAmplification *metric.Gauge
RdbNumSSTables *metric.Gauge
RdbPendingCompaction *metric.Gauge
RdbL0Sublevels *metric.Gauge
RdbL0NumFiles *metric.Gauge

// Disk health metrics.
DiskSlow *metric.Gauge
Expand Down Expand Up @@ -1510,7 +1524,7 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
// Server-side transaction metrics.
CommitWaitsBeforeCommitTrigger: metric.NewCounter(metaCommitWaitBeforeCommitTriggerCount),

// RocksDB metrics.
// RocksDB/Pebble metrics.
RdbBlockCacheHits: metric.NewGauge(metaRdbBlockCacheHits),
RdbBlockCacheMisses: metric.NewGauge(metaRdbBlockCacheMisses),
RdbBlockCacheUsage: metric.NewGauge(metaRdbBlockCacheUsage),
Expand All @@ -1528,6 +1542,8 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
RdbReadAmplification: metric.NewGauge(metaRdbReadAmplification),
RdbNumSSTables: metric.NewGauge(metaRdbNumSSTables),
RdbPendingCompaction: metric.NewGauge(metaRdbPendingCompaction),
RdbL0Sublevels: metric.NewGauge(metaRdbL0Sublevels),
RdbL0NumFiles: metric.NewGauge(metaRdbL0NumFiles),

// Disk health metrics.
DiskSlow: metric.NewGauge(metaDiskSlow),
Expand Down Expand Up @@ -1735,6 +1751,8 @@ func (sm *StoreMetrics) updateEngineMetrics(m storage.Metrics) {
sm.RdbTableReadersMemEstimate.Update(m.TableCache.Size)
sm.RdbReadAmplification.Update(int64(m.ReadAmp()))
sm.RdbPendingCompaction.Update(int64(m.Compact.EstimatedDebt))
sm.RdbL0Sublevels.Update(int64(m.Levels[0].Sublevels))
sm.RdbL0NumFiles.Update(m.Levels[0].NumFiles)
sm.RdbNumSSTables.Update(m.NumSSTables())
sm.DiskSlow.Update(m.DiskSlowCount)
sm.DiskStalled.Update(m.DiskStallCount)
Expand Down
8 changes: 8 additions & 0 deletions pkg/ts/catalog/chart_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -2258,6 +2258,14 @@ var charts = []sectionDescription{
Title: "Pending Compaction",
Metrics: []string{"rocksdb.estimated-pending-compaction"},
},
{
Title: "L0 Sublevels",
Metrics: []string{"storage.l0-sublevels"},
},
{
Title: "L0 Files",
Metrics: []string{"storage.l0-num-files"},
},
{
Title: "Ingestion",
Metrics: []string{"rocksdb.ingested-bytes"},
Expand Down
2 changes: 2 additions & 0 deletions pkg/ui/cluster-ui/src/store/nodes/nodes.fixtures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -895,6 +895,8 @@ export const getNodeStatus = () => {
"rocksdb.memtable.total-size": 67108864,
"rocksdb.num-sstables": 4,
"rocksdb.read-amplification": 1,
"storage.l0-sublevels": 1,
"storage.l0-num-files": 1,
"rocksdb.table-readers-mem-estimate": 1848,
"storage.disk-slow": 0,
"storage.disk-stalled": 0,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Copyright 2021 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

import React from "react";
import _ from "lodash";

import { LineGraph } from "src/views/cluster/components/linegraph";
import {
Metric,
Axis,
AxisUnits,
} from "src/views/shared/components/metricQuery";

import {
GraphDashboardProps,
nodeDisplayName,
storeIDsForNode,
} from "./dashboardUtils";

/**
 * Builds the graphs for the Overload dashboard.
 *
 * Returns three line graphs, in display order:
 *  - "CPU Percent": one series per node, from
 *    `cr.node.sys.cpu.combined.percent-normalized`.
 *  - "Runnable Goroutines per CPU": one series per node, from
 *    `cr.node.sys.runnable.goroutines.per.cpu`.
 *  - "LSM L0 Health": two series per node (L0 sublevels and L0 file count),
 *    each sourced from the store IDs that `storeIDsForNode` returns for
 *    that node.
 *
 * @param props Dashboard inputs; only `nodeIDs`, `nodesSummary`,
 *   `nodeSources` and `storeSources` are read here.
 * @returns The array of `LineGraph` elements described above.
 */
export default function (props: GraphDashboardProps) {
  const { nodeIDs, nodesSummary, nodeSources, storeSources } = props;

  return [
    <LineGraph title="CPU Percent" sources={nodeSources}>
      <Axis units={AxisUnits.Percentage} label="CPU">
        {nodeIDs.map((nid) => (
          // Elements produced by map() need a stable key for React.
          <Metric
            key={nid}
            name="cr.node.sys.cpu.combined.percent-normalized"
            title={nodeDisplayName(nodesSummary, nid)}
            sources={[nid]}
          />
        ))}
      </Axis>
    </LineGraph>,

    <LineGraph
      title="Runnable Goroutines per CPU"
      sources={nodeSources}
      tooltip={`The number of Goroutines waiting per CPU.`}
    >
      <Axis label="goroutines">
        {nodeIDs.map((nid) => (
          <Metric
            key={nid}
            name="cr.node.sys.runnable.goroutines.per.cpu"
            title={nodeDisplayName(nodesSummary, nid)}
            sources={[nid]}
          />
        ))}
      </Axis>
    </LineGraph>,

    <LineGraph
      title="LSM L0 Health"
      sources={storeSources}
      tooltip={`The number of files and sublevels within Level 0.`}
    >
      <Axis label="count">
        {nodeIDs.map((nid) => (
          // The key belongs on the element returned from map(), i.e. the
          // fragment. The short `<>` syntax cannot carry a key, so the
          // explicit React.Fragment form is used. The two Metric siblings
          // are static children and need no keys of their own (they
          // previously shared the same key, which is a duplicate-key error).
          <React.Fragment key={nid}>
            <Metric
              name="cr.store.storage.l0-sublevels"
              title={"L0 Sublevels " + nodeDisplayName(nodesSummary, nid)}
              sources={storeIDsForNode(nodesSummary, nid)}
            />
            <Metric
              name="cr.store.storage.l0-num-files"
              title={"L0 Files " + nodeDisplayName(nodesSummary, nid)}
              sources={storeIDsForNode(nodesSummary, nid)}
            />
          </React.Fragment>
        ))}
      </Axis>
    </LineGraph>,
  ];
}
2 changes: 2 additions & 0 deletions pkg/ui/src/views/cluster/containers/nodeGraphs/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ import queuesDashboard from "./dashboards/queues";
import requestsDashboard from "./dashboards/requests";
import hardwareDashboard from "./dashboards/hardware";
import changefeedsDashboard from "./dashboards/changefeeds";
import overloadDashboard from "./dashboards/overload";
import { getMatchParamByName } from "src/util/query";
import { PayloadAction } from "src/interfaces/action";
import {
Expand All @@ -82,6 +83,7 @@ const dashboards: { [key: string]: GraphDashboard } = {
queues: { label: "Queues", component: queuesDashboard },
requests: { label: "Slow Requests", component: requestsDashboard },
changefeeds: { label: "Changefeeds", component: changefeedsDashboard },
overload: { label: "Overload", component: overloadDashboard },
};

const defaultDashboard = "overview";
Expand Down

0 comments on commit 92bc49c

Please sign in to comment.