From 3b2c560873a509d3215f82fc56ae2afed32529fc Mon Sep 17 00:00:00 2001 From: marc Date: Wed, 10 Aug 2016 18:22:18 -0700 Subject: [PATCH] Rework Metrics Part 2 * add MetricMetadata to all metrics. Contains name/help string. * don't automatically add metrics to registries * replace all Registry.X methods to create/add new metrics with NewX * "pseudo" metrics are a group that expands into individual metrics * optional label pairs on registries (storeID only for now) * registries can no longer be added to registries A few things I would still like to do: * make registries just hold a slice. sorting them would allow reasonable searching (used in tests only) and keep the vars page sorted (easier refresh) * remove registries from big objects, instead just keep the metrics. The metric/registry pattern is not the same everywhere. * fill in help strings and audit names. this is part 3. --- gossip/client.go | 3 +- gossip/client_test.go | 52 +++-- gossip/gossip.go | 30 +-- gossip/gossip_test.go | 4 +- gossip/infostore_test.go | 2 +- gossip/node_set.go | 3 +- gossip/node_set_test.go | 12 +- gossip/server.go | 16 +- gossip/simulation/network.go | 4 +- kv/txn_coord_sender.go | 38 ++-- rpc/clock_offset.go | 24 +-- rpc/clock_offset_test.go | 5 +- server/node.go | 32 ++-- server/status/recorder.go | 1 + server/status/recorder_test.go | 24 ++- server/status/runtime.go | 81 ++++---- server/testserver.go | 2 +- sql/executor.go | 72 +++---- sql/metric_test.go | 36 ++-- sql/metric_util_test.go | 13 +- sql/pgwire/server.go | 20 +- sql/pgwire/types_test.go | 4 +- sql/pgwire_test.go | 6 +- sql/txn_restart_test.go | 4 +- storage/store.go | 207 -------------------- storage/store_metrics.go | 336 +++++++++++++++++++++++++++++++++ util/metric/metric.go | 140 ++++++-------- util/metric/metric_group.go | 122 ++++++++++++ util/metric/metric_test.go | 14 +- util/metric/registry.go | 182 +++++++----------- util/metric/registry_test.go | 64 ++++--- 31 files changed, 889 insertions(+), 664 deletions(-) create mode 100644 storage/store_metrics.go create mode 100644 util/metric/metric_group.go diff --git a/gossip/client.go b/gossip/client.go index 1a5a25b0be3c..a7d9c9901999 100644 --- a/gossip/client.go +++ b/gossip/client.go @@ -29,7 +29,6 @@ import ( "github.com/cockroachdb/cockroach/util" "github.com/cockroachdb/cockroach/util/grpcutil" "github.com/cockroachdb/cockroach/util/log" - "github.com/cockroachdb/cockroach/util/metric" "github.com/cockroachdb/cockroach/util/stop" "github.com/cockroachdb/cockroach/util/timeutil" ) @@ -62,7 +61,7 @@ func newClient(addr net.Addr, nodeMetrics metrics) *client { addr: addr, remoteHighWaterStamps: map[roachpb.NodeID]int64{}, closer: make(chan struct{}), - clientMetrics: makeMetrics(metric.NewRegistry()), + clientMetrics: makeMetrics(), nodeMetrics: nodeMetrics, } } diff --git a/gossip/client_test.go b/gossip/client_test.go index ad1fb43b417b..ccc728ed0477 100644 --- a/gossip/client_test.go +++ b/gossip/client_test.go @@ -150,7 +150,7 @@ func TestClientGossip(t *testing.T) { local := startGossip(1, stopper, t, metric.NewRegistry()) remote := startGossip(2, stopper, t, metric.NewRegistry()) disconnected := make(chan *client, 1) - c := newClient(&remote.is.NodeAddr, makeMetrics(metric.NewRegistry())) + c := newClient(&remote.is.NodeAddr, makeMetrics()) defer func() { stopper.Stop() @@ -184,16 +184,14 @@ func TestClientGossipMetrics(t *testing.T) { defer leaktest.AfterTest(t)() stopper := stop.NewStopper() defer stopper.Stop() - localRegistry := metric.NewRegistry() - local := startGossip(1, 
stopper, t, localRegistry) - remoteRegistry := metric.NewRegistry() - remote := startGossip(2, stopper, t, remoteRegistry) + local := startGossip(1, stopper, t, metric.NewRegistry()) + remote := startGossip(2, stopper, t, metric.NewRegistry()) gossipSucceedsSoon( t, stopper, make(chan *client, 2), map[*client]*Gossip{ - newClient(&local.is.NodeAddr, makeMetrics(metric.NewRegistry())): remote, - newClient(&remote.is.NodeAddr, makeMetrics(metric.NewRegistry())): local, + newClient(&local.is.NodeAddr, makeMetrics()): remote, + newClient(&remote.is.NodeAddr, makeMetrics()): local, }, func() error { if err := local.AddInfo("local-key", nil, time.Hour); err != nil { @@ -204,35 +202,29 @@ func TestClientGossipMetrics(t *testing.T) { } // Infos/Bytes Sent/Received should not be zero. - for i, reg := range []*metric.Registry{localRegistry, remoteRegistry} { - for _, ratesName := range []string{ - InfosSentRatesName, - InfosReceivedRatesName, - BytesSentRatesName, - BytesReceivedRatesName, + for i, s := range []*server{local.server, remote.server} { + for _, rate := range []metric.Rates{ + s.nodeMetrics.infosSent, + s.nodeMetrics.infosReceived, + s.nodeMetrics.bytesSent, + s.nodeMetrics.bytesReceived, } { - counterName := ratesName + "-count" - counter := reg.GetCounter(counterName) - if counter == nil { - return errors.Errorf("%d: missing counter %q", i, counterName) - } + counter := rate.Counter if count := counter.Count(); count <= 0 { - return errors.Errorf("%d: expected metrics counter %q > 0; = %d", i, counterName, count) + return errors.Errorf("%d: expected metrics counter %q > 0; = %d", i, counter.GetName(), count) } } } // Since there are two gossip nodes, there should be at least one incoming // and outgoing connection. - for i, reg := range []*metric.Registry{localRegistry, remoteRegistry} { - for _, name := range []string{} { - gauge := reg.GetGauge(name) - if gauge == nil { - return errors.Errorf("%d: missing gauge %q", i, name) - } - if count := gauge.Value(); count <= 0 { - return errors.Errorf("%d: expected metrics gauge %q > 0; = %d", i, name, count) - } + for i, s := range []*server{local.server, remote.server} { + gauge := s.incoming.gauge + if gauge == nil { + return errors.Errorf("%d: missing gauge \"incoming\"", i) + } + if count := gauge.Value(); count <= 0 { + return errors.Errorf("%d: expected metrics gauge %q > 0; = %d", i, gauge.GetName(), count) } } return nil @@ -249,7 +241,7 @@ func TestClientNodeID(t *testing.T) { // Use an insecure context. We're talking to tcp socket which are not in the certs. rpcContext := rpc.NewContext(&base.Context{Insecure: true}, nil, stopper) - c := newClient(&remote.nodeAddr, makeMetrics(metric.NewRegistry())) + c := newClient(&remote.nodeAddr, makeMetrics()) disconnected := make(chan *client, 1) disconnected <- c @@ -494,7 +486,7 @@ func TestClientForwardUnresolved(t *testing.T) { addr := local.is.NodeAddr local.mu.Unlock() - client := newClient(&addr, makeMetrics(metric.NewRegistry())) // never started + client := newClient(&addr, makeMetrics()) // never started newAddr := util.UnresolvedAddr{ NetworkField: "tcp", diff --git a/gossip/gossip.go b/gossip/gossip.go index 149f191b47ed..4dd7bc461bcc 100644 --- a/gossip/gossip.go +++ b/gossip/gossip.go @@ -117,13 +117,13 @@ const ( ) // Gossip metrics counter names. 
-const ( - ConnectionsIncomingGaugeName = "gossip.connections.incoming" - ConnectionsOutgoingGaugeName = "gossip.connections.outgoing" - InfosSentRatesName = "gossip.infos.sent" - InfosReceivedRatesName = "gossip.infos.received" - BytesSentRatesName = "gossip.bytes.sent" - BytesReceivedRatesName = "gossip.bytes.received" +var ( + MetaConnectionsIncomingGauge = metric.MetricMetadata{"gossip.connections.incoming", ""} + MetaConnectionsOutgoingGauge = metric.MetricMetadata{"gossip.connections.outgoing", ""} + MetaInfosSentRates = metric.MetricMetadata{"gossip.infos.sent", ""} + MetaInfosReceivedRates = metric.MetricMetadata{"gossip.infos.received", ""} + MetaBytesSentRates = metric.MetricMetadata{"gossip.bytes.sent", ""} + MetaBytesReceivedRates = metric.MetricMetadata{"gossip.bytes.received", ""} ) // Storage is an interface which allows the gossip instance @@ -196,7 +196,7 @@ func New(rpcContext *rpc.Context, grpcServer *grpc.Server, resolvers []resolver. Connected: make(chan struct{}), rpcContext: rpcContext, server: newServer(stopper, registry), - outgoing: makeNodeSet(minPeers, registry.Gauge(ConnectionsOutgoingGaugeName)), + outgoing: makeNodeSet(minPeers, metric.NewGauge(MetaConnectionsOutgoingGauge)), bootstrapping: map[string]struct{}{}, disconnected: make(chan *client, 10), stalledCh: make(chan struct{}, 1), @@ -207,6 +207,7 @@ func New(rpcContext *rpc.Context, grpcServer *grpc.Server, resolvers []resolver. resolverAddrs: map[util.UnresolvedAddr]resolver.Resolver{}, bootstrapAddrs: map[util.UnresolvedAddr]struct{}{}, } + registry.AddMetric(g.outgoing.gauge) g.SetResolvers(resolvers) // Add ourselves as a SystemConfig watcher. @@ -1092,13 +1093,12 @@ func (m metrics) String() string { m.infosSent.Count(), m.infosReceived.Count(), m.bytesSent.Count(), m.bytesReceived.Count()) } -// makeMetrics makes a new metrics object with rates set on the provided -// registry. -func makeMetrics(registry *metric.Registry) metrics { +// makeMetrics makes a new metrics object with rates. 
+func makeMetrics() metrics { return metrics{ - bytesReceived: registry.Rates(BytesReceivedRatesName), - bytesSent: registry.Rates(BytesSentRatesName), - infosReceived: registry.Rates(InfosReceivedRatesName), - infosSent: registry.Rates(InfosSentRatesName), + bytesReceived: metric.NewRates(MetaBytesReceivedRates), + bytesSent: metric.NewRates(MetaBytesSentRates), + infosReceived: metric.NewRates(MetaInfosReceivedRates), + infosSent: metric.NewRates(MetaInfosSentRates), } } diff --git a/gossip/gossip_test.go b/gossip/gossip_test.go index f60b5512a11a..b35e4205ca93 100644 --- a/gossip/gossip_test.go +++ b/gossip/gossip_test.go @@ -161,7 +161,7 @@ func TestGossipNoForwardSelf(t *testing.T) { } for _, peer := range peers { - c := newClient(&local.is.NodeAddr, makeMetrics(metric.NewRegistry())) + c := newClient(&local.is.NodeAddr, makeMetrics()) util.SucceedsSoon(t, func() error { conn, err := peer.rpcContext.GRPCDial(c.addr.String(), grpc.WithBlock()) @@ -193,7 +193,7 @@ func TestGossipNoForwardSelf(t *testing.T) { peer := startGossip(roachpb.NodeID(i+local.server.incoming.maxSize+2), stopper, t, metric.NewRegistry()) for { - c := newClient(&local.is.NodeAddr, makeMetrics(metric.NewRegistry())) + c := newClient(&local.is.NodeAddr, makeMetrics()) c.start(peer, disconnectedCh, peer.rpcContext, stopper) disconnectedClient := <-disconnectedCh diff --git a/gossip/infostore_test.go b/gossip/infostore_test.go index af656870ecd7..b6b699cef774 100644 --- a/gossip/infostore_test.go +++ b/gossip/infostore_test.go @@ -296,7 +296,7 @@ func TestLeastUseful(t *testing.T) { defer stopper.Stop() is := newInfoStore(1, emptyAddr, stopper) - set := makeNodeSet(3, metric.NewGauge()) + set := makeNodeSet(3, metric.NewGauge(metric.MetricMetadata{"", ""})) if is.leastUseful(set) != 0 { t.Error("not expecting a node from an empty set") } diff --git a/gossip/node_set.go b/gossip/node_set.go index ef3971d9b37a..fd12d36be992 100644 --- a/gossip/node_set.go +++ b/gossip/node_set.go @@ -62,7 +62,8 @@ func (as nodeSet) asSlice() []roachpb.NodeID { // node and false to remove an node. The new nodeSet has a separate gauge object // from the parent. 
func (as nodeSet) filter(filterFn func(node roachpb.NodeID) bool) nodeSet { - avail := makeNodeSet(as.maxSize, metric.NewGauge()) + avail := makeNodeSet(as.maxSize, + metric.NewGauge(metric.MetricMetadata{"TODO(marc)", "TODO(marc)"})) for node := range as.nodes { if filterFn(node) { avail.addNode(node) diff --git a/gossip/node_set_test.go b/gossip/node_set_test.go index 8a03e314f990..90745fde405d 100644 --- a/gossip/node_set_test.go +++ b/gossip/node_set_test.go @@ -26,7 +26,7 @@ import ( func TestNodeSetMaxSize(t *testing.T) { defer leaktest.AfterTest(t)() - nodes := makeNodeSet(1, metric.NewGauge()) + nodes := makeNodeSet(1, metric.NewGauge(metric.MetricMetadata{"", ""})) if !nodes.hasSpace() { t.Error("set should have space") } @@ -38,7 +38,7 @@ func TestNodeSetMaxSize(t *testing.T) { func TestNodeSetHasNode(t *testing.T) { defer leaktest.AfterTest(t)() - nodes := makeNodeSet(2, metric.NewGauge()) + nodes := makeNodeSet(2, metric.NewGauge(metric.MetricMetadata{"", ""})) node := roachpb.NodeID(1) if nodes.hasNode(node) { t.Error("node wasn't added and should not be valid") @@ -52,7 +52,7 @@ func TestNodeSetHasNode(t *testing.T) { func TestNodeSetAddAndRemoveNode(t *testing.T) { defer leaktest.AfterTest(t)() - nodes := makeNodeSet(2, metric.NewGauge()) + nodes := makeNodeSet(2, metric.NewGauge(metric.MetricMetadata{"", ""})) node0 := roachpb.NodeID(1) node1 := roachpb.NodeID(2) nodes.addNode(node0) @@ -72,13 +72,13 @@ func TestNodeSetAddAndRemoveNode(t *testing.T) { func TestNodeSetFilter(t *testing.T) { defer leaktest.AfterTest(t)() - nodes1 := makeNodeSet(2, metric.NewGauge()) + nodes1 := makeNodeSet(2, metric.NewGauge(metric.MetricMetadata{"", ""})) node0 := roachpb.NodeID(1) node1 := roachpb.NodeID(2) nodes1.addNode(node0) nodes1.addNode(node1) - nodes2 := makeNodeSet(1, metric.NewGauge()) + nodes2 := makeNodeSet(1, metric.NewGauge(metric.MetricMetadata{"", ""})) nodes2.addNode(node1) filtered := nodes1.filter(func(a roachpb.NodeID) bool { @@ -91,7 +91,7 @@ func TestNodeSetFilter(t *testing.T) { func TestNodeSetAsSlice(t *testing.T) { defer leaktest.AfterTest(t)() - nodes := makeNodeSet(2, metric.NewGauge()) + nodes := makeNodeSet(2, metric.NewGauge(metric.MetricMetadata{"", ""})) node0 := roachpb.NodeID(1) node1 := roachpb.NodeID(2) nodes.addNode(node0) diff --git a/gossip/server.go b/gossip/server.go index bdbe9277340d..d566e8b688b8 100644 --- a/gossip/server.go +++ b/gossip/server.go @@ -61,16 +61,24 @@ type server struct { // newServer creates and returns a server struct. func newServer(stopper *stop.Stopper, registry *metric.Registry) *server { - return &server{ + s := &server{ stopper: stopper, is: newInfoStore(0, util.UnresolvedAddr{}, stopper), - incoming: makeNodeSet(minPeers, registry.Gauge(ConnectionsIncomingGaugeName)), + incoming: makeNodeSet(minPeers, metric.NewGauge(MetaConnectionsIncomingGauge)), nodeMap: make(map[util.UnresolvedAddr]serverInfo), tighten: make(chan roachpb.NodeID, 1), ready: make(chan struct{}), - nodeMetrics: makeMetrics(registry), - serverMetrics: makeMetrics(metric.NewRegistry()), + nodeMetrics: makeMetrics(), + serverMetrics: makeMetrics(), } + + registry.AddMetric(s.incoming.gauge) + registry.AddMetricGroup(s.nodeMetrics.bytesReceived) + registry.AddMetricGroup(s.nodeMetrics.bytesSent) + registry.AddMetricGroup(s.nodeMetrics.infosReceived) + registry.AddMetricGroup(s.nodeMetrics.infosSent) + + return s } // Gossip receives gossiped information from a peer node. 
diff --git a/gossip/simulation/network.go b/gossip/simulation/network.go index 30cbb63e6852..6f7adcbfade6 100644 --- a/gossip/simulation/network.go +++ b/gossip/simulation/network.go @@ -250,7 +250,7 @@ func (n *Network) isNetworkConnected() bool { func (n *Network) infosSent() int { var count int64 for _, node := range n.Nodes { - count += node.Registry.GetCounter(gossip.InfosSentRatesName + "-count").Count() + count += node.Registry.GetCounter(gossip.MetaInfosSentRates.Name + "-count").Count() } return int(count) } @@ -260,7 +260,7 @@ func (n *Network) infosSent() int { func (n *Network) infosReceived() int { var count int64 for _, node := range n.Nodes { - count += node.Registry.GetCounter(gossip.InfosReceivedRatesName + "-count").Count() + count += node.Registry.GetCounter(gossip.MetaInfosReceivedRates.Name + "-count").Count() } return int(count) } diff --git a/kv/txn_coord_sender.go b/kv/txn_coord_sender.go index 24cb7d04106d..af6f46265f0a 100644 --- a/kv/txn_coord_sender.go +++ b/kv/txn_coord_sender.go @@ -114,26 +114,34 @@ type TxnMetrics struct { Restarts *metric.Histogram } -const ( - abortsPrefix = "txn.aborts" - commitsPrefix = "txn.commits" - commits1PCPrefix = "txn.commits1PC" - abandonsPrefix = "txn.abandons" - durationsPrefix = "txn.durations" - restartsKey = "txn.restarts" +var ( + metaAbortsRates = metric.MetricMetadata{"txn.aborts", ""} + metaCommitsRates = metric.MetricMetadata{"txn.commits", ""} + metaCommits1PCRates = metric.MetricMetadata{"txn.commits1PC", ""} + metaAbandonsRates = metric.MetricMetadata{"txn.abandons", ""} + metaDurationsHistograms = metric.MetricMetadata{"txn.durations", ""} + metaRestartsHistogram = metric.MetricMetadata{"txn.restarts", ""} ) // NewTxnMetrics returns a new instance of txnMetrics that contains metrics which have // been registered with the provided Registry. 
func NewTxnMetrics(registry *metric.Registry) *TxnMetrics { - return &TxnMetrics{ - Aborts: registry.Rates(abortsPrefix), - Commits: registry.Rates(commitsPrefix), - Commits1PC: registry.Rates(commits1PCPrefix), - Abandons: registry.Rates(abandonsPrefix), - Durations: registry.Latency(durationsPrefix), - Restarts: registry.Histogram(restartsKey, 60*time.Second, 100, 3), - } + tm := &TxnMetrics{ + Aborts: metric.NewRates(metaAbortsRates), + Commits: metric.NewRates(metaCommitsRates), + Commits1PC: metric.NewRates(metaCommits1PCRates), + Abandons: metric.NewRates(metaAbandonsRates), + Durations: metric.NewLatency(metaDurationsHistograms), + Restarts: metric.NewHistogram(metaRestartsHistogram, 60*time.Second, 100, 3), + } + registry.AddMetricGroup(tm.Aborts) + registry.AddMetricGroup(tm.Commits) + registry.AddMetricGroup(tm.Commits1PC) + registry.AddMetricGroup(tm.Abandons) + registry.AddMetricGroup(tm.Durations) + registry.AddMetric(tm.Restarts) + + return tm } // A TxnCoordSender is an implementation of client.Sender which diff --git a/rpc/clock_offset.go b/rpc/clock_offset.go index af1d6dcc3fb5..5508776d16d3 100644 --- a/rpc/clock_offset.go +++ b/rpc/clock_offset.go @@ -33,9 +33,9 @@ type remoteClockMetrics struct { clusterOffsetUpperBound *metric.Gauge } -const ( - clusterOffsetLowerBoundName = "clock-offset.lower-bound-nanos" - clusterOffsetUpperBoundName = "clock-offset.upper-bound-nanos" +var ( + metaClusterOffsetLowerBound = metric.MetricMetadata{"clock-offset.lower-bound-nanos", ""} + metaClusterOffsetUpperBound = metric.MetricMetadata{"clock-offset.upper-bound-nanos", ""} ) // RemoteClockMonitor keeps track of the most recent measurements of remote @@ -49,8 +49,7 @@ type RemoteClockMonitor struct { offsets map[string]RemoteOffset } - metrics remoteClockMetrics - registry *metric.Registry + metrics remoteClockMetrics } // newRemoteClockMonitor returns a monitor with the given server clock. @@ -58,12 +57,11 @@ func newRemoteClockMonitor(clock *hlc.Clock, offsetTTL time.Duration) *RemoteClo r := RemoteClockMonitor{ clock: clock, offsetTTL: offsetTTL, - registry: metric.NewRegistry(), } r.mu.offsets = make(map[string]RemoteOffset) r.metrics = remoteClockMetrics{ - clusterOffsetLowerBound: r.registry.Gauge(clusterOffsetLowerBoundName), - clusterOffsetUpperBound: r.registry.Gauge(clusterOffsetUpperBoundName), + clusterOffsetLowerBound: metric.NewGauge(metaClusterOffsetLowerBound), + clusterOffsetUpperBound: metric.NewGauge(metaClusterOffsetUpperBound), } return &r } @@ -178,16 +176,10 @@ func (r RemoteOffset) isStale(ttl time.Duration, now time.Time) bool { return r.measuredAt().Add(ttl).Before(now) } -// Registry returns a registry with the metrics tracked by this server, which can be used to -// access its stats or be added to another registry. -func (r *RemoteClockMonitor) Registry() *metric.Registry { - return r.registry -} - // RegisterMetrics adds the local metrics to a registry. // TODO(marc): this pattern deviates from other users of the registry // that take it as an argument at metric construction time. 
func (r *RemoteClockMonitor) RegisterMetrics(reg *metric.Registry) { - reg.MustAdd(clusterOffsetLowerBoundName, r.metrics.clusterOffsetLowerBound) - reg.MustAdd(clusterOffsetUpperBoundName, r.metrics.clusterOffsetUpperBound) + reg.AddMetric(r.metrics.clusterOffsetLowerBound) + reg.AddMetric(r.metrics.clusterOffsetUpperBound) } diff --git a/rpc/clock_offset_test.go b/rpc/clock_offset_test.go index b2de6770d2e0..fd4ba2cdece1 100644 --- a/rpc/clock_offset_test.go +++ b/rpc/clock_offset_test.go @@ -25,6 +25,7 @@ import ( "github.com/cockroachdb/cockroach/testutils" "github.com/cockroachdb/cockroach/util/hlc" "github.com/cockroachdb/cockroach/util/leaktest" + "github.com/cockroachdb/cockroach/util/metric" "github.com/cockroachdb/cockroach/util/stop" ) @@ -181,7 +182,9 @@ func TestClockOffsetMetrics(t *testing.T) { t.Fatal(err) } - reg := monitor.Registry() + reg := metric.NewRegistry() + monitor.RegisterMetrics(reg) + expLower := offset.Offset - offset.Uncertainty if a, e := reg.GetGauge("lower-bound-nanos").Value(), expLower; a != e { t.Errorf("lower bound %d != expected %d", a, e) diff --git a/server/node.go b/server/node.go index 602dee198ffa..a9c74b44e2e5 100644 --- a/server/node.go +++ b/server/node.go @@ -63,11 +63,13 @@ const ( // publishStatusInterval is the interval for publishing periodic statistics // from stores to the internal event feed. publishStatusInterval = 10 * time.Second +) - // Metric names. - execLatencyName = "exec.latency" - execSuccessName = "exec.success" - execErrorName = "exec.error" +// Metric names. +var ( + metaExecLatency = metric.MetricMetadata{"exec.latency", ""} + metaExecSuccess = metric.MetricMetadata{"exec.success", ""} + metaExecError = metric.MetricMetadata{"exec.error", ""} ) // errNeedsBootstrap indicates the node should be used as the seed of @@ -80,19 +82,21 @@ var errNeedsBootstrap = errors.New("node has no initialized stores and no instru var errCannotJoinSelf = errors.New("an uninitialized node cannot specify its own address to join a cluster") type nodeMetrics struct { - registry *metric.Registry - latency metric.Histograms - success metric.Rates - err metric.Rates + latency metric.Histograms + success metric.Rates + err metric.Rates } func makeNodeMetrics(reg *metric.Registry) nodeMetrics { - return nodeMetrics{ - registry: reg, - latency: reg.Latency(execLatencyName), - success: reg.Rates(execSuccessName), - err: reg.Rates(execErrorName), - } + nm := nodeMetrics{ + latency: metric.NewLatency(metaExecLatency), + success: metric.NewRates(metaExecSuccess), + err: metric.NewRates(metaExecError), + } + reg.AddMetricGroup(nm.latency) + reg.AddMetricGroup(nm.success) + reg.AddMetricGroup(nm.err) + return nm } // callComplete records very high-level metrics about the number of completed diff --git a/server/status/recorder.go b/server/status/recorder.go index d3dfcd4e90f9..d760f0f01d20 100644 --- a/server/status/recorder.go +++ b/server/status/recorder.go @@ -131,6 +131,7 @@ func (mr *MetricsRecorder) AddStore(store storeMetrics) { mr.mu.Lock() defer mr.mu.Unlock() storeID := store.StoreID() + store.Registry().AddLabel("store", strconv.Itoa(int(storeID))) mr.mu.storeRegistries[storeID] = store.Registry() mr.mu.stores[storeID] = store } diff --git a/server/status/recorder_test.go b/server/status/recorder_test.go index 7d162f1a781b..8b240be8bdfe 100644 --- a/server/status/recorder_test.go +++ b/server/status/recorder_test.go @@ -257,16 +257,24 @@ func TestMetricsRecorder(t *testing.T) { for _, data := range metricNames { switch data.typ { case "gauge": - 
reg.reg.Gauge(reg.prefix + data.name).Update(data.val) + g := metric.NewGauge(metric.MetricMetadata{reg.prefix + data.name, ""}) + reg.reg.AddMetric(g) + g.Update(data.val) addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode) case "floatgauge": - reg.reg.GaugeFloat64(reg.prefix + data.name).Update(float64(data.val)) + g := metric.NewGaugeFloat64(metric.MetricMetadata{reg.prefix + data.name, ""}) + reg.reg.AddMetric(g) + g.Update(float64(data.val)) addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode) case "counter": - reg.reg.Counter(reg.prefix + data.name).Inc(data.val) + c := metric.NewCounter(metric.MetricMetadata{reg.prefix + data.name, ""}) + reg.reg.AddMetric(c) + c.Inc((data.val)) addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode) case "rate": - reg.reg.Rates(reg.prefix + data.name).Add(data.val) + r := metric.NewRates(metric.MetricMetadata{reg.prefix + data.name, ""}) + reg.reg.AddMetricGroup(r) + r.Add(data.val) addExpected(reg.prefix, data.name+"-count", reg.source, 100, data.val, reg.isNode) for _, scale := range metric.DefaultTimeScales { // Rate data is subject to timing errors in tests. Zero out @@ -274,12 +282,16 @@ func TestMetricsRecorder(t *testing.T) { addExpected(reg.prefix, data.name+sep+scale.Name(), reg.source, 100, 0, reg.isNode) } case "histogram": - reg.reg.Histogram(reg.prefix+data.name, time.Second, 1000, 2).RecordValue(data.val) + h := metric.NewHistogram(metric.MetricMetadata{reg.prefix + data.name, ""}, time.Second, 1000, 2) + reg.reg.AddMetric(h) + h.RecordValue(data.val) for _, q := range recordHistogramQuantiles { addExpected(reg.prefix, data.name+q.suffix, reg.source, 100, data.val, reg.isNode) } case "latency": - reg.reg.Latency(reg.prefix + data.name).RecordValue(data.val) + l := metric.NewLatency(metric.MetricMetadata{reg.prefix + data.name, ""}) + reg.reg.AddMetricGroup(l) + l.RecordValue(data.val) // Latency is simply three histograms (at different resolution // time scales). 
for _, scale := range metric.DefaultTimeScales { diff --git a/server/status/runtime.go b/server/status/runtime.go index 6bb57448fddc..08b0bd1ab8ed 100644 --- a/server/status/runtime.go +++ b/server/status/runtime.go @@ -31,21 +31,21 @@ import ( "github.com/elastic/gosigar" ) -const ( - nameCgoCalls = "sys.cgocalls" - nameGoroutines = "sys.goroutines" - nameGoAllocBytes = "sys.go.allocbytes" - nameGoTotalBytes = "sys.go.totalbytes" - nameCgoAllocBytes = "sys.cgo.allocbytes" - nameCgoTotalBytes = "sys.cgo.totalbytes" - nameGCCount = "sys.gc.count" - nameGCPauseNS = "sys.gc.pause.ns" - nameGCPausePercent = "sys.gc.pause.percent" - nameCPUUserNS = "sys.cpu.user.ns" - nameCPUUserPercent = "sys.cpu.user.percent" - nameCPUSysNS = "sys.cpu.sys.ns" - nameCPUSysPercent = "sys.cpu.sys.percent" - nameRSS = "sys.rss" +var ( + metaCgoCalls = metric.MetricMetadata{"sys.cgocalls", "Number of cgo calls"} + metaGoroutines = metric.MetricMetadata{"sys.goroutines", "Number of goroutines"} + metaGoAllocBytes = metric.MetricMetadata{"sys.go.allocbytes", ""} + metaGoTotalBytes = metric.MetricMetadata{"sys.go.totalbytes", ""} + metaCgoAllocBytes = metric.MetricMetadata{"sys.cgo.allocbytes", ""} + metaCgoTotalBytes = metric.MetricMetadata{"sys.cgo.totalbytes", ""} + metaGCCount = metric.MetricMetadata{"sys.gc.count", ""} + metaGCPauseNS = metric.MetricMetadata{"sys.gc.pause.ns", ""} + metaGCPausePercent = metric.MetricMetadata{"sys.gc.pause.percent", ""} + metaCPUUserNS = metric.MetricMetadata{"sys.cpu.user.ns", ""} + metaCPUUserPercent = metric.MetricMetadata{"sys.cpu.user.percent", ""} + metaCPUSysNS = metric.MetricMetadata{"sys.cpu.sys.ns", ""} + metaCPUSysPercent = metric.MetricMetadata{"sys.cpu.sys.percent", ""} + metaRSS = metric.MetricMetadata{"sys.rss", ""} ) // getCgoMemStats is a function that fetches stats for the C++ portion of the code. @@ -61,8 +61,7 @@ var getCgoMemStats func() (uint64, uint64, error) // the resulting information in a format that can be easily consumed by status // logging systems. type RuntimeStatSampler struct { - clock *hlc.Clock - registry *metric.Registry + clock *hlc.Clock // The last sampled values of some statistics are kept only to compute // derivative statistics. @@ -92,24 +91,40 @@ type RuntimeStatSampler struct { // MakeRuntimeStatSampler constructs a new RuntimeStatSampler object. 
func MakeRuntimeStatSampler(clock *hlc.Clock, reg *metric.Registry) RuntimeStatSampler { - return RuntimeStatSampler{ - registry: reg, + r := RuntimeStatSampler{ clock: clock, - cgoCalls: reg.Gauge(nameCgoCalls), - goroutines: reg.Gauge(nameGoroutines), - goAllocBytes: reg.Gauge(nameGoAllocBytes), - goTotalBytes: reg.Gauge(nameGoTotalBytes), - cgoAllocBytes: reg.Gauge(nameCgoAllocBytes), - cgoTotalBytes: reg.Gauge(nameCgoTotalBytes), - gcCount: reg.Gauge(nameGCCount), - gcPauseNS: reg.Gauge(nameGCPauseNS), - gcPausePercent: reg.GaugeFloat64(nameGCPausePercent), - cpuUserNS: reg.Gauge(nameCPUUserNS), - cpuUserPercent: reg.GaugeFloat64(nameCPUUserPercent), - cpuSysNS: reg.Gauge(nameCPUSysNS), - cpuSysPercent: reg.GaugeFloat64(nameCPUSysPercent), - rss: reg.Gauge(nameRSS), + cgoCalls: metric.NewGauge(metaCgoCalls), + goroutines: metric.NewGauge(metaGoroutines), + goAllocBytes: metric.NewGauge(metaGoAllocBytes), + goTotalBytes: metric.NewGauge(metaGoTotalBytes), + cgoAllocBytes: metric.NewGauge(metaCgoAllocBytes), + cgoTotalBytes: metric.NewGauge(metaCgoTotalBytes), + gcCount: metric.NewGauge(metaGCCount), + gcPauseNS: metric.NewGauge(metaGCPauseNS), + gcPausePercent: metric.NewGaugeFloat64(metaGCPausePercent), + cpuUserNS: metric.NewGauge(metaCPUUserNS), + cpuUserPercent: metric.NewGaugeFloat64(metaCPUUserPercent), + cpuSysNS: metric.NewGauge(metaCPUSysNS), + cpuSysPercent: metric.NewGaugeFloat64(metaCPUSysPercent), + rss: metric.NewGauge(metaRSS), } + + reg.AddMetric(r.cgoCalls) + reg.AddMetric(r.goroutines) + reg.AddMetric(r.goAllocBytes) + reg.AddMetric(r.goTotalBytes) + reg.AddMetric(r.cgoAllocBytes) + reg.AddMetric(r.cgoTotalBytes) + reg.AddMetric(r.gcCount) + reg.AddMetric(r.gcPauseNS) + reg.AddMetric(r.gcPausePercent) + reg.AddMetric(r.cpuUserNS) + reg.AddMetric(r.cpuUserPercent) + reg.AddMetric(r.cpuSysNS) + reg.AddMetric(r.cpuSysPercent) + reg.AddMetric(r.rss) + + return r } // SampleEnvironment queries the runtime system for various interesting metrics, diff --git a/server/testserver.go b/server/testserver.go index 8a455eb00af2..4d595a90c563 100644 --- a/server/testserver.go +++ b/server/testserver.go @@ -326,7 +326,7 @@ func (ts *TestServer) MustGetSQLCounter(name string) int64 { var c int64 var found bool - ts.sqlExecutor.Registry().Each(func(n string, v interface{}) { + ts.registry.Each(func(n string, v interface{}) { if name == n { c = v.(*metric.Counter).Count() found = true diff --git a/sql/executor.go b/sql/executor.go index 839bdd275c26..619fc3298e6c 100644 --- a/sql/executor.go +++ b/sql/executor.go @@ -53,19 +53,19 @@ const sqlTxnName string = "sql txn" const sqlImplicitTxnName string = "sql txn implicit" // Fully-qualified names for metrics. 
-const ( - MetricLatencyName = "sql.latency" - MetricTxnBeginName = "sql.txn.begin.count" - MetricTxnCommitName = "sql.txn.commit.count" - MetricTxnAbortName = "sql.txn.abort.count" - MetricTxnRollbackName = "sql.txn.rollback.count" - MetricSelectName = "sql.select.count" - MetricUpdateName = "sql.update.count" - MetricInsertName = "sql.insert.count" - MetricDeleteName = "sql.delete.count" - MetricDdlName = "sql.ddl.count" - MetricMiscName = "sql.misc.count" - MetricQueryName = "sql.query.count" +var ( + MetaLatency = metric.MetricMetadata{"sql.latency", ""} + MetaTxnBegin = metric.MetricMetadata{"sql.txn.begin.count", ""} + MetaTxnCommit = metric.MetricMetadata{"sql.txn.commit.count", ""} + MetaTxnAbort = metric.MetricMetadata{"sql.txn.abort.count", ""} + MetaTxnRollback = metric.MetricMetadata{"sql.txn.rollback.count", ""} + MetaSelect = metric.MetricMetadata{"sql.select.count", ""} + MetaUpdate = metric.MetricMetadata{"sql.update.count", ""} + MetaInsert = metric.MetricMetadata{"sql.insert.count", ""} + MetaDelete = metric.MetricMetadata{"sql.delete.count", ""} + MetaDdl = metric.MetricMetadata{"sql.ddl.count", ""} + MetaMisc = metric.MetricMetadata{"sql.misc.count", ""} + MetaQuery = metric.MetricMetadata{"sql.query.count", ""} ) // TODO(radu): experimental code for testing distSQL flows. @@ -140,7 +140,6 @@ type Executor struct { reCache *parser.RegexpCache // Transient stats. - registry *metric.Registry latency metric.Histograms selectCount *metric.Counter txnBeginCount *metric.Counter @@ -249,20 +248,33 @@ func NewExecutor(ctx ExecutorContext, stopper *stop.Stopper, registry *metric.Re ctx: ctx, reCache: parser.NewRegexpCache(512), - registry: registry, - latency: registry.Latency(MetricLatencyName), - txnBeginCount: registry.Counter(MetricTxnBeginName), - txnCommitCount: registry.Counter(MetricTxnCommitName), - txnAbortCount: registry.Counter(MetricTxnAbortName), - txnRollbackCount: registry.Counter(MetricTxnRollbackName), - selectCount: registry.Counter(MetricSelectName), - updateCount: registry.Counter(MetricUpdateName), - insertCount: registry.Counter(MetricInsertName), - deleteCount: registry.Counter(MetricDeleteName), - ddlCount: registry.Counter(MetricDdlName), - miscCount: registry.Counter(MetricMiscName), - queryCount: registry.Counter(MetricQueryName), + latency: metric.NewLatency(MetaLatency), + txnBeginCount: metric.NewCounter(MetaTxnBegin), + txnCommitCount: metric.NewCounter(MetaTxnCommit), + txnAbortCount: metric.NewCounter(MetaTxnAbort), + txnRollbackCount: metric.NewCounter(MetaTxnRollback), + selectCount: metric.NewCounter(MetaSelect), + updateCount: metric.NewCounter(MetaUpdate), + insertCount: metric.NewCounter(MetaInsert), + deleteCount: metric.NewCounter(MetaDelete), + ddlCount: metric.NewCounter(MetaDdl), + miscCount: metric.NewCounter(MetaMisc), + queryCount: metric.NewCounter(MetaQuery), } + + registry.AddMetricGroup(exec.latency) + registry.AddMetric(exec.txnBeginCount) + registry.AddMetric(exec.txnCommitCount) + registry.AddMetric(exec.txnAbortCount) + registry.AddMetric(exec.txnRollbackCount) + registry.AddMetric(exec.selectCount) + registry.AddMetric(exec.updateCount) + registry.AddMetric(exec.insertCount) + registry.AddMetric(exec.deleteCount) + registry.AddMetric(exec.ddlCount) + registry.AddMetric(exec.miscCount) + registry.AddMetric(exec.queryCount) + exec.systemConfigCond = sync.NewCond(exec.systemConfigMu.RLocker()) gossipUpdateC := ctx.Gossip.RegisterSystemConfigChannel() @@ -1167,12 +1179,6 @@ func (e *Executor) updateStmtCounts(stmt 
parser.Statement) { } } -// Registry returns a registry with the metrics tracked by this executor, which can be used to -// access its stats or be added to another registry. -func (e *Executor) Registry() *metric.Registry { - return e.registry -} - // golangFillQueryArguments populates the placeholder map with // types and values from an array of Go values. // TODO: This does not support arguments of the SQL 'Date' type, as there is not diff --git a/sql/metric_test.go b/sql/metric_test.go index 667601bf04e1..65f880552bd5 100644 --- a/sql/metric_test.go +++ b/sql/metric_test.go @@ -74,16 +74,16 @@ func TestQueryCounts(t *testing.T) { t.Fatal(err) } - checkCounterEQ(t, s, sql.MetricTxnBeginName, tc.txnBeginCount) - checkCounterEQ(t, s, sql.MetricTxnCommitName, tc.txnCommitCount) - checkCounterEQ(t, s, sql.MetricTxnRollbackName, tc.txnRollbackCount) - checkCounterEQ(t, s, sql.MetricTxnAbortName, 0) - checkCounterEQ(t, s, sql.MetricSelectName, tc.selectCount) - checkCounterEQ(t, s, sql.MetricUpdateName, tc.updateCount) - checkCounterEQ(t, s, sql.MetricInsertName, tc.insertCount) - checkCounterEQ(t, s, sql.MetricDeleteName, tc.deleteCount) - checkCounterEQ(t, s, sql.MetricDdlName, tc.ddlCount) - checkCounterEQ(t, s, sql.MetricMiscName, tc.miscCount) + checkCounterEQ(t, s, sql.MetaTxnBegin, tc.txnBeginCount) + checkCounterEQ(t, s, sql.MetaTxnCommit, tc.txnCommitCount) + checkCounterEQ(t, s, sql.MetaTxnRollback, tc.txnRollbackCount) + checkCounterEQ(t, s, sql.MetaTxnAbort, 0) + checkCounterEQ(t, s, sql.MetaSelect, tc.selectCount) + checkCounterEQ(t, s, sql.MetaUpdate, tc.updateCount) + checkCounterEQ(t, s, sql.MetaInsert, tc.insertCount) + checkCounterEQ(t, s, sql.MetaDelete, tc.deleteCount) + checkCounterEQ(t, s, sql.MetaDdl, tc.ddlCount) + checkCounterEQ(t, s, sql.MetaMisc, tc.miscCount) // Everything after this query will also fail, so quit now to avoid deluge of errors. 
if t.Failed() { @@ -134,11 +134,11 @@ func TestAbortCountConflictingWrites(t *testing.T) { t.Fatal(err) } - checkCounterEQ(t, s, sql.MetricTxnAbortName, 1) - checkCounterEQ(t, s, sql.MetricTxnBeginName, 1) - checkCounterEQ(t, s, sql.MetricTxnRollbackName, 0) - checkCounterEQ(t, s, sql.MetricTxnCommitName, 0) - checkCounterEQ(t, s, sql.MetricInsertName, 1) + checkCounterEQ(t, s, sql.MetaTxnAbort, 1) + checkCounterEQ(t, s, sql.MetaTxnBegin, 1) + checkCounterEQ(t, s, sql.MetaTxnRollback, 0) + checkCounterEQ(t, s, sql.MetaTxnCommit, 0) + checkCounterEQ(t, s, sql.MetaInsert, 1) } // TestErrorDuringTransaction tests that the transaction abort count goes up when a query @@ -158,7 +158,7 @@ func TestAbortCountErrorDuringTransaction(t *testing.T) { t.Fatal("Expected an error but didn't get one") } - checkCounterEQ(t, s, sql.MetricTxnAbortName, 1) - checkCounterEQ(t, s, sql.MetricTxnBeginName, 1) - checkCounterEQ(t, s, sql.MetricSelectName, 1) + checkCounterEQ(t, s, sql.MetaTxnAbort, 1) + checkCounterEQ(t, s, sql.MetaTxnBegin, 1) + checkCounterEQ(t, s, sql.MetaSelect, 1) } diff --git a/sql/metric_util_test.go b/sql/metric_util_test.go index b578b35683bd..8a20a5f1dd5e 100644 --- a/sql/metric_util_test.go +++ b/sql/metric_util_test.go @@ -22,21 +22,22 @@ import ( "testing" "github.com/cockroachdb/cockroach/testutils/serverutils" + "github.com/cockroachdb/cockroach/util/metric" "github.com/pkg/errors" ) func checkCounterEQ( - t *testing.T, s serverutils.TestServerInterface, key string, e int64, + t *testing.T, s serverutils.TestServerInterface, meta metric.MetricMetadata, e int64, ) { - if a := s.MustGetSQLCounter(key); a != e { - t.Error(errors.Errorf("stat %s: actual %d != expected %d", key, a, e)) + if a := s.MustGetSQLCounter(meta.Name); a != e { + t.Error(errors.Errorf("stat %s: actual %d != expected %d", meta.Name, a, e)) } } func checkCounterGE( - t *testing.T, s serverutils.TestServerInterface, key string, e int64, + t *testing.T, s serverutils.TestServerInterface, meta metric.MetricMetadata, e int64, ) { - if a := s.MustGetSQLCounter(key); a < e { - t.Error(errors.Errorf("stat %s: expected: actual %d >= %d", key, a, e)) + if a := s.MustGetSQLCounter(meta.Name); a < e { + t.Error(errors.Errorf("stat %s: expected: actual %d >= %d", meta.Name, a, e)) } } diff --git a/sql/pgwire/server.go b/sql/pgwire/server.go index 79b52e38b881..723490fcaccd 100644 --- a/sql/pgwire/server.go +++ b/sql/pgwire/server.go @@ -43,10 +43,10 @@ const ( ) // Fully-qualified names for metrics. -const ( - MetricConnsName = "sql.conns" - MetricBytesInName = "sql.bytesin" - MetricBytesOutName = "sql.bytesout" +var ( + MetaConns = metric.MetricMetadata{"sql.conns", ""} + MetaBytesIn = metric.MetricMetadata{"sql.bytesin", ""} + MetaBytesOut = metric.MetricMetadata{"sql.bytesout", ""} ) const ( @@ -82,11 +82,15 @@ type serverMetrics struct { } func newServerMetrics(reg *metric.Registry) *serverMetrics { - return &serverMetrics{ - conns: reg.Counter(MetricConnsName), - bytesInCount: reg.Counter(MetricBytesInName), - bytesOutCount: reg.Counter(MetricBytesOutName), + sm := &serverMetrics{ + conns: metric.NewCounter(MetaConns), + bytesInCount: metric.NewCounter(MetaBytesIn), + bytesOutCount: metric.NewCounter(MetaBytesOut), } + reg.AddMetric(sm.conns) + reg.AddMetric(sm.bytesInCount) + reg.AddMetric(sm.bytesOutCount) + return sm } // MakeServer creates a Server, adding network stats to the given Registry. 
diff --git a/sql/pgwire/types_test.go b/sql/pgwire/types_test.go index 4de49223415c..a20d98645136 100644 --- a/sql/pgwire/types_test.go +++ b/sql/pgwire/types_test.go @@ -83,7 +83,7 @@ func TestTimestampRoundtrip(t *testing.T) { } func BenchmarkWriteBinaryDecimal(b *testing.B) { - buf := writeBuffer{bytecount: metric.NewCounter()} + buf := writeBuffer{bytecount: metric.NewCounter(metric.MetricMetadata{"", ""})} dec := new(parser.DDecimal) dec.SetString("-1728718718271827121233.1212121212") @@ -102,7 +102,7 @@ func BenchmarkWriteBinaryDecimal(b *testing.B) { } func BenchmarkDecodeBinaryDecimal(b *testing.B) { - wbuf := writeBuffer{bytecount: metric.NewCounter()} + wbuf := writeBuffer{bytecount: metric.NewCounter(metric.MetricMetadata{"", ""})} expected := new(parser.DDecimal) expected.SetString("-1728718718271827121233.1212121212") diff --git a/sql/pgwire_test.go b/sql/pgwire_test.go index 99e881fda833..be5b647dac0a 100644 --- a/sql/pgwire_test.go +++ b/sql/pgwire_test.go @@ -1052,8 +1052,8 @@ func checkSQLNetworkMetrics( return -1, -1, err } - bytesIn := s.MustGetSQLNetworkCounter(pgwire.MetricBytesInName) - bytesOut := s.MustGetSQLNetworkCounter(pgwire.MetricBytesOutName) + bytesIn := s.MustGetSQLNetworkCounter(pgwire.MetaBytesIn.Name) + bytesOut := s.MustGetSQLNetworkCounter(pgwire.MetaBytesOut.Name) if a, min := bytesIn, minBytesIn; a < min { return bytesIn, bytesOut, errors.Errorf("bytesin %d < expected min %d", a, min) } @@ -1108,7 +1108,7 @@ func TestSQLNetworkMetrics(t *testing.T) { // Verify connection counter. expectConns := func(n int) { util.SucceedsSoon(t, func() error { - if conns := s.MustGetSQLNetworkCounter(pgwire.MetricConnsName); conns != int64(n) { + if conns := s.MustGetSQLNetworkCounter(pgwire.MetaConns.Name); conns != int64(n) { return errors.Errorf("connections %d != expected %d", conns, n) } return nil diff --git a/sql/txn_restart_test.go b/sql/txn_restart_test.go index b71f5b938599..0c707ca7874c 100644 --- a/sql/txn_restart_test.go +++ b/sql/txn_restart_test.go @@ -485,7 +485,7 @@ CREATE TABLE t.test (k INT PRIMARY KEY, v TEXT); // Also inject an error at RELEASE time, besides the error injected by magicVals. injectReleaseError := true - commitCount := s.MustGetSQLCounter(sql.MetricTxnCommitName) + commitCount := s.MustGetSQLCounter(sql.MetaTxnCommit.Name) // This is the magic. Run the txn closure until all the retries are exhausted. exec(t, sqlDB, rs, func(tx *gosql.Tx) bool { return runTestTxn(t, tc.magicVals, tc.expectedErr, &injectReleaseError, sqlDB, tx) @@ -511,7 +511,7 @@ CREATE TABLE t.test (k INT PRIMARY KEY, v TEXT); // Check that the commit counter was incremented. It could have been // incremented by more than 1 because of the transactions we use to force // aborts, plus who knows what else the server is doing in the background. - checkCounterGE(t, s, sql.MetricTxnCommitName, commitCount+1) + checkCounterGE(t, s, sql.MetaTxnCommit, commitCount+1) // Clean up the table for the next test iteration. _, err = sqlDB.Exec("DELETE FROM t.test WHERE true") if err != nil { diff --git a/storage/store.go b/storage/store.go index 8396b2dc5539..447d212e01b5 100644 --- a/storage/store.go +++ b/storage/store.go @@ -574,213 +574,6 @@ var _ base.ModuleTestingKnobs = &StoreTestingKnobs{} // ModuleTestingKnobs is part of the base.ModuleTestingKnobs interface. func (*StoreTestingKnobs) ModuleTestingKnobs() {} -type storeMetrics struct { - registry *metric.Registry - - // Range data metrics. - replicaCount *metric.Counter // Does not include reserved replicas. 
- reservedReplicaCount *metric.Counter - leaderRangeCount *metric.Gauge - replicatedRangeCount *metric.Gauge - replicationPendingRangeCount *metric.Gauge - availableRangeCount *metric.Gauge - - // Lease data metrics. - leaseRequestSuccessCount *metric.Counter - leaseRequestErrorCount *metric.Counter - - // Storage metrics. - liveBytes *metric.Gauge - keyBytes *metric.Gauge - valBytes *metric.Gauge - intentBytes *metric.Gauge - liveCount *metric.Gauge - keyCount *metric.Gauge - valCount *metric.Gauge - intentCount *metric.Gauge - intentAge *metric.Gauge - gcBytesAge *metric.Gauge - lastUpdateNanos *metric.Gauge - capacity *metric.Gauge - available *metric.Gauge - reserved *metric.Counter - sysBytes *metric.Gauge - sysCount *metric.Gauge - - // RocksDB metrics. - rdbBlockCacheHits *metric.Gauge - rdbBlockCacheMisses *metric.Gauge - rdbBlockCacheUsage *metric.Gauge - rdbBlockCachePinnedUsage *metric.Gauge - rdbBloomFilterPrefixChecked *metric.Gauge - rdbBloomFilterPrefixUseful *metric.Gauge - rdbMemtableHits *metric.Gauge - rdbMemtableMisses *metric.Gauge - rdbMemtableTotalSize *metric.Gauge - rdbFlushes *metric.Gauge - rdbCompactions *metric.Gauge - rdbTableReadersMemEstimate *metric.Gauge - rdbReadAmplification *metric.Gauge - - // Range event metrics. - rangeSplits *metric.Counter - rangeAdds *metric.Counter - rangeRemoves *metric.Counter - rangeSnapshotsGenerated *metric.Counter - rangeSnapshotsNormalApplied *metric.Counter - rangeSnapshotsPreemptiveApplied *metric.Counter - - // Raft processing metrics. - raftSelectDurationNanos *metric.Counter - raftWorkingDurationNanos *metric.Counter - raftTickingDurationNanos *metric.Counter - - // Stats for efficient merges. - // TODO(mrtracy): This should be removed as part of #4465. This is only - // maintained to keep the current structure of StatusSummaries; it would be - // better to convert the Gauges above into counters which are adjusted - // accordingly. - mu syncutil.Mutex - stats enginepb.MVCCStats -} - -func newStoreMetrics() *storeMetrics { - storeRegistry := metric.NewRegistry() - return &storeMetrics{ - registry: storeRegistry, - replicaCount: storeRegistry.Counter("replicas"), - reservedReplicaCount: storeRegistry.Counter("replicas.reserved"), - leaderRangeCount: storeRegistry.Gauge("ranges.leader"), - replicatedRangeCount: storeRegistry.Gauge("ranges.replicated"), - replicationPendingRangeCount: storeRegistry.Gauge("ranges.replication-pending"), - availableRangeCount: storeRegistry.Gauge("ranges.available"), - leaseRequestSuccessCount: storeRegistry.Counter("leases.success"), - leaseRequestErrorCount: storeRegistry.Counter("leases.error"), - liveBytes: storeRegistry.Gauge("livebytes"), - keyBytes: storeRegistry.Gauge("keybytes"), - valBytes: storeRegistry.Gauge("valbytes"), - intentBytes: storeRegistry.Gauge("intentbytes"), - liveCount: storeRegistry.Gauge("livecount"), - keyCount: storeRegistry.Gauge("keycount"), - valCount: storeRegistry.Gauge("valcount"), - intentCount: storeRegistry.Gauge("intentcount"), - intentAge: storeRegistry.Gauge("intentage"), - gcBytesAge: storeRegistry.Gauge("gcbytesage"), - lastUpdateNanos: storeRegistry.Gauge("lastupdatenanos"), - capacity: storeRegistry.Gauge("capacity"), - available: storeRegistry.Gauge("capacity.available"), - reserved: storeRegistry.Counter("capacity.reserved"), - sysBytes: storeRegistry.Gauge("sysbytes"), - sysCount: storeRegistry.Gauge("syscount"), - - // RocksDB metrics. 
- rdbBlockCacheHits: storeRegistry.Gauge("rocksdb.block.cache.hits"), - rdbBlockCacheMisses: storeRegistry.Gauge("rocksdb.block.cache.misses"), - rdbBlockCacheUsage: storeRegistry.Gauge("rocksdb.block.cache.usage"), - rdbBlockCachePinnedUsage: storeRegistry.Gauge("rocksdb.block.cache.pinned-usage"), - rdbBloomFilterPrefixChecked: storeRegistry.Gauge("rocksdb.bloom.filter.prefix.checked"), - rdbBloomFilterPrefixUseful: storeRegistry.Gauge("rocksdb.bloom.filter.prefix.useful"), - rdbMemtableHits: storeRegistry.Gauge("rocksdb.memtable.hits"), - rdbMemtableMisses: storeRegistry.Gauge("rocksdb.memtable.misses"), - rdbMemtableTotalSize: storeRegistry.Gauge("rocksdb.memtable.total-size"), - rdbFlushes: storeRegistry.Gauge("rocksdb.flushes"), - rdbCompactions: storeRegistry.Gauge("rocksdb.compactions"), - rdbTableReadersMemEstimate: storeRegistry.Gauge("rocksdb.table-readers-mem-estimate"), - rdbReadAmplification: storeRegistry.Gauge("rocksdb.read-amplification"), - - // Range event metrics. - rangeSplits: storeRegistry.Counter("range.splits"), - rangeAdds: storeRegistry.Counter("range.adds"), - rangeRemoves: storeRegistry.Counter("range.removes"), - rangeSnapshotsGenerated: storeRegistry.Counter("range.snapshots.generated"), - rangeSnapshotsNormalApplied: storeRegistry.Counter("range.snapshots.normal-applied"), - rangeSnapshotsPreemptiveApplied: storeRegistry.Counter("range.snapshots.preemptive-applied"), - - // Raft processing metrics. - raftSelectDurationNanos: storeRegistry.Counter("process-raft.waitingnanos"), - raftWorkingDurationNanos: storeRegistry.Counter("process-raft.workingnanos"), - raftTickingDurationNanos: storeRegistry.Counter("process-raft.tickingnanos"), - } -} - -// updateGaugesLocked breaks out individual metrics from the MVCCStats object. -// This process should be locked with each stat application to ensure that all -// gauges increase/decrease in step with the application of updates. However, -// this locking is not exposed to the registry level, and therefore a single -// snapshot of these gauges in the registry might mix the values of two -// subsequent updates. 
-func (sm *storeMetrics) updateMVCCGaugesLocked() { - sm.liveBytes.Update(sm.stats.LiveBytes) - sm.keyBytes.Update(sm.stats.KeyBytes) - sm.valBytes.Update(sm.stats.ValBytes) - sm.intentBytes.Update(sm.stats.IntentBytes) - sm.liveCount.Update(sm.stats.LiveCount) - sm.keyCount.Update(sm.stats.KeyCount) - sm.valCount.Update(sm.stats.ValCount) - sm.intentCount.Update(sm.stats.IntentCount) - sm.intentAge.Update(sm.stats.IntentAge) - sm.gcBytesAge.Update(sm.stats.GCBytesAge) - sm.lastUpdateNanos.Update(sm.stats.LastUpdateNanos) - sm.sysBytes.Update(sm.stats.SysBytes) - sm.sysCount.Update(sm.stats.SysCount) -} - -func (sm *storeMetrics) updateCapacityGauges(capacity roachpb.StoreCapacity) { - sm.mu.Lock() - defer sm.mu.Unlock() - sm.capacity.Update(capacity.Capacity) - sm.available.Update(capacity.Available) -} - -func (sm *storeMetrics) updateReplicationGauges(leaders, replicated, pending, available int64) { - sm.mu.Lock() - defer sm.mu.Unlock() - sm.leaderRangeCount.Update(leaders) - sm.replicatedRangeCount.Update(replicated) - sm.replicationPendingRangeCount.Update(pending) - sm.availableRangeCount.Update(available) -} - -func (sm *storeMetrics) addMVCCStats(stats enginepb.MVCCStats) { - sm.mu.Lock() - defer sm.mu.Unlock() - sm.stats.Add(stats) - sm.updateMVCCGaugesLocked() -} - -func (sm *storeMetrics) subtractMVCCStats(stats enginepb.MVCCStats) { - sm.mu.Lock() - defer sm.mu.Unlock() - sm.stats.Subtract(stats) - sm.updateMVCCGaugesLocked() -} - -func (sm *storeMetrics) updateRocksDBStats(stats engine.Stats) { - // We do not grab a lock here, because it's not possible to get a point-in- - // time snapshot of RocksDB stats. Retrieving RocksDB stats doesn't grab any - // locks, and there's no way to retrieve multiple stats in a single operation. - sm.rdbBlockCacheHits.Update(stats.BlockCacheHits) - sm.rdbBlockCacheMisses.Update(stats.BlockCacheMisses) - sm.rdbBlockCacheUsage.Update(stats.BlockCacheUsage) - sm.rdbBlockCachePinnedUsage.Update(stats.BlockCachePinnedUsage) - sm.rdbBloomFilterPrefixUseful.Update(stats.BloomFilterPrefixUseful) - sm.rdbBloomFilterPrefixChecked.Update(stats.BloomFilterPrefixChecked) - sm.rdbMemtableHits.Update(stats.MemtableHits) - sm.rdbMemtableMisses.Update(stats.MemtableMisses) - sm.rdbMemtableTotalSize.Update(stats.MemtableTotalSize) - sm.rdbFlushes.Update(stats.Flushes) - sm.rdbCompactions.Update(stats.Compactions) - sm.rdbTableReadersMemEstimate.Update(stats.TableReadersMemEstimate) -} - -func (sm *storeMetrics) leaseRequestComplete(success bool) { - if success { - sm.leaseRequestSuccessCount.Inc(1) - } else { - sm.leaseRequestErrorCount.Inc(1) - } -} - // Valid returns true if the StoreContext is populated correctly. // We don't check for Gossip and DB since some of our tests pass // that as nil. diff --git a/storage/store_metrics.go b/storage/store_metrics.go new file mode 100644 index 000000000000..650ed4bc229f --- /dev/null +++ b/storage/store_metrics.go @@ -0,0 +1,336 @@ +// Copyright 2016 The Cockroach Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. 
See the License for the specific language governing +// permissions and limitations under the License. +// +// Author: Marc Berhault (marc@cockroachlabs.com) + +package storage + +import ( + "github.com/cockroachdb/cockroach/roachpb" + "github.com/cockroachdb/cockroach/storage/engine" + "github.com/cockroachdb/cockroach/storage/engine/enginepb" + "github.com/cockroachdb/cockroach/util/metric" + "github.com/cockroachdb/cockroach/util/syncutil" +) + +var ( + metaReplicaCount = metric.MetricMetadata{"replicas", ""} + metaReservedReplicaCount = metric.MetricMetadata{"replicas.reserved", ""} + metaLeaderRangeCount = metric.MetricMetadata{"ranges.leader", ""} + metaReplicatedRangeCount = metric.MetricMetadata{"ranges.replicated", ""} + metaReplicationPendingRangeCount = metric.MetricMetadata{"ranges.replication-pending", ""} + metaAvailableRangeCount = metric.MetricMetadata{"ranges.available", ""} + metaLeaseRequestSuccessCount = metric.MetricMetadata{"leases.success", ""} + metaLeaseRequestErrorCount = metric.MetricMetadata{"leases.error", ""} + metaLiveBytes = metric.MetricMetadata{"livebytes", ""} + metaKeyBytes = metric.MetricMetadata{"keybytes", ""} + metaValBytes = metric.MetricMetadata{"valbytes", ""} + metaIntentBytes = metric.MetricMetadata{"intentbytes", ""} + metaLiveCount = metric.MetricMetadata{"livecount", ""} + metaKeyCount = metric.MetricMetadata{"keycount", ""} + metaValCount = metric.MetricMetadata{"valcount", ""} + metaIntentCount = metric.MetricMetadata{"intentcount", ""} + metaIntentAge = metric.MetricMetadata{"intentage", ""} + metaGcBytesAge = metric.MetricMetadata{"gcbytesage", ""} + metaLastUpdateNanos = metric.MetricMetadata{"lastupdatenanos", ""} + metaCapacity = metric.MetricMetadata{"capacity", ""} + metaAvailable = metric.MetricMetadata{"capacity.available", ""} + metaReserved = metric.MetricMetadata{"capacity.reserved", ""} + metaSysBytes = metric.MetricMetadata{"sysbytes", ""} + metaSysCount = metric.MetricMetadata{"syscount", ""} + + // RocksDB metrics. + metaRdbBlockCacheHits = metric.MetricMetadata{"rocksdb.block.cache.hits", ""} + metaRdbBlockCacheMisses = metric.MetricMetadata{"rocksdb.block.cache.misses", ""} + metaRdbBlockCacheUsage = metric.MetricMetadata{"rocksdb.block.cache.usage", ""} + metaRdbBlockCachePinnedUsage = metric.MetricMetadata{"rocksdb.block.cache.pinned-usage", ""} + metaRdbBloomFilterPrefixChecked = metric.MetricMetadata{"rocksdb.bloom.filter.prefix.checked", ""} + metaRdbBloomFilterPrefixUseful = metric.MetricMetadata{"rocksdb.bloom.filter.prefix.useful", ""} + metaRdbMemtableHits = metric.MetricMetadata{"rocksdb.memtable.hits", ""} + metaRdbMemtableMisses = metric.MetricMetadata{"rocksdb.memtable.misses", ""} + metaRdbMemtableTotalSize = metric.MetricMetadata{"rocksdb.memtable.total-size", ""} + metaRdbFlushes = metric.MetricMetadata{"rocksdb.flushes", ""} + metaRdbCompactions = metric.MetricMetadata{"rocksdb.compactions", ""} + metaRdbTableReadersMemEstimate = metric.MetricMetadata{"rocksdb.table-readers-mem-estimate", ""} + metaRdbReadAmplification = metric.MetricMetadata{"rocksdb.read-amplification", ""} + + // Range event metrics. 
+ metaRangeSplits = metric.MetricMetadata{"range.splits", ""} + metaRangeAdds = metric.MetricMetadata{"range.adds", ""} + metaRangeRemoves = metric.MetricMetadata{"range.removes", ""} + metaRangeSnapshotsGenerated = metric.MetricMetadata{"range.snapshots.generated", ""} + metaRangeSnapshotsNormalApplied = metric.MetricMetadata{"range.snapshots.normal-applied", ""} + metaRangeSnapshotsPreemptiveApplied = metric.MetricMetadata{"range.snapshots.preemptive-applied", ""} + + // Raft processing metrics. + metaRaftSelectDurationNanos = metric.MetricMetadata{"process-raft.waitingnanos", ""} + metaRaftWorkingDurationNanos = metric.MetricMetadata{"process-raft.workingnanos", ""} + metaRaftTickingDurationNanos = metric.MetricMetadata{"process-raft.tickingnanos", ""} +) + +type storeMetrics struct { + registry *metric.Registry + + // Range data metrics. + replicaCount *metric.Counter // Does not include reserved replicas. + reservedReplicaCount *metric.Counter + leaderRangeCount *metric.Gauge + replicatedRangeCount *metric.Gauge + replicationPendingRangeCount *metric.Gauge + availableRangeCount *metric.Gauge + + // Lease data metrics. + leaseRequestSuccessCount *metric.Counter + leaseRequestErrorCount *metric.Counter + + // Storage metrics. + liveBytes *metric.Gauge + keyBytes *metric.Gauge + valBytes *metric.Gauge + intentBytes *metric.Gauge + liveCount *metric.Gauge + keyCount *metric.Gauge + valCount *metric.Gauge + intentCount *metric.Gauge + intentAge *metric.Gauge + gcBytesAge *metric.Gauge + lastUpdateNanos *metric.Gauge + capacity *metric.Gauge + available *metric.Gauge + reserved *metric.Counter + sysBytes *metric.Gauge + sysCount *metric.Gauge + + // RocksDB metrics. + rdbBlockCacheHits *metric.Gauge + rdbBlockCacheMisses *metric.Gauge + rdbBlockCacheUsage *metric.Gauge + rdbBlockCachePinnedUsage *metric.Gauge + rdbBloomFilterPrefixChecked *metric.Gauge + rdbBloomFilterPrefixUseful *metric.Gauge + rdbMemtableHits *metric.Gauge + rdbMemtableMisses *metric.Gauge + rdbMemtableTotalSize *metric.Gauge + rdbFlushes *metric.Gauge + rdbCompactions *metric.Gauge + rdbTableReadersMemEstimate *metric.Gauge + rdbReadAmplification *metric.Gauge + + // Range event metrics. + rangeSplits *metric.Counter + rangeAdds *metric.Counter + rangeRemoves *metric.Counter + rangeSnapshotsGenerated *metric.Counter + rangeSnapshotsNormalApplied *metric.Counter + rangeSnapshotsPreemptiveApplied *metric.Counter + + // Raft processing metrics. + raftSelectDurationNanos *metric.Counter + raftWorkingDurationNanos *metric.Counter + raftTickingDurationNanos *metric.Counter + + // Stats for efficient merges. + // TODO(mrtracy): This should be removed as part of #4465. This is only + // maintained to keep the current structure of StatusSummaries; it would be + // better to convert the Gauges above into counters which are adjusted + // accordingly. 
+ mu syncutil.Mutex + stats enginepb.MVCCStats +} + +func newStoreMetrics() *storeMetrics { + storeRegistry := metric.NewRegistry() + sm := &storeMetrics{ + registry: storeRegistry, + replicaCount: metric.NewCounter(metaReplicaCount), + reservedReplicaCount: metric.NewCounter(metaReservedReplicaCount), + leaderRangeCount: metric.NewGauge(metaLeaderRangeCount), + replicatedRangeCount: metric.NewGauge(metaReplicatedRangeCount), + replicationPendingRangeCount: metric.NewGauge(metaReplicationPendingRangeCount), + availableRangeCount: metric.NewGauge(metaAvailableRangeCount), + leaseRequestSuccessCount: metric.NewCounter(metaLeaseRequestSuccessCount), + leaseRequestErrorCount: metric.NewCounter(metaLeaseRequestErrorCount), + liveBytes: metric.NewGauge(metaLiveBytes), + keyBytes: metric.NewGauge(metaKeyBytes), + valBytes: metric.NewGauge(metaValBytes), + intentBytes: metric.NewGauge(metaIntentBytes), + liveCount: metric.NewGauge(metaLiveCount), + keyCount: metric.NewGauge(metaKeyCount), + valCount: metric.NewGauge(metaValCount), + intentCount: metric.NewGauge(metaIntentCount), + intentAge: metric.NewGauge(metaIntentAge), + gcBytesAge: metric.NewGauge(metaGcBytesAge), + lastUpdateNanos: metric.NewGauge(metaLastUpdateNanos), + capacity: metric.NewGauge(metaCapacity), + available: metric.NewGauge(metaAvailable), + reserved: metric.NewCounter(metaReserved), + sysBytes: metric.NewGauge(metaSysBytes), + sysCount: metric.NewGauge(metaSysCount), + + // RocksDB metrics. + rdbBlockCacheHits: metric.NewGauge(metaRdbBlockCacheHits), + rdbBlockCacheMisses: metric.NewGauge(metaRdbBlockCacheMisses), + rdbBlockCacheUsage: metric.NewGauge(metaRdbBlockCacheUsage), + rdbBlockCachePinnedUsage: metric.NewGauge(metaRdbBlockCachePinnedUsage), + rdbBloomFilterPrefixChecked: metric.NewGauge(metaRdbBloomFilterPrefixChecked), + rdbBloomFilterPrefixUseful: metric.NewGauge(metaRdbBloomFilterPrefixUseful), + rdbMemtableHits: metric.NewGauge(metaRdbMemtableHits), + rdbMemtableMisses: metric.NewGauge(metaRdbMemtableMisses), + rdbMemtableTotalSize: metric.NewGauge(metaRdbMemtableTotalSize), + rdbFlushes: metric.NewGauge(metaRdbFlushes), + rdbCompactions: metric.NewGauge(metaRdbCompactions), + rdbTableReadersMemEstimate: metric.NewGauge(metaRdbTableReadersMemEstimate), + rdbReadAmplification: metric.NewGauge(metaRdbReadAmplification), + + // Range event metrics. + rangeSplits: metric.NewCounter(metaRangeSplits), + rangeAdds: metric.NewCounter(metaRangeAdds), + rangeRemoves: metric.NewCounter(metaRangeRemoves), + rangeSnapshotsGenerated: metric.NewCounter(metaRangeSnapshotsGenerated), + rangeSnapshotsNormalApplied: metric.NewCounter(metaRangeSnapshotsNormalApplied), + rangeSnapshotsPreemptiveApplied: metric.NewCounter(metaRangeSnapshotsPreemptiveApplied), + + // Raft processing metrics. 
+		raftSelectDurationNanos:  metric.NewCounter(metaRaftSelectDurationNanos),
+		raftWorkingDurationNanos: metric.NewCounter(metaRaftWorkingDurationNanos),
+		raftTickingDurationNanos: metric.NewCounter(metaRaftTickingDurationNanos),
+	}
+
+	storeRegistry.AddMetric(sm.replicaCount)
+	storeRegistry.AddMetric(sm.reservedReplicaCount)
+	storeRegistry.AddMetric(sm.leaderRangeCount)
+	storeRegistry.AddMetric(sm.replicatedRangeCount)
+	storeRegistry.AddMetric(sm.replicationPendingRangeCount)
+	storeRegistry.AddMetric(sm.availableRangeCount)
+	storeRegistry.AddMetric(sm.leaseRequestSuccessCount)
+	storeRegistry.AddMetric(sm.leaseRequestErrorCount)
+	storeRegistry.AddMetric(sm.liveBytes)
+	storeRegistry.AddMetric(sm.keyBytes)
+	storeRegistry.AddMetric(sm.valBytes)
+	storeRegistry.AddMetric(sm.intentBytes)
+	storeRegistry.AddMetric(sm.liveCount)
+	storeRegistry.AddMetric(sm.keyCount)
+	storeRegistry.AddMetric(sm.valCount)
+	storeRegistry.AddMetric(sm.intentCount)
+	storeRegistry.AddMetric(sm.intentAge)
+	storeRegistry.AddMetric(sm.gcBytesAge)
+	storeRegistry.AddMetric(sm.lastUpdateNanos)
+	storeRegistry.AddMetric(sm.capacity)
+	storeRegistry.AddMetric(sm.available)
+	storeRegistry.AddMetric(sm.reserved)
+	storeRegistry.AddMetric(sm.sysBytes)
+	storeRegistry.AddMetric(sm.sysCount)
+	storeRegistry.AddMetric(sm.rdbBlockCacheHits)
+	storeRegistry.AddMetric(sm.rdbBlockCacheMisses)
+	storeRegistry.AddMetric(sm.rdbBlockCacheUsage)
+	storeRegistry.AddMetric(sm.rdbBlockCachePinnedUsage)
+	storeRegistry.AddMetric(sm.rdbBloomFilterPrefixChecked)
+	storeRegistry.AddMetric(sm.rdbBloomFilterPrefixUseful)
+	storeRegistry.AddMetric(sm.rdbMemtableHits)
+	storeRegistry.AddMetric(sm.rdbMemtableMisses)
+	storeRegistry.AddMetric(sm.rdbMemtableTotalSize)
+	storeRegistry.AddMetric(sm.rdbFlushes)
+	storeRegistry.AddMetric(sm.rdbCompactions)
+	storeRegistry.AddMetric(sm.rdbTableReadersMemEstimate)
+	storeRegistry.AddMetric(sm.rdbReadAmplification)
+	storeRegistry.AddMetric(sm.rangeSplits)
+	storeRegistry.AddMetric(sm.rangeAdds)
+	storeRegistry.AddMetric(sm.rangeRemoves)
+	storeRegistry.AddMetric(sm.rangeSnapshotsGenerated)
+	storeRegistry.AddMetric(sm.rangeSnapshotsNormalApplied)
+	storeRegistry.AddMetric(sm.rangeSnapshotsPreemptiveApplied)
+	storeRegistry.AddMetric(sm.raftSelectDurationNanos)
+	storeRegistry.AddMetric(sm.raftWorkingDurationNanos)
+	storeRegistry.AddMetric(sm.raftTickingDurationNanos)
+
+	return sm
+}
+
+// updateMVCCGaugesLocked breaks out individual metrics from the MVCCStats object.
+// This process should be locked with each stat application to ensure that all
+// gauges increase/decrease in step with the application of updates. However,
+// this locking is not exposed to the registry level, and therefore a single
+// snapshot of these gauges in the registry might mix the values of two
+// subsequent updates.
+func (sm *storeMetrics) updateMVCCGaugesLocked() { + sm.liveBytes.Update(sm.stats.LiveBytes) + sm.keyBytes.Update(sm.stats.KeyBytes) + sm.valBytes.Update(sm.stats.ValBytes) + sm.intentBytes.Update(sm.stats.IntentBytes) + sm.liveCount.Update(sm.stats.LiveCount) + sm.keyCount.Update(sm.stats.KeyCount) + sm.valCount.Update(sm.stats.ValCount) + sm.intentCount.Update(sm.stats.IntentCount) + sm.intentAge.Update(sm.stats.IntentAge) + sm.gcBytesAge.Update(sm.stats.GCBytesAge) + sm.lastUpdateNanos.Update(sm.stats.LastUpdateNanos) + sm.sysBytes.Update(sm.stats.SysBytes) + sm.sysCount.Update(sm.stats.SysCount) +} + +func (sm *storeMetrics) updateCapacityGauges(capacity roachpb.StoreCapacity) { + sm.mu.Lock() + defer sm.mu.Unlock() + sm.capacity.Update(capacity.Capacity) + sm.available.Update(capacity.Available) +} + +func (sm *storeMetrics) updateReplicationGauges(leaders, replicated, pending, available int64) { + sm.mu.Lock() + defer sm.mu.Unlock() + sm.leaderRangeCount.Update(leaders) + sm.replicatedRangeCount.Update(replicated) + sm.replicationPendingRangeCount.Update(pending) + sm.availableRangeCount.Update(available) +} + +func (sm *storeMetrics) addMVCCStats(stats enginepb.MVCCStats) { + sm.mu.Lock() + defer sm.mu.Unlock() + sm.stats.Add(stats) + sm.updateMVCCGaugesLocked() +} + +func (sm *storeMetrics) subtractMVCCStats(stats enginepb.MVCCStats) { + sm.mu.Lock() + defer sm.mu.Unlock() + sm.stats.Subtract(stats) + sm.updateMVCCGaugesLocked() +} + +func (sm *storeMetrics) updateRocksDBStats(stats engine.Stats) { + // We do not grab a lock here, because it's not possible to get a point-in- + // time snapshot of RocksDB stats. Retrieving RocksDB stats doesn't grab any + // locks, and there's no way to retrieve multiple stats in a single operation. + sm.rdbBlockCacheHits.Update(stats.BlockCacheHits) + sm.rdbBlockCacheMisses.Update(stats.BlockCacheMisses) + sm.rdbBlockCacheUsage.Update(stats.BlockCacheUsage) + sm.rdbBlockCachePinnedUsage.Update(stats.BlockCachePinnedUsage) + sm.rdbBloomFilterPrefixUseful.Update(stats.BloomFilterPrefixUseful) + sm.rdbBloomFilterPrefixChecked.Update(stats.BloomFilterPrefixChecked) + sm.rdbMemtableHits.Update(stats.MemtableHits) + sm.rdbMemtableMisses.Update(stats.MemtableMisses) + sm.rdbMemtableTotalSize.Update(stats.MemtableTotalSize) + sm.rdbFlushes.Update(stats.Flushes) + sm.rdbCompactions.Update(stats.Compactions) + sm.rdbTableReadersMemEstimate.Update(stats.TableReadersMemEstimate) +} + +func (sm *storeMetrics) leaseRequestComplete(success bool) { + if success { + sm.leaseRequestSuccessCount.Inc(1) + } else { + sm.leaseRequestErrorCount.Inc(1) + } +} diff --git a/util/metric/metric.go b/util/metric/metric.go index 12ecaa3ed406..938c3db8b1b3 100644 --- a/util/metric/metric.go +++ b/util/metric/metric.go @@ -33,41 +33,38 @@ import ( const histWrapNum = 4 // number of histograms to keep in rolling window -// A TimeScale is a named duration. -type TimeScale struct { - name string - d time.Duration -} - -// Name returns the name of the TimeScale. -func (ts TimeScale) Name() string { - return ts.name -} - -var ( - // Scale1M is a 1 minute window for windowed stats (e.g. Rates and Histograms). - Scale1M = TimeScale{"1m", 1 * time.Minute} - - // Scale10M is a 10 minute window for windowed stats (e.g. Rates and Histograms). - Scale10M = TimeScale{"10m", 10 * time.Minute} - - // Scale1H is a 1 hour window for windowed stats (e.g. Rates and Histograms). - Scale1H = TimeScale{"1h", time.Hour} -) - // Iterable provides a method for synchronized access to interior objects. 
 type Iterable interface {
-	// Each calls the given closure with each contained item.
-	Each(func(string, interface{}))
+	// GetName returns the fully-qualified name of the metric.
+	GetName() string
+	// GetHelp returns the help text for the metric.
+	GetHelp() string
+	// Inspect calls the given closure with each contained item.
+	Inspect(func(interface{}))
 }
 
-// PrometheusExportable provides a method to fill in a prometheus object.
+// PrometheusExportable is the standard interface for an individual metric
+// that can be exported to Prometheus.
 type PrometheusExportable interface {
 	// FillPrometheusMetric takes an initialized prometheus metric object and
 	// fills the appropriate fields for the metric type.
 	FillPrometheusMetric(promMetric *prometheusgo.MetricFamily)
 }
 
+// MetricMetadata holds metadata about a metric. It must be embedded in
+// each metric object.
+type MetricMetadata struct {
+	Name, Help string
+}
+
+func (m *MetricMetadata) GetName() string {
+	return m.Name
+}
+
+func (m *MetricMetadata) GetHelp() string {
+	return m.Help
+}
+
 var _ Iterable = &Gauge{}
 var _ Iterable = &GaugeFloat64{}
 var _ Iterable = &Counter{}
@@ -114,6 +111,7 @@ func maybeTick(m periodic) {
 
 // A Histogram is a wrapper around an hdrhistogram.WindowedHistogram.
 type Histogram struct {
+	MetricMetadata
 	maxVal int64
 
 	mu       syncutil.Mutex
@@ -125,14 +123,15 @@ type Histogram struct {
 // NewHistogram creates a new windowed HDRHistogram with the given parameters.
 // Data is kept in the active window for approximately the given duration.
 // See the documentation for hdrhistogram.WindowedHistogram for details.
-func NewHistogram(duration time.Duration, maxVal int64, sigFigs int) *Histogram {
-	h := &Histogram{}
-	h.maxVal = maxVal
-	h.nextT = now()
-	h.duration = duration
-
-	h.windowed = hdrhistogram.NewWindowed(histWrapNum, 0, h.maxVal, sigFigs)
-	return h
+func NewHistogram(metadata MetricMetadata, duration time.Duration,
+	maxVal int64, sigFigs int) *Histogram {
+	return &Histogram{
+		MetricMetadata: metadata,
+		maxVal:         maxVal,
+		nextT:          now(),
+		duration:       duration,
+		windowed:       hdrhistogram.NewWindowed(histWrapNum, 0, maxVal, sigFigs),
+	}
 }
 
 func (h *Histogram) tick() {
@@ -174,22 +173,12 @@ func (h *Histogram) Current() *hdrhistogram.Histogram {
 	return hdrhistogram.Import(export)
 }
 
-// Each calls the closure with the empty string and the receiver.
-func (h *Histogram) Each(f func(string, interface{})) {
+// Inspect calls the closure with the receiver.
+func (h *Histogram) Inspect(f func(interface{})) {
 	h.mu.Lock()
 	maybeTick(h)
 	h.mu.Unlock()
-	f("", h)
-}
-
-// Histograms is a map of Histogram metrics.
-type Histograms map[TimeScale]*Histogram
-
-// RecordValue calls through to each individual Histogram.
-func (hs Histograms) RecordValue(v int64) {
-	for _, h := range hs {
-		h.RecordValue(v)
-	}
+	f(h)
 }
 
 // FillPrometheusMetric fills the appropriate metric fields.
@@ -217,16 +206,17 @@ func (h *Histogram) FillPrometheusMetric(promMetric *prometheusgo.MetricFamily)
 
 // A Counter holds a single mutable atomic value.
 type Counter struct {
+	MetricMetadata
 	metrics.Counter
 }
 
 // NewCounter creates a counter.
-func NewCounter() *Counter {
-	return &Counter{metrics.NewCounter()}
+func NewCounter(metadata MetricMetadata) *Counter {
+	return &Counter{metadata, metrics.NewCounter()}
 }
 
-// Each calls the given closure with the empty string and itself.
-func (c *Counter) Each(f func(string, interface{})) { f("", c) }
+// Inspect calls the given closure with itself.
+func (c *Counter) Inspect(f func(interface{})) { f(c) }
 
 // MarshalJSON marshals to JSON.
 func (c *Counter) MarshalJSON() ([]byte, error) {
@@ -243,17 +233,17 @@ func (c *Counter) FillPrometheusMetric(promMetric *prometheusgo.MetricFamily) {
 
 // A Gauge atomically stores a single integer value.
 type Gauge struct {
+	MetricMetadata
 	metrics.Gauge
 }
 
 // NewGauge creates a Gauge.
-func NewGauge() *Gauge {
-	g := &Gauge{metrics.NewGauge()}
-	return g
+func NewGauge(metadata MetricMetadata) *Gauge {
+	return &Gauge{metadata, metrics.NewGauge()}
 }
 
-// Each calls the given closure with the empty string and itself.
-func (g *Gauge) Each(f func(string, interface{})) { f("", g) }
+// Inspect calls the given closure with itself.
+func (g *Gauge) Inspect(f func(interface{})) { f(g) }
 
 // MarshalJSON marshals to JSON.
 func (g *Gauge) MarshalJSON() ([]byte, error) {
@@ -270,17 +260,17 @@ func (g *Gauge) FillPrometheusMetric(promMetric *prometheusgo.MetricFamily) {
 
 // A GaugeFloat64 atomically stores a single float64 value.
 type GaugeFloat64 struct {
+	MetricMetadata
 	metrics.GaugeFloat64
 }
 
 // NewGaugeFloat64 creates a GaugeFloat64.
-func NewGaugeFloat64() *GaugeFloat64 {
-	g := &GaugeFloat64{metrics.NewGaugeFloat64()}
-	return g
+func NewGaugeFloat64(metadata MetricMetadata) *GaugeFloat64 {
+	return &GaugeFloat64{metadata, metrics.NewGaugeFloat64()}
 }
 
-// Each calls the given closure with the empty string and itself.
-func (g *GaugeFloat64) Each(f func(string, interface{})) { f("", g) }
+// Inspect calls the given closure with itself.
+func (g *GaugeFloat64) Inspect(f func(interface{})) { f(g) }
 
 // MarshalJSON marshals to JSON.
 func (g *GaugeFloat64) MarshalJSON() ([]byte, error) {
@@ -297,6 +287,7 @@ func (g *GaugeFloat64) FillPrometheusMetric(promMetric *prometheusgo.MetricFamil
 
 // A Rate is a exponential weighted moving average.
 type Rate struct {
+	MetricMetadata
 	mu       syncutil.Mutex // protects fields below
 	curSum   float64
 	wrapped  ewma.MovingAverage
@@ -306,7 +297,7 @@ type Rate struct {
 
 // NewRate creates an EWMA rate on the given timescale. Timescales at
 // or below 2s are illegal and will cause a panic.
-func NewRate(timescale time.Duration) *Rate {
+func NewRate(metadata MetricMetadata, timescale time.Duration) *Rate {
 	const tickInterval = time.Second
 	if timescale <= 2*time.Second {
 		panic(fmt.Sprintf("EWMA with per-second ticks makes no sense on timescale %s", timescale))
@@ -314,9 +305,10 @@ func NewRate(timescale time.Duration) *Rate {
 	avgAge := float64(timescale) / float64(2*tickInterval)
 
 	return &Rate{
-		interval: tickInterval,
-		nextT:    now(),
-		wrapped:  ewma.NewMovingAverage(avgAge),
+		MetricMetadata: metadata,
+		interval:       tickInterval,
+		nextT:          now(),
+		wrapped:        ewma.NewMovingAverage(avgAge),
 	}
 }
 
@@ -346,16 +338,16 @@ func (e *Rate) Add(v float64) {
 	e.mu.Unlock()
 }
 
-// Each calls the given closure with the empty string and the Rate's current
+// Inspect calls the given closure with the Rate's current
 // value. TODO(mrtracy): Fix this to pass the Rate object itself to 'f', to
 // match the 'visitor' behavior as the other metric types (currently, it passes
 // the current value of the Rate as a float64.)
-func (e *Rate) Each(f func(string, interface{})) {
+func (e *Rate) Inspect(f func(interface{})) {
	e.mu.Lock()
	maybeTick(e)
	v := e.wrapped.Value()
	e.mu.Unlock()
-	f("", v)
+	f(v)
 }
 
 // MarshalJSON marshals to JSON.
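For reference, the reworked API is meant to be used roughly as follows. This is a minimal sketch, not part of the patch: the metric names, help strings, and the main wrapper are illustrative only, while the constructors and registry methods (NewCounter/NewGauge/NewLatency, NewRegistry, AddMetric, AddMetricGroup, AddLabel, PrintAsText) are the ones introduced in this change.

package main

import (
	"os"
	"time"

	"github.com/cockroachdb/cockroach/util/metric"
)

func main() {
	// Metrics now carry their own name/help metadata instead of being named
	// at registration time.
	reqCount := metric.NewCounter(metric.MetricMetadata{"requests", "Total requests received"})
	queueSize := metric.NewGauge(metric.MetricMetadata{"queue.size", "Current queue length"})
	reqLatency := metric.NewLatency(metric.MetricMetadata{"requests.latency", "Request latency in ns"})

	// Registration is an explicit, separate step.
	reg := metric.NewRegistry()
	reg.AddMetric(reqCount)
	reg.AddMetric(queueSize)
	// Latency histograms are a metric group; the registry expands the group
	// into one histogram per default time scale (1m, 10m, 1h).
	reg.AddMetricGroup(reqLatency)

	// Optional label pair applied to every metric when exported to Prometheus.
	reg.AddLabel("storeID", "1")

	reqCount.Inc(1)
	queueSize.Update(42)
	reqLatency.RecordValue(int64(5 * time.Millisecond))

	// Emit all tracked metrics in the Prometheus text exposition format.
	if err := reg.PrintAsText(os.Stdout); err != nil {
		panic(err)
	}
}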
@@ -365,17 +357,3 @@ func (e *Rate) MarshalJSON() ([]byte, error) {
 	maybeTick(e)
 	return json.Marshal(e.wrapped.Value())
 }
-
-// Rates is a counter and associated EWMA backed rates at different time scales.
-type Rates struct {
-	*Counter
-	Rates map[TimeScale]*Rate
-}
-
-// Add adds the given value to all contained objects.
-func (es Rates) Add(v int64) {
-	es.Counter.Inc(v)
-	for _, e := range es.Rates {
-		e.Add(float64(v))
-	}
-}
diff --git a/util/metric/metric_group.go b/util/metric/metric_group.go
new file mode 100644
index 000000000000..d48e047f5be8
--- /dev/null
+++ b/util/metric/metric_group.go
@@ -0,0 +1,122 @@
+// Copyright 2016 The Cockroach Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the License.
+//
+// Author: Marc Berhault (marc@cockroachlabs.com)
+
+package metric
+
+import "time"
+
+// A TimeScale is a named duration.
+type TimeScale struct {
+	name string
+	d    time.Duration
+}
+
+// DefaultTimeScales are the durations used for helpers which create windowed
+// metrics in bulk (such as Latency or Rates).
+var DefaultTimeScales = []TimeScale{Scale1M, Scale10M, Scale1H}
+
+// Name returns the name of the TimeScale.
+func (ts TimeScale) Name() string {
+	return ts.name
+}
+
+var (
+	// Scale1M is a 1 minute window for windowed stats (e.g. Rates and Histograms).
+	Scale1M = TimeScale{"1m", 1 * time.Minute}
+
+	// Scale10M is a 10 minute window for windowed stats (e.g. Rates and Histograms).
+	Scale10M = TimeScale{"10m", 10 * time.Minute}
+
+	// Scale1H is a 1 hour window for windowed stats (e.g. Rates and Histograms).
+	Scale1H = TimeScale{"1h", time.Hour}
+)
+
+// Histograms is a map of Histogram metrics.
+type Histograms map[TimeScale]*Histogram
+
+// metricGroup defines a metric that is composed of multiple metrics.
+// It can be used directly by the code updating metrics, and expands
+// to multiple individual metrics to add to a registry.
+type metricGroup interface {
+	// iterate will run the callback for every individual metric in the metric group.
+	iterate(func(Iterable))
+}
+
+// NewLatency is a convenience function which creates histograms with
+// suitable defaults for latency tracking. Values are expressed in ns,
+// are truncated into the interval [0, time.Minute] and are recorded
+// with two digits of precision (i.e. errors of <1ms at 100ms, <.6s at 1m).
+// The names of the generated histograms combine the metadata name with each window's name.
+//
+// TODO(mrtracy,tschottdorf): need to discuss roll-ups and generally how (and
+// which) information flows between metrics and time series.
+func NewLatency(metadata MetricMetadata) Histograms {
+	windows := DefaultTimeScales
+	hs := make(Histograms)
+	for _, w := range windows {
+		hs[w] = NewHistogram(MetricMetadata{metadata.Name + sep + w.name, metadata.Help},
+			w.d, int64(time.Minute), 2)
+	}
+	return hs
+}
+
+// RecordValue calls through to each individual Histogram.
+func (hs Histograms) RecordValue(v int64) {
+	for _, h := range hs {
+		h.RecordValue(v)
+	}
+}
+
+// iterate runs the callback function with individual histograms.
+func (hs Histograms) iterate(cb func(Iterable)) {
+	for _, h := range hs {
+		cb(h)
+	}
+}
+
+// Rates is a counter and associated EWMA backed rates at different time scales.
+type Rates struct {
+	*Counter
+	Rates map[TimeScale]*Rate
+}
+
+// NewRates creates and returns a new Rates instance, which contains a set of EWMA-based rates
+// with generally useful time scales and a cumulative counter.
+func NewRates(metadata MetricMetadata) Rates {
+	scales := DefaultTimeScales
+	es := make(map[TimeScale]*Rate)
+	for _, scale := range scales {
+		es[scale] = NewRate(MetricMetadata{metadata.Name + sep + scale.name, metadata.Help}, scale.d)
+	}
+	c := NewCounter(MetricMetadata{metadata.Name + sep + "count", metadata.Help})
+	return Rates{Counter: c, Rates: es}
+}
+
+// Add adds the given value to all contained objects.
+func (es Rates) Add(v int64) {
+	es.Counter.Inc(v)
+	for _, e := range es.Rates {
+		e.Add(float64(v))
+	}
+}
+
+// iterate runs the callback function with the counter and the individual rates.
+func (es Rates) iterate(cb func(Iterable)) {
+	cb(es.Counter)
+	for _, e := range es.Rates {
+		cb(e)
+	}
+}
diff --git a/util/metric/metric_test.go b/util/metric/metric_test.go
index f0117427ac90..fede18ddc326 100644
--- a/util/metric/metric_test.go
+++ b/util/metric/metric_test.go
@@ -31,8 +31,10 @@ func testMarshal(t *testing.T, m json.Marshaler, exp string) {
 	}
 }
 
+var emptyMetadata = MetricMetadata{"", ""}
+
 func TestGauge(t *testing.T) {
-	g := NewGauge()
+	g := NewGauge(emptyMetadata)
 	g.Update(10)
 	if v := g.Value(); v != 10 {
 		t.Fatalf("unexpected value: %d", v)
@@ -41,7 +43,7 @@ func TestGauge(t *testing.T) {
 }
 
 func TestGaugeFloat64(t *testing.T) {
-	g := NewGaugeFloat64()
+	g := NewGaugeFloat64(emptyMetadata)
 	g.Update(10.4)
 	if v := g.Value(); v != 10.4 {
 		t.Fatalf("unexpected value: %f", v)
@@ -50,7 +52,7 @@ func TestGaugeFloat64(t *testing.T) {
 }
 
 func TestCounter(t *testing.T) {
-	c := NewCounter()
+	c := NewCounter(emptyMetadata)
 	c.Inc(100)
 	c.Dec(10)
 	if v := c.Count(); v != 90 {
@@ -69,7 +71,7 @@ func setNow(d time.Duration) {
 func TestHistogramRotate(t *testing.T) {
 	defer TestingSetNow(nil)()
 	setNow(0)
-	h := NewHistogram(histWrapNum*time.Second, 1000+10*histWrapNum, 3)
+	h := NewHistogram(emptyMetadata, histWrapNum*time.Second, 1000+10*histWrapNum, 3)
 	var cur time.Duration
 	for i := 0; i < 3*histWrapNum; i++ {
 		v := int64(10 * i)
@@ -98,7 +100,7 @@ func TestHistogramRotate(t *testing.T) {
 func TestHistogramJSON(t *testing.T) {
 	defer TestingSetNow(nil)()
 	setNow(0)
-	h := NewHistogram(0, 1, 3)
+	h := NewHistogram(emptyMetadata, 0, 1, 3)
 	testMarshal(t, h, `[{"Quantile":100,"Count":0,"ValueAt":0}]`)
 	h.RecordValue(1)
 	testMarshal(t, h, `[{"Quantile":0,"Count":1,"ValueAt":1},{"Quantile":100,"Count":1,"ValueAt":1}]`)
@@ -108,7 +110,7 @@ func TestRateRotate(t *testing.T) {
 	defer TestingSetNow(nil)()
 	setNow(0)
 	const interval = 10 * time.Second
-	r := NewRate(interval)
+	r := NewRate(emptyMetadata, interval)
 
 	// Skip the warmup phase of the wrapped EWMA for this test.
for i := 0; i < 100; i++ { diff --git a/util/metric/registry.go b/util/metric/registry.go index bd47e25931f6..c2ebac820be5 100644 --- a/util/metric/registry.go +++ b/util/metric/registry.go @@ -18,11 +18,8 @@ package metric import ( "encoding/json" - "errors" - "fmt" "io" "regexp" - "time" "github.com/cockroachdb/cockroach/util/syncutil" "github.com/gogo/protobuf/proto" @@ -32,62 +29,66 @@ import ( const sep = "-" -// DefaultTimeScales are the durations used for helpers which create windowed -// metrics in bulk (such as Latency or Rates). -var DefaultTimeScales = []TimeScale{Scale1M, Scale10M, Scale1H} - -// A Registry bundles up various iterables (i.e. typically metrics or other -// registries) to provide a single point of access to them. +// A Registry is a list of metrics. It provides a simple way of iterating over +// them, can marshal into JSON, and generate a prometheus format. // -// A Registry can be added to another Registry through the Add/MustAdd methods. This allows a -// hierarchy of Registry instances to be created. +// A registry can have label pairs that will be applied to all its metrics +// when exported to prometheus. type Registry struct { syncutil.Mutex + labels []*prometheusgo.LabelPair tracked map[string]Iterable } // NewRegistry creates a new Registry. func NewRegistry() *Registry { return &Registry{ + labels: []*prometheusgo.LabelPair{}, tracked: map[string]Iterable{}, } } -// Add links the given Iterable into this registry using the given format -// string. The individual items in the registry will be formatted via -// fmt.Sprintf(format, ). As a special case, *Registry implements -// Iterable and can thus be added. -// Metric types in this package have helpers that allow them to be created -// and registered in a single step. Add is called manually only when adding -// a registry to another, or when integrating metrics defined elsewhere. -func (r *Registry) Add(format string, item Iterable) error { +// AddLabel adds a label/value pair for this registry. +func (r *Registry) AddLabel(name, value string) { r.Lock() defer r.Unlock() - if _, ok := r.tracked[format]; ok { - return errors.New("format string already in use") - } - r.tracked[format] = item - return nil + r.labels = append(r.labels, + &prometheusgo.LabelPair{ + Name: proto.String(name), + Value: proto.String(value), + }) } -// MustAdd calls Add and panics on error. -func (r *Registry) MustAdd(format string, item Iterable) { - if err := r.Add(format, item); err != nil { - panic(fmt.Sprintf("error adding %s: %s", format, err)) - } +func (r *Registry) getLabels() []*prometheusgo.LabelPair { + r.Lock() + defer r.Unlock() + return r.labels +} + +// AddMetric adds the passed-in metric to the registry. +func (r *Registry) AddMetric(metric Iterable) { + r.Lock() + defer r.Unlock() + r.tracked[metric.GetName()] = metric +} + +// AddMetricGroup expands the metric group and adds all of them +// as individual metrics to the registry. +func (r *Registry) AddMetricGroup(group metricGroup) { + r.Lock() + defer r.Unlock() + group.iterate(func(metric Iterable) { + r.tracked[metric.GetName()] = metric + }) } // Each calls the given closure for all metrics. 
func (r *Registry) Each(f func(name string, val interface{})) { r.Lock() defer r.Unlock() - for format, registry := range r.tracked { - registry.Each(func(name string, v interface{}) { - if name == "" { - f(format, v) - } else { - f(fmt.Sprintf(format, name), v) - } + for _, metric := range r.tracked { + metric.Inspect(func(v interface{}) { + f(metric.GetName(), v) }) } } @@ -95,9 +96,11 @@ func (r *Registry) Each(f func(name string, val interface{})) { // MarshalJSON marshals to JSON. func (r *Registry) MarshalJSON() ([]byte, error) { m := make(map[string]interface{}) - r.Each(func(name string, v interface{}) { - m[name] = v - }) + for _, metric := range r.tracked { + metric.Inspect(func(v interface{}) { + m[metric.GetName()] = v + }) + } return json.Marshal(m) } @@ -115,53 +118,30 @@ func exportedName(name string) string { func (r *Registry) PrintAsText(w io.Writer) error { var metricFamily prometheusgo.MetricFamily var ret error - r.Each(func(name string, v interface{}) { - if ret != nil { - return - } - if metric, ok := v.(PrometheusExportable); ok { - metricFamily.Reset() - metricFamily.Name = proto.String(exportedName(name)) - metric.FillPrometheusMetric(&metricFamily) - if _, err := expfmt.MetricFamilyToText(w, &metricFamily); err != nil { - ret = err + labels := r.getLabels() + for _, metric := range r.tracked { + metric.Inspect(func(v interface{}) { + if ret != nil { + return } - } - }) - return ret -} - -// Histogram registers a new windowed HDRHistogram with the given parameters. -// Data is kept in the active window for approximately the given duration. -func (r *Registry) Histogram(name string, duration time.Duration, maxVal int64, - sigFigs int) *Histogram { - h := NewHistogram(duration, maxVal, sigFigs) - r.MustAdd(name, h) - return h -} - -// Latency is a convenience function which registers histograms with -// suitable defaults for latency tracking. Values are expressed in ns, -// are truncated into the interval [0, time.Minute] and are recorded -// with two digits of precision (i.e. errors of <1ms at 100ms, <.6s at 1m). -// The generated names of the metric will begin with the given prefix. -// -// TODO(mrtracy,tschottdorf): need to discuss roll-ups and generally how (and -// which) information flows between metrics and time series. -func (r *Registry) Latency(prefix string) Histograms { - windows := DefaultTimeScales - hs := make(Histograms) - for _, w := range windows { - hs[w] = r.Histogram(prefix+sep+w.name, w.d, int64(time.Minute), 2) + if prom, ok := v.(PrometheusExportable); ok { + metricFamily.Reset() + metricFamily.Name = proto.String(exportedName(metric.GetName())) + metricFamily.Help = proto.String(exportedName(metric.GetHelp())) + prom.FillPrometheusMetric(&metricFamily) + if len(labels) != 0 { + // Set labels. We only set one metric in the slice, but loop anyway. + for _, m := range metricFamily.Metric { + m.Label = labels + } + } + if _, err := expfmt.MetricFamilyToText(w, &metricFamily); err != nil { + ret = err + } + } + }) } - return hs -} - -// Counter registers new counter to the registry. -func (r *Registry) Counter(name string) *Counter { - c := NewCounter() - r.MustAdd(name, c) - return c + return ret } // GetCounter returns the Counter in this registry with the given name. If a @@ -181,13 +161,6 @@ func (r *Registry) GetCounter(name string) *Counter { return counter } -// Gauge registers a new Gauge with the given name. 
-func (r *Registry) Gauge(name string) *Gauge { - g := NewGauge() - r.MustAdd(name, g) - return g -} - // GetGauge returns the Gauge in this registry with the given name. If a Gauge // with this name is not present (including if a non-Gauge Iterable is // registered with the name), nil is returned. @@ -205,21 +178,6 @@ func (r *Registry) GetGauge(name string) *Gauge { return gauge } -// GaugeFloat64 registers a new GaugeFloat64 with the given name. -func (r *Registry) GaugeFloat64(name string) *GaugeFloat64 { - g := NewGaugeFloat64() - r.MustAdd(name, g) - return g -} - -// Rate creates an EWMA rate over the given timescale. The comments on NewRate -// apply. -func (r *Registry) Rate(name string, timescale time.Duration) *Rate { - e := NewRate(timescale) - r.MustAdd(name, e) - return e -} - // GetRate returns the Rate in this registry with the given name. If a Rate with // this name is not present (including if a non-Rate Iterable is registered with // the name), nil is returned. @@ -236,15 +194,3 @@ func (r *Registry) GetRate(name string) *Rate { } return rate } - -// Rates registers and returns a new Rates instance, which contains a set of EWMA-based rates -// with generally useful time scales and a cumulative counter. -func (r *Registry) Rates(prefix string) Rates { - scales := DefaultTimeScales - es := make(map[TimeScale]*Rate) - for _, scale := range scales { - es[scale] = r.Rate(prefix+sep+scale.name, scale.d) - } - c := r.Counter(prefix + sep + "count") - return Rates{Counter: c, Rates: es} -} diff --git a/util/metric/registry_test.go b/util/metric/registry_test.go index cc827508f5fb..28ace3e671a8 100644 --- a/util/metric/registry_test.go +++ b/util/metric/registry_test.go @@ -23,41 +23,43 @@ import ( func TestRegistry(t *testing.T) { r := NewRegistry() - sub := NewRegistry() - topGauge := r.Gauge("top.gauge") - _ = r.GaugeFloat64("top.floatgauge") - topCounter := r.Counter("top.counter") - topRate := r.Rate("top.rate", time.Minute) - _ = r.Rates("top.rates") - _ = r.Histogram("top.hist", time.Minute, 1000, 3) - _ = r.Latency("top.latency") + topGauge := NewGauge(MetricMetadata{"top.gauge", ""}) + r.AddMetric(topGauge) - _ = sub.Gauge("gauge") - r.MustAdd("bottom.%s#1", sub) - if err := r.Add("bottom.%s#1", sub); err == nil { - t.Fatalf("expected failure on double-add") - } - _ = sub.Rates("rates") + r.AddMetric(NewGaugeFloat64(MetricMetadata{"top.floatgauge", ""})) + + topCounter := NewCounter(MetricMetadata{"top.counter", ""}) + r.AddMetric(topCounter) + + topRate := NewRate(MetricMetadata{"top.rate", ""}, time.Minute) + r.AddMetric(topRate) + + r.AddMetricGroup(NewRates(MetricMetadata{"top.rates", ""})) + r.AddMetric(NewHistogram(MetricMetadata{"top.hist", ""}, time.Minute, 1000, 3)) + r.AddMetricGroup(NewLatency(MetricMetadata{"top.latency", ""})) + + r.AddMetric(NewGauge(MetricMetadata{"bottom.gauge", ""})) + r.AddMetricGroup(NewRates(MetricMetadata{"bottom.rates", ""})) expNames := map[string]struct{}{ - "top.rate": {}, - "top.rates-count": {}, - "top.rates-1m": {}, - "top.rates-10m": {}, - "top.rates-1h": {}, - "top.hist": {}, - "top.latency-1m": {}, - "top.latency-10m": {}, - "top.latency-1h": {}, - "top.gauge": {}, - "top.floatgauge": {}, - "top.counter": {}, - "bottom.gauge#1": {}, - "bottom.rates-count#1": {}, - "bottom.rates-1m#1": {}, - "bottom.rates-10m#1": {}, - "bottom.rates-1h#1": {}, + "top.rate": {}, + "top.rates-count": {}, + "top.rates-1m": {}, + "top.rates-10m": {}, + "top.rates-1h": {}, + "top.hist": {}, + "top.latency-1m": {}, + "top.latency-10m": {}, + 
"top.latency-1h": {}, + "top.gauge": {}, + "top.floatgauge": {}, + "top.counter": {}, + "bottom.gauge": {}, + "bottom.rates-count": {}, + "bottom.rates-1m": {}, + "bottom.rates-10m": {}, + "bottom.rates-1h": {}, } r.Each(func(name string, _ interface{}) {