Skip to content

Commit

Permalink
multitenant: expose consumption metrics
Browse files Browse the repository at this point in the history
This commit adds per-tenant consumption metrics. Each node presents
the latest consumption value that it knows about, for each tenant. The
higher-level logic will take the Max value across all nodes.

The metrics are:
 - tenant.consumption.request_units
 - tenant.consumption.read_requests
 - tenant.consumption.read_bytes
 - tenant.consumption.write_requests
 - tenant.consumption.write_bytes
 - tenant.consumption.sql_pods_cpu_seconds

Note that these are aggregate metrics, meaning that we export a
separate value for each tenant, and also a sum across all tenants. The
sums are not meaningful in this case and should not be used.

Release note: None
  • Loading branch information
RaduBerinde committed Jul 26, 2021
1 parent b9ef1a6 commit ac7ff37
Show file tree
Hide file tree
Showing 21 changed files with 465 additions and 41 deletions.
1 change: 1 addition & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ ALL_TESTS = [
"//pkg/ccl/kvccl/kvtenantccl:kvtenantccl_test",
"//pkg/ccl/logictestccl:logictestccl_test",
"//pkg/ccl/multiregionccl:multiregionccl_test",
"//pkg/ccl/multitenantccl/tenantcostserver:tenantcostserver_test",
"//pkg/ccl/oidcccl:oidcccl_test",
"//pkg/ccl/partitionccl:partitionccl_test",
"//pkg/ccl/serverccl:serverccl_test",
Expand Down
34 changes: 33 additions & 1 deletion pkg/ccl/multitenantccl/tenantcostserver/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")

go_library(
name = "tenantcostserver",
srcs = [
"configure.go",
"metrics.go",
"server.go",
"token_bucket.go",
],
Expand All @@ -20,7 +21,38 @@ go_library(
"//pkg/sql/pgwire/pgerror",
"//pkg/sql/sem/tree",
"//pkg/sql/sessiondata",
"//pkg/util/metric",
"//pkg/util/metric/aggmetric",
"//pkg/util/syncutil",
"//pkg/util/timeutil",
"@com_github_cockroachdb_errors//:errors",
],
)

# Test target for the tenantcostserver package. Everything under testdata/ is
# declared as runtime data so the test files can read it.
go_test(
    name = "tenantcostserver_test",
    srcs = [
        "main_test.go",
        "server_test.go",
    ],
    data = glob(["testdata/**"]),
    deps = [
        ":tenantcostserver",
        "//pkg/base",
        "//pkg/roachpb:with-mocks",
        "//pkg/security",
        "//pkg/security/securitytest",
        "//pkg/server",
        "//pkg/sql",
        "//pkg/testutils/metrictestutils",
        "//pkg/testutils/serverutils",
        "//pkg/testutils/sqlutils",
        "//pkg/testutils/testcluster",
        "//pkg/util/leaktest",
        "//pkg/util/log",
        "//pkg/util/metric",
        "//pkg/util/randutil",
        "@com_github_cockroachdb_datadriven//:datadriven",
        "@in_gopkg_yaml_v2//:yaml_v2",
    ],
)
33 changes: 33 additions & 0 deletions pkg/ccl/multitenantccl/tenantcostserver/main_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright 2021 The Cockroach Authors.
//
// Licensed as a CockroachDB Enterprise file under the Cockroach Community
// License (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt

package tenantcostserver_test

import (
"os"
"testing"

_ "github.com/cockroachdb/cockroach/pkg/ccl/multitenantccl/tenantcostserver"
"github.com/cockroachdb/cockroach/pkg/security"
"github.com/cockroachdb/cockroach/pkg/security/securitytest"
"github.com/cockroachdb/cockroach/pkg/server"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
)

//go:generate ../../../util/leaktest/add-leaktest.sh *_test.go

// TestMain performs the package-wide test setup and then runs the tests.
//
// It points the security asset loader at securitytest.EmbeddedAssets, seeds
// randutil for tests, and registers the test server and test cluster
// factories with serverutils. The process exits with the status returned by
// m.Run.
func TestMain(m *testing.M) {
	security.SetAssetLoader(securitytest.EmbeddedAssets)
	randutil.SeedForTests()
	serverutils.InitTestServerFactory(server.TestServerFactory)
	serverutils.InitTestClusterFactory(testcluster.TestClusterFactory)

	exitCode := m.Run()
	os.Exit(exitCode)
}
129 changes: 129 additions & 0 deletions pkg/ccl/multitenantccl/tenantcostserver/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
// Copyright 2021 The Cockroach Authors.
//
// Licensed as a CockroachDB Enterprise file under the Cockroach Community
// License (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt

package tenantcostserver

import (
"github.com/cockroachdb/cockroach/pkg/multitenant"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/util/metric"
"github.com/cockroachdb/cockroach/pkg/util/metric/aggmetric"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
)

// Metrics is a metric.Struct for reporting tenant resource consumption.
//
// All metrics are aggregate metrics, containing child metrics for all tenants
// that have communicated with this node. The metrics report cumulative usage
// for the tenant; the current value for a given tenant is the most recent (or,
// equivalently, the largest) value reported across all nodes. The top-level
// aggregated value for a metric is not useful (it sums up the consumption for
// each tenant, as last reported to this node).
type Metrics struct {
	// TotalRU is the aggregate gauge for tenant.consumption.request_units.
	TotalRU *aggmetric.AggGaugeFloat64
	// TotalReadRequests is the aggregate gauge for
	// tenant.consumption.read_requests.
	TotalReadRequests *aggmetric.AggGauge
	// TotalReadBytes is the aggregate gauge for tenant.consumption.read_bytes.
	TotalReadBytes *aggmetric.AggGauge
	// TotalWriteRequests is the aggregate gauge for
	// tenant.consumption.write_requests.
	TotalWriteRequests *aggmetric.AggGauge
	// TotalWriteBytes is the aggregate gauge for
	// tenant.consumption.write_bytes.
	TotalWriteBytes *aggmetric.AggGauge
	// TotalSQLPodsCPUSeconds is the aggregate gauge for
	// tenant.consumption.sql_pods_cpu_seconds.
	TotalSQLPodsCPUSeconds *aggmetric.AggGaugeFloat64

	// mu protects the per-tenant child metrics handed out by
	// getTenantMetrics.
	mu struct {
		syncutil.Mutex
		// tenantMetrics stores the tenantMetrics for all tenants that have
		// sent TokenBucketRequests to this node.
		// TODO(radu): add garbage collection to remove inactive tenants.
		tenantMetrics map[roachpb.TenantID]tenantMetrics
	}
}

// Compile-time assertion that *Metrics implements metric.Struct.
var _ metric.Struct = (*Metrics)(nil)

// MetricStruct indicates that Metrics is a metric.Struct. The method body is
// intentionally empty; it only serves as a marker.
func (m *Metrics) MetricStruct() {}

// Metadata for the per-tenant consumption metrics. Each entry describes one
// aggregate gauge created in (*Metrics).init; the Name is the exported metric
// name and Help is the human-readable description shown alongside it.
var (
	// metaTotalRU describes the cumulative request-unit consumption metric.
	metaTotalRU = metric.Metadata{
		Name:        "tenant.consumption.request_units",
		Help:        "Total RU consumption",
		Measurement: "Request Units",
		Unit:        metric.Unit_COUNT,
	}
	// metaTotalReadRequests describes the cumulative KV read request count.
	metaTotalReadRequests = metric.Metadata{
		Name:        "tenant.consumption.read_requests",
		Help:        "Total number of KV read requests",
		Measurement: "Requests",
		Unit:        metric.Unit_COUNT,
	}
	// metaTotalReadBytes describes the cumulative bytes read from KV.
	metaTotalReadBytes = metric.Metadata{
		Name:        "tenant.consumption.read_bytes",
		Help:        "Total number of bytes read from KV",
		Measurement: "Bytes",
		Unit:        metric.Unit_COUNT,
	}
	// metaTotalWriteRequests describes the cumulative KV write request count.
	metaTotalWriteRequests = metric.Metadata{
		Name:        "tenant.consumption.write_requests",
		Help:        "Total number of KV write requests",
		Measurement: "Requests",
		Unit:        metric.Unit_COUNT,
	}
	// metaTotalWriteBytes describes the cumulative bytes written to KV.
	metaTotalWriteBytes = metric.Metadata{
		Name:        "tenant.consumption.write_bytes",
		Help:        "Total number of bytes written to KV",
		Measurement: "Bytes",
		Unit:        metric.Unit_COUNT,
	}
	// metaTotalSQLPodsCPUSeconds describes the cumulative CPU time consumed
	// by SQL pods. The help text previously duplicated the write_bytes
	// description; it now matches the metric it documents.
	metaTotalSQLPodsCPUSeconds = metric.Metadata{
		Name:        "tenant.consumption.sql_pods_cpu_seconds",
		Help:        "Total amount of CPU used by SQL pods",
		Measurement: "CPU Seconds",
		Unit:        metric.Unit_SECONDS,
	}
)

// init resets m to a fresh Metrics value. It creates one aggregate gauge per
// consumption metric, all built with the multitenant.TenantIDLabel label, and
// allocates an empty per-tenant metrics map.
func (m *Metrics) init() {
	builder := aggmetric.MakeBuilder(multitenant.TenantIDLabel)

	// Start from the zero value so that no state from a previous use survives.
	*m = Metrics{}
	m.TotalRU = builder.GaugeFloat64(metaTotalRU)
	m.TotalReadRequests = builder.Gauge(metaTotalReadRequests)
	m.TotalReadBytes = builder.Gauge(metaTotalReadBytes)
	m.TotalWriteRequests = builder.Gauge(metaTotalWriteRequests)
	m.TotalWriteBytes = builder.Gauge(metaTotalWriteBytes)
	m.TotalSQLPodsCPUSeconds = builder.GaugeFloat64(metaTotalSQLPodsCPUSeconds)

	m.mu.tenantMetrics = map[roachpb.TenantID]tenantMetrics{}
}

// tenantMetrics holds the metrics for an individual tenant. Each field is the
// child, for that tenant, of the correspondingly named aggregate gauge in
// Metrics (see getTenantMetrics).
type tenantMetrics struct {
	// totalRU is the tenant's child of Metrics.TotalRU.
	totalRU *aggmetric.GaugeFloat64
	// totalReadRequests is the tenant's child of Metrics.TotalReadRequests.
	totalReadRequests *aggmetric.Gauge
	// totalReadBytes is the tenant's child of Metrics.TotalReadBytes.
	totalReadBytes *aggmetric.Gauge
	// totalWriteRequests is the tenant's child of Metrics.TotalWriteRequests.
	totalWriteRequests *aggmetric.Gauge
	// totalWriteBytes is the tenant's child of Metrics.TotalWriteBytes.
	totalWriteBytes *aggmetric.Gauge
	// totalSQLPodsCPUSeconds is the tenant's child of
	// Metrics.TotalSQLPodsCPUSeconds.
	totalSQLPodsCPUSeconds *aggmetric.GaugeFloat64
}

// getTenantMetrics returns the metrics for a tenant, creating them on first
// use.
//
// The lookup and the creation both happen under m.mu. When the tenant has no
// entry yet, a child is added to every aggregate gauge using the tenant ID's
// string form as the label value, and the resulting set is cached in the map
// for subsequent calls.
func (m *Metrics) getTenantMetrics(tenantID roachpb.TenantID) tenantMetrics {
	m.mu.Lock()
	defer m.mu.Unlock()

	if existing, found := m.mu.tenantMetrics[tenantID]; found {
		return existing
	}

	label := tenantID.String()
	created := tenantMetrics{
		totalRU:                m.TotalRU.AddChild(label),
		totalReadRequests:      m.TotalReadRequests.AddChild(label),
		totalReadBytes:         m.TotalReadBytes.AddChild(label),
		totalWriteRequests:     m.TotalWriteRequests.AddChild(label),
		totalWriteBytes:        m.TotalWriteBytes.AddChild(label),
		totalSQLPodsCPUSeconds: m.TotalSQLPodsCPUSeconds.AddChild(label),
	}
	m.mu.tenantMetrics[tenantID] = created
	return created
}
11 changes: 10 additions & 1 deletion pkg/ccl/multitenantccl/tenantcostserver/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,27 @@ import (
"github.com/cockroachdb/cockroach/pkg/multitenant"
"github.com/cockroachdb/cockroach/pkg/server"
"github.com/cockroachdb/cockroach/pkg/sql"
"github.com/cockroachdb/cockroach/pkg/util/metric"
)

// instance bundles the state of the tenant usage server. *instance is
// asserted to implement multitenant.TenantUsageServer.
type instance struct {
	// db is the KV database handle supplied to newInstance.
	db *kv.DB
	// executor is the internal SQL executor supplied to newInstance.
	executor *sql.InternalExecutor
	// metrics holds the per-tenant consumption metrics; it is set up by
	// newInstance and exposed through the Metrics method.
	metrics Metrics
}

func newInstance(db *kv.DB, executor *sql.InternalExecutor) *instance {
return &instance{
res := &instance{
db: db,
executor: executor,
}
res.metrics.init()
return res
}

// Metrics is part of the multitenant.TenantUsageServer interface. It returns
// a pointer to the instance's own Metrics value, so callers observe the live
// metrics rather than a copy.
func (s *instance) Metrics() metric.Struct {
	return &s.metrics
}

var _ multitenant.TenantUsageServer = (*instance)(nil)
Expand Down
112 changes: 112 additions & 0 deletions pkg/ccl/multitenantccl/tenantcostserver/server_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Copyright 2021 The Cockroach Authors.
//
// Licensed as a CockroachDB Enterprise file under the Cockroach Community
// License (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt

package tenantcostserver_test

import (
"context"
"fmt"
"regexp"
"strconv"
"testing"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/server"
"github.com/cockroachdb/cockroach/pkg/sql"
"github.com/cockroachdb/cockroach/pkg/testutils/metrictestutils"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/metric"
"github.com/cockroachdb/datadriven"
"gopkg.in/yaml.v2"
)

// TestDataDriven runs every file under testdata as a datadriven script against
// a tenant usage server. A fresh test server, tenant usage server and metrics
// registry are created for each file.
//
// Supported commands:
//
//   - create-tenant <id>: runs crdb_internal.create_tenant(<id>) via SQL.
//   - token-bucket-request <id>: parses the YAML input into a consumption
//     report and issues a TokenBucketRequest for that tenant; outputs
//     "error: ..." if the request fails and nothing otherwise.
//   - metrics: treats the input as a regular expression and outputs the text
//     returned by metrictestutils.GetMetricsText for the registry.
func TestDataDriven(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer log.Scope(t).Close(t)

	datadriven.Walk(t, "testdata", func(t *testing.T, path string) {
		defer leaktest.AfterTest(t)()

		// Set up a server that we use only for the system tables.
		ctx := context.Background()
		s, db, kvDB := serverutils.StartServer(t, base.TestServerArgs{})
		defer s.Stopper().Stop(ctx)
		r := sqlutils.MakeSQLRunner(db)

		// Register the usage server's metrics in a dedicated registry so the
		// "metrics" command can scrape them.
		tenantUsage := server.NewTenantUsageServer(kvDB, s.InternalExecutor().(*sql.InternalExecutor))
		metricsReg := metric.NewRegistry()
		metricsReg.AddMetricStruct(tenantUsage.Metrics())

		datadriven.RunTest(t, path, func(t *testing.T, d *datadriven.TestData) string {
			switch d.Cmd {
			case "create-tenant":
				// The single argument is the tenant number.
				if len(d.CmdArgs) != 1 {
					d.Fatalf(t, "expected tenant number")
				}
				r.Exec(t, fmt.Sprintf("SELECT crdb_internal.create_tenant(%s)", d.CmdArgs[0].Key))
				return ""

			case "token-bucket-request":
				// The single argument is the tenant number; the input is a
				// YAML document describing the reported consumption.
				if len(d.CmdArgs) != 1 {
					d.Fatalf(t, "expected tenant number")
				}
				tenantID, err := strconv.Atoi(d.CmdArgs[0].Key)
				if err != nil {
					d.Fatalf(t, "%v", err)
				}
				var args struct {
					Consumption struct {
						RU float64 `yaml:"ru"`
						ReadReq uint64 `yaml:"read_req"`
						ReadBytes uint64 `yaml:"read_bytes"`
						WriteReq uint64 `yaml:"write_req"`
						WriteBytes uint64 `yaml:"write_bytes"`
						SQLPodsCPUUsage float64 `yaml:"sql_pods_cpu_usage"`
					}
				}
				// Strict unmarshalling rejects unknown keys in the test input.
				if err := yaml.UnmarshalStrict([]byte(d.Input), &args); err != nil {
					d.Fatalf(t, "failed to parse request yaml: %v", err)
				}
				req := roachpb.TokenBucketRequest{
					ConsumptionSinceLastRequest: roachpb.TokenBucketRequest_Consumption{
						RU: args.Consumption.RU,
						ReadRequests: args.Consumption.ReadReq,
						ReadBytes: args.Consumption.ReadBytes,
						WriteRequests: args.Consumption.WriteReq,
						WriteBytes: args.Consumption.WriteBytes,
						SQLPodCPUSeconds: args.Consumption.SQLPodsCPUUsage,
					},
				}
				// Request errors are reported as test output rather than
				// failing the test, so test files can assert on them.
				_, err = tenantUsage.TokenBucketRequest(ctx, roachpb.MakeTenantID(uint64(tenantID)), &req)
				if err != nil {
					return fmt.Sprintf("error: %v", err)
				}
				return ""

			case "metrics":
				// The input is a regular expression passed to GetMetricsText
				// along with the registry.
				re, err := regexp.Compile(d.Input)
				if err != nil {
					d.Fatalf(t, "failed to compile pattern: %v", err)
				}
				str, err := metrictestutils.GetMetricsText(metricsReg, re)
				if err != nil {
					d.Fatalf(t, "failed to scrape metrics: %v", err)
				}
				return str

			default:
				d.Fatalf(t, "unknown command %q", d.Cmd)
				return ""
			}
		})
	})
}
22 changes: 22 additions & 0 deletions pkg/ccl/multitenantccl/tenantcostserver/testdata/metrics
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
create-tenant 5
----

token-bucket-request 5
consumption:
ru: 10
read_req: 20
read_bytes: 30
write_req: 40
write_bytes: 50
sql_pods_cpu_usage: 60
----

metrics
tenant_id="5"
----
tenant_consumption_read_bytes{tenant_id="5"} 30
tenant_consumption_read_requests{tenant_id="5"} 20
tenant_consumption_request_units{tenant_id="5"} 10
tenant_consumption_sql_pods_cpu_seconds{tenant_id="5"} 60
tenant_consumption_write_bytes{tenant_id="5"} 50
tenant_consumption_write_requests{tenant_id="5"} 40
Loading

0 comments on commit ac7ff37

Please sign in to comment.