Skip to content

Commit

Permalink
sql: change system.tenant_usage to use a single consumption column
Browse files Browse the repository at this point in the history
There are ongoing discussions about adding more components to the RU
cost. The prospect of having to change the columns of
`system.tenant_usage` each time is daunting.

This commit modifies the table to use a single `total_consumption`
column which encodes the TenantConsumption proto. We can add fields to
the proto in the future without changing the schema.

The values are still readable via SQL using `pb_to_json`:
```
crdb_internal.pb_to_json('cockroach.roachpb.TenantConsumption', total_consumption)
```

Informs #68479.

Release note: None

Release justification: Necessary change for supporting changes to the
cost model for the upcoming Serverless MVP release. This functionality
is only enabled in multi-tenant scenarios and should have no impact on
our dedicated customers.
  • Loading branch information
RaduBerinde committed Sep 28, 2021
1 parent bdb4c1a commit dd7cbf2
Show file tree
Hide file tree
Showing 12 changed files with 205 additions and 195 deletions.
2 changes: 1 addition & 1 deletion docs/generated/settings/settings-for-tenants.txt
Original file line number Diff line number Diff line change
Expand Up @@ -164,4 +164,4 @@ trace.debug.enable boolean false if set, traces for recent requests can be seen
trace.jaeger.agent string the address of a Jaeger agent to receive traces using the Jaeger UDP Thrift protocol, as <host>:<port>. If no port is specified, 6381 will be used.
trace.opentelemetry.collector string address of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as <host>:<port>. If no port is specified, 4317 will be used.
trace.zipkin.collector string the address of a Zipkin instance to receive traces, as <host>:<port>. If no port is specified, 9411 will be used.
version version 21.1-1166 set the active cluster version in the format '<major>.<minor>'
version version 21.1-1168 set the active cluster version in the format '<major>.<minor>'
2 changes: 1 addition & 1 deletion docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,6 @@
<tr><td><code>trace.jaeger.agent</code></td><td>string</td><td><code></code></td><td>the address of a Jaeger agent to receive traces using the Jaeger UDP Thrift protocol, as <host>:<port>. If no port is specified, 6381 will be used.</td></tr>
<tr><td><code>trace.opentelemetry.collector</code></td><td>string</td><td><code></code></td><td>address of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as <host>:<port>. If no port is specified, 4317 will be used.</td></tr>
<tr><td><code>trace.zipkin.collector</code></td><td>string</td><td><code></code></td><td>the address of a Zipkin instance to receive traces, as <host>:<port>. If no port is specified, 9411 will be used.</td></tr>
<tr><td><code>version</code></td><td>version</td><td><code>21.1-1166</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
<tr><td><code>version</code></td><td>version</td><td><code>21.1-1168</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
</tbody>
</table>
44 changes: 21 additions & 23 deletions pkg/ccl/backupccl/backup_planning_tenant.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,25 @@ import (

const tenantMetadataQuery = `
SELECT
tenants.id,
tenants.active,
tenants.info,
tenant_usage.ru_burst_limit,
tenant_usage.ru_refill_rate,
tenant_usage.ru_current,
tenant_usage.total_ru_usage,
tenant_usage.total_read_requests,
tenant_usage.total_read_bytes,
tenant_usage.total_write_requests,
tenant_usage.total_write_bytes,
tenant_usage.total_sql_pod_cpu_seconds
tenants.id, /* 0 */
tenants.active, /* 1 */
tenants.info, /* 2 */
tenant_usage.ru_burst_limit, /* 3 */
tenant_usage.ru_refill_rate, /* 4 */
tenant_usage.ru_current, /* 5 */
tenant_usage.total_consumption /* 6 */
FROM
system.tenants
LEFT JOIN system.tenant_usage ON
tenants.id = tenant_usage.tenant_id AND tenant_usage.instance_id = 0`

func tenantMetadataFromRow(row tree.Datums) (descpb.TenantInfoWithUsage, error) {
if len(row) != 7 {
return descpb.TenantInfoWithUsage{}, errors.AssertionFailedf(
"unexpected row size %d from tenant metadata query", len(row),
)
}

id := uint64(tree.MustBeDInt(row[0]))
res := descpb.TenantInfoWithUsage{
TenantInfo: descpb.TenantInfo{
Expand All @@ -51,12 +52,11 @@ func tenantMetadataFromRow(row tree.Datums) (descpb.TenantInfoWithUsage, error)
return descpb.TenantInfoWithUsage{}, err
}
// If this tenant had no reported consumption and its token bucket was not
// configured, the tenant_usage values are all NULL. Otherwise none of them
// are NULL.
// configured, the tenant_usage values are all NULL.
//
// It should be sufficient to check any one value, but we check all of them
// just to be defensive (in case the table contains invalid data).
for _, d := range row[3:] {
for _, d := range row[3:5] {
if d == tree.DNull {
return res, nil
}
Expand All @@ -65,14 +65,12 @@ func tenantMetadataFromRow(row tree.Datums) (descpb.TenantInfoWithUsage, error)
RUBurstLimit: float64(tree.MustBeDFloat(row[3])),
RURefillRate: float64(tree.MustBeDFloat(row[4])),
RUCurrent: float64(tree.MustBeDFloat(row[5])),
Consumption: roachpb.TenantConsumption{
RU: float64(tree.MustBeDFloat(row[6])),
ReadRequests: uint64(tree.MustBeDInt(row[7])),
ReadBytes: uint64(tree.MustBeDInt(row[8])),
WriteRequests: uint64(tree.MustBeDInt(row[9])),
WriteBytes: uint64(tree.MustBeDInt(row[10])),
SQLPodsCPUSeconds: float64(tree.MustBeDFloat(row[11])),
},
}
if row[6] != tree.DNull {
consumptionBytes := []byte(tree.MustBeDBytes(row[6]))
if err := protoutil.Unmarshal(consumptionBytes, &res.Usage.Consumption); err != nil {
return descpb.TenantInfoWithUsage{}, err
}
}
return res, nil
}
Expand Down
1 change: 1 addition & 0 deletions pkg/ccl/multitenantccl/tenantcostserver/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ go_library(
"//pkg/util/log",
"//pkg/util/metric",
"//pkg/util/metric/aggmetric",
"//pkg/util/protoutil",
"//pkg/util/syncutil",
"//pkg/util/timeutil",
"@com_github_cockroachdb_errors//:errors",
Expand Down
159 changes: 69 additions & 90 deletions pkg/ccl/multitenantccl/tenantcostserver/system_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/errors"
)

Expand Down Expand Up @@ -162,22 +163,17 @@ func (h *sysTableHelper) readTenantAndInstanceState(
ru_refill_rate, /* 4 */
ru_current, /* 5 */
current_share_sum, /* 6 */
total_ru_usage, /* 7 */
total_read_requests, /* 8 */
total_read_bytes, /* 9 */
total_write_requests, /* 10 */
total_write_bytes, /* 11 */
total_sql_pod_cpu_seconds, /* 12 */
instance_lease, /* 13 */
instance_seq, /* 14 */
instance_shares /* 15 */
total_consumption, /* 7 */
instance_lease, /* 8 */
instance_seq, /* 9 */
instance_shares /* 10 */
FROM system.tenant_usage
WHERE tenant_id = $1 AND instance_id IN (0, $2)`,
h.tenantID.ToUint64(),
int64(instanceID),
)
if err != nil {
return tenant, instance, err
return tenantState{}, instanceState{}, err
}
for _, r := range rows {
instanceID := base.SQLInstanceID(tree.MustBeDInt(r[0]))
Expand All @@ -192,22 +188,23 @@ func (h *sysTableHelper) readTenantAndInstanceState(
RUCurrent: float64(tree.MustBeDFloat(r[5])),
CurrentShareSum: float64(tree.MustBeDFloat(r[6])),
}
tenant.Consumption = roachpb.TenantConsumption{
RU: float64(tree.MustBeDFloat(r[7])),
ReadRequests: uint64(tree.MustBeDInt(r[8])),
ReadBytes: uint64(tree.MustBeDInt(r[9])),
WriteRequests: uint64(tree.MustBeDInt(r[10])),
WriteBytes: uint64(tree.MustBeDInt(r[11])),
SQLPodsCPUSeconds: float64(tree.MustBeDFloat(r[12])),
if consumption := r[7]; consumption != tree.DNull {
// total_consumption can be NULL because of a migration of the
// tenant_usage table.
if err := protoutil.Unmarshal(
[]byte(tree.MustBeDBytes(consumption)), &tenant.Consumption,
); err != nil {
return tenantState{}, instanceState{}, err
}
}
} else {
// Instance state.
instance.Present = true
instance.LastUpdate = tree.MustBeDTimestamp(r[2])
instance.NextInstance = base.SQLInstanceID(tree.MustBeDInt(r[1]))
instance.Lease = tree.MustBeDBytes(r[13])
instance.Seq = int64(tree.MustBeDInt(r[14]))
instance.Shares = float64(tree.MustBeDFloat(r[15]))
instance.Lease = tree.MustBeDBytes(r[8])
instance.Seq = int64(tree.MustBeDInt(r[9]))
instance.Shares = float64(tree.MustBeDFloat(r[10]))
}
}

Expand All @@ -216,9 +213,13 @@ func (h *sysTableHelper) readTenantAndInstanceState(

// updateTenantState writes out an updated tenant state.
func (h *sysTableHelper) updateTenantState(tenant tenantState) error {
consumption, err := protoutil.Marshal(&tenant.Consumption)
if err != nil {
return err
}
// Note: it is important that this UPSERT specifies all columns of the
// table, to allow it to perform "blind" writes.
_, err := h.ex.ExecEx(
_, err = h.ex.ExecEx(
h.ctx, "tenant-usage-upsert", h.txn,
sessiondata.NodeUserSessionDataOverride,
`UPSERT INTO system.tenant_usage(
Expand All @@ -230,30 +231,20 @@ func (h *sysTableHelper) updateTenantState(tenant tenantState) error {
ru_refill_rate,
ru_current,
current_share_sum,
total_ru_usage,
total_read_requests,
total_read_bytes,
total_write_requests,
total_write_bytes,
total_sql_pod_cpu_seconds,
total_consumption,
instance_lease,
instance_seq,
instance_shares
) VALUES ($1, 0, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, NULL, NULL, NULL)
) VALUES ($1, 0, $2, $3, $4, $5, $6, $7, $8, NULL, NULL, NULL)
`,
h.tenantID.ToUint64(), // $1
int64(tenant.FirstInstance), // $2
&tenant.LastUpdate, // $3
tenant.Bucket.RUBurstLimit, // $4
tenant.Bucket.RURefillRate, // $5
tenant.Bucket.RUCurrent, // $6
tenant.Bucket.CurrentShareSum, // $7
tenant.Consumption.RU, // $8
tenant.Consumption.ReadRequests, // $9
tenant.Consumption.ReadBytes, // $10
tenant.Consumption.WriteRequests, // $11
tenant.Consumption.WriteBytes, // $12
tenant.Consumption.SQLPodsCPUSeconds, // $13
h.tenantID.ToUint64(), // $1
int64(tenant.FirstInstance), // $2
&tenant.LastUpdate, // $3
tenant.Bucket.RUBurstLimit, // $4
tenant.Bucket.RURefillRate, // $5
tenant.Bucket.RUCurrent, // $6
tenant.Bucket.CurrentShareSum, // $7
tree.NewDBytes(tree.DBytes(consumption)), // $8
)
return err
}
Expand All @@ -262,9 +253,13 @@ func (h *sysTableHelper) updateTenantState(tenant tenantState) error {
func (h *sysTableHelper) updateTenantAndInstanceState(
tenant tenantState, instance instanceState,
) error {
consumption, err := protoutil.Marshal(&tenant.Consumption)
if err != nil {
return err
}
// Note: it is important that this UPSERT specifies all columns of the
// table, to allow it to perform "blind" writes.
_, err := h.ex.ExecEx(
_, err = h.ex.ExecEx(
h.ctx, "tenant-usage-insert", h.txn,
sessiondata.NodeUserSessionDataOverride,
`UPSERT INTO system.tenant_usage(
Expand All @@ -276,38 +271,28 @@ func (h *sysTableHelper) updateTenantAndInstanceState(
ru_refill_rate,
ru_current,
current_share_sum,
total_ru_usage,
total_read_requests,
total_read_bytes,
total_write_requests,
total_write_bytes,
total_sql_pod_cpu_seconds,
total_consumption,
instance_lease,
instance_seq,
instance_shares
) VALUES
($1, 0, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, NULL, NULL, NULL),
($1, $14, $15, $16, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, $17, $18, $19)
($1, 0, $2, $3, $4, $5, $6, $7, $8, NULL, NULL, NULL),
($1, $9, $10, $11, NULL, NULL, NULL, NULL, NULL, $12, $13, $14)
`,
h.tenantID.ToUint64(), // $1
int64(tenant.FirstInstance), // $2
&tenant.LastUpdate, // $3
tenant.Bucket.RUBurstLimit, // $4
tenant.Bucket.RURefillRate, // $5
tenant.Bucket.RUCurrent, // $6
tenant.Bucket.CurrentShareSum, // $7
tenant.Consumption.RU, // $8
tenant.Consumption.ReadRequests, // $9
tenant.Consumption.ReadBytes, // $10
tenant.Consumption.WriteRequests, // $11
tenant.Consumption.WriteBytes, // $12
tenant.Consumption.SQLPodsCPUSeconds, // $13
int64(instance.ID), // $14
int64(instance.NextInstance), // $15
&instance.LastUpdate, // $16
&instance.Lease, // $17
instance.Seq, // $18
instance.Shares, // $19
h.tenantID.ToUint64(), // $1
int64(tenant.FirstInstance), // $2
&tenant.LastUpdate, // $3
tenant.Bucket.RUBurstLimit, // $4
tenant.Bucket.RURefillRate, // $5
tenant.Bucket.RUCurrent, // $6
tenant.Bucket.CurrentShareSum, // $7
tree.NewDBytes(tree.DBytes(consumption)), // $8
int64(instance.ID), // $9
int64(instance.NextInstance), // $10
&instance.LastUpdate, // $11
&instance.Lease, // $12
instance.Seq, // $13
instance.Shares, // $14
)
return err
}
Expand Down Expand Up @@ -373,16 +358,11 @@ func (h *sysTableHelper) accomodateNewInstance(tenant *tenantState, instance *in
ru_refill_rate,
ru_current,
current_share_sum,
total_ru_usage,
total_read_requests,
total_read_bytes,
total_write_requests,
total_write_bytes,
total_sql_pod_cpu_seconds,
total_consumption,
instance_lease,
instance_seq,
instance_shares
) VALUES ($1, $2, $3, $4, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, $5, $6, $7)
) VALUES ($1, $2, $3, $4, NULL, NULL, NULL, NULL, NULL, $5, $6, $7)
`,
h.tenantID.ToUint64(), // $1
int64(prevInstanceID), // $2
Expand Down Expand Up @@ -476,20 +456,15 @@ func (h *sysTableHelper) checkInvariants() error {
`SELECT
instance_id, /* 0 */
next_instance_id, /* 1 */
last_update, /* 2 */
last_update, /* 2 */
ru_burst_limit, /* 3 */
ru_refill_rate, /* 4 */
ru_current, /* 5 */
current_share_sum, /* 6 */
total_ru_usage, /* 7 */
total_read_requests, /* 8 */
total_read_bytes, /* 9 */
total_write_requests, /* 10 */
total_write_bytes, /* 11 */
total_sql_pod_cpu_seconds, /* 12 */
instance_lease, /* 13 */
instance_seq, /* 14 */
instance_shares /* 15 */
total_consumption, /* 7 */
instance_lease, /* 8 */
instance_seq, /* 9 */
instance_shares /* 10 */
FROM system.tenant_usage
WHERE tenant_id = $1
ORDER BY instance_id`,
Expand Down Expand Up @@ -524,10 +499,10 @@ func (h *sysTableHelper) checkInvariants() error {
var nullFirst, nullLast int
if i == 0 {
// Row 0 should have NULL per-instance values.
nullFirst, nullLast = 13, 16
nullFirst, nullLast = 8, 10
} else {
// Other rows should have NULL per-tenant values.
nullFirst, nullLast = 3, 12
nullFirst, nullLast = 3, 7
}
for j := range rows[i] {
isNull := (rows[i][j] == tree.DNull)
Expand All @@ -536,7 +511,11 @@ func (h *sysTableHelper) checkInvariants() error {
if !expNull {
return errors.Errorf("expected NULL column %d", j)
}
return errors.Errorf("expected non-NULL column %d", j)
// We have an exception for total_consumption, which can be NULL because
// of a migration of the tenant_usage table.
if i != 7 {
return errors.Errorf("expected non-NULL column %d", j)
}
}
}
}
Expand All @@ -557,7 +536,7 @@ func (h *sysTableHelper) checkInvariants() error {
sharesSum := float64(tree.MustBeDFloat(rows[0][6]))
var expSharesSum float64
for _, r := range rows[1:] {
expSharesSum += float64(tree.MustBeDFloat(r[15]))
expSharesSum += float64(tree.MustBeDFloat(r[10]))
}

a, b := sharesSum, expSharesSum
Expand Down
7 changes: 7 additions & 0 deletions pkg/clusterversion/cockroach_versions.go
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,9 @@ const (
// programming error. See https://github.com/cockroachdb/pebble/issues/1255
// and #69891.
PebbleSetWithDelete
// TenantUsageSingleConsumptionColumn changes the tenant_usage system table to
// use a single consumption column (encoding a proto).
TenantUsageSingleConsumptionColumn

// *************************************************
// Step (1): Add new versions here.
Expand Down Expand Up @@ -480,6 +483,10 @@ var versionsSingleton = keyedVersions{
Key: PebbleSetWithDelete,
Version: roachpb.Version{Major: 21, Minor: 1, Internal: 1166},
},
{
Key: TenantUsageSingleConsumptionColumn,
Version: roachpb.Version{Major: 21, Minor: 1, Internal: 1168},
},
// *************************************************
// Step (2): Add new versions here.
// Do not add new versions to a patch release.
Expand Down
Loading

0 comments on commit dd7cbf2

Please sign in to comment.