Skip to content

Commit

Permalink
tenantcostmodel: update tenant cost model
Browse files Browse the repository at this point in the history
The current tenant cost model only takes batches into account. It ignores the
requests in each read and write batch (e.g. GetRequest, CPutRequest, etc). For
example, lookup joins typically send a small number of read batches with a large
number of requests in each batch, which resulted in the model greatly under-
costing queries containing them.

This commit updates the cost model to take requests into account, as well as
batches.

Release note: None
  • Loading branch information
andy-kimball committed Jun 16, 2022
1 parent 22751ff commit 61674bc
Show file tree
Hide file tree
Showing 31 changed files with 479 additions and 214 deletions.
38 changes: 21 additions & 17 deletions pkg/ccl/multitenantccl/tenantcostclient/tenant_side.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,12 +348,12 @@ func (c *tenantSideCostController) updateRunState(ctx context.Context) {
if deltaCPU < 0 {
deltaCPU = 0
}
ru := deltaCPU * float64(c.costCfg.PodCPUSecond)
ru := float64(c.costCfg.PodCPUCost(deltaCPU))

var deltaPGWireEgressBytes uint64
if newExternalUsage.PGWireEgressBytes > c.run.externalUsage.PGWireEgressBytes {
deltaPGWireEgressBytes = newExternalUsage.PGWireEgressBytes - c.run.externalUsage.PGWireEgressBytes
ru += float64(deltaPGWireEgressBytes) * float64(c.costCfg.PGWireEgressByte)
ru += float64(c.costCfg.PGWireEgressCost(int64(deltaPGWireEgressBytes)))
}

// KV RUs are not included here, these metrics correspond only to the SQL pod.
Expand Down Expand Up @@ -657,24 +657,25 @@ func (c *tenantSideCostController) OnResponse(
if multitenant.HasTenantCostControlExemption(ctx) {
return
}
if resp.ReadBytes() > 0 {
if resp.IsRead() {
c.limiter.RemoveTokens(c.timeSource.Now(), c.costCfg.ResponseCost(resp))
}

c.mu.Lock()
defer c.mu.Unlock()

if isWrite, writeBytes := req.IsWrite(); isWrite {
c.mu.consumption.WriteRequests++
c.mu.consumption.WriteBytes += uint64(writeBytes)
writeRU := float64(c.costCfg.KVWriteCost(writeBytes))
if req.IsWrite() {
c.mu.consumption.WriteBatches++
c.mu.consumption.WriteRequests += uint64(req.WriteCount())
c.mu.consumption.WriteBytes += uint64(req.WriteBytes())
writeRU := float64(c.costCfg.RequestCost(req))
c.mu.consumption.KVRU += writeRU
c.mu.consumption.RU += writeRU
} else {
c.mu.consumption.ReadRequests++
readBytes := resp.ReadBytes()
c.mu.consumption.ReadBytes += uint64(readBytes)
readRU := float64(c.costCfg.KVReadCost(readBytes))
} else if resp.IsRead() {
c.mu.consumption.ReadBatches++
c.mu.consumption.ReadRequests += uint64(resp.ReadCount())
c.mu.consumption.ReadBytes += uint64(resp.ReadBytes())
readRU := float64(c.costCfg.ResponseCost(resp))
c.mu.consumption.KVRU += readRU
c.mu.consumption.RU += readRU
}
Expand All @@ -692,7 +693,8 @@ func (c *tenantSideCostController) shouldAccountForExternalIORUs() bool {
return c.modeMu.externalIORUAccountingMode != externalIORUAccountingOff
}

// ExternalIOWriteWait is part of the multitenant.TenantSideExternalIORecorder interface.
// ExternalIOWriteWait is part of the multitenant.TenantSideExternalIORecorder
// interface.
func (c *tenantSideCostController) ExternalIOWriteWait(ctx context.Context, bytes int64) error {
if !c.shouldWaitForExternalIORUs() {
return nil
Expand All @@ -704,7 +706,8 @@ func (c *tenantSideCostController) ExternalIOWriteWait(ctx context.Context, byte
return c.limiter.Wait(ctx, ru)
}

// ExternalIOWriteScucess is part of the multitenant.TenantSideExternalIORecorder interface.
// ExternalIOWriteSuccess is part of the multitenant.TenantSideExternalIORecorder
// interface.
func (c *tenantSideCostController) ExternalIOWriteSuccess(ctx context.Context, bytes int64) {
if multitenant.HasTenantCostControlExemption(ctx) {
return
Expand All @@ -718,7 +721,8 @@ func (c *tenantSideCostController) ExternalIOWriteSuccess(ctx context.Context, b
c.mu.Unlock()
}

// ExternalIOWriteFailure is part of the multitenant.TenantSideExternalIORecorder interface.
// ExternalIOWriteFailure is part of the multitenant.TenantSideExternalIORecorder
// interface.
//
// The given byte count should be the number of bytes that were never
// actually written.
Expand All @@ -741,8 +745,8 @@ func (c *tenantSideCostController) ExternalIOWriteFailure(
c.mu.Unlock()
}

// ExternalIOReadWait is part of the multitenant.TenantSideExternalIORecorder interface. We wait
// after adding the RUs since these reads already happened.
// ExternalIOReadWait is part of the multitenant.TenantSideExternalIORecorder
// interface. We wait after adding the RUs since these reads already happened.
func (c *tenantSideCostController) ExternalIOReadWait(ctx context.Context, bytes int64) error {
if multitenant.HasTenantCostControlExemption(ctx) {
return nil
Expand Down
24 changes: 15 additions & 9 deletions pkg/ccl/multitenantccl/tenantcostclient/tenant_side_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,14 +293,16 @@ func (ts *testState) asyncRequest(
func (ts *testState) request(
t *testing.T, d *datadriven.TestData, isWrite bool, args cmdArgs,
) string {
var writeBytes, readBytes int64
var writeCount, readCount, writeBytes, readBytes int64
if isWrite {
writeCount = 1
writeBytes = args.bytes
} else {
readCount = 1
readBytes = args.bytes
}
reqInfo := tenantcostmodel.TestingRequestInfo(isWrite, writeBytes)
respInfo := tenantcostmodel.TestingResponseInfo(readBytes)
reqInfo := tenantcostmodel.TestingRequestInfo(writeCount, writeBytes)
respInfo := tenantcostmodel.TestingResponseInfo(readCount, readBytes)
if args.label == "" {
ts.syncRequest(t, d, reqInfo, respInfo)
} else {
Expand Down Expand Up @@ -525,17 +527,19 @@ func (ts *testState) usage(t *testing.T, d *datadriven.TestData, args cmdArgs) s
return fmt.Sprintf(""+
"RU: %.2f\n"+
"KVRU: %.2f\n"+
"Reads: %d requests (%d bytes)\n"+
"Writes: %d requests (%d bytes)\n"+
"Reads: %d requests in %d batches (%d bytes)\n"+
"Writes: %d requests in %d batches (%d bytes)\n"+
"SQL Pods CPU seconds: %.2f\n"+
"PGWire egress: %d bytes\n"+
"ExternalIO egress: %d bytes\n"+
"ExternalIO ingress: %d bytes\n",
c.RU,
c.KVRU,
c.ReadRequests,
c.ReadBatches,
c.ReadBytes,
c.WriteRequests,
c.WriteBatches,
c.WriteBytes,
c.SQLPodsCPUSeconds,
c.PGWireEgressBytes,
Expand Down Expand Up @@ -668,19 +672,21 @@ func TestConsumption(t *testing.T) {
},
})
r := sqlutils.MakeSQLRunner(tenantDB)
r.Exec(t, "CREATE TABLE t (v STRING)")
// Create a secondary index to ensure that writes to both indexes are
// recorded in metrics.
r.Exec(t, "CREATE TABLE t (v STRING, w STRING, INDEX (w, v))")
// Do some writes and reads and check the reported consumption. Repeat the
// test a few times, since background requests can trick the test into
// passing.
for repeat := 0; repeat < 5; repeat++ {
beforeWrite := testProvider.waitForConsumption(t)
r.Exec(t, "INSERT INTO t SELECT repeat('1234567890', 1024) FROM generate_series(1, 10) AS g(i)")
r.Exec(t, "INSERT INTO t (v) SELECT repeat('1234567890', 1024) FROM generate_series(1, 10) AS g(i)")
const expectedBytes = 10 * 10 * 1024

afterWrite := testProvider.waitForConsumption(t)
delta := afterWrite
delta.Sub(&beforeWrite)
if delta.WriteRequests < 1 || delta.WriteBytes < expectedBytes {
if delta.WriteBatches < 1 || delta.WriteRequests < 2 || delta.WriteBytes < expectedBytes*2 {
t.Errorf("usage after write: %s", delta.String())
}

Expand All @@ -689,7 +695,7 @@ func TestConsumption(t *testing.T) {
afterRead := testProvider.waitForConsumption(t)
delta = afterRead
delta.Sub(&afterWrite)
if delta.ReadRequests < 1 || delta.ReadBytes < expectedBytes {
if delta.ReadBatches < 1 || delta.ReadRequests < 1 || delta.ReadBytes < expectedBytes {
t.Errorf("usage after read: %s", delta.String())
}
r.Exec(t, "DELETE FROM t WHERE true")
Expand Down
52 changes: 26 additions & 26 deletions pkg/ccl/multitenantccl/tenantcostclient/testdata/consumption
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ usage
----
RU: 0.00
KVRU: 0.00
Reads: 0 requests (0 bytes)
Writes: 0 requests (0 bytes)
Reads: 0 requests in 0 batches (0 bytes)
Writes: 0 requests in 0 batches (0 bytes)
SQL Pods CPU seconds: 0.00
PGWire egress: 0 bytes
ExternalIO egress: 0 bytes
Expand All @@ -16,10 +16,10 @@ read bytes=1024000

usage
----
RU: 105.83
KVRU: 105.83
Reads: 1 requests (1024000 bytes)
Writes: 0 requests (0 bytes)
RU: 5.77
KVRU: 5.77
Reads: 0 requests in 0 batches (0 bytes)
Writes: 0 requests in 1 batches (0 bytes)
SQL Pods CPU seconds: 0.00
PGWire egress: 0 bytes
ExternalIO egress: 0 bytes
Expand All @@ -30,10 +30,10 @@ write bytes=1024

usage
----
RU: 113.58
KVRU: 113.58
Reads: 1 requests (1024000 bytes)
Writes: 1 requests (1024 bytes)
RU: 19.30
KVRU: 19.30
Reads: 0 requests in 0 batches (0 bytes)
Writes: 1 requests in 2 batches (1024 bytes)
SQL Pods CPU seconds: 0.00
PGWire egress: 0 bytes
ExternalIO egress: 0 bytes
Expand All @@ -45,10 +45,10 @@ cpu

usage
----
RU: 1113.58
KVRU: 113.58
Reads: 1 requests (1024000 bytes)
Writes: 1 requests (1024 bytes)
RU: 1019.30
KVRU: 19.30
Reads: 0 requests in 0 batches (0 bytes)
Writes: 1 requests in 2 batches (1024 bytes)
SQL Pods CPU seconds: 1.00
PGWire egress: 0 bytes
ExternalIO egress: 0 bytes
Expand All @@ -65,10 +65,10 @@ write bytes=4096

usage
----
RU: 1148.39
KVRU: 148.39
Reads: 2 requests (1089536 bytes)
Writes: 3 requests (9216 bytes)
RU: 1064.00
KVRU: 64.00
Reads: 0 requests in 0 batches (0 bytes)
Writes: 3 requests in 5 batches (9216 bytes)
SQL Pods CPU seconds: 1.00
PGWire egress: 0 bytes
ExternalIO egress: 0 bytes
Expand All @@ -80,10 +80,10 @@ cpu

usage
----
RU: 3601148.39
KVRU: 148.39
Reads: 2 requests (1089536 bytes)
Writes: 3 requests (9216 bytes)
RU: 3601064.00
KVRU: 64.00
Reads: 0 requests in 0 batches (0 bytes)
Writes: 3 requests in 5 batches (9216 bytes)
SQL Pods CPU seconds: 3601.00
PGWire egress: 0 bytes
ExternalIO egress: 0 bytes
Expand All @@ -95,10 +95,10 @@ pgwire-egress

usage
----
RU: 3601158.74
KVRU: 148.39
Reads: 2 requests (1089536 bytes)
Writes: 3 requests (9216 bytes)
RU: 3601074.34
KVRU: 64.00
Reads: 0 requests in 0 batches (0 bytes)
Writes: 3 requests in 5 batches (9216 bytes)
SQL Pods CPU seconds: 3601.00
PGWire egress: 12345 bytes
ExternalIO egress: 0 bytes
Expand Down
4 changes: 2 additions & 2 deletions pkg/ccl/multitenantccl/tenantcostclient/testdata/debt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ token-bucket-response

timers
----
00:00:01.600
00:00:01.606
00:00:09.000

advance
Expand All @@ -43,7 +43,7 @@ write bytes=1024 label=w2

timers
----
00:00:02.011
00:00:02.019
00:00:09.000

advance
Expand Down
2 changes: 1 addition & 1 deletion pkg/ccl/multitenantccl/tenantcostclient/testdata/fallback
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ tick

timers
----
00:00:29.653
00:00:29.660

advance
10s
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ token-bucket-response
timers
----
00:00:09.000
00:04:46.544
00:04:46.601

configure
error: true
Expand All @@ -45,7 +45,7 @@ low-ru

timers
----
00:04:46.544
00:04:46.601

advance
2s
Expand All @@ -60,7 +60,7 @@ tick

timers
----
00:00:39.454
00:00:39.460

not-completed label=w2
----
Expand Down
6 changes: 3 additions & 3 deletions pkg/ccl/multitenantccl/tenantcostclient/testdata/throttling
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ token-bucket-response
timers
----
00:00:11.000
00:00:11.359
00:00:11.391

advance
1s
Expand Down Expand Up @@ -75,7 +75,7 @@ token-bucket-response
timers
----
00:00:22.000
00:00:50.013
00:00:50.051

advance
20s
Expand All @@ -88,7 +88,7 @@ await label=w1
timers
----
00:00:42.000
00:00:50.013
00:00:50.051

not-completed label=w2
----
Expand Down
Loading

0 comments on commit 61674bc

Please sign in to comment.