From 2643e24406f558b71db05b5da287c24915103feb Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Mon, 20 Jul 2020 11:32:51 -0700 Subject: [PATCH] sql: fix pagination in UPSERT `optTableUpserter` was incorrectly using the `curBatchSize` method implementation of `tableUpserterBase` which returns the number of rows in the `insertRows` row container, but that container is not actually used by `optTableUpserter`. As a result, `curBatchSize` was always considered 0, and we didn't perform the pagination on the UPSERTs when driven by the optimizer. Release note (bug fix): Previously, CockroachDB could hit a "command is too large" error when performing an UPSERT operation with many values. Internally, we attempt to perform such an operation by splitting it into "batches", but the batching mechanism was broken. --- pkg/sql/logictest/testdata/logic_test/upsert | 21 ++++++++++++++++++++ pkg/sql/tablewriter_upsert_opt.go | 7 ++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/pkg/sql/logictest/testdata/logic_test/upsert b/pkg/sql/logictest/testdata/logic_test/upsert index e3b0bb15a80b..ff2879f6dc67 100644 --- a/pkg/sql/logictest/testdata/logic_test/upsert +++ b/pkg/sql/logictest/testdata/logic_test/upsert @@ -985,3 +985,24 @@ query III SELECT * from table38627 ---- 1 1 5 + +# Regression test for UPSERT batching logic (#51391). +statement ok +SET CLUSTER SETTING kv.raft.command.max_size='4MiB'; +CREATE TABLE src (s STRING); +CREATE TABLE dest (s STRING); +INSERT INTO src +SELECT + 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' +FROM + generate_series(1, 50000) + +# This statement produces a raft command of about 6.6 MiB in size, so if the +# batching logic is incorrect, we'll encounter "command is too large" error. 
+statement ok +UPSERT INTO dest (s) (SELECT s FROM src) + +statement ok +RESET CLUSTER SETTING kv.raft.command.max_size; +DROP TABLE src; +DROP TABLE dest diff --git a/pkg/sql/tablewriter_upsert_opt.go b/pkg/sql/tablewriter_upsert_opt.go index 7095e4880676..dce48fd3a714 100644 --- a/pkg/sql/tablewriter_upsert_opt.go +++ b/pkg/sql/tablewriter_upsert_opt.go @@ -92,7 +92,7 @@ func (tu *optTableUpserter) init(txn *client.Txn, evalCtx *tree.EvalContext) err tu.rowsUpserted = rowcontainer.NewRowContainer( evalCtx.Mon.MakeBoundAccount(), sqlbase.ColTypeInfoFromColDescs(tu.returnCols), - tu.insertRows.Len(), + 0, /* rowCapacity */ ) } @@ -160,6 +160,11 @@ func (tu *optTableUpserter) atBatchEnd(ctx context.Context, traceKV bool) error return nil } +// curBatchSize is part of the extendedTableWriter interface. Note that we need +// to override tableUpserterBase.curBatchSize because the optimizer-driven +// UPSERTs do not store the insert rows in insertRows row container. +func (tu *optTableUpserter) curBatchSize() int { return tu.batchSize } + // insertNonConflictingRow inserts the given source row into the table when // there was no conflict. If the RETURNING clause was specified, then the // inserted row is stored in the rowsUpserted collection.