From 5df1b2509e83d2e5d140029c25e816583c511655 Mon Sep 17 00:00:00 2001
From: Rafi Shamim
Date: Fri, 7 Jul 2023 01:24:48 -0400
Subject: [PATCH 1/2] copy: add microbenchmark for COPY ... FROM STDIN CSV

Release note: None
---
 pkg/sql/copy/copy_test.go | 57 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/pkg/sql/copy/copy_test.go b/pkg/sql/copy/copy_test.go
index be864cd43393..33edb995dcac 100644
--- a/pkg/sql/copy/copy_test.go
+++ b/pkg/sql/copy/copy_test.go
@@ -48,6 +48,7 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
 	"github.com/cockroachdb/cockroach/pkg/util/log"
 	"github.com/cockroachdb/cockroach/pkg/util/randutil"
+	"github.com/cockroachdb/cockroach/pkg/util/stop"
 	"github.com/cockroachdb/datadriven"
 	"github.com/cockroachdb/errors"
 	"github.com/jackc/pgconn"
@@ -770,3 +771,59 @@ func min(a, b int) int {
 	}
 	return b
 }
+
+func BenchmarkCopyCSVEndToEnd(b *testing.B) {
+	defer leaktest.AfterTest(b)()
+	defer log.Scope(b).Close(b)
+
+	ctx := context.Background()
+	s, db, _ := serverutils.StartServer(b, base.TestServerArgs{
+		DefaultTestTenant: base.TestTenantDisabled,
+	})
+	defer s.Stopper().Stop(ctx)
+
+	pgURL, cleanup, err := sqlutils.PGUrlE(
+		s.ServingSQLAddr(),
+		"BenchmarkCopyCSVEndToEnd", /* prefix */
+		url.User(username.RootUser),
+	)
+	require.NoError(b, err)
+	s.Stopper().AddCloser(stop.CloserFn(cleanup))
+
+	_, err = db.Exec("CREATE TABLE t (i INT PRIMARY KEY, s STRING)")
+	require.NoError(b, err)
+
+	conn, err := pgx.Connect(ctx, pgURL.String())
+	require.NoError(b, err)
+
+	rng, _ := randutil.NewTestRand()
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		b.StopTimer()
+		// Create an input of 1_000_000 rows.
+		buf := &bytes.Buffer{}
+		for j := 0; j < 1_000_000; j++ {
+			buf.WriteString(strconv.Itoa(j))
+			buf.WriteString(",")
+			str := randutil.RandString(rng, rng.Intn(50), "abc123\n")
+			buf.WriteString("\"")
+			buf.WriteString(str)
+			buf.WriteString("\"\n")
+		}
+		b.StartTimer()
+
+		// Run the COPY.
+		_, err = conn.PgConn().CopyFrom(ctx, buf, "COPY t FROM STDIN CSV")
+		require.NoError(b, err)
+
+		// Verify that the data was inserted, then reset the table so the
+		// next iteration does not fail on duplicate primary keys.
+		b.StopTimer()
+		var count int
+		err = db.QueryRow("SELECT count(*) FROM t").Scan(&count)
+		require.NoError(b, err)
+		require.Equal(b, 1_000_000, count)
+		_, err = db.Exec("TRUNCATE TABLE t")
+		require.NoError(b, err)
+		b.StartTimer()
+	}
+}

From b72784a84bac8545ae5860ffc695c882c320e587 Mon Sep 17 00:00:00 2001
From: Rafi Shamim
Date: Tue, 4 Jul 2023 02:34:37 -0400
Subject: [PATCH 2/2] copy: avoid recopying buffer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use a []byte instead of a bytes.Buffer, so that the cursor into the
buffer is only advanced once we know the data should be consumed.
Previously, an incomplete row read out of the bytes.Buffer had to be
written back into it to await the next CopyData message, recopying the
bytes each time. This provides a 25% reduction in allocations in
BenchmarkCopyCSVEndToEnd.
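To illustrate the pattern (a minimal, self-contained sketch; lineReader
and next are hypothetical names, not the copyMachine API): with a plain
slice, peeking at the buffer is just re-slicing, so a partial row costs
nothing to retain.

```go
package main

import (
	"bytes"
	"fmt"
)

type lineReader struct {
	buf []byte // unconsumed input; incoming chunks are appended here
}

// next returns the next newline-terminated line without copying. If the
// buffer ends in a partial line, nothing is consumed: the cursor only
// advances once we know the bytes form a complete row.
func (r *lineReader) next() (line []byte, ok bool) {
	idx := bytes.IndexByte(r.buf, '\n')
	if idx == -1 {
		// Incomplete row: leave it in the buffer for the next chunk.
		return nil, false
	}
	line = r.buf[:idx]    // sub-slice, no allocation
	r.buf = r.buf[idx+1:] // advance the cursor past the delimiter
	return line, true
}

func main() {
	r := &lineReader{buf: []byte("1,a\n2,b\n3,part")}
	for line, ok := r.next(); ok; line, ok = r.next() {
		fmt.Printf("row: %q\n", line)
	}
	fmt.Printf("left over: %q\n", r.buf) // "3,part" awaits more input
}
```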
```
goos: darwin
goarch: arm64
                   │ /var/folders/p3/c61_z_vd3r7dr1_hnztm3ryr0000gq/T/tmp.Dm00yAlG6O/bench.a51e8a7806c │ /var/folders/p3/c61_z_vd3r7dr1_hnztm3ryr0000gq/T/tmp.Dm00yAlG6O/bench.0c9d3d5efb0 │
                   │        sec/op         │    sec/op     vs base                │
CopyCSVEndToEnd-10            3.991 ± 3%      3.883 ± 2%   -2.71% (p=0.019 n=10)

                   │ /var/folders/p3/c61_z_vd3r7dr1_hnztm3ryr0000gq/T/tmp.Dm00yAlG6O/bench.a51e8a7806c │ /var/folders/p3/c61_z_vd3r7dr1_hnztm3ryr0000gq/T/tmp.Dm00yAlG6O/bench.0c9d3d5efb0 │
                   │         B/op          │     B/op      vs base                │
CopyCSVEndToEnd-10           8.322Gi ± 2%    8.304Gi ± 1%  ~ (p=0.971 n=10)

                   │ /var/folders/p3/c61_z_vd3r7dr1_hnztm3ryr0000gq/T/tmp.Dm00yAlG6O/bench.a51e8a7806c │ /var/folders/p3/c61_z_vd3r7dr1_hnztm3ryr0000gq/T/tmp.Dm00yAlG6O/bench.0c9d3d5efb0 │
                   │       allocs/op       │   allocs/op   vs base                │
CopyCSVEndToEnd-10            18.30M ± 0%     13.79M ± 0%  -24.62% (p=0.000 n=10)
```

Release note: None
---
 pkg/sql/copy_from.go | 162 +++++++++++++++++++++++--------------------
 1 file changed, 86 insertions(+), 76 deletions(-)

diff --git a/pkg/sql/copy_from.go b/pkg/sql/copy_from.go
index f955b4df07a9..13a6702c3d60 100644
--- a/pkg/sql/copy_from.go
+++ b/pkg/sql/copy_from.go
@@ -218,7 +218,7 @@ type copyMachine struct {
 	csvReader *csv.Reader
 	// buf is used to parse input data into rows. It also accumulates a partial
 	// row between protocol messages.
-	buf bytes.Buffer
+	buf []byte
 	// rows accumulates a batch of rows to be eventually inserted.
 	rows rowcontainer.RowContainer
 	// insertedRows keeps track of the total number of rows inserted by the
@@ -616,20 +616,20 @@ const (
 func (c *copyMachine) processCopyData(ctx context.Context, data string, final bool) (retErr error) {
 	// At the end, adjust the mem accounting to reflect what's left in the buffer.
 	defer func() {
-		if err := c.bufMemAcc.ResizeTo(ctx, int64(c.buf.Cap())); err != nil && retErr == nil {
+		if err := c.bufMemAcc.ResizeTo(ctx, int64(cap(c.buf))); err != nil && retErr == nil {
 			retErr = err
 		}
 	}()
-	if len(data) > (c.buf.Cap() - c.buf.Len()) {
+	if len(data) > (cap(c.buf) - len(c.buf)) {
 		// If it looks like the buffer will need to allocate to accommodate data,
 		// account for the memory here. This is not particularly accurate - we don't
 		// know how much the buffer will actually grow by.
-		if err := c.bufMemAcc.ResizeTo(ctx, int64(len(data))); err != nil {
+		if err := c.bufMemAcc.Grow(ctx, int64(len(data))); err != nil {
 			return err
 		}
 	}
-	c.buf.WriteString(data)
+	c.buf = append(c.buf, data...)
 	var readFn func(ctx context.Context, final bool) (brk bool, err error)
 	switch c.format {
 	case tree.CopyFormatText:
@@ -641,7 +641,7 @@ func (c *copyMachine) processCopyData(ctx context.Context, data string, final bo
 	default:
 		panic("unknown copy format")
 	}
-	for c.buf.Len() > 0 {
+	for len(c.buf) > 0 {
 		brk, err := readFn(ctx, final)
 		if err != nil {
 			return err
@@ -664,11 +664,11 @@ func (c *copyMachine) processCopyData(ctx context.Context, data string, final bo
 		// If we have a full batch of rows or we have exceeded maxRowMem process
 		// them. Only set finalBatch to true if this is the last
 		// CopyData segment AND we have no more data in the buffer.
-		if len := c.currentBatchSize(); len > 0 && (c.rowsMemAcc.Used() > c.maxRowMem || len >= c.copyBatchRowSize || batchDone) {
-			if len != c.copyBatchRowSize {
-				log.VEventf(ctx, 2, "copy batch of %d rows flushing due to memory usage %d > %d", len, c.rowsMemAcc.Used(), c.maxRowMem)
+		if length := c.currentBatchSize(); length > 0 && (c.rowsMemAcc.Used() > c.maxRowMem || length >= c.copyBatchRowSize || batchDone) {
+			if length != c.copyBatchRowSize {
+				log.VEventf(ctx, 2, "copy batch of %d rows flushing due to memory usage %d > %d", length, c.rowsMemAcc.Used(), c.maxRowMem)
 			}
-			if err := c.processRows(ctx, final && c.buf.Len() == 0); err != nil {
+			if err := c.processRows(ctx, final && len(c.buf) == 0); err != nil {
 				return err
 			}
 		}
@@ -692,24 +692,26 @@ func (c *copyMachine) currentBatchSize() int {
 }
 
 func (c *copyMachine) readTextData(ctx context.Context, final bool) (brk bool, err error) {
-	line, err := c.buf.ReadBytes(lineDelim)
-	if err != nil {
-		if err != io.EOF {
-			return false, err
-		} else if !final {
-			// Put the incomplete row back in the buffer, to be processed next time.
-			c.buf.Write(line)
+	idx := bytes.IndexByte(c.buf, lineDelim)
+	var line []byte
+	if idx == -1 {
+		if !final {
+			// Leave the incomplete row in the buffer, to be processed next time.
 			return true, nil
 		}
+		// If this is the final batch, use the whole buffer.
+		line = c.buf[:len(c.buf)]
+		c.buf = c.buf[len(c.buf):]
 	} else {
 		// Remove lineDelim from end.
-		line = line[:len(line)-1]
+		line = c.buf[:idx]
+		c.buf = c.buf[idx+1:]
 		// Remove a single '\r' at EOL, if present.
 		if len(line) > 0 && line[len(line)-1] == '\r' {
 			line = line[:len(line)-1]
 		}
 	}
-	if c.buf.Len() == 0 && bytes.Equal(line, []byte(`\.`)) {
+	if len(c.buf) == 0 && bytes.Equal(line, []byte(`\.`)) {
 		return true, nil
 	}
 	err = c.readTextTuple(ctx, line)
@@ -719,29 +721,29 @@ func (c *copyMachine) readTextData(ctx context.Context, final bool) (brk bool, e
 func (c *copyMachine) readCSVData(ctx context.Context, final bool) (brk bool, err error) {
 	var fullLine []byte
 	quoteCharsSeen := 0
+	offset := 0
 	// Keep reading lines until we encounter a newline that is not inside a
 	// quoted field, and therefore signifies the end of a CSV record.
 	for {
-		line, err := c.buf.ReadBytes(lineDelim)
-		fullLine = append(fullLine, line...)
-		if err != nil {
-			if err == io.EOF {
-				if final {
-					// If we reached EOF and this is the final chunk of input data, then
-					// try to process it.
-					break
-				} else {
-					// If there's more CopyData, put the incomplete row back in the
-					// buffer, to be processed next time.
-					c.buf.Write(fullLine)
-					return true, nil
-				}
-			} else {
-				return false, err
-			}
-		}
-
-		// Now we need to calculate if we are have reached the end of the quote.
+		idx := bytes.IndexByte(c.buf[offset:], lineDelim)
+		if idx == -1 {
+			if final {
+				// If we reached EOF and this is the final chunk of input data, then
+				// try to process it.
+				fullLine = append(fullLine, c.buf[offset:]...)
+				c.buf = c.buf[len(c.buf):]
+				break
+			} else {
+				// If there's more CopyData, keep the incomplete row in the
+				// buffer, to be processed next time.
+				return true, nil
+			}
+		}
+		// Include the delimiter in the line.
+		line := c.buf[offset : offset+idx+1]
+		offset += idx + 1
+		fullLine = append(fullLine, line...)
+		// Now we need to calculate if we have reached the end of the quote.
 		// If so, break out.
 		if c.csvEscape == 0 {
 			// CSV escape is not specified and hence defaults to '"'.
@@ -777,6 +779,7 @@ func (c *copyMachine) readCSVData(ctx context.Context, final bool) (brk bool, er
 			}
 		}
 		if quoteCharsSeen%2 == 0 {
+			c.buf = c.buf[offset:]
 			break
 		}
 	}
@@ -792,7 +795,7 @@ func (c *copyMachine) readCSVData(ctx context.Context, final bool) (brk bool, er
 	record, err := c.csvReader.Read()
 	// Look for end of data before checking for errors, since a field count
 	// error will still return record data.
-	if len(record) == 1 && !record[0].Quoted && record[0].Val == endOfData && c.buf.Len() == 0 {
+	if len(record) == 1 && !record[0].Quoted && record[0].Val == endOfData && len(c.buf) == 0 {
 		return true, nil
 	}
 	if err != nil {
@@ -868,25 +871,31 @@ func (c *copyMachine) readBinaryData(ctx context.Context, final bool) (brk bool,
 	}
 	switch c.binaryState {
 	case binaryStateNeedSignature:
-		if readSoFar, err := c.readBinarySignature(); err != nil {
+		n, err := c.readBinarySignature()
+		if err != nil {
 			// If this isn't the last message and we saw incomplete data, then
-			// put it back in the buffer to process more next time.
-			if !final && (err == io.EOF || err == io.ErrUnexpectedEOF) {
-				c.buf.Write(readSoFar)
+			// leave it in the buffer to process more next time.
+			if !final && err == io.ErrUnexpectedEOF {
 				return true, nil
 			}
+			c.buf = c.buf[n:]
 			return false, err
 		}
+		c.buf = c.buf[n:]
+		return false, nil
 	case binaryStateRead:
-		if readSoFar, err := c.readBinaryTuple(ctx); err != nil {
+		n, err := c.readBinaryTuple(ctx)
+		if err != nil {
 			// If this isn't the last message and we saw incomplete data, then
-			// put it back in the buffer to process more next time.
-			if !final && (err == io.EOF || err == io.ErrUnexpectedEOF) {
-				c.buf.Write(readSoFar)
+			// leave it in the buffer to process more next time.
+			if !final && err == io.ErrUnexpectedEOF {
 				return true, nil
 			}
+			c.buf = c.buf[n:]
 			return false, errors.Wrapf(err, "read binary tuple")
 		}
+		c.buf = c.buf[n:]
+		return false, nil
 	case binaryStateFoundTrailer:
 		if !final {
 			return false, pgerror.New(pgcode.BadCopyFileFormat,
@@ -896,34 +905,33 @@ func (c *copyMachine) readBinaryData(ctx context.Context, final bool) (brk bool,
 	default:
 		panic("unknown binary state")
 	}
-	return false, nil
 }
 
-func (c *copyMachine) readBinaryTuple(ctx context.Context) (readSoFar []byte, err error) {
+func (c *copyMachine) readBinaryTuple(ctx context.Context) (bytesRead int, err error) {
 	var fieldCount int16
 	var fieldCountBytes [2]byte
-	n, err := io.ReadFull(&c.buf, fieldCountBytes[:])
-	readSoFar = append(readSoFar, fieldCountBytes[:n]...)
-	if err != nil {
-		return readSoFar, err
+	n := copy(fieldCountBytes[:], c.buf[bytesRead:])
+	bytesRead += n
+	if n < len(fieldCountBytes) {
+		return bytesRead, io.ErrUnexpectedEOF
 	}
 	fieldCount = int16(binary.BigEndian.Uint16(fieldCountBytes[:]))
 	if fieldCount == -1 {
 		c.binaryState = binaryStateFoundTrailer
-		return nil, nil
+		return bytesRead, nil
 	}
 	if fieldCount < 1 {
-		return nil, pgerror.Newf(pgcode.BadCopyFileFormat,
+		return bytesRead, pgerror.Newf(pgcode.BadCopyFileFormat,
 			"unexpected field count: %d", fieldCount)
 	}
 	datums := make(tree.Datums, fieldCount)
 	var byteCount int32
 	var byteCountBytes [4]byte
 	for i := range datums {
-		n, err := io.ReadFull(&c.buf, byteCountBytes[:])
-		readSoFar = append(readSoFar, byteCountBytes[:n]...)
-		if err != nil {
-			return readSoFar, err
+		n := copy(byteCountBytes[:], c.buf[bytesRead:])
+		bytesRead += n
+		if n < len(byteCountBytes) {
+			return bytesRead, io.ErrUnexpectedEOF
 		}
 		byteCount = int32(binary.BigEndian.Uint32(byteCountBytes[:]))
 		if byteCount == -1 {
@@ -931,10 +939,10 @@ func (c *copyMachine) readBinaryTuple(ctx context.Context) (readSoFar []byte, er
 			continue
 		}
 		data := make([]byte, byteCount)
-		n, err = io.ReadFull(&c.buf, data)
-		readSoFar = append(readSoFar, data[:n]...)
-		if err != nil {
-			return readSoFar, err
+		n = copy(data, c.buf[bytesRead:])
+		bytesRead += n
+		if n < len(data) {
+			return bytesRead, io.ErrUnexpectedEOF
 		}
 		d, err := pgwirebase.DecodeDatum(
 			ctx,
@@ -944,33 +952,35 @@ func (c *copyMachine) readBinaryTuple(ctx context.Context) (readSoFar []byte, er
 			data,
 		)
 		if err != nil {
-			return nil, pgerror.Wrapf(err, pgcode.BadCopyFileFormat,
+			return bytesRead, pgerror.Wrapf(err, pgcode.BadCopyFileFormat,
 				"decode datum as %s: %s", c.resultColumns[i].Typ.SQLString(), data)
 		}
 		datums[i] = d
 	}
 	_, err = c.rows.AddRow(ctx, datums)
 	if err != nil {
-		return nil, err
+		return bytesRead, err
 	}
-	return nil, nil
+	return bytesRead, nil
 }
 
-func (c *copyMachine) readBinarySignature() ([]byte, error) {
-	// This is the standard 11-byte binary signature with the flags and
-	// header 32-bit integers appended since we only support the zero value
-	// of them.
-	const binarySignature = "PGCOPY\n\377\r\n\000" + "\x00\x00\x00\x00" + "\x00\x00\x00\x00"
-	var sig [11 + 8]byte
-	if n, err := io.ReadFull(&c.buf, sig[:]); err != nil {
-		return sig[:n], err
-	}
-	if !bytes.Equal(sig[:], []byte(binarySignature)) {
-		return sig[:], pgerror.New(pgcode.BadCopyFileFormat,
+// This is the standard 11-byte binary signature with the flags and
+// header 32-bit integers appended since we only support the zero value
+// of them.
+var copyBinarySignature = [19]byte{'P', 'G', 'C', 'O', 'P', 'Y', '\n', '\377', '\r', '\n', '\000', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}
+
+func (c *copyMachine) readBinarySignature() (int, error) {
+	var sig [19]byte
+	n := copy(sig[:], c.buf)
+	if n < len(sig) {
+		return n, io.ErrUnexpectedEOF
+	}
+	if sig != copyBinarySignature {
+		return n, pgerror.New(pgcode.BadCopyFileFormat,
 			"unrecognized binary copy signature")
 	}
 	c.binaryState = binaryStateRead
-	return sig[:], nil
+	return n, nil
 }
 
 // preparePlannerForCopy resets the planner so that it can be used during
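The binary path uses the same cursor-and-copy idiom. A minimal standalone
sketch (readUint16 is a hypothetical name, not the copyMachine API):
copy reports how many bytes were available, and a short read maps to
io.ErrUnexpectedEOF so the caller can hold the partial field in the
buffer until the next CopyData message instead of treating it as
corruption.

```go
package main

import (
	"encoding/binary"
	"fmt"
	"io"
)

// readUint16 reads a big-endian uint16 out of buf at *cursor. The
// cursor advances only by the bytes actually available, so a retry
// after more data arrives can pick up where this call left off.
func readUint16(buf []byte, cursor *int) (uint16, error) {
	var tmp [2]byte
	n := copy(tmp[:], buf[*cursor:])
	*cursor += n
	if n < len(tmp) {
		return 0, io.ErrUnexpectedEOF // partial field: wait for more input
	}
	return binary.BigEndian.Uint16(tmp[:]), nil
}

func main() {
	buf := []byte{0x00, 0x02, 0x01} // one complete uint16, then a truncated field
	cursor := 0
	v, err := readUint16(buf, &cursor)
	fmt.Println(v, err) // 2 <nil>
	_, err = readUint16(buf, &cursor)
	fmt.Println(err) // unexpected EOF
}
```

Because nothing is consumed until a field is complete, the retry path
needs no write-back into the buffer, which is where the allocation
savings in this patch come from.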