From 7daca7006346ddfcb39254805f8a46a0a9a90e8f Mon Sep 17 00:00:00 2001 From: Oliver Tan Date: Wed, 23 Mar 2022 12:15:20 +1100 Subject: [PATCH 1/5] sql: amend COPY to use CRDB csv implementation Release note: None --- pkg/sql/BUILD.bazel | 1 + pkg/sql/copy.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/sql/BUILD.bazel b/pkg/sql/BUILD.bazel index e66c79b0cc45..2ef7d03e37dd 100644 --- a/pkg/sql/BUILD.bazel +++ b/pkg/sql/BUILD.bazel @@ -407,6 +407,7 @@ go_library( "//pkg/util/ctxgroup", "//pkg/util/duration", "//pkg/util/encoding", + "//pkg/util/encoding/csv", "//pkg/util/envutil", "//pkg/util/errorutil", "//pkg/util/errorutil/unimplemented", diff --git a/pkg/sql/copy.go b/pkg/sql/copy.go index 7c6f5d07db3e..5ecbbc017a1f 100644 --- a/pkg/sql/copy.go +++ b/pkg/sql/copy.go @@ -14,7 +14,6 @@ import ( "bytes" "context" "encoding/binary" - "encoding/csv" "io" "strconv" "strings" @@ -32,6 +31,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb" "github.com/cockroachdb/cockroach/pkg/sql/types" + "github.com/cockroachdb/cockroach/pkg/util/encoding/csv" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/mon" "github.com/cockroachdb/errors" From 149a1db96ed8ee4bfac341e6910a3b36ae6fd694 Mon Sep 17 00:00:00 2001 From: Oliver Tan Date: Wed, 23 Mar 2022 14:32:36 +1100 Subject: [PATCH 2/5] parser: add unimplemented syntax for all the COPY options Release note: None --- docs/generated/sql/bnf/stmt_block.bnf | 3 ++ pkg/sql/parser/parse_test.go | 10 ++++++ pkg/sql/parser/sql.y | 45 +++++++++++++++++++++++++-- 3 files changed, 55 insertions(+), 3 deletions(-) diff --git a/docs/generated/sql/bnf/stmt_block.bnf b/docs/generated/sql/bnf/stmt_block.bnf index 8778db7b1233..ce1a2915ade0 100644 --- a/docs/generated/sql/bnf/stmt_block.bnf +++ b/docs/generated/sql/bnf/stmt_block.bnf @@ -1008,6 +1008,7 @@ unreserved_keyword ::= | 'FORCE_INDEX' | 'FORCE_ZIGZAG' | 
'FORWARD' + | 'FREEZE' | 'FUNCTION' | 'FUNCTIONS' | 'GENERATED' @@ -1023,6 +1024,7 @@ unreserved_keyword ::= | 'GRANTS' | 'GROUPS' | 'HASH' + | 'HEADER' | 'HIGH' | 'HISTOGRAM' | 'HOLD' @@ -1158,6 +1160,7 @@ unreserved_keyword ::= | 'PUBLICATION' | 'QUERIES' | 'QUERY' + | 'QUOTE' | 'RANGE' | 'RANGES' | 'READ' diff --git a/pkg/sql/parser/parse_test.go b/pkg/sql/parser/parse_test.go index 76f3216484ca..eca92f5b6608 100644 --- a/pkg/sql/parser/parse_test.go +++ b/pkg/sql/parser/parse_test.go @@ -399,6 +399,16 @@ func TestUnimplementedSyntax(t *testing.T) { {`COMMENT ON EXTENSION a`, 74777, `comment on extension`, ``}, {`COMMENT ON FUNCTION f() is 'f'`, 17511, ``, ``}, + + {`COPY t FROM STDIN OIDS`, 41608, `oids`, ``}, + {`COPY t FROM STDIN FREEZE`, 41608, `freeze`, ``}, + {`COPY t FROM STDIN HEADER`, 41608, `header`, ``}, + {`COPY t FROM STDIN ENCODING 'utf-8'`, 41608, `encoding`, ``}, + {`COPY t FROM STDIN QUOTE 'x'`, 41608, `quote`, ``}, + {`COPY t FROM STDIN ESCAPE 'x'`, 41608, `escape`, ``}, + {`COPY t FROM STDIN FORCE QUOTE *`, 41608, `quote`, ``}, + {`COPY t FROM STDIN FORCE NULL *`, 41608, `force null`, ``}, + {`COPY t FROM STDIN FORCE NOT NULL *`, 41608, `force not null`, ``}, {`COPY x FROM STDIN WHERE a = b`, 54580, ``, ``}, {`ALTER AGGREGATE a`, 74775, `alter aggregate`, ``}, diff --git a/pkg/sql/parser/sql.y b/pkg/sql/parser/sql.y index 42074084cb30..b6d7c93edab0 100644 --- a/pkg/sql/parser/sql.y +++ b/pkg/sql/parser/sql.y @@ -826,13 +826,13 @@ func (u *sqlSymUnion) cursorStmt() tree.CursorStmt { %token FAILURE FALSE FAMILY FETCH FETCHVAL FETCHTEXT FETCHVAL_PATH FETCHTEXT_PATH %token FILES FILTER %token FIRST FLOAT FLOAT4 FLOAT8 FLOORDIV FOLLOWING FOR FORCE FORCE_INDEX FORCE_ZIGZAG -%token FOREIGN FORWARD FROM FULL FUNCTION FUNCTIONS +%token FOREIGN FORWARD FREEZE FROM FULL FUNCTION FUNCTIONS %token GENERATED GEOGRAPHY GEOMETRY GEOMETRYM GEOMETRYZ GEOMETRYZM %token GEOMETRYCOLLECTION GEOMETRYCOLLECTIONM GEOMETRYCOLLECTIONZ GEOMETRYCOLLECTIONZM %token GLOBAL 
GOAL GRANT GRANTS GREATEST GROUP GROUPING GROUPS -%token HAVING HASH HIGH HISTOGRAM HOLD HOUR +%token HAVING HASH HEADER HIGH HISTOGRAM HOLD HOUR %token IDENTITY %token IF IFERROR IFNULL IGNORE_FOREIGN_KEYS ILIKE IMMEDIATE IMPORT IN INCLUDE @@ -869,7 +869,7 @@ func (u *sqlSymUnion) cursorStmt() tree.CursorStmt { %token POSITION PRECEDING PRECISION PREPARE PRESERVE PRIMARY PRIOR PRIORITY PRIVILEGES %token PROCEDURAL PUBLIC PUBLICATION -%token QUERIES QUERY +%token QUERIES QUERY QUOTE %token RANGE RANGES READ REAL REASON REASSIGN RECURSIVE RECURRING REF REFERENCES REFRESH %token REGCLASS REGION REGIONAL REGIONS REGNAMESPACE REGPROC REGPROCEDURE REGROLE REGTYPE REINDEX @@ -3519,6 +3519,42 @@ copy_options: { $$.val = &tree.CopyOptions{Null: $2.expr()} } +| OIDS error + { + return unimplementedWithIssueDetail(sqllex, 41608, "oids") + } +| FREEZE error + { + return unimplementedWithIssueDetail(sqllex, 41608, "freeze") + } +| HEADER error + { + return unimplementedWithIssueDetail(sqllex, 41608, "header") + } +| QUOTE SCONST error + { + return unimplementedWithIssueDetail(sqllex, 41608, "quote") + } +| ESCAPE SCONST error + { + return unimplementedWithIssueDetail(sqllex, 41608, "escape") + } +| FORCE QUOTE error + { + return unimplementedWithIssueDetail(sqllex, 41608, "force quote") + } +| FORCE NOT NULL error + { + return unimplementedWithIssueDetail(sqllex, 41608, "force not null") + } +| FORCE NULL error + { + return unimplementedWithIssueDetail(sqllex, 41608, "force null") + } +| ENCODING SCONST error + { + return unimplementedWithIssueDetail(sqllex, 41608, "encoding") + } // %Help: CANCEL // %Category: Group @@ -14048,6 +14084,7 @@ unreserved_keyword: | FORCE_INDEX | FORCE_ZIGZAG | FORWARD +| FREEZE | FUNCTION | FUNCTIONS | GENERATED @@ -14063,6 +14100,7 @@ unreserved_keyword: | GRANTS | GROUPS | HASH +| HEADER | HIGH | HISTOGRAM | HOLD @@ -14198,6 +14236,7 @@ unreserved_keyword: | PUBLICATION | QUERIES | QUERY +| QUOTE | RANGE | RANGES | READ From 
a50353972ac29cf24ec4a5a1e2483d8534c30d45 Mon Sep 17 00:00:00 2001 From: Oliver Tan Date: Wed, 23 Mar 2022 14:41:32 +1100 Subject: [PATCH 3/5] tree: add pgcodes to multiple specified COPY options Release note: None --- pkg/sql/sem/tree/copy.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pkg/sql/sem/tree/copy.go b/pkg/sql/sem/tree/copy.go index 1bdd2ff78f61..8e80314fba9c 100644 --- a/pkg/sql/sem/tree/copy.go +++ b/pkg/sql/sem/tree/copy.go @@ -10,7 +10,10 @@ package tree -import "github.com/cockroachdb/errors" +import ( + "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" + "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" +) // CopyFrom represents a COPY FROM statement. type CopyFrom struct { @@ -100,25 +103,25 @@ func (o CopyOptions) IsDefault() bool { func (o *CopyOptions) CombineWith(other *CopyOptions) error { if other.Destination != nil { if o.Destination != nil { - return errors.New("destination option specified multiple times") + return pgerror.Newf(pgcode.Syntax, "destination option specified multiple times") } o.Destination = other.Destination } if other.CopyFormat != CopyFormatText { if o.CopyFormat != CopyFormatText { - return errors.New("format option specified multiple times") + return pgerror.Newf(pgcode.Syntax, "format option specified multiple times") } o.CopyFormat = other.CopyFormat } if other.Delimiter != nil { if o.Delimiter != nil { - return errors.New("delimiter option specified multiple times") + return pgerror.Newf(pgcode.Syntax, "delimiter option specified multiple times") } o.Delimiter = other.Delimiter } if other.Null != nil { if o.Null != nil { - return errors.New("null option specified multiple times") + return pgerror.Newf(pgcode.Syntax, "null option specified multiple times") } o.Null = other.Null } From bbf0ab385e1ef920d5183e194a15a4e2e6b5bfe9 Mon Sep 17 00:00:00 2001 From: Oliver Tan Date: Thu, 24 Mar 2022 08:41:10 +1100 Subject: [PATCH 4/5] encoding/csv: add escape logic Add `ESCAPE` 
logic to the `encoding/csv` package, for exposure to SQL at a later stage. It is worth noting I wrote this in the "safest" backportable way possible. Ideally we'd rewrite the read logic to be more "parser"-like to account for the change in QUOTE case, but that's a lot riskier to backport. Release note: None --- pkg/util/encoding/csv/reader.go | 59 ++++++++++++++++++++++++++-- pkg/util/encoding/csv/reader_test.go | 32 ++++++++++++++- pkg/util/encoding/csv/writer.go | 10 +++-- pkg/util/encoding/csv/writer_test.go | 5 +++ 4 files changed, 98 insertions(+), 8 deletions(-) diff --git a/pkg/util/encoding/csv/reader.go b/pkg/util/encoding/csv/reader.go index 1f002d369ef1..7652a1b1f188 100644 --- a/pkg/util/encoding/csv/reader.go +++ b/pkg/util/encoding/csv/reader.go @@ -129,6 +129,10 @@ type Reader struct { // It is set to comma (',') by NewReader. Comma rune + // Escape is the character used to escape the quote character (`"`) and + // itself. It is set to `"` by NewReader. + Escape rune + // Comment, if not 0, is the comment character. Lines beginning with the // Comment character without preceding whitespace are ignored. // With leading whitespace the Comment character becomes part of the @@ -181,8 +185,9 @@ type Reader struct { // NewReader returns a new Reader that reads from r. func NewReader(r io.Reader) *Reader { return &Reader{ - Comma: ',', - r: bufio.NewReader(r), + Comma: ',', + Escape: '"', + r: bufio.NewReader(r), } } @@ -264,6 +269,36 @@ func nextRune(b []byte) rune { return r } +func (r *Reader) stripEscapeForReadRecord(in []byte) (ret []byte, trailingEscape bool) { + // Special speedup: calls to this always assume `"` escape characters should + // have no "s in the incoming byte array, so we can just return the byte + // array back. 
+ if r.Escape == '"' { + return in, false + } + ret = make([]byte, 0, len(in)) + curr := 0 + for curr < len(in) { + ru, l := utf8.DecodeRune(in[curr:]) + next := curr + l + if ru == r.Escape { + if next >= len(in) { + return ret, true + } + // Look at the next character. + // We only escape the escape character itself and the `"` character. + nextRu, nextRuLength := utf8.DecodeRune(in[next:]) + if nextRu == r.Escape || nextRu == '"' { + curr = next + next = curr + nextRuLength + } + } + ret = append(ret, in[curr:next]...) + curr = next + } + return ret, false +} + func (r *Reader) readRecord(dst []string) ([]string, error) { if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) { return nil, errInvalidDelim @@ -331,11 +366,27 @@ parseField: for { i := bytes.IndexByte(line, '"') if i >= 0 { - // Hit next quote. - r.recordBuffer = append(r.recordBuffer, line[:i]...) + // Note hasTrailingEscape is only true for escape characters that + // are not " - if it is ", IndexByte would guarantee there are no + // " characters beforehand. + contents, hasTrailingEscape := r.stripEscapeForReadRecord(line[:i]) + r.recordBuffer = append(r.recordBuffer, contents...) line = line[i+quoteLen:] + // If we are at a `"` character, and we have a character before + // that is an escape character, we are hitting a single " char. + if r.Escape != '"' && hasTrailingEscape { + r.recordBuffer = append(r.recordBuffer, '"') + continue + } + // Hit next quote. switch rn := nextRune(line); { case rn == '"': + // Do not expect "" if the escape character is different. + if r.Escape != '"' { + col := utf8.RuneCount(fullLine[:len(fullLine)-len(line)-quoteLen]) + err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote} + break parseField + } // `""` sequence (append quote). 
r.recordBuffer = append(r.recordBuffer, '"') line = line[quoteLen:] diff --git a/pkg/util/encoding/csv/reader_test.go b/pkg/util/encoding/csv/reader_test.go index 8f9a270ba44c..164a84eec154 100644 --- a/pkg/util/encoding/csv/reader_test.go +++ b/pkg/util/encoding/csv/reader_test.go @@ -31,6 +31,7 @@ func TestRead(t *testing.T) { // These fields are copied into the Reader Comma rune + Escape rune Comment rune UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1 FieldsPerRecord int @@ -388,6 +389,32 @@ x,,, Comma: 'X', Comment: 'X', Error: errInvalidDelim, + }, { + Name: "EscapeText", + Escape: 'x', + Input: `"x"",",","xxx"",x,"xxxx,"` + "\n", + Output: [][]string{{`"`, `,`, `x"`, `x`, `xx,`}}, + }, { + Name: "EscapeTextWithComma", + Escape: 'x', + Comma: 'x', + Input: `"x""x,x"xxx""x"xx"x"xxxx,"` + "\n", + Output: [][]string{{`"`, `,`, `x"`, `x`, `xx,`}}, + }, { + Name: "EscapeTextWithNonEscapingCharacter", + Escape: 'x', + Input: `"xxx,xa",",x,"` + "\n", + Output: [][]string{{`xx,xa`, `,x,`}}, + }, { + Name: "EscapeTextWithComma", + Escape: 'x', + Input: `a,"""` + "\n", + Error: &ParseError{StartLine: 1, Line: 1, Column: 3, Err: ErrQuote}, + }, { + Name: "EscapeTrailingQuote", + Escape: 'x', + Input: `"x"` + "\n", + Error: &ParseError{StartLine: 1, Line: 2, Column: 0, Err: ErrQuote}, }} for _, tt := range tests { @@ -397,6 +424,9 @@ x,,, if tt.Comma != 0 { r.Comma = tt.Comma } + if tt.Escape != 0 { + r.Escape = tt.Escape + } r.Comment = tt.Comment if tt.UseFieldsPerRecord { r.FieldsPerRecord = tt.FieldsPerRecord @@ -411,7 +441,7 @@ x,,, if !reflect.DeepEqual(err, tt.Error) { t.Errorf("ReadAll() error:\ngot %v\nwant %v", err, tt.Error) } else if !reflect.DeepEqual(out, tt.Output) { - t.Errorf("ReadAll() output:\ngot %q\nwant %q", out, tt.Output) + t.Errorf("ReadAll() output:\ngot %#v\nwant %#v", out, tt.Output) } // Check that the error can be rendered. 
diff --git a/pkg/util/encoding/csv/writer.go b/pkg/util/encoding/csv/writer.go index c559e9b4b63a..868991dce6cd 100644 --- a/pkg/util/encoding/csv/writer.go +++ b/pkg/util/encoding/csv/writer.go @@ -33,6 +33,7 @@ import ( // If UseCRLF is true, the Writer ends each record with \r\n instead of \n. type Writer struct { Comma rune // Field delimiter (set to ',' by NewWriter) + Escape rune UseCRLF bool // True to use \r\n as the line terminator w *bufio.Writer } @@ -40,8 +41,9 @@ type Writer struct { // NewWriter returns a new Writer that writes to w. func NewWriter(w io.Writer) *Writer { return &Writer{ - Comma: ',', - w: bufio.NewWriter(w), + Comma: ',', + Escape: '"', + w: bufio.NewWriter(w), } } @@ -75,7 +77,9 @@ func (w *Writer) Write(record []string) error { var err error switch r1 { case '"': - _, err = w.w.WriteString(`""`) + _, err = w.w.WriteString(string(w.Escape) + `"`) + case w.Escape: + _, err = w.w.WriteString(string(w.Escape) + string(w.Escape)) case '\r': if !w.UseCRLF { err = w.w.WriteByte('\r') diff --git a/pkg/util/encoding/csv/writer_test.go b/pkg/util/encoding/csv/writer_test.go index d57a7f3d15d8..9542d3be8456 100644 --- a/pkg/util/encoding/csv/writer_test.go +++ b/pkg/util/encoding/csv/writer_test.go @@ -24,6 +24,7 @@ import ( var writeTests = []struct { Input [][]string Output string + Escape rune UseCRLF bool }{ {Input: [][]string{{"abc"}}, Output: "abc\n"}, @@ -50,6 +51,7 @@ var writeTests = []struct { {Input: [][]string{{"a", "a", ""}}, Output: "a,a,\n"}, {Input: [][]string{{"a", "a", "a"}}, Output: "a,a,a\n"}, {Input: [][]string{{`\.`}}, Output: "\"\\.\"\n"}, + {Input: [][]string{{`"`, `,`, `x"`, `x`, `xx,`}}, Escape: 'x', Output: `"x"",",","xxx"",x,"xxxx,"` + "\n"}, } func TestWrite(t *testing.T) { @@ -57,6 +59,9 @@ func TestWrite(t *testing.T) { b := &bytes.Buffer{} f := NewWriter(b) f.UseCRLF = tt.UseCRLF + if tt.Escape != 0 { + f.Escape = tt.Escape + } err := f.WriteAll(tt.Input) if err != nil { t.Errorf("Unexpected error: %s\n", err) 
From 7e2c7f4bcacfc13160746f91159dbdf798b330ca Mon Sep 17 00:00:00 2001 From: Oliver Tan Date: Thu, 24 Mar 2022 11:39:09 +1100 Subject: [PATCH 5/5] sql: implement COPY FROM ... ESCAPE ... Release note (sql change): Implemented the `COPY FROM ... ESCAPE ...` syntax. --- pkg/sql/copy.go | 22 +++++++++++ pkg/sql/parser/parse_test.go | 1 - pkg/sql/parser/sql.y | 4 +- pkg/sql/parser/testdata/copy | 8 ++++ pkg/sql/pgwire/testdata/pgtest/copy | 60 ++++++++++++++++++++++++++--- pkg/sql/sem/tree/copy.go | 12 ++++++ 6 files changed, 99 insertions(+), 8 deletions(-) diff --git a/pkg/sql/copy.go b/pkg/sql/copy.go index 5ecbbc017a1f..da0b031f7189 100644 --- a/pkg/sql/copy.go +++ b/pkg/sql/copy.go @@ -59,6 +59,7 @@ type copyMachine struct { columns tree.NameList resultColumns colinfo.ResultColumns format tree.CopyFormat + csvEscape rune delimiter byte // textDelim is delimiter converted to a []byte so that we don't have to do that per row. textDelim []byte @@ -174,6 +175,24 @@ func newCopyMachine( return nil, err } } + if n.Options.Escape != nil { + s := n.Options.Escape.RawString() + if len(s) != 1 { + return nil, pgerror.Newf( + pgcode.FeatureNotSupported, + "ESCAPE must be a single rune", + ) + } + + if c.format != tree.CopyFormatCSV { + return nil, pgerror.Newf( + pgcode.FeatureNotSupported, + "ESCAPE can only be specified for CSV", + ) + } + + c.csvEscape, _ = utf8.DecodeRuneInString(s) + } flags := tree.ObjectLookupFlagsWithRequiredTableKind(tree.ResolveRequireTableDesc) _, tableDesc, err := resolver.ResolveExistingTableObject(ctx, &c.p, &n.Table, flags) @@ -250,6 +269,9 @@ func (c *copyMachine) run(ctx context.Context) error { c.csvReader.Comma = rune(c.delimiter) c.csvReader.ReuseRecord = true c.csvReader.FieldsPerRecord = len(c.resultColumns) + if c.csvEscape != 0 { + c.csvReader.Escape = c.csvEscape + } } Loop: diff --git a/pkg/sql/parser/parse_test.go b/pkg/sql/parser/parse_test.go index eca92f5b6608..39117955d34c 100644 --- a/pkg/sql/parser/parse_test.go +++ 
b/pkg/sql/parser/parse_test.go @@ -405,7 +405,6 @@ func TestUnimplementedSyntax(t *testing.T) { {`COPY t FROM STDIN HEADER`, 41608, `header`, ``}, {`COPY t FROM STDIN ENCODING 'utf-8'`, 41608, `encoding`, ``}, {`COPY t FROM STDIN QUOTE 'x'`, 41608, `quote`, ``}, - {`COPY t FROM STDIN ESCAPE 'x'`, 41608, `escape`, ``}, {`COPY t FROM STDIN FORCE QUOTE *`, 41608, `quote`, ``}, {`COPY t FROM STDIN FORCE NULL *`, 41608, `force null`, ``}, {`COPY t FROM STDIN FORCE NOT NULL *`, 41608, `force not null`, ``}, diff --git a/pkg/sql/parser/sql.y b/pkg/sql/parser/sql.y index b6d7c93edab0..6fa09501c756 100644 --- a/pkg/sql/parser/sql.y +++ b/pkg/sql/parser/sql.y @@ -3531,13 +3531,13 @@ copy_options: { return unimplementedWithIssueDetail(sqllex, 41608, "header") } | QUOTE SCONST error { return unimplementedWithIssueDetail(sqllex, 41608, "quote") } -| ESCAPE SCONST error +| ESCAPE SCONST { - return unimplementedWithIssueDetail(sqllex, 41608, "escape") + $$.val = &tree.CopyOptions{Escape: tree.NewStrVal($2)} } | FORCE QUOTE error { diff --git a/pkg/sql/parser/testdata/copy b/pkg/sql/parser/testdata/copy index 9dc596dda89d..9d3fcbfe5e6b 100644 --- a/pkg/sql/parser/testdata/copy +++ b/pkg/sql/parser/testdata/copy @@ -77,3 +77,11 @@ COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER ' ' destination = 'filename' -- n COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER (' ') destination = ('filename') -- fully parenthesized COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER '_' destination = '_' -- literals removed COPY _ (_, _, _) FROM STDIN WITH CSV DELIMITER ' ' destination = 'filename' -- identifiers removed + +parse +COPY t (a, b, c) FROM STDIN destination = 'filename' CSV DELIMITER ' ' ESCAPE 'x' +---- +COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER ' ' destination = 'filename' ESCAPE 'x' -- normalized! 
+COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER (' ') destination = ('filename') ESCAPE ('x') -- fully parenthesized +COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER '_' destination = '_' ESCAPE '_' -- literals removed +COPY _ (_, _, _) FROM STDIN WITH CSV DELIMITER ' ' destination = 'filename' ESCAPE 'x' -- identifiers removed diff --git a/pkg/sql/pgwire/testdata/pgtest/copy b/pkg/sql/pgwire/testdata/pgtest/copy index 843d9a5eb54c..450307b52a01 100644 --- a/pkg/sql/pgwire/testdata/pgtest/copy +++ b/pkg/sql/pgwire/testdata/pgtest/copy @@ -46,6 +46,29 @@ ReadyForQuery {"Type":"CommandComplete","CommandTag":"SELECT 3"} {"Type":"ReadyForQuery","TxStatus":"I"} +# Invalid ESCAPE syntax. +send +Query {"String": "COPY t FROM STDIN ESCAPE 'xxx'"} +---- + +until +ErrorResponse +ReadyForQuery +---- +{"Type":"ErrorResponse","Code":"0A000"} +{"Type":"ReadyForQuery","TxStatus":"I"} + +send +Query {"String": "COPY t FROM STDIN ESCAPE 'x'"} +---- + +until +ErrorResponse +ReadyForQuery +---- +{"Type":"ErrorResponse","Code":"0A000"} +{"Type":"ReadyForQuery","TxStatus":"I"} + # Wrong number of columns. send Query {"String": "COPY t FROM STDIN"} @@ -63,12 +86,13 @@ ReadyForQuery {"Type":"ReadyForQuery","TxStatus":"I"} # Verify that only one COPY can run at once. -send +# This crashes PG, so only run on CRDB. +send crdb_only Query {"String": "COPY t FROM STDIN"} Query {"String": "COPY t FROM STDIN"} ---- -until +until crdb_only ErrorResponse ReadyForQuery ---- @@ -77,12 +101,13 @@ ReadyForQuery {"Type":"ReadyForQuery","TxStatus":"I"} # Verify that after a COPY has started another statement cannot run. -send +# This crashes PG, so only run on CRDB. 
+send crdb_only Query {"String": "COPY t FROM STDIN"} Query {"String": "SELECT 2"} ---- -until ignore=RowDescription +until ignore=RowDescription crdb_only ErrorResponse ReadyForQuery ---- @@ -248,6 +273,31 @@ ReadyForQuery {"Type":"CommandComplete","CommandTag":"SELECT 3"} {"Type":"ReadyForQuery","TxStatus":"I"} +send +Query {"String": "DELETE FROM t"} +Query {"String": "COPY t FROM STDIN CSV ESCAPE 'x'"} +CopyData {"Data": "1,\"x\"\"\n"} +CopyData {"Data": "1,\"xxx\",xx\"\n"} +CopyData {"Data": "\\.\n"} +CopyDone +Query {"String": "SELECT * FROM t ORDER BY i"} +---- + +until ignore=RowDescription +ReadyForQuery +ReadyForQuery +ReadyForQuery +---- +{"Type":"CommandComplete","CommandTag":"DELETE 3"} +{"Type":"ReadyForQuery","TxStatus":"I"} +{"Type":"CopyInResponse","ColumnFormatCodes":[0,0]} +{"Type":"CommandComplete","CommandTag":"COPY 2"} +{"Type":"ReadyForQuery","TxStatus":"I"} +{"Type":"DataRow","Values":[{"text":"1"},{"text":"\""}]} +{"Type":"DataRow","Values":[{"text":"1"},{"text":"x\",x"}]} +{"Type":"CommandComplete","CommandTag":"SELECT 2"} +{"Type":"ReadyForQuery","TxStatus":"I"} + send Query {"String": "COPY t FROM STDIN CSV"} CopyData {"Data": "1\n"} @@ -276,7 +326,7 @@ ReadyForQuery ReadyForQuery ReadyForQuery ---- -{"Type":"CommandComplete","CommandTag":"DELETE 3"} +{"Type":"CommandComplete","CommandTag":"DELETE 2"} {"Type":"ReadyForQuery","TxStatus":"I"} {"Type":"CopyInResponse","ColumnFormatCodes":[0,0]} {"Type":"CommandComplete","CommandTag":"COPY 3"} diff --git a/pkg/sql/sem/tree/copy.go b/pkg/sql/sem/tree/copy.go index 8e80314fba9c..a2af1539c345 100644 --- a/pkg/sql/sem/tree/copy.go +++ b/pkg/sql/sem/tree/copy.go @@ -29,6 +29,7 @@ type CopyOptions struct { CopyFormat CopyFormat Delimiter Expr Null Expr + Escape *StrVal } var _ NodeFormatter = &CopyOptions{} @@ -91,6 +92,11 @@ func (o *CopyOptions) Format(ctx *FmtCtx) { ctx.FormatNode(o.Destination) addSep = true } + if o.Escape != nil { + maybeAddSep() + ctx.WriteString("ESCAPE ") + 
ctx.FormatNode(o.Escape) + } } // IsDefault returns true if this struct has default value. @@ -125,6 +131,12 @@ func (o *CopyOptions) CombineWith(other *CopyOptions) error { } o.Null = other.Null } + if other.Escape != nil { + if o.Escape != nil { + return pgerror.Newf(pgcode.Syntax, "escape option specified multiple times") + } + o.Escape = other.Escape + } return nil }