From c6dc65272d5e779b81576e068dc50edbe6f13edc Mon Sep 17 00:00:00 2001 From: Oliver Tan Date: Mon, 6 Jun 2022 13:51:48 +1000 Subject: [PATCH 1/2] parser: support COPY ... FROM CSV HEADER Release note (sql change): COPY ... FROM CSV HEADER is now supported. --- docs/generated/sql/bnf/stmt_block.bnf | 1 + pkg/sql/copy.go | 22 +++++++++-- pkg/sql/parser/parse_test.go | 1 - pkg/sql/parser/sql.y | 4 +- pkg/sql/parser/testdata/copy | 10 ++--- pkg/sql/pgwire/testdata/pgtest/copy | 53 ++++++++++++++++++++++++++- pkg/sql/sem/tree/copy.go | 8 ++++ 7 files changed, 86 insertions(+), 13 deletions(-) diff --git a/docs/generated/sql/bnf/stmt_block.bnf b/docs/generated/sql/bnf/stmt_block.bnf index fd4e867cd510..a984aabe5f85 100644 --- a/docs/generated/sql/bnf/stmt_block.bnf +++ b/docs/generated/sql/bnf/stmt_block.bnf @@ -1860,6 +1860,7 @@ copy_options ::= | 'CSV' | 'DELIMITER' string_or_placeholder | 'NULL' string_or_placeholder + | 'HEADER' db_object_name_component ::= name diff --git a/pkg/sql/copy.go b/pkg/sql/copy.go index 8d2aa7efbdee..f87e3b082c28 100644 --- a/pkg/sql/copy.go +++ b/pkg/sql/copy.go @@ -67,6 +67,8 @@ type copyMachine struct { textDelim []byte null string binaryState binaryState + // csvExpectHeader is true if we are expecting a header for the CSV input. + csvExpectHeader bool // forceNotNull disables converting values matching the null string to // NULL. The spec says this is only supported for CSV, and also must specify // which columns it applies to. @@ -121,10 +123,11 @@ func newCopyMachine( conn: conn, // TODO(georgiah): Currently, insertRows depends on Table and Columns, // but that dependency can be removed by refactoring it. - table: &n.Table, - columns: n.Columns, - format: n.Options.CopyFormat, - txnOpt: txnOpt, + table: &n.Table, + columns: n.Columns, + format: n.Options.CopyFormat, + txnOpt: txnOpt, + csvExpectHeader: n.Options.Header, // The planner will be prepared before use. p: planner{execCfg: execCfg, alloc: &tree.DatumAlloc{}}, execInsertPlan: execInsertPlan, @@ -147,6 +150,10 @@ func newCopyMachine( c.delimiter = ',' } + if n.Options.Header && c.format != tree.CopyFormatCSV { + return nil, pgerror.Newf(pgcode.FeatureNotSupported, "HEADER only supported with CSV format") + } + if n.Options.Delimiter != nil { if c.format == tree.CopyFormatBinary { return nil, errors.Newf("DELIMITER unsupported in BINARY format") @@ -471,6 +478,13 @@ func (c *copyMachine) readCSVData(ctx context.Context, final bool) (brk bool, er } } + // If we are using COPY FROM and expecting a header, PostgreSQL ignores + // the header row in all circumstances. Do the same. + if c.csvExpectHeader { + c.csvExpectHeader = false + return false, nil + } + c.csvInput.Write(fullLine) record, err := c.csvReader.Read() // Look for end of data before checking for errors, since a field count diff --git a/pkg/sql/parser/parse_test.go b/pkg/sql/parser/parse_test.go index c29ea5eaacca..3233f54cca1f 100644 --- a/pkg/sql/parser/parse_test.go +++ b/pkg/sql/parser/parse_test.go @@ -402,7 +402,6 @@ func TestUnimplementedSyntax(t *testing.T) { {`COPY t FROM STDIN OIDS`, 41608, `oids`, ``}, {`COPY t FROM STDIN FREEZE`, 41608, `freeze`, ``}, - {`COPY t FROM STDIN HEADER`, 41608, `header`, ``}, {`COPY t FROM STDIN ENCODING 'utf-8'`, 41608, `encoding`, ``}, {`COPY t FROM STDIN QUOTE 'x'`, 41608, `quote`, ``}, {`COPY t FROM STDIN FORCE QUOTE *`, 41608, `quote`, ``}, diff --git a/pkg/sql/parser/sql.y b/pkg/sql/parser/sql.y index cd1e4594e605..8d5e61a4c719 100644 --- a/pkg/sql/parser/sql.y +++ b/pkg/sql/parser/sql.y @@ -3560,9 +3560,9 @@ copy_options: { return unimplementedWithIssueDetail(sqllex, 41608, "freeze") } -| HEADER error +| HEADER { - return unimplementedWithIssueDetail(sqllex, 41608, "header") + $$.val = &tree.CopyOptions{Header: true} } | QUOTE SCONST { diff --git a/pkg/sql/parser/testdata/copy b/pkg/sql/parser/testdata/copy index 9d3fcbfe5e6b..b62e9971eb97 100644 --- a/pkg/sql/parser/testdata/copy +++ b/pkg/sql/parser/testdata/copy @@ -79,9 +79,9 @@ COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER '_' destination = '_' -- literals COPY _ (_, _, _) FROM STDIN WITH CSV DELIMITER ' ' destination = 'filename' -- identifiers removed parse -COPY t (a, b, c) FROM STDIN destination = 'filename' CSV DELIMITER ' ' ESCAPE 'x' +COPY t (a, b, c) FROM STDIN destination = 'filename' CSV DELIMITER ' ' ESCAPE 'x' HEADER ---- -COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER ' ' destination = 'filename' ESCAPE 'x' -- normalized! -COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER (' ') destination = ('filename') ESCAPE ('x') -- fully parenthesized -COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER '_' destination = '_' ESCAPE '_' -- literals removed -COPY _ (_, _, _) FROM STDIN WITH CSV DELIMITER ' ' destination = 'filename' ESCAPE 'x' -- identifiers removed +COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER ' ' destination = 'filename' ESCAPE 'x' HEADER -- normalized! +COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER (' ') destination = ('filename') ESCAPE ('x') HEADER -- fully parenthesized +COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER '_' destination = '_' ESCAPE '_' HEADER -- literals removed +COPY _ (_, _, _) FROM STDIN WITH CSV DELIMITER ' ' destination = 'filename' ESCAPE 'x' HEADER -- identifiers removed diff --git a/pkg/sql/pgwire/testdata/pgtest/copy b/pkg/sql/pgwire/testdata/pgtest/copy index 4fec01e8ab87..a3681ea58532 100644 --- a/pkg/sql/pgwire/testdata/pgtest/copy +++ b/pkg/sql/pgwire/testdata/pgtest/copy @@ -212,6 +212,18 @@ ReadyForQuery {"Type":"ErrorResponse","Code":"0A000"} {"Type":"ReadyForQuery","TxStatus":"I"} +# Header without CSV. +send +Query {"String": "COPY t FROM STDIN HEADER"} +---- + +until +ErrorResponse +ReadyForQuery +---- +{"Type":"ErrorResponse","Code":"0A000"} +{"Type":"ReadyForQuery","TxStatus":"I"} + # Wrong number of columns. send Query {"String": "COPY t FROM STDIN"} @@ -322,6 +334,45 @@ ReadyForQuery {"Type":"CommandComplete","CommandTag":"SELECT 2"} {"Type":"ReadyForQuery","TxStatus":"I"} +send +Query {"String": "DELETE FROM t"} +Query {"String": "COPY t FROM STDIN HEADER CSV"} +CopyData {"Data": "\\.\n"} +CopyDone +Query {"String": "COPY t FROM STDIN HEADER CSV"} +CopyData {"Data": "justonelinewithheader\n"} +CopyData {"Data": "\\.\n"} +CopyDone +Query {"String": "COPY t FROM STDIN HEADER CSV"} +CopyData {"Data": "ignore,this,entire,line\n"} +CopyData {"Data": "1,blah\n"} +CopyData {"Data": "\\.\n"} +CopyDone +Query {"String": "SELECT * FROM t ORDER BY i"} +---- + +until ignore=RowDescription +ReadyForQuery +ReadyForQuery +ReadyForQuery +ReadyForQuery +ReadyForQuery +---- +{"Type":"CommandComplete","CommandTag":"DELETE 2"} +{"Type":"ReadyForQuery","TxStatus":"I"} +{"Type":"CopyInResponse","ColumnFormatCodes":[0,0]} +{"Type":"CommandComplete","CommandTag":"COPY 0"} +{"Type":"ReadyForQuery","TxStatus":"I"} +{"Type":"CopyInResponse","ColumnFormatCodes":[0,0]} +{"Type":"CommandComplete","CommandTag":"COPY 0"} +{"Type":"ReadyForQuery","TxStatus":"I"} +{"Type":"CopyInResponse","ColumnFormatCodes":[0,0]} +{"Type":"CommandComplete","CommandTag":"COPY 1"} +{"Type":"ReadyForQuery","TxStatus":"I"} +{"Type":"DataRow","Values":[{"text":"1"},{"text":"blah"}]} +{"Type":"CommandComplete","CommandTag":"SELECT 1"} +{"Type":"ReadyForQuery","TxStatus":"I"} + send Query {"String": "DELETE FROM t"} Query {"String": "COPY t FROM STDIN NULL 'NS'"} @@ -338,7 +389,7 @@ ReadyForQuery ReadyForQuery ReadyForQuery ---- -{"Type":"CommandComplete","CommandTag":"DELETE 2"} +{"Type":"CommandComplete","CommandTag":"DELETE 1"} {"Type":"ReadyForQuery","TxStatus":"I"} {"Type":"CopyInResponse","ColumnFormatCodes":[0,0]} {"Type":"CommandComplete","CommandTag":"COPY 3"} diff --git a/pkg/sql/sem/tree/copy.go b/pkg/sql/sem/tree/copy.go index a2af1539c345..85b160e008cc 100644 --- a/pkg/sql/sem/tree/copy.go +++ b/pkg/sql/sem/tree/copy.go @@ -30,6 +30,7 @@ type CopyOptions struct { Delimiter Expr Null Expr Escape *StrVal + Header bool } var _ NodeFormatter = &CopyOptions{} @@ -97,6 +98,10 @@ func (o *CopyOptions) Format(ctx *FmtCtx) { ctx.WriteString("ESCAPE ") ctx.FormatNode(o.Escape) } + if o.Header { + maybeAddSep() + ctx.WriteString("HEADER") + } } // IsDefault returns true if this struct has default value. @@ -137,6 +142,9 @@ func (o *CopyOptions) CombineWith(other *CopyOptions) error { } o.Escape = other.Escape } + if other.Header { + o.Header = true + } return nil } From 5ac480e3761cb0a685d9202e09a1ac02a4e8ff71 Mon Sep 17 00:00:00 2001 From: Oliver Tan Date: Thu, 9 Jun 2022 09:57:20 +1000 Subject: [PATCH 2/2] sql: improve error codes of COPY .. FROM with invalid options Release note: None --- pkg/sql/copy.go | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/pkg/sql/copy.go b/pkg/sql/copy.go index f87e3b082c28..1884639fd9e3 100644 --- a/pkg/sql/copy.go +++ b/pkg/sql/copy.go @@ -156,7 +156,10 @@ func newCopyMachine( if n.Options.Delimiter != nil { if c.format == tree.CopyFormatBinary { - return nil, errors.Newf("DELIMITER unsupported in BINARY format") + return nil, pgerror.Newf( + pgcode.Syntax, + "DELIMITER unsupported in BINARY format", + ) } fn, err := c.p.TypeAsString(ctx, n.Options.Delimiter, "COPY") if err != nil { @@ -167,13 +170,19 @@ func newCopyMachine( return nil, err } if len(delim) != 1 || !utf8.ValidString(delim) { - return nil, errors.Newf("delimiter must be a single-byte character") + return nil, pgerror.Newf( + pgcode.FeatureNotSupported, + "delimiter must be a single-byte character", + ) } c.delimiter = delim[0] } if n.Options.Null != nil { if c.format == tree.CopyFormatBinary { - return nil, errors.Newf("NULL unsupported in BINARY format") + return nil, pgerror.Newf( + pgcode.Syntax, + "NULL unsupported in BINARY format", + ) } fn, err := c.p.TypeAsString(ctx, n.Options.Null, "COPY") if err != nil {