Skip to content

Commit

Permalink
parser: support COPY ... FROM CSV HEADER
Browse files Browse the repository at this point in the history
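Release note (sql change): COPY ... FROM CSV HEADER is now supported. When HEADER is specified, the first line of the CSV input is treated as a header row and skipped. HEADER is only accepted with the CSV format.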
  • Loading branch information
otan committed Jun 6, 2022
1 parent d8d995d commit c6dc652
Show file tree
Hide file tree
Showing 7 changed files with 86 additions and 13 deletions.
1 change: 1 addition & 0 deletions docs/generated/sql/bnf/stmt_block.bnf
Original file line number Diff line number Diff line change
Expand Up @@ -1860,6 +1860,7 @@ copy_options ::=
| 'CSV'
| 'DELIMITER' string_or_placeholder
| 'NULL' string_or_placeholder
| 'HEADER'

db_object_name_component ::=
name
Expand Down
22 changes: 18 additions & 4 deletions pkg/sql/copy.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ type copyMachine struct {
textDelim []byte
null string
binaryState binaryState
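// csvExpectHeader is true while a header line is still expected at the start
// of the CSV input. It is initialized from the HEADER option and reset to
// false once that first line has been skipped.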
csvExpectHeader bool
// forceNotNull disables converting values matching the null string to
// NULL. The spec says this is only supported for CSV, and also must specify
// which columns it applies to.
Expand Down Expand Up @@ -121,10 +123,11 @@ func newCopyMachine(
conn: conn,
// TODO(georgiah): Currently, insertRows depends on Table and Columns,
// but that dependency can be removed by refactoring it.
table: &n.Table,
columns: n.Columns,
format: n.Options.CopyFormat,
txnOpt: txnOpt,
table: &n.Table,
columns: n.Columns,
format: n.Options.CopyFormat,
txnOpt: txnOpt,
csvExpectHeader: n.Options.Header,
// The planner will be prepared before use.
p: planner{execCfg: execCfg, alloc: &tree.DatumAlloc{}},
execInsertPlan: execInsertPlan,
Expand All @@ -147,6 +150,10 @@ func newCopyMachine(
c.delimiter = ','
}

if n.Options.Header && c.format != tree.CopyFormatCSV {
return nil, pgerror.Newf(pgcode.FeatureNotSupported, "HEADER only supported with CSV format")
}

if n.Options.Delimiter != nil {
if c.format == tree.CopyFormatBinary {
return nil, errors.Newf("DELIMITER unsupported in BINARY format")
Expand Down Expand Up @@ -471,6 +478,13 @@ func (c *copyMachine) readCSVData(ctx context.Context, final bool) (brk bool, er
}
}

// If we are using COPY FROM and expecting a header, PostgreSQL ignores
// the header row in all circumstances. Do the same.
if c.csvExpectHeader {
c.csvExpectHeader = false
return false, nil
}

c.csvInput.Write(fullLine)
record, err := c.csvReader.Read()
// Look for end of data before checking for errors, since a field count
Expand Down
1 change: 0 additions & 1 deletion pkg/sql/parser/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,6 @@ func TestUnimplementedSyntax(t *testing.T) {

{`COPY t FROM STDIN OIDS`, 41608, `oids`, ``},
{`COPY t FROM STDIN FREEZE`, 41608, `freeze`, ``},
{`COPY t FROM STDIN HEADER`, 41608, `header`, ``},
{`COPY t FROM STDIN ENCODING 'utf-8'`, 41608, `encoding`, ``},
{`COPY t FROM STDIN QUOTE 'x'`, 41608, `quote`, ``},
{`COPY t FROM STDIN FORCE QUOTE *`, 41608, `quote`, ``},
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/parser/sql.y
Original file line number Diff line number Diff line change
Expand Up @@ -3560,9 +3560,9 @@ copy_options:
{
return unimplementedWithIssueDetail(sqllex, 41608, "freeze")
}
| HEADER error
| HEADER
{
return unimplementedWithIssueDetail(sqllex, 41608, "header")
$$.val = &tree.CopyOptions{Header: true}
}
| QUOTE SCONST
{
Expand Down
10 changes: 5 additions & 5 deletions pkg/sql/parser/testdata/copy
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER '_' destination = '_' -- literals
COPY _ (_, _, _) FROM STDIN WITH CSV DELIMITER ' ' destination = 'filename' -- identifiers removed

parse
COPY t (a, b, c) FROM STDIN destination = 'filename' CSV DELIMITER ' ' ESCAPE 'x'
COPY t (a, b, c) FROM STDIN destination = 'filename' CSV DELIMITER ' ' ESCAPE 'x' HEADER
----
COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER ' ' destination = 'filename' ESCAPE 'x' -- normalized!
COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER (' ') destination = ('filename') ESCAPE ('x') -- fully parenthesized
COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER '_' destination = '_' ESCAPE '_' -- literals removed
COPY _ (_, _, _) FROM STDIN WITH CSV DELIMITER ' ' destination = 'filename' ESCAPE 'x' -- identifiers removed
COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER ' ' destination = 'filename' ESCAPE 'x' HEADER -- normalized!
COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER (' ') destination = ('filename') ESCAPE ('x') HEADER -- fully parenthesized
COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER '_' destination = '_' ESCAPE '_' HEADER -- literals removed
COPY _ (_, _, _) FROM STDIN WITH CSV DELIMITER ' ' destination = 'filename' ESCAPE 'x' HEADER -- identifiers removed
53 changes: 52 additions & 1 deletion pkg/sql/pgwire/testdata/pgtest/copy
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,18 @@ ReadyForQuery
{"Type":"ErrorResponse","Code":"0A000"}
{"Type":"ReadyForQuery","TxStatus":"I"}

# HEADER without the CSV format is rejected with a feature-not-supported error (0A000).
send
Query {"String": "COPY t FROM STDIN HEADER"}
----

until
ErrorResponse
ReadyForQuery
----
{"Type":"ErrorResponse","Code":"0A000"}
{"Type":"ReadyForQuery","TxStatus":"I"}

# Wrong number of columns.
send
Query {"String": "COPY t FROM STDIN"}
Expand Down Expand Up @@ -322,6 +334,45 @@ ReadyForQuery
{"Type":"CommandComplete","CommandTag":"SELECT 2"}
{"Type":"ReadyForQuery","TxStatus":"I"}

send
Query {"String": "DELETE FROM t"}
Query {"String": "COPY t FROM STDIN HEADER CSV"}
CopyData {"Data": "\\.\n"}
CopyDone
Query {"String": "COPY t FROM STDIN HEADER CSV"}
CopyData {"Data": "justonelinewithheader\n"}
CopyData {"Data": "\\.\n"}
CopyDone
Query {"String": "COPY t FROM STDIN HEADER CSV"}
CopyData {"Data": "ignore,this,entire,line\n"}
CopyData {"Data": "1,blah\n"}
CopyData {"Data": "\\.\n"}
CopyDone
Query {"String": "SELECT * FROM t ORDER BY i"}
----

until ignore=RowDescription
ReadyForQuery
ReadyForQuery
ReadyForQuery
ReadyForQuery
ReadyForQuery
----
{"Type":"CommandComplete","CommandTag":"DELETE 2"}
{"Type":"ReadyForQuery","TxStatus":"I"}
{"Type":"CopyInResponse","ColumnFormatCodes":[0,0]}
{"Type":"CommandComplete","CommandTag":"COPY 0"}
{"Type":"ReadyForQuery","TxStatus":"I"}
{"Type":"CopyInResponse","ColumnFormatCodes":[0,0]}
{"Type":"CommandComplete","CommandTag":"COPY 0"}
{"Type":"ReadyForQuery","TxStatus":"I"}
{"Type":"CopyInResponse","ColumnFormatCodes":[0,0]}
{"Type":"CommandComplete","CommandTag":"COPY 1"}
{"Type":"ReadyForQuery","TxStatus":"I"}
{"Type":"DataRow","Values":[{"text":"1"},{"text":"blah"}]}
{"Type":"CommandComplete","CommandTag":"SELECT 1"}
{"Type":"ReadyForQuery","TxStatus":"I"}

send
Query {"String": "DELETE FROM t"}
Query {"String": "COPY t FROM STDIN NULL 'NS'"}
Expand All @@ -338,7 +389,7 @@ ReadyForQuery
ReadyForQuery
ReadyForQuery
----
{"Type":"CommandComplete","CommandTag":"DELETE 2"}
{"Type":"CommandComplete","CommandTag":"DELETE 1"}
{"Type":"ReadyForQuery","TxStatus":"I"}
{"Type":"CopyInResponse","ColumnFormatCodes":[0,0]}
{"Type":"CommandComplete","CommandTag":"COPY 3"}
Expand Down
8 changes: 8 additions & 0 deletions pkg/sql/sem/tree/copy.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ type CopyOptions struct {
Delimiter Expr
Null Expr
Escape *StrVal
Header bool
}

var _ NodeFormatter = &CopyOptions{}
Expand Down Expand Up @@ -97,6 +98,10 @@ func (o *CopyOptions) Format(ctx *FmtCtx) {
ctx.WriteString("ESCAPE ")
ctx.FormatNode(o.Escape)
}
if o.Header {
maybeAddSep()
ctx.WriteString("HEADER")
}
}

// IsDefault returns true if this struct has default value.
Expand Down Expand Up @@ -137,6 +142,9 @@ func (o *CopyOptions) CombineWith(other *CopyOptions) error {
}
o.Escape = other.Escape
}
if other.Header {
o.Header = true
}
return nil
}

Expand Down

0 comments on commit c6dc652

Please sign in to comment.