Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
78303: sql: various COPY improvements r=rafiss,ajstorm a=otan

Refs: cockroachdb#41608

See individual commits for details. 


Co-authored-by: Oliver Tan <[email protected]>
  • Loading branch information
craig[bot] and otan committed Mar 24, 2022
2 parents 9f90d41 + 7e2c7f4 commit 8b36717
Show file tree
Hide file tree
Showing 12 changed files with 259 additions and 22 deletions.
3 changes: 3 additions & 0 deletions docs/generated/sql/bnf/stmt_block.bnf
Original file line number Diff line number Diff line change
Expand Up @@ -1008,6 +1008,7 @@ unreserved_keyword ::=
| 'FORCE_INDEX'
| 'FORCE_ZIGZAG'
| 'FORWARD'
| 'FREEZE'
| 'FUNCTION'
| 'FUNCTIONS'
| 'GENERATED'
Expand All @@ -1023,6 +1024,7 @@ unreserved_keyword ::=
| 'GRANTS'
| 'GROUPS'
| 'HASH'
| 'HEADER'
| 'HIGH'
| 'HISTOGRAM'
| 'HOLD'
Expand Down Expand Up @@ -1158,6 +1160,7 @@ unreserved_keyword ::=
| 'PUBLICATION'
| 'QUERIES'
| 'QUERY'
| 'QUOTE'
| 'RANGE'
| 'RANGES'
| 'READ'
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,7 @@ go_library(
"//pkg/util/ctxgroup",
"//pkg/util/duration",
"//pkg/util/encoding",
"//pkg/util/encoding/csv",
"//pkg/util/envutil",
"//pkg/util/errorutil",
"//pkg/util/errorutil/unimplemented",
Expand Down
24 changes: 23 additions & 1 deletion pkg/sql/copy.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import (
"bytes"
"context"
"encoding/binary"
"encoding/csv"
"io"
"strconv"
"strings"
Expand All @@ -32,6 +31,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/encoding/csv"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/mon"
"github.com/cockroachdb/errors"
Expand Down Expand Up @@ -59,6 +59,7 @@ type copyMachine struct {
columns tree.NameList
resultColumns colinfo.ResultColumns
format tree.CopyFormat
csvEscape rune
delimiter byte
// textDelim is delimiter converted to a []byte so that we don't have to do that per row.
textDelim []byte
Expand Down Expand Up @@ -174,6 +175,24 @@ func newCopyMachine(
return nil, err
}
}
if n.Options.Escape != nil {
s := n.Options.Escape.RawString()
if len(s) != 1 {
return nil, pgerror.Newf(
pgcode.FeatureNotSupported,
"ESCAPE must be a single rune",
)
}

if c.format != tree.CopyFormatCSV {
return nil, pgerror.Newf(
pgcode.FeatureNotSupported,
"ESCAPE can only be specified for CSV",
)
}

c.csvEscape, _ = utf8.DecodeRuneInString(s)
}

flags := tree.ObjectLookupFlagsWithRequiredTableKind(tree.ResolveRequireTableDesc)
_, tableDesc, err := resolver.ResolveExistingTableObject(ctx, &c.p, &n.Table, flags)
Expand Down Expand Up @@ -250,6 +269,9 @@ func (c *copyMachine) run(ctx context.Context) error {
c.csvReader.Comma = rune(c.delimiter)
c.csvReader.ReuseRecord = true
c.csvReader.FieldsPerRecord = len(c.resultColumns)
if c.csvEscape != 0 {
c.csvReader.Escape = c.csvEscape
}
}

Loop:
Expand Down
9 changes: 9 additions & 0 deletions pkg/sql/parser/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,15 @@ func TestUnimplementedSyntax(t *testing.T) {

{`COMMENT ON EXTENSION a`, 74777, `comment on extension`, ``},
{`COMMENT ON FUNCTION f() is 'f'`, 17511, ``, ``},

{`COPY t FROM STDIN OIDS`, 41608, `oids`, ``},
{`COPY t FROM STDIN FREEZE`, 41608, `freeze`, ``},
{`COPY t FROM STDIN HEADER`, 41608, `header`, ``},
{`COPY t FROM STDIN ENCODING 'utf-8'`, 41608, `encoding`, ``},
{`COPY t FROM STDIN QUOTE 'x'`, 41608, `quote`, ``},
{`COPY t FROM STDIN FORCE QUOTE *`, 41608, `quote`, ``},
{`COPY t FROM STDIN FORCE NULL *`, 41608, `force null`, ``},
{`COPY t FROM STDIN FORCE NOT NULL *`, 41608, `force not null`, ``},
{`COPY x FROM STDIN WHERE a = b`, 54580, ``, ``},

{`ALTER AGGREGATE a`, 74775, `alter aggregate`, ``},
Expand Down
45 changes: 42 additions & 3 deletions pkg/sql/parser/sql.y
Original file line number Diff line number Diff line change
Expand Up @@ -826,13 +826,13 @@ func (u *sqlSymUnion) cursorStmt() tree.CursorStmt {
%token <str> FAILURE FALSE FAMILY FETCH FETCHVAL FETCHTEXT FETCHVAL_PATH FETCHTEXT_PATH
%token <str> FILES FILTER
%token <str> FIRST FLOAT FLOAT4 FLOAT8 FLOORDIV FOLLOWING FOR FORCE FORCE_INDEX FORCE_ZIGZAG
%token <str> FOREIGN FORWARD FROM FULL FUNCTION FUNCTIONS
%token <str> FOREIGN FORWARD FREEZE FROM FULL FUNCTION FUNCTIONS

%token <str> GENERATED GEOGRAPHY GEOMETRY GEOMETRYM GEOMETRYZ GEOMETRYZM
%token <str> GEOMETRYCOLLECTION GEOMETRYCOLLECTIONM GEOMETRYCOLLECTIONZ GEOMETRYCOLLECTIONZM
%token <str> GLOBAL GOAL GRANT GRANTS GREATEST GROUP GROUPING GROUPS

%token <str> HAVING HASH HIGH HISTOGRAM HOLD HOUR
%token <str> HAVING HASH HEADER HIGH HISTOGRAM HOLD HOUR

%token <str> IDENTITY
%token <str> IF IFERROR IFNULL IGNORE_FOREIGN_KEYS ILIKE IMMEDIATE IMPORT IN INCLUDE
Expand Down Expand Up @@ -869,7 +869,7 @@ func (u *sqlSymUnion) cursorStmt() tree.CursorStmt {
%token <str> POSITION PRECEDING PRECISION PREPARE PRESERVE PRIMARY PRIOR PRIORITY PRIVILEGES
%token <str> PROCEDURAL PUBLIC PUBLICATION

%token <str> QUERIES QUERY
%token <str> QUERIES QUERY QUOTE

%token <str> RANGE RANGES READ REAL REASON REASSIGN RECURSIVE RECURRING REF REFERENCES REFRESH
%token <str> REGCLASS REGION REGIONAL REGIONS REGNAMESPACE REGPROC REGPROCEDURE REGROLE REGTYPE REINDEX
Expand Down Expand Up @@ -3509,6 +3509,42 @@ copy_options:
{
$$.val = &tree.CopyOptions{Null: $2.expr()}
}
| OIDS error
{
return unimplementedWithIssueDetail(sqllex, 41608, "oids")
}
| FREEZE error
{
return unimplementedWithIssueDetail(sqllex, 41608, "freeze")
}
| HEADER error
{
return unimplementedWithIssueDetail(sqllex, 41608, "header")
}
| QUOTE SCONST
{
return unimplementedWithIssueDetail(sqllex, 41608, "quote")
}
| ESCAPE SCONST error
{
$$.val = &tree.CopyOptions{Escape: tree.NewStrVal($2)}
}
| FORCE QUOTE error
{
return unimplementedWithIssueDetail(sqllex, 41608, "force quote")
}
| FORCE NOT NULL error
{
return unimplementedWithIssueDetail(sqllex, 41608, "force not null")
}
| FORCE NULL error
{
return unimplementedWithIssueDetail(sqllex, 41608, "force null")
}
| ENCODING SCONST error
{
return unimplementedWithIssueDetail(sqllex, 41608, "encoding")
}

// %Help: CANCEL
// %Category: Group
Expand Down Expand Up @@ -14038,6 +14074,7 @@ unreserved_keyword:
| FORCE_INDEX
| FORCE_ZIGZAG
| FORWARD
| FREEZE
| FUNCTION
| FUNCTIONS
| GENERATED
Expand All @@ -14053,6 +14090,7 @@ unreserved_keyword:
| GRANTS
| GROUPS
| HASH
| HEADER
| HIGH
| HISTOGRAM
| HOLD
Expand Down Expand Up @@ -14188,6 +14226,7 @@ unreserved_keyword:
| PUBLICATION
| QUERIES
| QUERY
| QUOTE
| RANGE
| RANGES
| READ
Expand Down
8 changes: 8 additions & 0 deletions pkg/sql/parser/testdata/copy
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,11 @@ COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER ' ' destination = 'filename' -- n
COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER (' ') destination = ('filename') -- fully parenthesized
COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER '_' destination = '_' -- literals removed
COPY _ (_, _, _) FROM STDIN WITH CSV DELIMITER ' ' destination = 'filename' -- identifiers removed

parse
COPY t (a, b, c) FROM STDIN destination = 'filename' CSV DELIMITER ' ' ESCAPE 'x'
----
COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER ' ' destination = 'filename' ESCAPE 'x' -- normalized!
COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER (' ') destination = ('filename') ESCAPE ('x') -- fully parenthesized
COPY t (a, b, c) FROM STDIN WITH CSV DELIMITER '_' destination = '_' ESCAPE '_' -- literals removed
COPY _ (_, _, _) FROM STDIN WITH CSV DELIMITER ' ' destination = 'filename' ESCAPE 'x' -- identifiers removed
60 changes: 55 additions & 5 deletions pkg/sql/pgwire/testdata/pgtest/copy
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,29 @@ ReadyForQuery
{"Type":"CommandComplete","CommandTag":"SELECT 3"}
{"Type":"ReadyForQuery","TxStatus":"I"}

# Invalid ESCAPE syntax.
send
Query {"String": "COPY t FROM STDIN ESCAPE 'xxx'"}
----

until
ErrorResponse
ReadyForQuery
----
{"Type":"ErrorResponse","Code":"0A000"}
{"Type":"ReadyForQuery","TxStatus":"I"}

send
Query {"String": "COPY t FROM STDIN ESCAPE 'x'"}
----

until
ErrorResponse
ReadyForQuery
----
{"Type":"ErrorResponse","Code":"0A000"}
{"Type":"ReadyForQuery","TxStatus":"I"}

# Wrong number of columns.
send
Query {"String": "COPY t FROM STDIN"}
Expand All @@ -63,12 +86,13 @@ ReadyForQuery
{"Type":"ReadyForQuery","TxStatus":"I"}

# Verify that only one COPY can run at once.
send
# This crashes PG, so only run on CRDB.
send crdb_only
Query {"String": "COPY t FROM STDIN"}
Query {"String": "COPY t FROM STDIN"}
----

until
until crdb_only
ErrorResponse
ReadyForQuery
----
Expand All @@ -77,12 +101,13 @@ ReadyForQuery
{"Type":"ReadyForQuery","TxStatus":"I"}

# Verify that after a COPY has started another statement cannot run.
send
# This crashes PG, so only run on CRDB.
send crdb_only
Query {"String": "COPY t FROM STDIN"}
Query {"String": "SELECT 2"}
----

until ignore=RowDescription
until ignore=RowDescription crdb_only
ErrorResponse
ReadyForQuery
----
Expand Down Expand Up @@ -248,6 +273,31 @@ ReadyForQuery
{"Type":"CommandComplete","CommandTag":"SELECT 3"}
{"Type":"ReadyForQuery","TxStatus":"I"}

send
Query {"String": "DELETE FROM t"}
Query {"String": "COPY t FROM STDIN CSV ESCAPE 'x'"}
CopyData {"Data": "1,\"x\"\"\n"}
CopyData {"Data": "1,\"xxx\",xx\"\n"}
CopyData {"Data": "\\.\n"}
CopyDone
Query {"String": "SELECT * FROM t ORDER BY i"}
----

until ignore=RowDescription
ReadyForQuery
ReadyForQuery
ReadyForQuery
----
{"Type":"CommandComplete","CommandTag":"DELETE 3"}
{"Type":"ReadyForQuery","TxStatus":"I"}
{"Type":"CopyInResponse","ColumnFormatCodes":[0,0]}
{"Type":"CommandComplete","CommandTag":"COPY 2"}
{"Type":"ReadyForQuery","TxStatus":"I"}
{"Type":"DataRow","Values":[{"text":"1"},{"text":"\""}]}
{"Type":"DataRow","Values":[{"text":"1"},{"text":"x\",x"}]}
{"Type":"CommandComplete","CommandTag":"SELECT 2"}
{"Type":"ReadyForQuery","TxStatus":"I"}

send
Query {"String": "COPY t FROM STDIN CSV"}
CopyData {"Data": "1\n"}
Expand Down Expand Up @@ -276,7 +326,7 @@ ReadyForQuery
ReadyForQuery
ReadyForQuery
----
{"Type":"CommandComplete","CommandTag":"DELETE 3"}
{"Type":"CommandComplete","CommandTag":"DELETE 2"}
{"Type":"ReadyForQuery","TxStatus":"I"}
{"Type":"CopyInResponse","ColumnFormatCodes":[0,0]}
{"Type":"CommandComplete","CommandTag":"COPY 3"}
Expand Down
25 changes: 20 additions & 5 deletions pkg/sql/sem/tree/copy.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@

package tree

import "github.com/cockroachdb/errors"
import (
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
)

// CopyFrom represents a COPY FROM statement.
type CopyFrom struct {
Expand All @@ -26,6 +29,7 @@ type CopyOptions struct {
CopyFormat CopyFormat
Delimiter Expr
Null Expr
Escape *StrVal
}

var _ NodeFormatter = &CopyOptions{}
Expand Down Expand Up @@ -88,6 +92,11 @@ func (o *CopyOptions) Format(ctx *FmtCtx) {
ctx.FormatNode(o.Destination)
addSep = true
}
if o.Escape != nil {
maybeAddSep()
ctx.WriteString("ESCAPE ")
ctx.FormatNode(o.Escape)
}
}

// IsDefault returns true if this struct has default value.
Expand All @@ -100,28 +109,34 @@ func (o CopyOptions) IsDefault() bool {
func (o *CopyOptions) CombineWith(other *CopyOptions) error {
if other.Destination != nil {
if o.Destination != nil {
return errors.New("destination option specified multiple times")
return pgerror.Newf(pgcode.Syntax, "destination option specified multiple times")
}
o.Destination = other.Destination
}
if other.CopyFormat != CopyFormatText {
if o.CopyFormat != CopyFormatText {
return errors.New("format option specified multiple times")
return pgerror.Newf(pgcode.Syntax, "format option specified multiple times")
}
o.CopyFormat = other.CopyFormat
}
if other.Delimiter != nil {
if o.Delimiter != nil {
return errors.New("delimiter option specified multiple times")
return pgerror.Newf(pgcode.Syntax, "delimiter option specified multiple times")
}
o.Delimiter = other.Delimiter
}
if other.Null != nil {
if o.Null != nil {
return errors.New("null option specified multiple times")
return pgerror.Newf(pgcode.Syntax, "null option specified multiple times")
}
o.Null = other.Null
}
if other.Escape != nil {
if o.Escape != nil {
return pgerror.Newf(pgcode.Syntax, "escape option specified multiple times")
}
o.Escape = other.Escape
}
return nil
}

Expand Down
Loading

0 comments on commit 8b36717

Please sign in to comment.