diff --git a/pkg/roachpb/io-formats.proto b/pkg/roachpb/io-formats.proto index c7d97e663aff..8c5f73f5583f 100644 --- a/pkg/roachpb/io-formats.proto +++ b/pkg/roachpb/io-formats.proto @@ -65,6 +65,8 @@ message CSVOptions { // Indicates the number of rows to import per CSV file. // Must be a non-zero positive number. optional int64 row_limit = 6 [(gogoproto.nullable) = false]; + // allow_quoted_null + optional bool allow_quoted_null = 7 [(gogoproto.nullable) = false]; } // MySQLOutfileOptions describe the format of mysql's outfile. diff --git a/pkg/sql/importer/import_planning.go b/pkg/sql/importer/import_planning.go index 0ca2336446d0..2d4d6dbf34f0 100644 --- a/pkg/sql/importer/import_planning.go +++ b/pkg/sql/importer/import_planning.go @@ -56,12 +56,13 @@ import ( ) const ( - csvDelimiter = "delimiter" - csvComment = "comment" - csvNullIf = "nullif" - csvSkip = "skip" - csvRowLimit = "row_limit" - csvStrictQuotes = "strict_quotes" + csvDelimiter = "delimiter" + csvComment = "comment" + csvNullIf = "nullif" + csvSkip = "skip" + csvRowLimit = "row_limit" + csvStrictQuotes = "strict_quotes" + csvAllowQuotedNulls = "allow_quoted_null" mysqlOutfileRowSep = "rows_terminated_by" mysqlOutfileFieldSep = "fields_terminated_by" @@ -105,12 +106,13 @@ const ( ) var importOptionExpectValues = map[string]sql.KVStringOptValidate{ - csvDelimiter: sql.KVStringOptRequireValue, - csvComment: sql.KVStringOptRequireValue, - csvNullIf: sql.KVStringOptRequireValue, - csvSkip: sql.KVStringOptRequireValue, - csvRowLimit: sql.KVStringOptRequireValue, - csvStrictQuotes: sql.KVStringOptRequireNoValue, + csvDelimiter: sql.KVStringOptRequireValue, + csvComment: sql.KVStringOptRequireValue, + csvNullIf: sql.KVStringOptRequireValue, + csvSkip: sql.KVStringOptRequireValue, + csvRowLimit: sql.KVStringOptRequireValue, + csvStrictQuotes: sql.KVStringOptRequireNoValue, + csvAllowQuotedNulls: sql.KVStringOptRequireNoValue, mysqlOutfileRowSep: sql.KVStringOptRequireValue, mysqlOutfileFieldSep: sql.KVStringOptRequireValue, @@ -169,7 +171,7 @@ var avroAllowedOptions = makeStringSet( ) var csvAllowedOptions = makeStringSet( - csvDelimiter, csvComment, csvNullIf, csvSkip, csvStrictQuotes, csvRowLimit, + csvDelimiter, csvComment, csvNullIf, csvSkip, csvStrictQuotes, csvRowLimit, csvAllowQuotedNulls, ) var mysqlOutAllowedOptions = makeStringSet( @@ -543,6 +545,10 @@ func importPlanHook( format.Csv.NullEncoding = &override } + if _, ok := opts[csvAllowQuotedNulls]; ok { + format.Csv.AllowQuotedNull = true + } + if override, ok := opts[csvSkip]; ok { skip, err := strconv.Atoi(override) if err != nil { diff --git a/pkg/sql/importer/import_stmt_test.go b/pkg/sql/importer/import_stmt_test.go index e9095f6fcba6..6240d707cf74 100644 --- a/pkg/sql/importer/import_stmt_test.go +++ b/pkg/sql/importer/import_stmt_test.go @@ -640,6 +640,68 @@ ORDER BY table_name `SELECT * from t`: {{"NULL", "foop"}}, }, }, + { + name: "zero string is the default for nullif with CSV", + create: ` + i int primary key, + s string + `, + typ: "CSV", + data: `1, +2,""`, + query: map[string][][]string{ + `SELECT i, s from t`: { + {"1", "NULL"}, + {"2", ""}, + }, + }, + }, + { + name: "zero string in not null", + create: ` + i int primary key, + s string, + s2 string not null + `, + typ: "CSV", + data: `1,, + 2,"",""`, + err: "null value in column \"s2\" violates not-null constraint", + }, + { + name: "quoted nullif is treated as a string", + create: ` + i int primary key, + s string + `, + with: `WITH nullif = 'foo'`, + typ: "CSV", + data: `1,foo +2,"foo"`, + query: map[string][][]string{ + `SELECT i, s from t`: { + {"1", "NULL"}, + {"2", "foo"}, + }, + }, + }, + { + name: "quoted nullif is treated as a null if allow_quoted_null is used", + create: ` + i int primary key, + s string + `, + with: `WITH nullif = 'foo', allow_quoted_null`, + typ: "CSV", + data: `1,foo +2,"foo"`, + query: map[string][][]string{ + `SELECT i, s from t`: { + {"1", "NULL"}, + {"2", "NULL"}, + }, + }, + }, // PG COPY { @@ -2379,8 +2441,9 @@ func TestImportCSVStmt(t *testing.T) { f STRING DEFAULT 's', PRIMARY KEY (a, b, c) )` - query = `IMPORT INTO t CSV DATA ($1)` - nullif = ` WITH nullif=''` + query = `IMPORT INTO t CSV DATA ($1)` + nullif = ` WITH nullif=''` + allowQuotedNulls = `, allow_quoted_null` ) sqlDB.Exec(t, create) @@ -2388,13 +2451,32 @@ func TestImportCSVStmt(t *testing.T) { data = ",5,e,7,," t.Run(data, func(t *testing.T) { sqlDB.ExpectErr( - t, `row 1: parse "a" as INT8: could not parse ""`, + t, `row 1: generate insert row: null value in column "a" violates not-null constraint`, query, srv.URL, ) sqlDB.ExpectErr( t, `row 1: generate insert row: null value in column "a" violates not-null constraint`, query+nullif, srv.URL, ) + sqlDB.ExpectErr( + t, `row 1: generate insert row: null value in column "a" violates not-null constraint`, + query+nullif+allowQuotedNulls, srv.URL, + ) + }) + data = "\"\",5,e,7,," + t.Run(data, func(t *testing.T) { + sqlDB.ExpectErr( + t, `row 1: parse "a" as INT8: could not parse ""`, + query, srv.URL, + ) + sqlDB.ExpectErr( + t, `row 1: parse "a" as INT8: could not parse ""`, + query+nullif, srv.URL, + ) + sqlDB.ExpectErr( + t, `row 1: generate insert row: null value in column "a" violates not-null constraint`, + query+nullif+allowQuotedNulls, srv.URL, + ) }) data = "2,5,e,,," t.Run(data, func(t *testing.T) { @@ -3754,7 +3836,17 @@ func (s *csvBenchmarkStream) Read(buf []byte) (int, error) { if err != nil { return 0, err } - return copy(buf, strings.Join(r.([]string), "\t")+"\n"), nil + row := r.([]csv.Record) + if len(row) == 0 { + return copy(buf, "\n"), nil + } + var b strings.Builder + b.WriteString(row[0].String()) + for _, v := range row[1:] { + b.WriteString("\t") + b.WriteString(v.String()) + } + return copy(buf, b.String()+"\n"), nil } return 0, io.EOF } diff --git a/pkg/sql/importer/read_import_csv.go b/pkg/sql/importer/read_import_csv.go index 92e85dec3e86..f2abf3f97b57 100644 --- a/pkg/sql/importer/read_import_csv.go +++ b/pkg/sql/importer/read_import_csv.go @@ -204,8 +204,18 @@ func (c *csvRowConsumer) FillDatums( continue } - if c.opts.NullEncoding != nil && - field.Val == *c.opts.NullEncoding { + // NullEncoding is stored as a *string historically, from before we wanted + // it to default to "". Rather than changing the proto, we just set the + // default here. + nullEncoding := "" + if c.opts.NullEncoding != nil { + nullEncoding = *c.opts.NullEncoding + } + if (!field.Quoted || c.opts.AllowQuotedNull) && field.Val == nullEncoding { + // To match COPY, the default behavior is to only treat the field as NULL + // if it was not quoted (and if it matches the configured NullEncoding). + // The AllowQuotedNull option can be used to get the old behavior where + // even a quoted value is treated as NULL. conv.Datums[datumIdx] = tree.DNull } else { var err error diff --git a/pkg/util/encoding/csv/reader.go b/pkg/util/encoding/csv/reader.go index 07df8d721e59..b61e078748bc 100644 --- a/pkg/util/encoding/csv/reader.go +++ b/pkg/util/encoding/csv/reader.go @@ -308,6 +308,13 @@ type Record struct { Quoted bool } +func (r *Record) String() string { + if r.Quoted { + return "\"" + r.Val + "\"" + } + return r.Val +} + func (r *Reader) readRecord(dst []Record) ([]Record, error) { if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) { return nil, errInvalidDelim