From 9679561724a4a95a8a378e93abf1f9cd439d8f43 Mon Sep 17 00:00:00 2001 From: Oliver Tan Date: Mon, 31 Jul 2023 22:52:33 +0000 Subject: [PATCH] importer: fix collated string imports for MySQL/DELIMITED DATA We previously special cased `ParseDatumStringWithRawBytes` for MySQL related imports, which was buggy for collated strings as well. Instead, make import have a specialised method for converting MySQL literals to the relevant data type without compromising on collated strings. Release note (bug fix): Previously, using IMPORT INTO for DELIMITED DATA or MySQL imports would error with `column ... does not exist` if it was importing into a collated string column. --- pkg/sql/importer/import_stmt_test.go | 10 ++++++++++ pkg/sql/importer/read_import_mysql.go | 20 +++++++++++++++----- pkg/sql/importer/read_import_mysql_test.go | 4 ++++ pkg/sql/importer/read_import_mysqlout.go | 7 +------ pkg/sql/rowenc/roundtrip_format.go | 16 ---------------- 5 files changed, 30 insertions(+), 27 deletions(-) diff --git a/pkg/sql/importer/import_stmt_test.go b/pkg/sql/importer/import_stmt_test.go index 110d51680cd7..d8b898dcf05a 100644 --- a/pkg/sql/importer/import_stmt_test.go +++ b/pkg/sql/importer/import_stmt_test.go @@ -5695,6 +5695,16 @@ func TestImportDelimited(t *testing.T) { sqlDB.Exec(t, intoCmd, opts...) checkQueryResults(t, fmt.Sprintf(`SELECT i, s, b FROM into%d ORDER BY i`, i)) }) + t.Run("import-into-collated", func(t *testing.T) { + defer sqlDB.Exec(t, fmt.Sprintf(`DROP TABLE into%d`, i)) + sqlDB.Exec(t, fmt.Sprintf(`CREATE TABLE into%d (i INT8 PRIMARY KEY, s text COLLATE "en_US", b bytea)`, i)) + intoCmd := fmt.Sprintf(`IMPORT INTO into%d (i, s, b) DELIMITED DATA ($1)`, i) + if len(flags) > 0 { + intoCmd += " WITH " + strings.Join(flags, ", ") + } + sqlDB.Exec(t, intoCmd, opts...) + checkQueryResults(t, fmt.Sprintf(`SELECT i, s, b FROM into%d ORDER BY i`, i)) + }) t.Run("import-into-target-cols-reordered", func(t *testing.T) { defer sqlDB.Exec(t, fmt.Sprintf(`DROP TABLE into%d`, i)) sqlDB.Exec(t, fmt.Sprintf(`CREATE TABLE into%d (b bytea, i INT8 PRIMARY KEY, s text)`, i)) diff --git a/pkg/sql/importer/read_import_mysql.go b/pkg/sql/importer/read_import_mysql.go index 7ee8e7342305..887f25c8aa35 100644 --- a/pkg/sql/importer/read_import_mysql.go +++ b/pkg/sql/importer/read_import_mysql.go @@ -206,6 +206,20 @@ const ( zeroTime = "0000-00-00 00:00:00" ) +func mysqlStrToDatum(evalCtx *eval.Context, s string, desired *types.T) (tree.Datum, error) { + switch desired.Family() { + case types.BytesFamily: + // mysql emits raw byte strings that do not use the same escaping as our + // tree.ParseDBytes function expects, and the difference between + // tree.ParseAndRequireString and mysqlStrToDatum is whether or not it + // attempts to parse bytes. + return tree.NewDBytes(tree.DBytes(s)), nil + default: + res, _, err := tree.ParseAndRequireString(desired, s, evalCtx) + return res, err + } +} + // mysqlValueToDatum attempts to convert a value, as parsed from a mysqldump // INSERT statement, in to a Cockroach Datum of type `desired`. The MySQL parser // does not parse the values themselves to Go primitivies, rather leaving the @@ -239,11 +253,7 @@ func mysqlValueToDatum( } } } - // This uses ParseDatumStringAsWithRawBytes instead of ParseDatumStringAs since mysql emits - // raw byte strings that do not use the same escaping as our ParseBytes - // function expects, and the difference between ParseStringAs and - // ParseDatumStringAs is whether or not it attempts to parse bytes. - return rowenc.ParseDatumStringAsWithRawBytes(ctx, desired, s, evalContext) + return mysqlStrToDatum(evalContext, s, desired) case mysql.IntVal: return rowenc.ParseDatumStringAs(ctx, desired, string(v.Val), evalContext) case mysql.FloatVal: diff --git a/pkg/sql/importer/read_import_mysql_test.go b/pkg/sql/importer/read_import_mysql_test.go index 2a46fa9dee36..eeead4b0c759 100644 --- a/pkg/sql/importer/read_import_mysql_test.go +++ b/pkg/sql/importer/read_import_mysql_test.go @@ -43,6 +43,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/protoutil" "github.com/kr/pretty" + "github.com/stretchr/testify/require" mysql "vitess.io/vitess/go/vt/sqlparser" ) @@ -356,6 +357,8 @@ func TestMysqlValueToDatum(t *testing.T) { } return d } + asdfCS, err := tree.NewDCollatedString("Asdf", "en_US", &tree.CollationEnvironment{}) + require.NoError(t, err) tests := []struct { raw mysql.Expr typ *types.T @@ -363,6 +366,7 @@ func TestMysqlValueToDatum(t *testing.T) { }{ {raw: mysql.NewStrLiteral([]byte("0000-00-00")), typ: types.Date, want: tree.DNull}, {raw: mysql.NewStrLiteral([]byte("2010-01-01")), typ: types.Date, want: date("2010-01-01")}, + {raw: mysql.NewStrLiteral([]byte("Asdf")), typ: types.MakeCollatedString(types.String, "en_US"), want: asdfCS}, {raw: mysql.NewStrLiteral([]byte("0000-00-00 00:00:00")), typ: types.Timestamp, want: tree.DNull}, {raw: mysql.NewStrLiteral([]byte("2010-01-01 00:00:00")), typ: types.Timestamp, want: ts("2010-01-01 00:00:00")}, } diff --git a/pkg/sql/importer/read_import_mysqlout.go b/pkg/sql/importer/read_import_mysqlout.go index bcc8aa06abc9..ee2a8fea9620 100644 --- a/pkg/sql/importer/read_import_mysqlout.go +++ b/pkg/sql/importer/read_import_mysqlout.go @@ -23,7 +23,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/security/username" "github.com/cockroachdb/cockroach/pkg/sql/catalog" "github.com/cockroachdb/cockroach/pkg/sql/row" - "github.com/cockroachdb/cockroach/pkg/sql/rowenc" "github.com/cockroachdb/cockroach/pkg/sql/sem/eval" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/util/ctxgroup" @@ -258,12 +257,8 @@ func (d *delimitedConsumer) FillDatums( } else if (!d.opts.HasEscape && field == "NULL") || d.opts.NullEncoding != nil && field == *d.opts.NullEncoding { datum = tree.DNull } else { - // This uses ParseDatumStringAsWithRawBytes instead of ParseDatumStringAs since mysql emits - // raw byte strings that do not use the same escaping as our ParseBytes - // function expects, and the difference between ParseStringAs and - // ParseDatumStringAs is whether or not it attempts to parse bytes. var err error - datum, err = rowenc.ParseDatumStringAsWithRawBytes(ctx, conv.VisibleColTypes[datumIdx], field, conv.EvalCtx) + datum, err = mysqlStrToDatum(conv.EvalCtx, field, conv.VisibleColTypes[datumIdx]) if err != nil { col := conv.VisibleCols[datumIdx] return newImportRowError( diff --git a/pkg/sql/rowenc/roundtrip_format.go b/pkg/sql/rowenc/roundtrip_format.go index 13f0c6fb2e50..e0369a9b2239 100644 --- a/pkg/sql/rowenc/roundtrip_format.go +++ b/pkg/sql/rowenc/roundtrip_format.go @@ -35,22 +35,6 @@ func ParseDatumStringAs( } } -// ParseDatumStringAsWithRawBytes parses s as type t. However, if the requested type is Bytes -// then the string is returned unchanged. This function is used when the input string might be -// unescaped raw bytes, so we don't want to run a bytes parsing routine on the input. Other -// than the bytes case, this function does the same as ParseDatumStringAs but is not -// guaranteed to round-trip. -func ParseDatumStringAsWithRawBytes( - ctx context.Context, t *types.T, s string, evalCtx *eval.Context, -) (tree.Datum, error) { - switch t.Family() { - case types.BytesFamily: - return tree.NewDBytes(tree.DBytes(s)), nil - default: - return ParseDatumStringAs(ctx, t, s, evalCtx) - } -} - func parseAsTyp( ctx context.Context, evalCtx *eval.Context, typ *types.T, s string, ) (tree.Datum, error) {