Skip to content

Commit

Permalink
importer: fix collated string imports for MySQL/DELIMITED DATA
Browse files Browse the repository at this point in the history
We previously special cased `ParseDatumStringWithRawBytes` for MySQL
related imports, which was buggy for collated strings as well. Instead,
make import have a specialised method for converting MySQL literals to
the relevant data type without compromising on collated strings.

Release note (bug fix): Previously, using IMPORT INTO for DELIMITED DATA
or MySQL imports would error with `column ... does not exist` if it was
importing into a collated string column.
  • Loading branch information
otan committed Aug 7, 2023
1 parent 4bc18b6 commit 9679561
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 27 deletions.
10 changes: 10 additions & 0 deletions pkg/sql/importer/import_stmt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5695,6 +5695,16 @@ func TestImportDelimited(t *testing.T) {
sqlDB.Exec(t, intoCmd, opts...)
checkQueryResults(t, fmt.Sprintf(`SELECT i, s, b FROM into%d ORDER BY i`, i))
})
t.Run("import-into-collated", func(t *testing.T) {
defer sqlDB.Exec(t, fmt.Sprintf(`DROP TABLE into%d`, i))
sqlDB.Exec(t, fmt.Sprintf(`CREATE TABLE into%d (i INT8 PRIMARY KEY, s text COLLATE "en_US", b bytea)`, i))
intoCmd := fmt.Sprintf(`IMPORT INTO into%d (i, s, b) DELIMITED DATA ($1)`, i)
if len(flags) > 0 {
intoCmd += " WITH " + strings.Join(flags, ", ")
}
sqlDB.Exec(t, intoCmd, opts...)
checkQueryResults(t, fmt.Sprintf(`SELECT i, s, b FROM into%d ORDER BY i`, i))
})
t.Run("import-into-target-cols-reordered", func(t *testing.T) {
defer sqlDB.Exec(t, fmt.Sprintf(`DROP TABLE into%d`, i))
sqlDB.Exec(t, fmt.Sprintf(`CREATE TABLE into%d (b bytea, i INT8 PRIMARY KEY, s text)`, i))
Expand Down
20 changes: 15 additions & 5 deletions pkg/sql/importer/read_import_mysql.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,20 @@ const (
zeroTime = "0000-00-00 00:00:00"
)

func mysqlStrToDatum(evalCtx *eval.Context, s string, desired *types.T) (tree.Datum, error) {
switch desired.Family() {
case types.BytesFamily:
// mysql emits raw byte strings that do not use the same escaping as our
// tree.ParseDBytes function expects, and the difference between
// tree.ParseAndRequireString and mysqlStrToDatum is whether or not it
// attempts to parse bytes.
return tree.NewDBytes(tree.DBytes(s)), nil
default:
res, _, err := tree.ParseAndRequireString(desired, s, evalCtx)
return res, err
}
}

// mysqlValueToDatum attempts to convert a value, as parsed from a mysqldump
// INSERT statement, in to a Cockroach Datum of type `desired`. The MySQL parser
// does not parse the values themselves to Go primitivies, rather leaving the
Expand Down Expand Up @@ -239,11 +253,7 @@ func mysqlValueToDatum(
}
}
}
// This uses ParseDatumStringAsWithRawBytes instead of ParseDatumStringAs since mysql emits
// raw byte strings that do not use the same escaping as our ParseBytes
// function expects, and the difference between ParseStringAs and
// ParseDatumStringAs is whether or not it attempts to parse bytes.
return rowenc.ParseDatumStringAsWithRawBytes(ctx, desired, s, evalContext)
return mysqlStrToDatum(evalContext, s, desired)
case mysql.IntVal:
return rowenc.ParseDatumStringAs(ctx, desired, string(v.Val), evalContext)
case mysql.FloatVal:
Expand Down
4 changes: 4 additions & 0 deletions pkg/sql/importer/read_import_mysql_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/kr/pretty"
"github.com/stretchr/testify/require"
mysql "vitess.io/vitess/go/vt/sqlparser"
)

Expand Down Expand Up @@ -356,13 +357,16 @@ func TestMysqlValueToDatum(t *testing.T) {
}
return d
}
asdfCS, err := tree.NewDCollatedString("Asdf", "en_US", &tree.CollationEnvironment{})
require.NoError(t, err)
tests := []struct {
raw mysql.Expr
typ *types.T
want tree.Datum
}{
{raw: mysql.NewStrLiteral([]byte("0000-00-00")), typ: types.Date, want: tree.DNull},
{raw: mysql.NewStrLiteral([]byte("2010-01-01")), typ: types.Date, want: date("2010-01-01")},
{raw: mysql.NewStrLiteral([]byte("Asdf")), typ: types.MakeCollatedString(types.String, "en_US"), want: asdfCS},
{raw: mysql.NewStrLiteral([]byte("0000-00-00 00:00:00")), typ: types.Timestamp, want: tree.DNull},
{raw: mysql.NewStrLiteral([]byte("2010-01-01 00:00:00")), typ: types.Timestamp, want: ts("2010-01-01 00:00:00")},
}
Expand Down
7 changes: 1 addition & 6 deletions pkg/sql/importer/read_import_mysqlout.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/security/username"
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
"github.com/cockroachdb/cockroach/pkg/sql/row"
"github.com/cockroachdb/cockroach/pkg/sql/rowenc"
"github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
Expand Down Expand Up @@ -258,12 +257,8 @@ func (d *delimitedConsumer) FillDatums(
} else if (!d.opts.HasEscape && field == "NULL") || d.opts.NullEncoding != nil && field == *d.opts.NullEncoding {
datum = tree.DNull
} else {
// This uses ParseDatumStringAsWithRawBytes instead of ParseDatumStringAs since mysql emits
// raw byte strings that do not use the same escaping as our ParseBytes
// function expects, and the difference between ParseStringAs and
// ParseDatumStringAs is whether or not it attempts to parse bytes.
var err error
datum, err = rowenc.ParseDatumStringAsWithRawBytes(ctx, conv.VisibleColTypes[datumIdx], field, conv.EvalCtx)
datum, err = mysqlStrToDatum(conv.EvalCtx, field, conv.VisibleColTypes[datumIdx])
if err != nil {
col := conv.VisibleCols[datumIdx]
return newImportRowError(
Expand Down
16 changes: 0 additions & 16 deletions pkg/sql/rowenc/roundtrip_format.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,22 +35,6 @@ func ParseDatumStringAs(
}
}

// ParseDatumStringAsWithRawBytes parses s as type t. However, if the requested type is Bytes
// then the string is returned unchanged. This function is used when the input string might be
// unescaped raw bytes, so we don't want to run a bytes parsing routine on the input. Other
// than the bytes case, this function does the same as ParseDatumStringAs but is not
// guaranteed to round-trip.
func ParseDatumStringAsWithRawBytes(
ctx context.Context, t *types.T, s string, evalCtx *eval.Context,
) (tree.Datum, error) {
switch t.Family() {
case types.BytesFamily:
return tree.NewDBytes(tree.DBytes(s)), nil
default:
return ParseDatumStringAs(ctx, t, s, evalCtx)
}
}

func parseAsTyp(
ctx context.Context, evalCtx *eval.Context, typ *types.T, s string,
) (tree.Datum, error) {
Expand Down

0 comments on commit 9679561

Please sign in to comment.