Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

importer: fix collated string imports for MySQL/DELIMITED DATA #107918

Merged
merged 1 commit into from
Aug 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions pkg/sql/importer/import_stmt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5730,6 +5730,16 @@ func TestImportDelimited(t *testing.T) {
sqlDB.Exec(t, intoCmd, opts...)
checkQueryResults(t, fmt.Sprintf(`SELECT i, s, b FROM into%d ORDER BY i`, i))
})
t.Run("import-into-collated", func(t *testing.T) {
defer sqlDB.Exec(t, fmt.Sprintf(`DROP TABLE into%d`, i))
sqlDB.Exec(t, fmt.Sprintf(`CREATE TABLE into%d (i INT8 PRIMARY KEY, s text COLLATE "en_US", b bytea)`, i))
intoCmd := fmt.Sprintf(`IMPORT INTO into%d (i, s, b) DELIMITED DATA ($1)`, i)
if len(flags) > 0 {
intoCmd += " WITH " + strings.Join(flags, ", ")
}
sqlDB.Exec(t, intoCmd, opts...)
checkQueryResults(t, fmt.Sprintf(`SELECT i, s, b FROM into%d ORDER BY i`, i))
})
t.Run("import-into-target-cols-reordered", func(t *testing.T) {
defer sqlDB.Exec(t, fmt.Sprintf(`DROP TABLE into%d`, i))
sqlDB.Exec(t, fmt.Sprintf(`CREATE TABLE into%d (b bytea, i INT8 PRIMARY KEY, s text)`, i))
Expand Down
19 changes: 14 additions & 5 deletions pkg/sql/importer/read_import_mysql.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,19 @@ const (
zeroTime = "0000-00-00 00:00:00"
)

func mysqlStrToDatum(evalCtx *eval.Context, s string, desired *types.T) (tree.Datum, error) {
switch desired.Family() {
case types.BytesFamily:
// mysql emits raw byte strings that do not use the same escaping as our ParseBytes
// function expects, and the difference between ParseStringAs and
// ParseDatumStringAs is whether or not it attempts to parse bytes.
return tree.NewDBytes(tree.DBytes(s)), nil
default:
res, _, err := tree.ParseAndRequireString(desired, s, evalCtx)
return res, err
}
}

// mysqlValueToDatum attempts to convert a value, as parsed from a mysqldump
// INSERT statement, in to a Cockroach Datum of type `desired`. The MySQL parser
// does not parse the values themselves to Go primitivies, rather leaving the
Expand Down Expand Up @@ -239,11 +252,7 @@ func mysqlValueToDatum(
}
}
}
// This uses ParseDatumStringAsWithRawBytes instead of ParseDatumStringAs since mysql emits
// raw byte strings that do not use the same escaping as our ParseBytes
// function expects, and the difference between ParseStringAs and
// ParseDatumStringAs is whether or not it attempts to parse bytes.
return rowenc.ParseDatumStringAsWithRawBytes(ctx, desired, s, evalContext)
return mysqlStrToDatum(evalContext, s, desired)
case mysql.IntVal:
return rowenc.ParseDatumStringAs(ctx, desired, string(v.Val), evalContext)
case mysql.FloatVal:
Expand Down
4 changes: 4 additions & 0 deletions pkg/sql/importer/read_import_mysql_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/kr/pretty"
"github.com/stretchr/testify/require"
mysql "vitess.io/vitess/go/vt/sqlparser"
)

Expand Down Expand Up @@ -356,13 +357,16 @@ func TestMysqlValueToDatum(t *testing.T) {
}
return d
}
asdfCS, err := tree.NewDCollatedString("Asdf", "en_US", &tree.CollationEnvironment{})
require.NoError(t, err)
tests := []struct {
raw mysql.Expr
typ *types.T
want tree.Datum
}{
{raw: mysql.NewStrLiteral([]byte("0000-00-00")), typ: types.Date, want: tree.DNull},
{raw: mysql.NewStrLiteral([]byte("2010-01-01")), typ: types.Date, want: date("2010-01-01")},
{raw: mysql.NewStrLiteral([]byte("Asdf")), typ: types.MakeCollatedString(types.String, "en_US"), want: asdfCS},
{raw: mysql.NewStrLiteral([]byte("0000-00-00 00:00:00")), typ: types.Timestamp, want: tree.DNull},
{raw: mysql.NewStrLiteral([]byte("2010-01-01 00:00:00")), typ: types.Timestamp, want: ts("2010-01-01 00:00:00")},
}
Expand Down
7 changes: 1 addition & 6 deletions pkg/sql/importer/read_import_mysqlout.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/security/username"
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
"github.com/cockroachdb/cockroach/pkg/sql/row"
"github.com/cockroachdb/cockroach/pkg/sql/rowenc"
"github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
Expand Down Expand Up @@ -258,12 +257,8 @@ func (d *delimitedConsumer) FillDatums(
} else if (!d.opts.HasEscape && field == "NULL") || d.opts.NullEncoding != nil && field == *d.opts.NullEncoding {
datum = tree.DNull
} else {
// This uses ParseDatumStringAsWithRawBytes instead of ParseDatumStringAs since mysql emits
// raw byte strings that do not use the same escaping as our ParseBytes
// function expects, and the difference between ParseStringAs and
// ParseDatumStringAs is whether or not it attempts to parse bytes.
var err error
datum, err = rowenc.ParseDatumStringAsWithRawBytes(ctx, conv.VisibleColTypes[datumIdx], field, conv.EvalCtx)
datum, err = mysqlStrToDatum(conv.EvalCtx, field, conv.VisibleColTypes[datumIdx])
if err != nil {
col := conv.VisibleCols[datumIdx]
return newImportRowError(
Expand Down
16 changes: 0 additions & 16 deletions pkg/sql/rowenc/roundtrip_format.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,22 +35,6 @@ func ParseDatumStringAs(
}
}

// ParseDatumStringAsWithRawBytes parses s as type t. However, if the requested type is Bytes
// then the string is returned unchanged. This function is used when the input string might be
// unescaped raw bytes, so we don't want to run a bytes parsing routine on the input. Other
// than the bytes case, this function does the same as ParseDatumStringAs but is not
// guaranteed to round-trip.
func ParseDatumStringAsWithRawBytes(
ctx context.Context, t *types.T, s string, evalCtx *eval.Context,
) (tree.Datum, error) {
switch t.Family() {
case types.BytesFamily:
return tree.NewDBytes(tree.DBytes(s)), nil
default:
return ParseDatumStringAs(ctx, t, s, evalCtx)
}
}

func parseAsTyp(
ctx context.Context, evalCtx *eval.Context, typ *types.T, s string,
) (tree.Datum, error) {
Expand Down