Skip to content

Commit

Permalink
importccl: support unique_rowid() as default expression for IMPORT …
Browse files Browse the repository at this point in the history
…INTO

PR #50295 added support for non-targeted columns with constant default
expressions. This PR is a follow-up that adds support for `unique_rowid()`.
This is done by assigning the same value of `rowid` that was generated
at the `IMPORT` stage (row converter) as the default value.

Release note (general change): IMPORT INTO now supports `unique_rowid()`
as a default expression.
  • Loading branch information
anzoteh96 committed Jul 6, 2020
1 parent d6ab02f commit e215947
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 12 deletions.
57 changes: 55 additions & 2 deletions pkg/ccl/importccl/import_stmt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,39 @@ import (
"github.com/stretchr/testify/require"
)

// checkUnique reports whether every value found at the column positions
// listed in inds, gathered across all rows of allStr, occurs exactly once.
// Values are pooled across both rows and columns, so the same string showing
// up in two different selected columns also counts as a duplicate. It's
// useful for checking unique_rowid.
func checkUnique(allStr [][]string, inds []int) bool {
	seen := make(map[string]struct{}, len(allStr))
	for r := range allStr {
		for c := range inds {
			sizeBefore := len(seen)
			seen[allStr[r][inds[c]]] = struct{}{}
			// The set only fails to grow when the value was already present.
			if len(seen) == sizeBefore {
				return false
			}
		}
	}
	return true
}

// checkNoNull reports whether none of the values at the column positions
// listed in inds, in any row of allStr, is the literal string "NULL".
// It's useful for checking unique_rowid.
func checkNoNull(allStr [][]string, inds []int) bool {
	const nullStr = "NULL"
	for r := range allStr {
		row := allStr[r]
		for c := range inds {
			if row[inds[c]] == nullStr {
				return false
			}
		}
	}
	return true
}

// validUniqueRowID reports whether the values at the column positions inds
// of the query result allStr pass both checkUnique and checkNoNull. The
// uniqueness check runs first; checkNoNull is only consulted if it succeeds.
func validUniqueRowID(allStr [][]string, inds []int) bool {
	if !checkUnique(allStr, inds) {
		return false
	}
	return checkNoNull(allStr, inds)
}

func TestImportData(t *testing.T) {
defer leaktest.AfterTest(t)()

Expand Down Expand Up @@ -2475,8 +2508,10 @@ func TestImportIntoCSV(t *testing.T) {
// Test that IMPORT INTO works when columns with default expressions are present.
// The default expressions supported by IMPORT INTO are constant expressions,
// which are literals and functions that always return the same value given the
// same arguments (examples of non-constant expressions are given in the last two
// subtests below). The default expression of a column is used when this column is not
// same arguments (examples of non-constant expressions are given as now()
// and nextval()). `unique_rowid()` is also supported.
//
// The default expression of a column is used when this column is not
// targeted; otherwise, data from source file (like CSV) is used. It also checks
// that IMPORT TABLE works when there are default columns.
t.Run("import-into-default", func(t *testing.T) {
Expand Down Expand Up @@ -2599,6 +2634,24 @@ func TestImportIntoCSV(t *testing.T) {
fmt.Sprintf(`non-constant default expression .* for non-targeted column "b" is not supported by IMPORT INTO`),
fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL))
})
t.Run("unique_rowid", func(t *testing.T) {
sqlDB.Exec(t, `CREATE TABLE t(a INT DEFAULT unique_rowid(), b INT, c STRING, d INT DEFAULT unique_rowid())`)
defer sqlDB.Exec(t, `DROP TABLE t`)
sqlDB.Exec(t, fmt.Sprintf(`INSERT INTO t (b, c) VALUES (3, 'CAT')`))
sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (b, c) CSV DATA (%s)`, strings.Join(testFiles.files, ", ")))
sqlDB.Exec(t, fmt.Sprintf(`INSERT INTO t (b, c) VALUES (4, 'DOG')`))
IDstr := sqlDB.QueryStr(t, `SELECT a, d FROM t`)
require.True(t, validUniqueRowID(IDstr, []int{0, 1}))
})
t.Run("unique_rowid_with_pk", func(t *testing.T) {
sqlDB.Exec(t, `CREATE TABLE t(a INT DEFAULT unique_rowid(), b INT PRIMARY KEY, c STRING)`)
defer sqlDB.Exec(t, `DROP TABLE t`)
sqlDB.Exec(t, fmt.Sprintf(`INSERT INTO t (b, c) VALUES (-3, 'CAT')`))
sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (b, c) CSV DATA (%s)`, strings.Join(testFiles.files, ", ")))
sqlDB.Exec(t, fmt.Sprintf(`INSERT INTO t (b, c) VALUES (-4, 'DOG')`))
IDstr := sqlDB.QueryStr(t, `SELECT a FROM t`)
require.True(t, validUniqueRowID(IDstr, []int{0}))
})
})

t.Run("import-not-targeted-not-null", func(t *testing.T) {
Expand Down
33 changes: 23 additions & 10 deletions pkg/sql/row/row_converter.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ type DatumRowConverter struct {
IsTargetCol map[int]struct{}

// The rest of these are derived from tableDesc, just cached here.
hidden int
rowIDs []int
ri Inserter
EvalCtx *tree.EvalContext
cols []sqlbase.ColumnDescriptor
Expand Down Expand Up @@ -302,14 +302,18 @@ func NewDatumRowConverter(
_, ok := isTargetColID[col.ID]
return ok
}
c.hidden = -1
hidden := -1
c.rowIDs = make([]int, 0)
for i := range cols {
col := &cols[i]
if !isTargetCol(col) && col.HasDefault() && col.DefaultExprStr() == "unique_rowid()" {
c.rowIDs = append(c.rowIDs, i)
}
if col.Hidden {
if col.DefaultExpr == nil || *col.DefaultExpr != "unique_rowid()" || c.hidden != -1 {
if col.DefaultExpr == nil || *col.DefaultExpr != "unique_rowid()" || hidden != -1 {
return nil, errors.New("unexpected hidden column")
}
c.hidden = i
hidden = i
c.Datums = append(c.Datums, nil)
} else {
if !isTargetCol(col) && col.DefaultExpr != nil {
Expand All @@ -318,7 +322,7 @@ func NewDatumRowConverter(
//
// TODO (anzoteh96): add support to non-constant default expressions. Perhaps
// we can start with those with Stable volatility, like now().
if !tree.IsConst(evalCtx, defaultExprs[i]) {
if !(col.DefaultExprStr() == "unique_rowid()" || tree.IsConst(evalCtx, defaultExprs[i])) {
return nil, errors.Newf(
"non-constant default expression %s for non-targeted column %q is not supported by IMPORT INTO",
defaultExprs[i].String(),
Expand Down Expand Up @@ -350,8 +354,8 @@ const rowIDBits = 64 - builtins.NodeIDBits
// Row inserts kv operations into the current kv batch, and triggers a SendBatch
// if necessary.
func (c *DatumRowConverter) Row(ctx context.Context, sourceID int32, rowIndex int64) error {
if c.hidden >= 0 {
// We don't want to call unique_rowid() for the hidden PK column because it
for i, pos := range c.rowIDs {
// We don't want to call unique_rowid() for columns with such default expressions because it
// is not idempotent and has unfortunate overlapping of output spans since
// it puts the uniqueness-ensuring per-generator part (nodeID) in the
// low-bits. Instead, make our own IDs that attempt to keep each generator
Expand All @@ -377,14 +381,23 @@ func (c *DatumRowConverter) Row(ctx context.Context, sourceID int32, rowIndex in
// fileIndex*desc.Version) could improve on this. For now, if this
// best-effort collision avoidance scheme doesn't work in some cases we can
// just recommend an explicit PK as a workaround.
//
// TODO(anzoteh96): As per the issue in #51004, having too many columns with
// default expression unique_rowid() could cause collisions when IMPORTs are run
// too close to each other. It will therefore be nice to fix this problem.
avoidCollisionsWithSQLsIDs := uint64(1 << 63)
rowID := (uint64(sourceID) << rowIDBits) ^ uint64(rowIndex)
c.Datums[c.hidden] = tree.NewDInt(tree.DInt(avoidCollisionsWithSQLsIDs | rowID))
shiftedRowIndex := int64(len(c.rowIDs))*rowIndex + int64(i)
rowID := (uint64(sourceID) << rowIDBits) ^ uint64(shiftedRowIndex)
c.Datums[pos] = tree.NewDInt(tree.DInt(avoidCollisionsWithSQLsIDs | rowID))
}

isTargetCol := func(i int) bool {
_, ok := c.IsTargetCol[i]
return ok
}
for i := range c.cols {
col := &c.cols[i]
if _, ok := c.IsTargetCol[i]; !ok && !col.Hidden && col.DefaultExpr != nil {
if !isTargetCol(i) && col.DefaultExpr != nil && col.DefaultExprStr() != "unique_rowid()" {
datum, err := c.defaultExprs[i].Eval(c.EvalCtx)
if err != nil {
return errors.Wrapf(err, "error evaluating default expression for IMPORT INTO")
Expand Down

0 comments on commit e215947

Please sign in to comment.