diff --git a/pkg/ccl/importccl/import_stmt_test.go b/pkg/ccl/importccl/import_stmt_test.go
index 759eaffcd663..a711f269ddd2 100644
--- a/pkg/ccl/importccl/import_stmt_test.go
+++ b/pkg/ccl/importccl/import_stmt_test.go
@@ -63,6 +63,39 @@ import (
 	"github.com/stretchr/testify/require"
 )
 
+// checkUnique reports whether every value in the selected columns (inds) of
+// the query result allStr is distinct, across all rows and columns together.
+func checkUnique(allStr [][]string, inds []int) bool {
+	seen := make(map[string]struct{}, len(allStr))
+	for _, row := range allStr {
+		for _, col := range inds {
+			before := len(seen)
+			seen[row[col]] = struct{}{}
+			if len(seen) == before {
+				return false
+			}
+		}
+	}
+	return true
+}
+
+// checkNoNull reports whether no selected column (inds) of allStr holds "NULL".
+func checkNoNull(allStr [][]string, inds []int) bool {
+	for i := range allStr {
+		for _, col := range inds {
+			if allStr[i][col] == "NULL" {
+				return false
+			}
+		}
+	}
+	return true
+}
+
+// validUniqueRowID reports whether the columns pass checkUnique and checkNoNull.
+func validUniqueRowID(allStr [][]string, inds []int) bool {
+	return checkUnique(allStr, inds) && checkNoNull(allStr, inds)
+}
+
 func TestImportData(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 
@@ -2475,8 +2508,10 @@ func TestImportIntoCSV(t *testing.T) {
 	// Test that IMPORT INTO works when columns with default expressions are present.
 	// The default expressions supported by IMPORT INTO are constant expressions,
 	// which are literals and functions that always return the same value given the
-	// same arguments (examples of non-constant expressions are given in the last two
-	// subtests below). The default expression of a column is used when this column is not
+	// same arguments (examples of non-constant expressions are given as now()
+	// and nextval()). `unique_rowid()` is also supported.
+	//
+	// The default expression of a column is used when this column is not
 	// targeted; otherwise, data from source file (like CSV) is used.
It also checks
 	// that IMPORT TABLE works when there are default columns.
 	t.Run("import-into-default", func(t *testing.T) {
@@ -2599,6 +2634,26 @@ func TestImportIntoCSV(t *testing.T) {
 				fmt.Sprintf(`non-constant default expression .* for non-targeted column "b" is not supported by IMPORT INTO`),
 				fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL))
 		})
+		// Two non-targeted unique_rowid() columns: values produced by INSERT and by IMPORT must all be distinct and non-NULL.
+		t.Run("unique_rowid", func(t *testing.T) {
+			sqlDB.Exec(t, `CREATE TABLE t(a INT DEFAULT unique_rowid(), b INT, c STRING, d INT DEFAULT unique_rowid())`)
+			defer sqlDB.Exec(t, `DROP TABLE t`)
+			sqlDB.Exec(t, `INSERT INTO t (b, c) VALUES (3, 'CAT')`)
+			sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (b, c) CSV DATA (%s)`, strings.Join(testFiles.files, ", ")))
+			sqlDB.Exec(t, `INSERT INTO t (b, c) VALUES (4, 'DOG')`)
+			idStr := sqlDB.QueryStr(t, `SELECT a, d FROM t`)
+			require.True(t, validUniqueRowID(idStr, []int{0, 1}))
+		})
+		// Same check for a single unique_rowid() column when the table declares an explicit primary key.
+		t.Run("unique_rowid_with_pk", func(t *testing.T) {
+			sqlDB.Exec(t, `CREATE TABLE t(a INT DEFAULT unique_rowid(), b INT PRIMARY KEY, c STRING)`)
+			defer sqlDB.Exec(t, `DROP TABLE t`)
+			sqlDB.Exec(t, `INSERT INTO t (b, c) VALUES (-3, 'CAT')`)
+			sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (b, c) CSV DATA (%s)`, strings.Join(testFiles.files, ", ")))
+			sqlDB.Exec(t, `INSERT INTO t (b, c) VALUES (-4, 'DOG')`)
+			idStr := sqlDB.QueryStr(t, `SELECT a FROM t`)
+			require.True(t, validUniqueRowID(idStr, []int{0}))
+		})
 	})
 
 	t.Run("import-not-targeted-not-null", func(t *testing.T) {
diff --git a/pkg/sql/row/row_converter.go b/pkg/sql/row/row_converter.go
index cb2f54b455c2..4c55f8e8339c 100644
--- a/pkg/sql/row/row_converter.go
+++ b/pkg/sql/row/row_converter.go
@@ -198,7 +198,7 @@ type DatumRowConverter struct {
 	IsTargetCol map[int]struct{}
 
 	// The rest of these are derived from tableDesc, just cached here.
- hidden int + rowIDs []int ri Inserter EvalCtx *tree.EvalContext cols []sqlbase.ColumnDescriptor @@ -302,14 +302,18 @@ func NewDatumRowConverter( _, ok := isTargetColID[col.ID] return ok } - c.hidden = -1 + hidden := -1 + c.rowIDs = make([]int, 0) for i := range cols { col := &cols[i] + if !isTargetCol(col) && col.HasDefault() && col.DefaultExprStr() == "unique_rowid()" { + c.rowIDs = append(c.rowIDs, i) + } if col.Hidden { - if col.DefaultExpr == nil || *col.DefaultExpr != "unique_rowid()" || c.hidden != -1 { + if col.DefaultExpr == nil || *col.DefaultExpr != "unique_rowid()" || hidden != -1 { return nil, errors.New("unexpected hidden column") } - c.hidden = i + hidden = i c.Datums = append(c.Datums, nil) } else { if !isTargetCol(col) && col.DefaultExpr != nil { @@ -318,7 +322,7 @@ func NewDatumRowConverter( // // TODO (anzoteh96): add support to non-constant default expressions. Perhaps // we can start with those with Stable volatility, like now(). - if !tree.IsConst(evalCtx, defaultExprs[i]) { + if !(col.DefaultExprStr() == "unique_rowid()" || tree.IsConst(evalCtx, defaultExprs[i])) { return nil, errors.Newf( "non-constant default expression %s for non-targeted column %q is not supported by IMPORT INTO", defaultExprs[i].String(), @@ -350,8 +354,8 @@ const rowIDBits = 64 - builtins.NodeIDBits // Row inserts kv operations into the current kv batch, and triggers a SendBatch // if necessary. func (c *DatumRowConverter) Row(ctx context.Context, sourceID int32, rowIndex int64) error { - if c.hidden >= 0 { - // We don't want to call unique_rowid() for the hidden PK column because it + for i, pos := range c.rowIDs { + // We don't want to call unique_rowid() for columns with such default expressions because it // is not idempotent and has unfortunate overlapping of output spans since // it puts the uniqueness-ensuring per-generator part (nodeID) in the // low-bits. 
Instead, make our own IDs that attempt to keep each generator @@ -377,14 +381,23 @@ func (c *DatumRowConverter) Row(ctx context.Context, sourceID int32, rowIndex in // fileIndex*desc.Version) could improve on this. For now, if this // best-effort collision avoidance scheme doesn't work in some cases we can // just recommend an explicit PK as a workaround. + // + // TODO(anzoteh96): As per the issue in #51004, having too many columns with + // default expression unique_rowid() could cause collisions when IMPORTs are run + // too close to each other. It will therefore be nice to fix this problem. avoidCollisionsWithSQLsIDs := uint64(1 << 63) - rowID := (uint64(sourceID) << rowIDBits) ^ uint64(rowIndex) - c.Datums[c.hidden] = tree.NewDInt(tree.DInt(avoidCollisionsWithSQLsIDs | rowID)) + shiftedRowIndex := int64(len(c.rowIDs))*rowIndex + int64(i) + rowID := (uint64(sourceID) << rowIDBits) ^ uint64(shiftedRowIndex) + c.Datums[pos] = tree.NewDInt(tree.DInt(avoidCollisionsWithSQLsIDs | rowID)) } + isTargetCol := func(i int) bool { + _, ok := c.IsTargetCol[i] + return ok + } for i := range c.cols { col := &c.cols[i] - if _, ok := c.IsTargetCol[i]; !ok && !col.Hidden && col.DefaultExpr != nil { + if !isTargetCol(i) && col.DefaultExpr != nil && col.DefaultExprStr() != "unique_rowid()" { datum, err := c.defaultExprs[i].Eval(c.EvalCtx) if err != nil { return errors.Wrapf(err, "error evaluating default expression for IMPORT INTO")