Skip to content

Commit

Permalink
importccl: support unique_rowid() as default expression for IMPORT …
Browse files Browse the repository at this point in the history
…INTO

PR #50295 added support for non-targeted columns with constant default
expressions. This PR is a follow-up that adds support for `unique_rowid()`.
This is done by assigning, as the default value, the same `rowid` value that
was generated at the `IMPORT` stage (row converter).

Release note (general change): IMPORT INTO now supports `unique_rowid()`
as a default expression.
  • Loading branch information
anzoteh96 committed Jul 27, 2020
1 parent 069d328 commit df25b0c
Show file tree
Hide file tree
Showing 4 changed files with 256 additions and 89 deletions.
82 changes: 76 additions & 6 deletions pkg/ccl/importccl/import_stmt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2946,6 +2946,10 @@ func BenchmarkCSVConvertRecord(b *testing.B) {
b.ReportAllocs()
}

// selectNotNull builds a query that returns every non-NULL value of the
// column col from table t.
func selectNotNull(col string) string {
	const queryTemplate = `SELECT %s FROM t WHERE %s IS NOT NULL`
	query := fmt.Sprintf(queryTemplate, col, col)
	return query
}

// Test that IMPORT INTO works when columns with default expressions are present.
// The default expressions supported by IMPORT INTO are constant expressions,
// which are literals and functions that always return the same value given the
Expand All @@ -2958,6 +2962,10 @@ func TestImportDefault(t *testing.T) {
defer log.Scope(t).Close(t)

const nodes = 3
numFiles := nodes + 2
rowsPerFile := 1000
rowsPerRaceFile := 16
testFiles := makeCSVData(t, numFiles, rowsPerFile, nodes, rowsPerRaceFile)

ctx := context.Background()
baseDir := filepath.Join("testdata", "csv")
Expand Down Expand Up @@ -3231,6 +3239,59 @@ func TestImportDefault(t *testing.T) {
})
}
})
// unique_rowid checks IMPORT INTO on tables where non-targeted columns have a
// default expression built from unique_rowid(). After the import, every value
// in those columns must be non-NULL and distinct, both within a column and
// across columns.
t.Run("unique_rowid", func(t *testing.T) {
	// Modulus applied to each unique_rowid() call in the "rowid+rowid" case.
	const M = int(1e9 + 7)
	testCases := []struct {
		name string
		// create holds the column definitions passed to CREATE TABLE t.
		create string
		// targetCols are the columns named in the IMPORT INTO statement.
		targetCols []string
		// insert, when non-empty, is executed before the import.
		insert string
		// rowIDCols holds one query per column whose default uses
		// unique_rowid(); each selects that column's non-NULL values.
		rowIDCols []string
	}{
		{
			name:       "multiple_unique_rowid",
			create:     "a INT DEFAULT unique_rowid(), b INT, c STRING, d INT DEFAULT unique_rowid()",
			targetCols: []string{"b", "c"},
			insert:     "INSERT INTO t (b, c) VALUES (3, 'CAT'), (4, 'DOG')",
			rowIDCols:  []string{selectNotNull("a"), selectNotNull("d")},
		},
		{
			name:       "unique_rowid_with_pk",
			create:     "a INT DEFAULT unique_rowid(), b INT PRIMARY KEY, c STRING",
			targetCols: []string{"b", "c"},
			insert:     "INSERT INTO t (b, c) VALUES (-3, 'CAT'), (-4, 'DOG')",
			rowIDCols:  []string{selectNotNull("a")},
		},
		{
			name: "rowid+rowid",
			create: fmt.Sprintf(
				`a INT DEFAULT (unique_rowid() %% %d) + (unique_rowid() %% %d), b INT PRIMARY KEY, c STRING`, M, M),
			targetCols: []string{"b", "c"},
			rowIDCols:  []string{selectNotNull("a")},
		},
	}
	for _, test := range testCases {
		t.Run(test.name, func(t *testing.T) {
			defer sqlDB.Exec(t, `DROP TABLE t`)
			sqlDB.Exec(t, fmt.Sprintf(`CREATE TABLE t(%s)`, test.create))
			if test.insert != "" {
				sqlDB.Exec(t, test.insert)
			}
			sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (%s) CSV DATA (%s)`,
				strings.Join(test.targetCols, ", "),
				strings.Join(testFiles.files, ", ")))

			// UNION (unlike UNION ALL) removes duplicate rows, so this counts
			// the distinct non-NULL values across all rowid columns combined.
			var numDistinctRows int
			sqlDB.QueryRow(t,
				fmt.Sprintf(`SELECT DISTINCT COUNT (*) FROM (%s)`,
					strings.Join(test.rowIDCols, " UNION ")),
			).Scan(&numDistinctRows)
			var numRows int
			sqlDB.QueryRow(t, `SELECT COUNT (*) FROM t`).Scan(&numRows)

			// If every generated value is non-NULL and unique, each rowid
			// column contributes exactly numRows distinct values. The expected
			// value goes first so failure output labels the operands correctly.
			require.Equal(t, len(test.rowIDCols)*numRows, numDistinctRows)
		})
	}
})
}

// goos: darwin
Expand Down Expand Up @@ -4369,14 +4430,23 @@ func TestImportPgDumpGeo(t *testing.T) {

// Verify both created tables are identical.
importCreate := sqlDB.QueryStr(t, "SELECT create_statement FROM [SHOW CREATE importdb.nyc_census_blocks]")
// Families are slightly different due to the geom column being last
// in exec and rowid being last in import, so swap that in import to
// match exec.
importCreate[0][0] = strings.Replace(importCreate[0][0], "geom, rowid", "rowid, geom", 1)
// Families are slightly different because rowid shows up in exec but
// not in import (possibly due to the ALTER TABLE statement that makes
// gid a primary key), so add it to import to match exec.
importCreate[0][0] = strings.Replace(importCreate[0][0], "boroname, geom", "boroname, rowid, geom", 1)
sqlDB.CheckQueryResults(t, "SELECT create_statement FROM [SHOW CREATE execdb.nyc_census_blocks]", importCreate)

importSelect := sqlDB.QueryStr(t, "SELECT * FROM importdb.nyc_census_blocks ORDER BY PRIMARY KEY importdb.nyc_census_blocks")
sqlDB.CheckQueryResults(t, "SELECT * FROM execdb.nyc_census_blocks ORDER BY PRIMARY KEY execdb.nyc_census_blocks", importSelect)
// Drop the comparison of gid for import vs exec, then check that gid
// in import is indeed valid rowid.
importCols := "blkid, popn_total, popn_white, popn_black, popn_nativ, popn_asian, popn_other, boroname"
importSelect := sqlDB.QueryStr(t, fmt.Sprintf(
"SELECT (%s) FROM importdb.nyc_census_blocks ORDER BY PRIMARY KEY importdb.nyc_census_blocks",
importCols,
))
sqlDB.CheckQueryResults(t, fmt.Sprintf(
"SELECT (%s) FROM execdb.nyc_census_blocks ORDER BY PRIMARY KEY execdb.nyc_census_blocks",
importCols,
), importSelect)
}

func TestImportCockroachDump(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/ccl/importccl/read_import_pgdump.go
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,7 @@ func newPgDumpReader(
for i, col := range table.Desc.VisibleColumns() {
colSubMap[col.Name] = i
}
conv, err := row.NewDatumRowConverter(ctx, table.Desc, targetCols, evalCtx, kvCh)
conv, err := row.NewDatumRowConverter(ctx, table.Desc, targetCols, evalCtx.Copy(), kvCh)
if err != nil {
return nil, err
}
Expand Down
8 changes: 2 additions & 6 deletions pkg/ccl/importccl/testdata/pgdump/geo.sql
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
-- The two comments below removing gid are there because IMPORT doesn't
-- support DEFAULT functions (#48253). This function is otherwise exactly
-- what shp2pgsql produces.

SET CLIENT_ENCODING TO UTF8;
SET STANDARD_CONFORMING_STRINGS TO ON;
BEGIN;
CREATE TABLE "nyc_census_blocks" (--gid serial,
CREATE TABLE "nyc_census_blocks" (gid serial,
"blkid" varchar(15),
"popn_total" float8,
"popn_white" float8,
Expand All @@ -14,7 +10,7 @@ CREATE TABLE "nyc_census_blocks" (--gid serial,
"popn_asian" float8,
"popn_other" float8,
"boroname" varchar(32));
--ALTER TABLE "nyc_census_blocks" ADD PRIMARY KEY (gid);
ALTER TABLE "nyc_census_blocks" ADD PRIMARY KEY (gid);
SELECT AddGeometryColumn('','nyc_census_blocks','geom','26918','MULTIPOLYGON',2);
INSERT INTO "nyc_census_blocks" ("blkid","popn_total","popn_white","popn_black","popn_nativ","popn_asian","popn_other","boroname",geom) VALUES ('360850009001000','97','51','32','1','5','8','Staten Island','010600002026690000010000000103000000010000000A00000051AC161881A22141A31409CF1F2A51415F4321458DA2214100102A3F1D2A51418C34807C0BA221414E3E89F5122A5141782D605495A12141780D1CE92A2A51410D1C9C6770A121410F2D6074322A5141441560E0B0A02141A00099C72F2A51412365B4789AA021419F60A7BB342A514160E3E8FA66A0214118B4C0CE402A5141EA4BF3EEC7A12141A3023D61452A514151AC161881A22141A31409CF1F2A5141');
INSERT INTO "nyc_census_blocks" ("blkid","popn_total","popn_white","popn_black","popn_nativ","popn_asian","popn_other","boroname",geom) VALUES ('360850020011000','66','52','2','0','7','5','Staten Island','0106000020266900000100000001030000000100000007000000083B4A6F79A8214127EC57B49926514151B51BB7CEA72141B2EAD6F38A2651416F429640B9A72141449FCB1C89265141163AA64D56A72141B89E2B7C9B26514150509213EDA72141DCC9A351A826514184FA4C6017A82141B9AE24F0AB265141083B4A6F79A8214127EC57B499265141');
Expand Down
Loading

0 comments on commit df25b0c

Please sign in to comment.