importccl: support unique_rowid() as default expression for IMPORT INTO #50922

Merged · 1 commit · Jul 28, 2020
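As a quick illustration of what this change enables (a sketch, not code from the PR: the connection string, the `nodelocal://0/data.csv` URL, and table `t` are placeholders), a table whose column defaults to `unique_rowid()` can now be the target of `IMPORT INTO`, with the default filled in for every imported row:

```go
package main

import (
	"database/sql"
	"fmt"
	"log"

	_ "github.com/lib/pq" // CockroachDB speaks the Postgres wire protocol.
)

func main() {
	// Placeholder connection string; adjust for your cluster.
	db, err := sql.Open("postgres", "postgresql://root@localhost:26257/defaultdb?sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// A column with a unique_rowid() default, which IMPORT INTO previously rejected.
	if _, err := db.Exec(`CREATE TABLE t (a INT DEFAULT unique_rowid(), b INT, c STRING)`); err != nil {
		log.Fatal(err)
	}

	// Import only b and c; a is filled in by its default. The file URL is a placeholder.
	if _, err := db.Exec(`IMPORT INTO t (b, c) CSV DATA ('nodelocal://0/data.csv')`); err != nil {
		log.Fatal(err)
	}

	var n int
	if err := db.QueryRow(`SELECT count(*) FROM t WHERE a IS NOT NULL`).Scan(&n); err != nil {
		log.Fatal(err)
	}
	fmt.Println("rows with a generated rowid:", n)
}
```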
85 changes: 79 additions & 6 deletions pkg/ccl/importccl/import_stmt_test.go
@@ -2946,6 +2946,10 @@ func BenchmarkCSVConvertRecord(b *testing.B) {
b.ReportAllocs()
}

func selectNotNull(col string) string {
return fmt.Sprintf(`SELECT %s FROM t WHERE %s IS NOT NULL`, col, col)
}

// Test that IMPORT INTO works when columns with default expressions are present.
// The default expressions supported by IMPORT INTO are constant expressions,
// which are literals and functions that always return the same value given the
@@ -2958,6 +2962,10 @@ func TestImportDefault(t *testing.T) {
defer log.Scope(t).Close(t)

const nodes = 3
numFiles := nodes + 2
rowsPerFile := 1000
rowsPerRaceFile := 16
testFiles := makeCSVData(t, numFiles, rowsPerFile, nodes, rowsPerRaceFile)

ctx := context.Background()
baseDir := filepath.Join("testdata", "csv")
@@ -3231,6 +3239,64 @@ func TestImportDefault(t *testing.T) {
})
}
})
t.Run("unique_rowid", func(t *testing.T) {
const M = int(1e9 + 7) // Modulus used to reduce unique_rowid() values before adding them.
testCases := []struct {
name string
create string
targetCols []string
insert string
rowIDCols []string
}{
{
name: "multiple_unique_rowid",
create: "a INT DEFAULT unique_rowid(), b INT, c STRING, d INT DEFAULT unique_rowid()",
targetCols: []string{"b", "c"},
insert: "INSERT INTO t (b, c) VALUES (3, 'CAT'), (4, 'DOG')",
rowIDCols: []string{selectNotNull("a"), selectNotNull("d")},
},
{
name: "unique_rowid_with_pk",
create: "a INT DEFAULT unique_rowid(), b INT PRIMARY KEY, c STRING",
targetCols: []string{"b", "c"},
insert: "INSERT INTO t (b, c) VALUES (-3, 'CAT'), (-4, 'DOG')",
rowIDCols: []string{selectNotNull("a")},
},
{
// unique_rowid()+unique_rowid() won't work: the rowids produced by import
// have their leftmost bit set to 1, so adding two of them overflows.
// The workaround is to take each unique_rowid() modulo a number M before
// adding. M = 1e9+7 is used because it is large and prime, which keeps
// collisions unlikely. (A standalone sketch of this arithmetic follows
// this file's diff.)
name: "rowid+rowid",
create: fmt.Sprintf(
`a INT DEFAULT (unique_rowid() %% %d) + (unique_rowid() %% %d), b INT PRIMARY KEY, c STRING`, M, M),
targetCols: []string{"b", "c"},
rowIDCols: []string{selectNotNull("a")},
},
}
for _, test := range testCases {
t.Run(test.name, func(t *testing.T) {
defer sqlDB.Exec(t, `DROP TABLE t`)
sqlDB.Exec(t, fmt.Sprintf(`CREATE TABLE t(%s)`, test.create))
if test.insert != "" {
sqlDB.Exec(t, test.insert)
}
sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (%s) CSV DATA (%s)`,
strings.Join(test.targetCols, ", "),
strings.Join(testFiles.files, ", ")))
var numDistinctRows int
sqlDB.QueryRow(t,
fmt.Sprintf(`SELECT DISTINCT COUNT (*) FROM (%s)`,
strings.Join(test.rowIDCols, " UNION ")),
).Scan(&numDistinctRows)
var numRows int
sqlDB.QueryRow(t, `SELECT COUNT (*) FROM t`).Scan(&numRows)
require.Equal(t, numDistinctRows, len(test.rowIDCols)*numRows)
})

}
})
}

// goos: darwin
@@ -4369,14 +4435,21 @@ func TestImportPgDumpGeo(t *testing.T) {

// Verify both created tables are identical.
importCreate := sqlDB.QueryStr(t, "SELECT create_statement FROM [SHOW CREATE importdb.nyc_census_blocks]")
// Families are slightly different due to the geom column being last
// in exec and rowid being last in import, so swap that in import to
// match exec.
importCreate[0][0] = strings.Replace(importCreate[0][0], "geom, rowid", "rowid, geom", 1)
// Families are slightly different due to rowid showing up in exec but
// not import (possibly due to the ALTER TABLE statement that makes
// gid a primary key), so add that into import to match exec.
importCreate[0][0] = strings.Replace(importCreate[0][0], "boroname, geom", "boroname, rowid, geom", 1)
sqlDB.CheckQueryResults(t, "SELECT create_statement FROM [SHOW CREATE execdb.nyc_census_blocks]", importCreate)

importSelect := sqlDB.QueryStr(t, "SELECT * FROM importdb.nyc_census_blocks ORDER BY PRIMARY KEY importdb.nyc_census_blocks")
sqlDB.CheckQueryResults(t, "SELECT * FROM execdb.nyc_census_blocks ORDER BY PRIMARY KEY execdb.nyc_census_blocks", importSelect)
importCols := "blkid, popn_total, popn_white, popn_black, popn_nativ, popn_asian, popn_other, boroname"
importSelect := sqlDB.QueryStr(t, fmt.Sprintf(
"SELECT (%s) FROM importdb.nyc_census_blocks ORDER BY PRIMARY KEY importdb.nyc_census_blocks",
importCols,
))
sqlDB.CheckQueryResults(t, fmt.Sprintf(
"SELECT (%s) FROM execdb.nyc_census_blocks ORDER BY PRIMARY KEY execdb.nyc_census_blocks",
importCols,
), importSelect)
}

func TestImportCockroachDump(t *testing.T) {
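One note on the `rowid+rowid` case above: the test reduces each `unique_rowid()` modulo M = 1e9+7 before adding, because two import-generated rowids (leftmost bit set) do not fit in an int64 when summed directly. A minimal standalone sketch of that arithmetic, using made-up stand-in values rather than real rowids:

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	const M = int64(1e9 + 7)

	// Made-up stand-ins for import-generated rowids: both have the leftmost
	// bit set, i.e. they sit at the extreme end of the int64 range.
	r1 := int64(math.MinInt64 + 12345)
	r2 := int64(math.MinInt64 + 67890)

	// Summing them directly leaves the int64 range. Go wraps silently here,
	// whereas the equivalent SQL addition would error out instead of wrapping.
	fmt.Println(r1 + r2)

	// Reducing each value mod M first bounds the magnitude of the sum by 2*M,
	// comfortably inside the int64 range. M is large and prime, so collisions
	// between distinct rowid pairs stay unlikely.
	fmt.Println(r1%M + r2%M)
}
```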
2 changes: 1 addition & 1 deletion pkg/ccl/importccl/read_import_base.go
@@ -408,7 +408,7 @@ func makeDatumConverter(
ctx context.Context, importCtx *parallelImportContext, fileCtx *importFileContext,
) (*row.DatumRowConverter, error) {
conv, err := row.NewDatumRowConverter(
ctx, importCtx.tableDesc, importCtx.targetCols, importCtx.evalCtx.Copy(), importCtx.kvCh)
ctx, importCtx.tableDesc, importCtx.targetCols, importCtx.evalCtx, importCtx.kvCh)
if err == nil {
conv.KvBatch.Source = fileCtx.source
}
8 changes: 2 additions & 6 deletions pkg/ccl/importccl/testdata/pgdump/geo.sql
@@ -1,11 +1,7 @@
-- The two comments below removing gid are there because IMPORT doesn't
-- support DEFAULT functions (#48253). This function is otherwise exactly
-- what shp2pgsql produces.

SET CLIENT_ENCODING TO UTF8;
SET STANDARD_CONFORMING_STRINGS TO ON;
BEGIN;
CREATE TABLE "nyc_census_blocks" (--gid serial,
CREATE TABLE "nyc_census_blocks" (gid serial,
"blkid" varchar(15),
"popn_total" float8,
"popn_white" float8,
@@ -14,7 +10,7 @@ CREATE TABLE "nyc_census_blocks" (--gid serial,
"popn_asian" float8,
"popn_other" float8,
"boroname" varchar(32));
--ALTER TABLE "nyc_census_blocks" ADD PRIMARY KEY (gid);
ALTER TABLE "nyc_census_blocks" ADD PRIMARY KEY (gid);
SELECT AddGeometryColumn('','nyc_census_blocks','geom','26918','MULTIPOLYGON',2);
INSERT INTO "nyc_census_blocks" ("blkid","popn_total","popn_white","popn_black","popn_nativ","popn_asian","popn_other","boroname",geom) VALUES ('360850009001000','97','51','32','1','5','8','Staten Island','010600002026690000010000000103000000010000000A00000051AC161881A22141A31409CF1F2A51415F4321458DA2214100102A3F1D2A51418C34807C0BA221414E3E89F5122A5141782D605495A12141780D1CE92A2A51410D1C9C6770A121410F2D6074322A5141441560E0B0A02141A00099C72F2A51412365B4789AA021419F60A7BB342A514160E3E8FA66A0214118B4C0CE402A5141EA4BF3EEC7A12141A3023D61452A514151AC161881A22141A31409CF1F2A5141');
INSERT INTO "nyc_census_blocks" ("blkid","popn_total","popn_white","popn_black","popn_nativ","popn_asian","popn_other","boroname",geom) VALUES ('360850020011000','66','52','2','0','7','5','Staten Island','0106000020266900000100000001030000000100000007000000083B4A6F79A8214127EC57B49926514151B51BB7CEA72141B2EAD6F38A2651416F429640B9A72141449FCB1C89265141163AA64D56A72141B89E2B7C9B26514150509213EDA72141DCC9A351A826514184FA4C6017A82141B9AE24F0AB265141083B4A6F79A8214127EC57B499265141');
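The fixture change above puts `gid serial` and its primary key back, since the serial column's `unique_rowid()` default no longer has to be stripped from the dump before importing it. A rough sketch of exercising that path outside the test harness (not from the PR: the connection string and file URL are placeholders, and the target cluster needs geometry support for the fixture's geom column):

```go
package main

import (
	"database/sql"
	"fmt"
	"log"

	_ "github.com/lib/pq"
)

func main() {
	// Placeholder connection string; adjust for your cluster.
	db, err := sql.Open("postgres", "postgresql://root@localhost:26257/defaultdb?sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// With gid serial back in the dump, the import no longer needs the
	// commented-out workaround. The file URL is a placeholder.
	if _, err := db.Exec(`IMPORT PGDUMP 'nodelocal://0/geo.sql'`); err != nil {
		log.Fatal(err)
	}

	var n int
	if err := db.QueryRow(`SELECT count(*) FROM nyc_census_blocks WHERE gid IS NOT NULL`).Scan(&n); err != nil {
		log.Fatal(err)
	}
	fmt.Println("rows with a generated gid:", n)
}
```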