Skip to content

Commit

Permalink
Merge #50295
Browse files Browse the repository at this point in the history
50295: importccl: support DEFAULT columns for constant DEFAULT expressions r=Anzoteh96 a=Anzoteh96

Currently, `IMPORT` does not support importing DEFAULT columns, and non-targeted columns must be nullable. 

This PR adds support for `IMPORT INTO` that involves non-targeted columns with DEFAULT expressions that are constant. Constant expressions are those that return the same value in different statements. Some examples are: 
 -- Literals (booleans, strings, integers, decimals, dates)
 -- Functions where each argument is a constant expression and the functions themselves depend solely on their arguments. Examples are arithmetic operations, boolean logical operations (AND, OR), string operations like concat and repeat. 

In particular, functions like `now()` and `nextval` that return different values across different transactions are not supported. 

This is done by evaluating the required default expressions in the function `sql/row/row_converter:NewDatumRowConverter`. 

To illustrate this, given file.csv with content:
```
35,test
72,hello
```
this will be the expected behaviour of the following line: 
```
> CREATE TABLE t (a INT DEFAULT 53, b STRING, c INT DEFAULT 42);
> IMPORT INTO t (a, b) CSV DATA ('nodelocal://1/file.csv');
> SELECT * FROM t;
 a |  b | c
----+----- + ------
  35 |  test | 42
  72 | hello | 42
```
Note that the default expression for (c) is used since it's non-targeted, but the default expression for (a) is ignored in favour of the column in the csv. 

Fixes #48253

Release note (general change): `IMPORT INTO` now supports columns with constant default expressions, and non-targeted columns with constant default expressions are no longer required to be nullable. 

Co-authored-by: anzoteh96 <[email protected]>
  • Loading branch information
craig[bot] and anzoteh96 committed Jul 1, 2020
2 parents 54c7f57 + 101730a commit 9ac59f1
Show file tree
Hide file tree
Showing 3 changed files with 184 additions and 29 deletions.
21 changes: 11 additions & 10 deletions pkg/ccl/importccl/import_stmt.go
Original file line number Diff line number Diff line change
Expand Up @@ -587,16 +587,17 @@ func importPlanHook(
intoCols = append(intoCols, active[0].Name)
}

// IMPORT INTO does not support columns with DEFAULT expressions. Ensure
// that all non-target columns are nullable until we support DEFAULT
// expressions.
for _, col := range found.VisibleColumns() {
if col.HasDefault() {
return errors.Errorf("cannot IMPORT INTO a table with a DEFAULT expression for any of its columns")
}

if len(isTargetCol) != 0 && !isTargetCol[col.Name] && !col.IsNullable() {
return errors.Errorf("all non-target columns in IMPORT INTO must be nullable")
// Ensure that non-target columns that don't have default
// expressions are nullable.
if len(isTargetCol) != 0 {
for _, col := range found.VisibleColumns() {
if !(isTargetCol[col.Name] || col.IsNullable() || col.HasDefault()) {
return errors.Newf(
"all non-target columns in IMPORT INTO must be nullable "+
"or have default expressions but violated by column %q",
col.Name,
)
}
}
}

Expand Down
155 changes: 140 additions & 15 deletions pkg/ccl/importccl/import_stmt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2472,22 +2472,147 @@ func TestImportIntoCSV(t *testing.T) {
}
})

// IMPORT INTO does not support DEFAULT expressions for either target or
// non-target columns.
t.Run("import-into-check-no-default-cols", func(t *testing.T) {
sqlDB.Exec(t, `CREATE TABLE t (a INT DEFAULT 1, b STRING)`)
defer sqlDB.Exec(t, `DROP TABLE t`)

// Insert the test data
insert := []string{"''", "'text'", "'a'", "'e'", "'l'", "'t'", "'z'"}

for i, v := range insert {
sqlDB.Exec(t, "INSERT INTO t (a, b) VALUES ($1, $2)", i, v)
}
// Test that IMPORT INTO works when columns with default expressions are present.
// The default expressions supported by IMPORT INTO are constant expressions,
// which are literals and functions that always return the same value given the
// same arguments (examples of non-constant expressions are given in the last two
// subtests below). The default expression of a column is used when this column is not
// targeted; otherwise, data from source file (like CSV) is used. It also checks
// that IMPORT TABLE works when there are default columns.
t.Run("import-into-default", func(t *testing.T) {
var data string
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == "GET" {
_, _ = w.Write([]byte(data))
}
}))
defer srv.Close()
t.Run("is-not-target", func(t *testing.T) {
data = "1\n2"
sqlDB.Exec(t, `CREATE TABLE t (b INT DEFAULT 42, a INT)`)
defer sqlDB.Exec(t, `DROP TABLE t`)
sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL))
sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"42", "1"}, {"42", "2"}})
})
t.Run("is-not-target-not-null", func(t *testing.T) {
data = "1\n2"
sqlDB.Exec(t, `CREATE TABLE t (a INT, b INT DEFAULT 42 NOT NULL)`)
defer sqlDB.Exec(t, `DROP TABLE t`)
sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL))
sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"1", "42"}, {"2", "42"}})
})
t.Run("is-target", func(t *testing.T) {
data = "1,36\n2,37"
sqlDB.Exec(t, `CREATE TABLE t (a INT, b INT DEFAULT 42)`)
defer sqlDB.Exec(t, `DROP TABLE t`)
sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA ("%s")`, srv.URL))
sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"1", "36"}, {"2", "37"}})
})
t.Run("is-target-with-null-data", func(t *testing.T) {
data = ",36\n2,"
sqlDB.Exec(t, `CREATE TABLE t (a INT, b INT DEFAULT 42)`)
defer sqlDB.Exec(t, `DROP TABLE t`)
sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA ("%s") WITH nullif = ''`, srv.URL))
sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"NULL", "36"}, {"2", "NULL"}})
})
t.Run("mixed-target-and-non-target", func(t *testing.T) {
data = "35,test string\n72,another test string"
sqlDB.Exec(t, `CREATE TABLE t (b STRING, a INT DEFAULT 53, c INT DEFAULT 42)`)
defer sqlDB.Exec(t, `DROP TABLE t`)
sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA ("%s")`, srv.URL))
sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"test string", "35", "42"}, {"another test string", "72", "42"}})
})
t.Run("with-import-table", func(t *testing.T) {
data = "35,string1,65\n72,string2,17"
sqlDB.Exec(t, fmt.Sprintf(
`IMPORT TABLE t (a INT, b STRING, c INT DEFAULT 33)
CSV DATA ("%s")`,
srv.URL,
))
defer sqlDB.Exec(t, `DROP TABLE t`)
data = "11,string3\n29,string4"
sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA ("%s")`, srv.URL))
sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{
{"35", "string1", "65"},
{"72", "string2", "17"},
{"11", "string3", "33"},
{"29", "string4", "33"}})
})
t.Run("null-as-default", func(t *testing.T) {
data = "1\n2\n3"
sqlDB.Exec(t, fmt.Sprintf(`CREATE TABLE t (a INT DEFAULT NULL, b INT)`))
defer sqlDB.Exec(t, `DROP TABLE t`)
sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (b) CSV DATA ("%s")`, srv.URL))
sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"NULL", "1"}, {"NULL", "2"}, {"NULL", "3"}})
})
t.Run("default-value-change", func(t *testing.T) {
data = "1\n2"
sqlDB.Exec(t, `CREATE TABLE t (a INT, b INT DEFAULT 7)`)
defer sqlDB.Exec(t, `DROP TABLE t`)
sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL))
data = "3\n4"
sqlDB.Exec(t, `ALTER TABLE t ALTER COLUMN b SET DEFAULT 8`)
sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL))
sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"1", "7"}, {"2", "7"}, {"3", "8"}, {"4", "8"}})
})
t.Run("math-constant", func(t *testing.T) {
data = "35\n67"
sqlDB.Exec(t, `CREATE TABLE t (a INT, b FLOAT DEFAULT round(pi()))`)
defer sqlDB.Exec(t, `DROP TABLE t`)
sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL))
sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{
{"35", "3"},
{"67", "3"}})
})
t.Run("string-function", func(t *testing.T) {
data = "1\n2"
sqlDB.Exec(t, `CREATE TABLE t (a INT, b STRING DEFAULT repeat('dog', 2))`)
defer sqlDB.Exec(t, `DROP TABLE t`)
sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL))
sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{
{"1", "dogdog"},
{"2", "dogdog"}})
})
t.Run("arithmetic", func(t *testing.T) {
data = "35\n67"
sqlDB.Exec(t, `CREATE TABLE t (a INT, b INT DEFAULT 34 * 3)`)
defer sqlDB.Exec(t, `DROP TABLE t`)
sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL))
sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{
{"35", "102"},
{"67", "102"}})
})
t.Run("sequence-impure", func(t *testing.T) {
data = "1\n2"
sqlDB.Exec(t, `CREATE SEQUENCE testseq`)
defer sqlDB.Exec(t, `DROP TABLE t`)
sqlDB.Exec(t, `CREATE TABLE t(a INT, b INT DEFAULT nextval('testseq'))`)
sqlDB.ExpectErr(t,
fmt.Sprintf(`non-constant default expression .* for non-targeted column "b" is not supported by IMPORT INTO`),
fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL))
})
t.Run("now-impure", func(t *testing.T) {
data = "1\n2"
sqlDB.Exec(t, `CREATE TABLE t(a INT, b TIMESTAMP DEFAULT now())`)
defer sqlDB.Exec(t, `DROP TABLE t`)
sqlDB.ExpectErr(t,
fmt.Sprintf(`non-constant default expression .* for non-targeted column "b" is not supported by IMPORT INTO`),
fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL))
})
})

sqlDB.ExpectErr(
t, fmt.Sprintf("pq: cannot IMPORT INTO a table with a DEFAULT expression for any of its columns"),
fmt.Sprintf(`IMPORT INTO t (a) CSV DATA (%s)`, testFiles.files[0]),
t.Run("import-not-targeted-not-null", func(t *testing.T) {
sqlDB.Exec(t, `CREATE TABLE t (a INT, b INT NOT NULL)`)
const data = "1\n2\n3"
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == "GET" {
_, _ = w.Write([]byte(data))
}
}))
defer srv.Close()
defer sqlDB.Exec(t, `DROP TABLE t`)
sqlDB.ExpectErr(t, `violated by column "b"`,
fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL),
)
})

Expand Down
37 changes: 33 additions & 4 deletions pkg/sql/row/row_converter.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,10 +267,6 @@ func NewDatumRowConverter(

var txCtx transform.ExprTransformContext
semaCtx := tree.MakeSemaContext()
// We do not currently support DEFAULT expressions on target or non-target
// columns. All non-target columns must be nullable and will be set to NULL
// during import. We do however support DEFAULT on hidden columns (which is
// only the default _rowid one). This allows those expressions to run.
cols, defaultExprs, err := sqlbase.ProcessDefaultColumns(ctx, targetColDescriptors, immutDesc, &txCtx, c.EvalCtx, &semaCtx)
if err != nil {
return nil, errors.Wrap(err, "process default columns")
Expand Down Expand Up @@ -301,6 +297,11 @@ func NewDatumRowConverter(
c.Datums = make([]tree.Datum, len(targetColDescriptors), len(cols))

// Check for a hidden column. This should be the unique_rowid PK if present.
// In addition, check for non-targeted columns with non-null DEFAULT expressions.
isTargetCol := func(col *sqlbase.ColumnDescriptor) bool {
_, ok := isTargetColID[col.ID]
return ok
}
c.hidden = -1
for i := range cols {
col := &cols[i]
Expand All @@ -310,6 +311,23 @@ func NewDatumRowConverter(
}
c.hidden = i
c.Datums = append(c.Datums, nil)
} else {
if !isTargetCol(col) && col.DefaultExpr != nil {
// Check if the default expression is a constant expression as we do not
// support non-constant default expressions for non-target columns in IMPORT INTO.
//
// TODO (anzoteh96): add support to non-constant default expressions. Perhaps
// we can start with those with Stable volatility, like now().
if !tree.IsConst(evalCtx, defaultExprs[i]) {
return nil, errors.Newf(
"non-constant default expression %s for non-targeted column %q is not supported by IMPORT INTO",
defaultExprs[i].String(),
col.Name)
}
// Placeholder for columns with default values that will be evaluated when
// each import row is being created.
c.Datums = append(c.Datums, nil)
}
}
}
if len(c.Datums) != len(cols) {
Expand Down Expand Up @@ -364,6 +382,17 @@ func (c *DatumRowConverter) Row(ctx context.Context, sourceID int32, rowIndex in
c.Datums[c.hidden] = tree.NewDInt(tree.DInt(avoidCollisionsWithSQLsIDs | rowID))
}

for i := range c.cols {
col := &c.cols[i]
if _, ok := c.IsTargetCol[i]; !ok && !col.Hidden && col.DefaultExpr != nil {
datum, err := c.defaultExprs[i].Eval(c.EvalCtx)
if err != nil {
return errors.Wrapf(err, "error evaluating default expression for IMPORT INTO")
}
c.Datums[i] = datum
}
}

// TODO(justin): we currently disallow computed columns in import statements.
var computeExprs []tree.TypedExpr
var computedCols []sqlbase.ColumnDescriptor
Expand Down

0 comments on commit 9ac59f1

Please sign in to comment.