From 101730a8980407677a3963e401d80e4e5a56b066 Mon Sep 17 00:00:00 2001 From: anzoteh96 Date: Thu, 11 Jun 2020 15:03:23 -0400 Subject: [PATCH] importccl: support DEFAULT columns for constant DEFAULT expressions. Currently, IMPORT does not support importing DEFAULT columns, and non-targeted columns must be nullable. This PR adds support for IMPORT INTO that involves non-targeted columns with DEFAULT expressions that are constant. Constant expressions are literals and functions that depend solely on their arguments (with arguments also being constant expressions). This is done by evaluating the required default expressions in the function sql/row/row_converter:NewDatumRowConverter. Release note (general change): IMPORT INTO now supports columns with constant default expressions, and non-targeted columns with constant default expressions are no longer required to be nullable. --- pkg/ccl/importccl/import_stmt.go | 21 ++-- pkg/ccl/importccl/import_stmt_test.go | 155 +++++++++++++++++++++++--- pkg/sql/row/row_converter.go | 37 +++++- 3 files changed, 184 insertions(+), 29 deletions(-) diff --git a/pkg/ccl/importccl/import_stmt.go b/pkg/ccl/importccl/import_stmt.go index aa95ba17a3c7..91abb53a2645 100644 --- a/pkg/ccl/importccl/import_stmt.go +++ b/pkg/ccl/importccl/import_stmt.go @@ -587,16 +587,17 @@ func importPlanHook( intoCols = append(intoCols, active[0].Name) } - // IMPORT INTO does not support columns with DEFAULT expressions. Ensure - // that all non-target columns are nullable until we support DEFAULT - // expressions. - for _, col := range found.VisibleColumns() { - if col.HasDefault() { - return errors.Errorf("cannot IMPORT INTO a table with a DEFAULT expression for any of its columns") - } - - if len(isTargetCol) != 0 && !isTargetCol[col.Name] && !col.IsNullable() { - return errors.Errorf("all non-target columns in IMPORT INTO must be nullable") + // Ensure that non-target columns that don't have default + // expressions are nullable. + if len(isTargetCol) != 0 { + for _, col := range found.VisibleColumns() { + if !(isTargetCol[col.Name] || col.IsNullable() || col.HasDefault()) { + return errors.Newf( + "all non-target columns in IMPORT INTO must be nullable "+ + "or have default expressions but violated by column %q", + col.Name, + ) + } } } diff --git a/pkg/ccl/importccl/import_stmt_test.go b/pkg/ccl/importccl/import_stmt_test.go index d040c2946dd9..d0e1c836d15e 100644 --- a/pkg/ccl/importccl/import_stmt_test.go +++ b/pkg/ccl/importccl/import_stmt_test.go @@ -2472,22 +2472,147 @@ func TestImportIntoCSV(t *testing.T) { } }) - // IMPORT INTO does not support DEFAULT expressions for either target or - // non-target columns. - t.Run("import-into-check-no-default-cols", func(t *testing.T) { - sqlDB.Exec(t, `CREATE TABLE t (a INT DEFAULT 1, b STRING)`) - defer sqlDB.Exec(t, `DROP TABLE t`) - - // Insert the test data - insert := []string{"''", "'text'", "'a'", "'e'", "'l'", "'t'", "'z'"} - - for i, v := range insert { - sqlDB.Exec(t, "INSERT INTO t (a, b) VALUES ($1, $2)", i, v) - } + // Test that IMPORT INTO works when columns with default expressions are present. + // The default expressions supported by IMPORT INTO are constant expressions, + // which are literals and functions that always return the same value given the + // same arguments (examples of non-constant expressions are given in the last two + // subtests below). The default expression of a column is used when this column is not + // targeted; otherwise, data from source file (like CSV) is used. It also checks + // that IMPORT TABLE works when there are default columns. + t.Run("import-into-default", func(t *testing.T) { + var data string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == "GET" { + _, _ = w.Write([]byte(data)) + } + })) + defer srv.Close() + t.Run("is-not-target", func(t *testing.T) { + data = "1\n2" + sqlDB.Exec(t, `CREATE TABLE t (b INT DEFAULT 42, a INT)`) + defer sqlDB.Exec(t, `DROP TABLE t`) + sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL)) + sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"42", "1"}, {"42", "2"}}) + }) + t.Run("is-not-target-not-null", func(t *testing.T) { + data = "1\n2" + sqlDB.Exec(t, `CREATE TABLE t (a INT, b INT DEFAULT 42 NOT NULL)`) + defer sqlDB.Exec(t, `DROP TABLE t`) + sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL)) + sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"1", "42"}, {"2", "42"}}) + }) + t.Run("is-target", func(t *testing.T) { + data = "1,36\n2,37" + sqlDB.Exec(t, `CREATE TABLE t (a INT, b INT DEFAULT 42)`) + defer sqlDB.Exec(t, `DROP TABLE t`) + sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA ("%s")`, srv.URL)) + sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"1", "36"}, {"2", "37"}}) + }) + t.Run("is-target-with-null-data", func(t *testing.T) { + data = ",36\n2," + sqlDB.Exec(t, `CREATE TABLE t (a INT, b INT DEFAULT 42)`) + defer sqlDB.Exec(t, `DROP TABLE t`) + sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA ("%s") WITH nullif = ''`, srv.URL)) + sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"NULL", "36"}, {"2", "NULL"}}) + }) + t.Run("mixed-target-and-non-target", func(t *testing.T) { + data = "35,test string\n72,another test string" + sqlDB.Exec(t, `CREATE TABLE t (b STRING, a INT DEFAULT 53, c INT DEFAULT 42)`) + defer sqlDB.Exec(t, `DROP TABLE t`) + sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA ("%s")`, srv.URL)) + sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"test string", "35", "42"}, {"another test string", "72", "42"}}) + }) + t.Run("with-import-table", func(t *testing.T) { + data = "35,string1,65\n72,string2,17" + sqlDB.Exec(t, fmt.Sprintf( + `IMPORT TABLE t (a INT, b STRING, c INT DEFAULT 33) + CSV DATA ("%s")`, + srv.URL, + )) + defer sqlDB.Exec(t, `DROP TABLE t`) + data = "11,string3\n29,string4" + sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA ("%s")`, srv.URL)) + sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{ + {"35", "string1", "65"}, + {"72", "string2", "17"}, + {"11", "string3", "33"}, + {"29", "string4", "33"}}) + }) + t.Run("null-as-default", func(t *testing.T) { + data = "1\n2\n3" + sqlDB.Exec(t, fmt.Sprintf(`CREATE TABLE t (a INT DEFAULT NULL, b INT)`)) + defer sqlDB.Exec(t, `DROP TABLE t`) + sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (b) CSV DATA ("%s")`, srv.URL)) + sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"NULL", "1"}, {"NULL", "2"}, {"NULL", "3"}}) + }) + t.Run("default-value-change", func(t *testing.T) { + data = "1\n2" + sqlDB.Exec(t, `CREATE TABLE t (a INT, b INT DEFAULT 7)`) + defer sqlDB.Exec(t, `DROP TABLE t`) + sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL)) + data = "3\n4" + sqlDB.Exec(t, `ALTER TABLE t ALTER COLUMN b SET DEFAULT 8`) + sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL)) + sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"1", "7"}, {"2", "7"}, {"3", "8"}, {"4", "8"}}) + }) + t.Run("math-constant", func(t *testing.T) { + data = "35\n67" + sqlDB.Exec(t, `CREATE TABLE t (a INT, b FLOAT DEFAULT round(pi()))`) + defer sqlDB.Exec(t, `DROP TABLE t`) + sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL)) + sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{ + {"35", "3"}, + {"67", "3"}}) + }) + t.Run("string-function", func(t *testing.T) { + data = "1\n2" + sqlDB.Exec(t, `CREATE TABLE t (a INT, b STRING DEFAULT repeat('dog', 2))`) + defer sqlDB.Exec(t, `DROP TABLE t`) + sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL)) + sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{ + {"1", "dogdog"}, + {"2", "dogdog"}}) + }) + t.Run("arithmetic", func(t *testing.T) { + data = "35\n67" + sqlDB.Exec(t, `CREATE TABLE t (a INT, b INT DEFAULT 34 * 3)`) + defer sqlDB.Exec(t, `DROP TABLE t`) + sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL)) + sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{ + {"35", "102"}, + {"67", "102"}}) + }) + t.Run("sequence-impure", func(t *testing.T) { + data = "1\n2" + sqlDB.Exec(t, `CREATE SEQUENCE testseq`) + defer sqlDB.Exec(t, `DROP TABLE t`) + sqlDB.Exec(t, `CREATE TABLE t(a INT, b INT DEFAULT nextval('testseq'))`) + sqlDB.ExpectErr(t, + fmt.Sprintf(`non-constant default expression .* for non-targeted column "b" is not supported by IMPORT INTO`), + fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL)) + }) + t.Run("now-impure", func(t *testing.T) { + data = "1\n2" + sqlDB.Exec(t, `CREATE TABLE t(a INT, b TIMESTAMP DEFAULT now())`) + defer sqlDB.Exec(t, `DROP TABLE t`) + sqlDB.ExpectErr(t, + fmt.Sprintf(`non-constant default expression .* for non-targeted column "b" is not supported by IMPORT INTO`), + fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL)) + }) + }) - sqlDB.ExpectErr( - t, fmt.Sprintf("pq: cannot IMPORT INTO a table with a DEFAULT expression for any of its columns"), - fmt.Sprintf(`IMPORT INTO t (a) CSV DATA (%s)`, testFiles.files[0]), + t.Run("import-not-targeted-not-null", func(t *testing.T) { + sqlDB.Exec(t, `CREATE TABLE t (a INT, b INT NOT NULL)`) + const data = "1\n2\n3" + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == "GET" { + _, _ = w.Write([]byte(data)) + } + })) + defer srv.Close() + defer sqlDB.Exec(t, `DROP TABLE t`) + sqlDB.ExpectErr(t, `violated by column "b"`, + fmt.Sprintf(`IMPORT INTO t (a) CSV DATA ("%s")`, srv.URL), ) }) diff --git a/pkg/sql/row/row_converter.go b/pkg/sql/row/row_converter.go index 1fd139b6bb8e..cb2f54b455c2 100644 --- a/pkg/sql/row/row_converter.go +++ b/pkg/sql/row/row_converter.go @@ -267,10 +267,6 @@ func NewDatumRowConverter( var txCtx transform.ExprTransformContext semaCtx := tree.MakeSemaContext() - // We do not currently support DEFAULT expressions on target or non-target - // columns. All non-target columns must be nullable and will be set to NULL - // during import. We do however support DEFAULT on hidden columns (which is - // only the default _rowid one). This allows those expressions to run. cols, defaultExprs, err := sqlbase.ProcessDefaultColumns(ctx, targetColDescriptors, immutDesc, &txCtx, c.EvalCtx, &semaCtx) if err != nil { return nil, errors.Wrap(err, "process default columns") @@ -301,6 +297,11 @@ func NewDatumRowConverter( c.Datums = make([]tree.Datum, len(targetColDescriptors), len(cols)) // Check for a hidden column. This should be the unique_rowid PK if present. + // In addition, check for non-targeted columns with non-null DEFAULT expressions. + isTargetCol := func(col *sqlbase.ColumnDescriptor) bool { + _, ok := isTargetColID[col.ID] + return ok + } c.hidden = -1 for i := range cols { col := &cols[i] @@ -310,6 +311,23 @@ func NewDatumRowConverter( } c.hidden = i c.Datums = append(c.Datums, nil) + } else { + if !isTargetCol(col) && col.DefaultExpr != nil { + // Check if the default expression is a constant expression as we do not + // support non-constant default expressions for non-target columns in IMPORT INTO. + // + // TODO (anzoteh96): add support to non-constant default expressions. Perhaps + // we can start with those with Stable volatility, like now(). + if !tree.IsConst(evalCtx, defaultExprs[i]) { + return nil, errors.Newf( + "non-constant default expression %s for non-targeted column %q is not supported by IMPORT INTO", + defaultExprs[i].String(), + col.Name) + } + // Placeholder for columns with default values that will be evaluated when + // each import row is being created. + c.Datums = append(c.Datums, nil) + } } } if len(c.Datums) != len(cols) { @@ -364,6 +382,17 @@ func (c *DatumRowConverter) Row(ctx context.Context, sourceID int32, rowIndex in c.Datums[c.hidden] = tree.NewDInt(tree.DInt(avoidCollisionsWithSQLsIDs | rowID)) } + for i := range c.cols { + col := &c.cols[i] + if _, ok := c.IsTargetCol[i]; !ok && !col.Hidden && col.DefaultExpr != nil { + datum, err := c.defaultExprs[i].Eval(c.EvalCtx) + if err != nil { + return errors.Wrapf(err, "error evaluating default expression for IMPORT INTO") + } + c.Datums[i] = datum + } + } + // TODO(justin): we currently disallow computed columns in import statements. var computeExprs []tree.TypedExpr var computedCols []sqlbase.ColumnDescriptor