Merge cockroachdb#74299

74299: internal/sqlsmith: remove vectorized-specific things r=yuzefovich a=yuzefovich

It is no longer useful to have some vectorized-specific things since the vectorized engine now supports everything (either natively or by wrapping a row-by-row processor).

Release note: None

Co-authored-by: Yahor Yuzefovich <[email protected]>
mgartner · Dec 28, 2021 · 01c0c73 · 01c0c73
2 parents b0a4292 + 49b9601
commit 01c0c73
Show file tree

Hide file tree

Showing 6 changed files with 11 additions and 157 deletions.
diff --git a/pkg/cmd/roachtest/tests/sqlsmith.go b/pkg/cmd/roachtest/tests/sqlsmith.go
@@ -305,12 +305,9 @@ INSERT INTO seed_mr_table DEFAULT VALUES;`, regionList[0]),
 			register(setup, setting)
 		}
 	}
-	setups["seed-vec"] = sqlsmith.Setups["seed-vec"]
 	setups["seed-multi-region"] = sqlsmith.Setups["seed-multi-region"]
 	settings["ddl-nodrop"] = sqlsmith.Settings["ddl-nodrop"]
-	settings["vec"] = sqlsmith.SettingVectorize
 	settings["multi-region"] = sqlsmith.Settings["multi-region"]
-	register("seed-vec", "vec")
 	register("tpcc", "ddl-nodrop")
 	register("seed-multi-region", "multi-region")
 }
diff --git a/pkg/internal/sqlsmith/relational.go b/pkg/internal/sqlsmith/relational.go
@@ -123,11 +123,6 @@ var (
 		{1, makeValuesTable},
 		{2, makeSelectTable},
 	}
-	vectorizableTableExprs = []tableExprWeight{
-		{20, makeEquiJoinExpr},
-		{20, makeMergeJoinExpr},
-		{20, makeSchemaTable},
-	}
 	allTableExprs = append(mutatingTableExprs, nonMutatingTableExprs...)
 
 	selectStmts = []selectStatementWeight{
@@ -530,7 +525,7 @@ func (s *Smither) makeSelectClause(
 
 	var fromRefs colRefs
 	// Sometimes generate a SELECT with no FROM clause.
-	requireFrom := s.vectorizable || s.d6() != 1
+	requireFrom := s.d6() != 1
 	for (requireFrom && len(clause.From.Tables) < 1) || s.canRecurse() {
 		var from tree.TableExpr
 		if len(withTables) == 0 || s.coin() {
@@ -570,10 +565,7 @@ func (s *Smither) makeSelectClause(
 		orderByRefs = fromRefs
 		selectListRefs = selectListRefs.extend(fromRefs...)
 
-		// TODO(mjibson): vec only supports GROUP BYs on fully-ordered
-		// columns, which we could support here. Also see #39240 which
-		// will support this more generally.
-		if !s.vectorizable && s.d6() <= 2 && s.canRecurse() {
+		if s.d6() <= 2 && s.canRecurse() {
 			// Enable GROUP BY. Choose some random subset of the
 			// fromRefs.
 			// TODO(mjibson): Refence handling and aggregation functions
@@ -619,17 +611,6 @@ func (s *Smither) makeSelectClause(
 	}
 	clause.Exprs = selectList
 
-	// TODO(mjibson): Vectorized only supports ordered distinct, and so
-	// this often produces queries that won't vec. However since it will
-	// also sometimes produce vec queries with the distinctChainOps node,
-	// we allow this here. Teach this how to correctly limit itself to
-	// distinct only on ordered columns.
-	if s.d100() == 1 {
-		clause.Distinct = true
-		// For SELECT DISTINCT, ORDER BY expressions must appear in select list.
-		orderByRefs = selectRefs
-	}
-
 	return clause, selectRefs, orderByRefs, true
 }
 

diff --git a/pkg/internal/sqlsmith/scalar.go b/pkg/internal/sqlsmith/scalar.go
@@ -135,9 +135,6 @@ func makeCaseExpr(s *Smither, typ *types.T, refs colRefs) (tree.TypedExpr, bool)
 }
 
 func makeCoalesceExpr(s *Smither, typ *types.T, refs colRefs) (tree.TypedExpr, bool) {
-	if s.vectorizable {
-		return nil, false
-	}
 	typ = s.pickAnyType(typ)
 	firstExpr := makeScalar(s, typ, refs)
 	secondExpr := makeScalar(s, typ, refs)
@@ -177,11 +174,7 @@ func makeConstExpr(s *Smither, typ *types.T, refs colRefs) tree.TypedExpr {
 func makeConstDatum(s *Smither, typ *types.T) tree.Datum {
 	var datum tree.Datum
 	s.lock.Lock()
-	nullChance := 6
-	if s.vectorizable {
-		nullChance = 0
-	}
-	datum = randgen.RandDatumWithNullChance(s.rnd, typ, nullChance)
+	datum = randgen.RandDatumWithNullChance(s.rnd, typ, 6)
 	if f := datum.ResolvedType().Family(); f != types.UnknownFamily && s.simpleDatums {
 		datum = randgen.RandDatumSimple(s.rnd, typ)
 	}
@@ -252,9 +245,6 @@ func makeAnd(s *Smither, typ *types.T, refs colRefs) (tree.TypedExpr, bool) {
 }
 
 func makeNot(s *Smither, typ *types.T, refs colRefs) (tree.TypedExpr, bool) {
-	if s.vectorizable {
-		return nil, false
-	}
 	switch typ.Family() {
 	case types.BoolFamily, types.AnyFamily:
 	default:
@@ -285,21 +275,11 @@ func makeCompareOp(s *Smither, typ *types.T, refs colRefs) (tree.TypedExpr, bool
 	if _, ok := tree.CmpOps[op].LookupImpl(typ, typ); !ok {
 		return nil, false
 	}
-	if s.vectorizable && (op == tree.IsDistinctFrom || op == tree.IsNotDistinctFrom) {
-		return nil, false
-	}
 	left := makeScalar(s, typ, refs)
 	right := makeScalar(s, typ, refs)
 	return typedParen(tree.NewTypedComparisonExpr(tree.MakeComparisonOperator(op), left, right), typ), true
 }
 
-var vecBinOps = map[tree.BinaryOperatorSymbol]bool{
-	tree.Plus:  true,
-	tree.Minus: true,
-	tree.Mult:  true,
-	tree.Div:   true,
-}
-
 func makeBinOp(s *Smither, typ *types.T, refs colRefs) (tree.TypedExpr, bool) {
 	typ = s.pickAnyType(typ)
 	ops := operators[typ.Oid()]
@@ -308,9 +288,6 @@ func makeBinOp(s *Smither, typ *types.T, refs colRefs) (tree.TypedExpr, bool) {
 	}
 	n := s.rnd.Intn(len(ops))
 	op := ops[n]
-	if s.vectorizable && !vecBinOps[op.Operator.Symbol] {
-		return nil, false
-	}
 	if s.postgres {
 		if ignorePostgresBinOps[binOpTriple{
 			op.LeftType.Family(),
@@ -382,9 +359,6 @@ var postgresBinOpTransformations = map[binOpTriple]binOpOperands{
 }
 
 func makeFunc(s *Smither, ctx Context, typ *types.T, refs colRefs) (tree.TypedExpr, bool) {
-	if s.vectorizable {
-		return nil, false
-	}
 	typ = s.pickAnyType(typ)
 
 	class := ctx.fnClass
@@ -580,9 +554,6 @@ func makeWindowFrame(s *Smither, refs colRefs, orderTypes []*types.T) *tree.Wind
 }
 
 func makeExists(s *Smither, typ *types.T, refs colRefs) (tree.TypedExpr, bool) {
-	if s.vectorizable {
-		return nil, false
-	}
 	switch typ.Family() {
 	case types.BoolFamily, types.AnyFamily:
 	default:
@@ -611,7 +582,7 @@ func makeIn(s *Smither, typ *types.T, refs colRefs) (tree.TypedExpr, bool) {
 
 	t := s.randScalarType()
 	var rhs tree.TypedExpr
-	if s.vectorizable || s.coin() {
+	if s.coin() {
 		rhs = makeTuple(s, t, refs)
 	} else {
 		selectStmt, _, ok := s.makeSelect([]*types.T{t}, refs)
@@ -646,14 +617,6 @@ func makeIn(s *Smither, typ *types.T, refs colRefs) (tree.TypedExpr, bool) {
 
 func makeStringComparison(s *Smither, typ *types.T, refs colRefs) (tree.TypedExpr, bool) {
 	stringComparison := s.randStringComparison()
-	if s.vectorizable {
-		// Vectorized supports only tree.Like and tree.NotLike.
-		if s.coin() {
-			stringComparison = tree.MakeComparisonOperator(tree.Like)
-		} else {
-			stringComparison = tree.MakeComparisonOperator(tree.NotLike)
-		}
-	}
 	switch typ.Family() {
 	case types.BoolFamily, types.AnyFamily:
 	default:
@@ -669,12 +632,12 @@ func makeStringComparison(s *Smither, typ *types.T, refs colRefs) (tree.TypedExp
 func makeTuple(s *Smither, typ *types.T, refs colRefs) *tree.Tuple {
 	n := s.rnd.Intn(5)
 	// Don't allow empty tuples in simple/postgres mode.
-	if n == 0 && (s.simpleDatums || s.vectorizable) {
+	if n == 0 && s.simpleDatums {
 		n++
 	}
 	exprs := make(tree.Exprs, n)
 	for i := range exprs {
-		if s.vectorizable || s.d9() == 1 {
+		if s.d9() == 1 {
 			exprs[i] = makeConstDatum(s, typ)
 		} else {
 			exprs[i] = makeScalar(s, typ, refs)
@@ -684,9 +647,6 @@ func makeTuple(s *Smither, typ *types.T, refs colRefs) *tree.Tuple {
 }
 
 func makeScalarSubquery(s *Smither, typ *types.T, refs colRefs) (tree.TypedExpr, bool) {
-	if s.vectorizable {
-		return nil, false
-	}
 	if s.disableLimits {
 		// This query must use a LIMIT, so bail if they are disabled.
 		return nil, false

diff --git a/pkg/internal/sqlsmith/setup.go b/pkg/internal/sqlsmith/setup.go
@@ -32,10 +32,7 @@ var Setups = map[string]Setup{
 	"empty": wrapCommonSetup(stringSetup("")),
 	// seed is a SQL statement that creates a table with most data types
 	// and some sample rows.
-	"seed": wrapCommonSetup(stringSetup(seedTable)),
-	// seed-vec is like seed except only types supported by vectorized
-	// execution are used.
-	"seed-vec":          wrapCommonSetup(stringSetup(vecSeedTable)),
+	"seed":              wrapCommonSetup(stringSetup(seedTable)),
 	"seed-multi-region": wrapCommonSetup(stringSetup(multiregionSeed)),
 	RandTableSetupName:  wrapCommonSetup(randTables),
 }
@@ -140,29 +137,6 @@ CREATE TABLE IF NOT EXISTS seed AS
 INSERT INTO seed DEFAULT VALUES;
 CREATE INDEX on seed (_int8, _float8, _date);
 CREATE INVERTED INDEX on seed (_jsonb);
-`
-
-	vecSeedTable = `
-CREATE TABLE IF NOT EXISTS seed_vec AS
-	SELECT
-		g::INT2 AS _int2,
-		g::INT4 AS _int4,
-		g::INT8 AS _int8,
-		g::FLOAT8 AS _float8,
-		'2001-01-01'::DATE + g AS _date,
-		'2001-01-01'::TIMESTAMP + g * '1 day'::INTERVAL AS _timestamp,
-		'2001-01-01'::TIMESTAMPTZ + g * '1 day'::INTERVAL AS _timestamptz,
-		g * '1 day'::INTERVAL AS _interval,
-		g % 2 = 1 AS _bool,
-		g::DECIMAL AS _decimal,
-		g::STRING AS _string,
-		g::STRING::BYTES AS _bytes,
-		substring('00000000-0000-0000-0000-' || g::STRING || '00000000000', 1, 36)::UUID AS _uuid
-	FROM
-		generate_series(1, 5) AS g;
-
-INSERT INTO seed_vec DEFAULT VALUES;
-CREATE INDEX on seed_vec (_int8, _float8, _date);
 `
 
 	multiregionSeed = `
@@ -219,12 +193,6 @@ var Settings = map[string]SettingFunc{
 	"multi-region":      randSetting(Parallel, MultiRegionDDLs()),
 }
 
-// SettingVectorize is the setting for vectorizable. It is not included in
-// Settings because it has type restrictions during CREATE TABLE, but Settings
-// is designed to be used with anything in Setups, which may violate that
-// restriction.
-var SettingVectorize = staticSetting(Parallel, Vectorizable())
-
 var settingNames = func() []string {
 	var ret []string
 	for k := range Settings {
@@ -276,8 +244,4 @@ var randOptions = []SmitherOption{
 	DisableWith(),
 	PostgresMode(),
 	SimpleDatums(),
-
-	// Vectorizable() is not included here because it assumes certain
-	// types don't exist in table schemas. Since we don't yet have a way to
-	// verify that assumption, don't enable this.
 }
diff --git a/pkg/internal/sqlsmith/sqlsmith.go b/pkg/internal/sqlsmith/sqlsmith.go
@@ -84,7 +84,6 @@ type Smither struct {
 	disableWindowFuncs bool
 	simpleDatums       bool
 	avoidConsts        bool
-	vectorizable       bool
 	outputSort         bool
 	postgres           bool
 	ignoreFNs          []*regexp.Regexp
@@ -334,23 +333,6 @@ var DisableWindowFuncs = simpleOption("disable window funcs", func(s *Smither) {
 	s.disableWindowFuncs = true
 })
 
-// Vectorizable causes the Smither to limit query generation to queries
-// supported by vectorized execution.
-var Vectorizable = multiOption(
-	"Vectorizable",
-	DisableMutations(),
-	DisableWith(),
-	DisableWindowFuncs(),
-	AvoidConsts(),
-	// This must be last so it can make the final changes to table
-	// exprs and statements.
-	simpleOption("vectorizable", func(s *Smither) {
-		s.vectorizable = true
-		s.stmtWeights = nonMutatingStatements
-		s.tableExprWeights = vectorizableTableExprs
-	})(),
-)
-
 // OutputSort adds a top-level ORDER BY on all columns.
 var OutputSort = simpleOption("output sort", func(s *Smither) {
 	s.outputSort = true

diff --git a/pkg/internal/sqlsmith/sqlsmith_test.go b/pkg/internal/sqlsmith/sqlsmith_test.go
@@ -14,7 +14,6 @@ import (
 	"context"
 	"flag"
 	"fmt"
-	"strings"
 	"testing"
 
 	"github.com/cockroachdb/cockroach/pkg/base"
@@ -27,11 +26,10 @@ import (
 )
 
 var (
-	flagExec     = flag.Bool("ex", false, "execute (instead of just parse) generated statements")
-	flagNum      = flag.Int("num", 100, "number of statements to generate")
-	flagSetup    = flag.String("setup", "", "setup for TestGenerateParse, empty for random")
-	flagSetting  = flag.String("setting", "", "setting for TestGenerateParse, empty for random")
-	flagCheckVec = flag.Bool("check-vec", false, "fail if a generated statement cannot be vectorized")
+	flagExec    = flag.Bool("ex", false, "execute (instead of just parse) generated statements")
+	flagNum     = flag.Int("num", 100, "number of statements to generate")
+	flagSetup   = flag.String("setup", "", "setup for TestGenerateParse, empty for random")
+	flagSetting = flag.String("setting", "", "setting for TestGenerateParse, empty for random")
 )
 
 // TestSetups verifies that all setups generate executable SQL.
@@ -191,34 +189,6 @@ func TestGenerateParse(t *testing.T) {
 		}
 		stmt = prettyCfg.Pretty(parsed.AST)
 		fmt.Print("STMT: ", i, "\n", stmt, ";\n\n")
-		if *flagCheckVec {
-			if _, err := sqlDB.Exec(fmt.Sprintf("EXPLAIN (vec) %s", stmt)); err != nil {
-				es := err.Error()
-				ok := false
-				// It is hard to make queries that can always
-				// be vectorized. Hard code a list of error
-				// messages we are ok with.
-				for _, s := range []string{
-					// If the optimizer removes stuff due
-					// to something like a `WHERE false`,
-					// vec will fail with an error message
-					// like this. This is hard to fix
-					// because things like `WHERE true AND
-					// false` similarly remove rows but are
-					// harder to detect.
-					"num_rows:0",
-					"unsorted distinct",
-				} {
-					if strings.Contains(es, s) {
-						ok = true
-						break
-					}
-				}
-				if !ok {
-					t.Fatal(err)
-				}
-			}
-		}
 		if *flagExec {
 			db.Exec(t, `SET statement_timeout = '9s'`)
 			if _, err := sqlDB.Exec(stmt); err != nil {