diff --git a/pkg/internal/sqlsmith/tlp.go b/pkg/internal/sqlsmith/tlp.go index 5f977268de60..ccba8f763ccf 100644 --- a/pkg/internal/sqlsmith/tlp.go +++ b/pkg/internal/sqlsmith/tlp.go @@ -41,17 +41,19 @@ func (s *Smither) GenerateTLP() (unpartitioned, partitioned string, args []inter s.disableImpureFns = originalDisableImpureFns }() - switch tlpType := s.rnd.Intn(4); tlpType { + switch tlpType := s.rnd.Intn(5); tlpType { case 0: - return s.generateWhereTLP() + partitioned, unpartitioned, args = s.generateWhereTLP() case 1: partitioned, unpartitioned = s.generateOuterJoinTLP() case 2: partitioned, unpartitioned = s.generateInnerJoinTLP() + case 3: + partitioned, unpartitioned = s.generateDistinctTLP() default: partitioned, unpartitioned = s.generateAggregationTLP() } - return partitioned, unpartitioned, nil + return partitioned, unpartitioned, args } // generateWhereTLP returns two SQL queries as strings that can be used by the @@ -377,3 +379,55 @@ func (s *Smither) generateAggregationTLP() (unpartitioned, partitioned string) { return unpartitioned, partitioned } + +// generateDistinctTLP returns two SQL queries as strings that can be used by the +// GenerateTLP function. These queries DISTINCT on random columns and make use +// of the WHERE clause to partition the original query into three. +// +// The first query returned is an unpartitioned query of the form: +// +// SELECT DISTINCT {cols...} FROM table +// +// The second query returned is a partitioned query of the form: +// +// SELECT DISTINCT {cols...} FROM table WHERE (p) UNION +// SELECT DISTINCT {cols...} FROM table WHERE NOT (p) UNION +// SELECT DISTINCT {cols...} FROM table WHERE (p) IS NULL +// +// If the resulting values of the two queries are not equal, there is a logical +// bug. +func (s *Smither) generateDistinctTLP() (unpartitioned, partitioned string) { + f := tree.NewFmtCtx(tree.FmtParsable) + + table, _, _, cols, ok := s.getSchemaTable() + if !ok { + panic(errors.AssertionFailedf("failed to find random table")) + } + table.Format(f) + tableName := f.CloseAndGetString() + // Take a random subset of the columns to distinct on. + s.rnd.Shuffle(len(cols), func(i, j int) { cols[i], cols[j] = cols[j], cols[i] }) + n := s.rnd.Intn(len(cols)) + if n == 0 { + n = 1 + } + colStrs := make([]string, n) + for i, ref := range cols[:n] { + colStrs[i] = tree.AsStringWithFlags(ref.typedExpr(), tree.FmtParsable) + } + distinctCols := strings.Join(colStrs, ",") + unpartitioned = fmt.Sprintf("SELECT DISTINCT %s FROM %s", distinctCols, tableName) + + pred := makeBoolExpr(s, cols) + pred.Format(f) + predicate := f.CloseAndGetString() + + part1 := fmt.Sprintf("SELECT DISTINCT %s FROM %s WHERE %s", distinctCols, tableName, predicate) + part2 := fmt.Sprintf("SELECT DISTINCT %s FROM %s WHERE NOT (%s)", distinctCols, tableName, predicate) + part3 := fmt.Sprintf("SELECT DISTINCT %s FROM %s WHERE (%s) IS NULL", distinctCols, tableName, predicate) + partitioned = fmt.Sprintf( + "(%s) UNION (%s) UNION (%s)", part1, part2, part3, + ) + + return unpartitioned, partitioned +}