Skip to content

Commit

Permalink
sqlsmith: implement DISTINCT TLP oracle
Browse files Browse the repository at this point in the history
The DISTINCT oracle for TLP generates unpartitioned queries of the
following form:

SELECT DISTINCT {cols...} FROM t

and partitioned queries of the following form:

SELECT DISTINCT {cols...} FROM t WHERE pred UNION
SELECT DISTINCT {cols...} FROM t WHERE NOT (pred) UNION
SELECT DISTINCT {cols...} FROM t WHERE (pred) IS NULL

Release note: None
  • Loading branch information
jordanlewis committed Feb 4, 2022
1 parent 43178de commit edef932
Showing 1 changed file with 57 additions and 3 deletions.
60 changes: 57 additions & 3 deletions pkg/internal/sqlsmith/tlp.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,19 @@ func (s *Smither) GenerateTLP() (unpartitioned, partitioned string, args []inter
s.disableImpureFns = originalDisableImpureFns
}()

switch tlpType := s.rnd.Intn(4); tlpType {
switch tlpType := s.rnd.Intn(5); tlpType {
case 0:
return s.generateWhereTLP()
partitioned, unpartitioned, args = s.generateWhereTLP()
case 1:
partitioned, unpartitioned = s.generateOuterJoinTLP()
case 2:
partitioned, unpartitioned = s.generateInnerJoinTLP()
case 3:
partitioned, unpartitioned = s.generateDistinctTLP()
default:
partitioned, unpartitioned = s.generateAggregationTLP()
}
return partitioned, unpartitioned, nil
return partitioned, unpartitioned, args
}

// generateWhereTLP returns two SQL queries as strings that can be used by the
Expand Down Expand Up @@ -377,3 +379,55 @@ func (s *Smither) generateAggregationTLP() (unpartitioned, partitioned string) {

return unpartitioned, partitioned
}

// generateDistinctTLP returns two SQL queries as strings that can be used by the
// GenerateTLP function. These queries DISTINCT on random columns and make use
// of the WHERE clause to partition the original query into three.
//
// The first query returned is an unpartitioned query of the form:
//
//	SELECT DISTINCT {cols...} FROM table
//
// The second query returned is a partitioned query of the form:
//
//	(SELECT DISTINCT {cols...} FROM table WHERE p) UNION
//	(SELECT DISTINCT {cols...} FROM table WHERE NOT (p)) UNION
//	(SELECT DISTINCT {cols...} FROM table WHERE (p) IS NULL)
//
// The partitions are combined with UNION rather than UNION ALL because the
// same DISTINCT value can be produced by more than one partition, and it must
// appear only once to match the unpartitioned result.
//
// If the resulting values of the two queries are not equal, there is a logical
// bug.
//
// It panics with an assertion failure if no table can be found or if the chosen
// table exposes no columns.
func (s *Smither) generateDistinctTLP() (unpartitioned, partitioned string) {
	table, _, _, cols, ok := s.getSchemaTable()
	if !ok {
		panic(errors.AssertionFailedf("failed to find random table"))
	}
	// Without at least one column there is nothing to DISTINCT on, and
	// s.rnd.Intn(0) below would otherwise panic with an unhelpful message
	// (presumably s.rnd is a math/rand generator, whose Intn panics on n <= 0).
	if len(cols) == 0 {
		panic(errors.AssertionFailedf("random table has no columns"))
	}

	tableFmt := tree.NewFmtCtx(tree.FmtParsable)
	table.Format(tableFmt)
	tableName := tableFmt.CloseAndGetString()

	// Take a random subset of the columns to distinct on.
	s.rnd.Shuffle(len(cols), func(i, j int) { cols[i], cols[j] = cols[j], cols[i] })
	// Intn yields a value in [0, len(cols)); bump 0 up to 1 so that at least
	// one column is always selected.
	n := s.rnd.Intn(len(cols))
	if n == 0 {
		n = 1
	}
	colStrs := make([]string, n)
	for i, ref := range cols[:n] {
		colStrs[i] = tree.AsStringWithFlags(ref.typedExpr(), tree.FmtParsable)
	}
	distinctCols := strings.Join(colStrs, ",")
	unpartitioned = fmt.Sprintf("SELECT DISTINCT %s FROM %s", distinctCols, tableName)

	// The predicate gets its own formatting context: a FmtCtx must not be used
	// again once CloseAndGetString has closed it, so the context used for the
	// table name above cannot be reused here.
	pred := makeBoolExpr(s, cols)
	predFmt := tree.NewFmtCtx(tree.FmtParsable)
	pred.Format(predFmt)
	predicate := predFmt.CloseAndGetString()

	part1 := fmt.Sprintf("SELECT DISTINCT %s FROM %s WHERE %s", distinctCols, tableName, predicate)
	part2 := fmt.Sprintf("SELECT DISTINCT %s FROM %s WHERE NOT (%s)", distinctCols, tableName, predicate)
	part3 := fmt.Sprintf("SELECT DISTINCT %s FROM %s WHERE (%s) IS NULL", distinctCols, tableName, predicate)
	partitioned = fmt.Sprintf(
		"(%s) UNION (%s) UNION (%s)", part1, part2, part3,
	)

	return unpartitioned, partitioned
}

0 comments on commit edef932

Please sign in to comment.