Skip to content

Commit

Permalink
Merge pull request cockroachdb#24451 from andy-kimball/weakkey
Browse files Browse the repository at this point in the history
opt: Add weak keys and rule to eliminate DISTINCT
  • Loading branch information
andy-kimball authored Apr 6, 2018
2 parents 3043766 + e16ab2a commit cc882bf
Show file tree
Hide file tree
Showing 39 changed files with 1,530 additions and 489 deletions.
30 changes: 10 additions & 20 deletions pkg/sql/opt/exec/execbuilder/testdata/aggregate
Original file line number Diff line number Diff line change
Expand Up @@ -1256,17 +1256,11 @@ INSERT INTO ab VALUES
exec-explain
SELECT 1 FROM kv GROUP BY kv.*;
----
render 0 render · · (column5) ·
│ 0 · render 0 1 · ·
└── group 1 group · · (k, v, w, s) ·
│ 1 · aggregate 0 k · ·
│ 1 · aggregate 1 v · ·
│ 1 · aggregate 2 w · ·
│ 1 · aggregate 3 s · ·
│ 1 · group by @1-@4 · ·
└── scan 2 scan · · (k, v, w, s) ·
· 2 · table kv@primary · ·
· 2 · spans ALL · ·
render 0 render · · (column5) ·
│ 0 · render 0 1 · ·
└── scan 1 scan · · () ·
· 1 · table kv@primary · ·
· 1 · spans ALL · ·

exec
SELECT 1 FROM kv GROUP BY kv.*;
Expand Down Expand Up @@ -1456,15 +1450,11 @@ column3:tuple{int, int}
exec-explain
SELECT (b, a) FROM ab GROUP BY (b, a)
----
render 0 render · · (column3) ·
│ 0 · render 0 (b, a) · ·
└── group 1 group · · (a, b) ·
│ 1 · aggregate 0 a · ·
│ 1 · aggregate 1 b · ·
│ 1 · group by @1-@2 · ·
└── scan 2 scan · · (a, b) ·
· 2 · table ab@primary · ·
· 2 · spans ALL · ·
render 0 render · · (column3) ·
│ 0 · render 0 (b, a) · ·
└── scan 1 scan · · (a, b) ·
· 1 · table ab@primary · ·
· 1 · spans ALL · ·

exec rowsort
SELECT MIN(y), (b, a) FROM ab, xy GROUP BY (x, (a, b))
Expand Down
6 changes: 4 additions & 2 deletions pkg/sql/opt/exec/execbuilder/testdata/scan
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ SELECT * FROM t.a
scan a
├── columns: x:1(int!null) y:2(float) s:3(string)
├── stats: [rows=1000]
└── cost: 1000.00
├── cost: 1000.00
└── keys: (1)

exec-explain
SELECT * FROM t.a
Expand All @@ -37,7 +38,8 @@ SELECT s, x FROM t.a
scan a
├── columns: s:3(string) x:1(int!null)
├── stats: [rows=1000]
└── cost: 1000.00
├── cost: 1000.00
└── keys: (1)

exec-explain
SELECT s, x FROM t.a
Expand Down
26 changes: 25 additions & 1 deletion pkg/sql/opt/memo/expr_view.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,9 @@ const (
// ExprFmtHideConstraints does not show inferred constraints in the output.
ExprFmtHideConstraints

// ExprFmtHideKeys does not show keys in the output.
ExprFmtHideKeys

// ExprFmtHideAll shows only the most basic properties of the expression.
ExprFmtHideAll ExprFmtFlags = (1 << iota) - 1
)
Expand Down Expand Up @@ -256,7 +259,6 @@ func (ev ExprView) formatRelational(tp treeprinter.Node, flags ExprFmtFlags) {
logProps := ev.Logical()

tp = tp.Child(buf.String())
buf.Reset()

// If a particular column presentation is required of the expression, then
// print columns using that information.
Expand Down Expand Up @@ -320,6 +322,11 @@ func (ev ExprView) formatRelational(tp treeprinter.Node, flags ExprFmtFlags) {
tp.Childf("cost: %.2f", ev.lookupBestExpr().cost)
}

// Format weak keys.
if !flags.HasFlags(ExprFmtHideKeys) {
ev.formatWeakKeys(tp)
}

if physProps.Ordering.Defined() {
tp.Childf("ordering: %s", physProps.Ordering.String())
}
Expand Down Expand Up @@ -416,6 +423,23 @@ func (ev ExprView) formatPresentation(tp treeprinter.Node, presentation Presenta
tp.Child(buf.String())
}

// formatWeakKeys adds a "keys: ..." child to the given tree node that lists
// the weak keys of the expression's relational properties, separated by single
// spaces. A key that is not a subset of the not-null columns is prefixed with
// "weak". No child is added when the formatted list is empty.
func (ev ExprView) formatWeakKeys(tp treeprinter.Node) {
	relational := ev.Logical().Relational
	notNull := relational.NotNullCols

	var out bytes.Buffer
	for idx, weakKey := range relational.WeakKeys {
		if idx > 0 {
			out.WriteRune(' ')
		}
		// Keys that include a nullable column are only weak keys; mark them.
		if !weakKey.SubsetOf(notNull) {
			out.WriteString("weak")
		}
		out.WriteString(weakKey.String())
	}

	if out.Len() == 0 {
		return
	}
	tp.Childf("keys: %s", out.String())
}

// MatchesTupleOfConstants returns true if the expression is a TupleOp with
// ConstValue children.
func MatchesTupleOfConstants(ev ExprView) bool {
Expand Down
22 changes: 22 additions & 0 deletions pkg/sql/opt/memo/logical_props.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,28 @@ type RelationalProps struct {
// derived from filters that are NULL-intolerant.
NotNullCols opt.ColSet

// WeakKeys are the column sets which form weak keys and are subsets of the
// expression's output columns. A weak key set cannot contain any other weak
// key set (it would be redundant).
//
// A column set is a key if no two rows are equal after projection onto that
// set. This definition treats NULL as if it were equal to NULL, so two rows
// having duplicate NULL values would *not* qualify as key rows. Therefore,
// in the usual case, the key columns are also not nullable. The simplest
// example of a key is the primary key for a table (recall that all of the
// columns of the primary key are defined to be NOT NULL).
//
// A weak key is similar to a key, with the difference that NULL values are
// treated as *not equal* to other NULL values. Therefore, two rows having
// duplicate NULL values could still qualify as weak key rows. A UNIQUE index
// on a table is a weak key and possibly a key if all of the columns are NOT
// NULL. A weak key is a key if "(WeakKeys[i] & NotNullCols) == WeakKeys[i]".
//
// An empty key is valid (an empty key implies there is at most one row). Note
// that an empty key is always the only key in the set, since it's a subset of
// every other key (i.e. every other key would be redundant).
WeakKeys opt.WeakKeys

// OuterCols is the set of columns that are referenced by variables within
// this relational sub-expression, but are not bound within the scope of
// the expression. For example:
Expand Down
71 changes: 61 additions & 10 deletions pkg/sql/opt/memo/logical_props_factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ func (f logicalPropsFactory) constructScanProps(ev ExprView) LogicalProps {
}
}

// Initialize weak keys from the table schema.
props.Relational.WeakKeys = md.TableWeakKeys(def.Table)
filterWeakKeys(props.Relational)

// TODO: Need actual number of rows.
if def.Constraint != nil {
props.Relational.Stats.RowCount = 100
Expand All @@ -121,11 +125,8 @@ func (f logicalPropsFactory) constructSelectProps(ev ExprView) LogicalProps {

inputProps := ev.lookupChildGroup(0).logical.Relational

// Inherit output columns from input.
props.Relational.OutputCols = inputProps.OutputCols

// Inherit not null columns from input.
props.Relational.NotNullCols = inputProps.NotNullCols
// Inherit input properties as starting point.
*props.Relational = *inputProps

// TODO: Need better estimate based on actual filter conditions.
props.Relational.Stats.RowCount = inputProps.Stats.RowCount / 10
Expand All @@ -141,10 +142,18 @@ func (f logicalPropsFactory) constructProjectProps(ev ExprView) LogicalProps {
// Use output columns from projection list.
props.Relational.OutputCols = opt.ColListToSet(ev.Child(1).Private().(opt.ColList))

// Inherit not null columns from input.
// Inherit not null columns from input, but only use those that are also
// output columns.
props.Relational.NotNullCols = inputProps.NotNullCols
filterNullCols(props.Relational)

// Inherit outer columns from input.
props.Relational.OuterCols = inputProps.OuterCols

// Inherit weak keys that are composed entirely of output columns.
props.Relational.WeakKeys = inputProps.WeakKeys
filterWeakKeys(props.Relational)

props.Relational.Stats.RowCount = inputProps.Stats.RowCount

return props
Expand Down Expand Up @@ -185,6 +194,9 @@ func (f logicalPropsFactory) constructJoinProps(ev ExprView) LogicalProps {
props.Relational.NotNullCols.UnionWith(leftProps.NotNullCols)
}

// TODO(andyk): Need to derive weak keys for joins, for example when weak
// keys on both sides are equivalent cols.

// TODO: Need better estimate based on actual on conditions.
props.Relational.Stats.RowCount = leftProps.Stats.RowCount * rightProps.Stats.RowCount
if ev.Child(2).Operator() != opt.TrueOp {
Expand All @@ -210,10 +222,25 @@ func (f logicalPropsFactory) constructGroupByProps(ev ExprView) LogicalProps {
props.Relational.NotNullCols = inputProps.NotNullCols.Copy()
props.Relational.NotNullCols.IntersectionWith(groupingColSet)

// Scalar group by has no grouping columns and always a single row.
if groupingColSet.Empty() {
// Scalar group by.
// Any combination of columns is a weak key when there is one row.
props.Relational.WeakKeys = opt.WeakKeys{groupingColSet}
props.Relational.Stats.RowCount = 1
} else {
// The grouping columns always form a key because the GroupBy operation
// eliminates all duplicates. The result WeakKeys property either contains
// only the grouping column set, or else it contains one or more weak keys
// that are strict subsets of the grouping column set. This is because
// the grouping column set contains every output column (except aggregate
// columns, which aren't relevant since they're newly synthesized).
if inputProps.WeakKeys.ContainsSubsetOf(groupingColSet) {
props.Relational.WeakKeys = inputProps.WeakKeys
filterWeakKeys(props.Relational)
} else {
props.Relational.WeakKeys = opt.WeakKeys{groupingColSet}
}

// TODO: Need better estimate.
props.Relational.Stats.RowCount = inputProps.Stats.RowCount / 10
}
Expand Down Expand Up @@ -324,11 +351,35 @@ func (f logicalPropsFactory) constructScalarProps(ev ExprView) LogicalProps {
return props
}

// filterNullCols will ensure that the set of null columns is a subset of the
// output columns. It respects immutability by making a copy of the null
// columns if they need to be updated.
// filterNullCols ensures that the set of null columns is a subset of the output
// columns. It respects immutability by making a copy of the null columns if
// they need to be updated.
func filterNullCols(props *RelationalProps) {
	if props.NotNullCols.SubsetOf(props.OutputCols) {
		// Already restricted to the output columns; keep the existing set.
		return
	}
	// Intersection yields a new set, which then replaces the old one.
	props.NotNullCols = props.NotNullCols.Intersection(props.OutputCols)
}

// filterWeakKeys drops every weak key that references a column outside of the
// output column set. The existing WeakKeys slice is never modified in place: a
// new slice is allocated lazily when the first key is discarded, and the
// property is left untouched when every key survives.
func filterWeakKeys(props *RelationalProps) {
	keys := props.WeakKeys
	var kept opt.WeakKeys
	for idx, key := range keys {
		switch {
		case key.SubsetOf(props.OutputCols):
			// Once a copy exists, surviving keys must be carried over to it.
			if kept != nil {
				kept = append(kept, key)
			}
		case kept == nil:
			// First discarded key: seed the copy with all keys seen so far.
			kept = make(opt.WeakKeys, idx, len(keys)-1)
			copy(kept, keys[:idx])
		}
	}
	if kept != nil {
		props.WeakKeys = kept
	}
}
31 changes: 11 additions & 20 deletions pkg/sql/opt/memo/logical_props_factory_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/opt/norm"
"github.com/cockroachdb/cockroach/pkg/sql/opt/testutils"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/util/treeprinter"
)

func TestLogicalPropsFactory(t *testing.T) {
Expand Down Expand Up @@ -61,14 +60,14 @@ func TestLogicalJoinProps(t *testing.T) {
testLogicalProps(t, f.Metadata(), ev, expected)
}

joinFunc(opt.InnerJoinApplyOp, "a.x:1(int!null) a.y:2(int) b.x:3(int!null) b.z:4(int!null)\n")
joinFunc(opt.LeftJoinApplyOp, "a.x:1(int!null) a.y:2(int) b.x:3(int) b.z:4(int)\n")
joinFunc(opt.RightJoinApplyOp, "a.x:1(int) a.y:2(int) b.x:3(int!null) b.z:4(int!null)\n")
joinFunc(opt.FullJoinApplyOp, "a.x:1(int) a.y:2(int) b.x:3(int) b.z:4(int)\n")
joinFunc(opt.SemiJoinOp, "a.x:1(int!null) a.y:2(int)\n")
joinFunc(opt.SemiJoinApplyOp, "a.x:1(int!null) a.y:2(int)\n")
joinFunc(opt.AntiJoinOp, "a.x:1(int!null) a.y:2(int)\n")
joinFunc(opt.AntiJoinApplyOp, "a.x:1(int!null) a.y:2(int)\n")
joinFunc(opt.InnerJoinApplyOp, "a.x:1(int!null) a.y:2(int) b.x:3(int!null) b.z:4(int!null)")
joinFunc(opt.LeftJoinApplyOp, "a.x:1(int!null) a.y:2(int) b.x:3(int) b.z:4(int)")
joinFunc(opt.RightJoinApplyOp, "a.x:1(int) a.y:2(int) b.x:3(int!null) b.z:4(int!null)")
joinFunc(opt.FullJoinApplyOp, "a.x:1(int) a.y:2(int) b.x:3(int) b.z:4(int)")
joinFunc(opt.SemiJoinOp, "a.x:1(int!null) a.y:2(int)")
joinFunc(opt.SemiJoinApplyOp, "a.x:1(int!null) a.y:2(int)")
joinFunc(opt.AntiJoinOp, "a.x:1(int!null) a.y:2(int)")
joinFunc(opt.AntiJoinApplyOp, "a.x:1(int!null) a.y:2(int)")
}

func constructScanOpDef(md *opt.Metadata, tabID opt.TableID) *memo.ScanOpDef {
Expand All @@ -81,18 +80,10 @@ func constructScanOpDef(md *opt.Metadata, tabID opt.TableID) *memo.ScanOpDef {

func testLogicalProps(t *testing.T, md *opt.Metadata, ev memo.ExprView, expected string) {
t.Helper()
actual := ev.String()

logical := ev.Logical()
if logical.Relational == nil {
panic("only relational properties are supported")
}

tp := treeprinter.New()
logical.FormatColSet(tp, md, "", logical.Relational.OutputCols)
actual := strings.Trim(tp.String(), " ")

if actual != expected {
t.Fatalf("\nexpected: %s\nactual : %s", expected, actual)
if !strings.Contains(actual, expected) {
t.Fatalf("\nexpected to contain: %s\nactual:\n%s", expected, actual)
}
}

Expand Down
8 changes: 6 additions & 2 deletions pkg/sql/opt/memo/testdata/logprops/constraints
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,11 @@ SELECT * FROM kuv WHERE u > 1::INT
select
├── columns: k:1(int!null) u:2(float) v:3(string)
├── stats: [rows=100]
├── keys: (1)
├── scan kuv
│ ├── columns: kuv.k:1(int!null) kuv.u:2(float) kuv.v:3(string)
│ └── stats: [rows=1000]
│ ├── stats: [rows=1000]
│ └── keys: (1)
└── filters [type=bool, outer=(2)]
└── gt [type=bool, outer=(2)]
├── variable: kuv.u [type=float, outer=(2)]
Expand All @@ -227,9 +229,11 @@ SELECT * FROM kuv WHERE v <= 'foo' AND v >= 'bar'
select
├── columns: k:1(int!null) u:2(float) v:3(string)
├── stats: [rows=100]
├── keys: (1)
├── scan kuv
│ ├── columns: kuv.k:1(int!null) kuv.u:2(float) kuv.v:3(string)
│ └── stats: [rows=1000]
│ ├── stats: [rows=1000]
│ └── keys: (1)
└── filters [type=bool, outer=(3), constraints=(/3: [/'bar' - /'foo']; tight)]
├── le [type=bool, outer=(3), constraints=(/3: (/NULL - /'foo']; tight)]
│ ├── variable: kuv.v [type=string, outer=(3)]
Expand Down
Loading

0 comments on commit cc882bf

Please sign in to comment.