Skip to content

Commit

Permalink
Merge #40248 #40431
Browse files Browse the repository at this point in the history
40248: opt: calculate number of rows processed when costing joins r=rytaft a=rytaft

This PR updates the costing of joins to take into account the number of
rows processed by the operator. This number may be larger than the
number of output rows if an additional filter is applied as part of the
ON condition that is not used to determine equality
columns for the join.

For example, consider the query
  `SELECT * FROM abc JOIN def ON a = e AND b = 3;`

Assuming there is no index on b, if a lookup join is used to execute this
query, the number of rows processed is actually the same as for the query
  `SELECT * FROM abc JOIN def ON a = e;`

The difference is that the filter b=3 must also be applied to every row in
the first query. The coster now takes this into account when determining
the cost of joins.

Fixes #34810

Release note: None

40431: workload: fix partition commands in tpcc import r=solongordon a=solongordon

The commands for partitioning indexes in the TPCC import were erroring
out due to a syntax change introduced in #39332. I updated them to use
`ALTER PARTITION ... OF INDEX` rather than `ALTER PARTITION ... OF
TABLE`.

Fixes #39005
Fixes #40360
Fixes #40416

Release note: None

Co-authored-by: Rebecca Taft <[email protected]>
Co-authored-by: Solon Gordon <[email protected]>
  • Loading branch information
3 people committed Sep 3, 2019
3 parents 4b202cb + 3d565a2 + f56a83d commit 48ed5f0
Show file tree
Hide file tree
Showing 27 changed files with 1,909 additions and 1,482 deletions.
68 changes: 33 additions & 35 deletions pkg/sql/opt/exec/execbuilder/testdata/lookup_join
Original file line number Diff line number Diff line change
Expand Up @@ -217,25 +217,25 @@ ALTER TABLE authors INJECT STATISTICS '[
query TTTTT colnames
EXPLAIN (VERBOSE) SELECT DISTINCT authors.name FROM books AS b1, books2 AS b2, authors WHERE b1.title = b2.title AND authors.book = b1.title AND b1.shelf <> b2.shelf
----
tree field description columns ordering
distinct · · (name) weak-key(name)
distinct on name · ·
└── render · · (name) ·
render 0 name · ·
└── hash-join · · (title, shelf, title, shelf, name, book) ·
type inner · ·
equality (title) = (book) · ·
├── lookup-join · · (title, shelf, title, shelf) ·
table books2@primary · ·
│ │ type inner · ·
equality (title) = (title) · ·
pred @2 != @4 · ·
── scan · · (title, shelf) ·
table books@primary · ·
spans ALL · ·
└── scan · · (name, book) ·
· table authors@primary · ·
· spans ALL · ·
tree field description columns ordering
distinct · · (name) weak-key(name)
│ distinct on name · ·
└── render · · (name) ·
│ render 0 name · ·
└── lookup-join · · (name, book, title, shelf, title, shelf) ·
table books2@primary · ·
type inner · ·
equality (title) = (title) · ·
pred @4 != @6 · ·
└── hash-join · · (name, book, title, shelf) ·
type inner · ·
equality (book) = (title) · ·
── scan · · (name, book) ·
table authors@primary · ·
spans ALL · ·
└── scan · · (title, shelf) ·
· table books@primary · ·
· spans ALL · ·

# Verify data placement.
query TTTI colnames
Expand All @@ -244,12 +244,10 @@ SELECT start_key, end_key, replicas, lease_holder from [SHOW RANGES FROM TABLE b
start_key end_key replicas lease_holder
NULL NULL {5} 5

# TODO(radu): this doesn't seem to be a lookup join, but it should be.

query T
SELECT url FROM [EXPLAIN (DISTSQL) SELECT DISTINCT authors.name FROM books AS b1, books2 AS b2, authors WHERE b1.title = b2.title AND authors.book = b1.title AND b1.shelf <> b2.shelf]
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJyck89q3DAQh-99CndOCSjYku0cDAEdemhK2ZS0t7IHxZruqnE0RpKhJey7F9mlWZu10s1Rf76Zb362nsGSxo16Qg_Nd-DAoIYtg95Ri96Ti9vTpVv9C5qCgbH9EOL2lkFLDqF5hmBCh9DAhq6oz2tgoDEo043XDgxoCC-QD2qH0Fwf2FFhni78TT10eI9Ko8uLWXnonXlS7rd8IHr0wOBrr6xvsitgcDeEJpOcyRLWLPg5Fp_I2L8SPCEhgMFnosehz36SsRnZaBF9NpkU2fubTNbHdoLJksl61VG8NSlxWlINYR-_7OmsxKpHuerx0n6w5DQ61Mt_4PUrJ4b5qPw-ho4uL-ezdPgjXEh-eePMbh8upLj8N8N6kNU5QX4wPhjbhryad5Z8tX49q__KW7lH35P1-F-PpYjpoN7hlLanwbX4xVE7tpmWdyM3bmj0YTq9nha3djqKgscwT8IiDYskXM1gvoTLM2CxhKskXKe16yRcLODt4d2fAAAA__8CwK1z
https://cockroachdb.github.io/distsqlplan/decode.html#eJyck89q3DAQh-99CnVOCSjsyn9SEAR06KEpZbekvZU9KNZ0V42jMZIMLWHfvcgubGxipZujpPlG3_xsPYEjgxv9iAHkDxDAoYYdh85TgyGQT9tj0a35DXLNwbquj2l7x6EhjyCfINrYIkjY0BV1qxo4GIzatkPZkQP18QSFqPcI8vrInzUW-cbf9X2Ld6gN-tV60h46bx-1_6N0Hw_Jl8O3Trsg2RVw2PZRMiW4KmDJQ7zVQ7zscU_0sGRRLloUixany3tH3qBHM8_39ZIXRvmkw-EzWYd-VUwnafFnvFDF5Y23-0O8UOJykiRXJVfV4iTlOXkmgX9xlpk4k-AXooe-Y7_IOkZOMpWA7Yapir2_YerDyXFRrTpH7aMN0bomrqqpWKZ_Pen_ylu5w9CRC_hfj2WdviCaPY5_RKDeN_jVUzNcMy63AzdsGAxxPL0eF7duPEqCz2GRhcsJLOZwcQZczOEyC1d57SoL13m4zsLrGbw7vvsbAAD__5P3rXQ=

query TTTTT colnames
EXPLAIN (VERBOSE) SELECT a.name FROM authors AS a JOIN books2 AS b2 ON a.book = b2.title ORDER BY a.name
Expand Down Expand Up @@ -675,18 +673,18 @@ render · · (a, b, c) ·
query TTTTT
EXPLAIN (VERBOSE) SELECT * from abc WHERE EXISTS (SELECT * FROM def WHERE a=f AND c=e)
----
render · · (a, b, c) ·
│ render 0 a · ·
│ render 1 b · ·
│ render 2 c · ·
└── lookup-join · · (a, b, c, e, f) ·
│ table def@primary · ·
│ type inner · ·
│ equality (a, c) = (f, e) · ·
│ equality cols are key · · ·
└── scan · · (a, b, c) ·
· table abc@primary · ·
· spans ALL · ·
render · · (a, b, c) ·
│ render 0 a · ·
│ render 1 b · ·
│ render 2 c · ·
└── lookup-join · · (a, b, c) ·
│ table def@primary · ·
│ type semi · ·
│ equality (a, c) = (f, e) · ·
│ equality cols are key · · ·
└── scan · · (a, b, c) ·
· table abc@primary · ·
· spans ALL · ·

query TTTTT
EXPLAIN (VERBOSE) SELECT * from abc WHERE NOT EXISTS (SELECT * FROM def WHERE a=f AND c=e)
Expand Down Expand Up @@ -741,7 +739,7 @@ SELECT url FROM [ EXPLAIN (DISTSQL)
SELECT a,b from small WHERE EXISTS (SELECT a FROM data WHERE small.a=data.a) ORDER BY a
]
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJzMlEFr2zAUx-_7FOJdJ-NIdtLUp1w6SOmS0eY2ctCiR-fN0TOSDCsl3304HqtdWlngQ3q0lD__X35PvGcwpHGjjuig-A4COEjgkAGHHDjMYc-htnRA58i2P-kCa_0HihmH0tSNb4_3HA5kEYpn8KWvEArYqR8V3qPSaNMZcNDoVVmda2pbHpV9Wrmjqirg8FAr4wqWpIIpo5lg5H-iBQ7bxhdsJfhKwv7EgRr_0ue8ekQoxInHM91Saf4hzYdIu6caC3Z382XHHm6-rtntdr0B_p9UK6-Awx3R76Zmv6g0jEyLFgcp34V8YSOr0aIeYq3EZ9if3vgnG0qoTsVQ7Hv12aBexM9NRM0tFUkqp05uhKo3ucXlJifj1ck4dTJJs6nqRqh66q4upy6LV5fFqcuSNJ-qboSqp255OXV5vLo8Tl2epPOp6kaoeuquP8aqfQPyHl1NxmHUFp21exj1I3Z721FjD_jN0uFc031uz7nzgUbnu1vRfaxNd9UC9sMiGJaDsHgdluHmkeosmM7D4XwK9zwYXoSbF1Oar4LhZbh5OaX5Ojyr2cgzCT-y193706e_AQAA__8Shyy4
https://cockroachdb.github.io/distsqlplan/decode.html#eJzEll9v2jAUxd_3KdB92lSjxE7Cn0iTeNhLJ62dur1NPLjEg0g0RraRVlV89wkyKSKBe-0F6Fsp-XGPfc65yhtUulAP8kVZyH8BBwYCGCTAIAUGGcwZbIxeKGu12T9SA_fFH8hjBmW12br9v-cMFtooyN_AlW6tIIef8nmtnpQslIliYFAoJ8v1YczGlC_SvM7si1yvgcGPjaxsPhhGfCCrYsAH2q2UAQaPW5cPZpzNBMx3DPTW_ZvXjHl-HaykXR0PmHGY7-YMrJNLBTnfsf_TnZ3WXUgnu7JFLds2uluiG0EiRNCX0rqyWriIx51TMng0hTKqQMclZ8c1U3T9O-0Jd_VFnteUBWkK8y3tp5s1T22rU8-dPNo3ZZbqqy4rZSI-Ov7dtfrtPs743afPplyu6j-xmDZHyS5uwYMe6k0keOtEp8ePjsZz_wZwr-ZGfBiJW3Q3QPnIp7uIcM_yEoqaovCLlJe_QwnG1ymB8PdS-KVQDKPkFikMUD72SuF54Z4pJBQ1KRQXSaF4hxROrpPCxN_LxC-FyTBKb5HCAOUTrxSeF-6ZQkJRk8LkIilM3iGF0-ukMPX3MvVLYTqMslukMED51CuFe-HBySNUNMlLL5K89PbJE_H1X0VPSHhSdqMrq7zeMuP9IVSxVPW9WL01C_Xd6MVhTP3x8cAdXmQKZV39bVp_uK_qr_YC_eFpH5gnvehxH1pwnOZtOj6ij-C4DQsUJk6dBLglwuBpH7jlVig97kO33OrQKXrhGX7hGW414fWoT7lwmCgXDlPlImiiXDhNlWuM3vgEv_BJn3JN-wSF91qkBE2YTdCU2xRO2E3g5DLFtyknKsZ77VPeWS1BnndWS5DnOE15jtOk5wROeY7jpOf4WuUjwvPOhgnyHN8wlOedFRPkOU5TnuM06TmBU57jOOk5vl5FTLw2dbZMiOcCXzJtz-e7D38DAAD__xe8Uow=

query T
SELECT url FROM [ EXPLAIN (DISTSQL)
Expand Down
26 changes: 26 additions & 0 deletions pkg/sql/opt/memo/extract.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,32 @@ func ExtractJoinEqualityColumns(
return leftEq, rightEq
}

// ExtractJoinEqualityFilters returns the subset of the given ON filters that
// isJoinEquality recognizes as join equalities, i.e. conditions that constrain
// a column from the left side to be equal to a column from the right side
// (with equivalent types). The relative order of the retained filters is
// preserved.
//
// If every filter in "on" qualifies, the input slice itself is returned and
// nothing is allocated; callers must therefore not assume the result is a
// private copy.
func ExtractJoinEqualityFilters(leftCols, rightCols opt.ColSet, on FiltersExpr) FiltersExpr {
	// eqFilters stays nil for as long as every filter seen so far is a join
	// equality; it is only materialized once a filter has to be dropped.
	var eqFilters FiltersExpr
	for idx := range on {
		isEq, _, _ := isJoinEquality(leftCols, rightCols, on[idx].Condition)
		switch {
		case !isEq && eqFilters == nil:
			// First filter to be dropped: everything before idx was kept, so
			// seed the new slice with that prefix. At most len(on)-1 filters
			// can survive, hence the capacity.
			eqFilters = make(FiltersExpr, idx, len(on)-1)
			copy(eqFilters, on[:idx])
		case isEq && eqFilters != nil:
			// A filter was already dropped earlier, so kept filters must now
			// be copied over explicitly.
			eqFilters = append(eqFilters, on[idx])
		}
	}
	if eqFilters == nil {
		// Nothing was dropped; reuse the caller's slice.
		return on
	}
	return eqFilters
}

func isVarEquality(condition opt.ScalarExpr) (leftVar, rightVar *VariableExpr, ok bool) {
if eq, ok := condition.(*EqExpr); ok {
if leftVar, ok := eq.Left.(*VariableExpr); ok {
Expand Down
14 changes: 14 additions & 0 deletions pkg/sql/opt/memo/logical_props_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -1586,6 +1586,14 @@ func ensureLookupJoinInputProps(join *LookupJoinExpr, sb *statisticsBuilder) *pr
if relational.OutputCols.Empty() {
md := join.Memo().Metadata()
relational.OutputCols = join.Cols.Difference(join.Input.Relational().OutputCols)

// Include the key columns in the output columns.
index := md.Table(join.Table).Index(join.Index)
for i := range join.KeyCols {
indexColID := join.Table.ColumnID(index.Column(i).Ordinal)
relational.OutputCols.Add(indexColID)
}

relational.NotNullCols = tableNotNullCols(md, join.Table)
relational.NotNullCols.IntersectionWith(relational.OutputCols)
relational.Cardinality = props.AnyCardinality
Expand Down Expand Up @@ -1679,6 +1687,8 @@ type joinPropsHelper struct {
filterNotNullCols opt.ColSet
filterIsTrue bool
filterIsFalse bool

selfJoinCols opt.ColSet
}

func (h *joinPropsHelper) init(b *logicalPropsBuilder, joinExpr RelExpr) {
Expand All @@ -1702,6 +1712,10 @@ func (h *joinPropsHelper) init(b *logicalPropsBuilder, joinExpr RelExpr) {
h.filterNotNullCols.Add(colID)
h.filterNotNullCols.Add(indexColID)
h.filtersFD.AddEquivalency(colID, indexColID)
if colID == indexColID {
// This can happen if an index join was converted into a lookup join.
h.selfJoinCols.Add(colID)
}
}

// Lookup join has implicit equality conditions on KeyCols.
Expand Down
11 changes: 11 additions & 0 deletions pkg/sql/opt/memo/memo.go
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,17 @@ func (m *Memo) RequestColStat(
return nil, false
}

// RowsProcessed returns the number of rows processed by the given relational
// expression, as computed by the statistics builder. It is currently only
// supported for joins.
//
// The boolean result reports whether the request could be served: it is false
// (and the row count is 0) when the statistics builder's metadata is nil,
// which may be the case after SetRoot has been called.
func (m *Memo) RowsProcessed(expr RelExpr) (_ float64, ok bool) {
	if m.logPropsBuilder.sb.md == nil {
		// The statistics builder may have been cleared by SetRoot, in which
		// case the request can no longer be served.
		return 0, false
	}
	return m.logPropsBuilder.sb.rowsProcessed(expr), true
}

// NextWithID returns a not-yet-assigned identifier for a WITH expression.
func (m *Memo) NextWithID() opt.WithID {
m.curWithID++
Expand Down
Loading

0 comments on commit 48ed5f0

Please sign in to comment.