Skip to content

Commit

Permalink
opt: fix statistics estimation for semi and anti joins
Browse files Browse the repository at this point in the history
Prior to this commit, the statisticsBuilder always estimated that
the number of output rows for a semi or anti join was equal to the
number of rows on the left side. It ignored any ON conditions.
This commit improves the estimate by taking into account the ON
conditions.

Release note: None
  • Loading branch information
rytaft committed Sep 2, 2019
1 parent a4272c8 commit 1014541
Show file tree
Hide file tree
Showing 23 changed files with 1,227 additions and 921 deletions.
26 changes: 13 additions & 13 deletions pkg/sql/opt/exec/execbuilder/testdata/lookup_join
Original file line number Diff line number Diff line change
Expand Up @@ -675,18 +675,18 @@ render · · (a, b, c) ·
query TTTTT
EXPLAIN (VERBOSE) SELECT * from abc WHERE EXISTS (SELECT * FROM def WHERE a=f AND c=e)
----
render · · (a, b, c) ·
│ render 0 a · ·
│ render 1 b · ·
│ render 2 c · ·
└── lookup-join · · (a, b, c, e, f) ·
│ table def@primary · ·
│ type inner · ·
│ equality (a, c) = (f, e) · ·
│ equality cols are key · · ·
└── scan · · (a, b, c) ·
· table abc@primary · ·
· spans ALL · ·
render · · (a, b, c) ·
│ render 0 a · ·
│ render 1 b · ·
│ render 2 c · ·
└── lookup-join · · (a, b, c) ·
│ table def@primary · ·
│ type semi · ·
│ equality (a, c) = (f, e) · ·
│ equality cols are key · · ·
└── scan · · (a, b, c) ·
· table abc@primary · ·
· spans ALL · ·

query TTTTT
EXPLAIN (VERBOSE) SELECT * from abc WHERE NOT EXISTS (SELECT * FROM def WHERE a=f AND c=e)
Expand Down Expand Up @@ -741,7 +741,7 @@ SELECT url FROM [ EXPLAIN (DISTSQL)
SELECT a,b from small WHERE EXISTS (SELECT a FROM data WHERE small.a=data.a) ORDER BY a
]
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJzMlEFr2zAUx-_7FOJdJ-NIdtLUp1w6SOmS0eY2ctCiR-fN0TOSDCsl3304HqtdWlngQ3q0lD__X35PvGcwpHGjjuig-A4COEjgkAGHHDjMYc-htnRA58i2P-kCa_0HihmH0tSNb4_3HA5kEYpn8KWvEArYqR8V3qPSaNMZcNDoVVmda2pbHpV9Wrmjqirg8FAr4wqWpIIpo5lg5H-iBQ7bxhdsJfhKwv7EgRr_0ue8ekQoxInHM91Saf4hzYdIu6caC3Z382XHHm6-rtntdr0B_p9UK6-Awx3R76Zmv6g0jEyLFgcp34V8YSOr0aIeYq3EZ9if3vgnG0qoTsVQ7Hv12aBexM9NRM0tFUkqp05uhKo3ucXlJifj1ck4dTJJs6nqRqh66q4upy6LV5fFqcuSNJ-qboSqp255OXV5vLo8Tl2epPOp6kaoeuquP8aqfQPyHl1NxmHUFp21exj1I3Z721FjD_jN0uFc031uz7nzgUbnu1vRfaxNd9UC9sMiGJaDsHgdluHmkeosmM7D4XwK9zwYXoSbF1Oar4LhZbh5OaX5Ojyr2cgzCT-y193706e_AQAA__8Shyy4
https://cockroachdb.github.io/distsqlplan/decode.html#eJzEll9v2jAUxd_3KdB92lSjxE7Cn0iTeNhLJ62dur1NPLjEg0g0RraRVlV89wkyKSKBe-0F6Fsp-XGPfc65yhtUulAP8kVZyH8BBwYCGCTAIAUGGcwZbIxeKGu12T9SA_fFH8hjBmW12br9v-cMFtooyN_AlW6tIIef8nmtnpQslIliYFAoJ8v1YczGlC_SvM7si1yvgcGPjaxsPhhGfCCrYsAH2q2UAQaPW5cPZpzNBMx3DPTW_ZvXjHl-HaykXR0PmHGY7-YMrJNLBTnfsf_TnZ3WXUgnu7JFLds2uluiG0EiRNCX0rqyWriIx51TMng0hTKqQMclZ8c1U3T9O-0Jd_VFnteUBWkK8y3tp5s1T22rU8-dPNo3ZZbqqy4rZSI-Ov7dtfrtPs743afPplyu6j-xmDZHyS5uwYMe6k0keOtEp8ePjsZz_wZwr-ZGfBiJW3Q3QPnIp7uIcM_yEoqaovCLlJe_QwnG1ymB8PdS-KVQDKPkFikMUD72SuF54Z4pJBQ1KRQXSaF4hxROrpPCxN_LxC-FyTBKb5HCAOUTrxSeF-6ZQkJRk8LkIilM3iGF0-ukMPX3MvVLYTqMslukMED51CuFe-HBySNUNMlLL5K89PbJE_H1X0VPSHhSdqMrq7zeMuP9IVSxVPW9WL01C_Xd6MVhTP3x8cAdXmQKZV39bVp_uK_qr_YC_eFpH5gnvehxH1pwnOZtOj6ij-C4DQsUJk6dBLglwuBpH7jlVig97kO33OrQKXrhGX7hGW414fWoT7lwmCgXDlPlImiiXDhNlWuM3vgEv_BJn3JN-wSF91qkBE2YTdCU2xRO2E3g5DLFtyknKsZ77VPeWS1BnndWS5DnOE15jtOk5wROeY7jpOf4WuUjwvPOhgnyHN8wlOedFRPkOU5TnuM06TmBU57jOOk5vl5FTLw2dbZMiOcCXzJtz-e7D38DAAD__xe8Uow=

query T
SELECT url FROM [ EXPLAIN (DISTSQL)
Expand Down
26 changes: 26 additions & 0 deletions pkg/sql/opt/memo/extract.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,32 @@ func ExtractJoinEqualityColumns(
return leftEq, rightEq
}

// ExtractJoinEqualityFilters returns the subset of the given ON filters whose
// condition is a join equality, i.e. a condition that constrains a column from
// the left side to be equal to a column from the right side (with equivalent
// types). The relative order of the retained filters is preserved.
//
// If every filter in "on" is a join equality, "on" itself is returned and no
// new slice is allocated. Otherwise a newly allocated slice is returned (which
// may be empty), and "on" is left unmodified.
func ExtractJoinEqualityFilters(leftCols, rightCols opt.ColSet, on FiltersExpr) FiltersExpr {
	// equalities is allocated lazily: it remains nil for as long as every
	// filter seen so far is a join equality, so that the common all-equality
	// case can hand back the input slice without copying.
	var equalities FiltersExpr
	for idx := range on {
		isEq, _, _ := isJoinEquality(leftCols, rightCols, on[idx].Condition)
		switch {
		case !isEq && equalities == nil:
			// First filter to be dropped. Everything before it was an
			// equality, so seed the result with that prefix. At least this
			// one filter is excluded, hence the capacity of len(on)-1.
			equalities = make(FiltersExpr, idx, len(on)-1)
			copy(equalities, on[:idx])

		case isEq && equalities != nil:
			// A filter has already been dropped, so equalities must now be
			// collected explicitly.
			equalities = append(equalities, on[idx])
		}
	}
	if equalities == nil {
		// Nothing was filtered out; reuse the caller's slice.
		return on
	}
	return equalities
}

func isVarEquality(condition opt.ScalarExpr) (leftVar, rightVar *VariableExpr, ok bool) {
if eq, ok := condition.(*EqExpr); ok {
if leftVar, ok := eq.Left.(*VariableExpr); ok {
Expand Down
3 changes: 1 addition & 2 deletions pkg/sql/opt/memo/memo.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,8 +359,7 @@ func (m *Memo) RequestColStat(
}

// RowsProcessed calculates and returns the number of rows processed by the
// relational expression. It is currently only supported for lookup joins and
// merge joins.
// relational expression. It is currently only supported for joins.
func (m *Memo) RowsProcessed(expr RelExpr) (_ float64, ok bool) {
// When SetRoot is called, the statistics builder may have been cleared.
// If this happens, we can't serve the request anymore.
Expand Down
Loading

0 comments on commit 1014541

Please sign in to comment.