From 043e062ad05ce06bfcd72c4cf9b738fc1a1d1c05 Mon Sep 17 00:00:00 2001 From: Yiding Date: Mon, 21 Oct 2024 22:21:17 +0800 Subject: [PATCH 1/3] planner, stats: overflow estimation may lead to wrong join reorder --- pkg/planner/core/rule_join_reorder_greedy.go | 21 +++++-- pkg/statistics/histogram.go | 6 +- .../r/statistics/overflow_calc.result | 61 +++++++++++++++++++ .../t/statistics/overflow_calc.test | 8 +++ 4 files changed, 91 insertions(+), 5 deletions(-) create mode 100644 tests/integrationtest/r/statistics/overflow_calc.result create mode 100644 tests/integrationtest/t/statistics/overflow_calc.test diff --git a/pkg/planner/core/rule_join_reorder_greedy.go b/pkg/planner/core/rule_join_reorder_greedy.go index f489d3fe41bd6..140db24da2994 100644 --- a/pkg/planner/core/rule_join_reorder_greedy.go +++ b/pkg/planner/core/rule_join_reorder_greedy.go @@ -21,6 +21,7 @@ import ( "github.com/pingcap/tidb/pkg/expression" "github.com/pingcap/tidb/pkg/planner/core/base" + "github.com/pingcap/tidb/pkg/util/intest" ) type joinReorderGreedySolver struct { @@ -97,9 +98,9 @@ func (s *joinReorderGreedySolver) constructConnectedJoinTree(tracer *joinReorder s.curJoinGroup = s.curJoinGroup[1:] for { bestCost := math.MaxFloat64 - bestIdx := -1 - var finalRemainOthers []expression.Expression - var bestJoin base.LogicalPlan + bestIdx, whateverValidOneIdx := -1, -1 + var finalRemainOthers, remainOthersOfWhateverValidOne []expression.Expression + var bestJoin, whateverValidOne base.LogicalPlan for i, node := range s.curJoinGroup { newJoin, remainOthers := s.checkConnectionAndMakeJoin(curJoinTree.p, node.p) if newJoin == nil { @@ -109,6 +110,9 @@ func (s *joinReorderGreedySolver) constructConnectedJoinTree(tracer *joinReorder if err != nil { return nil, err } + whateverValidOne = newJoin + whateverValidOneIdx = i + remainOthersOfWhateverValidOne = remainOthers curCost := s.calcJoinCumCost(newJoin, curJoinTree, node) tracer.appendLogicalJoinCost(newJoin, curCost) if bestCost > curCost { 
@@ -120,7 +124,16 @@ func (s *joinReorderGreedySolver) constructConnectedJoinTree(tracer *joinReorder } // If we could find more join node, meaning that the sub connected graph have been totally explored. if bestJoin == nil { - break + if whateverValidOne == nil { + break + } + // This branch is for the unexpected case. + // We raise an assertion in the test env, and create a valid join to avoid a wrong result in the production env. + intest.Assert(false, "Join reorder should find one valid join but failed.") + bestJoin = whateverValidOne + bestCost = math.MaxFloat64 + bestIdx = whateverValidOneIdx + finalRemainOthers = remainOthersOfWhateverValidOne } curJoinTree = &jrNode{ p: bestJoin, diff --git a/pkg/statistics/histogram.go b/pkg/statistics/histogram.go index 6a1f339903277..92c6d3b19b0da 100644 --- a/pkg/statistics/histogram.go +++ b/pkg/statistics/histogram.go @@ -67,7 +67,7 @@ type Histogram struct { // For some types like `Int`, we do not build it because we can get them directly from `Bounds`. Scalars []scalar ID int64 // Column ID. - NDV int64 // Number of distinct values. + NDV int64 // Number of distinct values. Note that it includes the NDV of the TopN, which is excluded from the histogram. NullCount int64 // Number of null values. // LastUpdateVersion is the version that this histogram updated last time. LastUpdateVersion uint64 @@ -980,6 +980,10 @@ func (hg *Histogram) OutOfRangeRowCount( if histWidth <= 0 { return 0 } + if math.IsInf(histWidth, 1) { + // The histogram is too wide. As a quick fix, we return 0 to indicate that the overlap percentage is near 0.
+ return 0 + } boundL := histL - histWidth boundR := histR + histWidth diff --git a/tests/integrationtest/r/statistics/overflow_calc.result b/tests/integrationtest/r/statistics/overflow_calc.result new file mode 100644 index 0000000000000..de61581e6c07d --- /dev/null +++ b/tests/integrationtest/r/statistics/overflow_calc.result @@ -0,0 +1,61 @@ +CREATE TABLE `lrr_test` ( `COL102` double DEFAULT NULL, `COL1` double GENERATED ALWAYS AS (`COL102` + 10) STORED NOT NULL, PRIMARY KEY (`COL1`) /*T![clustered_index] CLUSTERED */ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; +load data local infile 'csv/extreamfloat64.csv' into table lrr_test fields terminated by ',' enclosed by '"' lines terminated by '\n'; +analyze table lrr_test; +select t1.col1, t2.col1 from lrr_test as t1 right join lrr_test as t2 on t1.col1 = t2.col1 where t1.col1 >=0 order by t1.col1; +col1 col1 +4.124952267435051e305 4.124952267435051e305 +5.4576487694211726e306 5.4576487694211726e306 +1.1237742400537221e307 1.1237742400537221e307 +1.569984332645614e307 1.569984332645614e307 +1.7966188405412235e307 1.7966188405412235e307 +1.8619233341238355e307 1.8619233341238355e307 +2.1152066540419881e307 2.1152066540419881e307 +2.1764927570795164e307 2.1764927570795164e307 +2.99416682762135e307 2.99416682762135e307 +3.0545414962788647e307 3.0545414962788647e307 +3.262967770716021e307 3.262967770716021e307 +3.288944887183685e307 3.288944887183685e307 +4.9025219351381e307 4.9025219351381e307 +5.250864486081297e307 5.250864486081297e307 +5.52054372134351e307 5.52054372134351e307 +6.311436996747818e307 6.311436996747818e307 +6.870852232080436e307 6.870852232080436e307 +7.501871137935436e307 7.501871137935436e307 +7.925709054822421e307 7.925709054822421e307 +8.438195254661318e307 8.438195254661318e307 +8.446731596918706e307 8.446731596918706e307 +9.43580947190119e307 9.43580947190119e307 +9.66735866233596e307 9.66735866233596e307 +1.0022043827847664e308 1.0022043827847664e308 +1.020869767928594e308 
1.020869767928594e308 +1.0327408606815872e308 1.0327408606815872e308 +1.0402383684235906e308 1.0402383684235906e308 +1.0690255622829305e308 1.0690255622829305e308 +1.1623306052784659e308 1.1623306052784659e308 +1.1906116361044565e308 1.1906116361044565e308 +1.2221839628780758e308 1.2221839628780758e308 +1.3112927565356536e308 1.3112927565356536e308 +1.3307364382402157e308 1.3307364382402157e308 +1.3646958839720612e308 1.3646958839720612e308 +1.425066345632827e308 1.425066345632827e308 +1.4433864261103511e308 1.4433864261103511e308 +1.5038532858735658e308 1.5038532858735658e308 +1.5079450808097928e308 1.5079450808097928e308 +1.553628680980576e308 1.553628680980576e308 +1.6241456663280369e308 1.6241456663280369e308 +1.6295729949930798e308 1.6295729949930798e308 +1.6328703529666413e308 1.6328703529666413e308 +1.6832354056195887e308 1.6832354056195887e308 +1.7017315016390902e308 1.7017315016390902e308 +1.7134206410400048e308 1.7134206410400048e308 +1.7240829054261275e308 1.7240829054261275e308 +1.7257738639648862e308 1.7257738639648862e308 +1.7262297095455299e308 1.7262297095455299e308 +1.7905151735809062e308 1.7905151735809062e308 +explain select t1. 
col1, t2.col1 from lrr_test as t1 right join lrr_test as t2 on t1.col1 = t2.col1 where t1.col1 >=0; +id estRows task access object operator info +MergeJoin_8 49.00 root inner join, left key:statistics__overflow_calc.lrr_test.col1, right key:statistics__overflow_calc.lrr_test.col1 +├─TableReader_36(Build) 49.00 root data:TableRangeScan_35 +│ └─TableRangeScan_35 49.00 cop[tikv] table:t2 range:[0,+inf], keep order:true +└─TableReader_34(Probe) 49.00 root data:TableRangeScan_33 + └─TableRangeScan_33 49.00 cop[tikv] table:t1 range:[0,+inf], keep order:true diff --git a/tests/integrationtest/t/statistics/overflow_calc.test b/tests/integrationtest/t/statistics/overflow_calc.test new file mode 100644 index 0000000000000..2a4a46eb85d3b --- /dev/null +++ b/tests/integrationtest/t/statistics/overflow_calc.test @@ -0,0 +1,8 @@ +# https://github.com/pingcap/tidb/issues/56704 +CREATE TABLE `lrr_test` ( `COL102` double DEFAULT NULL, `COL1` double GENERATED ALWAYS AS (`COL102` + 10) STORED NOT NULL, PRIMARY KEY (`COL1`) /*T![clustered_index] CLUSTERED */ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; +load data local infile 'csv/extreamfloat64.csv' into table lrr_test fields terminated by ',' enclosed by '"' lines terminated by '\n'; + +analyze table lrr_test; + +select t1.col1, t2.col1 from lrr_test as t1 right join lrr_test as t2 on t1.col1 = t2.col1 where t1.col1 >=0 order by t1.col1; +explain select t1. 
col1, t2.col1 from lrr_test as t1 right join lrr_test as t2 on t1.col1 = t2.col1 where t1.col1 >=0; From 3e295615fec6ea348c7d3f735b620c45a5b64f34 Mon Sep 17 00:00:00 2001 From: Yiding Date: Mon, 21 Oct 2024 23:03:17 +0800 Subject: [PATCH 2/3] address comment --- .../r/statistics/overflow_calc.result | 12 ++++++------ .../integrationtest/t/statistics/overflow_calc.test | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/integrationtest/r/statistics/overflow_calc.result b/tests/integrationtest/r/statistics/overflow_calc.result index de61581e6c07d..22caa45fd8eb9 100644 --- a/tests/integrationtest/r/statistics/overflow_calc.result +++ b/tests/integrationtest/r/statistics/overflow_calc.result @@ -52,10 +52,10 @@ col1 col1 1.7257738639648862e308 1.7257738639648862e308 1.7262297095455299e308 1.7262297095455299e308 1.7905151735809062e308 1.7905151735809062e308 -explain select t1. col1, t2.col1 from lrr_test as t1 right join lrr_test as t2 on t1.col1 = t2.col1 where t1.col1 >=0; +explain format='brief' select t1. 
col1, t2.col1 from lrr_test as t1 right join lrr_test as t2 on t1.col1 = t2.col1 where t1.col1 >=0; id estRows task access object operator info -MergeJoin_8 49.00 root inner join, left key:statistics__overflow_calc.lrr_test.col1, right key:statistics__overflow_calc.lrr_test.col1 -├─TableReader_36(Build) 49.00 root data:TableRangeScan_35 -│ └─TableRangeScan_35 49.00 cop[tikv] table:t2 range:[0,+inf], keep order:true -└─TableReader_34(Probe) 49.00 root data:TableRangeScan_33 - └─TableRangeScan_33 49.00 cop[tikv] table:t1 range:[0,+inf], keep order:true +MergeJoin 49.00 root inner join, left key:statistics__overflow_calc.lrr_test.col1, right key:statistics__overflow_calc.lrr_test.col1 +├─TableReader(Build) 49.00 root data:TableRangeScan +│ └─TableRangeScan 49.00 cop[tikv] table:t2 range:[0,+inf], keep order:true +└─TableReader(Probe) 49.00 root data:TableRangeScan + └─TableRangeScan 49.00 cop[tikv] table:t1 range:[0,+inf], keep order:true diff --git a/tests/integrationtest/t/statistics/overflow_calc.test b/tests/integrationtest/t/statistics/overflow_calc.test index 2a4a46eb85d3b..605cb89ba3de0 100644 --- a/tests/integrationtest/t/statistics/overflow_calc.test +++ b/tests/integrationtest/t/statistics/overflow_calc.test @@ -5,4 +5,4 @@ load data local infile 'csv/extreamfloat64.csv' into table lrr_test fields termi analyze table lrr_test; select t1.col1, t2.col1 from lrr_test as t1 right join lrr_test as t2 on t1.col1 = t2.col1 where t1.col1 >=0 order by t1.col1; -explain select t1. col1, t2.col1 from lrr_test as t1 right join lrr_test as t2 on t1.col1 = t2.col1 where t1.col1 >=0; +explain format='brief' select t1. 
col1, t2.col1 from lrr_test as t1 right join lrr_test as t2 on t1.col1 = t2.col1 where t1.col1 >=0; From ece87144dbd44e362c52008d86554ab1f404c4c9 Mon Sep 17 00:00:00 2001 From: Yiding Date: Mon, 21 Oct 2024 23:33:55 +0800 Subject: [PATCH 3/3] use insert into values --- tests/integrationtest/r/statistics/overflow_calc.result | 2 +- tests/integrationtest/t/statistics/overflow_calc.test | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integrationtest/r/statistics/overflow_calc.result b/tests/integrationtest/r/statistics/overflow_calc.result index 22caa45fd8eb9..40e46f768d878 100644 --- a/tests/integrationtest/r/statistics/overflow_calc.result +++ b/tests/integrationtest/r/statistics/overflow_calc.result @@ -1,5 +1,5 @@ CREATE TABLE `lrr_test` ( `COL102` double DEFAULT NULL, `COL1` double GENERATED ALWAYS AS (`COL102` + 10) STORED NOT NULL, PRIMARY KEY (`COL1`) /*T![clustered_index] CLUSTERED */ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; -load data local infile 'csv/extreamfloat64.csv' into table lrr_test fields terminated by ',' enclosed by '"' lines terminated by '\n'; +insert into lrr_test (col102) values(-1.704648925036604e308), (-1.6888619680353582e308), (-1.6685908644498436e308), (-1.6311134967437805e308), (-1.6128280680807152e308), (-1.5899713947158026e308), (-1.5709457594070477e308), (-1.4925714566991343e308), (-1.4705985087370154e308), (-1.4451316666300040e308), (-1.3946576985986583e308), (-1.3695679630646804e308), (-1.3208992137984086e308), (-1.2887981369134862e308), (-1.2119996449796167e308), (-1.195172956104992e308), (-1.1929781068369925e308), (-1.1746351299417647e308), (-1.1237012620945195e308), (-1.1223448185004882e308), (-1.0974439629672084e308), (-1.0657654808610821e308), (-1.0582598945271716e308), (-1.0565276887850733e308), (-1.0416104832981696e308), (-1.0368741532690337e308), (-1.033521479407133e308), (-1.0232269544119505e308), (-9.31943312515408e307), (-9.05107332838438e307), (-8.276443475796885e307), 
(-7.845086666145396e307), (-7.664543340054255e307), (-7.235369799352141e307), (-7.047280050755922e307), (-6.62205033356235e307), (-6.35964999739255e307), (-5.989391229038818e307), (-5.974526205854541e307), (-5.798684586589338e307), (-4.98047732376121e307), (-4.4623979626128605e307), (-4.3248436443381234e307), (-3.3391152928792773e307), (-3.2694282487729395e307), (-3.2461091065368577e307), (-2.8613054009714654e307), (-2.7176814604572905e307), (-2.1301127705458223e307), (-1.7280065154718344e307), (-1.6743061442642827e307), (-4.862812928655648e306), (-3.3262533560429795e305), (4.124952267435051e305), (5.4576487694211726e306), (1.1237742400537221e307), (1.569984332645614e307), (1.7966188405412235e307), (1.8619233341238355e307), (2.1152066540419881e307), (2.1764927570795164e307), (2.99416682762135e307), (3.0545414962788647e307), (3.262967770716021e307), (3.288944887183685e307), (4.9025219351381e307), (5.250864486081297e307), (5.52054372134351e307), (6.311436996747818e307), (6.870852232080436e307), (7.501871137935436e307), (7.925709054822421e307), (8.438195254661318e307), (8.446731596918706e307), (9.43580947190119e307), (9.66735866233596e307), (1.0022043827847664e308), (1.020869767928594e308), (1.0327408606815872e308), (1.0402383684235906e308), (1.0690255622829305e308), (1.1623306052784659e308), (1.1906116361044565e308), (1.2221839628780758e308), (1.3112927565356536e308), (1.3307364382402157e308), (1.3646958839720612e308), (1.425066345632827e308), (1.4433864261103511e308), (1.5038532858735658e308), (1.5079450808097928e308), (1.553628680980576e308), (1.6241456663280369e308), (1.6295729949930798e308), (1.6328703529666413e308), (1.6832354056195887e308), (1.7017315016390902e308), (1.7134206410400048e308), (1.7240829054261275e308), (1.7257738639648862e308), (1.7262297095455299e308), (1.7905151735809062e308); analyze table lrr_test; select t1.col1, t2.col1 from lrr_test as t1 right join lrr_test as t2 on t1.col1 = t2.col1 where t1.col1 >=0 order by t1.col1; col1 col1 diff 
--git a/tests/integrationtest/t/statistics/overflow_calc.test b/tests/integrationtest/t/statistics/overflow_calc.test index 605cb89ba3de0..10b844d72e1ca 100644 --- a/tests/integrationtest/t/statistics/overflow_calc.test +++ b/tests/integrationtest/t/statistics/overflow_calc.test @@ -1,6 +1,6 @@ # https://github.com/pingcap/tidb/issues/56704 CREATE TABLE `lrr_test` ( `COL102` double DEFAULT NULL, `COL1` double GENERATED ALWAYS AS (`COL102` + 10) STORED NOT NULL, PRIMARY KEY (`COL1`) /*T![clustered_index] CLUSTERED */ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; -load data local infile 'csv/extreamfloat64.csv' into table lrr_test fields terminated by ',' enclosed by '"' lines terminated by '\n'; +insert into lrr_test (col102) values(-1.704648925036604e308), (-1.6888619680353582e308), (-1.6685908644498436e308), (-1.6311134967437805e308), (-1.6128280680807152e308), (-1.5899713947158026e308), (-1.5709457594070477e308), (-1.4925714566991343e308), (-1.4705985087370154e308), (-1.4451316666300040e308), (-1.3946576985986583e308), (-1.3695679630646804e308), (-1.3208992137984086e308), (-1.2887981369134862e308), (-1.2119996449796167e308), (-1.195172956104992e308), (-1.1929781068369925e308), (-1.1746351299417647e308), (-1.1237012620945195e308), (-1.1223448185004882e308), (-1.0974439629672084e308), (-1.0657654808610821e308), (-1.0582598945271716e308), (-1.0565276887850733e308), (-1.0416104832981696e308), (-1.0368741532690337e308), (-1.033521479407133e308), (-1.0232269544119505e308), (-9.31943312515408e307), (-9.05107332838438e307), (-8.276443475796885e307), (-7.845086666145396e307), (-7.664543340054255e307), (-7.235369799352141e307), (-7.047280050755922e307), (-6.62205033356235e307), (-6.35964999739255e307), (-5.989391229038818e307), (-5.974526205854541e307), (-5.798684586589338e307), (-4.98047732376121e307), (-4.4623979626128605e307), (-4.3248436443381234e307), (-3.3391152928792773e307), (-3.2694282487729395e307), (-3.2461091065368577e307), 
(-2.8613054009714654e307), (-2.7176814604572905e307), (-2.1301127705458223e307), (-1.7280065154718344e307), (-1.6743061442642827e307), (-4.862812928655648e306), (-3.3262533560429795e305), (4.124952267435051e305), (5.4576487694211726e306), (1.1237742400537221e307), (1.569984332645614e307), (1.7966188405412235e307), (1.8619233341238355e307), (2.1152066540419881e307), (2.1764927570795164e307), (2.99416682762135e307), (3.0545414962788647e307), (3.262967770716021e307), (3.288944887183685e307), (4.9025219351381e307), (5.250864486081297e307), (5.52054372134351e307), (6.311436996747818e307), (6.870852232080436e307), (7.501871137935436e307), (7.925709054822421e307), (8.438195254661318e307), (8.446731596918706e307), (9.43580947190119e307), (9.66735866233596e307), (1.0022043827847664e308), (1.020869767928594e308), (1.0327408606815872e308), (1.0402383684235906e308), (1.0690255622829305e308), (1.1623306052784659e308), (1.1906116361044565e308), (1.2221839628780758e308), (1.3112927565356536e308), (1.3307364382402157e308), (1.3646958839720612e308), (1.425066345632827e308), (1.4433864261103511e308), (1.5038532858735658e308), (1.5079450808097928e308), (1.553628680980576e308), (1.6241456663280369e308), (1.6295729949930798e308), (1.6328703529666413e308), (1.6832354056195887e308), (1.7017315016390902e308), (1.7134206410400048e308), (1.7240829054261275e308), (1.7257738639648862e308), (1.7262297095455299e308), (1.7905151735809062e308); analyze table lrr_test;