From f60ffb9a2869bce153609bde2e1c32cf48564f6d Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Wed, 12 Jul 2023 11:33:43 +0800 Subject: [PATCH] planner: fix join reorder will append remained other condition to an outer join (#44409) (#44424) close pingcap/tidb#44314 --- planner/core/rule_join_reorder.go | 17 ++++++++++++++- planner/core/rule_join_reorder_test.go | 30 ++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/planner/core/rule_join_reorder.go b/planner/core/rule_join_reorder.go index d83f7a24e8163..219470d6fd433 100644 --- a/planner/core/rule_join_reorder.go +++ b/planner/core/rule_join_reorder.go @@ -181,7 +181,7 @@ func extractJoinGroup(p LogicalPlan) *joinGroupResult { tmpOtherConds = append(tmpOtherConds, join.OtherConditions...) tmpOtherConds = append(tmpOtherConds, join.LeftConditions...) tmpOtherConds = append(tmpOtherConds, join.RightConditions...) - if join.JoinType == LeftOuterJoin || join.JoinType == RightOuterJoin { + if join.JoinType == LeftOuterJoin || join.JoinType == RightOuterJoin || join.JoinType == LeftOuterSemiJoin || join.JoinType == AntiLeftOuterSemiJoin { for range join.EqualConditions { abType := &joinTypeWithExtMsg{JoinType: join.JoinType} // outer join's other condition should be bound with the connecting edge. @@ -503,6 +503,13 @@ func (s *baseSingleGroupJoinOrderSolver) makeJoin(leftPlan, rightPlan LogicalPla remainOtherConds, otherConds = expression.FilterOutInPlace(remainOtherConds, func(expr expression.Expression) bool { return expression.ExprFromSchema(expr, mergedSchema) }) + if (joinType.JoinType == LeftOuterJoin || joinType.JoinType == RightOuterJoin || joinType.JoinType == LeftOuterSemiJoin || joinType.JoinType == AntiLeftOuterSemiJoin) && len(otherConds) > 0 { + // the original outer join's other conditions has been bound to the outer join Edge, + // these remained other condition here shouldn't be appended to it because on-mismatch + // logic will produce more append-null rows which is banned in original semantic. + remainOtherConds = append(remainOtherConds, otherConds...) // nozero + otherConds = otherConds[:0] + } if len(joinType.outerBindCondition) > 0 { remainOBOtherConds := make([]expression.Expression, len(joinType.outerBindCondition)) copy(remainOBOtherConds, joinType.outerBindCondition) @@ -546,6 +553,14 @@ func (s *baseSingleGroupJoinOrderSolver) makeBushyJoin(cartesianJoinGroup []Logi } cartesianJoinGroup, resultJoinGroup = resultJoinGroup, cartesianJoinGroup } + // other conditions may be possible to exist across different cartesian join group, resolving cartesianJoin first then adding another selection. + if len(s.otherConds) > 0 { + additionSelection := LogicalSelection{ + Conditions: s.otherConds, + }.Init(cartesianJoinGroup[0].SCtx(), cartesianJoinGroup[0].SelectBlockOffset()) + additionSelection.SetChildren(cartesianJoinGroup[0]) + cartesianJoinGroup[0] = additionSelection + } return cartesianJoinGroup[0] } diff --git a/planner/core/rule_join_reorder_test.go b/planner/core/rule_join_reorder_test.go index 37cb10164a4cc..5e53b9cc27147 100644 --- a/planner/core/rule_join_reorder_test.go +++ b/planner/core/rule_join_reorder_test.go @@ -69,6 +69,36 @@ func TestJoinOrderHintWithBinding(t *testing.T) { tk.MustExec("drop global binding for select * from t1 join t2 on t1.a=t2.a join t3 on t2.b=t3.b") } +func TestAdditionOtherConditionsRemained4OuterJoin(t *testing.T) { + store, _ := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + + tk.MustExec("use test") + tk.MustExec("CREATE TABLE `queries_identifier` (\n `id` int(11) NOT NULL AUTO_INCREMENT,\n `name` varchar(100) COLLATE utf8mb4_general_ci NOT NULL,\n PRIMARY KEY (`id`) /*T![clustered_index] CLUSTERED */\n ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;") + tk.MustExec("CREATE TABLE `queries_program` (\n `id` int(11) NOT NULL AUTO_INCREMENT,\n `identifier_id` int(11) NOT NULL,\n PRIMARY KEY (`id`) /*T![clustered_index] CLUSTERED */,\n UNIQUE KEY `identifier_id` (`identifier_id`),\n CONSTRAINT `queries_program_identifier_id_70ff12a6_fk_queries_identifier_id` FOREIGN KEY (`identifier_id`) REFERENCES `test`.`queries_identifier` (`id`)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;") + tk.MustExec("CREATE TABLE `queries_channel` (\n `id` int(11) NOT NULL AUTO_INCREMENT,\n `identifier_id` int(11) NOT NULL,\n PRIMARY KEY (`id`) /*T![clustered_index] CLUSTERED */,\n UNIQUE KEY `identifier_id` (`identifier_id`),\n CONSTRAINT `queries_channel_identifier_id_06ac3513_fk_queries_identifier_id` FOREIGN KEY (`identifier_id`) REFERENCES `test`.`queries_identifier` (`id`)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;") + + tk.MustExec("INSERT INTO queries_identifier(`id`, `name`) values(13, 'i1'), (14, 'i2'), (15, 'i3');") + tk.MustExec("INSERT INTO queries_program(`id`, `identifier_id`) values(8, 13), (9, 14);") + tk.MustExec("INSERT INTO queries_channel(`id`, `identifier_id`) values(5, 13);") + + tk.MustQuery("SELECT `queries_identifier`.`id`, `queries_identifier`.`name` FROM `queries_identifier` LEFT OUTER JOIN `queries_channel` ON (`queries_identifier`.`id` = `queries_channel`.`identifier_id`) INNER JOIN `queries_program` ON (`queries_identifier`.`id` = `queries_program`.`identifier_id`) WHERE ((`queries_channel`.`id` = 5 AND `queries_program`.`id` = 9) OR `queries_program`.`id` = 8) ORDER BY `queries_identifier`.`id` ASC;").Check(testkit.Rows("" + + "13 i1")) + tk.MustQuery("SELECT `queries_identifier`.`id`, `queries_identifier`.`name` FROM `queries_identifier` RIGHT OUTER JOIN `queries_channel` ON (`queries_identifier`.`id` = `queries_channel`.`identifier_id`) INNER JOIN `queries_program` ON (`queries_identifier`.`id` = `queries_program`.`identifier_id`) WHERE ((`queries_channel`.`id` = 5 AND `queries_program`.`id` = 9) OR `queries_program`.`id` = 8) ORDER BY `queries_identifier`.`id` ASC;").Check(testkit.Rows("" + + "13 i1")) + tk.MustQuery("explain format = 'brief' SELECT `queries_identifier`.`id`, `queries_identifier`.`name` FROM `queries_identifier` LEFT OUTER JOIN `queries_channel` ON (`queries_identifier`.`id` = `queries_channel`.`identifier_id`) INNER JOIN `queries_program` ON (`queries_identifier`.`id` = `queries_program`.`identifier_id`) WHERE ((`queries_channel`.`id` = 5 AND `queries_program`.`id` = 9) OR `queries_program`.`id` = 8) ORDER BY `queries_identifier`.`id` ASC;").Check(testkit.Rows(""+ + "Sort 2.50 root test.queries_identifier.id", + "└─Projection 2.50 root test.queries_identifier.id, test.queries_identifier.name", + " └─Selection 2.50 root or(and(eq(test.queries_channel.id, 5), eq(test.queries_program.id, 9)), eq(test.queries_program.id, 8))", + " └─IndexJoin 3.12 root left outer join, inner:IndexReader, outer key:test.queries_identifier.id, inner key:test.queries_channel.identifier_id, equal cond:eq(test.queries_identifier.id, test.queries_channel.identifier_id)", + " ├─IndexHashJoin(Build) 2.50 root inner join, inner:TableReader, outer key:test.queries_program.identifier_id, inner key:test.queries_identifier.id, equal cond:eq(test.queries_program.identifier_id, test.queries_identifier.id)", + " │ ├─Batch_Point_Get(Build) 2.00 root table:queries_program handle:[8 9], keep order:false, desc:false", + " │ └─TableReader(Probe) 2.00 root data:TableRangeScan", + " │ └─TableRangeScan 2.00 cop[tikv] table:queries_identifier range: decided by [test.queries_program.identifier_id], keep order:false, stats:pseudo", + " └─IndexReader(Probe) 2.50 root index:IndexRangeScan", + " └─IndexRangeScan 2.50 cop[tikv] table:queries_channel, index:identifier_id(identifier_id) range: decided by [eq(test.queries_channel.identifier_id, test.queries_identifier.id)], keep order:false, stats:pseudo")) +} + func TestOuterJoinWIthEqCondCrossInnerJoin(t *testing.T) { store := testkit.CreateMockStore(t)