Fix planner failure in some cases of sorting by an aggregate.

An oversight introduced by the incremental-sort patches caused "could not find pathkey item to sort" errors in some situations where a sort key involves an aggregate or window function. The basic problem here is that find_em_expr_usable_for_sorting_rel isn't properly modeling what prepare_sort_from_pathkeys will do later. Rather than hoping we can keep those functions in sync, let's refactor so that they actually share the code for identifying a suitable sort expression. With this refactoring, tlist.c's tlist_member_ignore_relabel is unused. I removed it in HEAD but left it in place in v13, in case any extensions are using it. Per report from Luc Vlaming. Back-patch to v13 where the problem arose. James Coleman and Tom Lane Discussion: https://postgr.es/m/[email protected]
neondatabase · Apr 20, 2021 · 3753982 · 3753982
1 parent 95c3a19
commit 3753982
Show file tree

Hide file tree

Showing 7 changed files with 262 additions and 181 deletions.
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
@@ -35,6 +35,7 @@
 static EquivalenceMember *add_eq_member(EquivalenceClass *ec,
 										Expr *expr, Relids relids, Relids nullable_relids,
 										bool is_child, Oid datatype);
+static bool is_exprlist_member(Expr *node, List *exprs);
 static void generate_base_implied_equalities_const(PlannerInfo *root,
 												   EquivalenceClass *ec);
 static void generate_base_implied_equalities_no_const(PlannerInfo *root,
@@ -769,6 +770,167 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 	return newec;
 }
 
+/*
+ * find_ec_member_matching_expr
+ *		Locate an EquivalenceClass member matching the given expr, if any;
+ *		return NULL if no match.
+ *
+ * "Matching" is defined as "equal after stripping RelabelTypes".
+ * This is used for identifying sort expressions, and we need to allow
+ * binary-compatible relabeling for some cases involving binary-compatible
+ * sort operators.
+ *
+ * Child EC members are ignored unless they belong to given 'relids'.
+ */
+EquivalenceMember *
+find_ec_member_matching_expr(EquivalenceClass *ec,
+							 Expr *expr,
+							 Relids relids)
+{
+	ListCell   *lc;
+
+	/* We ignore binary-compatible relabeling on both ends */
+	while (expr && IsA(expr, RelabelType))
+		expr = ((RelabelType *) expr)->arg;
+
+	foreach(lc, ec->ec_members)
+	{
+		EquivalenceMember *em = (EquivalenceMember *) lfirst(lc);
+		Expr	   *emexpr;
+
+		/*
+		 * We shouldn't be trying to sort by an equivalence class that
+		 * contains a constant, so no need to consider such cases any further.
+		 */
+		if (em->em_is_const)
+			continue;
+
+		/*
+		 * Ignore child members unless they belong to the requested rel.
+		 */
+		if (em->em_is_child &&
+			!bms_is_subset(em->em_relids, relids))
+			continue;
+
+		/*
+		 * Match if same expression (after stripping relabel).
+		 */
+		emexpr = em->em_expr;
+		while (emexpr && IsA(emexpr, RelabelType))
+			emexpr = ((RelabelType *) emexpr)->arg;
+
+		if (equal(emexpr, expr))
+			return em;
+	}
+
+	return NULL;
+}
+
+/*
+ * find_computable_ec_member
+ *		Locate an EquivalenceClass member that can be computed from the
+ *		expressions appearing in "exprs"; return NULL if no match.
+ *
+ * "exprs" can be either a list of bare expression trees, or a list of
+ * TargetEntry nodes.  Either way, it should contain Vars and possibly
+ * Aggrefs and WindowFuncs, which are matched to the corresponding elements
+ * of the EquivalenceClass's expressions.
+ *
+ * Unlike find_ec_member_matching_expr, there's no special provision here
+ * for binary-compatible relabeling.  This is intentional: if we have to
+ * compute an expression in this way, setrefs.c is going to insist on exact
+ * matches of Vars to the source tlist.
+ *
+ * Child EC members are ignored unless they belong to given 'relids'.
+ * Also, non-parallel-safe expressions are ignored if 'require_parallel_safe'.
+ *
+ * Note: some callers pass root == NULL for notational reasons.  This is OK
+ * when require_parallel_safe is false.
+ */
+EquivalenceMember *
+find_computable_ec_member(PlannerInfo *root,
+						  EquivalenceClass *ec,
+						  List *exprs,
+						  Relids relids,
+						  bool require_parallel_safe)
+{
+	ListCell   *lc;
+
+	foreach(lc, ec->ec_members)
+	{
+		EquivalenceMember *em = (EquivalenceMember *) lfirst(lc);
+		List	   *exprvars;
+		ListCell   *lc2;
+
+		/*
+		 * We shouldn't be trying to sort by an equivalence class that
+		 * contains a constant, so no need to consider such cases any further.
+		 */
+		if (em->em_is_const)
+			continue;
+
+		/*
+		 * Ignore child members unless they belong to the requested rel.
+		 */
+		if (em->em_is_child &&
+			!bms_is_subset(em->em_relids, relids))
+			continue;
+
+		/*
+		 * Match if all Vars and quasi-Vars are available in "exprs".
+		 */
+		exprvars = pull_var_clause((Node *) em->em_expr,
+								   PVC_INCLUDE_AGGREGATES |
+								   PVC_INCLUDE_WINDOWFUNCS |
+								   PVC_INCLUDE_PLACEHOLDERS);
+		foreach(lc2, exprvars)
+		{
+			if (!is_exprlist_member(lfirst(lc2), exprs))
+				break;
+		}
+		list_free(exprvars);
+		if (lc2)
+			continue;			/* we hit a non-available Var */
+
+		/*
+		 * If requested, reject expressions that are not parallel-safe.  We
+		 * check this last because it's a rather expensive test.
+		 */
+		if (require_parallel_safe &&
+			!is_parallel_safe(root, (Node *) em->em_expr))
+			continue;
+
+		return em;				/* found usable expression */
+	}
+
+	return NULL;
+}
+
+/*
+ * is_exprlist_member
+ *	  Subroutine for find_computable_ec_member: is "node" in "exprs"?
+ *
+ * Per the requirements of that function, "exprs" might or might not have
+ * TargetEntry superstructure.
+ */
+static bool
+is_exprlist_member(Expr *node, List *exprs)
+{
+	ListCell   *lc;
+
+	foreach(lc, exprs)
+	{
+		Expr	   *expr = (Expr *) lfirst(lc);
+
+		if (expr && IsA(expr, TargetEntry))
+			expr = ((TargetEntry *) expr)->expr;
+
+		if (equal(node, expr))
+			return true;
+	}
+	return false;
+}
+
 /*
  * Find an equivalence class member expression, all of whose Vars, come from
  * the indicated relation.
@@ -799,71 +961,78 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
 }
 
 /*
- * Find an equivalence class member expression that can be safely used to build
- * a sort node using the provided relation. The rules are a subset of those
- * applied in prepare_sort_from_pathkeys since that function deals with sorts
- * that must be delayed until the last stages of query execution, while here
- * we only care about proactive sorts.
+ * Find an equivalence class member expression that can be used to build
+ * a sort node using the provided relation; return NULL if no candidate.
+ *
+ * To succeed, we must find an EC member that prepare_sort_from_pathkeys knows
+ * how to sort on, given the rel's reltarget as input.  There are also a few
+ * additional constraints based on the fact that the desired sort will be done
+ * within the scan/join part of the plan.  Also, non-parallel-safe expressions
+ * are ignored if 'require_parallel_safe'.
  */
 Expr *
 find_em_expr_usable_for_sorting_rel(PlannerInfo *root, EquivalenceClass *ec,
 									RelOptInfo *rel, bool require_parallel_safe)
 {
-	ListCell   *lc_em;
+	PathTarget *target = rel->reltarget;
+	EquivalenceMember *em;
+	ListCell   *lc;
 
 	/*
-	 * If there is more than one equivalence member matching these
-	 * requirements we'll be content to choose any one of them.
+	 * Reject volatile ECs immediately; such sorts must always be postponed.
 	 */
-	foreach(lc_em, ec->ec_members)
-	{
-		EquivalenceMember *em = lfirst(lc_em);
-		Expr	   *em_expr = em->em_expr;
+	if (ec->ec_has_volatile)
+		return NULL;
 
-		/*
-		 * We shouldn't be trying to sort by an equivalence class that
-		 * contains a constant, so no need to consider such cases any further.
-		 */
-		if (em->em_is_const)
-			continue;
+	/*
+	 * Try to find an EM directly matching some reltarget member.
+	 */
+	foreach(lc, target->exprs)
+	{
+		Expr	   *targetexpr = (Expr *) lfirst(lc);
 
-		/*
-		 * Any Vars in the equivalence class member need to come from this
-		 * relation. This is a superset of prepare_sort_from_pathkeys ignoring
-		 * child members unless they belong to the rel being sorted.
-		 */
-		if (!bms_is_subset(em->em_relids, rel->relids))
+		em = find_ec_member_matching_expr(ec, targetexpr, rel->relids);
+		if (!em)
 			continue;
 
 		/*
-		 * If requested, reject expressions that are not parallel-safe.
+		 * Reject expressions involving set-returning functions, as those
+		 * can't be computed early either.  (Note: this test and the following
+		 * one are effectively checking properties of targetexpr, so there's
+		 * no point in asking whether some other EC member would be better.)
 		 */
-		if (require_parallel_safe && !is_parallel_safe(root, (Node *) em_expr))
+		if (IS_SRF_CALL((Node *) em->em_expr))
 			continue;
 
 		/*
-		 * Disallow SRFs so that all of them can be evaluated at the correct
-		 * time as determined by make_sort_input_target.
+		 * If requested, reject expressions that are not parallel-safe.  We
+		 * check this last because it's a rather expensive test.
 		 */
-		if (IS_SRF_CALL((Node *) em_expr))
+		if (require_parallel_safe &&
+			!is_parallel_safe(root, (Node *) em->em_expr))
 			continue;
 
-		/*
-		 * As long as the expression isn't volatile then
-		 * prepare_sort_from_pathkeys is able to generate a new target entry,
-		 * so there's no need to verify that one already exists.
-		 *
-		 * While prepare_sort_from_pathkeys has to be concerned about matching
-		 * up a volatile expression to the proper tlist entry, it doesn't seem
-		 * valuable here to expend the work trying to find a match in the
-		 * target's exprs since such a sort will have to be postponed anyway.
-		 */
-		if (!ec->ec_has_volatile)
-			return em->em_expr;
+		return em->em_expr;
 	}
 
-	/* We didn't find any suitable equivalence class expression */
-	return NULL;
+	/*
+	 * Try to find a expression computable from the reltarget.
+	 */
+	em = find_computable_ec_member(root, ec, target->exprs, rel->relids,
+								   require_parallel_safe);
+	if (!em)
+		return NULL;
+
+	/*
+	 * Reject expressions involving set-returning functions, as those can't be
+	 * computed early either.  (There's no point in looking for another EC
+	 * member in this case; since SRFs can't appear in WHERE, they cannot
+	 * belong to multi-member ECs.)
+	 */
+	if (IS_SRF_CALL((Node *) em->em_expr))
+		return NULL;
+
+	return em->em_expr;
 }
 
 /*