Skip to content

Commit

Permalink
Teach estimate_array_length() to use statistics where available.
Browse files Browse the repository at this point in the history
If we have DECHIST statistics about the argument expression, use
the average number of distinct elements as the array length estimate.
(It'd be better to use the average total number of elements, but
that is not currently calculated by compute_array_stats(), and
it's unclear that it'd be worth extra effort to get.)

To do this, we have to change the signature of estimate_array_length
to pass the "root" pointer.  While at it, also change its result
type to "double".  That's probably not really necessary, but it
avoids any risk of overflow of the value extracted from DECHIST.
All existing callers are going to use the result in a "double"
calculation anyway.

Paul Jungwirth, reviewed by Jian He and myself

Discussion: https://postgr.es/m/CA+renyUnM2d+SmrxKpDuAdpiq6FOM=FByvi6aS6yi__qyf6j9A@mail.gmail.com
  • Loading branch information
tglsfdc committed Jan 4, 2024
1 parent 14dd0f2 commit 9391f71
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 16 deletions.
10 changes: 5 additions & 5 deletions src/backend/optimizer/path/costsize.c
Original file line number Diff line number Diff line change
Expand Up @@ -1256,7 +1256,7 @@ cost_tidscan(Path *path, PlannerInfo *root,
QualCost qpqual_cost;
Cost cpu_per_tuple;
QualCost tid_qual_cost;
-int ntuples;
+double ntuples;
ListCell *l;
double spc_random_page_cost;

Expand All @@ -1283,7 +1283,7 @@ cost_tidscan(Path *path, PlannerInfo *root,
ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) qual;
Node *arraynode = (Node *) lsecond(saop->args);

-ntuples += estimate_array_length(arraynode);
+ntuples += estimate_array_length(root, arraynode);
}
else if (IsA(qual, CurrentOfExpr))
{
Expand Down Expand Up @@ -4770,7 +4770,7 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
Node *arraynode = (Node *) lsecond(saop->args);
QualCost sacosts;
QualCost hcosts;
-int estarraylen = estimate_array_length(arraynode);
+double estarraylen = estimate_array_length(context->root, arraynode);

set_sa_opfuncid(saop);
sacosts.startup = sacosts.per_tuple = 0;
Expand Down Expand Up @@ -4808,7 +4808,7 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
*/
context->total.startup += sacosts.startup;
context->total.per_tuple += sacosts.per_tuple *
-estimate_array_length(arraynode) * 0.5;
+estimate_array_length(context->root, arraynode) * 0.5;
}
}
else if (IsA(node, Aggref) ||
Expand Down Expand Up @@ -4859,7 +4859,7 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
context->total.startup += perelemcost.startup;
if (perelemcost.per_tuple > 0)
context->total.per_tuple += perelemcost.per_tuple *
-estimate_array_length((Node *) acoerce->arg);
+estimate_array_length(context->root, (Node *) acoerce->arg);
}
else if (IsA(node, RowCompareExpr))
{
Expand Down
2 changes: 1 addition & 1 deletion src/backend/utils/adt/arrayfuncs.c
Original file line number Diff line number Diff line change
Expand Up @@ -6340,7 +6340,7 @@ array_unnest_support(PG_FUNCTION_ARGS)
/* We can use estimated argument values here */
arg1 = estimate_expression_value(req->root, linitial(args));

-req->rows = estimate_array_length(arg1);
+req->rows = estimate_array_length(req->root, arg1);
ret = (Node *) req;
}
}
Expand Down
47 changes: 38 additions & 9 deletions src/backend/utils/adt/selfuncs.c
Original file line number Diff line number Diff line change
Expand Up @@ -2128,10 +2128,11 @@ scalararraysel(PlannerInfo *root,
/*
* Estimate number of elements in the array yielded by an expression.
*
-* It's important that this agree with scalararraysel.
+* Note: the result is integral, but we use "double" to avoid overflow
+* concerns. Most callers will use it in double-type expressions anyway.
 */
-int
-estimate_array_length(Node *arrayexpr)
+double
+estimate_array_length(PlannerInfo *root, Node *arrayexpr)
{
/* look through any binary-compatible relabeling of arrayexpr */
arrayexpr = strip_array_coercion(arrayexpr);
Expand All @@ -2152,11 +2153,39 @@ estimate_array_length(Node *arrayexpr)
{
return list_length(((ArrayExpr *) arrayexpr)->elements);
}
-else
+else if (arrayexpr)
 {
-/* default guess --- see also scalararraysel */
-return 10;
+/* See if we can find any statistics about it */
+VariableStatData vardata;
+AttStatsSlot sslot;
+double nelem = 0;
+
+examine_variable(root, arrayexpr, 0, &vardata);
+if (HeapTupleIsValid(vardata.statsTuple))
+{
+/*
+* Found stats, so use the average element count, which is stored
+* in the last stanumbers element of the DECHIST statistics.
+* Actually that is the average count of *distinct* elements;
+* perhaps we should scale it up somewhat?
+*/
+if (get_attstatsslot(&sslot, vardata.statsTuple,
+STATISTIC_KIND_DECHIST, InvalidOid,
+ATTSTATSSLOT_NUMBERS))
+{
+if (sslot.nnumbers > 0)
+nelem = clamp_row_est(sslot.numbers[sslot.nnumbers - 1]);
+free_attstatsslot(&sslot);
+}
+}
+ReleaseVariableStats(vardata);
+
+if (nelem > 0)
+return nelem;
 }
+
+/* Else use a default guess --- this should match scalararraysel */
+return 10;
}

/*
Expand Down Expand Up @@ -6540,7 +6569,7 @@ genericcostestimate(PlannerInfo *root,
if (IsA(rinfo->clause, ScalarArrayOpExpr))
{
ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
-int alength = estimate_array_length(lsecond(saop->args));
+double alength = estimate_array_length(root, lsecond(saop->args));

if (alength > 1)
num_sa_scans *= alength;
Expand Down Expand Up @@ -6820,7 +6849,7 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
{
ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
Node *other_operand = (Node *) lsecond(saop->args);
-int alength = estimate_array_length(other_operand);
+double alength = estimate_array_length(root, other_operand);

clause_op = saop->opno;
found_saop = true;
Expand Down Expand Up @@ -7414,7 +7443,7 @@ gincost_scalararrayopexpr(PlannerInfo *root,
{
counts->exactEntries++;
counts->searchEntries++;
-counts->arrayScans *= estimate_array_length(rightop);
+counts->arrayScans *= estimate_array_length(root, rightop);
return true;
}

Expand Down
2 changes: 1 addition & 1 deletion src/include/utils/selfuncs.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ extern Selectivity scalararraysel(PlannerInfo *root,
ScalarArrayOpExpr *clause,
bool is_join_clause,
int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);
-extern int estimate_array_length(Node *arrayexpr);
+extern double estimate_array_length(PlannerInfo *root, Node *arrayexpr);
extern Selectivity rowcomparesel(PlannerInfo *root,
RowCompareExpr *clause,
int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);
Expand Down

0 comments on commit 9391f71

Please sign in to comment.