1 /*-------------------------------------------------------------------------
4 * Special planning for aggregate queries.
6 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.1 2005/04/11 23:06:55 tgl Exp $
13 *-------------------------------------------------------------------------
17 #include "access/skey.h"
18 #include "catalog/pg_aggregate.h"
19 #include "catalog/pg_type.h"
20 #include "nodes/makefuncs.h"
21 #include "optimizer/clauses.h"
22 #include "optimizer/cost.h"
23 #include "optimizer/pathnode.h"
24 #include "optimizer/paths.h"
25 #include "optimizer/planmain.h"
26 #include "optimizer/subselect.h"
27 #include "parser/parsetree.h"
28 #include "parser/parse_clause.h"
29 #include "parser/parse_expr.h"
30 #include "utils/lsyscache.h"
31 #include "utils/syscache.h"
36 Oid aggfnoid; /* pg_proc Oid of the aggregate */
37 Oid aggsortop; /* Oid of its sort operator */
38 Expr *target; /* expression we are aggregating on */
39 IndexPath *path; /* access path for index scan */
40 Cost pathcost; /* estimated cost to fetch first row */
41 Param *param; /* param for subplan's output */
44 static bool find_minmax_aggs_walker(Node *node, List **context);
45 static bool build_minmax_path(Query *root, RelOptInfo *rel,
47 static ScanDirection match_agg_to_index_col(MinMaxAggInfo *info,
48 IndexOptInfo *index, int indexcol);
49 static void make_agg_subplan(Query *root, MinMaxAggInfo *info,
50 List *constant_quals);
51 static Node *replace_aggs_with_params_mutator(Node *node, List **context);
52 static Oid fetch_agg_sort_op(Oid aggfnoid);
56 * optimize_minmax_aggregates - check for optimizing MIN/MAX via indexes
58 * This checks to see if we can replace MIN/MAX aggregate functions by
59 * subqueries of the form
60 * (SELECT col FROM tab WHERE ... ORDER BY col ASC/DESC LIMIT 1)
61 * Given a suitable index on tab.col, this can be much faster than the
62 * generic scan-all-the-rows plan.
64 * We are passed the Query, the preprocessed tlist, and the best path
65 * devised for computing the input of a standard Agg node. If we are able
66 * to optimize all the aggregates, and the result is estimated to be cheaper
67 * than the generic aggregate method, then generate and return a Plan that
68 * does it that way. Otherwise, return NULL.
71 optimize_minmax_aggregates(Query *root, List *tlist, Path *best_path)
85 /* Nothing to do if query has no aggregates */
89 Assert(!root->setOperations); /* shouldn't get here if a setop */
90 Assert(root->rowMarks == NIL); /* nor if FOR UPDATE */
93 * Reject unoptimizable cases.
95 * We don't handle GROUP BY, because our current implementations of
96 * grouping require looking at all the rows anyway, and so there's not
97 * much point in optimizing MIN/MAX.
99 if (root->groupClause)
103 * We also restrict the query to reference exactly one table, since
104 * join conditions can't be handled reasonably. (We could perhaps
105 * handle a query containing cartesian-product joins, but it hardly
106 * seems worth the trouble.)
108 Assert(root->jointree != NULL && IsA(root->jointree, FromExpr));
109 if (list_length(root->jointree->fromlist) != 1)
111 rtr = (RangeTblRef *) linitial(root->jointree->fromlist);
112 if (!IsA(rtr, RangeTblRef))
114 rte = rt_fetch(rtr->rtindex, root->rtable);
115 if (rte->rtekind != RTE_RELATION)
117 rel = find_base_rel(root, rtr->rtindex);
120 * Also reject cases with subplans or volatile functions in WHERE.
121 * This may be overly paranoid, but it's not entirely clear if the
122 * transformation is safe then.
124 if (contain_subplans(root->jointree->quals) ||
125 contain_volatile_functions(root->jointree->quals))
129 * Since this optimization is not applicable all that often, we want
130 * to fall out before doing very much work if possible. Therefore
131 * we do the work in several passes. The first pass scans the tlist
132 * and HAVING qual to find all the aggregates and verify that
133 * each of them is a MIN/MAX aggregate. If that succeeds, the second
134 * pass looks at each aggregate to see if it is optimizable; if so
135 * we make an IndexPath describing how we would scan it. (We do not
136 * try to optimize if only some aggs are optimizable, since that means
137 * we'll have to scan all the rows anyway.) If that succeeds, we have
138 * enough info to compare costs against the generic implementation.
139 * Only if that test passes do we build a Plan.
142 /* Pass 1: find all the aggregates */
144 if (find_minmax_aggs_walker((Node *) tlist, &aggs_list))
146 if (find_minmax_aggs_walker(root->havingQual, &aggs_list))
149 /* Pass 2: see if each one is optimizable */
151 foreach(l, aggs_list)
153 MinMaxAggInfo *info = (MinMaxAggInfo *) lfirst(l);
155 if (!build_minmax_path(root, rel, info))
157 total_cost += info->pathcost;
161 * Make the cost comparison.
163 * Note that we don't include evaluation cost of the tlist here;
164 * this is OK since it isn't included in best_path's cost either,
165 * and should be the same in either case.
167 cost_agg(&agg_p, root, AGG_PLAIN, list_length(aggs_list),
169 best_path->startup_cost, best_path->total_cost,
170 best_path->parent->rows);
172 if (total_cost > agg_p.total_cost)
173 return NULL; /* too expensive */
176 * OK, we are going to generate an optimized plan. The first thing we
177 * need to do is look for any non-variable WHERE clauses that query_planner
178 * might have removed from the basic plan. (Normal WHERE clauses will
179 * be properly incorporated into the sub-plans by create_plan.) If there
180 * are any, they will be in a gating Result node atop the best_path.
181 * They have to be incorporated into a gating Result in each sub-plan
182 * in order to produce the semantically correct result.
184 if (IsA(best_path, ResultPath))
186 Assert(((ResultPath *) best_path)->subpath != NULL);
187 constant_quals = ((ResultPath *) best_path)->constantqual;
190 constant_quals = NIL;
192 /* Pass 3: generate subplans and output Param nodes */
193 foreach(l, aggs_list)
195 make_agg_subplan(root, (MinMaxAggInfo *) lfirst(l), constant_quals);
199 * Modify the targetlist and HAVING qual to reference subquery outputs
201 tlist = (List *) replace_aggs_with_params_mutator((Node *) tlist,
203 hqual = replace_aggs_with_params_mutator(root->havingQual,
207 * Generate the output plan --- basically just a Result
209 plan = (Plan *) make_result(tlist, hqual, NULL);
211 /* Account for evaluation cost of the tlist (make_result did the rest) */
212 cost_qual_eval(&tlist_cost, tlist);
213 plan->startup_cost += tlist_cost.startup;
214 plan->total_cost += tlist_cost.startup + tlist_cost.per_tuple;
220 * find_minmax_aggs_walker
221 * Recursively scan the Aggref nodes in an expression tree, and check
222 * that each one is a MIN/MAX aggregate. If so, build a list of the
223 * distinct aggregate calls in the tree.
225 * Returns TRUE if a non-MIN/MAX aggregate is found, FALSE otherwise.
226 * (This seemingly-backward definition is used because expression_tree_walker
227 * aborts the scan on TRUE return, which is what we want.)
229 * Found aggregates are added to the list at *context; it's up to the caller
230 * to initialize the list to NIL.
232 * This does not descend into subqueries, and so should be used only after
233 * reduction of sublinks to subplans. There mustn't be outer-aggregate
237 find_minmax_aggs_walker(Node *node, List **context)
241 if (IsA(node, Aggref))
243 Aggref *aggref = (Aggref *) node;
248 Assert(aggref->agglevelsup == 0);
250 return true; /* foo(*) is surely not optimizable */
251 /* note: we do not care if DISTINCT is mentioned ... */
253 aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
254 if (!OidIsValid(aggsortop))
255 return true; /* not a MIN/MAX aggregate */
258 * Check whether it's already in the list, and add it if not.
262 info = (MinMaxAggInfo *) lfirst(l);
263 if (info->aggfnoid == aggref->aggfnoid &&
264 equal(info->target, aggref->target))
268 info = (MinMaxAggInfo *) palloc0(sizeof(MinMaxAggInfo));
269 info->aggfnoid = aggref->aggfnoid;
270 info->aggsortop = aggsortop;
271 info->target = aggref->target;
273 *context = lappend(*context, info);
276 * We need not recurse into the argument, since it can't contain
281 Assert(!IsA(node, SubLink));
282 return expression_tree_walker(node, find_minmax_aggs_walker,
288 * Given a MIN/MAX aggregate, try to find an index it can be optimized
289 * with. Build a Path describing the best such index path.
291 * Returns TRUE if successful, FALSE if not. In the TRUE case, info->path
294 * XXX look at sharing more code with indxpath.c.
296 * Note: check_partial_indexes() must have been run previously.
299 build_minmax_path(Query *root, RelOptInfo *rel, MinMaxAggInfo *info)
301 IndexPath *best_path = NULL;
305 foreach(l, rel->indexlist)
307 IndexOptInfo *index = (IndexOptInfo *) lfirst(l);
308 ScanDirection indexscandir = NoMovementScanDirection;
311 List *restrictclauses;
315 /* Ignore non-btree indexes */
316 if (index->relam != BTREE_AM_OID)
319 /* Ignore partial indexes that do not match the query */
320 if (index->indpred != NIL && !index->predOK)
324 * Look for a match to one of the index columns. (In a stupidly
325 * designed index, there could be multiple matches, but we only
326 * care about the first one.)
328 for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
330 indexscandir = match_agg_to_index_col(info, index, indexcol);
331 if (!ScanDirectionIsNoMovement(indexscandir))
334 if (ScanDirectionIsNoMovement(indexscandir))
338 * If the match is not at the first index column, we have to verify
339 * that there are "x = something" restrictions on all the earlier
340 * index columns. Since we'll need the restrictclauses list anyway
341 * to build the path, it's convenient to extract that first and then
342 * look through it for the equality restrictions.
344 restrictclauses = group_clauses_by_indexkey(index);
346 if (list_length(restrictclauses) < indexcol)
347 continue; /* definitely haven't got enough */
348 for (prevcol = 0; prevcol < indexcol; prevcol++)
350 List *rinfos = (List *) list_nth(restrictclauses, prevcol);
355 RestrictInfo *rinfo = (RestrictInfo *) lfirst(ll);
358 Assert(is_opclause(rinfo->clause));
360 get_op_opclass_strategy(((OpExpr *) rinfo->clause)->opno,
361 index->classlist[prevcol]);
362 if (strategy == BTEqualStrategyNumber)
366 break; /* none are Equal for this index col */
368 if (prevcol < indexcol)
369 continue; /* didn't find all Equal clauses */
372 * Build the access path. We don't bother marking it with pathkeys.
374 new_path = create_index_path(root, index,
380 * Estimate actual cost of fetching just one row.
382 if (new_path->rows > 1.0)
383 new_cost = new_path->path.startup_cost +
384 (new_path->path.total_cost - new_path->path.startup_cost)
385 * 1.0 / new_path->rows;
387 new_cost = new_path->path.total_cost;
390 * Keep if first or if cheaper than previous best.
392 if (best_path == NULL || new_cost < best_cost)
394 best_path = new_path;
395 best_cost = new_cost;
399 info->path = best_path;
400 info->pathcost = best_cost;
401 return (best_path != NULL);
405 * match_agg_to_index_col
406 * Does an aggregate match an index column?
408 * It matches if its argument is equal to the index column's data and its
409 * sortop is either the LessThan or GreaterThan member of the column's opclass.
411 * We return ForwardScanDirection if match the LessThan member,
412 * BackwardScanDirection if match the GreaterThan member,
413 * and NoMovementScanDirection if there's no match.
416 match_agg_to_index_col(MinMaxAggInfo *info, IndexOptInfo *index, int indexcol)
420 /* Check for data match */
421 if (!match_index_to_operand((Node *) info->target, indexcol, index))
422 return NoMovementScanDirection;
424 /* Look up the operator in the opclass */
425 strategy = get_op_opclass_strategy(info->aggsortop,
426 index->classlist[indexcol]);
427 if (strategy == BTLessStrategyNumber)
428 return ForwardScanDirection;
429 if (strategy == BTGreaterStrategyNumber)
430 return BackwardScanDirection;
431 return NoMovementScanDirection;
435 * Construct a suitable plan for a converted aggregate query
438 make_agg_subplan(Query *root, MinMaxAggInfo *info, List *constant_quals)
447 * Generate a suitably modified Query node. Much of the work here is
448 * probably unnecessary in the normal case, but we want to make it look
449 * good if someone tries to EXPLAIN the result.
451 subquery = (Query *) copyObject(root);
452 subquery->commandType = CMD_SELECT;
453 subquery->resultRelation = 0;
454 subquery->resultRelations = NIL;
455 subquery->into = NULL;
456 subquery->hasAggs = false;
457 subquery->groupClause = NIL;
458 subquery->havingQual = NULL;
459 subquery->hasHavingQual = false;
460 subquery->distinctClause = NIL;
462 /* single tlist entry that is the aggregate target */
463 tle = makeTargetEntry(copyObject(info->target),
465 pstrdup("agg_target"),
467 subquery->targetList = list_make1(tle);
469 /* set up the appropriate ORDER BY entry */
470 sortcl = makeNode(SortClause);
471 sortcl->tleSortGroupRef = assignSortGroupRef(tle, subquery->targetList);
472 sortcl->sortop = info->aggsortop;
473 subquery->sortClause = list_make1(sortcl);
476 subquery->limitOffset = NULL;
477 subquery->limitCount = (Node *) makeConst(INT4OID, sizeof(int4),
482 * Generate the plan for the subquery. We already have a Path for
483 * the basic indexscan, but we have to convert it to a Plan and
484 * attach a LIMIT node above it. We might need a gating Result, too,
485 * which is most easily added at the Path stage.
487 path = (Path *) info->path;
490 path = (Path *) create_result_path(NULL,
492 copyObject(constant_quals));
494 plan = create_plan(subquery, path);
496 plan->targetlist = copyObject(subquery->targetList);
498 plan = (Plan *) make_limit(plan,
499 subquery->limitOffset,
500 subquery->limitCount);
503 * Convert the plan into an InitPlan, and make a Param for its result.
505 info->param = SS_make_initplan_from_plan(subquery, plan,
506 exprType((Node *) tle->expr),
511 * Replace original aggregate calls with subplan output Params
514 replace_aggs_with_params_mutator(Node *node, List **context)
518 if (IsA(node, Aggref))
520 Aggref *aggref = (Aggref *) node;
525 MinMaxAggInfo *info = (MinMaxAggInfo *) lfirst(l);
527 if (info->aggfnoid == aggref->aggfnoid &&
528 equal(info->target, aggref->target))
529 return (Node *) info->param;
531 elog(ERROR, "failed to re-find aggregate info record");
533 Assert(!IsA(node, SubLink));
534 return expression_tree_mutator(node, replace_aggs_with_params_mutator,
539 * Get the OID of the sort operator, if any, associated with an aggregate.
540 * Returns InvalidOid if there is no such operator.
543 fetch_agg_sort_op(Oid aggfnoid)
547 Form_pg_aggregate aggform;
550 /* fetch aggregate entry from pg_aggregate */
551 aggTuple = SearchSysCache(AGGFNOID,
552 ObjectIdGetDatum(aggfnoid),
554 if (!HeapTupleIsValid(aggTuple))
556 aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
557 aggsortop = aggform->aggsortop;
558 ReleaseSysCache(aggTuple);
563 * XXX stub implementation for testing: hardwire a few cases.
565 if (aggfnoid == 2132) /* min(int4) -> int4lt */
567 if (aggfnoid == 2116) /* max(int4) -> int4gt */
569 if (aggfnoid == 2145) /* min(text) -> text_lt */
571 if (aggfnoid == 2129) /* max(text) -> text_gt */