1 /*-------------------------------------------------------------------------
4 * Special planning for aggregate queries.
6 * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.52 2010/05/10 16:25:46 tgl Exp $
13 *-------------------------------------------------------------------------
17 #include "catalog/pg_aggregate.h"
18 #include "catalog/pg_am.h"
19 #include "catalog/pg_type.h"
20 #include "nodes/makefuncs.h"
21 #include "nodes/nodeFuncs.h"
22 #include "optimizer/clauses.h"
23 #include "optimizer/cost.h"
24 #include "optimizer/pathnode.h"
25 #include "optimizer/paths.h"
26 #include "optimizer/planmain.h"
27 #include "optimizer/predtest.h"
28 #include "optimizer/subselect.h"
29 #include "parser/parse_clause.h"
30 #include "parser/parsetree.h"
31 #include "utils/lsyscache.h"
32 #include "utils/syscache.h"
37 Oid aggfnoid; /* pg_proc Oid of the aggregate */
38 Oid aggsortop; /* Oid of its sort operator */
39 Expr *target; /* expression we are aggregating on */
40 NullTest *notnulltest; /* expression for "target IS NOT NULL" */
41 IndexPath *path; /* access path for index scan */
42 Cost pathcost; /* estimated cost to fetch first row */
43 bool nulls_first; /* null ordering direction matching index */
44 Param *param; /* param for subplan's output */
47 static bool find_minmax_aggs_walker(Node *node, List **context);
48 static bool build_minmax_path(PlannerInfo *root, RelOptInfo *rel,
50 static ScanDirection match_agg_to_index_col(MinMaxAggInfo *info,
51 IndexOptInfo *index, int indexcol);
52 static void make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info);
53 static void attach_notnull_index_qual(MinMaxAggInfo *info, IndexScan *iplan);
54 static Node *replace_aggs_with_params_mutator(Node *node, List **context);
55 static Oid fetch_agg_sort_op(Oid aggfnoid);
59 * optimize_minmax_aggregates - check for optimizing MIN/MAX via indexes
61 * This checks to see if we can replace MIN/MAX aggregate functions by
62 * subqueries of the form
63 * (SELECT col FROM tab WHERE ... ORDER BY col ASC/DESC LIMIT 1)
64 * Given a suitable index on tab.col, this can be much faster than the
65 * generic scan-all-the-rows plan.
67 * We are passed the preprocessed tlist, and the best path
68 * devised for computing the input of a standard Agg node. If we are able
69 * to optimize all the aggregates, and the result is estimated to be cheaper
70 * than the generic aggregate method, then generate and return a Plan that
71 * does it that way. Otherwise, return NULL.
74 optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path)
76 Query *parse = root->parse;
89 /* Nothing to do if query has no aggregates */
93 Assert(!parse->setOperations); /* shouldn't get here if a setop */
94 Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */
97 * Reject unoptimizable cases.
99 * We don't handle GROUP BY or windowing, because our current
100 * implementations of grouping require looking at all the rows anyway, and
101 * so there's not much point in optimizing MIN/MAX.
103 if (parse->groupClause || parse->hasWindowFuncs)
107 * We also restrict the query to reference exactly one table, since join
108 * conditions can't be handled reasonably. (We could perhaps handle a
109 * query containing cartesian-product joins, but it hardly seems worth the
110 * trouble.) However, the single real table could be buried in several
111 * levels of FromExpr.
113 jtnode = parse->jointree;
114 while (IsA(jtnode, FromExpr))
116 if (list_length(jtnode->fromlist) != 1)
118 jtnode = linitial(jtnode->fromlist);
120 if (!IsA(jtnode, RangeTblRef))
122 rtr = (RangeTblRef *) jtnode;
123 rte = planner_rt_fetch(rtr->rtindex, root);
124 if (rte->rtekind != RTE_RELATION || rte->inh)
126 rel = find_base_rel(root, rtr->rtindex);
129 * Since this optimization is not applicable all that often, we want to
130 * fall out before doing very much work if possible. Therefore we do the
131 * work in several passes. The first pass scans the tlist and HAVING qual
132 * to find all the aggregates and verify that each of them is a MIN/MAX
133 * aggregate. If that succeeds, the second pass looks at each aggregate
134 * to see if it is optimizable; if so we make an IndexPath describing how
135 * we would scan it. (We do not try to optimize if only some aggs are
136 * optimizable, since that means we'll have to scan all the rows anyway.)
137 * If that succeeds, we have enough info to compare costs against the
138 * generic implementation. Only if that test passes do we build a Plan.
141 /* Pass 1: find all the aggregates */
143 if (find_minmax_aggs_walker((Node *) tlist, &aggs_list))
145 if (find_minmax_aggs_walker(parse->havingQual, &aggs_list))
148 /* Pass 2: see if each one is optimizable */
150 foreach(l, aggs_list)
152 MinMaxAggInfo *info = (MinMaxAggInfo *) lfirst(l);
154 if (!build_minmax_path(root, rel, info))
156 total_cost += info->pathcost;
160 * Make the cost comparison.
162 * Note that we don't include evaluation cost of the tlist here; this is
163 * OK since it isn't included in best_path's cost either, and should be
164 * the same in either case.
166 cost_agg(&agg_p, root, AGG_PLAIN, list_length(aggs_list),
168 best_path->startup_cost, best_path->total_cost,
169 best_path->parent->rows);
171 if (total_cost > agg_p.total_cost)
172 return NULL; /* too expensive */
175 * OK, we are going to generate an optimized plan.
178 /* Pass 3: generate subplans and output Param nodes */
179 foreach(l, aggs_list)
181 make_agg_subplan(root, (MinMaxAggInfo *) lfirst(l));
185 * Modify the targetlist and HAVING qual to reference subquery outputs
187 tlist = (List *) replace_aggs_with_params_mutator((Node *) tlist,
189 hqual = replace_aggs_with_params_mutator(parse->havingQual,
193 * We have to replace Aggrefs with Params in equivalence classes too, else
194 * ORDER BY or DISTINCT on an optimized aggregate will fail.
196 * Note: at some point it might become necessary to mutate other data
197 * structures too, such as the query's sortClause or distinctClause. Right
198 * now, those won't be examined after this point.
200 mutate_eclass_expressions(root,
201 replace_aggs_with_params_mutator,
205 * Generate the output plan --- basically just a Result
207 plan = (Plan *) make_result(root, tlist, hqual, NULL);
209 /* Account for evaluation cost of the tlist (make_result did the rest) */
210 cost_qual_eval(&tlist_cost, tlist, root);
211 plan->startup_cost += tlist_cost.startup;
212 plan->total_cost += tlist_cost.startup + tlist_cost.per_tuple;
218 * find_minmax_aggs_walker
219 * Recursively scan the Aggref nodes in an expression tree, and check
220 * that each one is a MIN/MAX aggregate. If so, build a list of the
221 * distinct aggregate calls in the tree.
223 * Returns TRUE if a non-MIN/MAX aggregate is found, FALSE otherwise.
224 * (This seemingly-backward definition is used because expression_tree_walker
225 * aborts the scan on TRUE return, which is what we want.)
227 * Found aggregates are added to the list at *context; it's up to the caller
228 * to initialize the list to NIL.
230 * This does not descend into subqueries, and so should be used only after
231 * reduction of sublinks to subplans. There mustn't be outer-aggregate
235 find_minmax_aggs_walker(Node *node, List **context)
239 if (IsA(node, Aggref))
241 Aggref *aggref = (Aggref *) node;
243 TargetEntry *curTarget;
247 Assert(aggref->agglevelsup == 0);
248 if (list_length(aggref->args) != 1 || aggref->aggorder != NIL)
249 return true; /* it couldn't be MIN/MAX */
250 /* note: we do not care if DISTINCT is mentioned ... */
252 aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
253 if (!OidIsValid(aggsortop))
254 return true; /* not a MIN/MAX aggregate */
257 * Check whether it's already in the list, and add it if not.
259 curTarget = (TargetEntry *) linitial(aggref->args);
262 info = (MinMaxAggInfo *) lfirst(l);
263 if (info->aggfnoid == aggref->aggfnoid &&
264 equal(info->target, curTarget->expr))
268 info = (MinMaxAggInfo *) palloc0(sizeof(MinMaxAggInfo));
269 info->aggfnoid = aggref->aggfnoid;
270 info->aggsortop = aggsortop;
271 info->target = curTarget->expr;
273 *context = lappend(*context, info);
276 * We need not recurse into the argument, since it can't contain any
281 Assert(!IsA(node, SubLink));
282 return expression_tree_walker(node, find_minmax_aggs_walker,
288 * Given a MIN/MAX aggregate, try to find an index it can be optimized
289 * with. Build a Path describing the best such index path.
291 * Returns TRUE if successful, FALSE if not. In the TRUE case, info->path
294 * XXX look at sharing more code with indxpath.c.
296 * Note: check_partial_indexes() must have been run previously.
299 build_minmax_path(PlannerInfo *root, RelOptInfo *rel, MinMaxAggInfo *info)
301 IndexPath *best_path = NULL;
303 bool best_nulls_first = false;
308 /* Build "target IS NOT NULL" expression for use below */
309 ntest = makeNode(NullTest);
310 ntest->nulltesttype = IS_NOT_NULL;
311 ntest->arg = copyObject(info->target);
312 ntest->argisrow = type_is_rowtype(exprType((Node *) ntest->arg));
314 return false; /* punt on composites */
315 info->notnulltest = ntest;
318 * Build list of existing restriction clauses plus the notnull test. We
319 * cheat a bit by not bothering with a RestrictInfo node for the notnull
320 * test --- predicate_implied_by() won't care.
322 allquals = list_concat(list_make1(ntest), rel->baserestrictinfo);
324 foreach(l, rel->indexlist)
326 IndexOptInfo *index = (IndexOptInfo *) lfirst(l);
327 ScanDirection indexscandir = NoMovementScanDirection;
330 List *restrictclauses;
335 /* Ignore non-btree indexes */
336 if (index->relam != BTREE_AM_OID)
340 * Ignore partial indexes that do not match the query --- unless their
341 * predicates can be proven from the baserestrict list plus the IS NOT
342 * NULL test. In that case we can use them.
344 if (index->indpred != NIL && !index->predOK &&
345 !predicate_implied_by(index->indpred, allquals))
349 * Look for a match to one of the index columns. (In a stupidly
350 * designed index, there could be multiple matches, but we only care
351 * about the first one.)
353 for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
355 indexscandir = match_agg_to_index_col(info, index, indexcol);
356 if (!ScanDirectionIsNoMovement(indexscandir))
359 if (ScanDirectionIsNoMovement(indexscandir))
363 * If the match is not at the first index column, we have to verify
364 * that there are "x = something" restrictions on all the earlier
365 * index columns. Since we'll need the restrictclauses list anyway to
366 * build the path, it's convenient to extract that first and then look
367 * through it for the equality restrictions.
369 restrictclauses = group_clauses_by_indexkey(index,
370 index->rel->baserestrictinfo,
376 if (list_length(restrictclauses) < indexcol)
377 continue; /* definitely haven't got enough */
378 for (prevcol = 0; prevcol < indexcol; prevcol++)
380 List *rinfos = (List *) list_nth(restrictclauses, prevcol);
385 RestrictInfo *rinfo = (RestrictInfo *) lfirst(ll);
388 /* Could be an IS_NULL test, if so ignore */
389 if (!is_opclause(rinfo->clause))
392 get_op_opfamily_strategy(((OpExpr *) rinfo->clause)->opno,
393 index->opfamily[prevcol]);
394 if (strategy == BTEqualStrategyNumber)
398 break; /* none are Equal for this index col */
400 if (prevcol < indexcol)
401 continue; /* didn't find all Equal clauses */
404 * Build the access path. We don't bother marking it with pathkeys.
406 new_path = create_index_path(root, index,
413 * Estimate actual cost of fetching just one row.
415 if (new_path->rows > 1.0)
416 new_cost = new_path->path.startup_cost +
417 (new_path->path.total_cost - new_path->path.startup_cost)
418 * 1.0 / new_path->rows;
420 new_cost = new_path->path.total_cost;
423 * Keep if first or if cheaper than previous best.
425 if (best_path == NULL || new_cost < best_cost)
427 best_path = new_path;
428 best_cost = new_cost;
429 if (ScanDirectionIsForward(indexscandir))
430 best_nulls_first = index->nulls_first[indexcol];
432 best_nulls_first = !index->nulls_first[indexcol];
436 info->path = best_path;
437 info->pathcost = best_cost;
438 info->nulls_first = best_nulls_first;
439 return (best_path != NULL);
443 * match_agg_to_index_col
444 * Does an aggregate match an index column?
446 * It matches if its argument is equal to the index column's data and its
447 * sortop is either the forward or reverse sort operator for the column.
449 * We return ForwardScanDirection if match the forward sort operator,
450 * BackwardScanDirection if match the reverse sort operator,
451 * and NoMovementScanDirection if there's no match.
454 match_agg_to_index_col(MinMaxAggInfo *info, IndexOptInfo *index, int indexcol)
456 ScanDirection result;
458 /* Check for operator match first (cheaper) */
459 if (info->aggsortop == index->fwdsortop[indexcol])
460 result = ForwardScanDirection;
461 else if (info->aggsortop == index->revsortop[indexcol])
462 result = BackwardScanDirection;
464 return NoMovementScanDirection;
466 /* Check for data match */
467 if (!match_index_to_operand((Node *) info->target, indexcol, index))
468 return NoMovementScanDirection;
474 * Construct a suitable plan for a converted aggregate query
477 make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info)
484 SortGroupClause *sortcl;
487 * Generate a suitably modified query. Much of the work here is probably
488 * unnecessary in the normal case, but we want to make it look good if
489 * someone tries to EXPLAIN the result.
491 memcpy(&subroot, root, sizeof(PlannerInfo));
492 subroot.parse = subparse = (Query *) copyObject(root->parse);
493 subparse->commandType = CMD_SELECT;
494 subparse->resultRelation = 0;
495 subparse->returningList = NIL;
496 subparse->utilityStmt = NULL;
497 subparse->intoClause = NULL;
498 subparse->hasAggs = false;
499 subparse->hasDistinctOn = false;
500 subparse->groupClause = NIL;
501 subparse->havingQual = NULL;
502 subparse->distinctClause = NIL;
503 subroot.hasHavingQual = false;
505 /* single tlist entry that is the aggregate target */
506 tle = makeTargetEntry(copyObject(info->target),
508 pstrdup("agg_target"),
510 subparse->targetList = list_make1(tle);
512 /* set up the appropriate ORDER BY entry */
513 sortcl = makeNode(SortGroupClause);
514 sortcl->tleSortGroupRef = assignSortGroupRef(tle, subparse->targetList);
515 sortcl->eqop = get_equality_op_for_ordering_op(info->aggsortop, NULL);
516 if (!OidIsValid(sortcl->eqop)) /* shouldn't happen */
517 elog(ERROR, "could not find equality operator for ordering operator %u",
519 sortcl->sortop = info->aggsortop;
520 sortcl->nulls_first = info->nulls_first;
521 subparse->sortClause = list_make1(sortcl);
524 subparse->limitOffset = NULL;
525 subparse->limitCount = (Node *) makeConst(INT8OID, -1, sizeof(int64),
526 Int64GetDatum(1), false,
530 * Generate the plan for the subquery. We already have a Path for the
531 * basic indexscan, but we have to convert it to a Plan and attach a LIMIT
534 * Also we must add a "WHERE target IS NOT NULL" restriction to the
535 * indexscan, to be sure we don't return a NULL, which'd be contrary to
536 * the standard behavior of MIN/MAX.
538 * The NOT NULL qual has to go on the actual indexscan; create_plan might
539 * have stuck a gating Result atop that, if there were any pseudoconstant
542 plan = create_plan(&subroot, (Path *) info->path);
544 plan->targetlist = copyObject(subparse->targetList);
546 if (IsA(plan, Result))
547 iplan = (IndexScan *) plan->lefttree;
549 iplan = (IndexScan *) plan;
550 if (!IsA(iplan, IndexScan))
551 elog(ERROR, "result of create_plan(IndexPath) isn't an IndexScan");
553 attach_notnull_index_qual(info, iplan);
555 plan = (Plan *) make_limit(plan,
556 subparse->limitOffset,
557 subparse->limitCount,
561 * Convert the plan into an InitPlan, and make a Param for its result.
563 info->param = SS_make_initplan_from_plan(&subroot, plan,
564 exprType((Node *) tle->expr),
568 * Put the updated list of InitPlans back into the outer PlannerInfo.
570 root->init_plans = subroot.init_plans;
574 * Add "target IS NOT NULL" to the quals of the given indexscan.
576 * This is trickier than it sounds because the new qual has to be added at an
577 * appropriate place in the qual list, to preserve the list's ordering by
578 * index column position.
581 attach_notnull_index_qual(MinMaxAggInfo *info, IndexScan *iplan)
585 List *newindexqualorig;
590 AttrNumber targetattno;
593 * We can skip adding the NOT NULL qual if it duplicates either an
594 * already-given WHERE condition, or a clause of the index predicate.
596 if (list_member(iplan->indexqualorig, info->notnulltest) ||
597 list_member(info->path->indexinfo->indpred, info->notnulltest))
600 /* Need a "fixed" copy as well as the original */
601 ntest = copyObject(info->notnulltest);
602 ntest->arg = (Expr *) fix_indexqual_operand((Node *) ntest->arg,
603 info->path->indexinfo);
605 /* Identify the target index column from the "fixed" copy */
608 if (leftop && IsA(leftop, RelabelType))
609 leftop = ((RelabelType *) leftop)->arg;
611 Assert(leftop != NULL);
613 if (!IsA(leftop, Var))
614 elog(ERROR, "NullTest indexqual has wrong key");
616 targetattno = ((Var *) leftop)->varattno;
619 * list.c doesn't expose a primitive to insert a list cell at an arbitrary
620 * position, so our strategy is to copy the lists and insert the null test
621 * when we reach an appropriate spot.
623 newindexqual = newindexqualorig = NIL;
626 forboth(lc1, iplan->indexqual, lc2, iplan->indexqualorig)
628 Expr *qual = (Expr *) lfirst(lc1);
629 Expr *qualorig = (Expr *) lfirst(lc2);
633 * Identify which index column this qual is for. This code should
634 * match the qual disassembly code in ExecIndexBuildScanKeys.
636 if (IsA(qual, OpExpr))
638 /* indexkey op expression */
639 leftop = (Expr *) get_leftop(qual);
641 if (leftop && IsA(leftop, RelabelType))
642 leftop = ((RelabelType *) leftop)->arg;
644 Assert(leftop != NULL);
646 if (!IsA(leftop, Var))
647 elog(ERROR, "indexqual doesn't have key on left side");
649 varattno = ((Var *) leftop)->varattno;
651 else if (IsA(qual, RowCompareExpr))
653 /* (indexkey, indexkey, ...) op (expression, expression, ...) */
654 RowCompareExpr *rc = (RowCompareExpr *) qual;
657 * Examine just the first column of the rowcompare, which is
658 * what determines its placement in the overall qual list.
660 leftop = (Expr *) linitial(rc->largs);
662 if (leftop && IsA(leftop, RelabelType))
663 leftop = ((RelabelType *) leftop)->arg;
665 Assert(leftop != NULL);
667 if (!IsA(leftop, Var))
668 elog(ERROR, "indexqual doesn't have key on left side");
670 varattno = ((Var *) leftop)->varattno;
672 else if (IsA(qual, ScalarArrayOpExpr))
674 /* indexkey op ANY (array-expression) */
675 ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) qual;
677 leftop = (Expr *) linitial(saop->args);
679 if (leftop && IsA(leftop, RelabelType))
680 leftop = ((RelabelType *) leftop)->arg;
682 Assert(leftop != NULL);
684 if (!IsA(leftop, Var))
685 elog(ERROR, "indexqual doesn't have key on left side");
687 varattno = ((Var *) leftop)->varattno;
689 else if (IsA(qual, NullTest))
691 /* indexkey IS NULL or indexkey IS NOT NULL */
692 NullTest *ntest = (NullTest *) qual;
696 if (leftop && IsA(leftop, RelabelType))
697 leftop = ((RelabelType *) leftop)->arg;
699 Assert(leftop != NULL);
701 if (!IsA(leftop, Var))
702 elog(ERROR, "NullTest indexqual has wrong key");
704 varattno = ((Var *) leftop)->varattno;
708 elog(ERROR, "unsupported indexqual type: %d",
709 (int) nodeTag(qual));
710 varattno = 0; /* keep compiler quiet */
713 /* Insert the null test at the first place it can legally go */
714 if (!done && targetattno <= varattno)
716 newindexqual = lappend(newindexqual, ntest);
717 newindexqualorig = lappend(newindexqualorig, info->notnulltest);
721 newindexqual = lappend(newindexqual, qual);
722 newindexqualorig = lappend(newindexqualorig, qualorig);
725 /* Add the null test at the end if it must follow all existing quals */
728 newindexqual = lappend(newindexqual, ntest);
729 newindexqualorig = lappend(newindexqualorig, info->notnulltest);
732 iplan->indexqual = newindexqual;
733 iplan->indexqualorig = newindexqualorig;
737 * Replace original aggregate calls with subplan output Params
740 replace_aggs_with_params_mutator(Node *node, List **context)
744 if (IsA(node, Aggref))
746 Aggref *aggref = (Aggref *) node;
747 TargetEntry *curTarget = (TargetEntry *) linitial(aggref->args);
752 MinMaxAggInfo *info = (MinMaxAggInfo *) lfirst(l);
754 if (info->aggfnoid == aggref->aggfnoid &&
755 equal(info->target, curTarget->expr))
756 return (Node *) info->param;
758 elog(ERROR, "failed to re-find aggregate info record");
760 Assert(!IsA(node, SubLink));
761 return expression_tree_mutator(node, replace_aggs_with_params_mutator,
766 * Get the OID of the sort operator, if any, associated with an aggregate.
767 * Returns InvalidOid if there is no such operator.
770 fetch_agg_sort_op(Oid aggfnoid)
773 Form_pg_aggregate aggform;
776 /* fetch aggregate entry from pg_aggregate */
777 aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggfnoid));
778 if (!HeapTupleIsValid(aggTuple))
780 aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
781 aggsortop = aggform->aggsortop;
782 ReleaseSysCache(aggTuple);