]> granicus.if.org Git - postgresql/blob - src/backend/optimizer/plan/planagg.c
240c529fa401c7d421664495ad9cd264d2527048
[postgresql] / src / backend / optimizer / plan / planagg.c
1 /*-------------------------------------------------------------------------
2  *
3  * planagg.c
4  *        Special planning for aggregate queries.
5  *
6  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.40 2008/07/10 01:17:29 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include "catalog/pg_aggregate.h"
18 #include "catalog/pg_am.h"
19 #include "catalog/pg_type.h"
20 #include "nodes/makefuncs.h"
21 #include "optimizer/clauses.h"
22 #include "optimizer/cost.h"
23 #include "optimizer/pathnode.h"
24 #include "optimizer/paths.h"
25 #include "optimizer/planmain.h"
26 #include "optimizer/predtest.h"
27 #include "optimizer/subselect.h"
28 #include "parser/parse_clause.h"
29 #include "parser/parse_expr.h"
30 #include "parser/parsetree.h"
31 #include "utils/lsyscache.h"
32 #include "utils/syscache.h"
33
34
35 typedef struct
36 {
37         Oid                     aggfnoid;               /* pg_proc Oid of the aggregate */
38         Oid                     aggsortop;              /* Oid of its sort operator */
39         Expr       *target;                     /* expression we are aggregating on */
40         Expr       *notnulltest;        /* expression for "target IS NOT NULL" */
41         IndexPath  *path;                       /* access path for index scan */
42         Cost            pathcost;               /* estimated cost to fetch first row */
43         bool            nulls_first;    /* null ordering direction matching index */
44         Param      *param;                      /* param for subplan's output */
45 } MinMaxAggInfo;
46
47 static bool find_minmax_aggs_walker(Node *node, List **context);
48 static bool build_minmax_path(PlannerInfo *root, RelOptInfo *rel,
49                                   MinMaxAggInfo *info);
50 static ScanDirection match_agg_to_index_col(MinMaxAggInfo *info,
51                                            IndexOptInfo *index, int indexcol);
52 static void make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info);
53 static Node *replace_aggs_with_params_mutator(Node *node, List **context);
54 static Oid      fetch_agg_sort_op(Oid aggfnoid);
55
56
57 /*
58  * optimize_minmax_aggregates - check for optimizing MIN/MAX via indexes
59  *
60  * This checks to see if we can replace MIN/MAX aggregate functions by
61  * subqueries of the form
62  *              (SELECT col FROM tab WHERE ... ORDER BY col ASC/DESC LIMIT 1)
63  * Given a suitable index on tab.col, this can be much faster than the
64  * generic scan-all-the-rows plan.
65  *
66  * We are passed the preprocessed tlist, and the best path
67  * devised for computing the input of a standard Agg node.      If we are able
68  * to optimize all the aggregates, and the result is estimated to be cheaper
69  * than the generic aggregate method, then generate and return a Plan that
70  * does it that way.  Otherwise, return NULL.
71  */
72 Plan *
73 optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path)
74 {
75         Query      *parse = root->parse;
76         FromExpr   *jtnode;
77         RangeTblRef *rtr;
78         RangeTblEntry *rte;
79         RelOptInfo *rel;
80         List       *aggs_list;
81         ListCell   *l;
82         Cost            total_cost;
83         Path            agg_p;
84         Plan       *plan;
85         Node       *hqual;
86         QualCost        tlist_cost;
87
88         /* Nothing to do if query has no aggregates */
89         if (!parse->hasAggs)
90                 return NULL;
91
92         Assert(!parse->setOperations);          /* shouldn't get here if a setop */
93         Assert(parse->rowMarks == NIL);         /* nor if FOR UPDATE */
94
95         /*
96          * Reject unoptimizable cases.
97          *
98          * We don't handle GROUP BY, because our current implementations of
99          * grouping require looking at all the rows anyway, and so there's not
100          * much point in optimizing MIN/MAX.
101          */
102         if (parse->groupClause)
103                 return NULL;
104
105         /*
106          * We also restrict the query to reference exactly one table, since join
107          * conditions can't be handled reasonably.  (We could perhaps handle a
108          * query containing cartesian-product joins, but it hardly seems worth the
109          * trouble.)  However, the single real table could be buried in several
110          * levels of FromExpr.
111          */
112         jtnode = parse->jointree;
113         while (IsA(jtnode, FromExpr))
114         {
115                 if (list_length(jtnode->fromlist) != 1)
116                         return NULL;
117                 jtnode = linitial(jtnode->fromlist);
118         }
119         if (!IsA(jtnode, RangeTblRef))
120                 return NULL;
121         rtr = (RangeTblRef *) jtnode;
122         rte = planner_rt_fetch(rtr->rtindex, root);
123         if (rte->rtekind != RTE_RELATION || rte->inh)
124                 return NULL;
125         rel = find_base_rel(root, rtr->rtindex);
126
127         /*
128          * Since this optimization is not applicable all that often, we want to
129          * fall out before doing very much work if possible.  Therefore we do the
130          * work in several passes.      The first pass scans the tlist and HAVING qual
131          * to find all the aggregates and verify that each of them is a MIN/MAX
132          * aggregate.  If that succeeds, the second pass looks at each aggregate
133          * to see if it is optimizable; if so we make an IndexPath describing how
134          * we would scan it.  (We do not try to optimize if only some aggs are
135          * optimizable, since that means we'll have to scan all the rows anyway.)
136          * If that succeeds, we have enough info to compare costs against the
137          * generic implementation. Only if that test passes do we build a Plan.
138          */
139
140         /* Pass 1: find all the aggregates */
141         aggs_list = NIL;
142         if (find_minmax_aggs_walker((Node *) tlist, &aggs_list))
143                 return NULL;
144         if (find_minmax_aggs_walker(parse->havingQual, &aggs_list))
145                 return NULL;
146
147         /* Pass 2: see if each one is optimizable */
148         total_cost = 0;
149         foreach(l, aggs_list)
150         {
151                 MinMaxAggInfo *info = (MinMaxAggInfo *) lfirst(l);
152
153                 if (!build_minmax_path(root, rel, info))
154                         return NULL;
155                 total_cost += info->pathcost;
156         }
157
158         /*
159          * Make the cost comparison.
160          *
161          * Note that we don't include evaluation cost of the tlist here; this is
162          * OK since it isn't included in best_path's cost either, and should be
163          * the same in either case.
164          */
165         cost_agg(&agg_p, root, AGG_PLAIN, list_length(aggs_list),
166                          0, 0,
167                          best_path->startup_cost, best_path->total_cost,
168                          best_path->parent->rows);
169
170         if (total_cost > agg_p.total_cost)
171                 return NULL;                    /* too expensive */
172
173         /*
174          * OK, we are going to generate an optimized plan.
175          */
176
177         /* Pass 3: generate subplans and output Param nodes */
178         foreach(l, aggs_list)
179         {
180                 make_agg_subplan(root, (MinMaxAggInfo *) lfirst(l));
181         }
182
183         /*
184          * Modify the targetlist and HAVING qual to reference subquery outputs
185          */
186         tlist = (List *) replace_aggs_with_params_mutator((Node *) tlist,
187                                                                                                           &aggs_list);
188         hqual = replace_aggs_with_params_mutator(parse->havingQual,
189                                                                                          &aggs_list);
190
191         /*
192          * We have to replace Aggrefs with Params in equivalence classes too,
193          * else ORDER BY or DISTINCT on an optimized aggregate will fail.
194          *
195          * Note: at some point it might become necessary to mutate other
196          * data structures too, such as the query's sortClause or distinctClause.
197          * Right now, those won't be examined after this point.
198          */
199         mutate_eclass_expressions(root,
200                                                           replace_aggs_with_params_mutator,
201                                                           &aggs_list);
202
203         /*
204          * Generate the output plan --- basically just a Result
205          */
206         plan = (Plan *) make_result(root, tlist, hqual, NULL);
207
208         /* Account for evaluation cost of the tlist (make_result did the rest) */
209         cost_qual_eval(&tlist_cost, tlist, root);
210         plan->startup_cost += tlist_cost.startup;
211         plan->total_cost += tlist_cost.startup + tlist_cost.per_tuple;
212
213         return plan;
214 }
215
216 /*
217  * find_minmax_aggs_walker
218  *              Recursively scan the Aggref nodes in an expression tree, and check
219  *              that each one is a MIN/MAX aggregate.  If so, build a list of the
220  *              distinct aggregate calls in the tree.
221  *
222  * Returns TRUE if a non-MIN/MAX aggregate is found, FALSE otherwise.
223  * (This seemingly-backward definition is used because expression_tree_walker
224  * aborts the scan on TRUE return, which is what we want.)
225  *
226  * Found aggregates are added to the list at *context; it's up to the caller
227  * to initialize the list to NIL.
228  *
229  * This does not descend into subqueries, and so should be used only after
230  * reduction of sublinks to subplans.  There mustn't be outer-aggregate
231  * references either.
232  */
233 static bool
234 find_minmax_aggs_walker(Node *node, List **context)
235 {
236         if (node == NULL)
237                 return false;
238         if (IsA(node, Aggref))
239         {
240                 Aggref     *aggref = (Aggref *) node;
241                 Oid                     aggsortop;
242                 Expr       *curTarget;
243                 MinMaxAggInfo *info;
244                 ListCell   *l;
245
246                 Assert(aggref->agglevelsup == 0);
247                 if (list_length(aggref->args) != 1)
248                         return true;            /* it couldn't be MIN/MAX */
249                 /* note: we do not care if DISTINCT is mentioned ... */
250
251                 aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
252                 if (!OidIsValid(aggsortop))
253                         return true;            /* not a MIN/MAX aggregate */
254
255                 /*
256                  * Check whether it's already in the list, and add it if not.
257                  */
258                 curTarget = linitial(aggref->args);
259                 foreach(l, *context)
260                 {
261                         info = (MinMaxAggInfo *) lfirst(l);
262                         if (info->aggfnoid == aggref->aggfnoid &&
263                                 equal(info->target, curTarget))
264                                 return false;
265                 }
266
267                 info = (MinMaxAggInfo *) palloc0(sizeof(MinMaxAggInfo));
268                 info->aggfnoid = aggref->aggfnoid;
269                 info->aggsortop = aggsortop;
270                 info->target = curTarget;
271
272                 *context = lappend(*context, info);
273
274                 /*
275                  * We need not recurse into the argument, since it can't contain any
276                  * aggregates.
277                  */
278                 return false;
279         }
280         Assert(!IsA(node, SubLink));
281         return expression_tree_walker(node, find_minmax_aggs_walker,
282                                                                   (void *) context);
283 }
284
285 /*
286  * build_minmax_path
287  *              Given a MIN/MAX aggregate, try to find an index it can be optimized
288  *              with.  Build a Path describing the best such index path.
289  *
290  * Returns TRUE if successful, FALSE if not.  In the TRUE case, info->path
291  * is filled in.
292  *
293  * XXX look at sharing more code with indxpath.c.
294  *
295  * Note: check_partial_indexes() must have been run previously.
296  */
297 static bool
298 build_minmax_path(PlannerInfo *root, RelOptInfo *rel, MinMaxAggInfo *info)
299 {
300         IndexPath  *best_path = NULL;
301         Cost            best_cost = 0;
302         bool            best_nulls_first = false;
303         NullTest   *ntest;
304         List       *allquals;
305         ListCell   *l;
306
307         /* Build "target IS NOT NULL" expression for use below */
308         ntest = makeNode(NullTest);
309         ntest->nulltesttype = IS_NOT_NULL;
310         ntest->arg = copyObject(info->target);
311         info->notnulltest = (Expr *) ntest;
312
313         /*
314          * Build list of existing restriction clauses plus the notnull test. We
315          * cheat a bit by not bothering with a RestrictInfo node for the notnull
316          * test --- predicate_implied_by() won't care.
317          */
318         allquals = list_concat(list_make1(ntest), rel->baserestrictinfo);
319
320         foreach(l, rel->indexlist)
321         {
322                 IndexOptInfo *index = (IndexOptInfo *) lfirst(l);
323                 ScanDirection indexscandir = NoMovementScanDirection;
324                 int                     indexcol;
325                 int                     prevcol;
326                 List       *restrictclauses;
327                 IndexPath  *new_path;
328                 Cost            new_cost;
329                 bool            found_clause;
330
331                 /* Ignore non-btree indexes */
332                 if (index->relam != BTREE_AM_OID)
333                         continue;
334
335                 /*
336                  * Ignore partial indexes that do not match the query --- unless their
337                  * predicates can be proven from the baserestrict list plus the IS NOT
338                  * NULL test.  In that case we can use them.
339                  */
340                 if (index->indpred != NIL && !index->predOK &&
341                         !predicate_implied_by(index->indpred, allquals))
342                         continue;
343
344                 /*
345                  * Look for a match to one of the index columns.  (In a stupidly
346                  * designed index, there could be multiple matches, but we only care
347                  * about the first one.)
348                  */
349                 for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
350                 {
351                         indexscandir = match_agg_to_index_col(info, index, indexcol);
352                         if (!ScanDirectionIsNoMovement(indexscandir))
353                                 break;
354                 }
355                 if (ScanDirectionIsNoMovement(indexscandir))
356                         continue;
357
358                 /*
359                  * If the match is not at the first index column, we have to verify
360                  * that there are "x = something" restrictions on all the earlier
361                  * index columns.  Since we'll need the restrictclauses list anyway to
362                  * build the path, it's convenient to extract that first and then look
363                  * through it for the equality restrictions.
364                  */
365                 restrictclauses = group_clauses_by_indexkey(index,
366                                                                                                 index->rel->baserestrictinfo,
367                                                                                                         NIL,
368                                                                                                         NULL,
369                                                                                                         SAOP_FORBID,
370                                                                                                         &found_clause);
371
372                 if (list_length(restrictclauses) < indexcol)
373                         continue;                       /* definitely haven't got enough */
374                 for (prevcol = 0; prevcol < indexcol; prevcol++)
375                 {
376                         List       *rinfos = (List *) list_nth(restrictclauses, prevcol);
377                         ListCell   *ll;
378
379                         foreach(ll, rinfos)
380                         {
381                                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(ll);
382                                 int                     strategy;
383
384                                 /* Could be an IS_NULL test, if so ignore */
385                                 if (!is_opclause(rinfo->clause))
386                                         continue;
387                                 strategy =
388                                         get_op_opfamily_strategy(((OpExpr *) rinfo->clause)->opno,
389                                                                                          index->opfamily[prevcol]);
390                                 if (strategy == BTEqualStrategyNumber)
391                                         break;
392                         }
393                         if (ll == NULL)
394                                 break;                  /* none are Equal for this index col */
395                 }
396                 if (prevcol < indexcol)
397                         continue;                       /* didn't find all Equal clauses */
398
399                 /*
400                  * Build the access path.  We don't bother marking it with pathkeys.
401                  */
402                 new_path = create_index_path(root, index,
403                                                                          restrictclauses,
404                                                                          NIL,
405                                                                          indexscandir,
406                                                                          NULL);
407
408                 /*
409                  * Estimate actual cost of fetching just one row.
410                  */
411                 if (new_path->rows > 1.0)
412                         new_cost = new_path->path.startup_cost +
413                                 (new_path->path.total_cost - new_path->path.startup_cost)
414                                 * 1.0 / new_path->rows;
415                 else
416                         new_cost = new_path->path.total_cost;
417
418                 /*
419                  * Keep if first or if cheaper than previous best.
420                  */
421                 if (best_path == NULL || new_cost < best_cost)
422                 {
423                         best_path = new_path;
424                         best_cost = new_cost;
425                         if (ScanDirectionIsForward(indexscandir))
426                                 best_nulls_first = index->nulls_first[indexcol];
427                         else
428                                 best_nulls_first = !index->nulls_first[indexcol];
429                 }
430         }
431
432         info->path = best_path;
433         info->pathcost = best_cost;
434         info->nulls_first = best_nulls_first;
435         return (best_path != NULL);
436 }
437
438 /*
439  * match_agg_to_index_col
440  *              Does an aggregate match an index column?
441  *
442  * It matches if its argument is equal to the index column's data and its
443  * sortop is either the forward or reverse sort operator for the column.
444  *
445  * We return ForwardScanDirection if match the forward sort operator,
446  * BackwardScanDirection if match the reverse sort operator,
447  * and NoMovementScanDirection if there's no match.
448  */
449 static ScanDirection
450 match_agg_to_index_col(MinMaxAggInfo *info, IndexOptInfo *index, int indexcol)
451 {
452         ScanDirection result;
453
454         /* Check for operator match first (cheaper) */
455         if (info->aggsortop == index->fwdsortop[indexcol])
456                 result = ForwardScanDirection;
457         else if (info->aggsortop == index->revsortop[indexcol])
458                 result = BackwardScanDirection;
459         else
460                 return NoMovementScanDirection;
461
462         /* Check for data match */
463         if (!match_index_to_operand((Node *) info->target, indexcol, index))
464                 return NoMovementScanDirection;
465
466         return result;
467 }
468
469 /*
470  * Construct a suitable plan for a converted aggregate query
471  */
472 static void
473 make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info)
474 {
475         PlannerInfo subroot;
476         Query      *subparse;
477         Plan       *plan;
478         Plan       *iplan;
479         TargetEntry *tle;
480         SortClause *sortcl;
481
482         /*
483          * Generate a suitably modified query.  Much of the work here is probably
484          * unnecessary in the normal case, but we want to make it look good if
485          * someone tries to EXPLAIN the result.
486          */
487         memcpy(&subroot, root, sizeof(PlannerInfo));
488         subroot.parse = subparse = (Query *) copyObject(root->parse);
489         subroot.init_plans = NIL;
490         subparse->commandType = CMD_SELECT;
491         subparse->resultRelation = 0;
492         subparse->returningList = NIL;
493         subparse->utilityStmt = NULL;
494         subparse->intoClause = NULL;
495         subparse->hasAggs = false;
496         subparse->groupClause = NIL;
497         subparse->havingQual = NULL;
498         subparse->distinctClause = NIL;
499         subroot.hasHavingQual = false;
500
501         /* single tlist entry that is the aggregate target */
502         tle = makeTargetEntry(copyObject(info->target),
503                                                   1,
504                                                   pstrdup("agg_target"),
505                                                   false);
506         subparse->targetList = list_make1(tle);
507
508         /* set up the appropriate ORDER BY entry */
509         sortcl = makeNode(SortClause);
510         sortcl->tleSortGroupRef = assignSortGroupRef(tle, subparse->targetList);
511         sortcl->sortop = info->aggsortop;
512         sortcl->nulls_first = info->nulls_first;
513         subparse->sortClause = list_make1(sortcl);
514
515         /* set up LIMIT 1 */
516         subparse->limitOffset = NULL;
517         subparse->limitCount = (Node *) makeConst(INT8OID, -1, sizeof(int64),
518                                                                                           Int64GetDatum(1), false,
519                                                                                           FLOAT8PASSBYVAL);
520
521         /*
522          * Generate the plan for the subquery.  We already have a Path for the
523          * basic indexscan, but we have to convert it to a Plan and attach a LIMIT
524          * node above it.
525          *
526          * Also we must add a "WHERE target IS NOT NULL" restriction to the
527          * indexscan, to be sure we don't return a NULL, which'd be contrary to
528          * the standard behavior of MIN/MAX.  XXX ideally this should be done
529          * earlier, so that the selectivity of the restriction could be included
530          * in our cost estimates.  But that looks painful, and in most cases the
531          * fraction of NULLs isn't high enough to change the decision.
532          *
533          * The NOT NULL qual has to go on the actual indexscan; create_plan might
534          * have stuck a gating Result atop that, if there were any pseudoconstant
535          * quals.
536          *
537          * We can skip adding the NOT NULL qual if it's redundant with either an
538          * already-given WHERE condition, or a clause of the index predicate.
539          */
540         plan = create_plan(&subroot, (Path *) info->path);
541
542         plan->targetlist = copyObject(subparse->targetList);
543
544         if (IsA(plan, Result))
545                 iplan = plan->lefttree;
546         else
547                 iplan = plan;
548         Assert(IsA(iplan, IndexScan));
549
550         if (!list_member(iplan->qual, info->notnulltest) &&
551                 !list_member(info->path->indexinfo->indpred, info->notnulltest))
552                 iplan->qual = lcons(info->notnulltest, iplan->qual);
553
554         plan = (Plan *) make_limit(plan,
555                                                            subparse->limitOffset,
556                                                            subparse->limitCount,
557                                                            0, 1);
558
559         /*
560          * Convert the plan into an InitPlan, and make a Param for its result.
561          */
562         info->param = SS_make_initplan_from_plan(&subroot, plan,
563                                                                                          exprType((Node *) tle->expr),
564                                                                                          -1);
565
566         /*
567          * Make sure the InitPlan gets into the outer list.  It has to appear
568          * after any other InitPlans it might depend on, too (see comments in
569          * ExecReScan).
570          */
571         root->init_plans = list_concat(root->init_plans, subroot.init_plans);
572 }
573
574 /*
575  * Replace original aggregate calls with subplan output Params
576  */
577 static Node *
578 replace_aggs_with_params_mutator(Node *node, List **context)
579 {
580         if (node == NULL)
581                 return NULL;
582         if (IsA(node, Aggref))
583         {
584                 Aggref     *aggref = (Aggref *) node;
585                 ListCell   *l;
586                 Expr       *curTarget = linitial(aggref->args);
587
588                 foreach(l, *context)
589                 {
590                         MinMaxAggInfo *info = (MinMaxAggInfo *) lfirst(l);
591
592                         if (info->aggfnoid == aggref->aggfnoid &&
593                                 equal(info->target, curTarget))
594                                 return (Node *) info->param;
595                 }
596                 elog(ERROR, "failed to re-find aggregate info record");
597         }
598         Assert(!IsA(node, SubLink));
599         return expression_tree_mutator(node, replace_aggs_with_params_mutator,
600                                                                    (void *) context);
601 }
602
603 /*
604  * Get the OID of the sort operator, if any, associated with an aggregate.
605  * Returns InvalidOid if there is no such operator.
606  */
607 static Oid
608 fetch_agg_sort_op(Oid aggfnoid)
609 {
610         HeapTuple       aggTuple;
611         Form_pg_aggregate aggform;
612         Oid                     aggsortop;
613
614         /* fetch aggregate entry from pg_aggregate */
615         aggTuple = SearchSysCache(AGGFNOID,
616                                                           ObjectIdGetDatum(aggfnoid),
617                                                           0, 0, 0);
618         if (!HeapTupleIsValid(aggTuple))
619                 return InvalidOid;
620         aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
621         aggsortop = aggform->aggsortop;
622         ReleaseSysCache(aggTuple);
623
624         return aggsortop;
625 }