]> granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/allpaths.c
New pgindent run with fixes suggested by Tom. Patch manually reviewed,
[postgresql] / src / backend / optimizer / path / allpaths.c
1 /*-------------------------------------------------------------------------
2  *
3  * allpaths.c
4  *        Routines to find possible search paths for processing a query
5  *
6  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.82 2001/11/05 17:46:25 momjian Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15
16 #include "postgres.h"
17
18 #ifdef OPTIMIZER_DEBUG
19 #include "nodes/print.h"
20 #endif
21 #include "optimizer/clauses.h"
22 #include "optimizer/cost.h"
23 #include "optimizer/geqo.h"
24 #include "optimizer/pathnode.h"
25 #include "optimizer/paths.h"
26 #include "optimizer/plancat.h"
27 #include "optimizer/planner.h"
28 #include "optimizer/prep.h"
29 #include "parser/parsetree.h"
30 #include "rewrite/rewriteManip.h"
31
32
33 bool            enable_geqo = true;
34 int                     geqo_rels = DEFAULT_GEQO_RELS;
35
36
37 static void set_base_rel_pathlists(Query *root);
38 static void set_plain_rel_pathlist(Query *root, RelOptInfo *rel,
39                                            RangeTblEntry *rte);
40 static void set_inherited_rel_pathlist(Query *root, RelOptInfo *rel,
41                                                    Index rti, RangeTblEntry *rte,
42                                                    List *inheritlist);
43 static void set_subquery_pathlist(Query *root, RelOptInfo *rel,
44                                           Index rti, RangeTblEntry *rte);
45 static RelOptInfo *make_one_rel_by_joins(Query *root, int levels_needed,
46                                           List *initial_rels);
47
48
49 /*
50  * make_one_rel
51  *        Finds all possible access paths for executing a query, returning a
52  *        single rel that represents the join of all base rels in the query.
53  */
54 RelOptInfo *
55 make_one_rel(Query *root)
56 {
57         RelOptInfo *rel;
58
59         /*
60          * Generate access paths for the base rels.
61          */
62         set_base_rel_pathlists(root);
63
64         /*
65          * Generate access paths for the entire join tree.
66          */
67         Assert(root->jointree != NULL && IsA(root->jointree, FromExpr));
68
69         rel = make_fromexpr_rel(root, root->jointree);
70
71         /*
72          * The result should join all the query's base rels.
73          */
74         Assert(length(rel->relids) == length(root->base_rel_list));
75
76         return rel;
77 }
78
79 /*
80  * set_base_rel_pathlists
81  *        Finds all paths available for scanning each base-relation entry.
82  *        Sequential scan and any available indices are considered.
83  *        Each useful path is attached to its relation's 'pathlist' field.
84  */
85 static void
86 set_base_rel_pathlists(Query *root)
87 {
88         List       *rellist;
89
90         foreach(rellist, root->base_rel_list)
91         {
92                 RelOptInfo *rel = (RelOptInfo *) lfirst(rellist);
93                 Index           rti;
94                 RangeTblEntry *rte;
95                 List       *inheritlist;
96
97                 Assert(length(rel->relids) == 1);               /* better be base rel */
98                 rti = lfirsti(rel->relids);
99                 rte = rt_fetch(rti, root->rtable);
100
101                 if (rel->issubquery)
102                 {
103                         /* Subquery --- generate a separate plan for it */
104                         set_subquery_pathlist(root, rel, rti, rte);
105                 }
106                 else if ((inheritlist = expand_inherted_rtentry(root, rti, true))
107                                  != NIL)
108                 {
109                         /* Relation is root of an inheritance tree, process specially */
110                         set_inherited_rel_pathlist(root, rel, rti, rte, inheritlist);
111                 }
112                 else
113                 {
114                         /* Plain relation */
115                         set_plain_rel_pathlist(root, rel, rte);
116                 }
117
118 #ifdef OPTIMIZER_DEBUG
119                 debug_print_rel(root, rel);
120 #endif
121         }
122 }
123
124 /*
125  * set_plain_rel_pathlist
126  *        Build access paths for a plain relation (no subquery, no inheritance)
127  */
128 static void
129 set_plain_rel_pathlist(Query *root, RelOptInfo *rel, RangeTblEntry *rte)
130 {
131         /* Mark rel with estimated output rows, width, etc */
132         set_baserel_size_estimates(root, rel);
133
134         /*
135          * Generate paths and add them to the rel's pathlist.
136          *
137          * Note: add_path() will discard any paths that are dominated by another
138          * available path, keeping only those paths that are superior along at
139          * least one dimension of cost or sortedness.
140          */
141
142         /* Consider sequential scan */
143         add_path(rel, create_seqscan_path(root, rel));
144
145         /* Consider TID scans */
146         create_tidscan_paths(root, rel);
147
148         /* Consider index paths for both simple and OR index clauses */
149         create_index_paths(root, rel);
150
151         /* create_index_paths must be done before create_or_index_paths */
152         create_or_index_paths(root, rel);
153
154         /* Now find the cheapest of the paths for this rel */
155         set_cheapest(rel);
156 }
157
158 /*
159  * set_inherited_rel_pathlist
160  *        Build access paths for a inheritance tree rooted at rel
161  *
162  * inheritlist is a list of RT indexes of all tables in the inheritance tree,
163  * including a duplicate of the parent itself.  Note we will not come here
164  * unless there's at least one child in addition to the parent.
165  *
166  * NOTE: the passed-in rel and RTE will henceforth represent the appended
167  * result of the whole inheritance tree.  The members of inheritlist represent
168  * the individual tables --- in particular, the inheritlist member that is a
169  * duplicate of the parent RTE represents the parent table alone.
170  * We will generate plans to scan the individual tables that refer to
171  * the inheritlist RTEs, whereas Vars elsewhere in the plan tree that
172  * refer to the original RTE are taken to refer to the append output.
173  * In particular, this means we have separate RelOptInfos for the parent
174  * table and for the append output, which is a good thing because they're
175  * not the same size.
176  */
177 static void
178 set_inherited_rel_pathlist(Query *root, RelOptInfo *rel,
179                                                    Index rti, RangeTblEntry *rte,
180                                                    List *inheritlist)
181 {
182         int                     parentRTindex = rti;
183         Oid                     parentOID = rte->relid;
184         List       *subpaths = NIL;
185         List       *il;
186
187         /*
188          * XXX for now, can't handle inherited expansion of FOR UPDATE; can we
189          * do better?
190          */
191         if (intMember(parentRTindex, root->rowMarks))
192                 elog(ERROR, "SELECT FOR UPDATE is not supported for inherit queries");
193
194         /*
195          * The executor will check the parent table's access permissions when
196          * it examines the parent's inheritlist entry.  There's no need to
197          * check twice, so turn off access check bits in the original RTE.
198          */
199         rte->checkForRead = false;
200         rte->checkForWrite = false;
201
202         /*
203          * Initialize to compute size estimates for whole inheritance tree
204          */
205         rel->rows = 0;
206         rel->width = 0;
207
208         /*
209          * Generate access paths for each table in the tree (parent AND
210          * children), and pick the cheapest path for each table.
211          */
212         foreach(il, inheritlist)
213         {
214                 int                     childRTindex = lfirsti(il);
215                 RangeTblEntry *childrte;
216                 Oid                     childOID;
217                 RelOptInfo *childrel;
218
219                 childrte = rt_fetch(childRTindex, root->rtable);
220                 childOID = childrte->relid;
221
222                 /*
223                  * Make a RelOptInfo for the child so we can do planning.  Do NOT
224                  * attach the RelOptInfo to the query's base_rel_list, however,
225                  * since the child is not part of the main join tree.  Instead,
226                  * the child RelOptInfo is added to other_rel_list.
227                  */
228                 childrel = build_other_rel(root, childRTindex);
229
230                 /*
231                  * Copy the parent's targetlist and restriction quals to the
232                  * child, with attribute-number adjustment as needed.  We don't
233                  * bother to copy the join quals, since we can't do any joining of
234                  * the individual tables.
235                  */
236                 childrel->targetlist = (List *)
237                         adjust_inherited_attrs((Node *) rel->targetlist,
238                                                                    parentRTindex,
239                                                                    parentOID,
240                                                                    childRTindex,
241                                                                    childOID);
242                 childrel->baserestrictinfo = (List *)
243                         adjust_inherited_attrs((Node *) rel->baserestrictinfo,
244                                                                    parentRTindex,
245                                                                    parentOID,
246                                                                    childRTindex,
247                                                                    childOID);
248                 childrel->baserestrictcost = rel->baserestrictcost;
249
250                 /*
251                  * Now compute child access paths, and save the cheapest.
252                  */
253                 set_plain_rel_pathlist(root, childrel, childrte);
254
255                 subpaths = lappend(subpaths, childrel->cheapest_total_path);
256
257                 /* Also update total size estimates */
258                 rel->rows += childrel->rows;
259                 if (childrel->width > rel->width)
260                         rel->width = childrel->width;
261         }
262
263         /*
264          * Finally, build Append path and install it as the only access path
265          * for the parent rel.
266          */
267         add_path(rel, (Path *) create_append_path(rel, subpaths));
268
269         /* Select cheapest path (pretty easy in this case...) */
270         set_cheapest(rel);
271 }
272
273 /*
274  * set_subquery_pathlist
275  *              Build the (single) access path for a subquery RTE
276  */
277 static void
278 set_subquery_pathlist(Query *root, RelOptInfo *rel,
279                                           Index rti, RangeTblEntry *rte)
280 {
281         Query      *subquery = rte->subquery;
282
283         /*
284          * If there are any restriction clauses that have been attached to the
285          * subquery relation, consider pushing them down to become HAVING
286          * quals of the subquery itself.  (Not WHERE clauses, since they may
287          * refer to subquery outputs that are aggregate results.  But
288          * planner.c will transfer them into the subquery's WHERE if they do
289          * not.)  This transformation is useful because it may allow us to
290          * generate a better plan for the subquery than evaluating all the
291          * subquery output rows and then filtering them.
292          *
293          * There are several cases where we cannot push down clauses:
294          *
295          * 1. If the subquery contains set ops (UNION/INTERSECT/EXCEPT) we do not
296          * push down any qual clauses, since the planner doesn't support quals
297          * at the top level of a setop.  (With suitable analysis we could try
298          * to push the quals down into the component queries of the setop, but
299          * getting it right seems nontrivial.  Work on this later.)
300          *
301          * 2. If the subquery has a LIMIT clause or a DISTINCT ON clause, we must
302          * not push down any quals, since that could change the set of rows
303          * returned.  (Actually, we could push down quals into a DISTINCT ON
304          * subquery if they refer only to DISTINCT-ed output columns, but
305          * checking that seems more work than it's worth.  In any case, a
306          * plain DISTINCT is safe to push down past.)
307          *
308          * 3. We do not push down clauses that contain subselects, mainly because
309          * I'm not sure it will work correctly (the subplan hasn't yet
310          * transformed sublinks to subselects).
311          *
312          * Non-pushed-down clauses will get evaluated as qpquals of the
313          * SubqueryScan node.
314          *
315          * XXX Are there any cases where we want to make a policy decision not to
316          * push down, because it'd result in a worse plan?
317          */
318         if (subquery->setOperations == NULL &&
319                 subquery->limitOffset == NULL &&
320                 subquery->limitCount == NULL &&
321                 !has_distinct_on_clause(subquery))
322         {
323                 /* OK to consider pushing down individual quals */
324                 List       *upperrestrictlist = NIL;
325                 List       *lst;
326
327                 foreach(lst, rel->baserestrictinfo)
328                 {
329                         RestrictInfo *rinfo = (RestrictInfo *) lfirst(lst);
330                         Node       *clause = (Node *) rinfo->clause;
331
332                         if (contain_subplans(clause))
333                         {
334                                 /* Keep it in the upper query */
335                                 upperrestrictlist = lappend(upperrestrictlist, rinfo);
336                         }
337                         else
338                         {
339                                 /*
340                                  * We need to replace Vars in the clause (which must refer
341                                  * to outputs of the subquery) with copies of the
342                                  * subquery's targetlist expressions.  Note that at this
343                                  * point, any uplevel Vars in the clause should have been
344                                  * replaced with Params, so they need no work.
345                                  */
346                                 clause = ResolveNew(clause, rti, 0,
347                                                                         subquery->targetList,
348                                                                         CMD_SELECT, 0);
349                                 subquery->havingQual = make_and_qual(subquery->havingQual,
350                                                                                                          clause);
351
352                                 /*
353                                  * We need not change the subquery's hasAggs or
354                                  * hasSublinks flags, since we can't be pushing down any
355                                  * aggregates that weren't there before, and we don't push
356                                  * down subselects at all.
357                                  */
358                         }
359                 }
360                 rel->baserestrictinfo = upperrestrictlist;
361         }
362
363         /* Generate the plan for the subquery */
364         rel->subplan = subquery_planner(subquery,
365                                                                         -1.0 /* default case */ );
366
367         /* Copy number of output rows from subplan */
368         rel->tuples = rel->subplan->plan_rows;
369
370         /* Mark rel with estimated output rows, width, etc */
371         set_baserel_size_estimates(root, rel);
372
373         /* Generate appropriate path */
374         add_path(rel, create_subqueryscan_path(rel));
375
376         /* Select cheapest path (pretty easy in this case...) */
377         set_cheapest(rel);
378 }
379
380 /*
381  * make_fromexpr_rel
382  *        Build access paths for a FromExpr jointree node.
383  */
384 RelOptInfo *
385 make_fromexpr_rel(Query *root, FromExpr *from)
386 {
387         int                     levels_needed;
388         List       *initial_rels = NIL;
389         List       *jt;
390
391         /*
392          * Count the number of child jointree nodes.  This is the depth of the
393          * dynamic-programming algorithm we must employ to consider all ways
394          * of joining the child nodes.
395          */
396         levels_needed = length(from->fromlist);
397
398         if (levels_needed <= 0)
399                 return NULL;                    /* nothing to do? */
400
401         /*
402          * Construct a list of rels corresponding to the child jointree nodes.
403          * This may contain both base rels and rels constructed according to
404          * explicit JOIN directives.
405          */
406         foreach(jt, from->fromlist)
407         {
408                 Node       *jtnode = (Node *) lfirst(jt);
409
410                 initial_rels = lappend(initial_rels,
411                                                            make_jointree_rel(root, jtnode));
412         }
413
414         if (levels_needed == 1)
415         {
416                 /*
417                  * Single jointree node, so we're done.
418                  */
419                 return (RelOptInfo *) lfirst(initial_rels);
420         }
421         else
422         {
423                 /*
424                  * Consider the different orders in which we could join the rels,
425                  * using either GEQO or regular optimizer.
426                  */
427                 if (enable_geqo && levels_needed >= geqo_rels)
428                         return geqo(root, levels_needed, initial_rels);
429                 else
430                         return make_one_rel_by_joins(root, levels_needed, initial_rels);
431         }
432 }
433
434 /*
435  * make_one_rel_by_joins
436  *        Find all possible joinpaths for a query by successively finding ways
437  *        to join component relations into join relations.
438  *
439  * 'levels_needed' is the number of iterations needed, ie, the number of
440  *              independent jointree items in the query.  This is > 1.
441  *
442  * 'initial_rels' is a list of RelOptInfo nodes for each independent
443  *              jointree item.  These are the components to be joined together.
444  *
445  * Returns the final level of join relations, i.e., the relation that is
446  * the result of joining all the original relations together.
447  */
448 static RelOptInfo *
449 make_one_rel_by_joins(Query *root, int levels_needed, List *initial_rels)
450 {
451         List      **joinitems;
452         int                     lev;
453         RelOptInfo *rel;
454
455         /*
456          * We employ a simple "dynamic programming" algorithm: we first find
457          * all ways to build joins of two jointree items, then all ways to
458          * build joins of three items (from two-item joins and single items),
459          * then four-item joins, and so on until we have considered all ways
460          * to join all the items into one rel.
461          *
462          * joinitems[j] is a list of all the j-item rels.  Initially we set
463          * joinitems[1] to represent all the single-jointree-item relations.
464          */
465         joinitems = (List **) palloc((levels_needed + 1) * sizeof(List *));
466         MemSet(joinitems, 0, (levels_needed + 1) * sizeof(List *));
467
468         joinitems[1] = initial_rels;
469
470         for (lev = 2; lev <= levels_needed; lev++)
471         {
472                 List       *x;
473
474                 /*
475                  * Determine all possible pairs of relations to be joined at this
476                  * level, and build paths for making each one from every available
477                  * pair of lower-level relations.
478                  */
479                 joinitems[lev] = make_rels_by_joins(root, lev, joinitems);
480
481                 /*
482                  * Do cleanup work on each just-processed rel.
483                  */
484                 foreach(x, joinitems[lev])
485                 {
486                         rel = (RelOptInfo *) lfirst(x);
487
488 #ifdef NOT_USED
489
490                         /*
491                          * * for each expensive predicate in each path in each
492                          * distinct rel, * consider doing pullup  -- JMH
493                          */
494                         if (XfuncMode != XFUNC_NOPULL && XfuncMode != XFUNC_OFF)
495                                 xfunc_trypullup(rel);
496 #endif
497
498                         /* Find and save the cheapest paths for this rel */
499                         set_cheapest(rel);
500
501 #ifdef OPTIMIZER_DEBUG
502                         debug_print_rel(root, rel);
503 #endif
504                 }
505         }
506
507         /*
508          * We should have a single rel at the final level.
509          */
510         Assert(length(joinitems[levels_needed]) == 1);
511
512         rel = (RelOptInfo *) lfirst(joinitems[levels_needed]);
513
514         return rel;
515 }
516
517 /*****************************************************************************
518  *
519  *****************************************************************************/
520
521 #ifdef OPTIMIZER_DEBUG
522
523 static void
524 print_relids(Relids relids)
525 {
526         List       *l;
527
528         foreach(l, relids)
529         {
530                 printf("%d", lfirsti(l));
531                 if (lnext(l))
532                         printf(" ");
533         }
534 }
535
536 static void
537 print_restrictclauses(Query *root, List *clauses)
538 {
539         List       *l;
540
541         foreach(l, clauses)
542         {
543                 RestrictInfo *c = lfirst(l);
544
545                 print_expr((Node *) c->clause, root->rtable);
546                 if (lnext(l))
547                         printf(", ");
548         }
549 }
550
551 static void
552 print_path(Query *root, Path *path, int indent)
553 {
554         const char *ptype;
555         bool            join;
556         int                     i;
557
558         switch (nodeTag(path))
559         {
560                 case T_Path:
561                         ptype = "SeqScan";
562                         join = false;
563                         break;
564                 case T_IndexPath:
565                         ptype = "IdxScan";
566                         join = false;
567                         break;
568                 case T_TidPath:
569                         ptype = "TidScan";
570                         join = false;
571                         break;
572                 case T_NestPath:
573                         ptype = "Nestloop";
574                         join = true;
575                         break;
576                 case T_MergePath:
577                         ptype = "MergeJoin";
578                         join = true;
579                         break;
580                 case T_HashPath:
581                         ptype = "HashJoin";
582                         join = true;
583                         break;
584                 default:
585                         ptype = "???Path";
586                         join = false;
587                         break;
588         }
589
590         for (i = 0; i < indent; i++)
591                 printf("\t");
592         printf("%s(", ptype);
593         print_relids(path->parent->relids);
594         printf(") rows=%.0f cost=%.2f..%.2f\n",
595                    path->parent->rows, path->startup_cost, path->total_cost);
596
597         if (path->pathkeys)
598         {
599                 for (i = 0; i < indent; i++)
600                         printf("\t");
601                 printf("  pathkeys: ");
602                 print_pathkeys(path->pathkeys, root->rtable);
603         }
604
605         if (join)
606         {
607                 JoinPath   *jp = (JoinPath *) path;
608
609                 for (i = 0; i < indent; i++)
610                         printf("\t");
611                 printf("  clauses: ");
612                 print_restrictclauses(root, jp->joinrestrictinfo);
613                 printf("\n");
614
615                 if (nodeTag(path) == T_MergePath)
616                 {
617                         MergePath  *mp = (MergePath *) path;
618
619                         if (mp->outersortkeys || mp->innersortkeys)
620                         {
621                                 for (i = 0; i < indent; i++)
622                                         printf("\t");
623                                 printf("  sortouter=%d sortinner=%d\n",
624                                            ((mp->outersortkeys) ? 1 : 0),
625                                            ((mp->innersortkeys) ? 1 : 0));
626                         }
627                 }
628
629                 print_path(root, jp->outerjoinpath, indent + 1);
630                 print_path(root, jp->innerjoinpath, indent + 1);
631         }
632 }
633
634 void
635 debug_print_rel(Query *root, RelOptInfo *rel)
636 {
637         List       *l;
638
639         printf("RELOPTINFO (");
640         print_relids(rel->relids);
641         printf("): rows=%.0f width=%d\n", rel->rows, rel->width);
642
643         if (rel->baserestrictinfo)
644         {
645                 printf("\tbaserestrictinfo: ");
646                 print_restrictclauses(root, rel->baserestrictinfo);
647                 printf("\n");
648         }
649
650         foreach(l, rel->joininfo)
651         {
652                 JoinInfo   *j = (JoinInfo *) lfirst(l);
653
654                 printf("\tjoininfo (");
655                 print_relids(j->unjoined_relids);
656                 printf("): ");
657                 print_restrictclauses(root, j->jinfo_restrictinfo);
658                 printf("\n");
659         }
660
661         printf("\tpath list:\n");
662         foreach(l, rel->pathlist)
663                 print_path(root, lfirst(l), 1);
664         printf("\n\tcheapest startup path:\n");
665         print_path(root, rel->cheapest_startup_path, 1);
666         printf("\n\tcheapest total path:\n");
667         print_path(root, rel->cheapest_total_path, 1);
668         printf("\n");
669         fflush(stdout);
670 }
671
672 #endif   /* OPTIMIZER_DEBUG */