]> granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/allpaths.c
Get rid of some old and crufty global variables in the planner. When
[postgresql] / src / backend / optimizer / path / allpaths.c
1 /*-------------------------------------------------------------------------
2  *
3  * allpaths.c
4  *        Routines to find possible search paths for processing a query
5  *
6  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.159 2007/02/19 07:03:28 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15
16 #include "postgres.h"
17
18 #ifdef OPTIMIZER_DEBUG
19 #include "nodes/print.h"
20 #endif
21 #include "optimizer/clauses.h"
22 #include "optimizer/cost.h"
23 #include "optimizer/geqo.h"
24 #include "optimizer/pathnode.h"
25 #include "optimizer/paths.h"
26 #include "optimizer/plancat.h"
27 #include "optimizer/planner.h"
28 #include "optimizer/prep.h"
29 #include "optimizer/var.h"
30 #include "parser/parse_clause.h"
31 #include "parser/parse_expr.h"
32 #include "parser/parsetree.h"
33 #include "rewrite/rewriteManip.h"
34
35
36 /* These parameters are set by GUC; make_rel_from_joinlist() tests both to choose between geqo() and the exhaustive join search */
37 bool            enable_geqo = false;    /* just in case GUC doesn't set it */
38 int                     geqo_threshold; /* geqo() is used only when enable_geqo is set and the joinlist has at least this many items */
39
40
41 static void set_base_rel_pathlists(PlannerInfo *root);
42 static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti);
43 static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
44                                            RangeTblEntry *rte);
45 static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
46                                                 Index rti, RangeTblEntry *rte);
47 static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
48                                           Index rti, RangeTblEntry *rte);
49 static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
50                                           RangeTblEntry *rte);
51 static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel,
52                                         RangeTblEntry *rte);
53 static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
54 static RelOptInfo *make_one_rel_by_joins(PlannerInfo *root, int levels_needed,
55                                           List *initial_rels);
56 static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
57                                                   bool *differentTypes);
58 static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
59                                           bool *differentTypes);
60 static void compare_tlist_datatypes(List *tlist, List *colTypes,
61                                                 bool *differentTypes);
62 static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
63                                           bool *differentTypes);
64 static void subquery_push_qual(Query *subquery,
65                                    RangeTblEntry *rte, Index rti, Node *qual);
66 static void recurse_push_qual(Node *setOp, Query *topquery,
67                                   RangeTblEntry *rte, Index rti, Node *qual);
68
69
70 /*
71  * make_one_rel
72  *        Finds all possible access paths for executing a query, returning a
73  *        single rel that represents the join of all base rels in the query.
74  */
75 RelOptInfo *
76 make_one_rel(PlannerInfo *root, List *joinlist)
77 {
78         RelOptInfo *rel;
79
80         /*
81          * Generate access paths for the base rels.
82          */
83         set_base_rel_pathlists(root);
84
85         /*
86          * Generate access paths for the entire join tree.
87          */
88         rel = make_rel_from_joinlist(root, joinlist);
89
90         /*
91          * The result should join all and only the query's base rels.
92          */
93 #ifdef USE_ASSERT_CHECKING
94         {
95                 int                     num_base_rels = 0;
96                 Index           rti;
97
98                 for (rti = 1; rti < root->simple_rel_array_size; rti++)
99                 {
100                         RelOptInfo *brel = root->simple_rel_array[rti];
101
102                         if (brel == NULL)
103                                 continue;
104
105                         Assert(brel->relid == rti); /* sanity check on array */
106
107                         /* ignore RTEs that are "other rels" */
108                         if (brel->reloptkind != RELOPT_BASEREL)
109                                 continue;
110
111                         Assert(bms_is_member(rti, rel->relids));
112                         num_base_rels++;
113                 }
114
115                 Assert(bms_num_members(rel->relids) == num_base_rels);
116         }
117 #endif
118
119         return rel;
120 }
121
122 /*
123  * set_base_rel_pathlists
124  *        Finds all paths available for scanning each base-relation entry.
125  *        Sequential scan and any available indices are considered.
126  *        Each useful path is attached to its relation's 'pathlist' field.
127  */
128 static void
129 set_base_rel_pathlists(PlannerInfo *root)
130 {
131         Index           rti;
132
133         for (rti = 1; rti < root->simple_rel_array_size; rti++)
134         {
135                 RelOptInfo *rel = root->simple_rel_array[rti];
136
137                 /* there may be empty slots corresponding to non-baserel RTEs */
138                 if (rel == NULL)
139                         continue;
140
141                 Assert(rel->relid == rti);              /* sanity check on array */
142
143                 /* ignore RTEs that are "other rels" */
144                 if (rel->reloptkind != RELOPT_BASEREL)
145                         continue;
146
147                 set_rel_pathlist(root, rel, rti);
148         }
149 }
150
151 /*
152  * set_rel_pathlist
153  *        Build access paths for a base relation
154  */
155 static void
156 set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti)
157 {
158         RangeTblEntry *rte = rt_fetch(rti, root->parse->rtable);
159
160         if (rte->inh)
161         {
162                 /* It's an "append relation", process accordingly */
163                 set_append_rel_pathlist(root, rel, rti, rte);
164         }
165         else if (rel->rtekind == RTE_SUBQUERY)
166         {
167                 /* Subquery --- generate a separate plan for it */
168                 set_subquery_pathlist(root, rel, rti, rte);
169         }
170         else if (rel->rtekind == RTE_FUNCTION)
171         {
172                 /* RangeFunction --- generate a separate plan for it */
173                 set_function_pathlist(root, rel, rte);
174         }
175         else if (rel->rtekind == RTE_VALUES)
176         {
177                 /* Values list --- generate a separate plan for it */
178                 set_values_pathlist(root, rel, rte);
179         }
180         else
181         {
182                 /* Plain relation */
183                 Assert(rel->rtekind == RTE_RELATION);
184                 set_plain_rel_pathlist(root, rel, rte);
185         }
186
187 #ifdef OPTIMIZER_DEBUG
188         debug_print_rel(root, rel);
189 #endif
190 }
191
192 /*
193  * set_plain_rel_pathlist
194  *        Build access paths for a plain relation (no subquery, no inheritance)
195  */
196 static void
197 set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
198 {
199         /* Mark rel with estimated output rows, width, etc */
200         set_baserel_size_estimates(root, rel);
201
202         /* Test any partial indexes of rel for applicability */
203         check_partial_indexes(root, rel);
204
205         /*
206          * Check to see if we can extract any restriction conditions from join
207          * quals that are OR-of-AND structures.  If so, add them to the rel's
208          * restriction list, and recompute the size estimates.
209          */
210         if (create_or_index_quals(root, rel))
211                 set_baserel_size_estimates(root, rel);
212
213         /*
214          * If we can prove we don't need to scan the rel via constraint exclusion,
215          * set up a single dummy path for it.  (Rather than inventing a special
216          * "dummy" path type, we represent this as an AppendPath with no members.)
217          */
218         if (relation_excluded_by_constraints(rel, rte))
219         {
220                 /* Reset output-rows estimate to 0 */
221                 rel->rows = 0;
222
223                 add_path(rel, (Path *) create_append_path(rel, NIL));
224
225                 /* Select cheapest path (pretty easy in this case...) */
226                 set_cheapest(rel);
227
228                 return;
229         }
230
231         /*
232          * Generate paths and add them to the rel's pathlist.
233          *
234          * Note: add_path() will discard any paths that are dominated by another
235          * available path, keeping only those paths that are superior along at
236          * least one dimension of cost or sortedness.
237          */
238
239         /* Consider sequential scan */
240         add_path(rel, create_seqscan_path(root, rel));
241
242         /* Consider index scans */
243         create_index_paths(root, rel);
244
245         /* Consider TID scans */
246         create_tidscan_paths(root, rel);
247
248         /* Now find the cheapest of the paths for this rel */
249         set_cheapest(rel);
250 }
251
252 /*
253  * set_append_rel_pathlist
254  *        Build access paths for an "append relation"
255  *
256  * The passed-in rel and RTE represent the entire append relation.      The
257  * relation's contents are computed by appending together the output of
258  * the individual member relations.  Note that in the inheritance case,
259  * the first member relation is actually the same table as is mentioned in
260  * the parent RTE ... but it has a different RTE and RelOptInfo.  This is
261  * a good thing because their outputs are not the same size.
262  */
263 static void
264 set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
265                                                 Index rti, RangeTblEntry *rte)
266 {
267         int                     parentRTindex = rti;
268         List       *subpaths = NIL;
269         ListCell   *l;
270
271         /*
272          * XXX for now, can't handle inherited expansion of FOR UPDATE/SHARE; can
273          * we do better?  (This will take some redesign because the executor
274          * currently supposes that every rowMark relation is involved in every row
275          * returned by the query.)
276          */
277         if (get_rowmark(root->parse, parentRTindex))
278                 ereport(ERROR,
279                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
280                                  errmsg("SELECT FOR UPDATE/SHARE is not supported for inheritance queries")));
281
282         /*
283          * Initialize to compute size estimates for whole append relation
284          */
285         rel->rows = 0;
286         rel->width = 0;
287
288         /*
289          * Generate access paths for each member relation, and pick the cheapest
290          * path for each one.
291          */
292         foreach(l, root->append_rel_list)
293         {
294                 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
295                 int                     childRTindex;
296                 RelOptInfo *childrel;
297                 Path       *childpath;
298                 ListCell   *parentvars;
299                 ListCell   *childvars;
300
301                 /* append_rel_list contains all append rels; ignore others */
302                 if (appinfo->parent_relid != parentRTindex)
303                         continue;
304
305                 childRTindex = appinfo->child_relid;
306
307                 /*
308                  * The child rel's RelOptInfo was already created during
309                  * add_base_rels_to_query.
310                  */
311                 childrel = find_base_rel(root, childRTindex);
312                 Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
313
314                 /*
315                  * Copy the parent's targetlist and quals to the child, with
316                  * appropriate substitution of variables.
317                  */
318                 childrel->reltargetlist = (List *)
319                         adjust_appendrel_attrs((Node *) rel->reltargetlist,
320                                                                    appinfo);
321                 childrel->baserestrictinfo = (List *)
322                         adjust_appendrel_attrs((Node *) rel->baserestrictinfo,
323                                                                    appinfo);
324                 childrel->joininfo = (List *)
325                         adjust_appendrel_attrs((Node *) rel->joininfo,
326                                                                    appinfo);
327
328                 /*
329                  * We have to make child entries in the EquivalenceClass data
330                  * structures as well.
331                  */
332                 if (rel->has_eclass_joins)
333                 {
334                         add_child_rel_equivalences(root, appinfo, rel, childrel);
335                         childrel->has_eclass_joins = true;
336                 }
337
338                 /*
339                  * Copy the parent's attr_needed data as well, with appropriate
340                  * adjustment of relids and attribute numbers.
341                  */
342                 pfree(childrel->attr_needed);
343                 childrel->attr_needed =
344                         adjust_appendrel_attr_needed(rel, appinfo,
345                                                                                  childrel->min_attr,
346                                                                                  childrel->max_attr);
347
348                 /*
349                  * Compute the child's access paths, and add the cheapest one to the
350                  * Append path we are constructing for the parent.
351                  *
352                  * It's possible that the child is itself an appendrel, in which case
353                  * we can "cut out the middleman" and just add its child paths to our
354                  * own list.  (We don't try to do this earlier because we need to
355                  * apply both levels of transformation to the quals.) This test also
356                  * handles the case where the child rel need not be scanned because of
357                  * constraint exclusion: it'll have an Append path with no subpaths,
358                  * and will vanish from our list.
359                  */
360                 set_rel_pathlist(root, childrel, childRTindex);
361
362                 childpath = childrel->cheapest_total_path;
363                 if (IsA(childpath, AppendPath))
364                         subpaths = list_concat(subpaths,
365                                                                    ((AppendPath *) childpath)->subpaths);
366                 else
367                         subpaths = lappend(subpaths, childpath);
368
369                 /*
370                  * Propagate size information from the child back to the parent. For
371                  * simplicity, we use the largest widths from any child as the parent
372                  * estimates.
373                  */
374                 rel->rows += childrel->rows;
375                 if (childrel->width > rel->width)
376                         rel->width = childrel->width;
377
378                 forboth(parentvars, rel->reltargetlist,
379                                 childvars, childrel->reltargetlist)
380                 {
381                         Var                *parentvar = (Var *) lfirst(parentvars);
382                         Var                *childvar = (Var *) lfirst(childvars);
383
384                         if (IsA(parentvar, Var) &&
385                                 IsA(childvar, Var))
386                         {
387                                 int                     pndx = parentvar->varattno - rel->min_attr;
388                                 int                     cndx = childvar->varattno - childrel->min_attr;
389
390                                 if (childrel->attr_widths[cndx] > rel->attr_widths[pndx])
391                                         rel->attr_widths[pndx] = childrel->attr_widths[cndx];
392                         }
393                 }
394         }
395
396         /*
397          * Set "raw tuples" count equal to "rows" for the appendrel; needed
398          * because some places assume rel->tuples is valid for any baserel.
399          */
400         rel->tuples = rel->rows;
401
402         /*
403          * Finally, build Append path and install it as the only access path for
404          * the parent rel.      (Note: this is correct even if we have zero or one
405          * live subpath due to constraint exclusion.)
406          */
407         add_path(rel, (Path *) create_append_path(rel, subpaths));
408
409         /* Select cheapest path (pretty easy in this case...) */
410         set_cheapest(rel);
411 }
412
413 /* quick-and-dirty test to see if any joining is needed */
414 static bool
415 has_multiple_baserels(PlannerInfo *root)
416 {
417         int                     num_base_rels = 0;
418         Index           rti;
419
420         for (rti = 1; rti < root->simple_rel_array_size; rti++)
421         {
422                 RelOptInfo *brel = root->simple_rel_array[rti];
423
424                 if (brel == NULL)
425                         continue;
426
427                 /* ignore RTEs that are "other rels" */
428                 if (brel->reloptkind == RELOPT_BASEREL)
429                         if (++num_base_rels > 1)
430                                 return true;
431         }
432         return false;
433 }
434
435 /*
436  * set_subquery_pathlist
437  *              Build the (single) access path for a subquery RTE
438  */
439 static void
440 set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
441                                           Index rti, RangeTblEntry *rte)
442 {
443         Query      *parse = root->parse;
444         Query      *subquery = rte->subquery;
445         bool       *differentTypes;
446         double          tuple_fraction;
447         List       *pathkeys;
448         List       *subquery_pathkeys;
449
450         /* We need a workspace for keeping track of set-op type coercions */
451         differentTypes = (bool *)
452                 palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
453
454         /*
455          * If there are any restriction clauses that have been attached to the
456          * subquery relation, consider pushing them down to become WHERE or HAVING
457          * quals of the subquery itself.  This transformation is useful because it
458          * may allow us to generate a better plan for the subquery than evaluating
459          * all the subquery output rows and then filtering them.
460          *
461          * There are several cases where we cannot push down clauses. Restrictions
462          * involving the subquery are checked by subquery_is_pushdown_safe().
463          * Restrictions on individual clauses are checked by
464          * qual_is_pushdown_safe().  Also, we don't want to push down
465          * pseudoconstant clauses; better to have the gating node above the
466          * subquery.
467          *
468          * Non-pushed-down clauses will get evaluated as qpquals of the
469          * SubqueryScan node.
470          *
471          * XXX Are there any cases where we want to make a policy decision not to
472          * push down a pushable qual, because it'd result in a worse plan?
473          */
474         if (rel->baserestrictinfo != NIL &&
475                 subquery_is_pushdown_safe(subquery, subquery, differentTypes))
476         {
477                 /* OK to consider pushing down individual quals */
478                 List       *upperrestrictlist = NIL;
479                 ListCell   *l;
480
481                 foreach(l, rel->baserestrictinfo)
482                 {
483                         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
484                         Node       *clause = (Node *) rinfo->clause;
485
486                         if (!rinfo->pseudoconstant &&
487                                 qual_is_pushdown_safe(subquery, rti, clause, differentTypes))
488                         {
489                                 /* Push it down */
490                                 subquery_push_qual(subquery, rte, rti, clause);
491                         }
492                         else
493                         {
494                                 /* Keep it in the upper query */
495                                 upperrestrictlist = lappend(upperrestrictlist, rinfo);
496                         }
497                 }
498                 rel->baserestrictinfo = upperrestrictlist;
499         }
500
501         pfree(differentTypes);
502
503         /*
504          * We can safely pass the outer tuple_fraction down to the subquery if the
505          * outer level has no joining, aggregation, or sorting to do. Otherwise
506          * we'd better tell the subquery to plan for full retrieval. (XXX This
507          * could probably be made more intelligent ...)
508          */
509         if (parse->hasAggs ||
510                 parse->groupClause ||
511                 parse->havingQual ||
512                 parse->distinctClause ||
513                 parse->sortClause ||
514                 has_multiple_baserels(root))
515                 tuple_fraction = 0.0;   /* default case */
516         else
517                 tuple_fraction = root->tuple_fraction;
518
519         /* Generate the plan for the subquery */
520         rel->subplan = subquery_planner(root->glob, subquery,
521                                                                         root->query_level + 1,
522                                                                         tuple_fraction,
523                                                                         &subquery_pathkeys);
524
525         /* Copy number of output rows from subplan */
526         rel->tuples = rel->subplan->plan_rows;
527
528         /* Mark rel with estimated output rows, width, etc */
529         set_baserel_size_estimates(root, rel);
530
531         /* Convert subquery pathkeys to outer representation */
532         pathkeys = convert_subquery_pathkeys(root, rel, subquery_pathkeys);
533
534         /* Generate appropriate path */
535         add_path(rel, create_subqueryscan_path(rel, pathkeys));
536
537         /* Select cheapest path (pretty easy in this case...) */
538         set_cheapest(rel);
539 }
540
541 /*
542  * set_function_pathlist
543  *              Build the (single) access path for a function RTE
544  */
545 static void
546 set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
547 {
548         /* Mark rel with estimated output rows, width, etc */
549         set_function_size_estimates(root, rel);
550
551         /* Generate appropriate path */
552         add_path(rel, create_functionscan_path(root, rel));
553
554         /* Select cheapest path (pretty easy in this case...) */
555         set_cheapest(rel);
556 }
557
558 /*
559  * set_values_pathlist
560  *              Build the (single) access path for a VALUES RTE
561  */
562 static void
563 set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
564 {
565         /* Mark rel with estimated output rows, width, etc */
566         set_values_size_estimates(root, rel);
567
568         /* Generate appropriate path */
569         add_path(rel, create_valuesscan_path(root, rel));
570
571         /* Select cheapest path (pretty easy in this case...) */
572         set_cheapest(rel);
573 }
574
575 /*
576  * make_rel_from_joinlist
577  *        Build access paths using a "joinlist" to guide the join path search.
578  *
579  * See comments for deconstruct_jointree() for definition of the joinlist
580  * data structure.
581  */
582 static RelOptInfo *
583 make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
584 {
585         int                     levels_needed;
586         List       *initial_rels;
587         ListCell   *jl;
588
589         /*
590          * Count the number of child joinlist nodes.  This is the depth of the
591          * dynamic-programming algorithm we must employ to consider all ways of
592          * joining the child nodes.
593          */
594         levels_needed = list_length(joinlist);
595
596         if (levels_needed <= 0)
597                 return NULL;                    /* nothing to do? */
598
599         /*
600          * Construct a list of rels corresponding to the child joinlist nodes.
601          * This may contain both base rels and rels constructed according to
602          * sub-joinlists.
603          */
604         initial_rels = NIL;
605         foreach(jl, joinlist)
606         {
607                 Node       *jlnode = (Node *) lfirst(jl);
608                 RelOptInfo *thisrel;
609
610                 if (IsA(jlnode, RangeTblRef))
611                 {
612                         int                     varno = ((RangeTblRef *) jlnode)->rtindex;
613
614                         thisrel = find_base_rel(root, varno);
615                 }
616                 else if (IsA(jlnode, List))
617                 {
618                         /* Recurse to handle subproblem */
619                         thisrel = make_rel_from_joinlist(root, (List *) jlnode);
620                 }
621                 else
622                 {
623                         elog(ERROR, "unrecognized joinlist node type: %d",
624                                  (int) nodeTag(jlnode));
625                         thisrel = NULL;         /* keep compiler quiet */
626                 }
627
628                 initial_rels = lappend(initial_rels, thisrel);
629         }
630
631         if (levels_needed == 1)
632         {
633                 /*
634                  * Single joinlist node, so we're done.
635                  */
636                 return (RelOptInfo *) linitial(initial_rels);
637         }
638         else
639         {
640                 /*
641                  * Consider the different orders in which we could join the rels,
642                  * using either GEQO or regular optimizer.
643                  */
644                 if (enable_geqo && levels_needed >= geqo_threshold)
645                         return geqo(root, levels_needed, initial_rels);
646                 else
647                         return make_one_rel_by_joins(root, levels_needed, initial_rels);
648         }
649 }
650
651 /*
652  * make_one_rel_by_joins
653  *        Find all possible joinpaths for a query by successively finding ways
654  *        to join component relations into join relations.
655  *
656  * 'levels_needed' is the number of iterations needed, ie, the number of
657  *              independent jointree items in the query.  This is > 1.
658  *
659  * 'initial_rels' is a list of RelOptInfo nodes for each independent
660  *              jointree item.  These are the components to be joined together.
661  *
662  * Returns the final level of join relations, i.e., the relation that is
663  * the result of joining all the original relations together.
664  */
665 static RelOptInfo *
666 make_one_rel_by_joins(PlannerInfo *root, int levels_needed, List *initial_rels)
667 {
668         List      **joinitems;
669         int                     lev;
670         RelOptInfo *rel;
671
672         /*
673          * We employ a simple "dynamic programming" algorithm: we first find all
674          * ways to build joins of two jointree items, then all ways to build joins
675          * of three items (from two-item joins and single items), then four-item
676          * joins, and so on until we have considered all ways to join all the
677          * items into one rel.
678          *
679          * joinitems[j] is a list of all the j-item rels.  Initially we set
680          * joinitems[1] to represent all the single-jointree-item relations.
681          */
682         joinitems = (List **) palloc0((levels_needed + 1) * sizeof(List *));
683
684         joinitems[1] = initial_rels;
685
686         for (lev = 2; lev <= levels_needed; lev++)
687         {
688                 ListCell   *x;
689
690                 /*
691                  * Determine all possible pairs of relations to be joined at this
692                  * level, and build paths for making each one from every available
693                  * pair of lower-level relations.
694                  */
695                 joinitems[lev] = make_rels_by_joins(root, lev, joinitems);
696
697                 /*
698                  * Do cleanup work on each just-processed rel.
699                  */
700                 foreach(x, joinitems[lev])
701                 {
702                         rel = (RelOptInfo *) lfirst(x);
703
704                         /* Find and save the cheapest paths for this rel */
705                         set_cheapest(rel);
706
707 #ifdef OPTIMIZER_DEBUG
708                         debug_print_rel(root, rel);
709 #endif
710                 }
711         }
712
713         /*
714          * We should have a single rel at the final level.
715          */
716         if (joinitems[levels_needed] == NIL)
717                 elog(ERROR, "failed to build any %d-way joins", levels_needed);
718         Assert(list_length(joinitems[levels_needed]) == 1);
719
720         rel = (RelOptInfo *) linitial(joinitems[levels_needed]);
721
722         return rel;
723 }
724
725 /*****************************************************************************
726  *                      PUSHING QUALS DOWN INTO SUBQUERIES
727  *****************************************************************************/
728
729 /*
730  * subquery_is_pushdown_safe - is a subquery safe for pushing down quals?
731  *
732  * subquery is the particular component query being checked.  topquery
733  * is the top component of a set-operations tree (the same Query if no
734  * set-op is involved).
735  *
736  * Conditions checked here:
737  *
738  * 1. If the subquery has a LIMIT clause, we must not push down any quals,
739  * since that could change the set of rows returned.
740  *
741  * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
742  * quals into it, because that would change the results.
743  *
744  * 3. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
745  * push quals into each component query, but the quals can only reference
746  * subquery columns that suffer no type coercions in the set operation.
747  * Otherwise there are possible semantic gotchas.  So, we check the
748  * component queries to see if any of them have different output types;
749  * differentTypes[k] is set true if column k has different type in any
750  * component.
751  */
752 static bool
753 subquery_is_pushdown_safe(Query *subquery, Query *topquery,
754                                                   bool *differentTypes)
755 {
756         SetOperationStmt *topop;
757
758         /* Check point 1 */
759         if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
760                 return false;
761
762         /* Are we at top level, or looking at a setop component? */
763         if (subquery == topquery)
764         {
765                 /* Top level, so check any component queries */
766                 if (subquery->setOperations != NULL)
767                         if (!recurse_pushdown_safe(subquery->setOperations, topquery,
768                                                                            differentTypes))
769                                 return false;
770         }
771         else
772         {
773                 /* Setop component must not have more components (too weird) */
774                 if (subquery->setOperations != NULL)
775                         return false;
776                 /* Check whether setop component output types match top level */
777                 topop = (SetOperationStmt *) topquery->setOperations;
778                 Assert(topop && IsA(topop, SetOperationStmt));
779                 compare_tlist_datatypes(subquery->targetList,
780                                                                 topop->colTypes,
781                                                                 differentTypes);
782         }
783         return true;
784 }
785
786 /*
787  * Helper routine to recurse through setOperations tree
788  */
789 static bool
790 recurse_pushdown_safe(Node *setOp, Query *topquery,
791                                           bool *differentTypes)
792 {
793         if (IsA(setOp, RangeTblRef))
794         {
795                 RangeTblRef *rtr = (RangeTblRef *) setOp;
796                 RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable);
797                 Query      *subquery = rte->subquery;
798
799                 Assert(subquery != NULL);
800                 return subquery_is_pushdown_safe(subquery, topquery, differentTypes);
801         }
802         else if (IsA(setOp, SetOperationStmt))
803         {
804                 SetOperationStmt *op = (SetOperationStmt *) setOp;
805
806                 /* EXCEPT is no good */
807                 if (op->op == SETOP_EXCEPT)
808                         return false;
809                 /* Else recurse */
810                 if (!recurse_pushdown_safe(op->larg, topquery, differentTypes))
811                         return false;
812                 if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes))
813                         return false;
814         }
815         else
816         {
817                 elog(ERROR, "unrecognized node type: %d",
818                          (int) nodeTag(setOp));
819         }
820         return true;
821 }
822
823 /*
824  * Compare tlist's datatypes against the list of set-operation result types.
825  * For any items that are different, mark the appropriate element of
826  * differentTypes[] to show that this column will have type conversions.
827  *
828  * We don't have to care about typmods here: the only allowed difference
829  * between set-op input and output typmods is input is a specific typmod
830  * and output is -1, and that does not require a coercion.
831  */
832 static void
833 compare_tlist_datatypes(List *tlist, List *colTypes,
834                                                 bool *differentTypes)
835 {
836         ListCell   *l;
837         ListCell   *colType = list_head(colTypes);
838
839         foreach(l, tlist)
840         {
841                 TargetEntry *tle = (TargetEntry *) lfirst(l);
842
843                 if (tle->resjunk)
844                         continue;                       /* ignore resjunk columns */
845                 if (colType == NULL)
846                         elog(ERROR, "wrong number of tlist entries");
847                 if (exprType((Node *) tle->expr) != lfirst_oid(colType))
848                         differentTypes[tle->resno] = true;
849                 colType = lnext(colType);
850         }
851         if (colType != NULL)
852                 elog(ERROR, "wrong number of tlist entries");
853 }
854
855 /*
856  * qual_is_pushdown_safe - is a particular qual safe to push down?
857  *
858  * qual is a restriction clause applying to the given subquery (whose RTE
859  * has index rti in the parent query).
860  *
861  * Conditions checked here:
862  *
863  * 1. The qual must not contain any subselects (mainly because I'm not sure
864  * it will work correctly: sublinks will already have been transformed into
865  * subplans in the qual, but not in the subquery).
866  *
867  * 2. The qual must not refer to the whole-row output of the subquery
868  * (since there is no easy way to name that within the subquery itself).
869  *
870  * 3. The qual must not refer to any subquery output columns that were
871  * found to have inconsistent types across a set operation tree by
872  * subquery_is_pushdown_safe().
873  *
874  * 4. If the subquery uses DISTINCT ON, we must not push down any quals that
875  * refer to non-DISTINCT output columns, because that could change the set
876  * of rows returned.  This condition is vacuous for DISTINCT, because then
877  * there are no non-DISTINCT output columns, but unfortunately it's fairly
878  * expensive to tell the difference between DISTINCT and DISTINCT ON in the
879  * parsetree representation.  It's cheaper to just make sure all the Vars
880  * in the qual refer to DISTINCT columns.
881  *
882  * 5. We must not push down any quals that refer to subselect outputs that
883  * return sets, else we'd introduce functions-returning-sets into the
884  * subquery's WHERE/HAVING quals.
885  *
886  * 6. We must not push down any quals that refer to subselect outputs that
887  * contain volatile functions, for fear of introducing strange results due
888  * to multiple evaluation of a volatile function.
889  */
890 static bool
891 qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
892                                           bool *differentTypes)
893 {
894         bool            safe = true;
895         List       *vars;
896         ListCell   *vl;
897         Bitmapset  *tested = NULL;
898
899         /* Refuse subselects (point 1) */
900         if (contain_subplans(qual))
901                 return false;
902
903         /*
904          * Examine all Vars used in clause; since it's a restriction clause, all
905          * such Vars must refer to subselect output columns.
906          */
907         vars = pull_var_clause(qual, false);
908         foreach(vl, vars)
909         {
910                 Var                *var = (Var *) lfirst(vl);
911                 TargetEntry *tle;
912
913                 Assert(var->varno == rti);
914
915                 /* Check point 2 */
916                 if (var->varattno == 0)
917                 {
918                         safe = false;
919                         break;
920                 }
921
922                 /*
923                  * We use a bitmapset to avoid testing the same attno more than once.
924                  * (NB: this only works because subquery outputs can't have negative
925                  * attnos.)
926                  */
927                 if (bms_is_member(var->varattno, tested))
928                         continue;
929                 tested = bms_add_member(tested, var->varattno);
930
931                 /* Check point 3 */
932                 if (differentTypes[var->varattno])
933                 {
934                         safe = false;
935                         break;
936                 }
937
938                 /* Must find the tlist element referenced by the Var */
939                 tle = get_tle_by_resno(subquery->targetList, var->varattno);
940                 Assert(tle != NULL);
941                 Assert(!tle->resjunk);
942
943                 /* If subquery uses DISTINCT or DISTINCT ON, check point 4 */
944                 if (subquery->distinctClause != NIL &&
945                         !targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
946                 {
947                         /* non-DISTINCT column, so fail */
948                         safe = false;
949                         break;
950                 }
951
952                 /* Refuse functions returning sets (point 5) */
953                 if (expression_returns_set((Node *) tle->expr))
954                 {
955                         safe = false;
956                         break;
957                 }
958
959                 /* Refuse volatile functions (point 6) */
960                 if (contain_volatile_functions((Node *) tle->expr))
961                 {
962                         safe = false;
963                         break;
964                 }
965         }
966
967         list_free(vars);
968         bms_free(tested);
969
970         return safe;
971 }
972
973 /*
974  * subquery_push_qual - push down a qual that we have determined is safe
975  */
976 static void
977 subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual)
978 {
979         if (subquery->setOperations != NULL)
980         {
981                 /* Recurse to push it separately to each component query */
982                 recurse_push_qual(subquery->setOperations, subquery,
983                                                   rte, rti, qual);
984         }
985         else
986         {
987                 /*
988                  * We need to replace Vars in the qual (which must refer to outputs of
989                  * the subquery) with copies of the subquery's targetlist expressions.
990                  * Note that at this point, any uplevel Vars in the qual should have
991                  * been replaced with Params, so they need no work.
992                  *
993                  * This step also ensures that when we are pushing into a setop tree,
994                  * each component query gets its own copy of the qual.
995                  */
996                 qual = ResolveNew(qual, rti, 0, rte,
997                                                   subquery->targetList,
998                                                   CMD_SELECT, 0);
999
1000                 /*
1001                  * Now attach the qual to the proper place: normally WHERE, but if the
1002                  * subquery uses grouping or aggregation, put it in HAVING (since the
1003                  * qual really refers to the group-result rows).
1004                  */
1005                 if (subquery->hasAggs || subquery->groupClause || subquery->havingQual)
1006                         subquery->havingQual = make_and_qual(subquery->havingQual, qual);
1007                 else
1008                         subquery->jointree->quals =
1009                                 make_and_qual(subquery->jointree->quals, qual);
1010
1011                 /*
1012                  * We need not change the subquery's hasAggs or hasSublinks flags,
1013                  * since we can't be pushing down any aggregates that weren't there
1014                  * before, and we don't push down subselects at all.
1015                  */
1016         }
1017 }
1018
1019 /*
1020  * Helper routine to recurse through setOperations tree
1021  */
1022 static void
1023 recurse_push_qual(Node *setOp, Query *topquery,
1024                                   RangeTblEntry *rte, Index rti, Node *qual)
1025 {
1026         if (IsA(setOp, RangeTblRef))
1027         {
1028                 RangeTblRef *rtr = (RangeTblRef *) setOp;
1029                 RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable);
1030                 Query      *subquery = subrte->subquery;
1031
1032                 Assert(subquery != NULL);
1033                 subquery_push_qual(subquery, rte, rti, qual);
1034         }
1035         else if (IsA(setOp, SetOperationStmt))
1036         {
1037                 SetOperationStmt *op = (SetOperationStmt *) setOp;
1038
1039                 recurse_push_qual(op->larg, topquery, rte, rti, qual);
1040                 recurse_push_qual(op->rarg, topquery, rte, rti, qual);
1041         }
1042         else
1043         {
1044                 elog(ERROR, "unrecognized node type: %d",
1045                          (int) nodeTag(setOp));
1046         }
1047 }
1048
1049 /*****************************************************************************
1050  *                      DEBUG SUPPORT
1051  *****************************************************************************/
1052
1053 #ifdef OPTIMIZER_DEBUG
1054
1055 static void
1056 print_relids(Relids relids)
1057 {
1058         Relids          tmprelids;
1059         int                     x;
1060         bool            first = true;
1061
1062         tmprelids = bms_copy(relids);
1063         while ((x = bms_first_member(tmprelids)) >= 0)
1064         {
1065                 if (!first)
1066                         printf(" ");
1067                 printf("%d", x);
1068                 first = false;
1069         }
1070         bms_free(tmprelids);
1071 }
1072
1073 static void
1074 print_restrictclauses(PlannerInfo *root, List *clauses)
1075 {
1076         ListCell   *l;
1077
1078         foreach(l, clauses)
1079         {
1080                 RestrictInfo *c = lfirst(l);
1081
1082                 print_expr((Node *) c->clause, root->parse->rtable);
1083                 if (lnext(l))
1084                         printf(", ");
1085         }
1086 }
1087
1088 static void
1089 print_path(PlannerInfo *root, Path *path, int indent)
1090 {
1091         const char *ptype;
1092         bool            join = false;
1093         Path       *subpath = NULL;
1094         int                     i;
1095
1096         switch (nodeTag(path))
1097         {
1098                 case T_Path:
1099                         ptype = "SeqScan";
1100                         break;
1101                 case T_IndexPath:
1102                         ptype = "IdxScan";
1103                         break;
1104                 case T_BitmapHeapPath:
1105                         ptype = "BitmapHeapScan";
1106                         break;
1107                 case T_BitmapAndPath:
1108                         ptype = "BitmapAndPath";
1109                         break;
1110                 case T_BitmapOrPath:
1111                         ptype = "BitmapOrPath";
1112                         break;
1113                 case T_TidPath:
1114                         ptype = "TidScan";
1115                         break;
1116                 case T_AppendPath:
1117                         ptype = "Append";
1118                         break;
1119                 case T_ResultPath:
1120                         ptype = "Result";
1121                         break;
1122                 case T_MaterialPath:
1123                         ptype = "Material";
1124                         subpath = ((MaterialPath *) path)->subpath;
1125                         break;
1126                 case T_UniquePath:
1127                         ptype = "Unique";
1128                         subpath = ((UniquePath *) path)->subpath;
1129                         break;
1130                 case T_NestPath:
1131                         ptype = "NestLoop";
1132                         join = true;
1133                         break;
1134                 case T_MergePath:
1135                         ptype = "MergeJoin";
1136                         join = true;
1137                         break;
1138                 case T_HashPath:
1139                         ptype = "HashJoin";
1140                         join = true;
1141                         break;
1142                 default:
1143                         ptype = "???Path";
1144                         break;
1145         }
1146
1147         for (i = 0; i < indent; i++)
1148                 printf("\t");
1149         printf("%s", ptype);
1150
1151         if (path->parent)
1152         {
1153                 printf("(");
1154                 print_relids(path->parent->relids);
1155                 printf(") rows=%.0f", path->parent->rows);
1156         }
1157         printf(" cost=%.2f..%.2f\n", path->startup_cost, path->total_cost);
1158
1159         if (path->pathkeys)
1160         {
1161                 for (i = 0; i < indent; i++)
1162                         printf("\t");
1163                 printf("  pathkeys: ");
1164                 print_pathkeys(path->pathkeys, root->parse->rtable);
1165         }
1166
1167         if (join)
1168         {
1169                 JoinPath   *jp = (JoinPath *) path;
1170
1171                 for (i = 0; i < indent; i++)
1172                         printf("\t");
1173                 printf("  clauses: ");
1174                 print_restrictclauses(root, jp->joinrestrictinfo);
1175                 printf("\n");
1176
1177                 if (IsA(path, MergePath))
1178                 {
1179                         MergePath  *mp = (MergePath *) path;
1180
1181                         if (mp->outersortkeys || mp->innersortkeys)
1182                         {
1183                                 for (i = 0; i < indent; i++)
1184                                         printf("\t");
1185                                 printf("  sortouter=%d sortinner=%d\n",
1186                                            ((mp->outersortkeys) ? 1 : 0),
1187                                            ((mp->innersortkeys) ? 1 : 0));
1188                         }
1189                 }
1190
1191                 print_path(root, jp->outerjoinpath, indent + 1);
1192                 print_path(root, jp->innerjoinpath, indent + 1);
1193         }
1194
1195         if (subpath)
1196                 print_path(root, subpath, indent + 1);
1197 }
1198
1199 void
1200 debug_print_rel(PlannerInfo *root, RelOptInfo *rel)
1201 {
1202         ListCell   *l;
1203
1204         printf("RELOPTINFO (");
1205         print_relids(rel->relids);
1206         printf("): rows=%.0f width=%d\n", rel->rows, rel->width);
1207
1208         if (rel->baserestrictinfo)
1209         {
1210                 printf("\tbaserestrictinfo: ");
1211                 print_restrictclauses(root, rel->baserestrictinfo);
1212                 printf("\n");
1213         }
1214
1215         if (rel->joininfo)
1216         {
1217                 printf("\tjoininfo: ");
1218                 print_restrictclauses(root, rel->joininfo);
1219                 printf("\n");
1220         }
1221
1222         printf("\tpath list:\n");
1223         foreach(l, rel->pathlist)
1224                 print_path(root, lfirst(l), 1);
1225         printf("\n\tcheapest startup path:\n");
1226         print_path(root, rel->cheapest_startup_path, 1);
1227         printf("\n\tcheapest total path:\n");
1228         print_path(root, rel->cheapest_total_path, 1);
1229         printf("\n");
1230         fflush(stdout);
1231 }
1232
1233 #endif   /* OPTIMIZER_DEBUG */