granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/allpaths.c
Repair two constraint-exclusion corner cases triggered by proving that an
[postgresql] / src / backend / optimizer / path / allpaths.c
1 /*-------------------------------------------------------------------------
2  *
3  * allpaths.c
4  *        Routines to find possible search paths for processing a query
5  *
6  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.164 2007/05/26 18:23:01 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15
16 #include "postgres.h"
17
18 #ifdef OPTIMIZER_DEBUG
19 #include "nodes/print.h"
20 #endif
21 #include "optimizer/clauses.h"
22 #include "optimizer/cost.h"
23 #include "optimizer/geqo.h"
24 #include "optimizer/pathnode.h"
25 #include "optimizer/paths.h"
26 #include "optimizer/plancat.h"
27 #include "optimizer/planner.h"
28 #include "optimizer/prep.h"
29 #include "optimizer/var.h"
30 #include "parser/parse_clause.h"
31 #include "parser/parse_expr.h"
32 #include "parser/parsetree.h"
33 #include "rewrite/rewriteManip.h"
34
35
/*
 * Planner settings assigned through GUC.
 *
 * make_rel_from_joinlist() hands a join problem to geqo() only when
 * enable_geqo is true and the number of joinlist items is at least
 * geqo_threshold; otherwise the regular join search is used.
 */
bool		enable_geqo = false;	/* safe fallback if GUC never assigns it */
int			geqo_threshold;
39
40
41 static void set_base_rel_pathlists(PlannerInfo *root);
42 static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
43                                                          Index rti, RangeTblEntry *rte);
44 static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
45                                            RangeTblEntry *rte);
46 static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
47                                                 Index rti, RangeTblEntry *rte);
48 static void set_dummy_rel_pathlist(RelOptInfo *rel);
49 static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
50                                           Index rti, RangeTblEntry *rte);
51 static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
52                                           RangeTblEntry *rte);
53 static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel,
54                                         RangeTblEntry *rte);
55 static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
56 static RelOptInfo *make_one_rel_by_joins(PlannerInfo *root, int levels_needed,
57                                           List *initial_rels);
58 static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
59                                                   bool *differentTypes);
60 static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
61                                           bool *differentTypes);
62 static void compare_tlist_datatypes(List *tlist, List *colTypes,
63                                                 bool *differentTypes);
64 static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
65                                           bool *differentTypes);
66 static void subquery_push_qual(Query *subquery,
67                                    RangeTblEntry *rte, Index rti, Node *qual);
68 static void recurse_push_qual(Node *setOp, Query *topquery,
69                                   RangeTblEntry *rte, Index rti, Node *qual);
70
71
72 /*
73  * make_one_rel
74  *        Finds all possible access paths for executing a query, returning a
75  *        single rel that represents the join of all base rels in the query.
76  */
77 RelOptInfo *
78 make_one_rel(PlannerInfo *root, List *joinlist)
79 {
80         RelOptInfo *rel;
81
82         /*
83          * Generate access paths for the base rels.
84          */
85         set_base_rel_pathlists(root);
86
87         /*
88          * Generate access paths for the entire join tree.
89          */
90         rel = make_rel_from_joinlist(root, joinlist);
91
92         /*
93          * The result should join all and only the query's base rels.
94          */
95 #ifdef USE_ASSERT_CHECKING
96         {
97                 int                     num_base_rels = 0;
98                 Index           rti;
99
100                 for (rti = 1; rti < root->simple_rel_array_size; rti++)
101                 {
102                         RelOptInfo *brel = root->simple_rel_array[rti];
103
104                         if (brel == NULL)
105                                 continue;
106
107                         Assert(brel->relid == rti); /* sanity check on array */
108
109                         /* ignore RTEs that are "other rels" */
110                         if (brel->reloptkind != RELOPT_BASEREL)
111                                 continue;
112
113                         Assert(bms_is_member(rti, rel->relids));
114                         num_base_rels++;
115                 }
116
117                 Assert(bms_num_members(rel->relids) == num_base_rels);
118         }
119 #endif
120
121         return rel;
122 }
123
124 /*
125  * set_base_rel_pathlists
126  *        Finds all paths available for scanning each base-relation entry.
127  *        Sequential scan and any available indices are considered.
128  *        Each useful path is attached to its relation's 'pathlist' field.
129  */
130 static void
131 set_base_rel_pathlists(PlannerInfo *root)
132 {
133         Index           rti;
134
135         for (rti = 1; rti < root->simple_rel_array_size; rti++)
136         {
137                 RelOptInfo *rel = root->simple_rel_array[rti];
138
139                 /* there may be empty slots corresponding to non-baserel RTEs */
140                 if (rel == NULL)
141                         continue;
142
143                 Assert(rel->relid == rti);              /* sanity check on array */
144
145                 /* ignore RTEs that are "other rels" */
146                 if (rel->reloptkind != RELOPT_BASEREL)
147                         continue;
148
149                 set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
150         }
151 }
152
153 /*
154  * set_rel_pathlist
155  *        Build access paths for a base relation
156  */
157 static void
158 set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
159                                  Index rti, RangeTblEntry *rte)
160 {
161         if (rte->inh)
162         {
163                 /* It's an "append relation", process accordingly */
164                 set_append_rel_pathlist(root, rel, rti, rte);
165         }
166         else if (rel->rtekind == RTE_SUBQUERY)
167         {
168                 /* Subquery --- generate a separate plan for it */
169                 set_subquery_pathlist(root, rel, rti, rte);
170         }
171         else if (rel->rtekind == RTE_FUNCTION)
172         {
173                 /* RangeFunction --- generate a separate plan for it */
174                 set_function_pathlist(root, rel, rte);
175         }
176         else if (rel->rtekind == RTE_VALUES)
177         {
178                 /* Values list --- generate a separate plan for it */
179                 set_values_pathlist(root, rel, rte);
180         }
181         else
182         {
183                 /* Plain relation */
184                 Assert(rel->rtekind == RTE_RELATION);
185                 set_plain_rel_pathlist(root, rel, rte);
186         }
187
188 #ifdef OPTIMIZER_DEBUG
189         debug_print_rel(root, rel);
190 #endif
191 }
192
193 /*
194  * set_plain_rel_pathlist
195  *        Build access paths for a plain relation (no subquery, no inheritance)
196  */
197 static void
198 set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
199 {
200         /*
201          * If we can prove we don't need to scan the rel via constraint exclusion,
202          * set up a single dummy path for it.  We only need to check for regular
203          * baserels; if it's an otherrel, CE was already checked in
204          * set_append_rel_pathlist().
205          */
206         if (rel->reloptkind == RELOPT_BASEREL &&
207                 relation_excluded_by_constraints(rel, rte))
208         {
209                 set_dummy_rel_pathlist(rel);
210                 return;
211         }
212
213         /* Mark rel with estimated output rows, width, etc */
214         set_baserel_size_estimates(root, rel);
215
216         /* Test any partial indexes of rel for applicability */
217         check_partial_indexes(root, rel);
218
219         /*
220          * Check to see if we can extract any restriction conditions from join
221          * quals that are OR-of-AND structures.  If so, add them to the rel's
222          * restriction list, and recompute the size estimates.
223          */
224         if (create_or_index_quals(root, rel))
225                 set_baserel_size_estimates(root, rel);
226
227         /*
228          * Generate paths and add them to the rel's pathlist.
229          *
230          * Note: add_path() will discard any paths that are dominated by another
231          * available path, keeping only those paths that are superior along at
232          * least one dimension of cost or sortedness.
233          */
234
235         /* Consider sequential scan */
236         add_path(rel, create_seqscan_path(root, rel));
237
238         /* Consider index scans */
239         create_index_paths(root, rel);
240
241         /* Consider TID scans */
242         create_tidscan_paths(root, rel);
243
244         /* Now find the cheapest of the paths for this rel */
245         set_cheapest(rel);
246 }
247
248 /*
249  * set_append_rel_pathlist
250  *        Build access paths for an "append relation"
251  *
252  * The passed-in rel and RTE represent the entire append relation.      The
253  * relation's contents are computed by appending together the output of
254  * the individual member relations.  Note that in the inheritance case,
255  * the first member relation is actually the same table as is mentioned in
256  * the parent RTE ... but it has a different RTE and RelOptInfo.  This is
257  * a good thing because their outputs are not the same size.
258  */
259 static void
260 set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
261                                                 Index rti, RangeTblEntry *rte)
262 {
263         int                     parentRTindex = rti;
264         List       *subpaths = NIL;
265         ListCell   *l;
266
267         /*
268          * XXX for now, can't handle inherited expansion of FOR UPDATE/SHARE; can
269          * we do better?  (This will take some redesign because the executor
270          * currently supposes that every rowMark relation is involved in every row
271          * returned by the query.)
272          */
273         if (get_rowmark(root->parse, parentRTindex))
274                 ereport(ERROR,
275                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
276                                  errmsg("SELECT FOR UPDATE/SHARE is not supported for inheritance queries")));
277
278         /*
279          * Initialize to compute size estimates for whole append relation
280          */
281         rel->rows = 0;
282         rel->width = 0;
283
284         /*
285          * Generate access paths for each member relation, and pick the cheapest
286          * path for each one.
287          */
288         foreach(l, root->append_rel_list)
289         {
290                 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
291                 int                     childRTindex;
292                 RangeTblEntry *childRTE;
293                 RelOptInfo *childrel;
294                 Path       *childpath;
295                 ListCell   *parentvars;
296                 ListCell   *childvars;
297
298                 /* append_rel_list contains all append rels; ignore others */
299                 if (appinfo->parent_relid != parentRTindex)
300                         continue;
301
302                 childRTindex = appinfo->child_relid;
303                 childRTE = root->simple_rte_array[childRTindex];
304
305                 /*
306                  * The child rel's RelOptInfo was already created during
307                  * add_base_rels_to_query.
308                  */
309                 childrel = find_base_rel(root, childRTindex);
310                 Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
311
312                 /*
313                  * We have to copy the parent's targetlist and quals to the child,
314                  * with appropriate substitution of variables.  However, only the
315                  * baserestrictinfo quals are needed before we can check for
316                  * constraint exclusion; so do that first and then check to see
317                  * if we can disregard this child.
318                  */
319                 childrel->baserestrictinfo = (List *)
320                         adjust_appendrel_attrs((Node *) rel->baserestrictinfo,
321                                                                    appinfo);
322
323                 if (relation_excluded_by_constraints(childrel, childRTE))
324                 {
325                         /*
326                          * This child need not be scanned, so we can omit it from the
327                          * appendrel.  Mark it with a dummy cheapest-path though, in
328                          * case best_appendrel_indexscan() looks at it later.
329                          */
330                         set_dummy_rel_pathlist(childrel);
331                         continue;
332                 }
333
334                 /* CE failed, so finish copying targetlist and join quals */
335                 childrel->joininfo = (List *)
336                         adjust_appendrel_attrs((Node *) rel->joininfo,
337                                                                    appinfo);
338                 childrel->reltargetlist = (List *)
339                         adjust_appendrel_attrs((Node *) rel->reltargetlist,
340                                                                    appinfo);
341
342                 /*
343                  * We have to make child entries in the EquivalenceClass data
344                  * structures as well.
345                  */
346                 if (rel->has_eclass_joins)
347                 {
348                         add_child_rel_equivalences(root, appinfo, rel, childrel);
349                         childrel->has_eclass_joins = true;
350                 }
351
352                 /*
353                  * Copy the parent's attr_needed data as well, with appropriate
354                  * adjustment of relids and attribute numbers.
355                  */
356                 pfree(childrel->attr_needed);
357                 childrel->attr_needed =
358                         adjust_appendrel_attr_needed(rel, appinfo,
359                                                                                  childrel->min_attr,
360                                                                                  childrel->max_attr);
361
362                 /*
363                  * Compute the child's access paths, and add the cheapest one to the
364                  * Append path we are constructing for the parent.
365                  *
366                  * It's possible that the child is itself an appendrel, in which case
367                  * we can "cut out the middleman" and just add its child paths to our
368                  * own list.  (We don't try to do this earlier because we need to
369                  * apply both levels of transformation to the quals.)
370                  */
371                 set_rel_pathlist(root, childrel, childRTindex, childRTE);
372
373                 childpath = childrel->cheapest_total_path;
374                 if (IsA(childpath, AppendPath))
375                         subpaths = list_concat(subpaths,
376                                                                    ((AppendPath *) childpath)->subpaths);
377                 else
378                         subpaths = lappend(subpaths, childpath);
379
380                 /*
381                  * Propagate size information from the child back to the parent. For
382                  * simplicity, we use the largest widths from any child as the parent
383                  * estimates.  (If you want to change this, beware of child
384                  * attr_widths[] entries that haven't been set and are still 0.)
385                  */
386                 rel->rows += childrel->rows;
387                 if (childrel->width > rel->width)
388                         rel->width = childrel->width;
389
390                 forboth(parentvars, rel->reltargetlist,
391                                 childvars, childrel->reltargetlist)
392                 {
393                         Var                *parentvar = (Var *) lfirst(parentvars);
394                         Var                *childvar = (Var *) lfirst(childvars);
395
396                         if (IsA(parentvar, Var) &&
397                                 IsA(childvar, Var))
398                         {
399                                 int                     pndx = parentvar->varattno - rel->min_attr;
400                                 int                     cndx = childvar->varattno - childrel->min_attr;
401
402                                 if (childrel->attr_widths[cndx] > rel->attr_widths[pndx])
403                                         rel->attr_widths[pndx] = childrel->attr_widths[cndx];
404                         }
405                 }
406         }
407
408         /*
409          * Set "raw tuples" count equal to "rows" for the appendrel; needed
410          * because some places assume rel->tuples is valid for any baserel.
411          */
412         rel->tuples = rel->rows;
413
414         /*
415          * Finally, build Append path and install it as the only access path for
416          * the parent rel.      (Note: this is correct even if we have zero or one
417          * live subpath due to constraint exclusion.)
418          */
419         add_path(rel, (Path *) create_append_path(rel, subpaths));
420
421         /* Select cheapest path (pretty easy in this case...) */
422         set_cheapest(rel);
423 }
424
425 /*
426  * set_dummy_rel_pathlist
427  *        Build a dummy path for a relation that's been excluded by constraints
428  *
429  * Rather than inventing a special "dummy" path type, we represent this as an
430  * AppendPath with no members.
431  */
432 static void
433 set_dummy_rel_pathlist(RelOptInfo *rel)
434 {
435         /* Set dummy size estimates --- we leave attr_widths[] as zeroes */
436         rel->rows = 0;
437         rel->width = 0;
438
439         add_path(rel, (Path *) create_append_path(rel, NIL));
440
441         /* Select cheapest path (pretty easy in this case...) */
442         set_cheapest(rel);
443 }
444
445 /* quick-and-dirty test to see if any joining is needed */
446 static bool
447 has_multiple_baserels(PlannerInfo *root)
448 {
449         int                     num_base_rels = 0;
450         Index           rti;
451
452         for (rti = 1; rti < root->simple_rel_array_size; rti++)
453         {
454                 RelOptInfo *brel = root->simple_rel_array[rti];
455
456                 if (brel == NULL)
457                         continue;
458
459                 /* ignore RTEs that are "other rels" */
460                 if (brel->reloptkind == RELOPT_BASEREL)
461                         if (++num_base_rels > 1)
462                                 return true;
463         }
464         return false;
465 }
466
467 /*
468  * set_subquery_pathlist
469  *              Build the (single) access path for a subquery RTE
470  */
471 static void
472 set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
473                                           Index rti, RangeTblEntry *rte)
474 {
475         Query      *parse = root->parse;
476         Query      *subquery = rte->subquery;
477         bool       *differentTypes;
478         double          tuple_fraction;
479         PlannerInfo *subroot;
480         List       *pathkeys;
481
482         /* We need a workspace for keeping track of set-op type coercions */
483         differentTypes = (bool *)
484                 palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
485
486         /*
487          * If there are any restriction clauses that have been attached to the
488          * subquery relation, consider pushing them down to become WHERE or HAVING
489          * quals of the subquery itself.  This transformation is useful because it
490          * may allow us to generate a better plan for the subquery than evaluating
491          * all the subquery output rows and then filtering them.
492          *
493          * There are several cases where we cannot push down clauses. Restrictions
494          * involving the subquery are checked by subquery_is_pushdown_safe().
495          * Restrictions on individual clauses are checked by
496          * qual_is_pushdown_safe().  Also, we don't want to push down
497          * pseudoconstant clauses; better to have the gating node above the
498          * subquery.
499          *
500          * Non-pushed-down clauses will get evaluated as qpquals of the
501          * SubqueryScan node.
502          *
503          * XXX Are there any cases where we want to make a policy decision not to
504          * push down a pushable qual, because it'd result in a worse plan?
505          */
506         if (rel->baserestrictinfo != NIL &&
507                 subquery_is_pushdown_safe(subquery, subquery, differentTypes))
508         {
509                 /* OK to consider pushing down individual quals */
510                 List       *upperrestrictlist = NIL;
511                 ListCell   *l;
512
513                 foreach(l, rel->baserestrictinfo)
514                 {
515                         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
516                         Node       *clause = (Node *) rinfo->clause;
517
518                         if (!rinfo->pseudoconstant &&
519                                 qual_is_pushdown_safe(subquery, rti, clause, differentTypes))
520                         {
521                                 /* Push it down */
522                                 subquery_push_qual(subquery, rte, rti, clause);
523                         }
524                         else
525                         {
526                                 /* Keep it in the upper query */
527                                 upperrestrictlist = lappend(upperrestrictlist, rinfo);
528                         }
529                 }
530                 rel->baserestrictinfo = upperrestrictlist;
531         }
532
533         pfree(differentTypes);
534
535         /*
536          * We can safely pass the outer tuple_fraction down to the subquery if the
537          * outer level has no joining, aggregation, or sorting to do. Otherwise
538          * we'd better tell the subquery to plan for full retrieval. (XXX This
539          * could probably be made more intelligent ...)
540          */
541         if (parse->hasAggs ||
542                 parse->groupClause ||
543                 parse->havingQual ||
544                 parse->distinctClause ||
545                 parse->sortClause ||
546                 has_multiple_baserels(root))
547                 tuple_fraction = 0.0;   /* default case */
548         else
549                 tuple_fraction = root->tuple_fraction;
550
551         /* Generate the plan for the subquery */
552         rel->subplan = subquery_planner(root->glob, subquery,
553                                                                         root->query_level + 1,
554                                                                         tuple_fraction,
555                                                                         &subroot);
556         rel->subrtable = subroot->parse->rtable;
557
558         /* Copy number of output rows from subplan */
559         rel->tuples = rel->subplan->plan_rows;
560
561         /* Mark rel with estimated output rows, width, etc */
562         set_baserel_size_estimates(root, rel);
563
564         /* Convert subquery pathkeys to outer representation */
565         pathkeys = convert_subquery_pathkeys(root, rel, subroot->query_pathkeys);
566
567         /* Generate appropriate path */
568         add_path(rel, create_subqueryscan_path(rel, pathkeys));
569
570         /* Select cheapest path (pretty easy in this case...) */
571         set_cheapest(rel);
572 }
573
574 /*
575  * set_function_pathlist
576  *              Build the (single) access path for a function RTE
577  */
578 static void
579 set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
580 {
581         /* Mark rel with estimated output rows, width, etc */
582         set_function_size_estimates(root, rel);
583
584         /* Generate appropriate path */
585         add_path(rel, create_functionscan_path(root, rel));
586
587         /* Select cheapest path (pretty easy in this case...) */
588         set_cheapest(rel);
589 }
590
591 /*
592  * set_values_pathlist
593  *              Build the (single) access path for a VALUES RTE
594  */
595 static void
596 set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
597 {
598         /* Mark rel with estimated output rows, width, etc */
599         set_values_size_estimates(root, rel);
600
601         /* Generate appropriate path */
602         add_path(rel, create_valuesscan_path(root, rel));
603
604         /* Select cheapest path (pretty easy in this case...) */
605         set_cheapest(rel);
606 }
607
608 /*
609  * make_rel_from_joinlist
610  *        Build access paths using a "joinlist" to guide the join path search.
611  *
612  * See comments for deconstruct_jointree() for definition of the joinlist
613  * data structure.
614  */
615 static RelOptInfo *
616 make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
617 {
618         int                     levels_needed;
619         List       *initial_rels;
620         ListCell   *jl;
621
622         /*
623          * Count the number of child joinlist nodes.  This is the depth of the
624          * dynamic-programming algorithm we must employ to consider all ways of
625          * joining the child nodes.
626          */
627         levels_needed = list_length(joinlist);
628
629         if (levels_needed <= 0)
630                 return NULL;                    /* nothing to do? */
631
632         /*
633          * Construct a list of rels corresponding to the child joinlist nodes.
634          * This may contain both base rels and rels constructed according to
635          * sub-joinlists.
636          */
637         initial_rels = NIL;
638         foreach(jl, joinlist)
639         {
640                 Node       *jlnode = (Node *) lfirst(jl);
641                 RelOptInfo *thisrel;
642
643                 if (IsA(jlnode, RangeTblRef))
644                 {
645                         int                     varno = ((RangeTblRef *) jlnode)->rtindex;
646
647                         thisrel = find_base_rel(root, varno);
648                 }
649                 else if (IsA(jlnode, List))
650                 {
651                         /* Recurse to handle subproblem */
652                         thisrel = make_rel_from_joinlist(root, (List *) jlnode);
653                 }
654                 else
655                 {
656                         elog(ERROR, "unrecognized joinlist node type: %d",
657                                  (int) nodeTag(jlnode));
658                         thisrel = NULL;         /* keep compiler quiet */
659                 }
660
661                 initial_rels = lappend(initial_rels, thisrel);
662         }
663
664         if (levels_needed == 1)
665         {
666                 /*
667                  * Single joinlist node, so we're done.
668                  */
669                 return (RelOptInfo *) linitial(initial_rels);
670         }
671         else
672         {
673                 /*
674                  * Consider the different orders in which we could join the rels,
675                  * using either GEQO or regular optimizer.
676                  */
677                 if (enable_geqo && levels_needed >= geqo_threshold)
678                         return geqo(root, levels_needed, initial_rels);
679                 else
680                         return make_one_rel_by_joins(root, levels_needed, initial_rels);
681         }
682 }
683
684 /*
685  * make_one_rel_by_joins
686  *        Find all possible joinpaths for a query by successively finding ways
687  *        to join component relations into join relations.
688  *
689  * 'levels_needed' is the number of iterations needed, ie, the number of
690  *              independent jointree items in the query.  This is > 1.
691  *
692  * 'initial_rels' is a list of RelOptInfo nodes for each independent
693  *              jointree item.  These are the components to be joined together.
694  *
695  * Returns the final level of join relations, i.e., the relation that is
696  * the result of joining all the original relations together.
697  */
698 static RelOptInfo *
699 make_one_rel_by_joins(PlannerInfo *root, int levels_needed, List *initial_rels)
700 {
701         List      **joinitems;
702         int                     lev;
703         RelOptInfo *rel;
704
705         /*
706          * We employ a simple "dynamic programming" algorithm: we first find all
707          * ways to build joins of two jointree items, then all ways to build joins
708          * of three items (from two-item joins and single items), then four-item
709          * joins, and so on until we have considered all ways to join all the
710          * items into one rel.
711          *
712          * joinitems[j] is a list of all the j-item rels.  Initially we set
713          * joinitems[1] to represent all the single-jointree-item relations.
714          */
715         joinitems = (List **) palloc0((levels_needed + 1) * sizeof(List *));
716
717         joinitems[1] = initial_rels;
718
719         for (lev = 2; lev <= levels_needed; lev++)
720         {
721                 ListCell   *x;
722
723                 /*
724                  * Determine all possible pairs of relations to be joined at this
725                  * level, and build paths for making each one from every available
726                  * pair of lower-level relations.
727                  */
728                 joinitems[lev] = make_rels_by_joins(root, lev, joinitems);
729
730                 /*
731                  * Do cleanup work on each just-processed rel.
732                  */
733                 foreach(x, joinitems[lev])
734                 {
735                         rel = (RelOptInfo *) lfirst(x);
736
737                         /* Find and save the cheapest paths for this rel */
738                         set_cheapest(rel);
739
740 #ifdef OPTIMIZER_DEBUG
741                         debug_print_rel(root, rel);
742 #endif
743                 }
744         }
745
746         /*
747          * We should have a single rel at the final level.
748          */
749         if (joinitems[levels_needed] == NIL)
750                 elog(ERROR, "failed to build any %d-way joins", levels_needed);
751         Assert(list_length(joinitems[levels_needed]) == 1);
752
753         rel = (RelOptInfo *) linitial(joinitems[levels_needed]);
754
755         return rel;
756 }
757
758 /*****************************************************************************
759  *                      PUSHING QUALS DOWN INTO SUBQUERIES
760  *****************************************************************************/
761
762 /*
763  * subquery_is_pushdown_safe - is a subquery safe for pushing down quals?
764  *
765  * subquery is the particular component query being checked.  topquery
766  * is the top component of a set-operations tree (the same Query if no
767  * set-op is involved).
768  *
769  * Conditions checked here:
770  *
771  * 1. If the subquery has a LIMIT clause, we must not push down any quals,
772  * since that could change the set of rows returned.
773  *
774  * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
775  * quals into it, because that would change the results.
776  *
777  * 3. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
778  * push quals into each component query, but the quals can only reference
779  * subquery columns that suffer no type coercions in the set operation.
780  * Otherwise there are possible semantic gotchas.  So, we check the
781  * component queries to see if any of them have different output types;
782  * differentTypes[k] is set true if column k has different type in any
783  * component.
784  */
785 static bool
786 subquery_is_pushdown_safe(Query *subquery, Query *topquery,
787                                                   bool *differentTypes)
788 {
789         SetOperationStmt *topop;
790
791         /* Check point 1 */
792         if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
793                 return false;
794
795         /* Are we at top level, or looking at a setop component? */
796         if (subquery == topquery)
797         {
798                 /* Top level, so check any component queries */
799                 if (subquery->setOperations != NULL)
800                         if (!recurse_pushdown_safe(subquery->setOperations, topquery,
801                                                                            differentTypes))
802                                 return false;
803         }
804         else
805         {
806                 /* Setop component must not have more components (too weird) */
807                 if (subquery->setOperations != NULL)
808                         return false;
809                 /* Check whether setop component output types match top level */
810                 topop = (SetOperationStmt *) topquery->setOperations;
811                 Assert(topop && IsA(topop, SetOperationStmt));
812                 compare_tlist_datatypes(subquery->targetList,
813                                                                 topop->colTypes,
814                                                                 differentTypes);
815         }
816         return true;
817 }
818
819 /*
820  * Helper routine to recurse through setOperations tree
821  */
822 static bool
823 recurse_pushdown_safe(Node *setOp, Query *topquery,
824                                           bool *differentTypes)
825 {
826         if (IsA(setOp, RangeTblRef))
827         {
828                 RangeTblRef *rtr = (RangeTblRef *) setOp;
829                 RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable);
830                 Query      *subquery = rte->subquery;
831
832                 Assert(subquery != NULL);
833                 return subquery_is_pushdown_safe(subquery, topquery, differentTypes);
834         }
835         else if (IsA(setOp, SetOperationStmt))
836         {
837                 SetOperationStmt *op = (SetOperationStmt *) setOp;
838
839                 /* EXCEPT is no good */
840                 if (op->op == SETOP_EXCEPT)
841                         return false;
842                 /* Else recurse */
843                 if (!recurse_pushdown_safe(op->larg, topquery, differentTypes))
844                         return false;
845                 if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes))
846                         return false;
847         }
848         else
849         {
850                 elog(ERROR, "unrecognized node type: %d",
851                          (int) nodeTag(setOp));
852         }
853         return true;
854 }
855
856 /*
857  * Compare tlist's datatypes against the list of set-operation result types.
858  * For any items that are different, mark the appropriate element of
859  * differentTypes[] to show that this column will have type conversions.
860  *
861  * We don't have to care about typmods here: the only allowed difference
862  * between set-op input and output typmods is input is a specific typmod
863  * and output is -1, and that does not require a coercion.
864  */
865 static void
866 compare_tlist_datatypes(List *tlist, List *colTypes,
867                                                 bool *differentTypes)
868 {
869         ListCell   *l;
870         ListCell   *colType = list_head(colTypes);
871
872         foreach(l, tlist)
873         {
874                 TargetEntry *tle = (TargetEntry *) lfirst(l);
875
876                 if (tle->resjunk)
877                         continue;                       /* ignore resjunk columns */
878                 if (colType == NULL)
879                         elog(ERROR, "wrong number of tlist entries");
880                 if (exprType((Node *) tle->expr) != lfirst_oid(colType))
881                         differentTypes[tle->resno] = true;
882                 colType = lnext(colType);
883         }
884         if (colType != NULL)
885                 elog(ERROR, "wrong number of tlist entries");
886 }
887
888 /*
889  * qual_is_pushdown_safe - is a particular qual safe to push down?
890  *
891  * qual is a restriction clause applying to the given subquery (whose RTE
892  * has index rti in the parent query).
893  *
894  * Conditions checked here:
895  *
896  * 1. The qual must not contain any subselects (mainly because I'm not sure
897  * it will work correctly: sublinks will already have been transformed into
898  * subplans in the qual, but not in the subquery).
899  *
900  * 2. The qual must not refer to the whole-row output of the subquery
901  * (since there is no easy way to name that within the subquery itself).
902  *
903  * 3. The qual must not refer to any subquery output columns that were
904  * found to have inconsistent types across a set operation tree by
905  * subquery_is_pushdown_safe().
906  *
907  * 4. If the subquery uses DISTINCT ON, we must not push down any quals that
908  * refer to non-DISTINCT output columns, because that could change the set
909  * of rows returned.  This condition is vacuous for DISTINCT, because then
910  * there are no non-DISTINCT output columns, but unfortunately it's fairly
911  * expensive to tell the difference between DISTINCT and DISTINCT ON in the
912  * parsetree representation.  It's cheaper to just make sure all the Vars
913  * in the qual refer to DISTINCT columns.
914  *
915  * 5. We must not push down any quals that refer to subselect outputs that
916  * return sets, else we'd introduce functions-returning-sets into the
917  * subquery's WHERE/HAVING quals.
918  *
919  * 6. We must not push down any quals that refer to subselect outputs that
920  * contain volatile functions, for fear of introducing strange results due
921  * to multiple evaluation of a volatile function.
922  */
923 static bool
924 qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
925                                           bool *differentTypes)
926 {
927         bool            safe = true;
928         List       *vars;
929         ListCell   *vl;
930         Bitmapset  *tested = NULL;
931
932         /* Refuse subselects (point 1) */
933         if (contain_subplans(qual))
934                 return false;
935
936         /*
937          * Examine all Vars used in clause; since it's a restriction clause, all
938          * such Vars must refer to subselect output columns.
939          */
940         vars = pull_var_clause(qual, false);
941         foreach(vl, vars)
942         {
943                 Var                *var = (Var *) lfirst(vl);
944                 TargetEntry *tle;
945
946                 Assert(var->varno == rti);
947
948                 /* Check point 2 */
949                 if (var->varattno == 0)
950                 {
951                         safe = false;
952                         break;
953                 }
954
955                 /*
956                  * We use a bitmapset to avoid testing the same attno more than once.
957                  * (NB: this only works because subquery outputs can't have negative
958                  * attnos.)
959                  */
960                 if (bms_is_member(var->varattno, tested))
961                         continue;
962                 tested = bms_add_member(tested, var->varattno);
963
964                 /* Check point 3 */
965                 if (differentTypes[var->varattno])
966                 {
967                         safe = false;
968                         break;
969                 }
970
971                 /* Must find the tlist element referenced by the Var */
972                 tle = get_tle_by_resno(subquery->targetList, var->varattno);
973                 Assert(tle != NULL);
974                 Assert(!tle->resjunk);
975
976                 /* If subquery uses DISTINCT or DISTINCT ON, check point 4 */
977                 if (subquery->distinctClause != NIL &&
978                         !targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
979                 {
980                         /* non-DISTINCT column, so fail */
981                         safe = false;
982                         break;
983                 }
984
985                 /* Refuse functions returning sets (point 5) */
986                 if (expression_returns_set((Node *) tle->expr))
987                 {
988                         safe = false;
989                         break;
990                 }
991
992                 /* Refuse volatile functions (point 6) */
993                 if (contain_volatile_functions((Node *) tle->expr))
994                 {
995                         safe = false;
996                         break;
997                 }
998         }
999
1000         list_free(vars);
1001         bms_free(tested);
1002
1003         return safe;
1004 }
1005
1006 /*
1007  * subquery_push_qual - push down a qual that we have determined is safe
1008  */
1009 static void
1010 subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual)
1011 {
1012         if (subquery->setOperations != NULL)
1013         {
1014                 /* Recurse to push it separately to each component query */
1015                 recurse_push_qual(subquery->setOperations, subquery,
1016                                                   rte, rti, qual);
1017         }
1018         else
1019         {
1020                 /*
1021                  * We need to replace Vars in the qual (which must refer to outputs of
1022                  * the subquery) with copies of the subquery's targetlist expressions.
1023                  * Note that at this point, any uplevel Vars in the qual should have
1024                  * been replaced with Params, so they need no work.
1025                  *
1026                  * This step also ensures that when we are pushing into a setop tree,
1027                  * each component query gets its own copy of the qual.
1028                  */
1029                 qual = ResolveNew(qual, rti, 0, rte,
1030                                                   subquery->targetList,
1031                                                   CMD_SELECT, 0);
1032
1033                 /*
1034                  * Now attach the qual to the proper place: normally WHERE, but if the
1035                  * subquery uses grouping or aggregation, put it in HAVING (since the
1036                  * qual really refers to the group-result rows).
1037                  */
1038                 if (subquery->hasAggs || subquery->groupClause || subquery->havingQual)
1039                         subquery->havingQual = make_and_qual(subquery->havingQual, qual);
1040                 else
1041                         subquery->jointree->quals =
1042                                 make_and_qual(subquery->jointree->quals, qual);
1043
1044                 /*
1045                  * We need not change the subquery's hasAggs or hasSublinks flags,
1046                  * since we can't be pushing down any aggregates that weren't there
1047                  * before, and we don't push down subselects at all.
1048                  */
1049         }
1050 }
1051
1052 /*
1053  * Helper routine to recurse through setOperations tree
1054  */
1055 static void
1056 recurse_push_qual(Node *setOp, Query *topquery,
1057                                   RangeTblEntry *rte, Index rti, Node *qual)
1058 {
1059         if (IsA(setOp, RangeTblRef))
1060         {
1061                 RangeTblRef *rtr = (RangeTblRef *) setOp;
1062                 RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable);
1063                 Query      *subquery = subrte->subquery;
1064
1065                 Assert(subquery != NULL);
1066                 subquery_push_qual(subquery, rte, rti, qual);
1067         }
1068         else if (IsA(setOp, SetOperationStmt))
1069         {
1070                 SetOperationStmt *op = (SetOperationStmt *) setOp;
1071
1072                 recurse_push_qual(op->larg, topquery, rte, rti, qual);
1073                 recurse_push_qual(op->rarg, topquery, rte, rti, qual);
1074         }
1075         else
1076         {
1077                 elog(ERROR, "unrecognized node type: %d",
1078                          (int) nodeTag(setOp));
1079         }
1080 }
1081
1082 /*****************************************************************************
1083  *                      DEBUG SUPPORT
1084  *****************************************************************************/
1085
1086 #ifdef OPTIMIZER_DEBUG
1087
1088 static void
1089 print_relids(Relids relids)
1090 {
1091         Relids          tmprelids;
1092         int                     x;
1093         bool            first = true;
1094
1095         tmprelids = bms_copy(relids);
1096         while ((x = bms_first_member(tmprelids)) >= 0)
1097         {
1098                 if (!first)
1099                         printf(" ");
1100                 printf("%d", x);
1101                 first = false;
1102         }
1103         bms_free(tmprelids);
1104 }
1105
1106 static void
1107 print_restrictclauses(PlannerInfo *root, List *clauses)
1108 {
1109         ListCell   *l;
1110
1111         foreach(l, clauses)
1112         {
1113                 RestrictInfo *c = lfirst(l);
1114
1115                 print_expr((Node *) c->clause, root->parse->rtable);
1116                 if (lnext(l))
1117                         printf(", ");
1118         }
1119 }
1120
1121 static void
1122 print_path(PlannerInfo *root, Path *path, int indent)
1123 {
1124         const char *ptype;
1125         bool            join = false;
1126         Path       *subpath = NULL;
1127         int                     i;
1128
1129         switch (nodeTag(path))
1130         {
1131                 case T_Path:
1132                         ptype = "SeqScan";
1133                         break;
1134                 case T_IndexPath:
1135                         ptype = "IdxScan";
1136                         break;
1137                 case T_BitmapHeapPath:
1138                         ptype = "BitmapHeapScan";
1139                         break;
1140                 case T_BitmapAndPath:
1141                         ptype = "BitmapAndPath";
1142                         break;
1143                 case T_BitmapOrPath:
1144                         ptype = "BitmapOrPath";
1145                         break;
1146                 case T_TidPath:
1147                         ptype = "TidScan";
1148                         break;
1149                 case T_AppendPath:
1150                         ptype = "Append";
1151                         break;
1152                 case T_ResultPath:
1153                         ptype = "Result";
1154                         break;
1155                 case T_MaterialPath:
1156                         ptype = "Material";
1157                         subpath = ((MaterialPath *) path)->subpath;
1158                         break;
1159                 case T_UniquePath:
1160                         ptype = "Unique";
1161                         subpath = ((UniquePath *) path)->subpath;
1162                         break;
1163                 case T_NestPath:
1164                         ptype = "NestLoop";
1165                         join = true;
1166                         break;
1167                 case T_MergePath:
1168                         ptype = "MergeJoin";
1169                         join = true;
1170                         break;
1171                 case T_HashPath:
1172                         ptype = "HashJoin";
1173                         join = true;
1174                         break;
1175                 default:
1176                         ptype = "???Path";
1177                         break;
1178         }
1179
1180         for (i = 0; i < indent; i++)
1181                 printf("\t");
1182         printf("%s", ptype);
1183
1184         if (path->parent)
1185         {
1186                 printf("(");
1187                 print_relids(path->parent->relids);
1188                 printf(") rows=%.0f", path->parent->rows);
1189         }
1190         printf(" cost=%.2f..%.2f\n", path->startup_cost, path->total_cost);
1191
1192         if (path->pathkeys)
1193         {
1194                 for (i = 0; i < indent; i++)
1195                         printf("\t");
1196                 printf("  pathkeys: ");
1197                 print_pathkeys(path->pathkeys, root->parse->rtable);
1198         }
1199
1200         if (join)
1201         {
1202                 JoinPath   *jp = (JoinPath *) path;
1203
1204                 for (i = 0; i < indent; i++)
1205                         printf("\t");
1206                 printf("  clauses: ");
1207                 print_restrictclauses(root, jp->joinrestrictinfo);
1208                 printf("\n");
1209
1210                 if (IsA(path, MergePath))
1211                 {
1212                         MergePath  *mp = (MergePath *) path;
1213
1214                         if (mp->outersortkeys || mp->innersortkeys)
1215                         {
1216                                 for (i = 0; i < indent; i++)
1217                                         printf("\t");
1218                                 printf("  sortouter=%d sortinner=%d\n",
1219                                            ((mp->outersortkeys) ? 1 : 0),
1220                                            ((mp->innersortkeys) ? 1 : 0));
1221                         }
1222                 }
1223
1224                 print_path(root, jp->outerjoinpath, indent + 1);
1225                 print_path(root, jp->innerjoinpath, indent + 1);
1226         }
1227
1228         if (subpath)
1229                 print_path(root, subpath, indent + 1);
1230 }
1231
1232 void
1233 debug_print_rel(PlannerInfo *root, RelOptInfo *rel)
1234 {
1235         ListCell   *l;
1236
1237         printf("RELOPTINFO (");
1238         print_relids(rel->relids);
1239         printf("): rows=%.0f width=%d\n", rel->rows, rel->width);
1240
1241         if (rel->baserestrictinfo)
1242         {
1243                 printf("\tbaserestrictinfo: ");
1244                 print_restrictclauses(root, rel->baserestrictinfo);
1245                 printf("\n");
1246         }
1247
1248         if (rel->joininfo)
1249         {
1250                 printf("\tjoininfo: ");
1251                 print_restrictclauses(root, rel->joininfo);
1252                 printf("\n");
1253         }
1254
1255         printf("\tpath list:\n");
1256         foreach(l, rel->pathlist)
1257                 print_path(root, lfirst(l), 1);
1258         printf("\n\tcheapest startup path:\n");
1259         print_path(root, rel->cheapest_startup_path, 1);
1260         printf("\n\tcheapest total path:\n");
1261         print_path(root, rel->cheapest_total_path, 1);
1262         printf("\n");
1263         fflush(stdout);
1264 }
1265
1266 #endif   /* OPTIMIZER_DEBUG */