]> granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/allpaths.c
Update copyrights in source tree to 2008.
[postgresql] / src / backend / optimizer / path / allpaths.c
1 /*-------------------------------------------------------------------------
2  *
3  * allpaths.c
4  *        Routines to find possible search paths for processing a query
5  *
6  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.167 2008/01/01 19:45:50 momjian Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15
16 #include "postgres.h"
17
18 #ifdef OPTIMIZER_DEBUG
19 #include "nodes/print.h"
20 #endif
21 #include "optimizer/clauses.h"
22 #include "optimizer/cost.h"
23 #include "optimizer/geqo.h"
24 #include "optimizer/pathnode.h"
25 #include "optimizer/paths.h"
26 #include "optimizer/plancat.h"
27 #include "optimizer/planner.h"
28 #include "optimizer/prep.h"
29 #include "optimizer/var.h"
30 #include "parser/parse_clause.h"
31 #include "parser/parse_expr.h"
32 #include "parser/parsetree.h"
33 #include "rewrite/rewriteManip.h"
34
35
36 /* These parameters are set by GUC */
37 bool            enable_geqo = false;    /* just in case GUC doesn't set it */
38 int                     geqo_threshold;
39
40 /* Hook for plugins to replace standard_join_search() */
41 join_search_hook_type join_search_hook = NULL;
42
43
44 static void set_base_rel_pathlists(PlannerInfo *root);
45 static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
46                                  Index rti, RangeTblEntry *rte);
47 static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
48                                            RangeTblEntry *rte);
49 static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
50                                                 Index rti, RangeTblEntry *rte);
51 static void set_dummy_rel_pathlist(RelOptInfo *rel);
52 static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
53                                           Index rti, RangeTblEntry *rte);
54 static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
55                                           RangeTblEntry *rte);
56 static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel,
57                                         RangeTblEntry *rte);
58 static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
59 static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
60                                                   bool *differentTypes);
61 static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
62                                           bool *differentTypes);
63 static void compare_tlist_datatypes(List *tlist, List *colTypes,
64                                                 bool *differentTypes);
65 static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
66                                           bool *differentTypes);
67 static void subquery_push_qual(Query *subquery,
68                                    RangeTblEntry *rte, Index rti, Node *qual);
69 static void recurse_push_qual(Node *setOp, Query *topquery,
70                                   RangeTblEntry *rte, Index rti, Node *qual);
71
72
73 /*
74  * make_one_rel
75  *        Finds all possible access paths for executing a query, returning a
76  *        single rel that represents the join of all base rels in the query.
77  */
78 RelOptInfo *
79 make_one_rel(PlannerInfo *root, List *joinlist)
80 {
81         RelOptInfo *rel;
82
83         /*
84          * Generate access paths for the base rels.
85          */
86         set_base_rel_pathlists(root);
87
88         /*
89          * Generate access paths for the entire join tree.
90          */
91         rel = make_rel_from_joinlist(root, joinlist);
92
93         /*
94          * The result should join all and only the query's base rels.
95          */
96 #ifdef USE_ASSERT_CHECKING
97         {
98                 int                     num_base_rels = 0;
99                 Index           rti;
100
101                 for (rti = 1; rti < root->simple_rel_array_size; rti++)
102                 {
103                         RelOptInfo *brel = root->simple_rel_array[rti];
104
105                         if (brel == NULL)
106                                 continue;
107
108                         Assert(brel->relid == rti); /* sanity check on array */
109
110                         /* ignore RTEs that are "other rels" */
111                         if (brel->reloptkind != RELOPT_BASEREL)
112                                 continue;
113
114                         Assert(bms_is_member(rti, rel->relids));
115                         num_base_rels++;
116                 }
117
118                 Assert(bms_num_members(rel->relids) == num_base_rels);
119         }
120 #endif
121
122         return rel;
123 }
124
125 /*
126  * set_base_rel_pathlists
127  *        Finds all paths available for scanning each base-relation entry.
128  *        Sequential scan and any available indices are considered.
129  *        Each useful path is attached to its relation's 'pathlist' field.
130  */
131 static void
132 set_base_rel_pathlists(PlannerInfo *root)
133 {
134         Index           rti;
135
136         for (rti = 1; rti < root->simple_rel_array_size; rti++)
137         {
138                 RelOptInfo *rel = root->simple_rel_array[rti];
139
140                 /* there may be empty slots corresponding to non-baserel RTEs */
141                 if (rel == NULL)
142                         continue;
143
144                 Assert(rel->relid == rti);              /* sanity check on array */
145
146                 /* ignore RTEs that are "other rels" */
147                 if (rel->reloptkind != RELOPT_BASEREL)
148                         continue;
149
150                 set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
151         }
152 }
153
154 /*
155  * set_rel_pathlist
156  *        Build access paths for a base relation
157  */
158 static void
159 set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
160                                  Index rti, RangeTblEntry *rte)
161 {
162         if (rte->inh)
163         {
164                 /* It's an "append relation", process accordingly */
165                 set_append_rel_pathlist(root, rel, rti, rte);
166         }
167         else if (rel->rtekind == RTE_SUBQUERY)
168         {
169                 /* Subquery --- generate a separate plan for it */
170                 set_subquery_pathlist(root, rel, rti, rte);
171         }
172         else if (rel->rtekind == RTE_FUNCTION)
173         {
174                 /* RangeFunction --- generate a separate plan for it */
175                 set_function_pathlist(root, rel, rte);
176         }
177         else if (rel->rtekind == RTE_VALUES)
178         {
179                 /* Values list --- generate a separate plan for it */
180                 set_values_pathlist(root, rel, rte);
181         }
182         else
183         {
184                 /* Plain relation */
185                 Assert(rel->rtekind == RTE_RELATION);
186                 set_plain_rel_pathlist(root, rel, rte);
187         }
188
189 #ifdef OPTIMIZER_DEBUG
190         debug_print_rel(root, rel);
191 #endif
192 }
193
194 /*
195  * set_plain_rel_pathlist
196  *        Build access paths for a plain relation (no subquery, no inheritance)
197  */
198 static void
199 set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
200 {
201         /*
202          * If we can prove we don't need to scan the rel via constraint exclusion,
203          * set up a single dummy path for it.  We only need to check for regular
204          * baserels; if it's an otherrel, CE was already checked in
205          * set_append_rel_pathlist().
206          */
207         if (rel->reloptkind == RELOPT_BASEREL &&
208                 relation_excluded_by_constraints(rel, rte))
209         {
210                 set_dummy_rel_pathlist(rel);
211                 return;
212         }
213
214         /* Mark rel with estimated output rows, width, etc */
215         set_baserel_size_estimates(root, rel);
216
217         /* Test any partial indexes of rel for applicability */
218         check_partial_indexes(root, rel);
219
220         /*
221          * Check to see if we can extract any restriction conditions from join
222          * quals that are OR-of-AND structures.  If so, add them to the rel's
223          * restriction list, and recompute the size estimates.
224          */
225         if (create_or_index_quals(root, rel))
226                 set_baserel_size_estimates(root, rel);
227
228         /*
229          * Generate paths and add them to the rel's pathlist.
230          *
231          * Note: add_path() will discard any paths that are dominated by another
232          * available path, keeping only those paths that are superior along at
233          * least one dimension of cost or sortedness.
234          */
235
236         /* Consider sequential scan */
237         add_path(rel, create_seqscan_path(root, rel));
238
239         /* Consider index scans */
240         create_index_paths(root, rel);
241
242         /* Consider TID scans */
243         create_tidscan_paths(root, rel);
244
245         /* Now find the cheapest of the paths for this rel */
246         set_cheapest(rel);
247 }
248
249 /*
250  * set_append_rel_pathlist
251  *        Build access paths for an "append relation"
252  *
253  * The passed-in rel and RTE represent the entire append relation.      The
254  * relation's contents are computed by appending together the output of
255  * the individual member relations.  Note that in the inheritance case,
256  * the first member relation is actually the same table as is mentioned in
257  * the parent RTE ... but it has a different RTE and RelOptInfo.  This is
258  * a good thing because their outputs are not the same size.
259  */
260 static void
261 set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
262                                                 Index rti, RangeTblEntry *rte)
263 {
264         int                     parentRTindex = rti;
265         List       *subpaths = NIL;
266         ListCell   *l;
267
268         /*
269          * XXX for now, can't handle inherited expansion of FOR UPDATE/SHARE; can
270          * we do better?  (This will take some redesign because the executor
271          * currently supposes that every rowMark relation is involved in every row
272          * returned by the query.)
273          */
274         if (get_rowmark(root->parse, parentRTindex))
275                 ereport(ERROR,
276                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
277                                  errmsg("SELECT FOR UPDATE/SHARE is not supported for inheritance queries")));
278
279         /*
280          * Initialize to compute size estimates for whole append relation
281          */
282         rel->rows = 0;
283         rel->width = 0;
284
285         /*
286          * Generate access paths for each member relation, and pick the cheapest
287          * path for each one.
288          */
289         foreach(l, root->append_rel_list)
290         {
291                 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
292                 int                     childRTindex;
293                 RangeTblEntry *childRTE;
294                 RelOptInfo *childrel;
295                 Path       *childpath;
296                 ListCell   *parentvars;
297                 ListCell   *childvars;
298
299                 /* append_rel_list contains all append rels; ignore others */
300                 if (appinfo->parent_relid != parentRTindex)
301                         continue;
302
303                 childRTindex = appinfo->child_relid;
304                 childRTE = root->simple_rte_array[childRTindex];
305
306                 /*
307                  * The child rel's RelOptInfo was already created during
308                  * add_base_rels_to_query.
309                  */
310                 childrel = find_base_rel(root, childRTindex);
311                 Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
312
313                 /*
314                  * We have to copy the parent's targetlist and quals to the child,
315                  * with appropriate substitution of variables.  However, only the
316                  * baserestrictinfo quals are needed before we can check for
317                  * constraint exclusion; so do that first and then check to see if we
318                  * can disregard this child.
319                  */
320                 childrel->baserestrictinfo = (List *)
321                         adjust_appendrel_attrs((Node *) rel->baserestrictinfo,
322                                                                    appinfo);
323
324                 if (relation_excluded_by_constraints(childrel, childRTE))
325                 {
326                         /*
327                          * This child need not be scanned, so we can omit it from the
328                          * appendrel.  Mark it with a dummy cheapest-path though, in case
329                          * best_appendrel_indexscan() looks at it later.
330                          */
331                         set_dummy_rel_pathlist(childrel);
332                         continue;
333                 }
334
335                 /* CE failed, so finish copying targetlist and join quals */
336                 childrel->joininfo = (List *)
337                         adjust_appendrel_attrs((Node *) rel->joininfo,
338                                                                    appinfo);
339                 childrel->reltargetlist = (List *)
340                         adjust_appendrel_attrs((Node *) rel->reltargetlist,
341                                                                    appinfo);
342
343                 /*
344                  * We have to make child entries in the EquivalenceClass data
345                  * structures as well.
346                  */
347                 if (rel->has_eclass_joins)
348                 {
349                         add_child_rel_equivalences(root, appinfo, rel, childrel);
350                         childrel->has_eclass_joins = true;
351                 }
352
353                 /*
354                  * Copy the parent's attr_needed data as well, with appropriate
355                  * adjustment of relids and attribute numbers.
356                  */
357                 pfree(childrel->attr_needed);
358                 childrel->attr_needed =
359                         adjust_appendrel_attr_needed(rel, appinfo,
360                                                                                  childrel->min_attr,
361                                                                                  childrel->max_attr);
362
363                 /*
364                  * Compute the child's access paths, and add the cheapest one to the
365                  * Append path we are constructing for the parent.
366                  *
367                  * It's possible that the child is itself an appendrel, in which case
368                  * we can "cut out the middleman" and just add its child paths to our
369                  * own list.  (We don't try to do this earlier because we need to
370                  * apply both levels of transformation to the quals.)
371                  */
372                 set_rel_pathlist(root, childrel, childRTindex, childRTE);
373
374                 childpath = childrel->cheapest_total_path;
375                 if (IsA(childpath, AppendPath))
376                         subpaths = list_concat(subpaths,
377                                                                    ((AppendPath *) childpath)->subpaths);
378                 else
379                         subpaths = lappend(subpaths, childpath);
380
381                 /*
382                  * Propagate size information from the child back to the parent. For
383                  * simplicity, we use the largest widths from any child as the parent
384                  * estimates.  (If you want to change this, beware of child
385                  * attr_widths[] entries that haven't been set and are still 0.)
386                  */
387                 rel->rows += childrel->rows;
388                 if (childrel->width > rel->width)
389                         rel->width = childrel->width;
390
391                 forboth(parentvars, rel->reltargetlist,
392                                 childvars, childrel->reltargetlist)
393                 {
394                         Var                *parentvar = (Var *) lfirst(parentvars);
395                         Var                *childvar = (Var *) lfirst(childvars);
396
397                         if (IsA(parentvar, Var) &&
398                                 IsA(childvar, Var))
399                         {
400                                 int                     pndx = parentvar->varattno - rel->min_attr;
401                                 int                     cndx = childvar->varattno - childrel->min_attr;
402
403                                 if (childrel->attr_widths[cndx] > rel->attr_widths[pndx])
404                                         rel->attr_widths[pndx] = childrel->attr_widths[cndx];
405                         }
406                 }
407         }
408
409         /*
410          * Set "raw tuples" count equal to "rows" for the appendrel; needed
411          * because some places assume rel->tuples is valid for any baserel.
412          */
413         rel->tuples = rel->rows;
414
415         /*
416          * Finally, build Append path and install it as the only access path for
417          * the parent rel.      (Note: this is correct even if we have zero or one
418          * live subpath due to constraint exclusion.)
419          */
420         add_path(rel, (Path *) create_append_path(rel, subpaths));
421
422         /* Select cheapest path (pretty easy in this case...) */
423         set_cheapest(rel);
424 }
425
426 /*
427  * set_dummy_rel_pathlist
428  *        Build a dummy path for a relation that's been excluded by constraints
429  *
430  * Rather than inventing a special "dummy" path type, we represent this as an
431  * AppendPath with no members.
432  */
433 static void
434 set_dummy_rel_pathlist(RelOptInfo *rel)
435 {
436         /* Set dummy size estimates --- we leave attr_widths[] as zeroes */
437         rel->rows = 0;
438         rel->width = 0;
439
440         add_path(rel, (Path *) create_append_path(rel, NIL));
441
442         /* Select cheapest path (pretty easy in this case...) */
443         set_cheapest(rel);
444 }
445
446 /* quick-and-dirty test to see if any joining is needed */
447 static bool
448 has_multiple_baserels(PlannerInfo *root)
449 {
450         int                     num_base_rels = 0;
451         Index           rti;
452
453         for (rti = 1; rti < root->simple_rel_array_size; rti++)
454         {
455                 RelOptInfo *brel = root->simple_rel_array[rti];
456
457                 if (brel == NULL)
458                         continue;
459
460                 /* ignore RTEs that are "other rels" */
461                 if (brel->reloptkind == RELOPT_BASEREL)
462                         if (++num_base_rels > 1)
463                                 return true;
464         }
465         return false;
466 }
467
468 /*
469  * set_subquery_pathlist
470  *              Build the (single) access path for a subquery RTE
471  */
472 static void
473 set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
474                                           Index rti, RangeTblEntry *rte)
475 {
476         Query      *parse = root->parse;
477         Query      *subquery = rte->subquery;
478         bool       *differentTypes;
479         double          tuple_fraction;
480         PlannerInfo *subroot;
481         List       *pathkeys;
482
483         /* We need a workspace for keeping track of set-op type coercions */
484         differentTypes = (bool *)
485                 palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
486
487         /*
488          * If there are any restriction clauses that have been attached to the
489          * subquery relation, consider pushing them down to become WHERE or HAVING
490          * quals of the subquery itself.  This transformation is useful because it
491          * may allow us to generate a better plan for the subquery than evaluating
492          * all the subquery output rows and then filtering them.
493          *
494          * There are several cases where we cannot push down clauses. Restrictions
495          * involving the subquery are checked by subquery_is_pushdown_safe().
496          * Restrictions on individual clauses are checked by
497          * qual_is_pushdown_safe().  Also, we don't want to push down
498          * pseudoconstant clauses; better to have the gating node above the
499          * subquery.
500          *
501          * Non-pushed-down clauses will get evaluated as qpquals of the
502          * SubqueryScan node.
503          *
504          * XXX Are there any cases where we want to make a policy decision not to
505          * push down a pushable qual, because it'd result in a worse plan?
506          */
507         if (rel->baserestrictinfo != NIL &&
508                 subquery_is_pushdown_safe(subquery, subquery, differentTypes))
509         {
510                 /* OK to consider pushing down individual quals */
511                 List       *upperrestrictlist = NIL;
512                 ListCell   *l;
513
514                 foreach(l, rel->baserestrictinfo)
515                 {
516                         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
517                         Node       *clause = (Node *) rinfo->clause;
518
519                         if (!rinfo->pseudoconstant &&
520                                 qual_is_pushdown_safe(subquery, rti, clause, differentTypes))
521                         {
522                                 /* Push it down */
523                                 subquery_push_qual(subquery, rte, rti, clause);
524                         }
525                         else
526                         {
527                                 /* Keep it in the upper query */
528                                 upperrestrictlist = lappend(upperrestrictlist, rinfo);
529                         }
530                 }
531                 rel->baserestrictinfo = upperrestrictlist;
532         }
533
534         pfree(differentTypes);
535
536         /*
537          * We can safely pass the outer tuple_fraction down to the subquery if the
538          * outer level has no joining, aggregation, or sorting to do. Otherwise
539          * we'd better tell the subquery to plan for full retrieval. (XXX This
540          * could probably be made more intelligent ...)
541          */
542         if (parse->hasAggs ||
543                 parse->groupClause ||
544                 parse->havingQual ||
545                 parse->distinctClause ||
546                 parse->sortClause ||
547                 has_multiple_baserels(root))
548                 tuple_fraction = 0.0;   /* default case */
549         else
550                 tuple_fraction = root->tuple_fraction;
551
552         /* Generate the plan for the subquery */
553         rel->subplan = subquery_planner(root->glob, subquery,
554                                                                         root->query_level + 1,
555                                                                         tuple_fraction,
556                                                                         &subroot);
557         rel->subrtable = subroot->parse->rtable;
558
559         /* Copy number of output rows from subplan */
560         rel->tuples = rel->subplan->plan_rows;
561
562         /* Mark rel with estimated output rows, width, etc */
563         set_baserel_size_estimates(root, rel);
564
565         /* Convert subquery pathkeys to outer representation */
566         pathkeys = convert_subquery_pathkeys(root, rel, subroot->query_pathkeys);
567
568         /* Generate appropriate path */
569         add_path(rel, create_subqueryscan_path(rel, pathkeys));
570
571         /* Select cheapest path (pretty easy in this case...) */
572         set_cheapest(rel);
573 }
574
575 /*
576  * set_function_pathlist
577  *              Build the (single) access path for a function RTE
578  */
579 static void
580 set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
581 {
582         /* Mark rel with estimated output rows, width, etc */
583         set_function_size_estimates(root, rel);
584
585         /* Generate appropriate path */
586         add_path(rel, create_functionscan_path(root, rel));
587
588         /* Select cheapest path (pretty easy in this case...) */
589         set_cheapest(rel);
590 }
591
592 /*
593  * set_values_pathlist
594  *              Build the (single) access path for a VALUES RTE
595  */
596 static void
597 set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
598 {
599         /* Mark rel with estimated output rows, width, etc */
600         set_values_size_estimates(root, rel);
601
602         /* Generate appropriate path */
603         add_path(rel, create_valuesscan_path(root, rel));
604
605         /* Select cheapest path (pretty easy in this case...) */
606         set_cheapest(rel);
607 }
608
609 /*
610  * make_rel_from_joinlist
611  *        Build access paths using a "joinlist" to guide the join path search.
612  *
613  * See comments for deconstruct_jointree() for definition of the joinlist
614  * data structure.
615  */
616 static RelOptInfo *
617 make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
618 {
619         int                     levels_needed;
620         List       *initial_rels;
621         ListCell   *jl;
622
623         /*
624          * Count the number of child joinlist nodes.  This is the depth of the
625          * dynamic-programming algorithm we must employ to consider all ways of
626          * joining the child nodes.
627          */
628         levels_needed = list_length(joinlist);
629
630         if (levels_needed <= 0)
631                 return NULL;                    /* nothing to do? */
632
633         /*
634          * Construct a list of rels corresponding to the child joinlist nodes.
635          * This may contain both base rels and rels constructed according to
636          * sub-joinlists.
637          */
638         initial_rels = NIL;
639         foreach(jl, joinlist)
640         {
641                 Node       *jlnode = (Node *) lfirst(jl);
642                 RelOptInfo *thisrel;
643
644                 if (IsA(jlnode, RangeTblRef))
645                 {
646                         int                     varno = ((RangeTblRef *) jlnode)->rtindex;
647
648                         thisrel = find_base_rel(root, varno);
649                 }
650                 else if (IsA(jlnode, List))
651                 {
652                         /* Recurse to handle subproblem */
653                         thisrel = make_rel_from_joinlist(root, (List *) jlnode);
654                 }
655                 else
656                 {
657                         elog(ERROR, "unrecognized joinlist node type: %d",
658                                  (int) nodeTag(jlnode));
659                         thisrel = NULL;         /* keep compiler quiet */
660                 }
661
662                 initial_rels = lappend(initial_rels, thisrel);
663         }
664
665         if (levels_needed == 1)
666         {
667                 /*
668                  * Single joinlist node, so we're done.
669                  */
670                 return (RelOptInfo *) linitial(initial_rels);
671         }
672         else
673         {
674                 /*
675                  * Consider the different orders in which we could join the rels,
676                  * using a plugin, GEQO, or the regular join search code.
677                  */
678                 if (join_search_hook)
679                         return (*join_search_hook) (root, levels_needed, initial_rels);
680                 else if (enable_geqo && levels_needed >= geqo_threshold)
681                         return geqo(root, levels_needed, initial_rels);
682                 else
683                         return standard_join_search(root, levels_needed, initial_rels);
684         }
685 }
686
687 /*
688  * standard_join_search
689  *        Find possible joinpaths for a query by successively finding ways
690  *        to join component relations into join relations.
691  *
692  * 'levels_needed' is the number of iterations needed, ie, the number of
693  *              independent jointree items in the query.  This is > 1.
694  *
695  * 'initial_rels' is a list of RelOptInfo nodes for each independent
696  *              jointree item.  These are the components to be joined together.
697  *              Note that levels_needed == list_length(initial_rels).
698  *
699  * Returns the final level of join relations, i.e., the relation that is
700  * the result of joining all the original relations together.
701  * At least one implementation path must be provided for this relation and
702  * all required sub-relations.
703  *
704  * To support loadable plugins that modify planner behavior by changing the
705  * join searching algorithm, we provide a hook variable that lets a plugin
706  * replace or supplement this function.  Any such hook must return the same
707  * final join relation as the standard code would, but it might have a
708  * different set of implementation paths attached, and only the sub-joinrels
709  * needed for these paths need have been instantiated.
710  *
711  * Note to plugin authors: the functions invoked during standard_join_search()
712  * modify root->join_rel_list and root->join_rel_hash.  If you want to do more
713  * than one join-order search, you'll probably need to save and restore the
714  * original states of those data structures.  See geqo_eval() for an example.
715  */
716 RelOptInfo *
717 standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
718 {
719         List      **joinitems;
720         int                     lev;
721         RelOptInfo *rel;
722
723         /*
724          * We employ a simple "dynamic programming" algorithm: we first find all
725          * ways to build joins of two jointree items, then all ways to build joins
726          * of three items (from two-item joins and single items), then four-item
727          * joins, and so on until we have considered all ways to join all the
728          * items into one rel.
729          *
730          * joinitems[j] is a list of all the j-item rels.  Initially we set
731          * joinitems[1] to represent all the single-jointree-item relations.
732          */
733         joinitems = (List **) palloc0((levels_needed + 1) * sizeof(List *));
734
735         joinitems[1] = initial_rels;
736
737         for (lev = 2; lev <= levels_needed; lev++)
738         {
739                 ListCell   *x;
740
741                 /*
742                  * Determine all possible pairs of relations to be joined at this
743                  * level, and build paths for making each one from every available
744                  * pair of lower-level relations.
745                  */
746                 joinitems[lev] = join_search_one_level(root, lev, joinitems);
747
748                 /*
749                  * Do cleanup work on each just-processed rel.
750                  */
751                 foreach(x, joinitems[lev])
752                 {
753                         rel = (RelOptInfo *) lfirst(x);
754
755                         /* Find and save the cheapest paths for this rel */
756                         set_cheapest(rel);
757
758 #ifdef OPTIMIZER_DEBUG
759                         debug_print_rel(root, rel);
760 #endif
761                 }
762         }
763
764         /*
765          * We should have a single rel at the final level.
766          */
767         if (joinitems[levels_needed] == NIL)
768                 elog(ERROR, "failed to build any %d-way joins", levels_needed);
769         Assert(list_length(joinitems[levels_needed]) == 1);
770
771         rel = (RelOptInfo *) linitial(joinitems[levels_needed]);
772
773         return rel;
774 }
775
776 /*****************************************************************************
777  *                      PUSHING QUALS DOWN INTO SUBQUERIES
778  *****************************************************************************/
779
780 /*
781  * subquery_is_pushdown_safe - is a subquery safe for pushing down quals?
782  *
783  * subquery is the particular component query being checked.  topquery
784  * is the top component of a set-operations tree (the same Query if no
785  * set-op is involved).
786  *
787  * Conditions checked here:
788  *
789  * 1. If the subquery has a LIMIT clause, we must not push down any quals,
790  * since that could change the set of rows returned.
791  *
792  * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
793  * quals into it, because that would change the results.
794  *
795  * 3. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
796  * push quals into each component query, but the quals can only reference
797  * subquery columns that suffer no type coercions in the set operation.
798  * Otherwise there are possible semantic gotchas.  So, we check the
799  * component queries to see if any of them have different output types;
800  * differentTypes[k] is set true if column k has different type in any
801  * component.
802  */
803 static bool
804 subquery_is_pushdown_safe(Query *subquery, Query *topquery,
805                                                   bool *differentTypes)
806 {
807         SetOperationStmt *topop;
808
809         /* Check point 1 */
810         if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
811                 return false;
812
813         /* Are we at top level, or looking at a setop component? */
814         if (subquery == topquery)
815         {
816                 /* Top level, so check any component queries */
817                 if (subquery->setOperations != NULL)
818                         if (!recurse_pushdown_safe(subquery->setOperations, topquery,
819                                                                            differentTypes))
820                                 return false;
821         }
822         else
823         {
824                 /* Setop component must not have more components (too weird) */
825                 if (subquery->setOperations != NULL)
826                         return false;
827                 /* Check whether setop component output types match top level */
828                 topop = (SetOperationStmt *) topquery->setOperations;
829                 Assert(topop && IsA(topop, SetOperationStmt));
830                 compare_tlist_datatypes(subquery->targetList,
831                                                                 topop->colTypes,
832                                                                 differentTypes);
833         }
834         return true;
835 }
836
837 /*
838  * Helper routine to recurse through setOperations tree
839  */
840 static bool
841 recurse_pushdown_safe(Node *setOp, Query *topquery,
842                                           bool *differentTypes)
843 {
844         if (IsA(setOp, RangeTblRef))
845         {
846                 RangeTblRef *rtr = (RangeTblRef *) setOp;
847                 RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable);
848                 Query      *subquery = rte->subquery;
849
850                 Assert(subquery != NULL);
851                 return subquery_is_pushdown_safe(subquery, topquery, differentTypes);
852         }
853         else if (IsA(setOp, SetOperationStmt))
854         {
855                 SetOperationStmt *op = (SetOperationStmt *) setOp;
856
857                 /* EXCEPT is no good */
858                 if (op->op == SETOP_EXCEPT)
859                         return false;
860                 /* Else recurse */
861                 if (!recurse_pushdown_safe(op->larg, topquery, differentTypes))
862                         return false;
863                 if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes))
864                         return false;
865         }
866         else
867         {
868                 elog(ERROR, "unrecognized node type: %d",
869                          (int) nodeTag(setOp));
870         }
871         return true;
872 }
873
874 /*
875  * Compare tlist's datatypes against the list of set-operation result types.
876  * For any items that are different, mark the appropriate element of
877  * differentTypes[] to show that this column will have type conversions.
878  *
879  * We don't have to care about typmods here: the only allowed difference
880  * between set-op input and output typmods is input is a specific typmod
881  * and output is -1, and that does not require a coercion.
882  */
883 static void
884 compare_tlist_datatypes(List *tlist, List *colTypes,
885                                                 bool *differentTypes)
886 {
887         ListCell   *l;
888         ListCell   *colType = list_head(colTypes);
889
890         foreach(l, tlist)
891         {
892                 TargetEntry *tle = (TargetEntry *) lfirst(l);
893
894                 if (tle->resjunk)
895                         continue;                       /* ignore resjunk columns */
896                 if (colType == NULL)
897                         elog(ERROR, "wrong number of tlist entries");
898                 if (exprType((Node *) tle->expr) != lfirst_oid(colType))
899                         differentTypes[tle->resno] = true;
900                 colType = lnext(colType);
901         }
902         if (colType != NULL)
903                 elog(ERROR, "wrong number of tlist entries");
904 }
905
906 /*
907  * qual_is_pushdown_safe - is a particular qual safe to push down?
908  *
909  * qual is a restriction clause applying to the given subquery (whose RTE
910  * has index rti in the parent query).
911  *
912  * Conditions checked here:
913  *
914  * 1. The qual must not contain any subselects (mainly because I'm not sure
915  * it will work correctly: sublinks will already have been transformed into
916  * subplans in the qual, but not in the subquery).
917  *
918  * 2. The qual must not refer to the whole-row output of the subquery
919  * (since there is no easy way to name that within the subquery itself).
920  *
921  * 3. The qual must not refer to any subquery output columns that were
922  * found to have inconsistent types across a set operation tree by
923  * subquery_is_pushdown_safe().
924  *
925  * 4. If the subquery uses DISTINCT ON, we must not push down any quals that
926  * refer to non-DISTINCT output columns, because that could change the set
927  * of rows returned.  This condition is vacuous for DISTINCT, because then
928  * there are no non-DISTINCT output columns, but unfortunately it's fairly
929  * expensive to tell the difference between DISTINCT and DISTINCT ON in the
930  * parsetree representation.  It's cheaper to just make sure all the Vars
931  * in the qual refer to DISTINCT columns.
932  *
933  * 5. We must not push down any quals that refer to subselect outputs that
934  * return sets, else we'd introduce functions-returning-sets into the
935  * subquery's WHERE/HAVING quals.
936  *
937  * 6. We must not push down any quals that refer to subselect outputs that
938  * contain volatile functions, for fear of introducing strange results due
939  * to multiple evaluation of a volatile function.
940  */
941 static bool
942 qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
943                                           bool *differentTypes)
944 {
945         bool            safe = true;
946         List       *vars;
947         ListCell   *vl;
948         Bitmapset  *tested = NULL;
949
950         /* Refuse subselects (point 1) */
951         if (contain_subplans(qual))
952                 return false;
953
954         /*
955          * Examine all Vars used in clause; since it's a restriction clause, all
956          * such Vars must refer to subselect output columns.
957          */
958         vars = pull_var_clause(qual, false);
959         foreach(vl, vars)
960         {
961                 Var                *var = (Var *) lfirst(vl);
962                 TargetEntry *tle;
963
964                 Assert(var->varno == rti);
965
966                 /* Check point 2 */
967                 if (var->varattno == 0)
968                 {
969                         safe = false;
970                         break;
971                 }
972
973                 /*
974                  * We use a bitmapset to avoid testing the same attno more than once.
975                  * (NB: this only works because subquery outputs can't have negative
976                  * attnos.)
977                  */
978                 if (bms_is_member(var->varattno, tested))
979                         continue;
980                 tested = bms_add_member(tested, var->varattno);
981
982                 /* Check point 3 */
983                 if (differentTypes[var->varattno])
984                 {
985                         safe = false;
986                         break;
987                 }
988
989                 /* Must find the tlist element referenced by the Var */
990                 tle = get_tle_by_resno(subquery->targetList, var->varattno);
991                 Assert(tle != NULL);
992                 Assert(!tle->resjunk);
993
994                 /* If subquery uses DISTINCT or DISTINCT ON, check point 4 */
995                 if (subquery->distinctClause != NIL &&
996                         !targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
997                 {
998                         /* non-DISTINCT column, so fail */
999                         safe = false;
1000                         break;
1001                 }
1002
1003                 /* Refuse functions returning sets (point 5) */
1004                 if (expression_returns_set((Node *) tle->expr))
1005                 {
1006                         safe = false;
1007                         break;
1008                 }
1009
1010                 /* Refuse volatile functions (point 6) */
1011                 if (contain_volatile_functions((Node *) tle->expr))
1012                 {
1013                         safe = false;
1014                         break;
1015                 }
1016         }
1017
1018         list_free(vars);
1019         bms_free(tested);
1020
1021         return safe;
1022 }
1023
1024 /*
1025  * subquery_push_qual - push down a qual that we have determined is safe
1026  */
1027 static void
1028 subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual)
1029 {
1030         if (subquery->setOperations != NULL)
1031         {
1032                 /* Recurse to push it separately to each component query */
1033                 recurse_push_qual(subquery->setOperations, subquery,
1034                                                   rte, rti, qual);
1035         }
1036         else
1037         {
1038                 /*
1039                  * We need to replace Vars in the qual (which must refer to outputs of
1040                  * the subquery) with copies of the subquery's targetlist expressions.
1041                  * Note that at this point, any uplevel Vars in the qual should have
1042                  * been replaced with Params, so they need no work.
1043                  *
1044                  * This step also ensures that when we are pushing into a setop tree,
1045                  * each component query gets its own copy of the qual.
1046                  */
1047                 qual = ResolveNew(qual, rti, 0, rte,
1048                                                   subquery->targetList,
1049                                                   CMD_SELECT, 0);
1050
1051                 /*
1052                  * Now attach the qual to the proper place: normally WHERE, but if the
1053                  * subquery uses grouping or aggregation, put it in HAVING (since the
1054                  * qual really refers to the group-result rows).
1055                  */
1056                 if (subquery->hasAggs || subquery->groupClause || subquery->havingQual)
1057                         subquery->havingQual = make_and_qual(subquery->havingQual, qual);
1058                 else
1059                         subquery->jointree->quals =
1060                                 make_and_qual(subquery->jointree->quals, qual);
1061
1062                 /*
1063                  * We need not change the subquery's hasAggs or hasSublinks flags,
1064                  * since we can't be pushing down any aggregates that weren't there
1065                  * before, and we don't push down subselects at all.
1066                  */
1067         }
1068 }
1069
1070 /*
1071  * Helper routine to recurse through setOperations tree
1072  */
1073 static void
1074 recurse_push_qual(Node *setOp, Query *topquery,
1075                                   RangeTblEntry *rte, Index rti, Node *qual)
1076 {
1077         if (IsA(setOp, RangeTblRef))
1078         {
1079                 RangeTblRef *rtr = (RangeTblRef *) setOp;
1080                 RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable);
1081                 Query      *subquery = subrte->subquery;
1082
1083                 Assert(subquery != NULL);
1084                 subquery_push_qual(subquery, rte, rti, qual);
1085         }
1086         else if (IsA(setOp, SetOperationStmt))
1087         {
1088                 SetOperationStmt *op = (SetOperationStmt *) setOp;
1089
1090                 recurse_push_qual(op->larg, topquery, rte, rti, qual);
1091                 recurse_push_qual(op->rarg, topquery, rte, rti, qual);
1092         }
1093         else
1094         {
1095                 elog(ERROR, "unrecognized node type: %d",
1096                          (int) nodeTag(setOp));
1097         }
1098 }
1099
1100 /*****************************************************************************
1101  *                      DEBUG SUPPORT
1102  *****************************************************************************/
1103
1104 #ifdef OPTIMIZER_DEBUG
1105
1106 static void
1107 print_relids(Relids relids)
1108 {
1109         Relids          tmprelids;
1110         int                     x;
1111         bool            first = true;
1112
1113         tmprelids = bms_copy(relids);
1114         while ((x = bms_first_member(tmprelids)) >= 0)
1115         {
1116                 if (!first)
1117                         printf(" ");
1118                 printf("%d", x);
1119                 first = false;
1120         }
1121         bms_free(tmprelids);
1122 }
1123
1124 static void
1125 print_restrictclauses(PlannerInfo *root, List *clauses)
1126 {
1127         ListCell   *l;
1128
1129         foreach(l, clauses)
1130         {
1131                 RestrictInfo *c = lfirst(l);
1132
1133                 print_expr((Node *) c->clause, root->parse->rtable);
1134                 if (lnext(l))
1135                         printf(", ");
1136         }
1137 }
1138
1139 static void
1140 print_path(PlannerInfo *root, Path *path, int indent)
1141 {
1142         const char *ptype;
1143         bool            join = false;
1144         Path       *subpath = NULL;
1145         int                     i;
1146
1147         switch (nodeTag(path))
1148         {
1149                 case T_Path:
1150                         ptype = "SeqScan";
1151                         break;
1152                 case T_IndexPath:
1153                         ptype = "IdxScan";
1154                         break;
1155                 case T_BitmapHeapPath:
1156                         ptype = "BitmapHeapScan";
1157                         break;
1158                 case T_BitmapAndPath:
1159                         ptype = "BitmapAndPath";
1160                         break;
1161                 case T_BitmapOrPath:
1162                         ptype = "BitmapOrPath";
1163                         break;
1164                 case T_TidPath:
1165                         ptype = "TidScan";
1166                         break;
1167                 case T_AppendPath:
1168                         ptype = "Append";
1169                         break;
1170                 case T_ResultPath:
1171                         ptype = "Result";
1172                         break;
1173                 case T_MaterialPath:
1174                         ptype = "Material";
1175                         subpath = ((MaterialPath *) path)->subpath;
1176                         break;
1177                 case T_UniquePath:
1178                         ptype = "Unique";
1179                         subpath = ((UniquePath *) path)->subpath;
1180                         break;
1181                 case T_NestPath:
1182                         ptype = "NestLoop";
1183                         join = true;
1184                         break;
1185                 case T_MergePath:
1186                         ptype = "MergeJoin";
1187                         join = true;
1188                         break;
1189                 case T_HashPath:
1190                         ptype = "HashJoin";
1191                         join = true;
1192                         break;
1193                 default:
1194                         ptype = "???Path";
1195                         break;
1196         }
1197
1198         for (i = 0; i < indent; i++)
1199                 printf("\t");
1200         printf("%s", ptype);
1201
1202         if (path->parent)
1203         {
1204                 printf("(");
1205                 print_relids(path->parent->relids);
1206                 printf(") rows=%.0f", path->parent->rows);
1207         }
1208         printf(" cost=%.2f..%.2f\n", path->startup_cost, path->total_cost);
1209
1210         if (path->pathkeys)
1211         {
1212                 for (i = 0; i < indent; i++)
1213                         printf("\t");
1214                 printf("  pathkeys: ");
1215                 print_pathkeys(path->pathkeys, root->parse->rtable);
1216         }
1217
1218         if (join)
1219         {
1220                 JoinPath   *jp = (JoinPath *) path;
1221
1222                 for (i = 0; i < indent; i++)
1223                         printf("\t");
1224                 printf("  clauses: ");
1225                 print_restrictclauses(root, jp->joinrestrictinfo);
1226                 printf("\n");
1227
1228                 if (IsA(path, MergePath))
1229                 {
1230                         MergePath  *mp = (MergePath *) path;
1231
1232                         if (mp->outersortkeys || mp->innersortkeys)
1233                         {
1234                                 for (i = 0; i < indent; i++)
1235                                         printf("\t");
1236                                 printf("  sortouter=%d sortinner=%d\n",
1237                                            ((mp->outersortkeys) ? 1 : 0),
1238                                            ((mp->innersortkeys) ? 1 : 0));
1239                         }
1240                 }
1241
1242                 print_path(root, jp->outerjoinpath, indent + 1);
1243                 print_path(root, jp->innerjoinpath, indent + 1);
1244         }
1245
1246         if (subpath)
1247                 print_path(root, subpath, indent + 1);
1248 }
1249
1250 void
1251 debug_print_rel(PlannerInfo *root, RelOptInfo *rel)
1252 {
1253         ListCell   *l;
1254
1255         printf("RELOPTINFO (");
1256         print_relids(rel->relids);
1257         printf("): rows=%.0f width=%d\n", rel->rows, rel->width);
1258
1259         if (rel->baserestrictinfo)
1260         {
1261                 printf("\tbaserestrictinfo: ");
1262                 print_restrictclauses(root, rel->baserestrictinfo);
1263                 printf("\n");
1264         }
1265
1266         if (rel->joininfo)
1267         {
1268                 printf("\tjoininfo: ");
1269                 print_restrictclauses(root, rel->joininfo);
1270                 printf("\n");
1271         }
1272
1273         printf("\tpath list:\n");
1274         foreach(l, rel->pathlist)
1275                 print_path(root, lfirst(l), 1);
1276         printf("\n\tcheapest startup path:\n");
1277         print_path(root, rel->cheapest_startup_path, 1);
1278         printf("\n\tcheapest total path:\n");
1279         print_path(root, rel->cheapest_total_path, 1);
1280         printf("\n");
1281         fflush(stdout);
1282 }
1283
1284 #endif   /* OPTIMIZER_DEBUG */