]> granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/allpaths.c
adbf95b31debba8d061c8881c7d496b33c3430b4
[postgresql] / src / backend / optimizer / path / allpaths.c
1 /*-------------------------------------------------------------------------
2  *
3  * allpaths.c
4  *        Routines to find possible search paths for processing a query
5  *
6  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.169 2008/03/24 21:53:03 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15
16 #include "postgres.h"
17
18 #ifdef OPTIMIZER_DEBUG
19 #include "nodes/print.h"
20 #endif
21 #include "optimizer/clauses.h"
22 #include "optimizer/cost.h"
23 #include "optimizer/geqo.h"
24 #include "optimizer/pathnode.h"
25 #include "optimizer/paths.h"
26 #include "optimizer/plancat.h"
27 #include "optimizer/planner.h"
28 #include "optimizer/prep.h"
29 #include "optimizer/var.h"
30 #include "parser/parse_clause.h"
31 #include "parser/parse_expr.h"
32 #include "parser/parsetree.h"
33 #include "rewrite/rewriteManip.h"
34
35
36 /* These parameters are set by GUC */
37 bool            enable_geqo = false;    /* just in case GUC doesn't set it */
38 int                     geqo_threshold;
39
40 /* Hook for plugins to replace standard_join_search() */
41 join_search_hook_type join_search_hook = NULL;
42
43
44 static void set_base_rel_pathlists(PlannerInfo *root);
45 static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
46                                  Index rti, RangeTblEntry *rte);
47 static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
48                                            RangeTblEntry *rte);
49 static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
50                                                 Index rti, RangeTblEntry *rte);
51 static void set_dummy_rel_pathlist(RelOptInfo *rel);
52 static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
53                                           Index rti, RangeTblEntry *rte);
54 static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
55                                           RangeTblEntry *rte);
56 static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel,
57                                         RangeTblEntry *rte);
58 static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
59 static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
60                                                   bool *differentTypes);
61 static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
62                                           bool *differentTypes);
63 static void compare_tlist_datatypes(List *tlist, List *colTypes,
64                                                 bool *differentTypes);
65 static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
66                                           bool *differentTypes);
67 static void subquery_push_qual(Query *subquery,
68                                    RangeTblEntry *rte, Index rti, Node *qual);
69 static void recurse_push_qual(Node *setOp, Query *topquery,
70                                   RangeTblEntry *rte, Index rti, Node *qual);
71
72
73 /*
74  * make_one_rel
75  *        Finds all possible access paths for executing a query, returning a
76  *        single rel that represents the join of all base rels in the query.
77  */
78 RelOptInfo *
79 make_one_rel(PlannerInfo *root, List *joinlist)
80 {
81         RelOptInfo *rel;
82
83         /*
84          * Generate access paths for the base rels.
85          */
86         set_base_rel_pathlists(root);
87
88         /*
89          * Generate access paths for the entire join tree.
90          */
91         rel = make_rel_from_joinlist(root, joinlist);
92
93         /*
94          * The result should join all and only the query's base rels.
95          */
96 #ifdef USE_ASSERT_CHECKING
97         {
98                 int                     num_base_rels = 0;
99                 Index           rti;
100
101                 for (rti = 1; rti < root->simple_rel_array_size; rti++)
102                 {
103                         RelOptInfo *brel = root->simple_rel_array[rti];
104
105                         if (brel == NULL)
106                                 continue;
107
108                         Assert(brel->relid == rti); /* sanity check on array */
109
110                         /* ignore RTEs that are "other rels" */
111                         if (brel->reloptkind != RELOPT_BASEREL)
112                                 continue;
113
114                         Assert(bms_is_member(rti, rel->relids));
115                         num_base_rels++;
116                 }
117
118                 Assert(bms_num_members(rel->relids) == num_base_rels);
119         }
120 #endif
121
122         return rel;
123 }
124
125 /*
126  * set_base_rel_pathlists
127  *        Finds all paths available for scanning each base-relation entry.
128  *        Sequential scan and any available indices are considered.
129  *        Each useful path is attached to its relation's 'pathlist' field.
130  */
131 static void
132 set_base_rel_pathlists(PlannerInfo *root)
133 {
134         Index           rti;
135
136         for (rti = 1; rti < root->simple_rel_array_size; rti++)
137         {
138                 RelOptInfo *rel = root->simple_rel_array[rti];
139
140                 /* there may be empty slots corresponding to non-baserel RTEs */
141                 if (rel == NULL)
142                         continue;
143
144                 Assert(rel->relid == rti);              /* sanity check on array */
145
146                 /* ignore RTEs that are "other rels" */
147                 if (rel->reloptkind != RELOPT_BASEREL)
148                         continue;
149
150                 set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
151         }
152 }
153
154 /*
155  * set_rel_pathlist
156  *        Build access paths for a base relation
157  */
158 static void
159 set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
160                                  Index rti, RangeTblEntry *rte)
161 {
162         if (rte->inh)
163         {
164                 /* It's an "append relation", process accordingly */
165                 set_append_rel_pathlist(root, rel, rti, rte);
166         }
167         else if (rel->rtekind == RTE_SUBQUERY)
168         {
169                 /* Subquery --- generate a separate plan for it */
170                 set_subquery_pathlist(root, rel, rti, rte);
171         }
172         else if (rel->rtekind == RTE_FUNCTION)
173         {
174                 /* RangeFunction --- generate a separate plan for it */
175                 set_function_pathlist(root, rel, rte);
176         }
177         else if (rel->rtekind == RTE_VALUES)
178         {
179                 /* Values list --- generate a separate plan for it */
180                 set_values_pathlist(root, rel, rte);
181         }
182         else
183         {
184                 /* Plain relation */
185                 Assert(rel->rtekind == RTE_RELATION);
186                 set_plain_rel_pathlist(root, rel, rte);
187         }
188
189 #ifdef OPTIMIZER_DEBUG
190         debug_print_rel(root, rel);
191 #endif
192 }
193
194 /*
195  * set_plain_rel_pathlist
196  *        Build access paths for a plain relation (no subquery, no inheritance)
197  */
198 static void
199 set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
200 {
201         /*
202          * If we can prove we don't need to scan the rel via constraint exclusion,
203          * set up a single dummy path for it.  We only need to check for regular
204          * baserels; if it's an otherrel, CE was already checked in
205          * set_append_rel_pathlist().
206          */
207         if (rel->reloptkind == RELOPT_BASEREL &&
208                 relation_excluded_by_constraints(rel, rte))
209         {
210                 set_dummy_rel_pathlist(rel);
211                 return;
212         }
213
214         /* Mark rel with estimated output rows, width, etc */
215         set_baserel_size_estimates(root, rel);
216
217         /* Test any partial indexes of rel for applicability */
218         check_partial_indexes(root, rel);
219
220         /*
221          * Check to see if we can extract any restriction conditions from join
222          * quals that are OR-of-AND structures.  If so, add them to the rel's
223          * restriction list, and recompute the size estimates.
224          */
225         if (create_or_index_quals(root, rel))
226                 set_baserel_size_estimates(root, rel);
227
228         /*
229          * Generate paths and add them to the rel's pathlist.
230          *
231          * Note: add_path() will discard any paths that are dominated by another
232          * available path, keeping only those paths that are superior along at
233          * least one dimension of cost or sortedness.
234          */
235
236         /* Consider sequential scan */
237         add_path(rel, create_seqscan_path(root, rel));
238
239         /* Consider index scans */
240         create_index_paths(root, rel);
241
242         /* Consider TID scans */
243         create_tidscan_paths(root, rel);
244
245         /* Now find the cheapest of the paths for this rel */
246         set_cheapest(rel);
247 }
248
249 /*
250  * set_append_rel_pathlist
251  *        Build access paths for an "append relation"
252  *
253  * The passed-in rel and RTE represent the entire append relation.      The
254  * relation's contents are computed by appending together the output of
255  * the individual member relations.  Note that in the inheritance case,
256  * the first member relation is actually the same table as is mentioned in
257  * the parent RTE ... but it has a different RTE and RelOptInfo.  This is
258  * a good thing because their outputs are not the same size.
259  */
260 static void
261 set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
262                                                 Index rti, RangeTblEntry *rte)
263 {
264         int                     parentRTindex = rti;
265         List       *subpaths = NIL;
266         ListCell   *l;
267
268         /*
269          * XXX for now, can't handle inherited expansion of FOR UPDATE/SHARE; can
270          * we do better?  (This will take some redesign because the executor
271          * currently supposes that every rowMark relation is involved in every row
272          * returned by the query.)
273          */
274         if (get_rowmark(root->parse, parentRTindex))
275                 ereport(ERROR,
276                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
277                                  errmsg("SELECT FOR UPDATE/SHARE is not supported for inheritance queries")));
278
279         /*
280          * Initialize to compute size estimates for whole append relation
281          */
282         rel->rows = 0;
283         rel->width = 0;
284
285         /*
286          * Generate access paths for each member relation, and pick the cheapest
287          * path for each one.
288          */
289         foreach(l, root->append_rel_list)
290         {
291                 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
292                 int                     childRTindex;
293                 RangeTblEntry *childRTE;
294                 RelOptInfo *childrel;
295                 Path       *childpath;
296                 ListCell   *parentvars;
297                 ListCell   *childvars;
298
299                 /* append_rel_list contains all append rels; ignore others */
300                 if (appinfo->parent_relid != parentRTindex)
301                         continue;
302
303                 childRTindex = appinfo->child_relid;
304                 childRTE = root->simple_rte_array[childRTindex];
305
306                 /*
307                  * The child rel's RelOptInfo was already created during
308                  * add_base_rels_to_query.
309                  */
310                 childrel = find_base_rel(root, childRTindex);
311                 Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
312
313                 /*
314                  * We have to copy the parent's targetlist and quals to the child,
315                  * with appropriate substitution of variables.  However, only the
316                  * baserestrictinfo quals are needed before we can check for
317                  * constraint exclusion; so do that first and then check to see if we
318                  * can disregard this child.
319                  */
320                 childrel->baserestrictinfo = (List *)
321                         adjust_appendrel_attrs((Node *) rel->baserestrictinfo,
322                                                                    appinfo);
323
324                 if (relation_excluded_by_constraints(childrel, childRTE))
325                 {
326                         /*
327                          * This child need not be scanned, so we can omit it from the
328                          * appendrel.  Mark it with a dummy cheapest-path though, in case
329                          * best_appendrel_indexscan() looks at it later.
330                          */
331                         set_dummy_rel_pathlist(childrel);
332                         continue;
333                 }
334
335                 /* CE failed, so finish copying targetlist and join quals */
336                 childrel->joininfo = (List *)
337                         adjust_appendrel_attrs((Node *) rel->joininfo,
338                                                                    appinfo);
339                 childrel->reltargetlist = (List *)
340                         adjust_appendrel_attrs((Node *) rel->reltargetlist,
341                                                                    appinfo);
342
343                 /*
344                  * We have to make child entries in the EquivalenceClass data
345                  * structures as well.
346                  */
347                 if (rel->has_eclass_joins)
348                 {
349                         add_child_rel_equivalences(root, appinfo, rel, childrel);
350                         childrel->has_eclass_joins = true;
351                 }
352
353                 /*
354                  * Copy the parent's attr_needed data as well, with appropriate
355                  * adjustment of relids and attribute numbers.
356                  */
357                 pfree(childrel->attr_needed);
358                 childrel->attr_needed =
359                         adjust_appendrel_attr_needed(rel, appinfo,
360                                                                                  childrel->min_attr,
361                                                                                  childrel->max_attr);
362
363                 /*
364                  * Compute the child's access paths, and add the cheapest one to the
365                  * Append path we are constructing for the parent.
366                  *
367                  * It's possible that the child is itself an appendrel, in which case
368                  * we can "cut out the middleman" and just add its child paths to our
369                  * own list.  (We don't try to do this earlier because we need to
370                  * apply both levels of transformation to the quals.)
371                  */
372                 set_rel_pathlist(root, childrel, childRTindex, childRTE);
373
374                 childpath = childrel->cheapest_total_path;
375                 if (IsA(childpath, AppendPath))
376                         subpaths = list_concat(subpaths,
377                                                                    ((AppendPath *) childpath)->subpaths);
378                 else
379                         subpaths = lappend(subpaths, childpath);
380
381                 /*
382                  * Propagate size information from the child back to the parent. For
383                  * simplicity, we use the largest widths from any child as the parent
384                  * estimates.  (If you want to change this, beware of child
385                  * attr_widths[] entries that haven't been set and are still 0.)
386                  */
387                 rel->rows += childrel->rows;
388                 if (childrel->width > rel->width)
389                         rel->width = childrel->width;
390
391                 forboth(parentvars, rel->reltargetlist,
392                                 childvars, childrel->reltargetlist)
393                 {
394                         Var                *parentvar = (Var *) lfirst(parentvars);
395                         Var                *childvar = (Var *) lfirst(childvars);
396
397                         if (IsA(parentvar, Var) &&
398                                 IsA(childvar, Var))
399                         {
400                                 int                     pndx = parentvar->varattno - rel->min_attr;
401                                 int                     cndx = childvar->varattno - childrel->min_attr;
402
403                                 if (childrel->attr_widths[cndx] > rel->attr_widths[pndx])
404                                         rel->attr_widths[pndx] = childrel->attr_widths[cndx];
405                         }
406                 }
407         }
408
409         /*
410          * Set "raw tuples" count equal to "rows" for the appendrel; needed
411          * because some places assume rel->tuples is valid for any baserel.
412          */
413         rel->tuples = rel->rows;
414
415         /*
416          * Finally, build Append path and install it as the only access path for
417          * the parent rel.      (Note: this is correct even if we have zero or one
418          * live subpath due to constraint exclusion.)
419          */
420         add_path(rel, (Path *) create_append_path(rel, subpaths));
421
422         /* Select cheapest path (pretty easy in this case...) */
423         set_cheapest(rel);
424 }
425
426 /*
427  * set_dummy_rel_pathlist
428  *        Build a dummy path for a relation that's been excluded by constraints
429  *
430  * Rather than inventing a special "dummy" path type, we represent this as an
431  * AppendPath with no members (see also IS_DUMMY_PATH macro).
432  */
433 static void
434 set_dummy_rel_pathlist(RelOptInfo *rel)
435 {
436         /* Set dummy size estimates --- we leave attr_widths[] as zeroes */
437         rel->rows = 0;
438         rel->width = 0;
439
440         add_path(rel, (Path *) create_append_path(rel, NIL));
441
442         /* Select cheapest path (pretty easy in this case...) */
443         set_cheapest(rel);
444 }
445
446 /* quick-and-dirty test to see if any joining is needed */
447 static bool
448 has_multiple_baserels(PlannerInfo *root)
449 {
450         int                     num_base_rels = 0;
451         Index           rti;
452
453         for (rti = 1; rti < root->simple_rel_array_size; rti++)
454         {
455                 RelOptInfo *brel = root->simple_rel_array[rti];
456
457                 if (brel == NULL)
458                         continue;
459
460                 /* ignore RTEs that are "other rels" */
461                 if (brel->reloptkind == RELOPT_BASEREL)
462                         if (++num_base_rels > 1)
463                                 return true;
464         }
465         return false;
466 }
467
468 /*
469  * set_subquery_pathlist
470  *              Build the (single) access path for a subquery RTE
471  */
472 static void
473 set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
474                                           Index rti, RangeTblEntry *rte)
475 {
476         Query      *parse = root->parse;
477         Query      *subquery = rte->subquery;
478         bool       *differentTypes;
479         double          tuple_fraction;
480         PlannerInfo *subroot;
481         List       *pathkeys;
482
483         /* We need a workspace for keeping track of set-op type coercions */
484         differentTypes = (bool *)
485                 palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
486
487         /*
488          * If there are any restriction clauses that have been attached to the
489          * subquery relation, consider pushing them down to become WHERE or HAVING
490          * quals of the subquery itself.  This transformation is useful because it
491          * may allow us to generate a better plan for the subquery than evaluating
492          * all the subquery output rows and then filtering them.
493          *
494          * There are several cases where we cannot push down clauses. Restrictions
495          * involving the subquery are checked by subquery_is_pushdown_safe().
496          * Restrictions on individual clauses are checked by
497          * qual_is_pushdown_safe().  Also, we don't want to push down
498          * pseudoconstant clauses; better to have the gating node above the
499          * subquery.
500          *
501          * Non-pushed-down clauses will get evaluated as qpquals of the
502          * SubqueryScan node.
503          *
504          * XXX Are there any cases where we want to make a policy decision not to
505          * push down a pushable qual, because it'd result in a worse plan?
506          */
507         if (rel->baserestrictinfo != NIL &&
508                 subquery_is_pushdown_safe(subquery, subquery, differentTypes))
509         {
510                 /* OK to consider pushing down individual quals */
511                 List       *upperrestrictlist = NIL;
512                 ListCell   *l;
513
514                 foreach(l, rel->baserestrictinfo)
515                 {
516                         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
517                         Node       *clause = (Node *) rinfo->clause;
518
519                         if (!rinfo->pseudoconstant &&
520                                 qual_is_pushdown_safe(subquery, rti, clause, differentTypes))
521                         {
522                                 /* Push it down */
523                                 subquery_push_qual(subquery, rte, rti, clause);
524                         }
525                         else
526                         {
527                                 /* Keep it in the upper query */
528                                 upperrestrictlist = lappend(upperrestrictlist, rinfo);
529                         }
530                 }
531                 rel->baserestrictinfo = upperrestrictlist;
532         }
533
534         pfree(differentTypes);
535
536         /*
537          * We can safely pass the outer tuple_fraction down to the subquery if the
538          * outer level has no joining, aggregation, or sorting to do. Otherwise
539          * we'd better tell the subquery to plan for full retrieval. (XXX This
540          * could probably be made more intelligent ...)
541          */
542         if (parse->hasAggs ||
543                 parse->groupClause ||
544                 parse->havingQual ||
545                 parse->distinctClause ||
546                 parse->sortClause ||
547                 has_multiple_baserels(root))
548                 tuple_fraction = 0.0;   /* default case */
549         else
550                 tuple_fraction = root->tuple_fraction;
551
552         /* Generate the plan for the subquery */
553         rel->subplan = subquery_planner(root->glob, subquery,
554                                                                         root->query_level + 1,
555                                                                         tuple_fraction,
556                                                                         &subroot);
557         rel->subrtable = subroot->parse->rtable;
558
559         /* Copy number of output rows from subplan */
560         rel->tuples = rel->subplan->plan_rows;
561
562         /* Mark rel with estimated output rows, width, etc */
563         set_baserel_size_estimates(root, rel);
564
565         /* Convert subquery pathkeys to outer representation */
566         pathkeys = convert_subquery_pathkeys(root, rel, subroot->query_pathkeys);
567
568         /* Generate appropriate path */
569         add_path(rel, create_subqueryscan_path(rel, pathkeys));
570
571         /* Select cheapest path (pretty easy in this case...) */
572         set_cheapest(rel);
573 }
574
575 /*
576  * set_function_pathlist
577  *              Build the (single) access path for a function RTE
578  */
579 static void
580 set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
581 {
582         /* Mark rel with estimated output rows, width, etc */
583         set_function_size_estimates(root, rel);
584
585         /* Generate appropriate path */
586         add_path(rel, create_functionscan_path(root, rel));
587
588         /* Select cheapest path (pretty easy in this case...) */
589         set_cheapest(rel);
590 }
591
592 /*
593  * set_values_pathlist
594  *              Build the (single) access path for a VALUES RTE
595  */
596 static void
597 set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
598 {
599         /* Mark rel with estimated output rows, width, etc */
600         set_values_size_estimates(root, rel);
601
602         /* Generate appropriate path */
603         add_path(rel, create_valuesscan_path(root, rel));
604
605         /* Select cheapest path (pretty easy in this case...) */
606         set_cheapest(rel);
607 }
608
609 /*
610  * make_rel_from_joinlist
611  *        Build access paths using a "joinlist" to guide the join path search.
612  *
613  * See comments for deconstruct_jointree() for definition of the joinlist
614  * data structure.
615  */
616 static RelOptInfo *
617 make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
618 {
619         int                     levels_needed;
620         List       *initial_rels;
621         ListCell   *jl;
622
623         /*
624          * Count the number of child joinlist nodes.  This is the depth of the
625          * dynamic-programming algorithm we must employ to consider all ways of
626          * joining the child nodes.
627          */
628         levels_needed = list_length(joinlist);
629
630         if (levels_needed <= 0)
631                 return NULL;                    /* nothing to do? */
632
633         /*
634          * Construct a list of rels corresponding to the child joinlist nodes.
635          * This may contain both base rels and rels constructed according to
636          * sub-joinlists.
637          */
638         initial_rels = NIL;
639         foreach(jl, joinlist)
640         {
641                 Node       *jlnode = (Node *) lfirst(jl);
642                 RelOptInfo *thisrel;
643
644                 if (IsA(jlnode, RangeTblRef))
645                 {
646                         int                     varno = ((RangeTblRef *) jlnode)->rtindex;
647
648                         thisrel = find_base_rel(root, varno);
649                 }
650                 else if (IsA(jlnode, List))
651                 {
652                         /* Recurse to handle subproblem */
653                         thisrel = make_rel_from_joinlist(root, (List *) jlnode);
654                 }
655                 else
656                 {
657                         elog(ERROR, "unrecognized joinlist node type: %d",
658                                  (int) nodeTag(jlnode));
659                         thisrel = NULL;         /* keep compiler quiet */
660                 }
661
662                 initial_rels = lappend(initial_rels, thisrel);
663         }
664
665         if (levels_needed == 1)
666         {
667                 /*
668                  * Single joinlist node, so we're done.
669                  */
670                 return (RelOptInfo *) linitial(initial_rels);
671         }
672         else
673         {
674                 /*
675                  * Consider the different orders in which we could join the rels,
676                  * using a plugin, GEQO, or the regular join search code.
677                  *
678                  * We put the initial_rels list into a PlannerInfo field because
679                  * has_legal_joinclause() needs to look at it (ugly :-().
680                  */
681                 root->initial_rels = initial_rels;
682
683                 if (join_search_hook)
684                         return (*join_search_hook) (root, levels_needed, initial_rels);
685                 else if (enable_geqo && levels_needed >= geqo_threshold)
686                         return geqo(root, levels_needed, initial_rels);
687                 else
688                         return standard_join_search(root, levels_needed, initial_rels);
689         }
690 }
691
692 /*
693  * standard_join_search
694  *        Find possible joinpaths for a query by successively finding ways
695  *        to join component relations into join relations.
696  *
697  * 'levels_needed' is the number of iterations needed, ie, the number of
698  *              independent jointree items in the query.  This is > 1.
699  *
700  * 'initial_rels' is a list of RelOptInfo nodes for each independent
701  *              jointree item.  These are the components to be joined together.
702  *              Note that levels_needed == list_length(initial_rels).
703  *
704  * Returns the final level of join relations, i.e., the relation that is
705  * the result of joining all the original relations together.
706  * At least one implementation path must be provided for this relation and
707  * all required sub-relations.
708  *
709  * To support loadable plugins that modify planner behavior by changing the
710  * join searching algorithm, we provide a hook variable that lets a plugin
711  * replace or supplement this function.  Any such hook must return the same
712  * final join relation as the standard code would, but it might have a
713  * different set of implementation paths attached, and only the sub-joinrels
714  * needed for these paths need have been instantiated.
715  *
716  * Note to plugin authors: the functions invoked during standard_join_search()
717  * modify root->join_rel_list and root->join_rel_hash.  If you want to do more
718  * than one join-order search, you'll probably need to save and restore the
719  * original states of those data structures.  See geqo_eval() for an example.
720  */
721 RelOptInfo *
722 standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
723 {
724         List      **joinitems;
725         int                     lev;
726         RelOptInfo *rel;
727
728         /*
729          * We employ a simple "dynamic programming" algorithm: we first find all
730          * ways to build joins of two jointree items, then all ways to build joins
731          * of three items (from two-item joins and single items), then four-item
732          * joins, and so on until we have considered all ways to join all the
733          * items into one rel.
734          *
735          * joinitems[j] is a list of all the j-item rels.  Initially we set
736          * joinitems[1] to represent all the single-jointree-item relations.
737          */
738         joinitems = (List **) palloc0((levels_needed + 1) * sizeof(List *));
739
740         joinitems[1] = initial_rels;
741
742         for (lev = 2; lev <= levels_needed; lev++)
743         {
744                 ListCell   *x;
745
746                 /*
747                  * Determine all possible pairs of relations to be joined at this
748                  * level, and build paths for making each one from every available
749                  * pair of lower-level relations.
750                  */
751                 joinitems[lev] = join_search_one_level(root, lev, joinitems);
752
753                 /*
754                  * Do cleanup work on each just-processed rel.
755                  */
756                 foreach(x, joinitems[lev])
757                 {
758                         rel = (RelOptInfo *) lfirst(x);
759
760                         /* Find and save the cheapest paths for this rel */
761                         set_cheapest(rel);
762
763 #ifdef OPTIMIZER_DEBUG
764                         debug_print_rel(root, rel);
765 #endif
766                 }
767         }
768
769         /*
770          * We should have a single rel at the final level.
771          */
772         if (joinitems[levels_needed] == NIL)
773                 elog(ERROR, "failed to build any %d-way joins", levels_needed);
774         Assert(list_length(joinitems[levels_needed]) == 1);
775
776         rel = (RelOptInfo *) linitial(joinitems[levels_needed]);
777
778         return rel;
779 }
780
781 /*****************************************************************************
782  *                      PUSHING QUALS DOWN INTO SUBQUERIES
783  *****************************************************************************/
784
785 /*
786  * subquery_is_pushdown_safe - is a subquery safe for pushing down quals?
787  *
788  * subquery is the particular component query being checked.  topquery
789  * is the top component of a set-operations tree (the same Query if no
790  * set-op is involved).
791  *
792  * Conditions checked here:
793  *
794  * 1. If the subquery has a LIMIT clause, we must not push down any quals,
795  * since that could change the set of rows returned.
796  *
797  * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
798  * quals into it, because that would change the results.
799  *
800  * 3. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
801  * push quals into each component query, but the quals can only reference
802  * subquery columns that suffer no type coercions in the set operation.
803  * Otherwise there are possible semantic gotchas.  So, we check the
804  * component queries to see if any of them have different output types;
805  * differentTypes[k] is set true if column k has different type in any
806  * component.
807  */
808 static bool
809 subquery_is_pushdown_safe(Query *subquery, Query *topquery,
810                                                   bool *differentTypes)
811 {
812         SetOperationStmt *topop;
813
814         /* Check point 1 */
815         if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
816                 return false;
817
818         /* Are we at top level, or looking at a setop component? */
819         if (subquery == topquery)
820         {
821                 /* Top level, so check any component queries */
822                 if (subquery->setOperations != NULL)
823                         if (!recurse_pushdown_safe(subquery->setOperations, topquery,
824                                                                            differentTypes))
825                                 return false;
826         }
827         else
828         {
829                 /* Setop component must not have more components (too weird) */
830                 if (subquery->setOperations != NULL)
831                         return false;
832                 /* Check whether setop component output types match top level */
833                 topop = (SetOperationStmt *) topquery->setOperations;
834                 Assert(topop && IsA(topop, SetOperationStmt));
835                 compare_tlist_datatypes(subquery->targetList,
836                                                                 topop->colTypes,
837                                                                 differentTypes);
838         }
839         return true;
840 }
841
842 /*
843  * Helper routine to recurse through setOperations tree
844  */
845 static bool
846 recurse_pushdown_safe(Node *setOp, Query *topquery,
847                                           bool *differentTypes)
848 {
849         if (IsA(setOp, RangeTblRef))
850         {
851                 RangeTblRef *rtr = (RangeTblRef *) setOp;
852                 RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable);
853                 Query      *subquery = rte->subquery;
854
855                 Assert(subquery != NULL);
856                 return subquery_is_pushdown_safe(subquery, topquery, differentTypes);
857         }
858         else if (IsA(setOp, SetOperationStmt))
859         {
860                 SetOperationStmt *op = (SetOperationStmt *) setOp;
861
862                 /* EXCEPT is no good */
863                 if (op->op == SETOP_EXCEPT)
864                         return false;
865                 /* Else recurse */
866                 if (!recurse_pushdown_safe(op->larg, topquery, differentTypes))
867                         return false;
868                 if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes))
869                         return false;
870         }
871         else
872         {
873                 elog(ERROR, "unrecognized node type: %d",
874                          (int) nodeTag(setOp));
875         }
876         return true;
877 }
878
879 /*
880  * Compare tlist's datatypes against the list of set-operation result types.
881  * For any items that are different, mark the appropriate element of
882  * differentTypes[] to show that this column will have type conversions.
883  *
884  * We don't have to care about typmods here: the only allowed difference
885  * between set-op input and output typmods is input is a specific typmod
886  * and output is -1, and that does not require a coercion.
887  */
888 static void
889 compare_tlist_datatypes(List *tlist, List *colTypes,
890                                                 bool *differentTypes)
891 {
892         ListCell   *l;
893         ListCell   *colType = list_head(colTypes);
894
895         foreach(l, tlist)
896         {
897                 TargetEntry *tle = (TargetEntry *) lfirst(l);
898
899                 if (tle->resjunk)
900                         continue;                       /* ignore resjunk columns */
901                 if (colType == NULL)
902                         elog(ERROR, "wrong number of tlist entries");
903                 if (exprType((Node *) tle->expr) != lfirst_oid(colType))
904                         differentTypes[tle->resno] = true;
905                 colType = lnext(colType);
906         }
907         if (colType != NULL)
908                 elog(ERROR, "wrong number of tlist entries");
909 }
910
911 /*
912  * qual_is_pushdown_safe - is a particular qual safe to push down?
913  *
914  * qual is a restriction clause applying to the given subquery (whose RTE
915  * has index rti in the parent query).
916  *
917  * Conditions checked here:
918  *
919  * 1. The qual must not contain any subselects (mainly because I'm not sure
920  * it will work correctly: sublinks will already have been transformed into
921  * subplans in the qual, but not in the subquery).
922  *
923  * 2. The qual must not refer to the whole-row output of the subquery
924  * (since there is no easy way to name that within the subquery itself).
925  *
926  * 3. The qual must not refer to any subquery output columns that were
927  * found to have inconsistent types across a set operation tree by
928  * subquery_is_pushdown_safe().
929  *
930  * 4. If the subquery uses DISTINCT ON, we must not push down any quals that
931  * refer to non-DISTINCT output columns, because that could change the set
932  * of rows returned.  This condition is vacuous for DISTINCT, because then
933  * there are no non-DISTINCT output columns, but unfortunately it's fairly
934  * expensive to tell the difference between DISTINCT and DISTINCT ON in the
935  * parsetree representation.  It's cheaper to just make sure all the Vars
936  * in the qual refer to DISTINCT columns.
937  *
938  * 5. We must not push down any quals that refer to subselect outputs that
939  * return sets, else we'd introduce functions-returning-sets into the
940  * subquery's WHERE/HAVING quals.
941  *
942  * 6. We must not push down any quals that refer to subselect outputs that
943  * contain volatile functions, for fear of introducing strange results due
944  * to multiple evaluation of a volatile function.
945  */
946 static bool
947 qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
948                                           bool *differentTypes)
949 {
950         bool            safe = true;
951         List       *vars;
952         ListCell   *vl;
953         Bitmapset  *tested = NULL;
954
955         /* Refuse subselects (point 1) */
956         if (contain_subplans(qual))
957                 return false;
958
959         /*
960          * Examine all Vars used in clause; since it's a restriction clause, all
961          * such Vars must refer to subselect output columns.
962          */
963         vars = pull_var_clause(qual, false);
964         foreach(vl, vars)
965         {
966                 Var                *var = (Var *) lfirst(vl);
967                 TargetEntry *tle;
968
969                 Assert(var->varno == rti);
970
971                 /* Check point 2 */
972                 if (var->varattno == 0)
973                 {
974                         safe = false;
975                         break;
976                 }
977
978                 /*
979                  * We use a bitmapset to avoid testing the same attno more than once.
980                  * (NB: this only works because subquery outputs can't have negative
981                  * attnos.)
982                  */
983                 if (bms_is_member(var->varattno, tested))
984                         continue;
985                 tested = bms_add_member(tested, var->varattno);
986
987                 /* Check point 3 */
988                 if (differentTypes[var->varattno])
989                 {
990                         safe = false;
991                         break;
992                 }
993
994                 /* Must find the tlist element referenced by the Var */
995                 tle = get_tle_by_resno(subquery->targetList, var->varattno);
996                 Assert(tle != NULL);
997                 Assert(!tle->resjunk);
998
999                 /* If subquery uses DISTINCT or DISTINCT ON, check point 4 */
1000                 if (subquery->distinctClause != NIL &&
1001                         !targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
1002                 {
1003                         /* non-DISTINCT column, so fail */
1004                         safe = false;
1005                         break;
1006                 }
1007
1008                 /* Refuse functions returning sets (point 5) */
1009                 if (expression_returns_set((Node *) tle->expr))
1010                 {
1011                         safe = false;
1012                         break;
1013                 }
1014
1015                 /* Refuse volatile functions (point 6) */
1016                 if (contain_volatile_functions((Node *) tle->expr))
1017                 {
1018                         safe = false;
1019                         break;
1020                 }
1021         }
1022
1023         list_free(vars);
1024         bms_free(tested);
1025
1026         return safe;
1027 }
1028
1029 /*
1030  * subquery_push_qual - push down a qual that we have determined is safe
1031  */
1032 static void
1033 subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual)
1034 {
1035         if (subquery->setOperations != NULL)
1036         {
1037                 /* Recurse to push it separately to each component query */
1038                 recurse_push_qual(subquery->setOperations, subquery,
1039                                                   rte, rti, qual);
1040         }
1041         else
1042         {
1043                 /*
1044                  * We need to replace Vars in the qual (which must refer to outputs of
1045                  * the subquery) with copies of the subquery's targetlist expressions.
1046                  * Note that at this point, any uplevel Vars in the qual should have
1047                  * been replaced with Params, so they need no work.
1048                  *
1049                  * This step also ensures that when we are pushing into a setop tree,
1050                  * each component query gets its own copy of the qual.
1051                  */
1052                 qual = ResolveNew(qual, rti, 0, rte,
1053                                                   subquery->targetList,
1054                                                   CMD_SELECT, 0);
1055
1056                 /*
1057                  * Now attach the qual to the proper place: normally WHERE, but if the
1058                  * subquery uses grouping or aggregation, put it in HAVING (since the
1059                  * qual really refers to the group-result rows).
1060                  */
1061                 if (subquery->hasAggs || subquery->groupClause || subquery->havingQual)
1062                         subquery->havingQual = make_and_qual(subquery->havingQual, qual);
1063                 else
1064                         subquery->jointree->quals =
1065                                 make_and_qual(subquery->jointree->quals, qual);
1066
1067                 /*
1068                  * We need not change the subquery's hasAggs or hasSublinks flags,
1069                  * since we can't be pushing down any aggregates that weren't there
1070                  * before, and we don't push down subselects at all.
1071                  */
1072         }
1073 }
1074
1075 /*
1076  * Helper routine to recurse through setOperations tree
1077  */
1078 static void
1079 recurse_push_qual(Node *setOp, Query *topquery,
1080                                   RangeTblEntry *rte, Index rti, Node *qual)
1081 {
1082         if (IsA(setOp, RangeTblRef))
1083         {
1084                 RangeTblRef *rtr = (RangeTblRef *) setOp;
1085                 RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable);
1086                 Query      *subquery = subrte->subquery;
1087
1088                 Assert(subquery != NULL);
1089                 subquery_push_qual(subquery, rte, rti, qual);
1090         }
1091         else if (IsA(setOp, SetOperationStmt))
1092         {
1093                 SetOperationStmt *op = (SetOperationStmt *) setOp;
1094
1095                 recurse_push_qual(op->larg, topquery, rte, rti, qual);
1096                 recurse_push_qual(op->rarg, topquery, rte, rti, qual);
1097         }
1098         else
1099         {
1100                 elog(ERROR, "unrecognized node type: %d",
1101                          (int) nodeTag(setOp));
1102         }
1103 }
1104
1105 /*****************************************************************************
1106  *                      DEBUG SUPPORT
1107  *****************************************************************************/
1108
1109 #ifdef OPTIMIZER_DEBUG
1110
1111 static void
1112 print_relids(Relids relids)
1113 {
1114         Relids          tmprelids;
1115         int                     x;
1116         bool            first = true;
1117
1118         tmprelids = bms_copy(relids);
1119         while ((x = bms_first_member(tmprelids)) >= 0)
1120         {
1121                 if (!first)
1122                         printf(" ");
1123                 printf("%d", x);
1124                 first = false;
1125         }
1126         bms_free(tmprelids);
1127 }
1128
1129 static void
1130 print_restrictclauses(PlannerInfo *root, List *clauses)
1131 {
1132         ListCell   *l;
1133
1134         foreach(l, clauses)
1135         {
1136                 RestrictInfo *c = lfirst(l);
1137
1138                 print_expr((Node *) c->clause, root->parse->rtable);
1139                 if (lnext(l))
1140                         printf(", ");
1141         }
1142 }
1143
1144 static void
1145 print_path(PlannerInfo *root, Path *path, int indent)
1146 {
1147         const char *ptype;
1148         bool            join = false;
1149         Path       *subpath = NULL;
1150         int                     i;
1151
1152         switch (nodeTag(path))
1153         {
1154                 case T_Path:
1155                         ptype = "SeqScan";
1156                         break;
1157                 case T_IndexPath:
1158                         ptype = "IdxScan";
1159                         break;
1160                 case T_BitmapHeapPath:
1161                         ptype = "BitmapHeapScan";
1162                         break;
1163                 case T_BitmapAndPath:
1164                         ptype = "BitmapAndPath";
1165                         break;
1166                 case T_BitmapOrPath:
1167                         ptype = "BitmapOrPath";
1168                         break;
1169                 case T_TidPath:
1170                         ptype = "TidScan";
1171                         break;
1172                 case T_AppendPath:
1173                         ptype = "Append";
1174                         break;
1175                 case T_ResultPath:
1176                         ptype = "Result";
1177                         break;
1178                 case T_MaterialPath:
1179                         ptype = "Material";
1180                         subpath = ((MaterialPath *) path)->subpath;
1181                         break;
1182                 case T_UniquePath:
1183                         ptype = "Unique";
1184                         subpath = ((UniquePath *) path)->subpath;
1185                         break;
1186                 case T_NestPath:
1187                         ptype = "NestLoop";
1188                         join = true;
1189                         break;
1190                 case T_MergePath:
1191                         ptype = "MergeJoin";
1192                         join = true;
1193                         break;
1194                 case T_HashPath:
1195                         ptype = "HashJoin";
1196                         join = true;
1197                         break;
1198                 default:
1199                         ptype = "???Path";
1200                         break;
1201         }
1202
1203         for (i = 0; i < indent; i++)
1204                 printf("\t");
1205         printf("%s", ptype);
1206
1207         if (path->parent)
1208         {
1209                 printf("(");
1210                 print_relids(path->parent->relids);
1211                 printf(") rows=%.0f", path->parent->rows);
1212         }
1213         printf(" cost=%.2f..%.2f\n", path->startup_cost, path->total_cost);
1214
1215         if (path->pathkeys)
1216         {
1217                 for (i = 0; i < indent; i++)
1218                         printf("\t");
1219                 printf("  pathkeys: ");
1220                 print_pathkeys(path->pathkeys, root->parse->rtable);
1221         }
1222
1223         if (join)
1224         {
1225                 JoinPath   *jp = (JoinPath *) path;
1226
1227                 for (i = 0; i < indent; i++)
1228                         printf("\t");
1229                 printf("  clauses: ");
1230                 print_restrictclauses(root, jp->joinrestrictinfo);
1231                 printf("\n");
1232
1233                 if (IsA(path, MergePath))
1234                 {
1235                         MergePath  *mp = (MergePath *) path;
1236
1237                         if (mp->outersortkeys || mp->innersortkeys)
1238                         {
1239                                 for (i = 0; i < indent; i++)
1240                                         printf("\t");
1241                                 printf("  sortouter=%d sortinner=%d\n",
1242                                            ((mp->outersortkeys) ? 1 : 0),
1243                                            ((mp->innersortkeys) ? 1 : 0));
1244                         }
1245                 }
1246
1247                 print_path(root, jp->outerjoinpath, indent + 1);
1248                 print_path(root, jp->innerjoinpath, indent + 1);
1249         }
1250
1251         if (subpath)
1252                 print_path(root, subpath, indent + 1);
1253 }
1254
1255 void
1256 debug_print_rel(PlannerInfo *root, RelOptInfo *rel)
1257 {
1258         ListCell   *l;
1259
1260         printf("RELOPTINFO (");
1261         print_relids(rel->relids);
1262         printf("): rows=%.0f width=%d\n", rel->rows, rel->width);
1263
1264         if (rel->baserestrictinfo)
1265         {
1266                 printf("\tbaserestrictinfo: ");
1267                 print_restrictclauses(root, rel->baserestrictinfo);
1268                 printf("\n");
1269         }
1270
1271         if (rel->joininfo)
1272         {
1273                 printf("\tjoininfo: ");
1274                 print_restrictclauses(root, rel->joininfo);
1275                 printf("\n");
1276         }
1277
1278         printf("\tpath list:\n");
1279         foreach(l, rel->pathlist)
1280                 print_path(root, lfirst(l), 1);
1281         printf("\n\tcheapest startup path:\n");
1282         print_path(root, rel->cheapest_startup_path, 1);
1283         printf("\n\tcheapest total path:\n");
1284         print_path(root, rel->cheapest_total_path, 1);
1285         printf("\n");
1286         fflush(stdout);
1287 }
1288
1289 #endif   /* OPTIMIZER_DEBUG */