]> granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/allpaths.c
Some further performance tweaks for planning large inheritance trees that
[postgresql] / src / backend / optimizer / path / allpaths.c
1 /*-------------------------------------------------------------------------
2  *
3  * allpaths.c
4  *        Routines to find possible search paths for processing a query
5  *
6  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.163 2007/04/21 21:01:44 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15
16 #include "postgres.h"
17
18 #ifdef OPTIMIZER_DEBUG
19 #include "nodes/print.h"
20 #endif
21 #include "optimizer/clauses.h"
22 #include "optimizer/cost.h"
23 #include "optimizer/geqo.h"
24 #include "optimizer/pathnode.h"
25 #include "optimizer/paths.h"
26 #include "optimizer/plancat.h"
27 #include "optimizer/planner.h"
28 #include "optimizer/prep.h"
29 #include "optimizer/var.h"
30 #include "parser/parse_clause.h"
31 #include "parser/parse_expr.h"
32 #include "parser/parsetree.h"
33 #include "rewrite/rewriteManip.h"
34
35
36 /* These parameters are set by GUC */
37 bool            enable_geqo = false;    /* just in case GUC doesn't set it */
38 int                     geqo_threshold;
39
40
41 static void set_base_rel_pathlists(PlannerInfo *root);
42 static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
43                                                          Index rti, RangeTblEntry *rte);
44 static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
45                                            RangeTblEntry *rte);
46 static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
47                                                 Index rti, RangeTblEntry *rte);
48 static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
49                                           Index rti, RangeTblEntry *rte);
50 static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
51                                           RangeTblEntry *rte);
52 static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel,
53                                         RangeTblEntry *rte);
54 static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
55 static RelOptInfo *make_one_rel_by_joins(PlannerInfo *root, int levels_needed,
56                                           List *initial_rels);
57 static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
58                                                   bool *differentTypes);
59 static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
60                                           bool *differentTypes);
61 static void compare_tlist_datatypes(List *tlist, List *colTypes,
62                                                 bool *differentTypes);
63 static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
64                                           bool *differentTypes);
65 static void subquery_push_qual(Query *subquery,
66                                    RangeTblEntry *rte, Index rti, Node *qual);
67 static void recurse_push_qual(Node *setOp, Query *topquery,
68                                   RangeTblEntry *rte, Index rti, Node *qual);
69
70
71 /*
72  * make_one_rel
73  *        Finds all possible access paths for executing a query, returning a
74  *        single rel that represents the join of all base rels in the query.
75  */
76 RelOptInfo *
77 make_one_rel(PlannerInfo *root, List *joinlist)
78 {
79         RelOptInfo *rel;
80
81         /*
82          * Generate access paths for the base rels.
83          */
84         set_base_rel_pathlists(root);
85
86         /*
87          * Generate access paths for the entire join tree.
88          */
89         rel = make_rel_from_joinlist(root, joinlist);
90
91         /*
92          * The result should join all and only the query's base rels.
93          */
94 #ifdef USE_ASSERT_CHECKING
95         {
96                 int                     num_base_rels = 0;
97                 Index           rti;
98
99                 for (rti = 1; rti < root->simple_rel_array_size; rti++)
100                 {
101                         RelOptInfo *brel = root->simple_rel_array[rti];
102
103                         if (brel == NULL)
104                                 continue;
105
106                         Assert(brel->relid == rti); /* sanity check on array */
107
108                         /* ignore RTEs that are "other rels" */
109                         if (brel->reloptkind != RELOPT_BASEREL)
110                                 continue;
111
112                         Assert(bms_is_member(rti, rel->relids));
113                         num_base_rels++;
114                 }
115
116                 Assert(bms_num_members(rel->relids) == num_base_rels);
117         }
118 #endif
119
120         return rel;
121 }
122
123 /*
124  * set_base_rel_pathlists
125  *        Finds all paths available for scanning each base-relation entry.
126  *        Sequential scan and any available indices are considered.
127  *        Each useful path is attached to its relation's 'pathlist' field.
128  */
129 static void
130 set_base_rel_pathlists(PlannerInfo *root)
131 {
132         Index           rti;
133
134         for (rti = 1; rti < root->simple_rel_array_size; rti++)
135         {
136                 RelOptInfo *rel = root->simple_rel_array[rti];
137
138                 /* there may be empty slots corresponding to non-baserel RTEs */
139                 if (rel == NULL)
140                         continue;
141
142                 Assert(rel->relid == rti);              /* sanity check on array */
143
144                 /* ignore RTEs that are "other rels" */
145                 if (rel->reloptkind != RELOPT_BASEREL)
146                         continue;
147
148                 set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
149         }
150 }
151
152 /*
153  * set_rel_pathlist
154  *        Build access paths for a base relation
155  */
156 static void
157 set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
158                                  Index rti, RangeTblEntry *rte)
159 {
160         if (rte->inh)
161         {
162                 /* It's an "append relation", process accordingly */
163                 set_append_rel_pathlist(root, rel, rti, rte);
164         }
165         else if (rel->rtekind == RTE_SUBQUERY)
166         {
167                 /* Subquery --- generate a separate plan for it */
168                 set_subquery_pathlist(root, rel, rti, rte);
169         }
170         else if (rel->rtekind == RTE_FUNCTION)
171         {
172                 /* RangeFunction --- generate a separate plan for it */
173                 set_function_pathlist(root, rel, rte);
174         }
175         else if (rel->rtekind == RTE_VALUES)
176         {
177                 /* Values list --- generate a separate plan for it */
178                 set_values_pathlist(root, rel, rte);
179         }
180         else
181         {
182                 /* Plain relation */
183                 Assert(rel->rtekind == RTE_RELATION);
184                 set_plain_rel_pathlist(root, rel, rte);
185         }
186
187 #ifdef OPTIMIZER_DEBUG
188         debug_print_rel(root, rel);
189 #endif
190 }
191
192 /*
193  * set_plain_rel_pathlist
194  *        Build access paths for a plain relation (no subquery, no inheritance)
195  */
196 static void
197 set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
198 {
199         /*
200          * If we can prove we don't need to scan the rel via constraint exclusion,
201          * set up a single dummy path for it.  (Rather than inventing a special
202          * "dummy" path type, we represent this as an AppendPath with no members.)
203          * We only need to check for regular baserels; if it's an otherrel, CE
204          * was already checked in set_append_rel_pathlist().
205          */
206         if (rel->reloptkind == RELOPT_BASEREL &&
207                 relation_excluded_by_constraints(rel, rte))
208         {
209                 /* Set dummy size estimates --- we leave attr_widths[] as zeroes */
210                 rel->rows = 0;
211                 rel->width = 0;
212
213                 add_path(rel, (Path *) create_append_path(rel, NIL));
214
215                 /* Select cheapest path (pretty easy in this case...) */
216                 set_cheapest(rel);
217
218                 return;
219         }
220
221         /* Mark rel with estimated output rows, width, etc */
222         set_baserel_size_estimates(root, rel);
223
224         /* Test any partial indexes of rel for applicability */
225         check_partial_indexes(root, rel);
226
227         /*
228          * Check to see if we can extract any restriction conditions from join
229          * quals that are OR-of-AND structures.  If so, add them to the rel's
230          * restriction list, and recompute the size estimates.
231          */
232         if (create_or_index_quals(root, rel))
233                 set_baserel_size_estimates(root, rel);
234
235         /*
236          * Generate paths and add them to the rel's pathlist.
237          *
238          * Note: add_path() will discard any paths that are dominated by another
239          * available path, keeping only those paths that are superior along at
240          * least one dimension of cost or sortedness.
241          */
242
243         /* Consider sequential scan */
244         add_path(rel, create_seqscan_path(root, rel));
245
246         /* Consider index scans */
247         create_index_paths(root, rel);
248
249         /* Consider TID scans */
250         create_tidscan_paths(root, rel);
251
252         /* Now find the cheapest of the paths for this rel */
253         set_cheapest(rel);
254 }
255
256 /*
257  * set_append_rel_pathlist
258  *        Build access paths for an "append relation"
259  *
260  * The passed-in rel and RTE represent the entire append relation.      The
261  * relation's contents are computed by appending together the output of
262  * the individual member relations.  Note that in the inheritance case,
263  * the first member relation is actually the same table as is mentioned in
264  * the parent RTE ... but it has a different RTE and RelOptInfo.  This is
265  * a good thing because their outputs are not the same size.
266  */
267 static void
268 set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
269                                                 Index rti, RangeTblEntry *rte)
270 {
271         int                     parentRTindex = rti;
272         List       *subpaths = NIL;
273         ListCell   *l;
274
275         /*
276          * XXX for now, can't handle inherited expansion of FOR UPDATE/SHARE; can
277          * we do better?  (This will take some redesign because the executor
278          * currently supposes that every rowMark relation is involved in every row
279          * returned by the query.)
280          */
281         if (get_rowmark(root->parse, parentRTindex))
282                 ereport(ERROR,
283                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
284                                  errmsg("SELECT FOR UPDATE/SHARE is not supported for inheritance queries")));
285
286         /*
287          * Initialize to compute size estimates for whole append relation
288          */
289         rel->rows = 0;
290         rel->width = 0;
291
292         /*
293          * Generate access paths for each member relation, and pick the cheapest
294          * path for each one.
295          */
296         foreach(l, root->append_rel_list)
297         {
298                 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
299                 int                     childRTindex;
300                 RangeTblEntry *childRTE;
301                 RelOptInfo *childrel;
302                 Path       *childpath;
303                 ListCell   *parentvars;
304                 ListCell   *childvars;
305
306                 /* append_rel_list contains all append rels; ignore others */
307                 if (appinfo->parent_relid != parentRTindex)
308                         continue;
309
310                 childRTindex = appinfo->child_relid;
311                 childRTE = root->simple_rte_array[childRTindex];
312
313                 /*
314                  * The child rel's RelOptInfo was already created during
315                  * add_base_rels_to_query.
316                  */
317                 childrel = find_base_rel(root, childRTindex);
318                 Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
319
320                 /*
321                  * We have to copy the parent's targetlist and quals to the child,
322                  * with appropriate substitution of variables.  However, only the
323                  * baserestrictinfo quals are needed before we can check for
324                  * constraint exclusion; so do that first and then check to see
325                  * if we can disregard this child.
326                  */
327                 childrel->baserestrictinfo = (List *)
328                         adjust_appendrel_attrs((Node *) rel->baserestrictinfo,
329                                                                    appinfo);
330
331                 if (relation_excluded_by_constraints(childrel, childRTE))
332                 {
333                         /* this child need not be scanned, so just disregard it */
334                         continue;
335                 }
336
337                 /* CE failed, so finish copying targetlist and join quals */
338                 childrel->joininfo = (List *)
339                         adjust_appendrel_attrs((Node *) rel->joininfo,
340                                                                    appinfo);
341                 childrel->reltargetlist = (List *)
342                         adjust_appendrel_attrs((Node *) rel->reltargetlist,
343                                                                    appinfo);
344
345                 /*
346                  * We have to make child entries in the EquivalenceClass data
347                  * structures as well.
348                  */
349                 if (rel->has_eclass_joins)
350                 {
351                         add_child_rel_equivalences(root, appinfo, rel, childrel);
352                         childrel->has_eclass_joins = true;
353                 }
354
355                 /*
356                  * Copy the parent's attr_needed data as well, with appropriate
357                  * adjustment of relids and attribute numbers.
358                  */
359                 pfree(childrel->attr_needed);
360                 childrel->attr_needed =
361                         adjust_appendrel_attr_needed(rel, appinfo,
362                                                                                  childrel->min_attr,
363                                                                                  childrel->max_attr);
364
365                 /*
366                  * Compute the child's access paths, and add the cheapest one to the
367                  * Append path we are constructing for the parent.
368                  *
369                  * It's possible that the child is itself an appendrel, in which case
370                  * we can "cut out the middleman" and just add its child paths to our
371                  * own list.  (We don't try to do this earlier because we need to
372                  * apply both levels of transformation to the quals.)
373                  */
374                 set_rel_pathlist(root, childrel, childRTindex, childRTE);
375
376                 childpath = childrel->cheapest_total_path;
377                 if (IsA(childpath, AppendPath))
378                         subpaths = list_concat(subpaths,
379                                                                    ((AppendPath *) childpath)->subpaths);
380                 else
381                         subpaths = lappend(subpaths, childpath);
382
383                 /*
384                  * Propagate size information from the child back to the parent. For
385                  * simplicity, we use the largest widths from any child as the parent
386                  * estimates.  (If you want to change this, beware of child
387                  * attr_widths[] entries that haven't been set and are still 0.)
388                  */
389                 rel->rows += childrel->rows;
390                 if (childrel->width > rel->width)
391                         rel->width = childrel->width;
392
393                 forboth(parentvars, rel->reltargetlist,
394                                 childvars, childrel->reltargetlist)
395                 {
396                         Var                *parentvar = (Var *) lfirst(parentvars);
397                         Var                *childvar = (Var *) lfirst(childvars);
398
399                         if (IsA(parentvar, Var) &&
400                                 IsA(childvar, Var))
401                         {
402                                 int                     pndx = parentvar->varattno - rel->min_attr;
403                                 int                     cndx = childvar->varattno - childrel->min_attr;
404
405                                 if (childrel->attr_widths[cndx] > rel->attr_widths[pndx])
406                                         rel->attr_widths[pndx] = childrel->attr_widths[cndx];
407                         }
408                 }
409         }
410
411         /*
412          * Set "raw tuples" count equal to "rows" for the appendrel; needed
413          * because some places assume rel->tuples is valid for any baserel.
414          */
415         rel->tuples = rel->rows;
416
417         /*
418          * Finally, build Append path and install it as the only access path for
419          * the parent rel.      (Note: this is correct even if we have zero or one
420          * live subpath due to constraint exclusion.)
421          */
422         add_path(rel, (Path *) create_append_path(rel, subpaths));
423
424         /* Select cheapest path (pretty easy in this case...) */
425         set_cheapest(rel);
426 }
427
428 /* quick-and-dirty test to see if any joining is needed */
429 static bool
430 has_multiple_baserels(PlannerInfo *root)
431 {
432         int                     num_base_rels = 0;
433         Index           rti;
434
435         for (rti = 1; rti < root->simple_rel_array_size; rti++)
436         {
437                 RelOptInfo *brel = root->simple_rel_array[rti];
438
439                 if (brel == NULL)
440                         continue;
441
442                 /* ignore RTEs that are "other rels" */
443                 if (brel->reloptkind == RELOPT_BASEREL)
444                         if (++num_base_rels > 1)
445                                 return true;
446         }
447         return false;
448 }
449
450 /*
451  * set_subquery_pathlist
452  *              Build the (single) access path for a subquery RTE
453  */
454 static void
455 set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
456                                           Index rti, RangeTblEntry *rte)
457 {
458         Query      *parse = root->parse;
459         Query      *subquery = rte->subquery;
460         bool       *differentTypes;
461         double          tuple_fraction;
462         PlannerInfo *subroot;
463         List       *pathkeys;
464
465         /* We need a workspace for keeping track of set-op type coercions */
466         differentTypes = (bool *)
467                 palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
468
469         /*
470          * If there are any restriction clauses that have been attached to the
471          * subquery relation, consider pushing them down to become WHERE or HAVING
472          * quals of the subquery itself.  This transformation is useful because it
473          * may allow us to generate a better plan for the subquery than evaluating
474          * all the subquery output rows and then filtering them.
475          *
476          * There are several cases where we cannot push down clauses. Restrictions
477          * involving the subquery are checked by subquery_is_pushdown_safe().
478          * Restrictions on individual clauses are checked by
479          * qual_is_pushdown_safe().  Also, we don't want to push down
480          * pseudoconstant clauses; better to have the gating node above the
481          * subquery.
482          *
483          * Non-pushed-down clauses will get evaluated as qpquals of the
484          * SubqueryScan node.
485          *
486          * XXX Are there any cases where we want to make a policy decision not to
487          * push down a pushable qual, because it'd result in a worse plan?
488          */
489         if (rel->baserestrictinfo != NIL &&
490                 subquery_is_pushdown_safe(subquery, subquery, differentTypes))
491         {
492                 /* OK to consider pushing down individual quals */
493                 List       *upperrestrictlist = NIL;
494                 ListCell   *l;
495
496                 foreach(l, rel->baserestrictinfo)
497                 {
498                         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
499                         Node       *clause = (Node *) rinfo->clause;
500
501                         if (!rinfo->pseudoconstant &&
502                                 qual_is_pushdown_safe(subquery, rti, clause, differentTypes))
503                         {
504                                 /* Push it down */
505                                 subquery_push_qual(subquery, rte, rti, clause);
506                         }
507                         else
508                         {
509                                 /* Keep it in the upper query */
510                                 upperrestrictlist = lappend(upperrestrictlist, rinfo);
511                         }
512                 }
513                 rel->baserestrictinfo = upperrestrictlist;
514         }
515
516         pfree(differentTypes);
517
518         /*
519          * We can safely pass the outer tuple_fraction down to the subquery if the
520          * outer level has no joining, aggregation, or sorting to do. Otherwise
521          * we'd better tell the subquery to plan for full retrieval. (XXX This
522          * could probably be made more intelligent ...)
523          */
524         if (parse->hasAggs ||
525                 parse->groupClause ||
526                 parse->havingQual ||
527                 parse->distinctClause ||
528                 parse->sortClause ||
529                 has_multiple_baserels(root))
530                 tuple_fraction = 0.0;   /* default case */
531         else
532                 tuple_fraction = root->tuple_fraction;
533
534         /* Generate the plan for the subquery */
535         rel->subplan = subquery_planner(root->glob, subquery,
536                                                                         root->query_level + 1,
537                                                                         tuple_fraction,
538                                                                         &subroot);
539         rel->subrtable = subroot->parse->rtable;
540
541         /* Copy number of output rows from subplan */
542         rel->tuples = rel->subplan->plan_rows;
543
544         /* Mark rel with estimated output rows, width, etc */
545         set_baserel_size_estimates(root, rel);
546
547         /* Convert subquery pathkeys to outer representation */
548         pathkeys = convert_subquery_pathkeys(root, rel, subroot->query_pathkeys);
549
550         /* Generate appropriate path */
551         add_path(rel, create_subqueryscan_path(rel, pathkeys));
552
553         /* Select cheapest path (pretty easy in this case...) */
554         set_cheapest(rel);
555 }
556
557 /*
558  * set_function_pathlist
559  *              Build the (single) access path for a function RTE
560  */
561 static void
562 set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
563 {
564         /* Mark rel with estimated output rows, width, etc */
565         set_function_size_estimates(root, rel);
566
567         /* Generate appropriate path */
568         add_path(rel, create_functionscan_path(root, rel));
569
570         /* Select cheapest path (pretty easy in this case...) */
571         set_cheapest(rel);
572 }
573
574 /*
575  * set_values_pathlist
576  *              Build the (single) access path for a VALUES RTE
577  */
578 static void
579 set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
580 {
581         /* Mark rel with estimated output rows, width, etc */
582         set_values_size_estimates(root, rel);
583
584         /* Generate appropriate path */
585         add_path(rel, create_valuesscan_path(root, rel));
586
587         /* Select cheapest path (pretty easy in this case...) */
588         set_cheapest(rel);
589 }
590
591 /*
592  * make_rel_from_joinlist
593  *        Build access paths using a "joinlist" to guide the join path search.
594  *
595  * See comments for deconstruct_jointree() for definition of the joinlist
596  * data structure.
597  */
598 static RelOptInfo *
599 make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
600 {
601         int                     levels_needed;
602         List       *initial_rels;
603         ListCell   *jl;
604
605         /*
606          * Count the number of child joinlist nodes.  This is the depth of the
607          * dynamic-programming algorithm we must employ to consider all ways of
608          * joining the child nodes.
609          */
610         levels_needed = list_length(joinlist);
611
612         if (levels_needed <= 0)
613                 return NULL;                    /* nothing to do? */
614
615         /*
616          * Construct a list of rels corresponding to the child joinlist nodes.
617          * This may contain both base rels and rels constructed according to
618          * sub-joinlists.
619          */
620         initial_rels = NIL;
621         foreach(jl, joinlist)
622         {
623                 Node       *jlnode = (Node *) lfirst(jl);
624                 RelOptInfo *thisrel;
625
626                 if (IsA(jlnode, RangeTblRef))
627                 {
628                         int                     varno = ((RangeTblRef *) jlnode)->rtindex;
629
630                         thisrel = find_base_rel(root, varno);
631                 }
632                 else if (IsA(jlnode, List))
633                 {
634                         /* Recurse to handle subproblem */
635                         thisrel = make_rel_from_joinlist(root, (List *) jlnode);
636                 }
637                 else
638                 {
639                         elog(ERROR, "unrecognized joinlist node type: %d",
640                                  (int) nodeTag(jlnode));
641                         thisrel = NULL;         /* keep compiler quiet */
642                 }
643
644                 initial_rels = lappend(initial_rels, thisrel);
645         }
646
647         if (levels_needed == 1)
648         {
649                 /*
650                  * Single joinlist node, so we're done.
651                  */
652                 return (RelOptInfo *) linitial(initial_rels);
653         }
654         else
655         {
656                 /*
657                  * Consider the different orders in which we could join the rels,
658                  * using either GEQO or regular optimizer.
659                  */
660                 if (enable_geqo && levels_needed >= geqo_threshold)
661                         return geqo(root, levels_needed, initial_rels);
662                 else
663                         return make_one_rel_by_joins(root, levels_needed, initial_rels);
664         }
665 }
666
667 /*
668  * make_one_rel_by_joins
669  *        Find all possible joinpaths for a query by successively finding ways
670  *        to join component relations into join relations.
671  *
672  * 'levels_needed' is the number of iterations needed, ie, the number of
673  *              independent jointree items in the query.  This is > 1.
674  *
675  * 'initial_rels' is a list of RelOptInfo nodes for each independent
676  *              jointree item.  These are the components to be joined together.
677  *
678  * Returns the final level of join relations, i.e., the relation that is
679  * the result of joining all the original relations together.
680  */
681 static RelOptInfo *
682 make_one_rel_by_joins(PlannerInfo *root, int levels_needed, List *initial_rels)
683 {
684         List      **joinitems;
685         int                     lev;
686         RelOptInfo *rel;
687
688         /*
689          * We employ a simple "dynamic programming" algorithm: we first find all
690          * ways to build joins of two jointree items, then all ways to build joins
691          * of three items (from two-item joins and single items), then four-item
692          * joins, and so on until we have considered all ways to join all the
693          * items into one rel.
694          *
695          * joinitems[j] is a list of all the j-item rels.  Initially we set
696          * joinitems[1] to represent all the single-jointree-item relations.
697          */
698         joinitems = (List **) palloc0((levels_needed + 1) * sizeof(List *));
699
700         joinitems[1] = initial_rels;
701
702         for (lev = 2; lev <= levels_needed; lev++)
703         {
704                 ListCell   *x;
705
706                 /*
707                  * Determine all possible pairs of relations to be joined at this
708                  * level, and build paths for making each one from every available
709                  * pair of lower-level relations.
710                  */
711                 joinitems[lev] = make_rels_by_joins(root, lev, joinitems);
712
713                 /*
714                  * Do cleanup work on each just-processed rel.
715                  */
716                 foreach(x, joinitems[lev])
717                 {
718                         rel = (RelOptInfo *) lfirst(x);
719
720                         /* Find and save the cheapest paths for this rel */
721                         set_cheapest(rel);
722
723 #ifdef OPTIMIZER_DEBUG
724                         debug_print_rel(root, rel);
725 #endif
726                 }
727         }
728
729         /*
730          * We should have a single rel at the final level.
731          */
732         if (joinitems[levels_needed] == NIL)
733                 elog(ERROR, "failed to build any %d-way joins", levels_needed);
734         Assert(list_length(joinitems[levels_needed]) == 1);
735
736         rel = (RelOptInfo *) linitial(joinitems[levels_needed]);
737
738         return rel;
739 }
740
741 /*****************************************************************************
742  *                      PUSHING QUALS DOWN INTO SUBQUERIES
743  *****************************************************************************/
744
745 /*
746  * subquery_is_pushdown_safe - is a subquery safe for pushing down quals?
747  *
748  * subquery is the particular component query being checked.  topquery
749  * is the top component of a set-operations tree (the same Query if no
750  * set-op is involved).
751  *
752  * Conditions checked here:
753  *
754  * 1. If the subquery has a LIMIT clause, we must not push down any quals,
755  * since that could change the set of rows returned.
756  *
757  * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
758  * quals into it, because that would change the results.
759  *
760  * 3. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
761  * push quals into each component query, but the quals can only reference
762  * subquery columns that suffer no type coercions in the set operation.
763  * Otherwise there are possible semantic gotchas.  So, we check the
764  * component queries to see if any of them have different output types;
765  * differentTypes[k] is set true if column k has different type in any
766  * component.
767  */
768 static bool
769 subquery_is_pushdown_safe(Query *subquery, Query *topquery,
770                                                   bool *differentTypes)
771 {
772         SetOperationStmt *topop;
773
774         /* Check point 1 */
775         if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
776                 return false;
777
778         /* Are we at top level, or looking at a setop component? */
779         if (subquery == topquery)
780         {
781                 /* Top level, so check any component queries */
782                 if (subquery->setOperations != NULL)
783                         if (!recurse_pushdown_safe(subquery->setOperations, topquery,
784                                                                            differentTypes))
785                                 return false;
786         }
787         else
788         {
789                 /* Setop component must not have more components (too weird) */
790                 if (subquery->setOperations != NULL)
791                         return false;
792                 /* Check whether setop component output types match top level */
793                 topop = (SetOperationStmt *) topquery->setOperations;
794                 Assert(topop && IsA(topop, SetOperationStmt));
795                 compare_tlist_datatypes(subquery->targetList,
796                                                                 topop->colTypes,
797                                                                 differentTypes);
798         }
799         return true;
800 }
801
802 /*
803  * Helper routine to recurse through setOperations tree
804  */
805 static bool
806 recurse_pushdown_safe(Node *setOp, Query *topquery,
807                                           bool *differentTypes)
808 {
809         if (IsA(setOp, RangeTblRef))
810         {
811                 RangeTblRef *rtr = (RangeTblRef *) setOp;
812                 RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable);
813                 Query      *subquery = rte->subquery;
814
815                 Assert(subquery != NULL);
816                 return subquery_is_pushdown_safe(subquery, topquery, differentTypes);
817         }
818         else if (IsA(setOp, SetOperationStmt))
819         {
820                 SetOperationStmt *op = (SetOperationStmt *) setOp;
821
822                 /* EXCEPT is no good */
823                 if (op->op == SETOP_EXCEPT)
824                         return false;
825                 /* Else recurse */
826                 if (!recurse_pushdown_safe(op->larg, topquery, differentTypes))
827                         return false;
828                 if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes))
829                         return false;
830         }
831         else
832         {
833                 elog(ERROR, "unrecognized node type: %d",
834                          (int) nodeTag(setOp));
835         }
836         return true;
837 }
838
839 /*
840  * Compare tlist's datatypes against the list of set-operation result types.
841  * For any items that are different, mark the appropriate element of
842  * differentTypes[] to show that this column will have type conversions.
843  *
844  * We don't have to care about typmods here: the only allowed difference
845  * between set-op input and output typmods is input is a specific typmod
846  * and output is -1, and that does not require a coercion.
847  */
848 static void
849 compare_tlist_datatypes(List *tlist, List *colTypes,
850                                                 bool *differentTypes)
851 {
852         ListCell   *l;
853         ListCell   *colType = list_head(colTypes);
854
855         foreach(l, tlist)
856         {
857                 TargetEntry *tle = (TargetEntry *) lfirst(l);
858
859                 if (tle->resjunk)
860                         continue;                       /* ignore resjunk columns */
861                 if (colType == NULL)
862                         elog(ERROR, "wrong number of tlist entries");
863                 if (exprType((Node *) tle->expr) != lfirst_oid(colType))
864                         differentTypes[tle->resno] = true;
865                 colType = lnext(colType);
866         }
867         if (colType != NULL)
868                 elog(ERROR, "wrong number of tlist entries");
869 }
870
871 /*
872  * qual_is_pushdown_safe - is a particular qual safe to push down?
873  *
874  * qual is a restriction clause applying to the given subquery (whose RTE
875  * has index rti in the parent query).
876  *
877  * Conditions checked here:
878  *
879  * 1. The qual must not contain any subselects (mainly because I'm not sure
880  * it will work correctly: sublinks will already have been transformed into
881  * subplans in the qual, but not in the subquery).
882  *
883  * 2. The qual must not refer to the whole-row output of the subquery
884  * (since there is no easy way to name that within the subquery itself).
885  *
886  * 3. The qual must not refer to any subquery output columns that were
887  * found to have inconsistent types across a set operation tree by
888  * subquery_is_pushdown_safe().
889  *
890  * 4. If the subquery uses DISTINCT ON, we must not push down any quals that
891  * refer to non-DISTINCT output columns, because that could change the set
892  * of rows returned.  This condition is vacuous for DISTINCT, because then
893  * there are no non-DISTINCT output columns, but unfortunately it's fairly
894  * expensive to tell the difference between DISTINCT and DISTINCT ON in the
895  * parsetree representation.  It's cheaper to just make sure all the Vars
896  * in the qual refer to DISTINCT columns.
897  *
898  * 5. We must not push down any quals that refer to subselect outputs that
899  * return sets, else we'd introduce functions-returning-sets into the
900  * subquery's WHERE/HAVING quals.
901  *
902  * 6. We must not push down any quals that refer to subselect outputs that
903  * contain volatile functions, for fear of introducing strange results due
904  * to multiple evaluation of a volatile function.
905  */
906 static bool
907 qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
908                                           bool *differentTypes)
909 {
910         bool            safe = true;
911         List       *vars;
912         ListCell   *vl;
913         Bitmapset  *tested = NULL;
914
915         /* Refuse subselects (point 1) */
916         if (contain_subplans(qual))
917                 return false;
918
919         /*
920          * Examine all Vars used in clause; since it's a restriction clause, all
921          * such Vars must refer to subselect output columns.
922          */
923         vars = pull_var_clause(qual, false);
924         foreach(vl, vars)
925         {
926                 Var                *var = (Var *) lfirst(vl);
927                 TargetEntry *tle;
928
929                 Assert(var->varno == rti);
930
931                 /* Check point 2 */
932                 if (var->varattno == 0)
933                 {
934                         safe = false;
935                         break;
936                 }
937
938                 /*
939                  * We use a bitmapset to avoid testing the same attno more than once.
940                  * (NB: this only works because subquery outputs can't have negative
941                  * attnos.)
942                  */
943                 if (bms_is_member(var->varattno, tested))
944                         continue;
945                 tested = bms_add_member(tested, var->varattno);
946
947                 /* Check point 3 */
948                 if (differentTypes[var->varattno])
949                 {
950                         safe = false;
951                         break;
952                 }
953
954                 /* Must find the tlist element referenced by the Var */
955                 tle = get_tle_by_resno(subquery->targetList, var->varattno);
956                 Assert(tle != NULL);
957                 Assert(!tle->resjunk);
958
959                 /* If subquery uses DISTINCT or DISTINCT ON, check point 4 */
960                 if (subquery->distinctClause != NIL &&
961                         !targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
962                 {
963                         /* non-DISTINCT column, so fail */
964                         safe = false;
965                         break;
966                 }
967
968                 /* Refuse functions returning sets (point 5) */
969                 if (expression_returns_set((Node *) tle->expr))
970                 {
971                         safe = false;
972                         break;
973                 }
974
975                 /* Refuse volatile functions (point 6) */
976                 if (contain_volatile_functions((Node *) tle->expr))
977                 {
978                         safe = false;
979                         break;
980                 }
981         }
982
983         list_free(vars);
984         bms_free(tested);
985
986         return safe;
987 }
988
989 /*
990  * subquery_push_qual - push down a qual that we have determined is safe
991  */
992 static void
993 subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual)
994 {
995         if (subquery->setOperations != NULL)
996         {
997                 /* Recurse to push it separately to each component query */
998                 recurse_push_qual(subquery->setOperations, subquery,
999                                                   rte, rti, qual);
1000         }
1001         else
1002         {
1003                 /*
1004                  * We need to replace Vars in the qual (which must refer to outputs of
1005                  * the subquery) with copies of the subquery's targetlist expressions.
1006                  * Note that at this point, any uplevel Vars in the qual should have
1007                  * been replaced with Params, so they need no work.
1008                  *
1009                  * This step also ensures that when we are pushing into a setop tree,
1010                  * each component query gets its own copy of the qual.
1011                  */
1012                 qual = ResolveNew(qual, rti, 0, rte,
1013                                                   subquery->targetList,
1014                                                   CMD_SELECT, 0);
1015
1016                 /*
1017                  * Now attach the qual to the proper place: normally WHERE, but if the
1018                  * subquery uses grouping or aggregation, put it in HAVING (since the
1019                  * qual really refers to the group-result rows).
1020                  */
1021                 if (subquery->hasAggs || subquery->groupClause || subquery->havingQual)
1022                         subquery->havingQual = make_and_qual(subquery->havingQual, qual);
1023                 else
1024                         subquery->jointree->quals =
1025                                 make_and_qual(subquery->jointree->quals, qual);
1026
1027                 /*
1028                  * We need not change the subquery's hasAggs or hasSublinks flags,
1029                  * since we can't be pushing down any aggregates that weren't there
1030                  * before, and we don't push down subselects at all.
1031                  */
1032         }
1033 }
1034
1035 /*
1036  * Helper routine to recurse through setOperations tree
1037  */
1038 static void
1039 recurse_push_qual(Node *setOp, Query *topquery,
1040                                   RangeTblEntry *rte, Index rti, Node *qual)
1041 {
1042         if (IsA(setOp, RangeTblRef))
1043         {
1044                 RangeTblRef *rtr = (RangeTblRef *) setOp;
1045                 RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable);
1046                 Query      *subquery = subrte->subquery;
1047
1048                 Assert(subquery != NULL);
1049                 subquery_push_qual(subquery, rte, rti, qual);
1050         }
1051         else if (IsA(setOp, SetOperationStmt))
1052         {
1053                 SetOperationStmt *op = (SetOperationStmt *) setOp;
1054
1055                 recurse_push_qual(op->larg, topquery, rte, rti, qual);
1056                 recurse_push_qual(op->rarg, topquery, rte, rti, qual);
1057         }
1058         else
1059         {
1060                 elog(ERROR, "unrecognized node type: %d",
1061                          (int) nodeTag(setOp));
1062         }
1063 }
1064
1065 /*****************************************************************************
1066  *                      DEBUG SUPPORT
1067  *****************************************************************************/
1068
1069 #ifdef OPTIMIZER_DEBUG
1070
1071 static void
1072 print_relids(Relids relids)
1073 {
1074         Relids          tmprelids;
1075         int                     x;
1076         bool            first = true;
1077
1078         tmprelids = bms_copy(relids);
1079         while ((x = bms_first_member(tmprelids)) >= 0)
1080         {
1081                 if (!first)
1082                         printf(" ");
1083                 printf("%d", x);
1084                 first = false;
1085         }
1086         bms_free(tmprelids);
1087 }
1088
1089 static void
1090 print_restrictclauses(PlannerInfo *root, List *clauses)
1091 {
1092         ListCell   *l;
1093
1094         foreach(l, clauses)
1095         {
1096                 RestrictInfo *c = lfirst(l);
1097
1098                 print_expr((Node *) c->clause, root->parse->rtable);
1099                 if (lnext(l))
1100                         printf(", ");
1101         }
1102 }
1103
1104 static void
1105 print_path(PlannerInfo *root, Path *path, int indent)
1106 {
1107         const char *ptype;
1108         bool            join = false;
1109         Path       *subpath = NULL;
1110         int                     i;
1111
1112         switch (nodeTag(path))
1113         {
1114                 case T_Path:
1115                         ptype = "SeqScan";
1116                         break;
1117                 case T_IndexPath:
1118                         ptype = "IdxScan";
1119                         break;
1120                 case T_BitmapHeapPath:
1121                         ptype = "BitmapHeapScan";
1122                         break;
1123                 case T_BitmapAndPath:
1124                         ptype = "BitmapAndPath";
1125                         break;
1126                 case T_BitmapOrPath:
1127                         ptype = "BitmapOrPath";
1128                         break;
1129                 case T_TidPath:
1130                         ptype = "TidScan";
1131                         break;
1132                 case T_AppendPath:
1133                         ptype = "Append";
1134                         break;
1135                 case T_ResultPath:
1136                         ptype = "Result";
1137                         break;
1138                 case T_MaterialPath:
1139                         ptype = "Material";
1140                         subpath = ((MaterialPath *) path)->subpath;
1141                         break;
1142                 case T_UniquePath:
1143                         ptype = "Unique";
1144                         subpath = ((UniquePath *) path)->subpath;
1145                         break;
1146                 case T_NestPath:
1147                         ptype = "NestLoop";
1148                         join = true;
1149                         break;
1150                 case T_MergePath:
1151                         ptype = "MergeJoin";
1152                         join = true;
1153                         break;
1154                 case T_HashPath:
1155                         ptype = "HashJoin";
1156                         join = true;
1157                         break;
1158                 default:
1159                         ptype = "???Path";
1160                         break;
1161         }
1162
1163         for (i = 0; i < indent; i++)
1164                 printf("\t");
1165         printf("%s", ptype);
1166
1167         if (path->parent)
1168         {
1169                 printf("(");
1170                 print_relids(path->parent->relids);
1171                 printf(") rows=%.0f", path->parent->rows);
1172         }
1173         printf(" cost=%.2f..%.2f\n", path->startup_cost, path->total_cost);
1174
1175         if (path->pathkeys)
1176         {
1177                 for (i = 0; i < indent; i++)
1178                         printf("\t");
1179                 printf("  pathkeys: ");
1180                 print_pathkeys(path->pathkeys, root->parse->rtable);
1181         }
1182
1183         if (join)
1184         {
1185                 JoinPath   *jp = (JoinPath *) path;
1186
1187                 for (i = 0; i < indent; i++)
1188                         printf("\t");
1189                 printf("  clauses: ");
1190                 print_restrictclauses(root, jp->joinrestrictinfo);
1191                 printf("\n");
1192
1193                 if (IsA(path, MergePath))
1194                 {
1195                         MergePath  *mp = (MergePath *) path;
1196
1197                         if (mp->outersortkeys || mp->innersortkeys)
1198                         {
1199                                 for (i = 0; i < indent; i++)
1200                                         printf("\t");
1201                                 printf("  sortouter=%d sortinner=%d\n",
1202                                            ((mp->outersortkeys) ? 1 : 0),
1203                                            ((mp->innersortkeys) ? 1 : 0));
1204                         }
1205                 }
1206
1207                 print_path(root, jp->outerjoinpath, indent + 1);
1208                 print_path(root, jp->innerjoinpath, indent + 1);
1209         }
1210
1211         if (subpath)
1212                 print_path(root, subpath, indent + 1);
1213 }
1214
1215 void
1216 debug_print_rel(PlannerInfo *root, RelOptInfo *rel)
1217 {
1218         ListCell   *l;
1219
1220         printf("RELOPTINFO (");
1221         print_relids(rel->relids);
1222         printf("): rows=%.0f width=%d\n", rel->rows, rel->width);
1223
1224         if (rel->baserestrictinfo)
1225         {
1226                 printf("\tbaserestrictinfo: ");
1227                 print_restrictclauses(root, rel->baserestrictinfo);
1228                 printf("\n");
1229         }
1230
1231         if (rel->joininfo)
1232         {
1233                 printf("\tjoininfo: ");
1234                 print_restrictclauses(root, rel->joininfo);
1235                 printf("\n");
1236         }
1237
1238         printf("\tpath list:\n");
1239         foreach(l, rel->pathlist)
1240                 print_path(root, lfirst(l), 1);
1241         printf("\n\tcheapest startup path:\n");
1242         print_path(root, rel->cheapest_startup_path, 1);
1243         printf("\n\tcheapest total path:\n");
1244         print_path(root, rel->cheapest_total_path, 1);
1245         printf("\n");
1246         fflush(stdout);
1247 }
1248
1249 #endif   /* OPTIMIZER_DEBUG */