]> granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/allpaths.c
d2100905406fe4048d83a99bcf3d16116d8f5ec3
[postgresql] / src / backend / optimizer / path / allpaths.c
1 /*-------------------------------------------------------------------------
2  *
3  * allpaths.c
4  *        Routines to find possible search paths for processing a query
5  *
6  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.142 2006/02/04 23:03:20 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15
16 #include "postgres.h"
17
18 #include "nodes/makefuncs.h"
19 #ifdef OPTIMIZER_DEBUG
20 #include "nodes/print.h"
21 #endif
22 #include "optimizer/clauses.h"
23 #include "optimizer/cost.h"
24 #include "optimizer/geqo.h"
25 #include "optimizer/pathnode.h"
26 #include "optimizer/paths.h"
27 #include "optimizer/plancat.h"
28 #include "optimizer/planner.h"
29 #include "optimizer/prep.h"
30 #include "optimizer/var.h"
31 #include "parser/parsetree.h"
32 #include "parser/parse_clause.h"
33 #include "parser/parse_expr.h"
34 #include "rewrite/rewriteManip.h"
35
36
37 /* These parameters are set by GUC */
38 bool            enable_geqo = false;    /* just in case GUC doesn't set it */
39 int                     geqo_threshold;
40
41
42 static void set_base_rel_pathlists(PlannerInfo *root);
43 static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti);
44 static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
45                                            RangeTblEntry *rte);
46 static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
47                                                                         Index rti, RangeTblEntry *rte);
48 static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
49                                           Index rti, RangeTblEntry *rte);
50 static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
51                                           RangeTblEntry *rte);
52 static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
53 static RelOptInfo *make_one_rel_by_joins(PlannerInfo *root, int levels_needed,
54                                           List *initial_rels);
55 static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
56                                                   bool *differentTypes);
57 static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
58                                           bool *differentTypes);
59 static void compare_tlist_datatypes(List *tlist, List *colTypes,
60                                                 bool *differentTypes);
61 static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
62                                           bool *differentTypes);
63 static void subquery_push_qual(Query *subquery,
64                                    RangeTblEntry *rte, Index rti, Node *qual);
65 static void recurse_push_qual(Node *setOp, Query *topquery,
66                                   RangeTblEntry *rte, Index rti, Node *qual);
67
68
69 /*
70  * make_one_rel
71  *        Finds all possible access paths for executing a query, returning a
72  *        single rel that represents the join of all base rels in the query.
73  */
74 RelOptInfo *
75 make_one_rel(PlannerInfo *root, List *joinlist)
76 {
77         RelOptInfo *rel;
78
79         /*
80          * Generate access paths for the base rels.
81          */
82         set_base_rel_pathlists(root);
83
84         /*
85          * Generate access paths for the entire join tree.
86          */
87         rel = make_rel_from_joinlist(root, joinlist);
88
89         /*
90          * The result should join all and only the query's base rels.
91          */
92 #ifdef USE_ASSERT_CHECKING
93         {
94                 int                     num_base_rels = 0;
95                 Index           rti;
96
97                 for (rti = 1; rti < root->simple_rel_array_size; rti++)
98                 {
99                         RelOptInfo *brel = root->simple_rel_array[rti];
100
101                         if (brel == NULL)
102                                 continue;
103
104                         Assert(brel->relid == rti); /* sanity check on array */
105
106                         /* ignore RTEs that are "other rels" */
107                         if (brel->reloptkind != RELOPT_BASEREL)
108                                 continue;
109
110                         Assert(bms_is_member(rti, rel->relids));
111                         num_base_rels++;
112                 }
113
114                 Assert(bms_num_members(rel->relids) == num_base_rels);
115         }
116 #endif
117
118         return rel;
119 }
120
121 /*
122  * set_base_rel_pathlists
123  *        Finds all paths available for scanning each base-relation entry.
124  *        Sequential scan and any available indices are considered.
125  *        Each useful path is attached to its relation's 'pathlist' field.
126  */
127 static void
128 set_base_rel_pathlists(PlannerInfo *root)
129 {
130         Index           rti;
131
132         for (rti = 1; rti < root->simple_rel_array_size; rti++)
133         {
134                 RelOptInfo *rel = root->simple_rel_array[rti];
135
136                 /* there may be empty slots corresponding to non-baserel RTEs */
137                 if (rel == NULL)
138                         continue;
139
140                 Assert(rel->relid == rti);              /* sanity check on array */
141
142                 /* ignore RTEs that are "other rels" */
143                 if (rel->reloptkind != RELOPT_BASEREL)
144                         continue;
145
146                 set_rel_pathlist(root, rel, rti);
147         }
148 }
149
150 /*
151  * set_rel_pathlist
152  *        Build access paths for a base relation
153  */
154 static void
155 set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti)
156 {
157         RangeTblEntry *rte = rt_fetch(rti, root->parse->rtable);
158
159         if (rte->inh)
160         {
161                 /* It's an "append relation", process accordingly */
162                 set_append_rel_pathlist(root, rel, rti, rte);
163         }
164         else if (rel->rtekind == RTE_SUBQUERY)
165         {
166                 /* Subquery --- generate a separate plan for it */
167                 set_subquery_pathlist(root, rel, rti, rte);
168         }
169         else if (rel->rtekind == RTE_FUNCTION)
170         {
171                 /* RangeFunction --- generate a separate plan for it */
172                 set_function_pathlist(root, rel, rte);
173         }
174         else
175         {
176                 /* Plain relation */
177                 Assert(rel->rtekind == RTE_RELATION);
178                 set_plain_rel_pathlist(root, rel, rte);
179         }
180
181 #ifdef OPTIMIZER_DEBUG
182         debug_print_rel(root, rel);
183 #endif
184 }
185
186 /*
187  * set_plain_rel_pathlist
188  *        Build access paths for a plain relation (no subquery, no inheritance)
189  */
190 static void
191 set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
192 {
193         /* Mark rel with estimated output rows, width, etc */
194         set_baserel_size_estimates(root, rel);
195
196         /* Test any partial indexes of rel for applicability */
197         check_partial_indexes(root, rel);
198
199         /*
200          * Check to see if we can extract any restriction conditions from join
201          * quals that are OR-of-AND structures.  If so, add them to the rel's
202          * restriction list, and recompute the size estimates.
203          */
204         if (create_or_index_quals(root, rel))
205                 set_baserel_size_estimates(root, rel);
206
207         /*
208          * If we can prove we don't need to scan the rel via constraint exclusion,
209          * set up a single dummy path for it.  (Rather than inventing a special
210          * "dummy" path type, we represent this as an AppendPath with no members.)
211          */
212         if (relation_excluded_by_constraints(rel, rte))
213         {
214                 /* Reset output-rows estimate to 0 */
215                 rel->rows = 0;
216
217                 add_path(rel, (Path *) create_append_path(rel, NIL));
218
219                 /* Select cheapest path (pretty easy in this case...) */
220                 set_cheapest(rel);
221
222                 return;
223         }
224
225         /*
226          * Generate paths and add them to the rel's pathlist.
227          *
228          * Note: add_path() will discard any paths that are dominated by another
229          * available path, keeping only those paths that are superior along at
230          * least one dimension of cost or sortedness.
231          */
232
233         /* Consider sequential scan */
234         add_path(rel, create_seqscan_path(root, rel));
235
236         /* Consider index scans */
237         create_index_paths(root, rel);
238
239         /* Consider TID scans */
240         create_tidscan_paths(root, rel);
241
242         /* Now find the cheapest of the paths for this rel */
243         set_cheapest(rel);
244 }
245
246 /*
247  * set_append_rel_pathlist
248  *        Build access paths for an "append relation"
249  *
250  * The passed-in rel and RTE represent the entire append relation.  The
251  * relation's contents are computed by appending together the output of
252  * the individual member relations.  Note that in the inheritance case,
253  * the first member relation is actually the same table as is mentioned in
254  * the parent RTE ... but it has a different RTE and RelOptInfo.  This is
255  * a good thing because their outputs are not the same size.
256  */
257 static void
258 set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
259                                                 Index rti, RangeTblEntry *rte)
260 {
261         int                     parentRTindex = rti;
262         List       *subpaths = NIL;
263         ListCell   *l;
264
265         /*
266          * XXX for now, can't handle inherited expansion of FOR UPDATE/SHARE; can
267          * we do better?  (This will take some redesign because the executor
268          * currently supposes that every rowMark relation is involved in every
269          * row returned by the query.)
270          */
271         if (list_member_int(root->parse->rowMarks, parentRTindex))
272                 ereport(ERROR,
273                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
274                                  errmsg("SELECT FOR UPDATE/SHARE is not supported for inheritance queries")));
275
276         /*
277          * Initialize to compute size estimates for whole append relation
278          */
279         rel->rows = 0;
280         rel->width = 0;
281
282         /*
283          * Generate access paths for each member relation, and pick the cheapest
284          * path for each one.
285          */
286         foreach(l, root->append_rel_list)
287         {
288                 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
289                 int                     childRTindex;
290                 RelOptInfo *childrel;
291                 Path       *childpath;
292                 ListCell   *parentvars;
293                 ListCell   *childvars;
294
295                 /* append_rel_list contains all append rels; ignore others */
296                 if (appinfo->parent_relid != parentRTindex)
297                         continue;
298
299                 childRTindex = appinfo->child_relid;
300
301                 /*
302                  * Make a RelOptInfo for the child so we can do planning. Mark it as
303                  * an "other rel" since it will not be part of the main join tree.
304                  */
305                 childrel = build_simple_rel(root, childRTindex,
306                                                                         RELOPT_OTHER_MEMBER_REL);
307
308                 /*
309                  * Copy the parent's targetlist and quals to the child, with
310                  * appropriate substitution of variables.
311                  */
312                 childrel->reltargetlist = (List *)
313                         adjust_appendrel_attrs((Node *) rel->reltargetlist,
314                                                                    appinfo);
315                 childrel->baserestrictinfo = (List *)
316                         adjust_appendrel_attrs((Node *) rel->baserestrictinfo,
317                                                                    appinfo);
318                 childrel->joininfo = (List *)
319                         adjust_appendrel_attrs((Node *) rel->joininfo,
320                                                                    appinfo);
321
322                 /*
323                  * Copy the parent's attr_needed data as well, with appropriate
324                  * adjustment of relids and attribute numbers.
325                  */
326                 pfree(childrel->attr_needed);
327                 childrel->attr_needed =
328                         adjust_appendrel_attr_needed(rel, appinfo,
329                                                                                  childrel->min_attr,
330                                                                                  childrel->max_attr);
331
332                 /*
333                  * Compute the child's access paths, and add the cheapest one
334                  * to the Append path we are constructing for the parent.
335                  *
336                  * It's possible that the child is itself an appendrel, in which
337                  * case we can "cut out the middleman" and just add its child
338                  * paths to our own list.  (We don't try to do this earlier because
339                  * we need to apply both levels of transformation to the quals.)
340                  * This test also handles the case where the child rel need not
341                  * be scanned because of constraint exclusion: it'll have an
342                  * Append path with no subpaths, and will vanish from our list.
343                  */
344                 set_rel_pathlist(root, childrel, childRTindex);
345
346                 childpath = childrel->cheapest_total_path;
347                 if (IsA(childpath, AppendPath))
348                         subpaths = list_concat(subpaths,
349                                                                    ((AppendPath *) childpath)->subpaths);
350                 else
351                         subpaths = lappend(subpaths, childpath);
352
353                 /*
354                  * Propagate size information from the child back to the parent. For
355                  * simplicity, we use the largest widths from any child as the parent
356                  * estimates.
357                  */
358                 rel->rows += childrel->rows;
359                 if (childrel->width > rel->width)
360                         rel->width = childrel->width;
361
362                 forboth(parentvars, rel->reltargetlist,
363                                 childvars, childrel->reltargetlist)
364                 {
365                         Var                *parentvar = (Var *) lfirst(parentvars);
366                         Var                *childvar = (Var *) lfirst(childvars);
367
368                         if (IsA(parentvar, Var) &&
369                                 IsA(childvar, Var))
370                         {
371                                 int                     pndx = parentvar->varattno - rel->min_attr;
372                                 int                     cndx = childvar->varattno - childrel->min_attr;
373
374                                 if (childrel->attr_widths[cndx] > rel->attr_widths[pndx])
375                                         rel->attr_widths[pndx] = childrel->attr_widths[cndx];
376                         }
377                 }
378         }
379
380         /*
381          * Finally, build Append path and install it as the only access path for
382          * the parent rel.      (Note: this is correct even if we have zero or one
383          * live subpath due to constraint exclusion.)
384          */
385         add_path(rel, (Path *) create_append_path(rel, subpaths));
386
387         /* Select cheapest path (pretty easy in this case...) */
388         set_cheapest(rel);
389 }
390
391 /* quick-and-dirty test to see if any joining is needed */
392 static bool
393 has_multiple_baserels(PlannerInfo *root)
394 {
395         int                     num_base_rels = 0;
396         Index           rti;
397
398         for (rti = 1; rti < root->simple_rel_array_size; rti++)
399         {
400                 RelOptInfo *brel = root->simple_rel_array[rti];
401
402                 if (brel == NULL)
403                         continue;
404
405                 /* ignore RTEs that are "other rels" */
406                 if (brel->reloptkind == RELOPT_BASEREL)
407                         if (++num_base_rels > 1)
408                                 return true;
409         }
410         return false;
411 }
412
413 /*
414  * set_subquery_pathlist
415  *              Build the (single) access path for a subquery RTE
416  */
417 static void
418 set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
419                                           Index rti, RangeTblEntry *rte)
420 {
421         Query      *parse = root->parse;
422         Query      *subquery = rte->subquery;
423         bool       *differentTypes;
424         double          tuple_fraction;
425         List       *pathkeys;
426         List       *subquery_pathkeys;
427
428         /* We need a workspace for keeping track of set-op type coercions */
429         differentTypes = (bool *)
430                 palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
431
432         /*
433          * If there are any restriction clauses that have been attached to the
434          * subquery relation, consider pushing them down to become WHERE or HAVING
435          * quals of the subquery itself.  This transformation is useful because it
436          * may allow us to generate a better plan for the subquery than evaluating
437          * all the subquery output rows and then filtering them.
438          *
439          * There are several cases where we cannot push down clauses. Restrictions
440          * involving the subquery are checked by subquery_is_pushdown_safe().
441          * Restrictions on individual clauses are checked by
442          * qual_is_pushdown_safe().
443          *
444          * Non-pushed-down clauses will get evaluated as qpquals of the
445          * SubqueryScan node.
446          *
447          * XXX Are there any cases where we want to make a policy decision not to
448          * push down a pushable qual, because it'd result in a worse plan?
449          */
450         if (rel->baserestrictinfo != NIL &&
451                 subquery_is_pushdown_safe(subquery, subquery, differentTypes))
452         {
453                 /* OK to consider pushing down individual quals */
454                 List       *upperrestrictlist = NIL;
455                 ListCell   *l;
456
457                 foreach(l, rel->baserestrictinfo)
458                 {
459                         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
460                         Node       *clause = (Node *) rinfo->clause;
461
462                         if (qual_is_pushdown_safe(subquery, rti, clause, differentTypes))
463                         {
464                                 /* Push it down */
465                                 subquery_push_qual(subquery, rte, rti, clause);
466                         }
467                         else
468                         {
469                                 /* Keep it in the upper query */
470                                 upperrestrictlist = lappend(upperrestrictlist, rinfo);
471                         }
472                 }
473                 rel->baserestrictinfo = upperrestrictlist;
474         }
475
476         pfree(differentTypes);
477
478         /*
479          * We can safely pass the outer tuple_fraction down to the subquery if the
480          * outer level has no joining, aggregation, or sorting to do. Otherwise
481          * we'd better tell the subquery to plan for full retrieval. (XXX This
482          * could probably be made more intelligent ...)
483          */
484         if (parse->hasAggs ||
485                 parse->groupClause ||
486                 parse->havingQual ||
487                 parse->distinctClause ||
488                 parse->sortClause ||
489                 has_multiple_baserels(root))
490                 tuple_fraction = 0.0;   /* default case */
491         else
492                 tuple_fraction = root->tuple_fraction;
493
494         /* Generate the plan for the subquery */
495         rel->subplan = subquery_planner(subquery, tuple_fraction,
496                                                                         &subquery_pathkeys);
497
498         /* Copy number of output rows from subplan */
499         rel->tuples = rel->subplan->plan_rows;
500
501         /* Mark rel with estimated output rows, width, etc */
502         set_baserel_size_estimates(root, rel);
503
504         /* Convert subquery pathkeys to outer representation */
505         pathkeys = convert_subquery_pathkeys(root, rel, subquery_pathkeys);
506
507         /* Generate appropriate path */
508         add_path(rel, create_subqueryscan_path(rel, pathkeys));
509
510         /* Select cheapest path (pretty easy in this case...) */
511         set_cheapest(rel);
512 }
513
514 /*
515  * set_function_pathlist
516  *              Build the (single) access path for a function RTE
517  */
518 static void
519 set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
520 {
521         /* Mark rel with estimated output rows, width, etc */
522         set_function_size_estimates(root, rel);
523
524         /* Generate appropriate path */
525         add_path(rel, create_functionscan_path(root, rel));
526
527         /* Select cheapest path (pretty easy in this case...) */
528         set_cheapest(rel);
529 }
530
531 /*
532  * make_rel_from_joinlist
533  *        Build access paths using a "joinlist" to guide the join path search.
534  *
535  * See comments for deconstruct_jointree() for definition of the joinlist
536  * data structure.
537  */
538 static RelOptInfo *
539 make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
540 {
541         int                     levels_needed;
542         List       *initial_rels;
543         ListCell   *jl;
544
545         /*
546          * Count the number of child joinlist nodes.  This is the depth of the
547          * dynamic-programming algorithm we must employ to consider all ways of
548          * joining the child nodes.
549          */
550         levels_needed = list_length(joinlist);
551
552         if (levels_needed <= 0)
553                 return NULL;                    /* nothing to do? */
554
555         /*
556          * Construct a list of rels corresponding to the child joinlist nodes.
557          * This may contain both base rels and rels constructed according to
558          * sub-joinlists.
559          */
560         initial_rels = NIL;
561         foreach(jl, joinlist)
562         {
563                 Node       *jlnode = (Node *) lfirst(jl);
564                 RelOptInfo *thisrel;
565
566                 if (IsA(jlnode, RangeTblRef))
567                 {
568                         int                     varno = ((RangeTblRef *) jlnode)->rtindex;
569
570                         thisrel = find_base_rel(root, varno);
571                 }
572                 else if (IsA(jlnode, List))
573                 {
574                         /* Recurse to handle subproblem */
575                         thisrel = make_rel_from_joinlist(root, (List *) jlnode);
576                 }
577                 else
578                 {
579                         elog(ERROR, "unrecognized joinlist node type: %d",
580                                  (int) nodeTag(jlnode));
581                         thisrel = NULL;         /* keep compiler quiet */
582                 }
583
584                 initial_rels = lappend(initial_rels, thisrel);
585         }
586
587         if (levels_needed == 1)
588         {
589                 /*
590                  * Single joinlist node, so we're done.
591                  */
592                 return (RelOptInfo *) linitial(initial_rels);
593         }
594         else
595         {
596                 /*
597                  * Consider the different orders in which we could join the rels,
598                  * using either GEQO or regular optimizer.
599                  */
600                 if (enable_geqo && levels_needed >= geqo_threshold)
601                         return geqo(root, levels_needed, initial_rels);
602                 else
603                         return make_one_rel_by_joins(root, levels_needed, initial_rels);
604         }
605 }
606
607 /*
608  * make_one_rel_by_joins
609  *        Find all possible joinpaths for a query by successively finding ways
610  *        to join component relations into join relations.
611  *
612  * 'levels_needed' is the number of iterations needed, ie, the number of
613  *              independent jointree items in the query.  This is > 1.
614  *
615  * 'initial_rels' is a list of RelOptInfo nodes for each independent
616  *              jointree item.  These are the components to be joined together.
617  *
618  * Returns the final level of join relations, i.e., the relation that is
619  * the result of joining all the original relations together.
620  */
621 static RelOptInfo *
622 make_one_rel_by_joins(PlannerInfo *root, int levels_needed, List *initial_rels)
623 {
624         List      **joinitems;
625         int                     lev;
626         RelOptInfo *rel;
627
628         /*
629          * We employ a simple "dynamic programming" algorithm: we first find all
630          * ways to build joins of two jointree items, then all ways to build joins
631          * of three items (from two-item joins and single items), then four-item
632          * joins, and so on until we have considered all ways to join all the
633          * items into one rel.
634          *
635          * joinitems[j] is a list of all the j-item rels.  Initially we set
636          * joinitems[1] to represent all the single-jointree-item relations.
637          */
638         joinitems = (List **) palloc0((levels_needed + 1) * sizeof(List *));
639
640         joinitems[1] = initial_rels;
641
642         for (lev = 2; lev <= levels_needed; lev++)
643         {
644                 ListCell   *x;
645
646                 /*
647                  * Determine all possible pairs of relations to be joined at this
648                  * level, and build paths for making each one from every available
649                  * pair of lower-level relations.
650                  */
651                 joinitems[lev] = make_rels_by_joins(root, lev, joinitems);
652
653                 /*
654                  * Do cleanup work on each just-processed rel.
655                  */
656                 foreach(x, joinitems[lev])
657                 {
658                         rel = (RelOptInfo *) lfirst(x);
659
660                         /* Find and save the cheapest paths for this rel */
661                         set_cheapest(rel);
662
663 #ifdef OPTIMIZER_DEBUG
664                         debug_print_rel(root, rel);
665 #endif
666                 }
667         }
668
669         /*
670          * We should have a single rel at the final level.
671          */
672         if (joinitems[levels_needed] == NIL)
673                 elog(ERROR, "failed to build any %d-way joins", levels_needed);
674         Assert(list_length(joinitems[levels_needed]) == 1);
675
676         rel = (RelOptInfo *) linitial(joinitems[levels_needed]);
677
678         return rel;
679 }
680
681 /*****************************************************************************
682  *                      PUSHING QUALS DOWN INTO SUBQUERIES
683  *****************************************************************************/
684
685 /*
686  * subquery_is_pushdown_safe - is a subquery safe for pushing down quals?
687  *
688  * subquery is the particular component query being checked.  topquery
689  * is the top component of a set-operations tree (the same Query if no
690  * set-op is involved).
691  *
692  * Conditions checked here:
693  *
694  * 1. If the subquery has a LIMIT clause, we must not push down any quals,
695  * since that could change the set of rows returned.
696  *
697  * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
698  * quals into it, because that would change the results.
699  *
700  * 3. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
701  * push quals into each component query, but the quals can only reference
702  * subquery columns that suffer no type coercions in the set operation.
703  * Otherwise there are possible semantic gotchas.  So, we check the
704  * component queries to see if any of them have different output types;
705  * differentTypes[k] is set true if column k has different type in any
706  * component.
707  */
708 static bool
709 subquery_is_pushdown_safe(Query *subquery, Query *topquery,
710                                                   bool *differentTypes)
711 {
712         SetOperationStmt *topop;
713
714         /* Check point 1 */
715         if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
716                 return false;
717
718         /* Are we at top level, or looking at a setop component? */
719         if (subquery == topquery)
720         {
721                 /* Top level, so check any component queries */
722                 if (subquery->setOperations != NULL)
723                         if (!recurse_pushdown_safe(subquery->setOperations, topquery,
724                                                                            differentTypes))
725                                 return false;
726         }
727         else
728         {
729                 /* Setop component must not have more components (too weird) */
730                 if (subquery->setOperations != NULL)
731                         return false;
732                 /* Check whether setop component output types match top level */
733                 topop = (SetOperationStmt *) topquery->setOperations;
734                 Assert(topop && IsA(topop, SetOperationStmt));
735                 compare_tlist_datatypes(subquery->targetList,
736                                                                 topop->colTypes,
737                                                                 differentTypes);
738         }
739         return true;
740 }
741
742 /*
743  * Helper routine to recurse through setOperations tree
744  */
745 static bool
746 recurse_pushdown_safe(Node *setOp, Query *topquery,
747                                           bool *differentTypes)
748 {
749         if (IsA(setOp, RangeTblRef))
750         {
751                 RangeTblRef *rtr = (RangeTblRef *) setOp;
752                 RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable);
753                 Query      *subquery = rte->subquery;
754
755                 Assert(subquery != NULL);
756                 return subquery_is_pushdown_safe(subquery, topquery, differentTypes);
757         }
758         else if (IsA(setOp, SetOperationStmt))
759         {
760                 SetOperationStmt *op = (SetOperationStmt *) setOp;
761
762                 /* EXCEPT is no good */
763                 if (op->op == SETOP_EXCEPT)
764                         return false;
765                 /* Else recurse */
766                 if (!recurse_pushdown_safe(op->larg, topquery, differentTypes))
767                         return false;
768                 if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes))
769                         return false;
770         }
771         else
772         {
773                 elog(ERROR, "unrecognized node type: %d",
774                          (int) nodeTag(setOp));
775         }
776         return true;
777 }
778
779 /*
780  * Compare tlist's datatypes against the list of set-operation result types.
781  * For any items that are different, mark the appropriate element of
782  * differentTypes[] to show that this column will have type conversions.
783  */
784 static void
785 compare_tlist_datatypes(List *tlist, List *colTypes,
786                                                 bool *differentTypes)
787 {
788         ListCell   *l;
789         ListCell   *colType = list_head(colTypes);
790
791         foreach(l, tlist)
792         {
793                 TargetEntry *tle = (TargetEntry *) lfirst(l);
794
795                 if (tle->resjunk)
796                         continue;                       /* ignore resjunk columns */
797                 if (colType == NULL)
798                         elog(ERROR, "wrong number of tlist entries");
799                 if (exprType((Node *) tle->expr) != lfirst_oid(colType))
800                         differentTypes[tle->resno] = true;
801                 colType = lnext(colType);
802         }
803         if (colType != NULL)
804                 elog(ERROR, "wrong number of tlist entries");
805 }
806
807 /*
808  * qual_is_pushdown_safe - is a particular qual safe to push down?
809  *
810  * qual is a restriction clause applying to the given subquery (whose RTE
811  * has index rti in the parent query).
812  *
813  * Conditions checked here:
814  *
815  * 1. The qual must not contain any subselects (mainly because I'm not sure
816  * it will work correctly: sublinks will already have been transformed into
817  * subplans in the qual, but not in the subquery).
818  *
819  * 2. The qual must not refer to any subquery output columns that were
820  * found to have inconsistent types across a set operation tree by
821  * subquery_is_pushdown_safe().
822  *
823  * 3. If the subquery uses DISTINCT ON, we must not push down any quals that
824  * refer to non-DISTINCT output columns, because that could change the set
825  * of rows returned.  This condition is vacuous for DISTINCT, because then
826  * there are no non-DISTINCT output columns, but unfortunately it's fairly
827  * expensive to tell the difference between DISTINCT and DISTINCT ON in the
828  * parsetree representation.  It's cheaper to just make sure all the Vars
829  * in the qual refer to DISTINCT columns.
830  *
831  * 4. We must not push down any quals that refer to subselect outputs that
832  * return sets, else we'd introduce functions-returning-sets into the
833  * subquery's WHERE/HAVING quals.
834  */
835 static bool
836 qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
837                                           bool *differentTypes)
838 {
839         bool            safe = true;
840         List       *vars;
841         ListCell   *vl;
842         Bitmapset  *tested = NULL;
843
844         /* Refuse subselects (point 1) */
845         if (contain_subplans(qual))
846                 return false;
847
848         /*
849          * Examine all Vars used in clause; since it's a restriction clause, all
850          * such Vars must refer to subselect output columns.
851          */
852         vars = pull_var_clause(qual, false);
853         foreach(vl, vars)
854         {
855                 Var                *var = (Var *) lfirst(vl);
856                 TargetEntry *tle;
857
858                 Assert(var->varno == rti);
859
860                 /*
861                  * We use a bitmapset to avoid testing the same attno more than once.
862                  * (NB: this only works because subquery outputs can't have negative
863                  * attnos.)
864                  */
865                 if (bms_is_member(var->varattno, tested))
866                         continue;
867                 tested = bms_add_member(tested, var->varattno);
868
869                 /* Check point 2 */
870                 if (differentTypes[var->varattno])
871                 {
872                         safe = false;
873                         break;
874                 }
875
876                 /* Must find the tlist element referenced by the Var */
877                 tle = get_tle_by_resno(subquery->targetList, var->varattno);
878                 Assert(tle != NULL);
879                 Assert(!tle->resjunk);
880
881                 /* If subquery uses DISTINCT or DISTINCT ON, check point 3 */
882                 if (subquery->distinctClause != NIL &&
883                         !targetIsInSortList(tle, subquery->distinctClause))
884                 {
885                         /* non-DISTINCT column, so fail */
886                         safe = false;
887                         break;
888                 }
889
890                 /* Refuse functions returning sets (point 4) */
891                 if (expression_returns_set((Node *) tle->expr))
892                 {
893                         safe = false;
894                         break;
895                 }
896         }
897
898         list_free(vars);
899         bms_free(tested);
900
901         return safe;
902 }
903
904 /*
905  * subquery_push_qual - push down a qual that we have determined is safe
906  */
907 static void
908 subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual)
909 {
910         if (subquery->setOperations != NULL)
911         {
912                 /* Recurse to push it separately to each component query */
913                 recurse_push_qual(subquery->setOperations, subquery,
914                                                   rte, rti, qual);
915         }
916         else
917         {
918                 /*
919                  * We need to replace Vars in the qual (which must refer to outputs of
920                  * the subquery) with copies of the subquery's targetlist expressions.
921                  * Note that at this point, any uplevel Vars in the qual should have
922                  * been replaced with Params, so they need no work.
923                  *
924                  * This step also ensures that when we are pushing into a setop tree,
925                  * each component query gets its own copy of the qual.
926                  */
927                 qual = ResolveNew(qual, rti, 0, rte,
928                                                   subquery->targetList,
929                                                   CMD_SELECT, 0);
930
931                 /*
932                  * Now attach the qual to the proper place: normally WHERE, but if the
933                  * subquery uses grouping or aggregation, put it in HAVING (since the
934                  * qual really refers to the group-result rows).
935                  */
936                 if (subquery->hasAggs || subquery->groupClause || subquery->havingQual)
937                         subquery->havingQual = make_and_qual(subquery->havingQual, qual);
938                 else
939                         subquery->jointree->quals =
940                                 make_and_qual(subquery->jointree->quals, qual);
941
942                 /*
943                  * We need not change the subquery's hasAggs or hasSublinks flags,
944                  * since we can't be pushing down any aggregates that weren't there
945                  * before, and we don't push down subselects at all.
946                  */
947         }
948 }
949
950 /*
951  * Helper routine to recurse through setOperations tree
952  */
953 static void
954 recurse_push_qual(Node *setOp, Query *topquery,
955                                   RangeTblEntry *rte, Index rti, Node *qual)
956 {
957         if (IsA(setOp, RangeTblRef))
958         {
959                 RangeTblRef *rtr = (RangeTblRef *) setOp;
960                 RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable);
961                 Query      *subquery = subrte->subquery;
962
963                 Assert(subquery != NULL);
964                 subquery_push_qual(subquery, rte, rti, qual);
965         }
966         else if (IsA(setOp, SetOperationStmt))
967         {
968                 SetOperationStmt *op = (SetOperationStmt *) setOp;
969
970                 recurse_push_qual(op->larg, topquery, rte, rti, qual);
971                 recurse_push_qual(op->rarg, topquery, rte, rti, qual);
972         }
973         else
974         {
975                 elog(ERROR, "unrecognized node type: %d",
976                          (int) nodeTag(setOp));
977         }
978 }
979
980 /*****************************************************************************
981  *                      DEBUG SUPPORT
982  *****************************************************************************/
983
984 #ifdef OPTIMIZER_DEBUG
985
986 static void
987 print_relids(Relids relids)
988 {
989         Relids          tmprelids;
990         int                     x;
991         bool            first = true;
992
993         tmprelids = bms_copy(relids);
994         while ((x = bms_first_member(tmprelids)) >= 0)
995         {
996                 if (!first)
997                         printf(" ");
998                 printf("%d", x);
999                 first = false;
1000         }
1001         bms_free(tmprelids);
1002 }
1003
1004 static void
1005 print_restrictclauses(PlannerInfo *root, List *clauses)
1006 {
1007         ListCell   *l;
1008
1009         foreach(l, clauses)
1010         {
1011                 RestrictInfo *c = lfirst(l);
1012
1013                 print_expr((Node *) c->clause, root->parse->rtable);
1014                 if (lnext(l))
1015                         printf(", ");
1016         }
1017 }
1018
1019 static void
1020 print_path(PlannerInfo *root, Path *path, int indent)
1021 {
1022         const char *ptype;
1023         bool            join = false;
1024         Path       *subpath = NULL;
1025         int                     i;
1026
1027         switch (nodeTag(path))
1028         {
1029                 case T_Path:
1030                         ptype = "SeqScan";
1031                         break;
1032                 case T_IndexPath:
1033                         ptype = "IdxScan";
1034                         break;
1035                 case T_BitmapHeapPath:
1036                         ptype = "BitmapHeapScan";
1037                         break;
1038                 case T_BitmapAndPath:
1039                         ptype = "BitmapAndPath";
1040                         break;
1041                 case T_BitmapOrPath:
1042                         ptype = "BitmapOrPath";
1043                         break;
1044                 case T_TidPath:
1045                         ptype = "TidScan";
1046                         break;
1047                 case T_AppendPath:
1048                         ptype = "Append";
1049                         break;
1050                 case T_ResultPath:
1051                         ptype = "Result";
1052                         subpath = ((ResultPath *) path)->subpath;
1053                         break;
1054                 case T_MaterialPath:
1055                         ptype = "Material";
1056                         subpath = ((MaterialPath *) path)->subpath;
1057                         break;
1058                 case T_UniquePath:
1059                         ptype = "Unique";
1060                         subpath = ((UniquePath *) path)->subpath;
1061                         break;
1062                 case T_NestPath:
1063                         ptype = "NestLoop";
1064                         join = true;
1065                         break;
1066                 case T_MergePath:
1067                         ptype = "MergeJoin";
1068                         join = true;
1069                         break;
1070                 case T_HashPath:
1071                         ptype = "HashJoin";
1072                         join = true;
1073                         break;
1074                 default:
1075                         ptype = "???Path";
1076                         break;
1077         }
1078
1079         for (i = 0; i < indent; i++)
1080                 printf("\t");
1081         printf("%s", ptype);
1082
1083         if (path->parent)
1084         {
1085                 printf("(");
1086                 print_relids(path->parent->relids);
1087                 printf(") rows=%.0f", path->parent->rows);
1088         }
1089         printf(" cost=%.2f..%.2f\n", path->startup_cost, path->total_cost);
1090
1091         if (path->pathkeys)
1092         {
1093                 for (i = 0; i < indent; i++)
1094                         printf("\t");
1095                 printf("  pathkeys: ");
1096                 print_pathkeys(path->pathkeys, root->parse->rtable);
1097         }
1098
1099         if (join)
1100         {
1101                 JoinPath   *jp = (JoinPath *) path;
1102
1103                 for (i = 0; i < indent; i++)
1104                         printf("\t");
1105                 printf("  clauses: ");
1106                 print_restrictclauses(root, jp->joinrestrictinfo);
1107                 printf("\n");
1108
1109                 if (IsA(path, MergePath))
1110                 {
1111                         MergePath  *mp = (MergePath *) path;
1112
1113                         if (mp->outersortkeys || mp->innersortkeys)
1114                         {
1115                                 for (i = 0; i < indent; i++)
1116                                         printf("\t");
1117                                 printf("  sortouter=%d sortinner=%d\n",
1118                                            ((mp->outersortkeys) ? 1 : 0),
1119                                            ((mp->innersortkeys) ? 1 : 0));
1120                         }
1121                 }
1122
1123                 print_path(root, jp->outerjoinpath, indent + 1);
1124                 print_path(root, jp->innerjoinpath, indent + 1);
1125         }
1126
1127         if (subpath)
1128                 print_path(root, subpath, indent + 1);
1129 }
1130
1131 void
1132 debug_print_rel(PlannerInfo *root, RelOptInfo *rel)
1133 {
1134         ListCell   *l;
1135
1136         printf("RELOPTINFO (");
1137         print_relids(rel->relids);
1138         printf("): rows=%.0f width=%d\n", rel->rows, rel->width);
1139
1140         if (rel->baserestrictinfo)
1141         {
1142                 printf("\tbaserestrictinfo: ");
1143                 print_restrictclauses(root, rel->baserestrictinfo);
1144                 printf("\n");
1145         }
1146
1147         if (rel->joininfo)
1148         {
1149                 printf("\tjoininfo: ");
1150                 print_restrictclauses(root, rel->joininfo);
1151                 printf("\n");
1152         }
1153
1154         printf("\tpath list:\n");
1155         foreach(l, rel->pathlist)
1156                 print_path(root, lfirst(l), 1);
1157         printf("\n\tcheapest startup path:\n");
1158         print_path(root, rel->cheapest_startup_path, 1);
1159         printf("\n\tcheapest total path:\n");
1160         print_path(root, rel->cheapest_total_path, 1);
1161         printf("\n");
1162         fflush(stdout);
1163 }
1164
1165 #endif   /* OPTIMIZER_DEBUG */