]> granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/allpaths.c
Update copyright to 2004.
[postgresql] / src / backend / optimizer / path / allpaths.c
1 /*-------------------------------------------------------------------------
2  *
3  * allpaths.c
4  *        Routines to find possible search paths for processing a query
5  *
6  * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.120 2004/08/29 04:12:33 momjian Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15
16 #include "postgres.h"
17
18 #ifdef OPTIMIZER_DEBUG
19 #include "nodes/print.h"
20 #endif
21 #include "optimizer/clauses.h"
22 #include "optimizer/cost.h"
23 #include "optimizer/geqo.h"
24 #include "optimizer/pathnode.h"
25 #include "optimizer/paths.h"
26 #include "optimizer/plancat.h"
27 #include "optimizer/planner.h"
28 #include "optimizer/prep.h"
29 #include "optimizer/var.h"
30 #include "parser/parsetree.h"
31 #include "parser/parse_clause.h"
32 #include "rewrite/rewriteManip.h"
33
34
35 /* Planner parameters set by GUC: GEQO on/off switch and the FROM-item count at which GEQO takes over */
36 bool            enable_geqo = false;    /* just in case GUC doesn't set it */
37 int                     geqo_threshold; /* make_fromexpr_rel picks GEQO when enable_geqo is set and the fromlist has >= this many items */
38
39
40 static void set_base_rel_pathlists(Query *root);
41 static void set_plain_rel_pathlist(Query *root, RelOptInfo *rel,
42                                            RangeTblEntry *rte);
43 static void set_inherited_rel_pathlist(Query *root, RelOptInfo *rel,
44                                                    Index rti, RangeTblEntry *rte,
45                                                    List *inheritlist);
46 static void set_subquery_pathlist(Query *root, RelOptInfo *rel,
47                                           Index rti, RangeTblEntry *rte);
48 static void set_function_pathlist(Query *root, RelOptInfo *rel,
49                                           RangeTblEntry *rte);
50 static RelOptInfo *make_one_rel_by_joins(Query *root, int levels_needed,
51                                           List *initial_rels);
52 static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
53                                                   bool *differentTypes);
54 static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
55                                           bool *differentTypes);
56 static void compare_tlist_datatypes(List *tlist, List *colTypes,
57                                                 bool *differentTypes);
58 static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
59                                           bool *differentTypes);
60 static void subquery_push_qual(Query *subquery, List *rtable,
61                                                            Index rti, Node *qual);
62 static void recurse_push_qual(Node *setOp, Query *topquery,
63                                                           List *rtable, Index rti, Node *qual);
64
65
66 /*
67  * make_one_rel
68  *        Finds all possible access paths for executing a query, returning a
69  *        single rel that represents the join of all base rels in the query.
70  */
71 RelOptInfo *
72 make_one_rel(Query *root)
73 {
74         RelOptInfo *rel;
75
76         /*
77          * Generate access paths for the base rels.
78          */
79         set_base_rel_pathlists(root);
80
81         /*
82          * Generate access paths for the entire join tree.
83          */
84         Assert(root->jointree != NULL && IsA(root->jointree, FromExpr));
85
86         rel = make_fromexpr_rel(root, root->jointree);
87
88         /*
89          * The result should join all the query's base rels.
90          */
91         Assert(bms_num_members(rel->relids) == list_length(root->base_rel_list));
92
93         return rel;
94 }
95
96 /*
97  * set_base_rel_pathlists
98  *        Finds all paths available for scanning each base-relation entry.
99  *        Sequential scan and any available indices are considered.
100  *        Each useful path is attached to its relation's 'pathlist' field.
101  */
102 static void
103 set_base_rel_pathlists(Query *root)
104 {
105         ListCell           *l;
106
107         foreach(l, root->base_rel_list)
108         {
109                 RelOptInfo *rel = (RelOptInfo *) lfirst(l);
110                 Index           rti = rel->relid;
111                 RangeTblEntry *rte;
112                 List       *inheritlist;
113
114                 Assert(rti > 0);                /* better be base rel */
115                 rte = rt_fetch(rti, root->rtable);
116
117                 if (rel->rtekind == RTE_SUBQUERY)
118                 {
119                         /* Subquery --- generate a separate plan for it */
120                         set_subquery_pathlist(root, rel, rti, rte);
121                 }
122                 else if (rel->rtekind == RTE_FUNCTION)
123                 {
124                         /* RangeFunction --- generate a separate plan for it */
125                         set_function_pathlist(root, rel, rte);
126                 }
127                 else if ((inheritlist = expand_inherited_rtentry(root, rti, true))
128                                  != NIL)
129                 {
130                         /* Relation is root of an inheritance tree, process specially */
131                         set_inherited_rel_pathlist(root, rel, rti, rte, inheritlist);
132                 }
133                 else
134                 {
135                         /* Plain relation */
136                         set_plain_rel_pathlist(root, rel, rte);
137                 }
138
139 #ifdef OPTIMIZER_DEBUG
140                 debug_print_rel(root, rel);
141 #endif
142         }
143 }
144
145 /*
146  * set_plain_rel_pathlist
147  *        Build access paths for a plain relation (no subquery, no inheritance)
148  */
149 static void
150 set_plain_rel_pathlist(Query *root, RelOptInfo *rel, RangeTblEntry *rte)
151 {
152         /* Mark rel with estimated output rows, width, etc */
153         set_baserel_size_estimates(root, rel);
154
155         /* Test any partial indexes of rel for applicability */
156         check_partial_indexes(root, rel);
157
158         /*
159          * Check to see if we can extract any restriction conditions from
160          * join quals that are OR-of-AND structures.  If so, add them to the
161          * rel's restriction list, and recompute the size estimates.
162          */
163         if (create_or_index_quals(root, rel))
164                 set_baserel_size_estimates(root, rel);
165
166         /*
167          * Generate paths and add them to the rel's pathlist.
168          *
169          * Note: add_path() will discard any paths that are dominated by another
170          * available path, keeping only those paths that are superior along at
171          * least one dimension of cost or sortedness.
172          */
173
174         /* Consider sequential scan */
175         add_path(rel, create_seqscan_path(root, rel));
176
177         /* Consider TID scans */
178         create_tidscan_paths(root, rel);
179
180         /* Consider index paths for both simple and OR index clauses */
181         create_index_paths(root, rel);
182         create_or_index_paths(root, rel);
183
184         /* Now find the cheapest of the paths for this rel */
185         set_cheapest(rel);
186 }
187
188 /*
189  * set_inherited_rel_pathlist
190  *        Build access paths for a inheritance tree rooted at rel
191  *
192  * inheritlist is a list of RT indexes of all tables in the inheritance tree,
193  * including a duplicate of the parent itself.  Note we will not come here
194  * unless there's at least one child in addition to the parent.
195  *
196  * NOTE: the passed-in rel and RTE will henceforth represent the appended
197  * result of the whole inheritance tree.  The members of inheritlist represent
198  * the individual tables --- in particular, the inheritlist member that is a
199  * duplicate of the parent RTE represents the parent table alone.
200  * We will generate plans to scan the individual tables that refer to
201  * the inheritlist RTEs, whereas Vars elsewhere in the plan tree that
202  * refer to the original RTE are taken to refer to the append output.
203  * In particular, this means we have separate RelOptInfos for the parent
204  * table and for the append output, which is a good thing because they're
205  * not the same size.
206  */
207 static void
208 set_inherited_rel_pathlist(Query *root, RelOptInfo *rel,
209                                                    Index rti, RangeTblEntry *rte,
210                                                    List *inheritlist)
211 {
212         int                     parentRTindex = rti;
213         Oid                     parentOID = rte->relid;
214         List       *subpaths = NIL;
215         ListCell   *il;
216
217         /*
218          * XXX for now, can't handle inherited expansion of FOR UPDATE; can we
219          * do better?
220          */
221         if (list_member_int(root->rowMarks, parentRTindex))
222                 ereport(ERROR,
223                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
224                                  errmsg("SELECT FOR UPDATE is not supported for inheritance queries")));
225
226         /*
227          * The executor will check the parent table's access permissions when
228          * it examines the parent's inheritlist entry.  There's no need to
229          * check twice, so turn off access check bits in the original RTE.
230          */
231         rte->requiredPerms = 0;
232
233         /*
234          * Initialize to compute size estimates for whole inheritance tree
235          */
236         rel->rows = 0;
237         rel->width = 0;
238
239         /*
240          * Generate access paths for each table in the tree (parent AND
241          * children), and pick the cheapest path for each table.
242          */
243         foreach(il, inheritlist)
244         {
245                 int                     childRTindex = lfirst_int(il);
246                 RangeTblEntry *childrte;
247                 Oid                     childOID;
248                 RelOptInfo *childrel;
249                 ListCell   *parentvars;
250                 ListCell   *childvars;
251
252                 childrte = rt_fetch(childRTindex, root->rtable);
253                 childOID = childrte->relid;
254
255                 /*
256                  * Make a RelOptInfo for the child so we can do planning.  Do NOT
257                  * attach the RelOptInfo to the query's base_rel_list, however,
258                  * since the child is not part of the main join tree.  Instead,
259                  * the child RelOptInfo is added to other_rel_list.
260                  */
261                 childrel = build_other_rel(root, childRTindex);
262
263                 /*
264                  * Copy the parent's targetlist and restriction quals to the
265                  * child, with attribute-number adjustment as needed.  We don't
266                  * bother to copy the join quals, since we can't do any joining of
267                  * the individual tables.  Also, we just zap attr_needed rather
268                  * than trying to adjust it; it won't be looked at in the child.
269                  */
270                 childrel->reltargetlist = (List *)
271                         adjust_inherited_attrs((Node *) rel->reltargetlist,
272                                                                    parentRTindex,
273                                                                    parentOID,
274                                                                    childRTindex,
275                                                                    childOID);
276                 childrel->attr_needed = NULL;
277                 childrel->baserestrictinfo = (List *)
278                         adjust_inherited_attrs((Node *) rel->baserestrictinfo,
279                                                                    parentRTindex,
280                                                                    parentOID,
281                                                                    childRTindex,
282                                                                    childOID);
283
284                 /*
285                  * Now compute child access paths, and save the cheapest.
286                  */
287                 set_plain_rel_pathlist(root, childrel, childrte);
288
289                 subpaths = lappend(subpaths, childrel->cheapest_total_path);
290
291                 /*
292                  * Propagate size information from the child back to the parent.
293                  * For simplicity, we use the largest widths from any child as the
294                  * parent estimates.
295                  */
296                 rel->rows += childrel->rows;
297                 if (childrel->width > rel->width)
298                         rel->width = childrel->width;
299
300                 forboth(parentvars, rel->reltargetlist,
301                                 childvars, childrel->reltargetlist)
302                 {
303                         Var                *parentvar = (Var *) lfirst(parentvars);
304                         Var                *childvar = (Var *) lfirst(childvars);
305
306                         if (IsA(parentvar, Var) && IsA(childvar, Var))
307                         {
308                                 int                     pndx = parentvar->varattno - rel->min_attr;
309                                 int                     cndx = childvar->varattno - childrel->min_attr;
310
311                                 if (childrel->attr_widths[cndx] > rel->attr_widths[pndx])
312                                         rel->attr_widths[pndx] = childrel->attr_widths[cndx];
313                         }
314                 }
315         }
316
317         /*
318          * Finally, build Append path and install it as the only access path
319          * for the parent rel.
320          */
321         add_path(rel, (Path *) create_append_path(rel, subpaths));
322
323         /* Select cheapest path (pretty easy in this case...) */
324         set_cheapest(rel);
325 }
326
327 /*
328  * set_subquery_pathlist
329  *              Build the (single) access path for a subquery RTE
330  */
331 static void
332 set_subquery_pathlist(Query *root, RelOptInfo *rel,
333                                           Index rti, RangeTblEntry *rte)
334 {
335         Query      *subquery = rte->subquery;
336         bool       *differentTypes;
337         List       *pathkeys;
338
339         /* We need a workspace for keeping track of set-op type coercions */
340         differentTypes = (bool *)
341                 palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
342
343         /*
344          * If there are any restriction clauses that have been attached to the
345          * subquery relation, consider pushing them down to become HAVING
346          * quals of the subquery itself.  (Not WHERE clauses, since they may
347          * refer to subquery outputs that are aggregate results.  But
348          * planner.c will transfer them into the subquery's WHERE if they do
349          * not.)  This transformation is useful because it may allow us to
350          * generate a better plan for the subquery than evaluating all the
351          * subquery output rows and then filtering them.
352          *
353          * There are several cases where we cannot push down clauses.
354          * Restrictions involving the subquery are checked by
355          * subquery_is_pushdown_safe().  Restrictions on individual clauses
356          * are checked by qual_is_pushdown_safe().
357          *
358          * Non-pushed-down clauses will get evaluated as qpquals of the
359          * SubqueryScan node.
360          *
361          * XXX Are there any cases where we want to make a policy decision not to
362          * push down a pushable qual, because it'd result in a worse plan?
363          */
364         if (rel->baserestrictinfo != NIL &&
365                 subquery_is_pushdown_safe(subquery, subquery, differentTypes))
366         {
367                 /* OK to consider pushing down individual quals */
368                 List       *upperrestrictlist = NIL;
369                 ListCell   *l;
370
371                 foreach(l, rel->baserestrictinfo)
372                 {
373                         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
374                         Node       *clause = (Node *) rinfo->clause;
375
376                         if (qual_is_pushdown_safe(subquery, rti, clause, differentTypes))
377                         {
378                                 /* Push it down */
379                                 subquery_push_qual(subquery, root->rtable, rti, clause);
380                         }
381                         else
382                         {
383                                 /* Keep it in the upper query */
384                                 upperrestrictlist = lappend(upperrestrictlist, rinfo);
385                         }
386                 }
387                 rel->baserestrictinfo = upperrestrictlist;
388         }
389
390         pfree(differentTypes);
391
392         /* Generate the plan for the subquery */
393         rel->subplan = subquery_planner(subquery, 0.0 /* default case */ );
394
395         /* Copy number of output rows from subplan */
396         rel->tuples = rel->subplan->plan_rows;
397
398         /* Mark rel with estimated output rows, width, etc */
399         set_baserel_size_estimates(root, rel);
400
401         /* Convert subquery pathkeys to outer representation */
402         pathkeys = build_subquery_pathkeys(root, rel, subquery);
403
404         /* Generate appropriate path */
405         add_path(rel, create_subqueryscan_path(rel, pathkeys));
406
407         /* Select cheapest path (pretty easy in this case...) */
408         set_cheapest(rel);
409 }
410
411 /*
412  * set_function_pathlist
413  *              Build the (single) access path for a function RTE
414  */
415 static void
416 set_function_pathlist(Query *root, RelOptInfo *rel, RangeTblEntry *rte)
417 {
418         /* Mark rel with estimated output rows, width, etc */
419         set_function_size_estimates(root, rel);
420
421         /* Generate appropriate path */
422         add_path(rel, create_functionscan_path(root, rel));
423
424         /* Select cheapest path (pretty easy in this case...) */
425         set_cheapest(rel);
426 }
427
428 /*
429  * make_fromexpr_rel
430  *        Build access paths for a FromExpr jointree node.
431  */
432 RelOptInfo *
433 make_fromexpr_rel(Query *root, FromExpr *from)
434 {
435         int                     levels_needed;
436         List       *initial_rels = NIL;
437         ListCell   *jt;
438
439         /*
440          * Count the number of child jointree nodes.  This is the depth of the
441          * dynamic-programming algorithm we must employ to consider all ways
442          * of joining the child nodes.
443          */
444         levels_needed = list_length(from->fromlist);
445
446         if (levels_needed <= 0)
447                 return NULL;                    /* nothing to do? */
448
449         /*
450          * Construct a list of rels corresponding to the child jointree nodes.
451          * This may contain both base rels and rels constructed according to
452          * explicit JOIN directives.
453          */
454         foreach(jt, from->fromlist)
455         {
456                 Node       *jtnode = (Node *) lfirst(jt);
457
458                 initial_rels = lappend(initial_rels,
459                                                            make_jointree_rel(root, jtnode));
460         }
461
462         if (levels_needed == 1)
463         {
464                 /*
465                  * Single jointree node, so we're done.
466                  */
467                 return (RelOptInfo *) linitial(initial_rels);
468         }
469         else
470         {
471                 /*
472                  * Consider the different orders in which we could join the rels,
473                  * using either GEQO or regular optimizer.
474                  */
475                 if (enable_geqo && levels_needed >= geqo_threshold)
476                         return geqo(root, levels_needed, initial_rels);
477                 else
478                         return make_one_rel_by_joins(root, levels_needed, initial_rels);
479         }
480 }
481
482 /*
483  * make_one_rel_by_joins
484  *        Find all possible joinpaths for a query by successively finding ways
485  *        to join component relations into join relations.
486  *
487  * 'levels_needed' is the number of iterations needed, ie, the number of
488  *              independent jointree items in the query.  This is > 1.
489  *
490  * 'initial_rels' is a list of RelOptInfo nodes for each independent
491  *              jointree item.  These are the components to be joined together.
492  *
493  * Returns the final level of join relations, i.e., the relation that is
494  * the result of joining all the original relations together.
495  */
496 static RelOptInfo *
497 make_one_rel_by_joins(Query *root, int levels_needed, List *initial_rels)
498 {
499         List      **joinitems;
500         int                     lev;
501         RelOptInfo *rel;
502
503         /*
504          * We employ a simple "dynamic programming" algorithm: we first find
505          * all ways to build joins of two jointree items, then all ways to
506          * build joins of three items (from two-item joins and single items),
507          * then four-item joins, and so on until we have considered all ways
508          * to join all the items into one rel.
509          *
510          * joinitems[j] is a list of all the j-item rels.  Initially we set
511          * joinitems[1] to represent all the single-jointree-item relations.
512          */
513         joinitems = (List **) palloc0((levels_needed + 1) * sizeof(List *));
514
515         joinitems[1] = initial_rels;
516
517         for (lev = 2; lev <= levels_needed; lev++)
518         {
519                 ListCell   *x;
520
521                 /*
522                  * Determine all possible pairs of relations to be joined at this
523                  * level, and build paths for making each one from every available
524                  * pair of lower-level relations.
525                  */
526                 joinitems[lev] = make_rels_by_joins(root, lev, joinitems);
527
528                 /*
529                  * Do cleanup work on each just-processed rel.
530                  */
531                 foreach(x, joinitems[lev])
532                 {
533                         rel = (RelOptInfo *) lfirst(x);
534
535                         /* Find and save the cheapest paths for this rel */
536                         set_cheapest(rel);
537
538 #ifdef OPTIMIZER_DEBUG
539                         debug_print_rel(root, rel);
540 #endif
541                 }
542         }
543
544         /*
545          * We should have a single rel at the final level.
546          */
547         if (joinitems[levels_needed] == NIL)
548                 elog(ERROR, "failed to build any %d-way joins", levels_needed);
549         Assert(list_length(joinitems[levels_needed]) == 1);
550
551         rel = (RelOptInfo *) linitial(joinitems[levels_needed]);
552
553         return rel;
554 }
555
556 /*****************************************************************************
557  *                      PUSHING QUALS DOWN INTO SUBQUERIES
558  *****************************************************************************/
559
560 /*
561  * subquery_is_pushdown_safe - is a subquery safe for pushing down quals?
562  *
563  * subquery is the particular component query being checked.  topquery
564  * is the top component of a set-operations tree (the same Query if no
565  * set-op is involved).
566  *
567  * Conditions checked here:
568  *
569  * 1. If the subquery has a LIMIT clause, we must not push down any quals,
570  * since that could change the set of rows returned.
571  *
572  * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
573  * quals into it, because that would change the results.
574  *
575  * 3. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
576  * push quals into each component query, but the quals can only reference
577  * subquery columns that suffer no type coercions in the set operation.
578  * Otherwise there are possible semantic gotchas.  So, we check the
579  * component queries to see if any of them have different output types;
580  * differentTypes[k] is set true if column k has different type in any
581  * component.
582  */
583 static bool
584 subquery_is_pushdown_safe(Query *subquery, Query *topquery,
585                                                   bool *differentTypes)
586 {
587         SetOperationStmt *topop;
588
589         /* Check point 1 */
590         if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
591                 return false;
592
593         /* Are we at top level, or looking at a setop component? */
594         if (subquery == topquery)
595         {
596                 /* Top level, so check any component queries */
597                 if (subquery->setOperations != NULL)
598                         if (!recurse_pushdown_safe(subquery->setOperations, topquery,
599                                                                            differentTypes))
600                                 return false;
601         }
602         else
603         {
604                 /* Setop component must not have more components (too weird) */
605                 if (subquery->setOperations != NULL)
606                         return false;
607                 /* Check whether setop component output types match top level */
608                 topop = (SetOperationStmt *) topquery->setOperations;
609                 Assert(topop && IsA(topop, SetOperationStmt));
610                 compare_tlist_datatypes(subquery->targetList,
611                                                                 topop->colTypes,
612                                                                 differentTypes);
613         }
614         return true;
615 }
616
617 /*
618  * Helper routine to recurse through setOperations tree
619  */
620 static bool
621 recurse_pushdown_safe(Node *setOp, Query *topquery,
622                                           bool *differentTypes)
623 {
624         if (IsA(setOp, RangeTblRef))
625         {
626                 RangeTblRef *rtr = (RangeTblRef *) setOp;
627                 RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable);
628                 Query      *subquery = rte->subquery;
629
630                 Assert(subquery != NULL);
631                 return subquery_is_pushdown_safe(subquery, topquery, differentTypes);
632         }
633         else if (IsA(setOp, SetOperationStmt))
634         {
635                 SetOperationStmt *op = (SetOperationStmt *) setOp;
636
637                 /* EXCEPT is no good */
638                 if (op->op == SETOP_EXCEPT)
639                         return false;
640                 /* Else recurse */
641                 if (!recurse_pushdown_safe(op->larg, topquery, differentTypes))
642                         return false;
643                 if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes))
644                         return false;
645         }
646         else
647         {
648                 elog(ERROR, "unrecognized node type: %d",
649                          (int) nodeTag(setOp));
650         }
651         return true;
652 }
653
654 /*
655  * Compare tlist's datatypes against the list of set-operation result types.
656  * For any items that are different, mark the appropriate element of
657  * differentTypes[] to show that this column will have type conversions.
658  */
659 static void
660 compare_tlist_datatypes(List *tlist, List *colTypes,
661                                                 bool *differentTypes)
662 {
663         ListCell   *l;
664         ListCell   *colType = list_head(colTypes);
665
666         foreach(l, tlist)
667         {
668                 TargetEntry *tle = (TargetEntry *) lfirst(l);
669
670                 if (tle->resdom->resjunk)
671                         continue;                       /* ignore resjunk columns */
672                 if (colType == NULL)
673                         elog(ERROR, "wrong number of tlist entries");
674                 if (tle->resdom->restype != lfirst_oid(colType))
675                         differentTypes[tle->resdom->resno] = true;
676                 colType = lnext(colType);
677         }
678         if (colType != NULL)
679                 elog(ERROR, "wrong number of tlist entries");
680 }
681
682 /*
683  * qual_is_pushdown_safe - is a particular qual safe to push down?
684  *
685  * qual is a restriction clause applying to the given subquery (whose RTE
686  * has index rti in the parent query).
687  *
688  * Conditions checked here:
689  *
690  * 1. The qual must not contain any subselects (mainly because I'm not sure
691  * it will work correctly: sublinks will already have been transformed into
692  * subplans in the qual, but not in the subquery).
693  *
694  * 2. The qual must not refer to any subquery output columns that were
695  * found to have inconsistent types across a set operation tree by
696  * subquery_is_pushdown_safe().
697  *
698  * 3. If the subquery uses DISTINCT ON, we must not push down any quals that
699  * refer to non-DISTINCT output columns, because that could change the set
700  * of rows returned.  This condition is vacuous for DISTINCT, because then
701  * there are no non-DISTINCT output columns, but unfortunately it's fairly
702  * expensive to tell the difference between DISTINCT and DISTINCT ON in the
703  * parsetree representation.  It's cheaper to just make sure all the Vars
704  * in the qual refer to DISTINCT columns.
705  *
706  * 4. We must not push down any quals that refer to subselect outputs that
707  * return sets, else we'd introduce functions-returning-sets into the
708  * subquery's WHERE/HAVING quals.
709  */
710 static bool
711 qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
712                                           bool *differentTypes)
713 {
714         bool            safe = true;
715         List       *vars;
716         ListCell   *vl;
717         Bitmapset  *tested = NULL;
718
719         /* Refuse subselects (point 1) */
720         if (contain_subplans(qual))
721                 return false;
722
723         /*
724          * Examine all Vars used in clause; since it's a restriction clause,
725          * all such Vars must refer to subselect output columns.
726          */
727         vars = pull_var_clause(qual, false);
728         foreach(vl, vars)
729         {
730                 Var                *var = (Var *) lfirst(vl);
731                 TargetEntry *tle;
732
733                 Assert(var->varno == rti);
734
735                 /*
736                  * We use a bitmapset to avoid testing the same attno more than
737                  * once.  (NB: this only works because subquery outputs can't have
738                  * negative attnos.)
739                  */
740                 if (bms_is_member(var->varattno, tested))
741                         continue;
742                 tested = bms_add_member(tested, var->varattno);
743
744                 /* Check point 2 */
745                 if (differentTypes[var->varattno])
746                 {
747                         safe = false;
748                         break;
749                 }
750
751                 /* Must find the tlist element referenced by the Var */
752                 tle = get_tle_by_resno(subquery->targetList, var->varattno);
753                 Assert(tle != NULL);
754                 Assert(!tle->resdom->resjunk);
755
756                 /* If subquery uses DISTINCT or DISTINCT ON, check point 3 */
757                 if (subquery->distinctClause != NIL &&
758                         !targetIsInSortList(tle, subquery->distinctClause))
759                 {
760                         /* non-DISTINCT column, so fail */
761                         safe = false;
762                         break;
763                 }
764
765                 /* Refuse functions returning sets (point 4) */
766                 if (expression_returns_set((Node *) tle->expr))
767                 {
768                         safe = false;
769                         break;
770                 }
771         }
772
773         list_free(vars);
774         bms_free(tested);
775
776         return safe;
777 }
778
779 /*
780  * subquery_push_qual - push down a qual that we have determined is safe
781  */
782 static void
783 subquery_push_qual(Query *subquery, List *rtable, Index rti, Node *qual)
784 {
785         if (subquery->setOperations != NULL)
786         {
787                 /* Recurse to push it separately to each component query */
788                 recurse_push_qual(subquery->setOperations, subquery,
789                                                   rtable, rti, qual);
790         }
791         else
792         {
793                 /*
794                  * We need to replace Vars in the qual (which must refer to
795                  * outputs of the subquery) with copies of the subquery's
796                  * targetlist expressions.      Note that at this point, any uplevel
797                  * Vars in the qual should have been replaced with Params, so they
798                  * need no work.
799                  *
800                  * This step also ensures that when we are pushing into a setop tree,
801                  * each component query gets its own copy of the qual.
802                  */
803                 qual = ResolveNew(qual, rti, 0, rtable,
804                                                   subquery->targetList,
805                                                   CMD_SELECT, 0);
806                 subquery->havingQual = make_and_qual(subquery->havingQual,
807                                                                                          qual);
808
809                 /*
810                  * We need not change the subquery's hasAggs or hasSublinks flags,
811                  * since we can't be pushing down any aggregates that weren't
812                  * there before, and we don't push down subselects at all.
813                  */
814         }
815 }
816
817 /*
818  * Helper routine to recurse through setOperations tree
819  */
820 static void
821 recurse_push_qual(Node *setOp, Query *topquery,
822                                   List *rtable, Index rti, Node *qual)
823 {
824         if (IsA(setOp, RangeTblRef))
825         {
826                 RangeTblRef *rtr = (RangeTblRef *) setOp;
827                 RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable);
828                 Query      *subquery = subrte->subquery;
829
830                 Assert(subquery != NULL);
831                 subquery_push_qual(subquery, rtable, rti, qual);
832         }
833         else if (IsA(setOp, SetOperationStmt))
834         {
835                 SetOperationStmt *op = (SetOperationStmt *) setOp;
836
837                 recurse_push_qual(op->larg, topquery, rtable, rti, qual);
838                 recurse_push_qual(op->rarg, topquery, rtable, rti, qual);
839         }
840         else
841         {
842                 elog(ERROR, "unrecognized node type: %d",
843                          (int) nodeTag(setOp));
844         }
845 }
846
847 /*****************************************************************************
848  *                      DEBUG SUPPORT
849  *****************************************************************************/
850
851 #ifdef OPTIMIZER_DEBUG
852
853 static void
854 print_relids(Relids relids)
855 {
856         Relids          tmprelids;
857         int                     x;
858         bool            first = true;
859
860         tmprelids = bms_copy(relids);
861         while ((x = bms_first_member(tmprelids)) >= 0)
862         {
863                 if (!first)
864                         printf(" ");
865                 printf("%d", x);
866                 first = false;
867         }
868         bms_free(tmprelids);
869 }
870
871 static void
872 print_restrictclauses(Query *root, List *clauses)
873 {
874         ListCell   *l;
875
876         foreach(l, clauses)
877         {
878                 RestrictInfo *c = lfirst(l);
879
880                 print_expr((Node *) c->clause, root->rtable);
881                 if (lnext(l))
882                         printf(", ");
883         }
884 }
885
886 static void
887 print_path(Query *root, Path *path, int indent)
888 {
889         const char *ptype;
890         bool            join = false;
891         Path       *subpath = NULL;
892         int                     i;
893
894         switch (nodeTag(path))
895         {
896                 case T_Path:
897                         ptype = "SeqScan";
898                         break;
899                 case T_IndexPath:
900                         ptype = "IdxScan";
901                         break;
902                 case T_TidPath:
903                         ptype = "TidScan";
904                         break;
905                 case T_AppendPath:
906                         ptype = "Append";
907                         break;
908                 case T_ResultPath:
909                         ptype = "Result";
910                         subpath = ((ResultPath *) path)->subpath;
911                         break;
912                 case T_MaterialPath:
913                         ptype = "Material";
914                         subpath = ((MaterialPath *) path)->subpath;
915                         break;
916                 case T_UniquePath:
917                         ptype = "Unique";
918                         subpath = ((UniquePath *) path)->subpath;
919                         break;
920                 case T_NestPath:
921                         ptype = "NestLoop";
922                         join = true;
923                         break;
924                 case T_MergePath:
925                         ptype = "MergeJoin";
926                         join = true;
927                         break;
928                 case T_HashPath:
929                         ptype = "HashJoin";
930                         join = true;
931                         break;
932                 default:
933                         ptype = "???Path";
934                         break;
935         }
936
937         for (i = 0; i < indent; i++)
938                 printf("\t");
939         printf("%s", ptype);
940
941         if (path->parent)
942         {
943                 printf("(");
944                 print_relids(path->parent->relids);
945                 printf(") rows=%.0f", path->parent->rows);
946         }
947         printf(" cost=%.2f..%.2f\n", path->startup_cost, path->total_cost);
948
949         if (path->pathkeys)
950         {
951                 for (i = 0; i < indent; i++)
952                         printf("\t");
953                 printf("  pathkeys: ");
954                 print_pathkeys(path->pathkeys, root->rtable);
955         }
956
957         if (join)
958         {
959                 JoinPath   *jp = (JoinPath *) path;
960
961                 for (i = 0; i < indent; i++)
962                         printf("\t");
963                 printf("  clauses: ");
964                 print_restrictclauses(root, jp->joinrestrictinfo);
965                 printf("\n");
966
967                 if (IsA(path, MergePath))
968                 {
969                         MergePath  *mp = (MergePath *) path;
970
971                         if (mp->outersortkeys || mp->innersortkeys)
972                         {
973                                 for (i = 0; i < indent; i++)
974                                         printf("\t");
975                                 printf("  sortouter=%d sortinner=%d\n",
976                                            ((mp->outersortkeys) ? 1 : 0),
977                                            ((mp->innersortkeys) ? 1 : 0));
978                         }
979                 }
980
981                 print_path(root, jp->outerjoinpath, indent + 1);
982                 print_path(root, jp->innerjoinpath, indent + 1);
983         }
984
985         if (subpath)
986                 print_path(root, subpath, indent + 1);
987 }
988
989 void
990 debug_print_rel(Query *root, RelOptInfo *rel)
991 {
992         ListCell   *l;
993
994         printf("RELOPTINFO (");
995         print_relids(rel->relids);
996         printf("): rows=%.0f width=%d\n", rel->rows, rel->width);
997
998         if (rel->baserestrictinfo)
999         {
1000                 printf("\tbaserestrictinfo: ");
1001                 print_restrictclauses(root, rel->baserestrictinfo);
1002                 printf("\n");
1003         }
1004
1005         foreach(l, rel->joininfo)
1006         {
1007                 JoinInfo   *j = (JoinInfo *) lfirst(l);
1008
1009                 printf("\tjoininfo (");
1010                 print_relids(j->unjoined_relids);
1011                 printf("): ");
1012                 print_restrictclauses(root, j->jinfo_restrictinfo);
1013                 printf("\n");
1014         }
1015
1016         printf("\tpath list:\n");
1017         foreach(l, rel->pathlist)
1018                 print_path(root, lfirst(l), 1);
1019         printf("\n\tcheapest startup path:\n");
1020         print_path(root, rel->cheapest_startup_path, 1);
1021         printf("\n\tcheapest total path:\n");
1022         print_path(root, rel->cheapest_total_path, 1);
1023         printf("\n");
1024         fflush(stdout);
1025 }
1026
1027 #endif   /* OPTIMIZER_DEBUG */