1 /*-------------------------------------------------------------------------
2  *
3  * allpaths.c
4  *        Routines to find possible search paths for processing a query
5  *
6  * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        src/backend/optimizer/path/allpaths.c
12  *
13  *-------------------------------------------------------------------------
14  */
15
16 #include "postgres.h"
17
18 #include <math.h>
19
20 #include "access/sysattr.h"
21 #include "catalog/pg_class.h"
22 #include "catalog/pg_operator.h"
23 #include "foreign/fdwapi.h"
24 #include "nodes/makefuncs.h"
25 #include "nodes/nodeFuncs.h"
26 #ifdef OPTIMIZER_DEBUG
27 #include "nodes/print.h"
28 #endif
29 #include "optimizer/clauses.h"
30 #include "optimizer/cost.h"
31 #include "optimizer/geqo.h"
32 #include "optimizer/pathnode.h"
33 #include "optimizer/paths.h"
34 #include "optimizer/plancat.h"
35 #include "optimizer/planner.h"
36 #include "optimizer/prep.h"
37 #include "optimizer/restrictinfo.h"
38 #include "optimizer/var.h"
39 #include "parser/parse_clause.h"
40 #include "parser/parsetree.h"
41 #include "rewrite/rewriteManip.h"
42 #include "utils/lsyscache.h"
43
44
45 /* results of subquery_is_pushdown_safe */
46 typedef struct pushdown_safety_info
47 {
48         bool       *unsafeColumns;      /* which output columns are unsafe to use */
49         bool            unsafeVolatile; /* don't push down volatile quals */
50         bool            unsafeLeaky;    /* don't push down leaky quals */
51 } pushdown_safety_info;
52
53 /* These parameters are set by GUC */
54 bool            enable_geqo = false;    /* just in case GUC doesn't set it */
55 int                     geqo_threshold;
56
57 /* Hook for plugins to get control in set_rel_pathlist() */
58 set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL;
59
60 /* Hook for plugins to replace standard_join_search() */
61 join_search_hook_type join_search_hook = NULL;
62
63
64 static void set_base_rel_sizes(PlannerInfo *root);
65 static void set_base_rel_pathlists(PlannerInfo *root);
66 static void set_rel_size(PlannerInfo *root, RelOptInfo *rel,
67                          Index rti, RangeTblEntry *rte);
68 static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
69                                  Index rti, RangeTblEntry *rte);
70 static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
71                                    RangeTblEntry *rte);
72 static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
73                                            RangeTblEntry *rte);
74 static void set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel,
75                                    RangeTblEntry *rte);
76 static void set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
77                                                                                  RangeTblEntry *rte);
78 static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel,
79                                  RangeTblEntry *rte);
80 static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
81                                          RangeTblEntry *rte);
82 static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
83                                         Index rti, RangeTblEntry *rte);
84 static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
85                                                 Index rti, RangeTblEntry *rte);
86 static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
87                                                    List *live_childrels,
88                                                    List *all_child_pathkeys);
89 static Path *get_cheapest_parameterized_child_path(PlannerInfo *root,
90                                                                           RelOptInfo *rel,
91                                                                           Relids required_outer);
92 static List *accumulate_append_subpath(List *subpaths, Path *path);
93 static void set_dummy_rel_pathlist(RelOptInfo *rel);
94 static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
95                                           Index rti, RangeTblEntry *rte);
96 static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
97                                           RangeTblEntry *rte);
98 static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel,
99                                         RangeTblEntry *rte);
100 static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel,
101                                  RangeTblEntry *rte);
102 static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
103                                            RangeTblEntry *rte);
104 static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
105 static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
106                                                   pushdown_safety_info *safetyInfo);
107 static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
108                                           pushdown_safety_info *safetyInfo);
109 static void check_output_expressions(Query *subquery,
110                                                  pushdown_safety_info *safetyInfo);
111 static void compare_tlist_datatypes(List *tlist, List *colTypes,
112                                                 pushdown_safety_info *safetyInfo);
113 static bool targetIsInAllPartitionLists(TargetEntry *tle, Query *query);
114 static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
115                                           pushdown_safety_info *safetyInfo);
116 static void subquery_push_qual(Query *subquery,
117                                    RangeTblEntry *rte, Index rti, Node *qual);
118 static void recurse_push_qual(Node *setOp, Query *topquery,
119                                   RangeTblEntry *rte, Index rti, Node *qual);
120 static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel);
121
122
123 /*
124  * make_one_rel
125  *        Finds all possible access paths for executing a query, returning a
126  *        single rel that represents the join of all base rels in the query.
127  */
128 RelOptInfo *
129 make_one_rel(PlannerInfo *root, List *joinlist)
130 {
131         RelOptInfo *rel;
132         Index           rti;
133
134         /*
135          * Construct the all_baserels Relids set.
136          */
137         root->all_baserels = NULL;
138         for (rti = 1; rti < root->simple_rel_array_size; rti++)
139         {
140                 RelOptInfo *brel = root->simple_rel_array[rti];
141
142                 /* there may be empty slots corresponding to non-baserel RTEs */
143                 if (brel == NULL)
144                         continue;
145
146                 Assert(brel->relid == rti);             /* sanity check on array */
147
148                 /* ignore RTEs that are "other rels" */
149                 if (brel->reloptkind != RELOPT_BASEREL)
150                         continue;
151
152                 root->all_baserels = bms_add_member(root->all_baserels, brel->relid);
153         }
154
155         /*
156          * Generate access paths for the base rels.
157          */
158         set_base_rel_sizes(root);
159         set_base_rel_pathlists(root);
160
161         /*
162          * Generate access paths for the entire join tree.
163          */
164         rel = make_rel_from_joinlist(root, joinlist);
165
166         /*
167          * The result should join all and only the query's base rels.
168          */
169         Assert(bms_equal(rel->relids, root->all_baserels));
170
171         return rel;
172 }
173
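/*
 * A small illustrative sketch, not from the original source, of what the
 * loop in make_one_rel computes: for a hypothetical two-table query such as
 * "SELECT ... FROM a JOIN b ON ...", the plain baserels typically occupy
 * range-table indexes 1 and 2, so all_baserels ends up as the bitmapset
 * {1, 2}; empty simple_rel_array slots and "other rels" are skipped.  The
 * same bitmapset idiom in isolation:
 *
 *     Relids ids = NULL;
 *     ids = bms_add_member(ids, 1);
 *     ids = bms_add_member(ids, 2);
 *     Assert(bms_num_members(ids) == 2);
 */
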
174 /*
175  * set_base_rel_sizes
176  *        Set the size estimates (rows and widths) for each base-relation entry.
177  *
178  * We do this in a separate pass over the base rels so that rowcount
179  * estimates are available for parameterized path generation.
180  */
181 static void
182 set_base_rel_sizes(PlannerInfo *root)
183 {
184         Index           rti;
185
186         for (rti = 1; rti < root->simple_rel_array_size; rti++)
187         {
188                 RelOptInfo *rel = root->simple_rel_array[rti];
189
190                 /* there may be empty slots corresponding to non-baserel RTEs */
191                 if (rel == NULL)
192                         continue;
193
194                 Assert(rel->relid == rti);              /* sanity check on array */
195
196                 /* ignore RTEs that are "other rels" */
197                 if (rel->reloptkind != RELOPT_BASEREL)
198                         continue;
199
200                 set_rel_size(root, rel, rti, root->simple_rte_array[rti]);
201         }
202 }
203
204 /*
205  * set_base_rel_pathlists
206  *        Finds all paths available for scanning each base-relation entry.
207  *        Sequential scan and any available indices are considered.
208  *        Each useful path is attached to its relation's 'pathlist' field.
209  */
210 static void
211 set_base_rel_pathlists(PlannerInfo *root)
212 {
213         Index           rti;
214
215         for (rti = 1; rti < root->simple_rel_array_size; rti++)
216         {
217                 RelOptInfo *rel = root->simple_rel_array[rti];
218
219                 /* there may be empty slots corresponding to non-baserel RTEs */
220                 if (rel == NULL)
221                         continue;
222
223                 Assert(rel->relid == rti);              /* sanity check on array */
224
225                 /* ignore RTEs that are "other rels" */
226                 if (rel->reloptkind != RELOPT_BASEREL)
227                         continue;
228
229                 set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
230         }
231 }
232
233 /*
234  * set_rel_size
235  *        Set size estimates for a base relation
236  */
237 static void
238 set_rel_size(PlannerInfo *root, RelOptInfo *rel,
239                          Index rti, RangeTblEntry *rte)
240 {
241         if (rel->reloptkind == RELOPT_BASEREL &&
242                 relation_excluded_by_constraints(root, rel, rte))
243         {
244                 /*
245                  * We proved we don't need to scan the rel via constraint exclusion,
246                  * so set up a single dummy path for it.  Here we only check this for
247                  * regular baserels; if it's an otherrel, CE was already checked in
248                  * set_append_rel_size().
249                  *
250                  * In this case, we go ahead and set up the relation's path right away
251                  * instead of leaving it for set_rel_pathlist to do.  This is because
252                  * we don't have a convention for marking a rel as dummy except by
253                  * assigning a dummy path to it.
254                  */
255                 set_dummy_rel_pathlist(rel);
256         }
257         else if (rte->inh)
258         {
259                 /* It's an "append relation", process accordingly */
260                 set_append_rel_size(root, rel, rti, rte);
261         }
262         else
263         {
264                 switch (rel->rtekind)
265                 {
266                         case RTE_RELATION:
267                                 if (rte->relkind == RELKIND_FOREIGN_TABLE)
268                                 {
269                                         /* Foreign table */
270                                         set_foreign_size(root, rel, rte);
271                                 }
272                                 else if (rte->tablesample != NULL)
273                                 {
274                                         /* Sampled relation */
275                                         set_tablesample_rel_size(root, rel, rte);
276                                 }
277                                 else
278                                 {
279                                         /* Plain relation */
280                                         set_plain_rel_size(root, rel, rte);
281                                 }
282                                 break;
283                         case RTE_SUBQUERY:
284
285                                 /*
286                                  * Subqueries don't support making a choice between
287                                  * parameterized and unparameterized paths, so just go ahead
288                                  * and build their paths immediately.
289                                  */
290                                 set_subquery_pathlist(root, rel, rti, rte);
291                                 break;
292                         case RTE_FUNCTION:
293                                 set_function_size_estimates(root, rel);
294                                 break;
295                         case RTE_VALUES:
296                                 set_values_size_estimates(root, rel);
297                                 break;
298                         case RTE_CTE:
299
300                                 /*
301                                  * CTEs don't support making a choice between parameterized
302                                  * and unparameterized paths, so just go ahead and build their
303                                  * paths immediately.
304                                  */
305                                 if (rte->self_reference)
306                                         set_worktable_pathlist(root, rel, rte);
307                                 else
308                                         set_cte_pathlist(root, rel, rte);
309                                 break;
310                         default:
311                                 elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
312                                 break;
313                 }
314         }
315 }
316
317 /*
318  * set_rel_pathlist
319  *        Build access paths for a base relation
320  */
321 static void
322 set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
323                                  Index rti, RangeTblEntry *rte)
324 {
325         if (IS_DUMMY_REL(rel))
326         {
327                 /* We already proved the relation empty, so nothing more to do */
328         }
329         else if (rte->inh)
330         {
331                 /* It's an "append relation", process accordingly */
332                 set_append_rel_pathlist(root, rel, rti, rte);
333         }
334         else
335         {
336                 switch (rel->rtekind)
337                 {
338                         case RTE_RELATION:
339                                 if (rte->relkind == RELKIND_FOREIGN_TABLE)
340                                 {
341                                         /* Foreign table */
342                                         set_foreign_pathlist(root, rel, rte);
343                                 }
344                                 else if (rte->tablesample != NULL)
345                                 {
346                                         /* Build sample scan on relation */
347                                         set_tablesample_rel_pathlist(root, rel, rte);
348                                 }
349                                 else
350                                 {
351                                         /* Plain relation */
352                                         set_plain_rel_pathlist(root, rel, rte);
353                                 }
354                                 break;
355                         case RTE_SUBQUERY:
356                                 /* Subquery --- fully handled during set_rel_size */
357                                 break;
358                         case RTE_FUNCTION:
359                                 /* RangeFunction */
360                                 set_function_pathlist(root, rel, rte);
361                                 break;
362                         case RTE_VALUES:
363                                 /* Values list */
364                                 set_values_pathlist(root, rel, rte);
365                                 break;
366                         case RTE_CTE:
367                                 /* CTE reference --- fully handled during set_rel_size */
368                                 break;
369                         default:
370                                 elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
371                                 break;
372                 }
373         }
374
375         /*
376          * Allow a plugin to editorialize on the set of Paths for this base
377          * relation.  It could add new paths (such as CustomPaths) by calling
378          * add_path(), or delete or modify paths added by the core code.
379          */
380         if (set_rel_pathlist_hook)
381                 (*set_rel_pathlist_hook) (root, rel, rti, rte);
382
383         /* Now find the cheapest of the paths for this rel */
384         set_cheapest(rel);
385
386 #ifdef OPTIMIZER_DEBUG
387         debug_print_rel(root, rel);
388 #endif
389 }
390
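/*
 * A minimal sketch, not from the original source, of how an extension might
 * use the set_rel_pathlist_hook consulted above.  The names
 * my_set_rel_pathlist and prev_set_rel_pathlist_hook are hypothetical; the
 * hook type and the _PG_init() registration idiom are the usual ones:
 *
 *     static set_rel_pathlist_hook_type prev_set_rel_pathlist_hook = NULL;
 *
 *     static void
 *     my_set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 *                         Index rti, RangeTblEntry *rte)
 *     {
 *         // chain to any previously installed hook first
 *         if (prev_set_rel_pathlist_hook)
 *             prev_set_rel_pathlist_hook(root, rel, rti, rte);
 *         // then add, remove, or re-cost paths, e.g. via add_path(rel, ...)
 *     }
 *
 *     void
 *     _PG_init(void)
 *     {
 *         prev_set_rel_pathlist_hook = set_rel_pathlist_hook;
 *         set_rel_pathlist_hook = my_set_rel_pathlist;
 *     }
 */
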
391 /*
392  * set_plain_rel_size
393  *        Set size estimates for a plain relation (no subquery, no inheritance)
394  */
395 static void
396 set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
397 {
398         /*
399          * Test any partial indexes of rel for applicability.  We must do this
400          * first since partial unique indexes can affect size estimates.
401          */
402         check_partial_indexes(root, rel);
403
404         /* Mark rel with estimated output rows, width, etc */
405         set_baserel_size_estimates(root, rel);
406 }
407
408 /*
409  * set_plain_rel_pathlist
410  *        Build access paths for a plain relation (no subquery, no inheritance)
411  */
412 static void
413 set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
414 {
415         Relids          required_outer;
416
417         /*
418          * We don't support pushing join clauses into the quals of a seqscan, but
419          * it could still have required parameterization due to LATERAL refs in
420          * its tlist.
421          */
422         required_outer = rel->lateral_relids;
423
424         /* Consider sequential scan */
425         add_path(rel, create_seqscan_path(root, rel, required_outer));
426
427         /* Consider index scans */
428         create_index_paths(root, rel);
429
430         /* Consider TID scans */
431         create_tidscan_paths(root, rel);
432 }
433
434 /*
435  * set_tablesample_rel_size
436  *        Set size estimates for a sampled relation.
437  */
438 static void
439 set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
440 {
441         /* Mark rel with estimated output rows, width, etc */
442         set_baserel_size_estimates(root, rel);
443 }
444
445 /*
446  * set_tablesample_rel_pathlist
447  *        Build access paths for a sampled relation
448  *
449  * There is only one possible path: a sample scan
450  */
451 static void
452 set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
453 {
454         Relids  required_outer;
455         Path   *path;
456
457         /*
458          * We don't support pushing join clauses into the quals of a seqscan, but
459          * it could still have required parameterization due to LATERAL refs in
460          * its tlist.
461          */
462         required_outer = rel->lateral_relids;
463
464         /* We only do sample scan if it was requested */
465         path = create_samplescan_path(root, rel, required_outer);
466         rel->pathlist = list_make1(path);
467 }
468
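/*
 * Illustrative usage, not from the original source: a query using the new
 * TABLESAMPLE syntax, for example
 *
 *     SELECT * FROM big_table TABLESAMPLE SYSTEM (10);
 *
 * ends up here, and its rel gets exactly one path, the sample scan built
 * above; no seqscan, index, or TID paths are considered for a sampled
 * relation.  (big_table and the 10-percent sample size are hypothetical.)
 */
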
469 /*
470  * set_foreign_size
471  *              Set size estimates for a foreign table RTE
472  */
473 static void
474 set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
475 {
476         /* Mark rel with estimated output rows, width, etc */
477         set_foreign_size_estimates(root, rel);
478
479         /* Let FDW adjust the size estimates, if it can */
480         rel->fdwroutine->GetForeignRelSize(root, rel, rte->relid);
481 }
482
483 /*
484  * set_foreign_pathlist
485  *              Build access paths for a foreign table RTE
486  */
487 static void
488 set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
489 {
490         /* Call the FDW's GetForeignPaths function to generate path(s) */
491         rel->fdwroutine->GetForeignPaths(root, rel, rte->relid);
492 }
493
494 /*
495  * set_append_rel_size
496  *        Set size estimates for an "append relation"
497  *
498  * The passed-in rel and RTE represent the entire append relation.  The
499  * relation's contents are computed by appending together the output of
500  * the individual member relations.  Note that in the inheritance case,
501  * the first member relation is actually the same table as is mentioned in
502  * the parent RTE ... but it has a different RTE and RelOptInfo.  This is
503  * a good thing because their outputs are not the same size.
504  */
505 static void
506 set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
507                                         Index rti, RangeTblEntry *rte)
508 {
509         int                     parentRTindex = rti;
510         double          parent_rows;
511         double          parent_size;
512         double     *parent_attrsizes;
513         int                     nattrs;
514         ListCell   *l;
515
516         /*
517          * Initialize to compute size estimates for whole append relation.
518          *
519          * We handle width estimates by weighting the widths of different child
520          * rels proportionally to their number of rows.  This is sensible because
521          * the use of width estimates is mainly to compute the total relation
522          * "footprint" if we have to sort or hash it.  To do this, we sum the
523          * total equivalent size (in "double" arithmetic) and then divide by the
524          * total rowcount estimate.  This is done separately for the total rel
525          * width and each attribute.
526          *
527          * Note: if you consider changing this logic, beware that child rels could
528          * have zero rows and/or width, if they were excluded by constraints.
529          */
530         parent_rows = 0;
531         parent_size = 0;
532         nattrs = rel->max_attr - rel->min_attr + 1;
533         parent_attrsizes = (double *) palloc0(nattrs * sizeof(double));
534
535         foreach(l, root->append_rel_list)
536         {
537                 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
538                 int                     childRTindex;
539                 RangeTblEntry *childRTE;
540                 RelOptInfo *childrel;
541                 List       *childquals;
542                 Node       *childqual;
543                 ListCell   *parentvars;
544                 ListCell   *childvars;
545
546                 /* append_rel_list contains all append rels; ignore others */
547                 if (appinfo->parent_relid != parentRTindex)
548                         continue;
549
550                 childRTindex = appinfo->child_relid;
551                 childRTE = root->simple_rte_array[childRTindex];
552
553                 /*
554                  * The child rel's RelOptInfo was already created during
555                  * add_base_rels_to_query.
556                  */
557                 childrel = find_base_rel(root, childRTindex);
558                 Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
559
560                 /*
561                  * We have to copy the parent's targetlist and quals to the child,
562                  * with appropriate substitution of variables.  However, only the
563                  * baserestrictinfo quals are needed before we can check for
564                  * constraint exclusion; so do that first and then check to see if we
565                  * can disregard this child.
566                  *
567                  * As of 8.4, the child rel's targetlist might contain non-Var
568                  * expressions, which means that substitution into the quals could
569                  * produce opportunities for const-simplification, and perhaps even
570                  * pseudoconstant quals.  To deal with this, we strip the RestrictInfo
571                  * nodes, do the substitution, do const-simplification, and then
572                  * reconstitute the RestrictInfo layer.
573                  */
574                 childquals = get_all_actual_clauses(rel->baserestrictinfo);
575                 childquals = (List *) adjust_appendrel_attrs(root,
576                                                                                                          (Node *) childquals,
577                                                                                                          appinfo);
578                 childqual = eval_const_expressions(root, (Node *)
579                                                                                    make_ands_explicit(childquals));
580                 if (childqual && IsA(childqual, Const) &&
581                         (((Const *) childqual)->constisnull ||
582                          !DatumGetBool(((Const *) childqual)->constvalue)))
583                 {
584                         /*
585                          * Restriction reduces to constant FALSE or constant NULL after
586                          * substitution, so this child need not be scanned.
587                          */
588                         set_dummy_rel_pathlist(childrel);
589                         continue;
590                 }
591                 childquals = make_ands_implicit((Expr *) childqual);
592                 childquals = make_restrictinfos_from_actual_clauses(root,
593                                                                                                                         childquals);
594                 childrel->baserestrictinfo = childquals;
595
596                 if (relation_excluded_by_constraints(root, childrel, childRTE))
597                 {
598                         /*
599                          * This child need not be scanned, so we can omit it from the
600                          * appendrel.
601                          */
602                         set_dummy_rel_pathlist(childrel);
603                         continue;
604                 }
605
606                 /*
607                  * CE failed, so finish copying/modifying targetlist and join quals.
608                  *
609                  * Note: the resulting childrel->reltargetlist may contain arbitrary
610                  * expressions, which otherwise would not occur in a reltargetlist.
611                  * Code that might be looking at an appendrel child must cope with
612                  * such.  (Normally, a reltargetlist would only include Vars and
613                  * PlaceHolderVars.)
614                  */
615                 childrel->joininfo = (List *)
616                         adjust_appendrel_attrs(root,
617                                                                    (Node *) rel->joininfo,
618                                                                    appinfo);
619                 childrel->reltargetlist = (List *)
620                         adjust_appendrel_attrs(root,
621                                                                    (Node *) rel->reltargetlist,
622                                                                    appinfo);
623
624                 /*
625                  * We have to make child entries in the EquivalenceClass data
626                  * structures as well.  This is needed either if the parent
627                  * participates in some eclass joins (because we will want to consider
628                  * inner-indexscan joins on the individual children) or if the parent
629                  * has useful pathkeys (because we should try to build MergeAppend
630                  * paths that produce those sort orderings).
631                  */
632                 if (rel->has_eclass_joins || has_useful_pathkeys(root, rel))
633                         add_child_rel_equivalences(root, appinfo, rel, childrel);
634                 childrel->has_eclass_joins = rel->has_eclass_joins;
635
636                 /*
637                  * Note: we could compute appropriate attr_needed data for the child's
638                  * variables, by transforming the parent's attr_needed through the
639                  * translated_vars mapping.  However, currently there's no need
640                  * because attr_needed is only examined for base relations not
641                  * otherrels.  So we just leave the child's attr_needed empty.
642                  */
643
644                 /*
645                  * Compute the child's size.
646                  */
647                 set_rel_size(root, childrel, childRTindex, childRTE);
648
649                 /*
650                  * It is possible that constraint exclusion detected a contradiction
651                  * within a child subquery, even though we didn't prove one above. If
652                  * so, we can skip this child.
653                  */
654                 if (IS_DUMMY_REL(childrel))
655                         continue;
656
657                 /*
658                  * Accumulate size information from each live child.
659                  */
660                 if (childrel->rows > 0)
661                 {
662                         parent_rows += childrel->rows;
663                         parent_size += childrel->width * childrel->rows;
664
665                         /*
666                          * Accumulate per-column estimates too.  We need not do anything
667                          * for PlaceHolderVars in the parent list.  If child expression
668                          * isn't a Var, or we didn't record a width estimate for it, we
669                          * have to fall back on a datatype-based estimate.
670                          *
671                          * By construction, child's reltargetlist is 1-to-1 with parent's.
672                          */
673                         forboth(parentvars, rel->reltargetlist,
674                                         childvars, childrel->reltargetlist)
675                         {
676                                 Var                *parentvar = (Var *) lfirst(parentvars);
677                                 Node       *childvar = (Node *) lfirst(childvars);
678
679                                 if (IsA(parentvar, Var))
680                                 {
681                                         int                     pndx = parentvar->varattno - rel->min_attr;
682                                         int32           child_width = 0;
683
684                                         if (IsA(childvar, Var) &&
685                                                 ((Var *) childvar)->varno == childrel->relid)
686                                         {
687                                                 int                     cndx = ((Var *) childvar)->varattno - childrel->min_attr;
688
689                                                 child_width = childrel->attr_widths[cndx];
690                                         }
691                                         if (child_width <= 0)
692                                                 child_width = get_typavgwidth(exprType(childvar),
693                                                                                                           exprTypmod(childvar));
694                                         Assert(child_width > 0);
695                                         parent_attrsizes[pndx] += child_width * childrel->rows;
696                                 }
697                         }
698                 }
699         }
700
701         /*
702          * Save the finished size estimates.
703          */
704         rel->rows = parent_rows;
705         if (parent_rows > 0)
706         {
707                 int                     i;
708
709                 rel->width = rint(parent_size / parent_rows);
710                 for (i = 0; i < nattrs; i++)
711                         rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows);
712         }
713         else
714                 rel->width = 0;                 /* attr_widths should be zero already */
715
716         /*
717          * Set "raw tuples" count equal to "rows" for the appendrel; needed
718          * because some places assume rel->tuples is valid for any baserel.
719          */
720         rel->tuples = parent_rows;
721
722         pfree(parent_attrsizes);
723 }
724
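/*
 * A worked example (editorial, not from the original source) of the
 * row-weighted width averaging described at the top of set_append_rel_size:
 * suppose the appendrel has two live children, one producing 1000 rows of
 * width 40 and one producing 9000 rows of width 120.  Then
 *
 *     parent_rows = 1000 + 9000            = 10000
 *     parent_size = 1000*40 + 9000*120     = 1120000
 *     rel->width  = rint(1120000 / 10000)  = 112
 *
 * so the row-heavy child dominates the average, which is the intended
 * behavior; the per-attribute entries in attr_widths[] are averaged the
 * same way.
 */
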
725 /*
726  * set_append_rel_pathlist
727  *        Build access paths for an "append relation"
728  */
729 static void
730 set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
731                                                 Index rti, RangeTblEntry *rte)
732 {
733         int                     parentRTindex = rti;
734         List       *live_childrels = NIL;
735         List       *subpaths = NIL;
736         bool            subpaths_valid = true;
737         List       *all_child_pathkeys = NIL;
738         List       *all_child_outers = NIL;
739         ListCell   *l;
740
741         /*
742          * Generate access paths for each member relation, and remember the
743          * cheapest path for each one.  Also, identify all pathkeys (orderings)
744          * and parameterizations (required_outer sets) available for the member
745          * relations.
746          */
747         foreach(l, root->append_rel_list)
748         {
749                 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
750                 int                     childRTindex;
751                 RangeTblEntry *childRTE;
752                 RelOptInfo *childrel;
753                 ListCell   *lcp;
754
755                 /* append_rel_list contains all append rels; ignore others */
756                 if (appinfo->parent_relid != parentRTindex)
757                         continue;
758
759                 /* Re-locate the child RTE and RelOptInfo */
760                 childRTindex = appinfo->child_relid;
761                 childRTE = root->simple_rte_array[childRTindex];
762                 childrel = root->simple_rel_array[childRTindex];
763
764                 /*
765                  * Compute the child's access paths.
766                  */
767                 set_rel_pathlist(root, childrel, childRTindex, childRTE);
768
769                 /*
770                  * If child is dummy, ignore it.
771                  */
772                 if (IS_DUMMY_REL(childrel))
773                         continue;
774
775                 /*
776                  * Child is live, so add it to the live_childrels list for use below.
777                  */
778                 live_childrels = lappend(live_childrels, childrel);
779
780                 /*
781                  * If child has an unparameterized cheapest-total path, add that to
782                  * the unparameterized Append path we are constructing for the parent.
783                  * If not, there's no workable unparameterized path.
784                  */
785                 if (childrel->cheapest_total_path->param_info == NULL)
786                         subpaths = accumulate_append_subpath(subpaths,
787                                                                                           childrel->cheapest_total_path);
788                 else
789                         subpaths_valid = false;
790
791                 /*
792                  * Collect lists of all the available path orderings and
793                  * parameterizations for all the children.  We use these as a
794                  * heuristic to indicate which sort orderings and parameterizations we
795                  * should build Append and MergeAppend paths for.
796                  */
797                 foreach(lcp, childrel->pathlist)
798                 {
799                         Path       *childpath = (Path *) lfirst(lcp);
800                         List       *childkeys = childpath->pathkeys;
801                         Relids          childouter = PATH_REQ_OUTER(childpath);
802
803                         /* Unsorted paths don't contribute to pathkey list */
804                         if (childkeys != NIL)
805                         {
806                                 ListCell   *lpk;
807                                 bool            found = false;
808
809                                 /* Have we already seen this ordering? */
810                                 foreach(lpk, all_child_pathkeys)
811                                 {
812                                         List       *existing_pathkeys = (List *) lfirst(lpk);
813
814                                         if (compare_pathkeys(existing_pathkeys,
815                                                                                  childkeys) == PATHKEYS_EQUAL)
816                                         {
817                                                 found = true;
818                                                 break;
819                                         }
820                                 }
821                                 if (!found)
822                                 {
823                                         /* No, so add it to all_child_pathkeys */
824                                         all_child_pathkeys = lappend(all_child_pathkeys,
825                                                                                                  childkeys);
826                                 }
827                         }
828
829                         /* Unparameterized paths don't contribute to param-set list */
830                         if (childouter)
831                         {
832                                 ListCell   *lco;
833                                 bool            found = false;
834
835                                 /* Have we already seen this param set? */
836                                 foreach(lco, all_child_outers)
837                                 {
838                                         Relids          existing_outers = (Relids) lfirst(lco);
839
840                                         if (bms_equal(existing_outers, childouter))
841                                         {
842                                                 found = true;
843                                                 break;
844                                         }
845                                 }
846                                 if (!found)
847                                 {
848                                         /* No, so add it to all_child_outers */
849                                         all_child_outers = lappend(all_child_outers,
850                                                                                            childouter);
851                                 }
852                         }
853                 }
854         }
855
856         /*
857          * If we found unparameterized paths for all children, build an unordered,
858          * unparameterized Append path for the rel.  (Note: this is correct even
859          * if we have zero or one live subpath due to constraint exclusion.)
860          */
861         if (subpaths_valid)
862                 add_path(rel, (Path *) create_append_path(rel, subpaths, NULL));
863
864         /*
865          * Also build unparameterized MergeAppend paths based on the collected
866          * list of child pathkeys.
867          */
868         if (subpaths_valid)
869                 generate_mergeappend_paths(root, rel, live_childrels,
870                                                                    all_child_pathkeys);
871
872         /*
873          * Build Append paths for each parameterization seen among the child rels.
874          * (This may look pretty expensive, but in most cases of practical
875          * interest, the child rels will expose mostly the same parameterizations,
876          * so that not that many cases actually get considered here.)
877          *
878          * The Append node itself cannot enforce quals, so all qual checking must
879          * be done in the child paths.  This means that to have a parameterized
880          * Append path, we must have the exact same parameterization for each
881          * child path; otherwise some children might be failing to check the
882          * moved-down quals.  To make them match up, we can try to increase the
883          * parameterization of lesser-parameterized paths.
884          */
885         foreach(l, all_child_outers)
886         {
887                 Relids          required_outer = (Relids) lfirst(l);
888                 ListCell   *lcr;
889
890                 /* Select the child paths for an Append with this parameterization */
891                 subpaths = NIL;
892                 subpaths_valid = true;
893                 foreach(lcr, live_childrels)
894                 {
895                         RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
896                         Path       *subpath;
897
898                         subpath = get_cheapest_parameterized_child_path(root,
899                                                                                                                         childrel,
900                                                                                                                         required_outer);
901                         if (subpath == NULL)
902                         {
903                                 /* failed to make a suitable path for this child */
904                                 subpaths_valid = false;
905                                 break;
906                         }
907                         subpaths = accumulate_append_subpath(subpaths, subpath);
908                 }
909
910                 if (subpaths_valid)
911                         add_path(rel, (Path *)
912                                          create_append_path(rel, subpaths, required_outer));
913         }
914 }
915
916 /*
917  * generate_mergeappend_paths
918  *              Generate MergeAppend paths for an append relation
919  *
920  * Generate a path for each ordering (pathkey list) appearing in
921  * all_child_pathkeys.
922  *
923  * We consider both cheapest-startup and cheapest-total cases, ie, for each
924  * interesting ordering, collect all the cheapest startup subpaths and all the
925  * cheapest total paths, and build a MergeAppend path for each case.
926  *
927  * We don't currently generate any parameterized MergeAppend paths.  While
928  * it would not take much more code here to do so, it's very unclear that it
929  * is worth the planning cycles to investigate such paths: there's little
930  * use for an ordered path on the inside of a nestloop.  In fact, it's likely
931  * that the current coding of add_path would reject such paths out of hand,
932  * because add_path gives no credit for sort ordering of parameterized paths,
933  * and a parameterized MergeAppend is going to be more expensive than the
934  * corresponding parameterized Append path.  If we ever try harder to support
935  * parameterized mergejoin plans, it might be worth adding support for
936  * parameterized MergeAppends to feed such joins.  (See notes in
937  * optimizer/README for why that might not ever happen, though.)
938  */
939 static void
940 generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
941                                                    List *live_childrels,
942                                                    List *all_child_pathkeys)
943 {
944         ListCell   *lcp;
945
946         foreach(lcp, all_child_pathkeys)
947         {
948                 List       *pathkeys = (List *) lfirst(lcp);
949                 List       *startup_subpaths = NIL;
950                 List       *total_subpaths = NIL;
951                 bool            startup_neq_total = false;
952                 ListCell   *lcr;
953
954                 /* Select the child paths for this ordering... */
955                 foreach(lcr, live_childrels)
956                 {
957                         RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
958                         Path       *cheapest_startup,
959                                            *cheapest_total;
960
961                         /* Locate the right paths, if they are available. */
962                         cheapest_startup =
963                                 get_cheapest_path_for_pathkeys(childrel->pathlist,
964                                                                                            pathkeys,
965                                                                                            NULL,
966                                                                                            STARTUP_COST);
967                         cheapest_total =
968                                 get_cheapest_path_for_pathkeys(childrel->pathlist,
969                                                                                            pathkeys,
970                                                                                            NULL,
971                                                                                            TOTAL_COST);
972
973                         /*
974                          * If we can't find any paths with the right order just use the
975                          * cheapest-total path; we'll have to sort it later.
976                          */
977                         if (cheapest_startup == NULL || cheapest_total == NULL)
978                         {
979                                 cheapest_startup = cheapest_total =
980                                         childrel->cheapest_total_path;
981                                 /* Assert we do have an unparameterized path for this child */
982                                 Assert(cheapest_total->param_info == NULL);
983                         }
984
985                         /*
986                          * Notice whether we actually have different paths for the
987                          * "cheapest" and "total" cases; frequently there will be no point
988                          * in two create_merge_append_path() calls.
989                          */
990                         if (cheapest_startup != cheapest_total)
991                                 startup_neq_total = true;
992
993                         startup_subpaths =
994                                 accumulate_append_subpath(startup_subpaths, cheapest_startup);
995                         total_subpaths =
996                                 accumulate_append_subpath(total_subpaths, cheapest_total);
997                 }
998
999                 /* ... and build the MergeAppend paths */
1000                 add_path(rel, (Path *) create_merge_append_path(root,
1001                                                                                                                 rel,
1002                                                                                                                 startup_subpaths,
1003                                                                                                                 pathkeys,
1004                                                                                                                 NULL));
1005                 if (startup_neq_total)
1006                         add_path(rel, (Path *) create_merge_append_path(root,
1007                                                                                                                         rel,
1008                                                                                                                         total_subpaths,
1009                                                                                                                         pathkeys,
1010                                                                                                                         NULL));
1011         }
1012 }
1013
1014 /*
1015  * get_cheapest_parameterized_child_path
1016  *              Get cheapest path for this relation that has exactly the requested
1017  *              parameterization.
1018  *
1019  * Returns NULL if unable to create such a path.
1020  */
1021 static Path *
1022 get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
1023                                                                           Relids required_outer)
1024 {
1025         Path       *cheapest;
1026         ListCell   *lc;
1027
1028         /*
1029          * Look up the cheapest existing path with no more than the needed
1030          * parameterization.  If it has exactly the needed parameterization, we're
1031          * done.
1032          */
1033         cheapest = get_cheapest_path_for_pathkeys(rel->pathlist,
1034                                                                                           NIL,
1035                                                                                           required_outer,
1036                                                                                           TOTAL_COST);
1037         Assert(cheapest != NULL);
1038         if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer))
1039                 return cheapest;
1040
1041         /*
1042          * Otherwise, we can "reparameterize" an existing path to match the given
1043          * parameterization, which effectively means pushing down additional
1044          * joinquals to be checked within the path's scan.  However, some existing
1045          * paths might check the available joinquals already while others don't;
1046          * therefore, it's not clear which existing path will be cheapest after
1047          * reparameterization.  We have to go through them all and find out.
1048          */
1049         cheapest = NULL;
1050         foreach(lc, rel->pathlist)
1051         {
1052                 Path       *path = (Path *) lfirst(lc);
1053
1054                 /* Can't use it if it needs more than requested parameterization */
1055                 if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer))
1056                         continue;
1057
1058                 /*
1059                  * Reparameterization can only increase the path's cost, so if it's
1060                  * already more expensive than the current cheapest, forget it.
1061                  */
1062                 if (cheapest != NULL &&
1063                         compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
1064                         continue;
1065
1066                 /* Reparameterize if needed, then recheck cost */
1067                 if (!bms_equal(PATH_REQ_OUTER(path), required_outer))
1068                 {
1069                         path = reparameterize_path(root, path, required_outer, 1.0);
1070                         if (path == NULL)
1071                                 continue;               /* failed to reparameterize this one */
1072                         Assert(bms_equal(PATH_REQ_OUTER(path), required_outer));
1073
1074                         if (cheapest != NULL &&
1075                                 compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
1076                                 continue;
1077                 }
1078
1079                 /* We have a new best path */
1080                 cheapest = path;
1081         }
1082
1083         /* Return the best path, or NULL if we found no suitable candidate */
1084         return cheapest;
1085 }
1086
1087 /*
1088  * accumulate_append_subpath
1089  *              Add a subpath to the list being built for an Append or MergeAppend
1090  *
1091  * It's possible that the child is itself an Append or MergeAppend path, in
1092  * which case we can "cut out the middleman" and just add its child paths to
1093  * our own list.  (We don't try to do this earlier because we need to apply
1094  * both levels of transformation to the quals.)
1095  *
1096  * Note that if we omit a child MergeAppend in this way, we are effectively
1097  * omitting a sort step, which seems fine: if the parent is to be an Append,
1098  * its result would be unsorted anyway, while if the parent is to be a
1099  * MergeAppend, there's no point in a separate sort on a child.
1100  */
1101 static List *
1102 accumulate_append_subpath(List *subpaths, Path *path)
1103 {
1104         if (IsA(path, AppendPath))
1105         {
1106                 AppendPath *apath = (AppendPath *) path;
1107
1108                 /* list_copy is important here to avoid sharing list substructure */
1109                 return list_concat(subpaths, list_copy(apath->subpaths));
1110         }
1111         else if (IsA(path, MergeAppendPath))
1112         {
1113                 MergeAppendPath *mpath = (MergeAppendPath *) path;
1114
1115                 /* list_copy is important here to avoid sharing list substructure */
1116                 return list_concat(subpaths, list_copy(mpath->subpaths));
1117         }
1118         else
1119                 return lappend(subpaths, path);
1120 }
1121
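/*
 * Illustrative aside, not from the original source: if the subpath list so
 * far is [X] and the next child path is itself Append(A, B), the result of
 * accumulate_append_subpath is [X, A, B] rather than [X, Append(A, B)];
 * a nested MergeAppend is flattened the same way, its sort step being
 * unnecessary for the reasons given in the comment above.
 */
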
1122 /*
1123  * set_dummy_rel_pathlist
1124  *        Build a dummy path for a relation that's been excluded by constraints
1125  *
1126  * Rather than inventing a special "dummy" path type, we represent this as an
1127  * AppendPath with no members (see also IS_DUMMY_PATH/IS_DUMMY_REL macros).
1128  */
1129 static void
1130 set_dummy_rel_pathlist(RelOptInfo *rel)
1131 {
1132         /* Set dummy size estimates --- we leave attr_widths[] as zeroes */
1133         rel->rows = 0;
1134         rel->width = 0;
1135
1136         /* Discard any pre-existing paths; no further need for them */
1137         rel->pathlist = NIL;
1138
1139         add_path(rel, (Path *) create_append_path(rel, NIL, NULL));
1140
1141         /*
1142          * We set the cheapest path immediately, to ensure that IS_DUMMY_REL()
1143          * will recognize the relation as dummy if anyone asks.  This is redundant
1144          * when we're called from set_rel_size(), but not when called from
1145          * elsewhere, and doing it twice is harmless anyway.
1146          */
1147         set_cheapest(rel);
1148 }
1149
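/*
 * A hedged sketch, not from the original source, of how "is this rel dummy?"
 * can be tested given the representation chosen above (a childless
 * AppendPath as the cheapest path); rel_is_provably_empty is a hypothetical
 * helper name:
 *
 *     static bool
 *     rel_is_provably_empty(RelOptInfo *rel)
 *     {
 *         return rel->cheapest_total_path != NULL &&
 *                IsA(rel->cheapest_total_path, AppendPath) &&
 *                ((AppendPath *) rel->cheapest_total_path)->subpaths == NIL;
 *     }
 *
 * which is essentially what the IS_DUMMY_PATH/IS_DUMMY_REL macros referred
 * to in the comment above encapsulate.
 */
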
1150 /* quick-and-dirty test to see if any joining is needed */
1151 static bool
1152 has_multiple_baserels(PlannerInfo *root)
1153 {
1154         int                     num_base_rels = 0;
1155         Index           rti;
1156
1157         for (rti = 1; rti < root->simple_rel_array_size; rti++)
1158         {
1159                 RelOptInfo *brel = root->simple_rel_array[rti];
1160
1161                 if (brel == NULL)
1162                         continue;
1163
1164                 /* ignore RTEs that are "other rels" */
1165                 if (brel->reloptkind == RELOPT_BASEREL)
1166                         if (++num_base_rels > 1)
1167                                 return true;
1168         }
1169         return false;
1170 }
1171
1172 /*
1173  * set_subquery_pathlist
1174  *              Build the (single) access path for a subquery RTE
1175  *
1176  * We don't currently support generating parameterized paths for subqueries
1177  * by pushing join clauses down into them; it seems too expensive to re-plan
1178  * the subquery multiple times to consider different alternatives.  So the
1179  * subquery will have exactly one path.  (The path will be parameterized
1180  * if the subquery contains LATERAL references, otherwise not.)  Since there's
1181  * no freedom of action here, there's no need for a separate set_subquery_size
1182  * phase: we just make the path right away.
1183  */
1184 static void
1185 set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
1186                                           Index rti, RangeTblEntry *rte)
1187 {
1188         Query      *parse = root->parse;
1189         Query      *subquery = rte->subquery;
1190         Relids          required_outer;
1191         pushdown_safety_info safetyInfo;
1192         double          tuple_fraction;
1193         PlannerInfo *subroot;
1194         List       *pathkeys;
1195
1196         /*
1197          * Must copy the Query so that planning doesn't mess up the RTE contents
1198          * (really really need to fix the planner to not scribble on its input,
1199          * someday ... but see remove_unused_subquery_outputs to start with).
1200          */
1201         subquery = copyObject(subquery);
1202
1203         /*
1204          * If it's a LATERAL subquery, it might contain some Vars of the current
1205          * query level, requiring it to be treated as parameterized, even though
1206          * we don't support pushing down join quals into subqueries.
1207          */
1208         required_outer = rel->lateral_relids;
1209
1210         /*
1211          * Zero out result area for subquery_is_pushdown_safe, so that it can set
1212          * flags as needed while recursing.  In particular, we need a workspace
1213          * for keeping track of unsafe-to-reference columns.  unsafeColumns[i]
1214          * will be set TRUE if we find that output column i of the subquery is
1215          * unsafe to use in a pushed-down qual.
1216          */
1217         memset(&safetyInfo, 0, sizeof(safetyInfo));
1218         safetyInfo.unsafeColumns = (bool *)
1219                 palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
1220
1221         /*
1222          * If the subquery has the "security_barrier" flag, it means the subquery
1223          * originated from a view that must enforce row level security.  Then we
1224          * must not push down quals that contain leaky functions.  (Ideally this
1225          * would be checked inside subquery_is_pushdown_safe, but since we don't
1226          * currently pass the RTE to that function, we must do it here.)
1227          */
1228         safetyInfo.unsafeLeaky = rte->security_barrier;
1229
1230         /*
1231          * If there are any restriction clauses that have been attached to the
1232          * subquery relation, consider pushing them down to become WHERE or HAVING
1233          * quals of the subquery itself.  This transformation is useful because it
1234          * may allow us to generate a better plan for the subquery than evaluating
1235          * all the subquery output rows and then filtering them.
1236          *
1237          * There are several cases where we cannot push down clauses. Restrictions
1238          * involving the subquery are checked by subquery_is_pushdown_safe().
1239          * Restrictions on individual clauses are checked by
1240          * qual_is_pushdown_safe().  Also, we don't want to push down
1241          * pseudoconstant clauses; better to have the gating node above the
1242          * subquery.
1243          *
1244          * Non-pushed-down clauses will get evaluated as qpquals of the
1245          * SubqueryScan node.
1246          *
1247          * XXX Are there any cases where we want to make a policy decision not to
1248          * push down a pushable qual, because it'd result in a worse plan?
1249          */
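        /*
         * For example (names hypothetical), given
         *
         *              SELECT * FROM (SELECT a, b FROM t1
         *                             UNION ALL
         *                             SELECT a, b FROM t2) ss
         *              WHERE ss.a > 10;
         *
         * the qual "a > 10" is pushable and gets copied into both UNION ALL
         * arms, so each arm can filter (or use an index) before the Append
         * instead of filtering at the SubqueryScan.
         */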
1250         if (rel->baserestrictinfo != NIL &&
1251                 subquery_is_pushdown_safe(subquery, subquery, &safetyInfo))
1252         {
1253                 /* OK to consider pushing down individual quals */
1254                 List       *upperrestrictlist = NIL;
1255                 ListCell   *l;
1256
1257                 foreach(l, rel->baserestrictinfo)
1258                 {
1259                         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
1260                         Node       *clause = (Node *) rinfo->clause;
1261
1262                         if (!rinfo->pseudoconstant &&
1263                                 qual_is_pushdown_safe(subquery, rti, clause, &safetyInfo))
1264                         {
1265                                 /* Push it down */
1266                                 subquery_push_qual(subquery, rte, rti, clause);
1267                         }
1268                         else
1269                         {
1270                                 /* Keep it in the upper query */
1271                                 upperrestrictlist = lappend(upperrestrictlist, rinfo);
1272                         }
1273                 }
1274                 rel->baserestrictinfo = upperrestrictlist;
1275         }
1276
1277         pfree(safetyInfo.unsafeColumns);
1278
1279         /*
1280          * The upper query might not use all the subquery's output columns; if
1281          * not, we can simplify.
1282          */
1283         remove_unused_subquery_outputs(subquery, rel);
1284
1285         /*
1286          * We can safely pass the outer tuple_fraction down to the subquery if the
1287          * outer level has no joining, aggregation, or sorting to do. Otherwise
1288          * we'd better tell the subquery to plan for full retrieval. (XXX This
1289          * could probably be made more intelligent ...)
1290          */
1291         if (parse->hasAggs ||
1292                 parse->groupClause ||
1293                 parse->havingQual ||
1294                 parse->distinctClause ||
1295                 parse->sortClause ||
1296                 has_multiple_baserels(root))
1297                 tuple_fraction = 0.0;   /* default case */
1298         else
1299                 tuple_fraction = root->tuple_fraction;
1300
1301         /* plan_params should not be in use in current query level */
1302         Assert(root->plan_params == NIL);
1303
1304         /* Generate the plan for the subquery */
1305         rel->subplan = subquery_planner(root->glob, subquery,
1306                                                                         root,
1307                                                                         false, tuple_fraction,
1308                                                                         &subroot);
1309         rel->subroot = subroot;
1310
1311         /* Isolate the params needed by this specific subplan */
1312         rel->subplan_params = root->plan_params;
1313         root->plan_params = NIL;
1314
1315         /*
1316          * It's possible that constraint exclusion proved the subquery empty. If
1317          * so, it's convenient to turn it back into a dummy path so that we will
1318          * recognize appropriate optimizations at this query level.
1319          */
1320         if (is_dummy_plan(rel->subplan))
1321         {
1322                 set_dummy_rel_pathlist(rel);
1323                 return;
1324         }
1325
1326         /* Mark rel with estimated output rows, width, etc */
1327         set_subquery_size_estimates(root, rel);
1328
1329         /* Convert subquery pathkeys to outer representation */
1330         pathkeys = convert_subquery_pathkeys(root, rel, subroot->query_pathkeys);
1331
1332         /* Generate appropriate path */
1333         add_path(rel, create_subqueryscan_path(root, rel, pathkeys, required_outer));
1334 }
1335
1336 /*
1337  * set_function_pathlist
1338  *              Build the (single) access path for a function RTE
1339  */
1340 static void
1341 set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
1342 {
1343         Relids          required_outer;
1344         List       *pathkeys = NIL;
1345
1346         /*
1347          * We don't support pushing join clauses into the quals of a function
1348          * scan, but it could still have required parameterization due to LATERAL
1349          * refs in the function expression.
1350          */
1351         required_outer = rel->lateral_relids;
1352
1353         /*
1354          * The result is considered unordered unless ORDINALITY was used, in which
1355          * case it is ordered by the ordinal column (the last one).  See if we
1356          * care, by checking for uses of that Var in equivalence classes.
1357          */
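        /*
         * For example:
         *
         *              SELECT * FROM unnest(ARRAY['a','b','c']) WITH ORDINALITY
         *                             AS u(elem, ord)
         *              ORDER BY u.ord;
         *
         * Here the ORDER BY puts u.ord into an equivalence class, so the
         * pathkey built below lets the planner recognize that the function
         * scan already delivers rows in the requested order and skip a Sort.
         */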
1358         if (rte->funcordinality)
1359         {
1360                 AttrNumber      ordattno = rel->max_attr;
1361                 Var                *var = NULL;
1362                 ListCell   *lc;
1363
1364                 /*
1365                  * Is there a Var for it in reltargetlist?  If not, the query did not
1366                  * reference the ordinality column, or at least not in any way that
1367                  * would be interesting for sorting.
1368                  */
1369                 foreach(lc, rel->reltargetlist)
1370                 {
1371                         Var                *node = (Var *) lfirst(lc);
1372
1373                         /* checking varno/varlevelsup is just paranoia */
1374                         if (IsA(node, Var) &&
1375                                 node->varattno == ordattno &&
1376                                 node->varno == rel->relid &&
1377                                 node->varlevelsup == 0)
1378                         {
1379                                 var = node;
1380                                 break;
1381                         }
1382                 }
1383
1384                 /*
1385                  * Try to build pathkeys for this Var with int8 sorting.  We tell
1386                  * build_expression_pathkey not to build any new equivalence class; if
1387                  * the Var isn't already mentioned in some EC, it means that nothing
1388                  * cares about the ordering.
1389                  */
1390                 if (var)
1391                         pathkeys = build_expression_pathkey(root,
1392                                                                                                 (Expr *) var,
1393                                                                                                 NULL,   /* below outer joins */
1394                                                                                                 Int8LessOperator,
1395                                                                                                 rel->relids,
1396                                                                                                 false);
1397         }
1398
1399         /* Generate appropriate path */
1400         add_path(rel, create_functionscan_path(root, rel,
1401                                                                                    pathkeys, required_outer));
1402 }
1403
1404 /*
1405  * set_values_pathlist
1406  *              Build the (single) access path for a VALUES RTE
1407  */
1408 static void
1409 set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
1410 {
1411         Relids          required_outer;
1412
1413         /*
1414          * We don't support pushing join clauses into the quals of a values scan,
1415          * but it could still have required parameterization due to LATERAL refs
1416          * in the values expressions.
1417          */
1418         required_outer = rel->lateral_relids;
1419
1420         /* Generate appropriate path */
1421         add_path(rel, create_valuesscan_path(root, rel, required_outer));
1422 }
1423
1424 /*
1425  * set_cte_pathlist
1426  *              Build the (single) access path for a non-self-reference CTE RTE
1427  *
1428  * There's no need for a separate set_cte_size phase, since we don't
1429  * support join-qual-parameterized paths for CTEs.
1430  */
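/*
 * For example (names hypothetical), in
 *
 *              WITH w AS (SELECT x, count(*) AS n FROM big_table GROUP BY x)
 *              SELECT * FROM w w1 JOIN w w2 ON w1.x = w2.x;
 *
 * the CTE is planned once (that is what filled cteroot->cte_plan_ids), and
 * each of the two references to w simply becomes a CTE Scan path over that
 * shared subplan; this function just locates the subplan and costs the scan.
 */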
1431 static void
1432 set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
1433 {
1434         Plan       *cteplan;
1435         PlannerInfo *cteroot;
1436         Index           levelsup;
1437         int                     ndx;
1438         ListCell   *lc;
1439         int                     plan_id;
1440         Relids          required_outer;
1441
1442         /*
1443          * Find the referenced CTE, and locate the plan previously made for it.
1444          */
1445         levelsup = rte->ctelevelsup;
1446         cteroot = root;
1447         while (levelsup-- > 0)
1448         {
1449                 cteroot = cteroot->parent_root;
1450                 if (!cteroot)                   /* shouldn't happen */
1451                         elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
1452         }
1453
1454         /*
1455          * Note: cte_plan_ids can be shorter than cteList, if we are still working
1456          * on planning the CTEs (ie, this is a side-reference from another CTE).
1457          * So we mustn't use forboth here.
1458          */
1459         ndx = 0;
1460         foreach(lc, cteroot->parse->cteList)
1461         {
1462                 CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
1463
1464                 if (strcmp(cte->ctename, rte->ctename) == 0)
1465                         break;
1466                 ndx++;
1467         }
1468         if (lc == NULL)                         /* shouldn't happen */
1469                 elog(ERROR, "could not find CTE \"%s\"", rte->ctename);
1470         if (ndx >= list_length(cteroot->cte_plan_ids))
1471                 elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
1472         plan_id = list_nth_int(cteroot->cte_plan_ids, ndx);
1473         Assert(plan_id > 0);
1474         cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1);
1475
1476         /* Mark rel with estimated output rows, width, etc */
1477         set_cte_size_estimates(root, rel, cteplan);
1478
1479         /*
1480          * We don't support pushing join clauses into the quals of a CTE scan, but
1481          * it could still have required parameterization due to LATERAL refs in
1482          * its tlist.
1483          */
1484         required_outer = rel->lateral_relids;
1485
1486         /* Generate appropriate path */
1487         add_path(rel, create_ctescan_path(root, rel, required_outer));
1488 }
1489
1490 /*
1491  * set_worktable_pathlist
1492  *              Build the (single) access path for a self-reference CTE RTE
1493  *
1494  * There's no need for a separate set_worktable_size phase, since we don't
1495  * support join-qual-parameterized paths for CTEs.
1496  */
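/*
 * For example:
 *
 *              WITH RECURSIVE t(n) AS (
 *                      VALUES (1)
 *                      UNION ALL
 *                      SELECT n + 1 FROM t WHERE n < 100
 *              )
 *              SELECT sum(n) FROM t;
 *
 * The self-reference to t inside the recursive term is the worktable RTE
 * handled here; its size estimates are taken from the non-recursive term's
 * plan (the VALUES (1) above), which is what the code below digs up.
 */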
1497 static void
1498 set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
1499 {
1500         Plan       *cteplan;
1501         PlannerInfo *cteroot;
1502         Index           levelsup;
1503         Relids          required_outer;
1504
1505         /*
1506          * We need to find the non-recursive term's plan, which is in the plan
1507          * level that's processing the recursive UNION, which is one level *below*
1508          * where the CTE comes from.
1509          */
1510         levelsup = rte->ctelevelsup;
1511         if (levelsup == 0)                      /* shouldn't happen */
1512                 elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
1513         levelsup--;
1514         cteroot = root;
1515         while (levelsup-- > 0)
1516         {
1517                 cteroot = cteroot->parent_root;
1518                 if (!cteroot)                   /* shouldn't happen */
1519                         elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
1520         }
1521         cteplan = cteroot->non_recursive_plan;
1522         if (!cteplan)                           /* shouldn't happen */
1523                 elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
1524
1525         /* Mark rel with estimated output rows, width, etc */
1526         set_cte_size_estimates(root, rel, cteplan);
1527
1528         /*
1529          * We don't support pushing join clauses into the quals of a worktable
1530          * scan, but it could still have required parameterization due to LATERAL
1531          * refs in its tlist.  (I'm not sure this is actually possible given the
1532          * restrictions on recursive references, but it's easy enough to support.)
1533          */
1534         required_outer = rel->lateral_relids;
1535
1536         /* Generate appropriate path */
1537         add_path(rel, create_worktablescan_path(root, rel, required_outer));
1538 }
1539
1540 /*
1541  * make_rel_from_joinlist
1542  *        Build access paths using a "joinlist" to guide the join path search.
1543  *
1544  * See comments for deconstruct_jointree() for definition of the joinlist
1545  * data structure.
1546  */
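/*
 * For example (names hypothetical), "FROM a, b, c" deconstructs to a joinlist
 * of three RangeTblRef nodes, so levels_needed is 3 and we search join orders
 * for all three rels together.  If b and c had instead been wrapped in a
 * sub-joinlist (say, because of join_collapse_limit), we would first recurse
 * to build a joinrel for {b c} and then treat that rel as a single item at
 * the outer level.
 */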
1547 static RelOptInfo *
1548 make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
1549 {
1550         int                     levels_needed;
1551         List       *initial_rels;
1552         ListCell   *jl;
1553
1554         /*
1555          * Count the number of child joinlist nodes.  This is the depth of the
1556          * dynamic-programming algorithm we must employ to consider all ways of
1557          * joining the child nodes.
1558          */
1559         levels_needed = list_length(joinlist);
1560
1561         if (levels_needed <= 0)
1562                 return NULL;                    /* nothing to do? */
1563
1564         /*
1565          * Construct a list of rels corresponding to the child joinlist nodes.
1566          * This may contain both base rels and rels constructed according to
1567          * sub-joinlists.
1568          */
1569         initial_rels = NIL;
1570         foreach(jl, joinlist)
1571         {
1572                 Node       *jlnode = (Node *) lfirst(jl);
1573                 RelOptInfo *thisrel;
1574
1575                 if (IsA(jlnode, RangeTblRef))
1576                 {
1577                         int                     varno = ((RangeTblRef *) jlnode)->rtindex;
1578
1579                         thisrel = find_base_rel(root, varno);
1580                 }
1581                 else if (IsA(jlnode, List))
1582                 {
1583                         /* Recurse to handle subproblem */
1584                         thisrel = make_rel_from_joinlist(root, (List *) jlnode);
1585                 }
1586                 else
1587                 {
1588                         elog(ERROR, "unrecognized joinlist node type: %d",
1589                                  (int) nodeTag(jlnode));
1590                         thisrel = NULL;         /* keep compiler quiet */
1591                 }
1592
1593                 initial_rels = lappend(initial_rels, thisrel);
1594         }
1595
1596         if (levels_needed == 1)
1597         {
1598                 /*
1599                  * Single joinlist node, so we're done.
1600                  */
1601                 return (RelOptInfo *) linitial(initial_rels);
1602         }
1603         else
1604         {
1605                 /*
1606                  * Consider the different orders in which we could join the rels,
1607                  * using a plugin, GEQO, or the regular join search code.
1608                  *
1609                  * We put the initial_rels list into a PlannerInfo field because
1610                  * has_legal_joinclause() needs to look at it (ugly :-().
1611                  */
1612                 root->initial_rels = initial_rels;
1613
1614                 if (join_search_hook)
1615                         return (*join_search_hook) (root, levels_needed, initial_rels);
1616                 else if (enable_geqo && levels_needed >= geqo_threshold)
1617                         return geqo(root, levels_needed, initial_rels);
1618                 else
1619                         return standard_join_search(root, levels_needed, initial_rels);
1620         }
1621 }
1622
1623 /*
1624  * standard_join_search
1625  *        Find possible joinpaths for a query by successively finding ways
1626  *        to join component relations into join relations.
1627  *
1628  * 'levels_needed' is the number of iterations needed, ie, the number of
1629  *              independent jointree items in the query.  This is > 1.
1630  *
1631  * 'initial_rels' is a list of RelOptInfo nodes for each independent
1632  *              jointree item.  These are the components to be joined together.
1633  *              Note that levels_needed == list_length(initial_rels).
1634  *
1635  * Returns the final level of join relations, i.e., the relation that is
1636  * the result of joining all the original relations together.
1637  * At least one implementation path must be provided for this relation and
1638  * all required sub-relations.
1639  *
1640  * To support loadable plugins that modify planner behavior by changing the
1641  * join searching algorithm, we provide a hook variable that lets a plugin
1642  * replace or supplement this function.  Any such hook must return the same
1643  * final join relation as the standard code would, but it might have a
1644  * different set of implementation paths attached, and only the sub-joinrels
1645  * needed for these paths need have been instantiated.
1646  *
1647  * Note to plugin authors: the functions invoked during standard_join_search()
1648  * modify root->join_rel_list and root->join_rel_hash.  If you want to do more
1649  * than one join-order search, you'll probably need to save and restore the
1650  * original states of those data structures.  See geqo_eval() for an example.
1651  */
1652 RelOptInfo *
1653 standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
1654 {
1655         int                     lev;
1656         RelOptInfo *rel;
1657
1658         /*
1659          * This function cannot be invoked recursively within any one planning
1660          * problem, so join_rel_level[] can't be in use already.
1661          */
1662         Assert(root->join_rel_level == NULL);
1663
1664         /*
1665          * We employ a simple "dynamic programming" algorithm: we first find all
1666          * ways to build joins of two jointree items, then all ways to build joins
1667          * of three items (from two-item joins and single items), then four-item
1668          * joins, and so on until we have considered all ways to join all the
1669          * items into one rel.
1670          *
1671          * root->join_rel_level[j] is a list of all the j-item rels.  Initially we
1672          * set root->join_rel_level[1] to represent all the single-jointree-item
1673          * relations.
1674          */
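        /*
         * For example, with three initial rels A, B and C (hypothetical),
         * join_rel_level[1] = {A, B, C}; level 2 builds the two-way joinrels
         * {A B}, {A C} and {B C} that join_search_one_level finds worth
         * considering; and level 3 builds {A B C} by joining each two-way
         * rel to the remaining single rel.
         */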
1675         root->join_rel_level = (List **) palloc0((levels_needed + 1) * sizeof(List *));
1676
1677         root->join_rel_level[1] = initial_rels;
1678
1679         for (lev = 2; lev <= levels_needed; lev++)
1680         {
1681                 ListCell   *lc;
1682
1683                 /*
1684                  * Determine all possible pairs of relations to be joined at this
1685                  * level, and build paths for making each one from every available
1686                  * pair of lower-level relations.
1687                  */
1688                 join_search_one_level(root, lev);
1689
1690                 /*
1691                  * Do cleanup work on each just-processed rel.
1692                  */
1693                 foreach(lc, root->join_rel_level[lev])
1694                 {
1695                         rel = (RelOptInfo *) lfirst(lc);
1696
1697                         /* Find and save the cheapest paths for this rel */
1698                         set_cheapest(rel);
1699
1700 #ifdef OPTIMIZER_DEBUG
1701                         debug_print_rel(root, rel);
1702 #endif
1703                 }
1704         }
1705
1706         /*
1707          * We should have a single rel at the final level.
1708          */
1709         if (root->join_rel_level[levels_needed] == NIL)
1710                 elog(ERROR, "failed to build any %d-way joins", levels_needed);
1711         Assert(list_length(root->join_rel_level[levels_needed]) == 1);
1712
1713         rel = (RelOptInfo *) linitial(root->join_rel_level[levels_needed]);
1714
1715         root->join_rel_level = NULL;
1716
1717         return rel;
1718 }
1719
1720 /*****************************************************************************
1721  *                      PUSHING QUALS DOWN INTO SUBQUERIES
1722  *****************************************************************************/
1723
1724 /*
1725  * subquery_is_pushdown_safe - is a subquery safe for pushing down quals?
1726  *
1727  * subquery is the particular component query being checked.  topquery
1728  * is the top component of a set-operations tree (the same Query if no
1729  * set-op is involved).
1730  *
1731  * Conditions checked here:
1732  *
1733  * 1. If the subquery has a LIMIT clause, we must not push down any quals,
1734  * since that could change the set of rows returned.
1735  *
1736  * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
1737  * quals into it, because that could change the results.
1738  *
1739  * 3. If the subquery uses DISTINCT, we cannot push volatile quals into it.
1740  * This is because upper-level quals should semantically be evaluated only
1741  * once per distinct row, not once per original row, and if the qual is
1742  * volatile then extra evaluations could change the results.  (This issue
1743  * does not apply to other forms of aggregation such as GROUP BY, because
1744  * when those are present we push into HAVING not WHERE, so that the quals
1745  * are still applied after aggregation.)
1746  *
1747  * 4. If the subquery contains window functions, we cannot push volatile quals
1748  * into it.  The issue here is a bit different from DISTINCT: a volatile qual
1749  * might succeed for some rows of a window partition and fail for others,
1750  * thereby changing the partition contents and thus the window functions'
1751  * results for rows that remain.
1752  *
1753  * In addition, we make several checks on the subquery's output columns to see
1754  * if it is safe to reference them in pushed-down quals.  If output column k
1755  * is found to be unsafe to reference, we set safetyInfo->unsafeColumns[k]
1756  * to TRUE, but we don't reject the subquery overall since column k might not
1757  * be referenced by some/all quals.  The unsafeColumns[] array will be
1758  * consulted later by qual_is_pushdown_safe().  It's better to do it this way
1759  * than to make the checks directly in qual_is_pushdown_safe(), because when
1760  * the subquery involves set operations we have to check the output
1761  * expressions in each arm of the set op.
1762  *
1763  * Note: pushing quals into a DISTINCT subquery is theoretically dubious:
1764  * we're effectively assuming that the quals cannot distinguish values that
1765  * the DISTINCT's equality operator sees as equal, yet there are many
1766  * counterexamples to that assumption.  However use of such a qual with a
1767  * DISTINCT subquery would be unsafe anyway, since there's no guarantee which
1768  * "equal" value will be chosen as the output value by the DISTINCT operation.
1769  * So we don't worry too much about that.  Another objection is that if the
1770  * qual is expensive to evaluate, running it for each original row might cost
1771  * more than we save by eliminating rows before the DISTINCT step.  But it
1772  * would be very hard to estimate that at this stage, and in practice pushdown
1773  * seldom seems to make things worse, so we ignore that problem too.
1774  *
1775  * Note: likewise, pushing quals into a subquery with window functions is a
1776  * bit dubious: the quals might remove some rows of a window partition while
1777  * leaving others, causing changes in the window functions' results for the
1778  * surviving rows.  We insist that such a qual reference only partitioning
1779  * columns, but again that only protects us if the qual does not distinguish
1780  * values that the partitioning equality operator sees as equal.  The risks
1781  * here are perhaps larger than for DISTINCT, since no de-duplication of rows
1782  * occurs and thus there is no theoretical problem with such a qual.  But
1783  * we'll do this anyway because the potential performance benefits are very
1784  * large, and we've seen no field complaints about the longstanding comparable
1785  * behavior with DISTINCT.
1786  */
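/*
 * For example (names hypothetical), given
 *
 *              SELECT * FROM (SELECT x FROM t ORDER BY y LIMIT 10) ss
 *              WHERE ss.x > 0;
 *
 * pushing "x > 0" below the LIMIT would change which ten rows are returned,
 * so point 1 makes this function return false and the qual stays behind as a
 * filter on the SubqueryScan.
 */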
1787 static bool
1788 subquery_is_pushdown_safe(Query *subquery, Query *topquery,
1789                                                   pushdown_safety_info *safetyInfo)
1790 {
1791         SetOperationStmt *topop;
1792
1793         /* Check point 1 */
1794         if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
1795                 return false;
1796
1797         /* Check points 3 and 4 */
1798         if (subquery->distinctClause || subquery->hasWindowFuncs)
1799                 safetyInfo->unsafeVolatile = true;
1800
1801         /*
1802          * If we're at a leaf query, check for unsafe expressions in its target
1803          * list, and mark any unsafe ones in unsafeColumns[].  (Non-leaf nodes in
1804          * setop trees have only simple Vars in their tlists, so no need to check
1805          * them.)
1806          */
1807         if (subquery->setOperations == NULL)
1808                 check_output_expressions(subquery, safetyInfo);
1809
1810         /* Are we at top level, or looking at a setop component? */
1811         if (subquery == topquery)
1812         {
1813                 /* Top level, so check any component queries */
1814                 if (subquery->setOperations != NULL)
1815                         if (!recurse_pushdown_safe(subquery->setOperations, topquery,
1816                                                                            safetyInfo))
1817                                 return false;
1818         }
1819         else
1820         {
1821                 /* Setop component must not have more components (too weird) */
1822                 if (subquery->setOperations != NULL)
1823                         return false;
1824                 /* Check whether setop component output types match top level */
1825                 topop = (SetOperationStmt *) topquery->setOperations;
1826                 Assert(topop && IsA(topop, SetOperationStmt));
1827                 compare_tlist_datatypes(subquery->targetList,
1828                                                                 topop->colTypes,
1829                                                                 safetyInfo);
1830         }
1831         return true;
1832 }
1833
1834 /*
1835  * Helper routine to recurse through setOperations tree
1836  */
1837 static bool
1838 recurse_pushdown_safe(Node *setOp, Query *topquery,
1839                                           pushdown_safety_info *safetyInfo)
1840 {
1841         if (IsA(setOp, RangeTblRef))
1842         {
1843                 RangeTblRef *rtr = (RangeTblRef *) setOp;
1844                 RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable);
1845                 Query      *subquery = rte->subquery;
1846
1847                 Assert(subquery != NULL);
1848                 return subquery_is_pushdown_safe(subquery, topquery, safetyInfo);
1849         }
1850         else if (IsA(setOp, SetOperationStmt))
1851         {
1852                 SetOperationStmt *op = (SetOperationStmt *) setOp;
1853
1854                 /* EXCEPT is no good (point 2 for subquery_is_pushdown_safe) */
1855                 if (op->op == SETOP_EXCEPT)
1856                         return false;
1857                 /* Else recurse */
1858                 if (!recurse_pushdown_safe(op->larg, topquery, safetyInfo))
1859                         return false;
1860                 if (!recurse_pushdown_safe(op->rarg, topquery, safetyInfo))
1861                         return false;
1862         }
1863         else
1864         {
1865                 elog(ERROR, "unrecognized node type: %d",
1866                          (int) nodeTag(setOp));
1867         }
1868         return true;
1869 }
1870
1871 /*
1872  * check_output_expressions - check subquery's output expressions for safety
1873  *
1874  * There are several cases in which it's unsafe to push down an upper-level
1875  * qual if it references a particular output column of a subquery.  We check
1876  * each output column of the subquery and set unsafeColumns[k] to TRUE if
1877  * that column is unsafe for a pushed-down qual to reference.  The conditions
1878  * checked here are:
1879  *
1880  * 1. We must not push down any quals that refer to subselect outputs that
1881  * return sets, else we'd introduce functions-returning-sets into the
1882  * subquery's WHERE/HAVING quals.
1883  *
1884  * 2. We must not push down any quals that refer to subselect outputs that
1885  * contain volatile functions, for fear of introducing strange results due
1886  * to multiple evaluation of a volatile function.
1887  *
1888  * 3. If the subquery uses DISTINCT ON, we must not push down any quals that
1889  * refer to non-DISTINCT output columns, because that could change the set
1890  * of rows returned.  (This condition is vacuous for DISTINCT, because then
1891  * there are no non-DISTINCT output columns, so we needn't check.  Note that
1892  * subquery_is_pushdown_safe already reported that we can't use volatile
1893  * quals if there's DISTINCT or DISTINCT ON.)
1894  *
1895  * 4. If the subquery has any window functions, we must not push down quals
1896  * that reference any output columns that are not listed in all the subquery's
1897  * window PARTITION BY clauses.  We can push down quals that use only
1898  * partitioning columns because they should succeed or fail identically for
1899  * every row of any one window partition, and totally excluding some
1900  * partitions will not change a window function's results for remaining
1901  * partitions.  (Again, this also requires nonvolatile quals, but
1902  * subquery_is_pushdown_safe handles that.)
1903  */
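/*
 * For example (names hypothetical), for the subquery
 *
 *              SELECT x, random() AS r FROM t
 *
 * output column r is marked unsafe by point 2, so an upper-level qual such as
 * "ss.r < 0.5" will not be pushed down, while "ss.x = 42" still can be
 * (assuming nothing else blocks it).
 */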
1904 static void
1905 check_output_expressions(Query *subquery, pushdown_safety_info *safetyInfo)
1906 {
1907         ListCell   *lc;
1908
1909         foreach(lc, subquery->targetList)
1910         {
1911                 TargetEntry *tle = (TargetEntry *) lfirst(lc);
1912
1913                 if (tle->resjunk)
1914                         continue;                       /* ignore resjunk columns */
1915
1916                 /* We need not check further if output col is already known unsafe */
1917                 if (safetyInfo->unsafeColumns[tle->resno])
1918                         continue;
1919
1920                 /* Functions returning sets are unsafe (point 1) */
1921                 if (expression_returns_set((Node *) tle->expr))
1922                 {
1923                         safetyInfo->unsafeColumns[tle->resno] = true;
1924                         continue;
1925                 }
1926
1927                 /* Volatile functions are unsafe (point 2) */
1928                 if (contain_volatile_functions((Node *) tle->expr))
1929                 {
1930                         safetyInfo->unsafeColumns[tle->resno] = true;
1931                         continue;
1932                 }
1933
1934                 /* If subquery uses DISTINCT ON, check point 3 */
1935                 if (subquery->hasDistinctOn &&
1936                         !targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
1937                 {
1938                         /* non-DISTINCT column, so mark it unsafe */
1939                         safetyInfo->unsafeColumns[tle->resno] = true;
1940                         continue;
1941                 }
1942
1943                 /* If subquery uses window functions, check point 4 */
1944                 if (subquery->hasWindowFuncs &&
1945                         !targetIsInAllPartitionLists(tle, subquery))
1946                 {
1947                         /* not present in all PARTITION BY clauses, so mark it unsafe */
1948                         safetyInfo->unsafeColumns[tle->resno] = true;
1949                         continue;
1950                 }
1951         }
1952 }
1953
1954 /*
1955  * For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
1956  * push quals into each component query, but the quals can only reference
1957  * subquery columns that suffer no type coercions in the set operation.
1958  * Otherwise there are possible semantic gotchas.  So, we check the
1959  * component queries to see if any of them have output types different from
1960  * the top-level setop outputs.  unsafeColumns[k] is set true if column k
1961  * has different type in any component.
1962  *
1963  * We don't have to care about typmods here: the only allowed difference
1964  * between set-op input and output typmods is input is a specific typmod
1965  * and output is -1, and that does not require a coercion.
1966  *
1967  * tlist is a subquery tlist.
1968  * colTypes is an OID list of the top-level setop's output column types.
1969  * safetyInfo->unsafeColumns[] is the result array.
1970  */
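/*
 * For example (names hypothetical), in
 *
 *              (SELECT int4_col FROM t1 UNION ALL SELECT numeric_col FROM t2)
 *
 * the setop's output column is numeric, so t1's int4 output must be coerced;
 * that column is marked unsafe here, and quals referencing it will not be
 * pushed into the component queries.
 */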
1971 static void
1972 compare_tlist_datatypes(List *tlist, List *colTypes,
1973                                                 pushdown_safety_info *safetyInfo)
1974 {
1975         ListCell   *l;
1976         ListCell   *colType = list_head(colTypes);
1977
1978         foreach(l, tlist)
1979         {
1980                 TargetEntry *tle = (TargetEntry *) lfirst(l);
1981
1982                 if (tle->resjunk)
1983                         continue;                       /* ignore resjunk columns */
1984                 if (colType == NULL)
1985                         elog(ERROR, "wrong number of tlist entries");
1986                 if (exprType((Node *) tle->expr) != lfirst_oid(colType))
1987                         safetyInfo->unsafeColumns[tle->resno] = true;
1988                 colType = lnext(colType);
1989         }
1990         if (colType != NULL)
1991                 elog(ERROR, "wrong number of tlist entries");
1992 }
1993
1994 /*
1995  * targetIsInAllPartitionLists
1996  *              True if the TargetEntry is listed in the PARTITION BY clause
1997  *              of every window defined in the query.
1998  *
1999  * It would be safe to ignore windows not actually used by any window
2000  * function, but it's not easy to get that info at this stage; and it's
2001  * unlikely to be useful to spend any extra cycles getting it, since
2002  * unreferenced window definitions are probably infrequent in practice.
2003  */
2004 static bool
2005 targetIsInAllPartitionLists(TargetEntry *tle, Query *query)
2006 {
2007         ListCell   *lc;
2008
2009         foreach(lc, query->windowClause)
2010         {
2011                 WindowClause *wc = (WindowClause *) lfirst(lc);
2012
2013                 if (!targetIsInSortList(tle, InvalidOid, wc->partitionClause))
2014                         return false;
2015         }
2016         return true;
2017 }
2018
2019 /*
2020  * qual_is_pushdown_safe - is a particular qual safe to push down?
2021  *
2022  * qual is a restriction clause applying to the given subquery (whose RTE
2023  * has index rti in the parent query).
2024  *
2025  * Conditions checked here:
2026  *
2027  * 1. The qual must not contain any subselects (mainly because I'm not sure
2028  * it will work correctly: sublinks will already have been transformed into
2029  * subplans in the qual, but not in the subquery).
2030  *
2031  * 2. If unsafeVolatile is set, the qual must not contain any volatile
2032  * functions.
2033  *
2034  * 3. If unsafeLeaky is set, the qual must not contain any leaky functions
2035  * that are passed Var nodes, and therefore might reveal values from the
2036  * subquery as side effects.
2037  *
2038  * 4. The qual must not refer to the whole-row output of the subquery
2039  * (since there is no easy way to name that within the subquery itself).
2040  *
2041  * 5. The qual must not refer to any subquery output columns that were
2042  * found to be unsafe to reference by subquery_is_pushdown_safe().
2043  */
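/*
 * For example (names hypothetical), "ss.x = 42" normally passes these tests,
 * while "ss = ROW(42, 'foo')" fails point 4 (it is a whole-row Var, varattno
 * 0) and "ss.x = (SELECT max(x) FROM other)" fails point 1.
 */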
2044 static bool
2045 qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
2046                                           pushdown_safety_info *safetyInfo)
2047 {
2048         bool            safe = true;
2049         List       *vars;
2050         ListCell   *vl;
2051
2052         /* Refuse subselects (point 1) */
2053         if (contain_subplans(qual))
2054                 return false;
2055
2056         /* Refuse volatile quals if we found they'd be unsafe (point 2) */
2057         if (safetyInfo->unsafeVolatile &&
2058                 contain_volatile_functions(qual))
2059                 return false;
2060
2061         /* Refuse leaky quals if told to (point 3) */
2062         if (safetyInfo->unsafeLeaky &&
2063                 contain_leaked_vars(qual))
2064                 return false;
2065
2066         /*
2067          * It would be unsafe to push down window function calls, but at least for
2068          * the moment we could never see any in a qual anyhow.  (The same applies
2069          * to aggregates, which we check for in pull_var_clause below.)
2070          */
2071         Assert(!contain_window_function(qual));
2072
2073         /*
2074          * Examine all Vars used in clause; since it's a restriction clause, all
2075          * such Vars must refer to subselect output columns.
2076          */
2077         vars = pull_var_clause(qual,
2078                                                    PVC_REJECT_AGGREGATES,
2079                                                    PVC_INCLUDE_PLACEHOLDERS);
2080         foreach(vl, vars)
2081         {
2082                 Var                *var = (Var *) lfirst(vl);
2083
2084                 /*
2085                  * XXX Punt if we find any PlaceHolderVars in the restriction clause.
2086                  * It's not clear whether a PHV could safely be pushed down, and even
2087                  * less clear whether such a situation could arise in any cases of
2088                  * practical interest anyway.  So for the moment, just refuse to push
2089                  * down.
2090                  */
2091                 if (!IsA(var, Var))
2092                 {
2093                         safe = false;
2094                         break;
2095                 }
2096
2097                 Assert(var->varno == rti);
2098                 Assert(var->varattno >= 0);
2099
2100                 /* Check point 4 */
2101                 if (var->varattno == 0)
2102                 {
2103                         safe = false;
2104                         break;
2105                 }
2106
2107                 /* Check point 5 */
2108                 if (safetyInfo->unsafeColumns[var->varattno])
2109                 {
2110                         safe = false;
2111                         break;
2112                 }
2113         }
2114
2115         list_free(vars);
2116
2117         return safe;
2118 }
2119
2120 /*
2121  * subquery_push_qual - push down a qual that we have determined is safe
2122  */
2123 static void
2124 subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual)
2125 {
2126         if (subquery->setOperations != NULL)
2127         {
2128                 /* Recurse to push it separately to each component query */
2129                 recurse_push_qual(subquery->setOperations, subquery,
2130                                                   rte, rti, qual);
2131         }
2132         else
2133         {
2134                 /*
2135                  * We need to replace Vars in the qual (which must refer to outputs of
2136                  * the subquery) with copies of the subquery's targetlist expressions.
2137                  * Note that at this point, any uplevel Vars in the qual should have
2138                  * been replaced with Params, so they need no work.
2139                  *
2140                  * This step also ensures that when we are pushing into a setop tree,
2141                  * each component query gets its own copy of the qual.
2142                  */
2143                 qual = ReplaceVarsFromTargetList(qual, rti, 0, rte,
2144                                                                                  subquery->targetList,
2145                                                                                  REPLACEVARS_REPORT_ERROR, 0,
2146                                                                                  &subquery->hasSubLinks);
2147
2148                 /*
2149                  * Now attach the qual to the proper place: normally WHERE, but if the
2150                  * subquery uses grouping or aggregation, put it in HAVING (since the
2151                  * qual really refers to the group-result rows).
2152                  */
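                /*
                 * For example (names hypothetical), pushing "ss.total > 100" into
                 *
                 *              SELECT x, sum(y) AS total FROM t GROUP BY x
                 *
                 * has just rewritten the qual into "sum(y) > 100", and since the
                 * subquery has grouping the qual is attached as a HAVING clause
                 * rather than a WHERE clause.
                 */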
2153                 if (subquery->hasAggs || subquery->groupClause || subquery->havingQual)
2154                         subquery->havingQual = make_and_qual(subquery->havingQual, qual);
2155                 else
2156                         subquery->jointree->quals =
2157                                 make_and_qual(subquery->jointree->quals, qual);
2158
2159                 /*
2160                  * We need not change the subquery's hasAggs or hasSublinks flags,
2161                  * since we can't be pushing down any aggregates that weren't there
2162                  * before, and we don't push down subselects at all.
2163                  */
2164         }
2165 }
2166
2167 /*
2168  * Helper routine to recurse through setOperations tree
2169  */
2170 static void
2171 recurse_push_qual(Node *setOp, Query *topquery,
2172                                   RangeTblEntry *rte, Index rti, Node *qual)
2173 {
2174         if (IsA(setOp, RangeTblRef))
2175         {
2176                 RangeTblRef *rtr = (RangeTblRef *) setOp;
2177                 RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable);
2178                 Query      *subquery = subrte->subquery;
2179
2180                 Assert(subquery != NULL);
2181                 subquery_push_qual(subquery, rte, rti, qual);
2182         }
2183         else if (IsA(setOp, SetOperationStmt))
2184         {
2185                 SetOperationStmt *op = (SetOperationStmt *) setOp;
2186
2187                 recurse_push_qual(op->larg, topquery, rte, rti, qual);
2188                 recurse_push_qual(op->rarg, topquery, rte, rti, qual);
2189         }
2190         else
2191         {
2192                 elog(ERROR, "unrecognized node type: %d",
2193                          (int) nodeTag(setOp));
2194         }
2195 }
2196
2197 /*****************************************************************************
2198  *                      SIMPLIFYING SUBQUERY TARGETLISTS
2199  *****************************************************************************/
2200
2201 /*
2202  * remove_unused_subquery_outputs
2203  *              Remove subquery targetlist items we don't need
2204  *
2205  * It's possible, even likely, that the upper query does not read all the
2206  * output columns of the subquery.  We can remove any such outputs that are
2207  * not needed by the subquery itself (e.g., as sort/group columns) and do not
2208  * affect semantics otherwise (e.g., volatile functions can't be removed).
2209  * This is useful not only because we might be able to remove expensive-to-
2210  * compute expressions, but because deletion of output columns might allow
2211  * optimizations such as join removal to occur within the subquery.
2212  *
2213  * To avoid affecting column numbering in the targetlist, we don't physically
2214  * remove unused tlist entries, but rather replace their expressions with NULL
2215  * constants.  This is implemented by modifying subquery->targetList.
2216  */
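/*
 * For example (names hypothetical), if the upper query is
 *
 *              SELECT ss.a FROM (SELECT a, slow_func(b) AS c FROM t) ss
 *
 * and slow_func() is neither volatile nor set-returning, the c entry's
 * expression is replaced by a NULL constant of the same type, so the subquery
 * never has to evaluate slow_func() at all.
 */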
2217 static void
2218 remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel)
2219 {
2220         Bitmapset  *attrs_used = NULL;
2221         ListCell   *lc;
2222
2223         /*
2224          * Do nothing if subquery has UNION/INTERSECT/EXCEPT: in principle we
2225          * could update all the child SELECTs' tlists, but it seems not worth the
2226          * trouble presently.
2227          */
2228         if (subquery->setOperations)
2229                 return;
2230
2231         /*
2232          * If subquery has regular DISTINCT (not DISTINCT ON), we're wasting our
2233          * time: all its output columns must be used in the distinctClause.
2234          */
2235         if (subquery->distinctClause && !subquery->hasDistinctOn)
2236                 return;
2237
2238         /*
2239          * Collect a bitmap of all the output column numbers used by the upper
2240          * query.
2241          *
2242          * Add all the attributes needed for joins or final output.  Note: we must
2243          * look at reltargetlist, not the attr_needed data, because attr_needed
2244          * isn't computed for inheritance child rels, cf set_append_rel_size().
2245          * (XXX might be worth changing that sometime.)
2246          */
2247         pull_varattnos((Node *) rel->reltargetlist, rel->relid, &attrs_used);
2248
2249         /* Add all the attributes used by un-pushed-down restriction clauses. */
2250         foreach(lc, rel->baserestrictinfo)
2251         {
2252                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
2253
2254                 pull_varattnos((Node *) rinfo->clause, rel->relid, &attrs_used);
2255         }
2256
2257         /*
2258          * If there's a whole-row reference to the subquery, we can't remove
2259          * anything.
2260          */
2261         if (bms_is_member(0 - FirstLowInvalidHeapAttributeNumber, attrs_used))
2262                 return;
2263
2264         /*
2265          * Run through the tlist and zap entries we don't need.  It's okay to
2266          * modify the tlist items in-place because set_subquery_pathlist made a
2267          * copy of the subquery.
2268          */
2269         foreach(lc, subquery->targetList)
2270         {
2271                 TargetEntry *tle = (TargetEntry *) lfirst(lc);
2272                 Node       *texpr = (Node *) tle->expr;
2273
2274                 /*
2275                  * If it has a sortgroupref number, it's used in some sort/group
2276                  * clause so we'd better not remove it.  Also, don't remove any
2277                  * resjunk columns, since their reason for being has nothing to do
2278                  * with anybody reading the subquery's output.  (It's likely that
2279                  * resjunk columns in a sub-SELECT would always have ressortgroupref
2280                  * set, but even if they don't, it seems imprudent to remove them.)
2281                  */
2282                 if (tle->ressortgroupref || tle->resjunk)
2283                         continue;
2284
2285                 /*
2286                  * If it's used by the upper query, we can't remove it.
2287                  */
2288                 if (bms_is_member(tle->resno - FirstLowInvalidHeapAttributeNumber,
2289                                                   attrs_used))
2290                         continue;
2291
2292                 /*
2293                  * If it contains a set-returning function, we can't remove it since
2294                  * that could change the number of rows returned by the subquery.
2295                  */
2296                 if (expression_returns_set(texpr))
2297                         continue;
2298
2299                 /*
2300                  * If it contains volatile functions, we daren't remove it for fear
2301                  * that the user is expecting their side-effects to happen.
2302                  */
2303                 if (contain_volatile_functions(texpr))
2304                         continue;
2305
2306                 /*
2307                  * OK, we don't need it.  Replace the expression with a NULL constant.
2308                  * Preserve the exposed type of the expression, in case something
2309                  * looks at the rowtype of the subquery's result.
2310                  */
2311                 tle->expr = (Expr *) makeNullConst(exprType(texpr),
2312                                                                                    exprTypmod(texpr),
2313                                                                                    exprCollation(texpr));
2314         }
2315 }
2316
2317 /*****************************************************************************
2318  *                      DEBUG SUPPORT
2319  *****************************************************************************/
2320
2321 #ifdef OPTIMIZER_DEBUG
2322
2323 static void
2324 print_relids(Relids relids)
2325 {
2326         int                     x;
2327         bool            first = true;
2328
2329         x = -1;
2330         while ((x = bms_next_member(relids, x)) >= 0)
2331         {
2332                 if (!first)
2333                         printf(" ");
2334                 printf("%d", x);
2335                 first = false;
2336         }
2337 }
2338
2339 static void
2340 print_restrictclauses(PlannerInfo *root, List *clauses)
2341 {
2342         ListCell   *l;
2343
2344         foreach(l, clauses)
2345         {
2346                 RestrictInfo *c = lfirst(l);
2347
2348                 print_expr((Node *) c->clause, root->parse->rtable);
2349                 if (lnext(l))
2350                         printf(", ");
2351         }
2352 }
2353
2354 static void
2355 print_path(PlannerInfo *root, Path *path, int indent)
2356 {
2357         const char *ptype;
2358         bool            join = false;
2359         Path       *subpath = NULL;
2360         int                     i;
2361
2362         switch (nodeTag(path))
2363         {
2364                 case T_Path:
2365                         ptype = "SeqScan";
2366                         break;
2367                 case T_IndexPath:
2368                         ptype = "IdxScan";
2369                         break;
2370                 case T_BitmapHeapPath:
2371                         ptype = "BitmapHeapScan";
2372                         break;
2373                 case T_BitmapAndPath:
2374                         ptype = "BitmapAndPath";
2375                         break;
2376                 case T_BitmapOrPath:
2377                         ptype = "BitmapOrPath";
2378                         break;
2379                 case T_TidPath:
2380                         ptype = "TidScan";
2381                         break;
2382                 case T_ForeignPath:
2383                         ptype = "ForeignScan";
2384                         break;
2385                 case T_AppendPath:
2386                         ptype = "Append";
2387                         break;
2388                 case T_MergeAppendPath:
2389                         ptype = "MergeAppend";
2390                         break;
2391                 case T_ResultPath:
2392                         ptype = "Result";
2393                         break;
2394                 case T_MaterialPath:
2395                         ptype = "Material";
2396                         subpath = ((MaterialPath *) path)->subpath;
2397                         break;
2398                 case T_UniquePath:
2399                         ptype = "Unique";
2400                         subpath = ((UniquePath *) path)->subpath;
2401                         break;
2402                 case T_NestPath:
2403                         ptype = "NestLoop";
2404                         join = true;
2405                         break;
2406                 case T_MergePath:
2407                         ptype = "MergeJoin";
2408                         join = true;
2409                         break;
2410                 case T_HashPath:
2411                         ptype = "HashJoin";
2412                         join = true;
2413                         break;
2414                 default:
2415                         ptype = "???Path";
2416                         break;
2417         }
2418
2419         for (i = 0; i < indent; i++)
2420                 printf("\t");
2421         printf("%s", ptype);
2422
2423         if (path->parent)
2424         {
2425                 printf("(");
2426                 print_relids(path->parent->relids);
2427                 printf(") rows=%.0f", path->parent->rows);
2428         }
2429         printf(" cost=%.2f..%.2f\n", path->startup_cost, path->total_cost);
2430
2431         if (path->pathkeys)
2432         {
2433                 for (i = 0; i < indent; i++)
2434                         printf("\t");
2435                 printf("  pathkeys: ");
2436                 print_pathkeys(path->pathkeys, root->parse->rtable);
2437         }
2438
2439         if (join)
2440         {
2441                 JoinPath   *jp = (JoinPath *) path;
2442
2443                 for (i = 0; i < indent; i++)
2444                         printf("\t");
2445                 printf("  clauses: ");
2446                 print_restrictclauses(root, jp->joinrestrictinfo);
2447                 printf("\n");
2448
2449                 if (IsA(path, MergePath))
2450                 {
2451                         MergePath  *mp = (MergePath *) path;
2452
2453                         for (i = 0; i < indent; i++)
2454                                 printf("\t");
2455                         printf("  sortouter=%d sortinner=%d materializeinner=%d\n",
2456                                    ((mp->outersortkeys) ? 1 : 0),
2457                                    ((mp->innersortkeys) ? 1 : 0),
2458                                    ((mp->materialize_inner) ? 1 : 0));
2459                 }
2460
2461                 print_path(root, jp->outerjoinpath, indent + 1);
2462                 print_path(root, jp->innerjoinpath, indent + 1);
2463         }
2464
2465         if (subpath)
2466                 print_path(root, subpath, indent + 1);
2467 }
2468
2469 void
2470 debug_print_rel(PlannerInfo *root, RelOptInfo *rel)
2471 {
2472         ListCell   *l;
2473
2474         printf("RELOPTINFO (");
2475         print_relids(rel->relids);
2476         printf("): rows=%.0f width=%d\n", rel->rows, rel->width);
2477
2478         if (rel->baserestrictinfo)
2479         {
2480                 printf("\tbaserestrictinfo: ");
2481                 print_restrictclauses(root, rel->baserestrictinfo);
2482                 printf("\n");
2483         }
2484
2485         if (rel->joininfo)
2486         {
2487                 printf("\tjoininfo: ");
2488                 print_restrictclauses(root, rel->joininfo);
2489                 printf("\n");
2490         }
2491
2492         printf("\tpath list:\n");
2493         foreach(l, rel->pathlist)
2494                 print_path(root, lfirst(l), 1);
2495         if (rel->cheapest_startup_path)
2496         {
2497                 printf("\n\tcheapest startup path:\n");
2498                 print_path(root, rel->cheapest_startup_path, 1);
2499         }
2500         if (rel->cheapest_total_path)
2501         {
2502                 printf("\n\tcheapest total path:\n");
2503                 print_path(root, rel->cheapest_total_path, 1);
2504         }
2505         printf("\n");
2506         fflush(stdout);
2507 }
2508
2509 #endif   /* OPTIMIZER_DEBUG */