1 /*-------------------------------------------------------------------------
4 * Routines to find possible search paths for processing a query
6 * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/optimizer/path/allpaths.c
13 *-------------------------------------------------------------------------
20 #include "access/sysattr.h"
21 #include "catalog/pg_class.h"
22 #include "catalog/pg_operator.h"
23 #include "foreign/fdwapi.h"
24 #include "nodes/makefuncs.h"
25 #include "nodes/nodeFuncs.h"
26 #ifdef OPTIMIZER_DEBUG
27 #include "nodes/print.h"
29 #include "optimizer/clauses.h"
30 #include "optimizer/cost.h"
31 #include "optimizer/geqo.h"
32 #include "optimizer/pathnode.h"
33 #include "optimizer/paths.h"
34 #include "optimizer/plancat.h"
35 #include "optimizer/planner.h"
36 #include "optimizer/prep.h"
37 #include "optimizer/restrictinfo.h"
38 #include "optimizer/var.h"
39 #include "parser/parse_clause.h"
40 #include "parser/parsetree.h"
41 #include "rewrite/rewriteManip.h"
42 #include "utils/lsyscache.h"
45 /* results of subquery_is_pushdown_safe */
/*
 * NOTE(review): this listing has elided lines (embedded numbering jumps
 * 46 -> 48, dropping the opening brace).  Workspace struct threaded through
 * the subquery_is_pushdown_safe() recursion; unsafeColumns is a palloc'd
 * array indexed by subquery output-column number (see set_subquery_pathlist,
 * which allocates list_length(targetList)+1 entries).
 */
46 typedef struct pushdown_safety_info
48 bool *unsafeColumns; /* which output columns are unsafe to use */
49 bool unsafeVolatile; /* don't push down volatile quals */
50 bool unsafeLeaky; /* don't push down leaky quals */
51 } pushdown_safety_info;
53 /* These parameters are set by GUC */
54 bool enable_geqo = false; /* just in case GUC doesn't set it */
/*
 * NOTE(review): numbering jumps 54 -> 57 here; at least one companion GUC
 * variable (presumably the geqo threshold) has been elided from the listing.
 */
57 /* Hook for plugins to get control in set_rel_pathlist() */
58 set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL;
60 /* Hook for plugins to replace standard_join_search() */
61 join_search_hook_type join_search_hook = NULL;
64 static void set_base_rel_sizes(PlannerInfo *root);
65 static void set_base_rel_pathlists(PlannerInfo *root);
66 static void set_rel_size(PlannerInfo *root, RelOptInfo *rel,
67 Index rti, RangeTblEntry *rte);
68 static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
69 Index rti, RangeTblEntry *rte);
70 static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
72 static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
74 static void set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel,
76 static void set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
78 static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel,
80 static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
82 static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
83 Index rti, RangeTblEntry *rte);
84 static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
85 Index rti, RangeTblEntry *rte);
86 static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
88 List *all_child_pathkeys);
89 static Path *get_cheapest_parameterized_child_path(PlannerInfo *root,
91 Relids required_outer);
92 static List *accumulate_append_subpath(List *subpaths, Path *path);
93 static void set_dummy_rel_pathlist(RelOptInfo *rel);
94 static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
95 Index rti, RangeTblEntry *rte);
96 static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
98 static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel,
100 static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel,
102 static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
104 static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
105 static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
106 pushdown_safety_info *safetyInfo);
107 static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
108 pushdown_safety_info *safetyInfo);
109 static void check_output_expressions(Query *subquery,
110 pushdown_safety_info *safetyInfo);
111 static void compare_tlist_datatypes(List *tlist, List *colTypes,
112 pushdown_safety_info *safetyInfo);
113 static bool targetIsInAllPartitionLists(TargetEntry *tle, Query *query);
114 static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
115 pushdown_safety_info *safetyInfo);
116 static void subquery_push_qual(Query *subquery,
117 RangeTblEntry *rte, Index rti, Node *qual);
118 static void recurse_push_qual(Node *setOp, Query *topquery,
119 RangeTblEntry *rte, Index rti, Node *qual);
120 static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel);
125 * Finds all possible access paths for executing a query, returning a
126 * single rel that represents the join of all base rels in the query.
/*
 * NOTE(review): the function's return type, opening brace, and local
 * declarations (the result rel and the rti loop index) are elided from this
 * listing (numbering jumps 126 -> 129 -> 135); not compilable as shown.
 * Flow: build root->all_baserels from the simple_rel_array, size then path
 * the base rels, and hand the joinlist to make_rel_from_joinlist().
 */
129 make_one_rel(PlannerInfo *root, List *joinlist)
135 * Construct the all_baserels Relids set.
137 root->all_baserels = NULL;
138 for (rti = 1; rti < root->simple_rel_array_size; rti++)
140 RelOptInfo *brel = root->simple_rel_array[rti];
142 /* there may be empty slots corresponding to non-baserel RTEs */
146 Assert(brel->relid == rti); /* sanity check on array */
148 /* ignore RTEs that are "other rels" */
149 if (brel->reloptkind != RELOPT_BASEREL)
152 root->all_baserels = bms_add_member(root->all_baserels, brel->relid);
156 * Generate access paths for the base rels.
158 set_base_rel_sizes(root);
159 set_base_rel_pathlists(root);
162 * Generate access paths for the entire join tree.
164 rel = make_rel_from_joinlist(root, joinlist);
167 * The result should join all and only the query's base rels.
169 Assert(bms_equal(rel->relids, root->all_baserels));
/* presumably followed by "return rel;" — elided from the listing */
176 * Set the size estimates (rows and widths) for each base-relation entry.
178 * We do this in a separate pass over the base rels so that rowcount
179 * estimates are available for parameterized path generation.
/*
 * NOTE(review): static return type, braces, rti declaration, and the
 * NULL-slot "continue" are elided from this listing; the loop delegates
 * each plain baserel to set_rel_size().
 */
182 set_base_rel_sizes(PlannerInfo *root)
186 for (rti = 1; rti < root->simple_rel_array_size; rti++)
188 RelOptInfo *rel = root->simple_rel_array[rti];
190 /* there may be empty slots corresponding to non-baserel RTEs */
194 Assert(rel->relid == rti); /* sanity check on array */
196 /* ignore RTEs that are "other rels" */
197 if (rel->reloptkind != RELOPT_BASEREL)
200 set_rel_size(root, rel, rti, root->simple_rte_array[rti]);
205 * set_base_rel_pathlists
206 * Finds all paths available for scanning each base-relation entry.
207 * Sequential scan and any available indices are considered.
208 * Each useful path is attached to its relation's 'pathlist' field.
/*
 * NOTE(review): mirrors set_base_rel_sizes() above, but delegates to
 * set_rel_pathlist().  Braces, declarations and the NULL-slot skip are
 * elided from this listing.
 */
211 set_base_rel_pathlists(PlannerInfo *root)
215 for (rti = 1; rti < root->simple_rel_array_size; rti++)
217 RelOptInfo *rel = root->simple_rel_array[rti];
219 /* there may be empty slots corresponding to non-baserel RTEs */
223 Assert(rel->relid == rti); /* sanity check on array */
225 /* ignore RTEs that are "other rels" */
226 if (rel->reloptkind != RELOPT_BASEREL)
229 set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
235 * Set size estimates for a base relation
/*
 * NOTE(review): this listing drops the switch's "case RTE_..." labels,
 * "break"s, "return"s and braces (numbering jumps e.g. 264 -> 267,
 * 290 -> 293); the dispatch below is on rel->rtekind: RELATION (foreign /
 * tablesample / plain), SUBQUERY, FUNCTION, VALUES, CTE.  Restore from
 * upstream before compiling.
 */
238 set_rel_size(PlannerInfo *root, RelOptInfo *rel,
239 Index rti, RangeTblEntry *rte)
241 if (rel->reloptkind == RELOPT_BASEREL &&
242 relation_excluded_by_constraints(root, rel, rte))
245 * We proved we don't need to scan the rel via constraint exclusion,
246 * so set up a single dummy path for it. Here we only check this for
247 * regular baserels; if it's an otherrel, CE was already checked in
248 * set_append_rel_size().
250 * In this case, we go ahead and set up the relation's path right away
251 * instead of leaving it for set_rel_pathlist to do. This is because
252 * we don't have a convention for marking a rel as dummy except by
253 * assigning a dummy path to it.
255 set_dummy_rel_pathlist(rel);
259 /* It's an "append relation", process accordingly */
260 set_append_rel_size(root, rel, rti, rte);
264 switch (rel->rtekind)
267 if (rte->relkind == RELKIND_FOREIGN_TABLE)
270 set_foreign_size(root, rel, rte);
272 else if (rte->tablesample != NULL)
274 /* Sampled relation */
275 set_tablesample_rel_size(root, rel, rte);
280 set_plain_rel_size(root, rel, rte);
286 * Subqueries don't support making a choice between
287 * parameterized and unparameterized paths, so just go ahead
288 * and build their paths immediately.
290 set_subquery_pathlist(root, rel, rti, rte);
293 set_function_size_estimates(root, rel);
296 set_values_size_estimates(root, rel);
301 * CTEs don't support making a choice between parameterized
302 * and unparameterized paths, so just go ahead and build their
305 if (rte->self_reference)
306 set_worktable_pathlist(root, rel, rte);
308 set_cte_pathlist(root, rel, rte);
311 elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
319 * Build access paths for a base relation
/*
 * NOTE(review): as with set_rel_size() above, the switch's case labels,
 * breaks, braces, set_cheapest() call and the closing #endif are elided
 * from this listing (numbering jumps 336 -> 339, 383 -> 386, etc.).
 */
322 set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
323 Index rti, RangeTblEntry *rte)
325 if (IS_DUMMY_REL(rel))
327 /* We already proved the relation empty, so nothing more to do */
331 /* It's an "append relation", process accordingly */
332 set_append_rel_pathlist(root, rel, rti, rte);
336 switch (rel->rtekind)
339 if (rte->relkind == RELKIND_FOREIGN_TABLE)
342 set_foreign_pathlist(root, rel, rte);
344 else if (rte->tablesample != NULL)
346 /* Build sample scan on relation */
347 set_tablesample_rel_pathlist(root, rel, rte)
352 set_plain_rel_pathlist(root, rel, rte);
356 /* Subquery --- fully handled during set_rel_size */
360 set_function_pathlist(root, rel, rte);
364 set_values_pathlist(root, rel, rte);
367 /* CTE reference --- fully handled during set_rel_size */
370 elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
376 * Allow a plugin to editorialize on the set of Paths for this base
377 * relation. It could add new paths (such as CustomPaths) by calling
378 * add_path(), or delete or modify paths added by the core code.
380 if (set_rel_pathlist_hook)
381 (*set_rel_pathlist_hook) (root, rel, rti, rte);
383 /* Now find the cheapest of the paths for this rel */
386 #ifdef OPTIMIZER_DEBUG
387 debug_print_rel(root, rel);
393 * Set size estimates for a plain relation (no subquery, no inheritance)
/* NOTE(review): "static void", braces elided from the listing. */
396 set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
399 * Test any partial indexes of rel for applicability. We must do this
400 * first since partial unique indexes can affect size estimates.
402 check_partial_indexes(root, rel);
404 /* Mark rel with estimated output rows, width, etc */
405 set_baserel_size_estimates(root, rel);
409 * set_plain_rel_pathlist
410 * Build access paths for a plain relation (no subquery, no inheritance)
/* NOTE(review): "static void", braces elided from the listing. */
413 set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
415 Relids required_outer;
418 * We don't support pushing join clauses into the quals of a seqscan, but
419 * it could still have required parameterization due to LATERAL refs in
422 required_outer = rel->lateral_relids;
424 /* Consider sequential scan */
425 add_path(rel, create_seqscan_path(root, rel, required_outer));
427 /* Consider index scans */
428 create_index_paths(root, rel);
430 /* Consider TID scans */
431 create_tidscan_paths(root, rel);
435 * set_tablesample_rel_size
436 * Set size estimates for a sampled relation.
/* NOTE(review): "static void", braces elided from the listing. */
439 set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
441 /* Mark rel with estimated output rows, width, etc */
442 set_baserel_size_estimates(root, rel);
446 * set_tablesample_rel_pathlist
447 * Build access paths for a sampled relation
449 * There is only one possible path - sampling scan
/*
 * NOTE(review): "static void", braces, and the "Path *path" declaration
 * are elided from the listing.  Note the path replaces (not add_path's)
 * the whole pathlist — sampling scan is the only choice.
 */
452 set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
454 Relids required_outer;
458 * We don't support pushing join clauses into the quals of a seqscan, but
459 * it could still have required parameterization due to LATERAL refs in
462 required_outer = rel->lateral_relids;
464 /* We only do sample scan if it was requested */
465 path = create_samplescan_path(root, rel, required_outer);
466 rel->pathlist = list_make1(path);
471 * Set size estimates for a foreign table RTE
/* NOTE(review): "static void", braces elided from the listing. */
474 set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
476 /* Mark rel with estimated output rows, width, etc */
477 set_foreign_size_estimates(root, rel);
479 /* Let FDW adjust the size estimates, if it can */
480 rel->fdwroutine->GetForeignRelSize(root, rel, rte->relid);
484 * set_foreign_pathlist
485 * Build access paths for a foreign table RTE
/* NOTE(review): "static void", braces elided from the listing. */
488 set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
490 /* Call the FDW's GetForeignPaths function to generate path(s) */
491 rel->fdwroutine->GetForeignPaths(root, rel, rte->relid);
495 * set_append_rel_size
496 * Set size estimates for an "append relation"
498 * The passed-in rel and RTE represent the entire append relation. The
499 * relation's contents are computed by appending together the output of
500 * the individual member relations. Note that in the inheritance case,
501 * the first member relation is actually the same table as is mentioned in
502 * the parent RTE ... but it has a different RTE and RelOptInfo. This is
503 * a good thing because their outputs are not the same size.
/*
 * NOTE(review): heavily elided listing — braces, several local declarations
 * (parent_rows, parent_size, childRTindex, childquals, childqual, childvars,
 * i), "continue"s after the dummy-child cases, the appinfo arguments to the
 * adjust_appendrel_attrs() calls, and the parent_rows > 0 / else branch
 * around the width computation are all missing (numbering jumps e.g.
 * 509 -> 512, 575 -> 578, 704 -> 709).  Restore from upstream allpaths.c.
 */
506 set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
507 Index rti, RangeTblEntry *rte)
509 int parentRTindex = rti;
512 double *parent_attrsizes;
517 * Initialize to compute size estimates for whole append relation.
519 * We handle width estimates by weighting the widths of different child
520 * rels proportionally to their number of rows. This is sensible because
521 * the use of width estimates is mainly to compute the total relation
522 * "footprint" if we have to sort or hash it. To do this, we sum the
523 * total equivalent size (in "double" arithmetic) and then divide by the
524 * total rowcount estimate. This is done separately for the total rel
525 * width and each attribute.
527 * Note: if you consider changing this logic, beware that child rels could
528 * have zero rows and/or width, if they were excluded by constraints.
532 nattrs = rel->max_attr - rel->min_attr + 1;
533 parent_attrsizes = (double *) palloc0(nattrs * sizeof(double));
535 foreach(l, root->append_rel_list)
537 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
539 RangeTblEntry *childRTE;
540 RelOptInfo *childrel;
543 ListCell *parentvars;
546 /* append_rel_list contains all append rels; ignore others */
547 if (appinfo->parent_relid != parentRTindex)
550 childRTindex = appinfo->child_relid;
551 childRTE = root->simple_rte_array[childRTindex];
554 * The child rel's RelOptInfo was already created during
555 * add_base_rels_to_query.
557 childrel = find_base_rel(root, childRTindex);
558 Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
561 * We have to copy the parent's targetlist and quals to the child,
562 * with appropriate substitution of variables. However, only the
563 * baserestrictinfo quals are needed before we can check for
564 * constraint exclusion; so do that first and then check to see if we
565 * can disregard this child.
567 * As of 8.4, the child rel's targetlist might contain non-Var
568 * expressions, which means that substitution into the quals could
569 * produce opportunities for const-simplification, and perhaps even
570 * pseudoconstant quals. To deal with this, we strip the RestrictInfo
571 * nodes, do the substitution, do const-simplification, and then
572 * reconstitute the RestrictInfo layer.
574 childquals = get_all_actual_clauses(rel->baserestrictinfo);
575 childquals = (List *) adjust_appendrel_attrs(root,
578 childqual = eval_const_expressions(root, (Node *)
579 make_ands_explicit(childquals));
580 if (childqual && IsA(childqual, Const) &&
581 (((Const *) childqual)->constisnull ||
582 !DatumGetBool(((Const *) childqual)->constvalue)))
585 * Restriction reduces to constant FALSE or constant NULL after
586 * substitution, so this child need not be scanned.
588 set_dummy_rel_pathlist(childrel);
591 childquals = make_ands_implicit((Expr *) childqual);
592 childquals = make_restrictinfos_from_actual_clauses(root,
594 childrel->baserestrictinfo = childquals;
596 if (relation_excluded_by_constraints(root, childrel, childRTE))
599 * This child need not be scanned, so we can omit it from the
602 set_dummy_rel_pathlist(childrel);
607 * CE failed, so finish copying/modifying targetlist and join quals.
609 * Note: the resulting childrel->reltargetlist may contain arbitrary
610 * expressions, which otherwise would not occur in a reltargetlist.
611 * Code that might be looking at an appendrel child must cope with
612 * such. (Normally, a reltargetlist would only include Vars and
615 childrel->joininfo = (List *)
616 adjust_appendrel_attrs(root,
617 (Node *) rel->joininfo,
619 childrel->reltargetlist = (List *)
620 adjust_appendrel_attrs(root,
621 (Node *) rel->reltargetlist,
625 * We have to make child entries in the EquivalenceClass data
626 * structures as well. This is needed either if the parent
627 * participates in some eclass joins (because we will want to consider
628 * inner-indexscan joins on the individual children) or if the parent
629 * has useful pathkeys (because we should try to build MergeAppend
630 * paths that produce those sort orderings).
632 if (rel->has_eclass_joins || has_useful_pathkeys(root, rel))
633 add_child_rel_equivalences(root, appinfo, rel, childrel);
634 childrel->has_eclass_joins = rel->has_eclass_joins;
637 * Note: we could compute appropriate attr_needed data for the child's
638 * variables, by transforming the parent's attr_needed through the
639 * translated_vars mapping. However, currently there's no need
640 * because attr_needed is only examined for base relations not
641 * otherrels. So we just leave the child's attr_needed empty.
645 * Compute the child's size.
647 set_rel_size(root, childrel, childRTindex, childRTE);
650 * It is possible that constraint exclusion detected a contradiction
651 * within a child subquery, even though we didn't prove one above. If
652 * so, we can skip this child.
654 if (IS_DUMMY_REL(childrel))
658 * Accumulate size information from each live child.
660 if (childrel->rows > 0)
662 parent_rows += childrel->rows;
663 parent_size += childrel->width * childrel->rows;
666 * Accumulate per-column estimates too. We need not do anything
667 * for PlaceHolderVars in the parent list. If child expression
668 * isn't a Var, or we didn't record a width estimate for it, we
669 * have to fall back on a datatype-based estimate.
671 * By construction, child's reltargetlist is 1-to-1 with parent's.
673 forboth(parentvars, rel->reltargetlist,
674 childvars, childrel->reltargetlist)
676 Var *parentvar = (Var *) lfirst(parentvars);
677 Node *childvar = (Node *) lfirst(childvars);
679 if (IsA(parentvar, Var))
681 int pndx = parentvar->varattno - rel->min_attr;
682 int32 child_width = 0;
684 if (IsA(childvar, Var) &&
685 ((Var *) childvar)->varno == childrel->relid)
687 int cndx = ((Var *) childvar)->varattno - childrel->min_attr;
689 child_width = childrel->attr_widths[cndx];
691 if (child_width <= 0)
692 child_width = get_typavgwidth(exprType(childvar),
693 exprTypmod(childvar));
694 Assert(child_width > 0);
695 parent_attrsizes[pndx] += child_width * childrel->rows;
702 * Save the finished size estimates.
704 rel->rows = parent_rows;
/* NOTE(review): the "if (parent_rows > 0)" guard is elided here; dividing
 * by parent_rows without it would be a division hazard. */
709 rel->width = rint(parent_size / parent_rows);
710 for (i = 0; i < nattrs; i++)
711 rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows);
714 rel->width = 0; /* attr_widths should be zero already */
717 * Set "raw tuples" count equal to "rows" for the appendrel; needed
718 * because some places assume rel->tuples is valid for any baserel.
720 rel->tuples = parent_rows;
722 pfree(parent_attrsizes);
726 * set_append_rel_pathlist
727 * Build access paths for an "append relation"
/*
 * NOTE(review): elided listing — braces, several locals (l, lcp, lpk, lco,
 * lcr, childRTindex, subpath), "continue"s, the else keywords, the lappend
 * second arguments (childkeys / childouter copies), and the
 * "if (subpaths_valid)" guards before the add_path calls are missing
 * (numbering jumps e.g. 804 -> 809, 824 -> 829, 859 -> 862).
 */
730 set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
731 Index rti, RangeTblEntry *rte)
733 int parentRTindex = rti;
734 List *live_childrels = NIL;
735 List *subpaths = NIL;
736 bool subpaths_valid = true;
737 List *all_child_pathkeys = NIL;
738 List *all_child_outers = NIL;
742 * Generate access paths for each member relation, and remember the
743 * cheapest path for each one. Also, identify all pathkeys (orderings)
744 * and parameterizations (required_outer sets) available for the member
747 foreach(l, root->append_rel_list)
749 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
751 RangeTblEntry *childRTE;
752 RelOptInfo *childrel;
755 /* append_rel_list contains all append rels; ignore others */
756 if (appinfo->parent_relid != parentRTindex)
759 /* Re-locate the child RTE and RelOptInfo */
760 childRTindex = appinfo->child_relid;
761 childRTE = root->simple_rte_array[childRTindex];
762 childrel = root->simple_rel_array[childRTindex];
765 * Compute the child's access paths.
767 set_rel_pathlist(root, childrel, childRTindex, childRTE);
770 * If child is dummy, ignore it.
772 if (IS_DUMMY_REL(childrel))
776 * Child is live, so add it to the live_childrels list for use below.
778 live_childrels = lappend(live_childrels, childrel);
781 * If child has an unparameterized cheapest-total path, add that to
782 * the unparameterized Append path we are constructing for the parent.
783 * If not, there's no workable unparameterized path.
785 if (childrel->cheapest_total_path->param_info == NULL)
786 subpaths = accumulate_append_subpath(subpaths,
787 childrel->cheapest_total_path);
789 subpaths_valid = false;
792 * Collect lists of all the available path orderings and
793 * parameterizations for all the children. We use these as a
794 * heuristic to indicate which sort orderings and parameterizations we
795 * should build Append and MergeAppend paths for.
797 foreach(lcp, childrel->pathlist)
799 Path *childpath = (Path *) lfirst(lcp);
800 List *childkeys = childpath->pathkeys;
801 Relids childouter = PATH_REQ_OUTER(childpath);
803 /* Unsorted paths don't contribute to pathkey list */
804 if (childkeys != NIL)
809 /* Have we already seen this ordering? */
810 foreach(lpk, all_child_pathkeys)
812 List *existing_pathkeys = (List *) lfirst(lpk);
814 if (compare_pathkeys(existing_pathkeys,
815 childkeys) == PATHKEYS_EQUAL)
823 /* No, so add it to all_child_pathkeys */
824 all_child_pathkeys = lappend(all_child_pathkeys,
829 /* Unparameterized paths don't contribute to param-set list */
835 /* Have we already seen this param set? */
836 foreach(lco, all_child_outers)
838 Relids existing_outers = (Relids) lfirst(lco);
840 if (bms_equal(existing_outers, childouter))
848 /* No, so add it to all_child_outers */
849 all_child_outers = lappend(all_child_outers,
857 * If we found unparameterized paths for all children, build an unordered,
858 * unparameterized Append path for the rel. (Note: this is correct even
859 * if we have zero or one live subpath due to constraint exclusion.)
862 add_path(rel, (Path *) create_append_path(rel, subpaths, NULL));
865 * Also build unparameterized MergeAppend paths based on the collected
866 * list of child pathkeys.
869 generate_mergeappend_paths(root, rel, live_childrels,
873 * Build Append paths for each parameterization seen among the child rels.
874 * (This may look pretty expensive, but in most cases of practical
875 * interest, the child rels will expose mostly the same parameterizations,
876 * so that not that many cases actually get considered here.)
878 * The Append node itself cannot enforce quals, so all qual checking must
879 * be done in the child paths. This means that to have a parameterized
880 * Append path, we must have the exact same parameterization for each
881 * child path; otherwise some children might be failing to check the
882 * moved-down quals. To make them match up, we can try to increase the
883 * parameterization of lesser-parameterized paths.
885 foreach(l, all_child_outers)
887 Relids required_outer = (Relids) lfirst(l);
890 /* Select the child paths for an Append with this parameterization */
892 subpaths_valid = true;
893 foreach(lcr, live_childrels)
895 RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
898 subpath = get_cheapest_parameterized_child_path(root,
903 /* failed to make a suitable path for this child */
904 subpaths_valid = false;
907 subpaths = accumulate_append_subpath(subpaths, subpath);
911 add_path(rel, (Path *)
912 create_append_path(rel, subpaths, required_outer));
917 * generate_mergeappend_paths
918 * Generate MergeAppend paths for an append relation
920 * Generate a path for each ordering (pathkey list) appearing in
921 * all_child_pathkeys.
923 * We consider both cheapest-startup and cheapest-total cases, ie, for each
924 * interesting ordering, collect all the cheapest startup subpaths and all the
925 * cheapest total paths, and build a MergeAppend path for each case.
927 * We don't currently generate any parameterized MergeAppend paths. While
928 * it would not take much more code here to do so, it's very unclear that it
929 * is worth the planning cycles to investigate such paths: there's little
930 * use for an ordered path on the inside of a nestloop. In fact, it's likely
931 * that the current coding of add_path would reject such paths out of hand,
932 * because add_path gives no credit for sort ordering of parameterized paths,
933 * and a parameterized MergeAppend is going to be more expensive than the
934 * corresponding parameterized Append path. If we ever try harder to support
935 * parameterized mergejoin plans, it might be worth adding support for
936 * parameterized MergeAppends to feed such joins. (See notes in
937 * optimizer/README for why that might not ever happen, though.)
/*
 * NOTE(review): elided listing — braces, "cheapest_total" declaration tail,
 * the assignments receiving get_cheapest_path_for_pathkeys() results and
 * their STARTUP_COST/TOTAL_COST arguments, the accumulate_append_subpath
 * result assignments, and the create_merge_append_path() trailing arguments
 * are all missing (numbering jumps 958 -> 961, 963 -> 968, 1000 -> 1005).
 */
940 generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
941 List *live_childrels,
942 List *all_child_pathkeys)
946 foreach(lcp, all_child_pathkeys)
948 List *pathkeys = (List *) lfirst(lcp);
949 List *startup_subpaths = NIL;
950 List *total_subpaths = NIL;
951 bool startup_neq_total = false;
954 /* Select the child paths for this ordering... */
955 foreach(lcr, live_childrels)
957 RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
958 Path *cheapest_startup,
961 /* Locate the right paths, if they are available. */
963 get_cheapest_path_for_pathkeys(childrel->pathlist,
968 get_cheapest_path_for_pathkeys(childrel->pathlist,
974 * If we can't find any paths with the right order just use the
975 * cheapest-total path; we'll have to sort it later.
977 if (cheapest_startup == NULL || cheapest_total == NULL)
979 cheapest_startup = cheapest_total =
980 childrel->cheapest_total_path;
981 /* Assert we do have an unparameterized path for this child */
982 Assert(cheapest_total->param_info == NULL);
986 * Notice whether we actually have different paths for the
987 * "cheapest" and "total" cases; frequently there will be no point
988 * in two create_merge_append_path() calls.
990 if (cheapest_startup != cheapest_total)
991 startup_neq_total = true;
994 accumulate_append_subpath(startup_subpaths, cheapest_startup);
996 accumulate_append_subpath(total_subpaths, cheapest_total);
999 /* ... and build the MergeAppend paths */
1000 add_path(rel, (Path *) create_merge_append_path(root,
1005 if (startup_neq_total)
1006 add_path(rel, (Path *) create_merge_append_path(root,
1015 * get_cheapest_parameterized_child_path
1016 * Get cheapest path for this relation that has exactly the requested
1019 * Returns NULL if unable to create such a path.
/*
 * NOTE(review): elided listing — "static Path *", braces, the "Path
 * *cheapest" / ListCell declarations, the early "return cheapest;", the
 * "continue"s, the "cheapest = path;" assignment behind the "new best path"
 * comment, and the final "return cheapest;" are all missing.
 */
1022 get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
1023 Relids required_outer)
1029 * Look up the cheapest existing path with no more than the needed
1030 * parameterization. If it has exactly the needed parameterization, we're
1033 cheapest = get_cheapest_path_for_pathkeys(rel->pathlist,
1037 Assert(cheapest != NULL);
1038 if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer))
1042 * Otherwise, we can "reparameterize" an existing path to match the given
1043 * parameterization, which effectively means pushing down additional
1044 * joinquals to be checked within the path's scan. However, some existing
1045 * paths might check the available joinquals already while others don't;
1046 * therefore, it's not clear which existing path will be cheapest after
1047 * reparameterization. We have to go through them all and find out.
1050 foreach(lc, rel->pathlist)
1052 Path *path = (Path *) lfirst(lc);
1054 /* Can't use it if it needs more than requested parameterization */
1055 if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer))
1059 * Reparameterization can only increase the path's cost, so if it's
1060 * already more expensive than the current cheapest, forget it.
1062 if (cheapest != NULL &&
1063 compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
1066 /* Reparameterize if needed, then recheck cost */
1067 if (!bms_equal(PATH_REQ_OUTER(path), required_outer))
1069 path = reparameterize_path(root, path, required_outer, 1.0);
1071 continue; /* failed to reparameterize this one */
1072 Assert(bms_equal(PATH_REQ_OUTER(path), required_outer));
1074 if (cheapest != NULL &&
1075 compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
1079 /* We have a new best path */
1083 /* Return the best path, or NULL if we found no suitable candidate */
1088 * accumulate_append_subpath
1089 * Add a subpath to the list being built for an Append or MergeAppend
1091 * It's possible that the child is itself an Append or MergeAppend path, in
1092 * which case we can "cut out the middleman" and just add its child paths to
1093 * our own list. (We don't try to do this earlier because we need to apply
1094 * both levels of transformation to the quals.)
1096 * Note that if we omit a child MergeAppend in this way, we are effectively
1097 * omitting a sort step, which seems fine: if the parent is to be an Append,
1098 * its result would be unsorted anyway, while if the parent is to be a
1099 * MergeAppend, there's no point in a separate sort on a child.
1102 accumulate_append_subpath(List *subpaths, Path *path)
1104 if (IsA(path, AppendPath))
1106 AppendPath *apath = (AppendPath *) path;
1108 /* list_copy is important here to avoid sharing list substructure */
1109 return list_concat(subpaths, list_copy(apath->subpaths));
1111 else if (IsA(path, MergeAppendPath))
1113 MergeAppendPath *mpath = (MergeAppendPath *) path;
1115 /* list_copy is important here to avoid sharing list substructure */
1116 return list_concat(subpaths, list_copy(mpath->subpaths));
1119 return lappend(subpaths, path);
1123 * set_dummy_rel_pathlist
1124 * Build a dummy path for a relation that's been excluded by constraints
1126 * Rather than inventing a special "dummy" path type, we represent this as an
1127 * AppendPath with no members (see also IS_DUMMY_PATH/IS_DUMMY_REL macros).
1130 set_dummy_rel_pathlist(RelOptInfo *rel)
1132 /* Set dummy size estimates --- we leave attr_widths[] as zeroes */
1136 /* Discard any pre-existing paths; no further need for them */
1137 rel->pathlist = NIL;
1139 add_path(rel, (Path *) create_append_path(rel, NIL, NULL));
1142 * We set the cheapest path immediately, to ensure that IS_DUMMY_REL()
1143 * will recognize the relation as dummy if anyone asks. This is redundant
1144 * when we're called from set_rel_size(), but not when called from
1145 * elsewhere, and doing it twice is harmless anyway.
1150 /* quick-and-dirty test to see if any joining is needed */
1152 has_multiple_baserels(PlannerInfo *root)
1154 int num_base_rels = 0;
1157 for (rti = 1; rti < root->simple_rel_array_size; rti++)
1159 RelOptInfo *brel = root->simple_rel_array[rti];
1164 /* ignore RTEs that are "other rels" */
1165 if (brel->reloptkind == RELOPT_BASEREL)
1166 if (++num_base_rels > 1)
/*
 * NOTE(review): lossy extract -- the number at the start of each line is the
 * original file line number, and gaps in that numbering mark elided lines
 * (braces, blank lines, some declarations such as `pathkeys` and `l`, the
 * subquery_planner() argument that receives &subroot, and early-return
 * statements). Do not compile this text as-is; restore from upstream.
 *
 * Visible flow: copy the subquery, compute lateral parameterization, try to
 * push safe restriction quals down into it, plan it with subquery_planner,
 * convert its pathkeys, and add a single SubqueryScan path.
 */
1173 * set_subquery_pathlist
1174 * Build the (single) access path for a subquery RTE
1176 * We don't currently support generating parameterized paths for subqueries
1177 * by pushing join clauses down into them; it seems too expensive to re-plan
1178 * the subquery multiple times to consider different alternatives. So the
1179 * subquery will have exactly one path. (The path will be parameterized
1180 * if the subquery contains LATERAL references, otherwise not.) Since there's
1181 * no freedom of action here, there's no need for a separate set_subquery_size
1182 * phase: we just make the path right away.
1185 set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
1186 Index rti, RangeTblEntry *rte)
1188 Query *parse = root->parse;
1189 Query *subquery = rte->subquery;
1190 Relids required_outer;
1191 pushdown_safety_info safetyInfo;
1192 double tuple_fraction;
1193 PlannerInfo *subroot;
1197 * Must copy the Query so that planning doesn't mess up the RTE contents
1198 * (really really need to fix the planner to not scribble on its input,
1199 * someday ... but see remove_unused_subquery_outputs to start with).
1201 subquery = copyObject(subquery);
1204 * If it's a LATERAL subquery, it might contain some Vars of the current
1205 * query level, requiring it to be treated as parameterized, even though
1206 * we don't support pushing down join quals into subqueries.
1208 required_outer = rel->lateral_relids;
1211 * Zero out result area for subquery_is_pushdown_safe, so that it can set
1212 * flags as needed while recursing. In particular, we need a workspace
1213 * for keeping track of unsafe-to-reference columns. unsafeColumns[i]
1214 * will be set TRUE if we find that output column i of the subquery is
1215 * unsafe to use in a pushed-down qual.
1217 memset(&safetyInfo, 0, sizeof(safetyInfo));
/* +1 because resnos are 1-based; slot 0 is unused */
1218 safetyInfo.unsafeColumns = (bool *)
1219 palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
1222 * If the subquery has the "security_barrier" flag, it means the subquery
1223 * originated from a view that must enforce row level security. Then we
1224 * must not push down quals that contain leaky functions. (Ideally this
1225 * would be checked inside subquery_is_pushdown_safe, but since we don't
1226 * currently pass the RTE to that function, we must do it here.)
1228 safetyInfo.unsafeLeaky = rte->security_barrier;
1231 * If there are any restriction clauses that have been attached to the
1232 * subquery relation, consider pushing them down to become WHERE or HAVING
1233 * quals of the subquery itself. This transformation is useful because it
1234 * may allow us to generate a better plan for the subquery than evaluating
1235 * all the subquery output rows and then filtering them.
1237 * There are several cases where we cannot push down clauses. Restrictions
1238 * involving the subquery are checked by subquery_is_pushdown_safe().
1239 * Restrictions on individual clauses are checked by
1240 * qual_is_pushdown_safe(). Also, we don't want to push down
1241 * pseudoconstant clauses; better to have the gating node above the
1244 * Non-pushed-down clauses will get evaluated as qpquals of the
1245 * SubqueryScan node.
1247 * XXX Are there any cases where we want to make a policy decision not to
1248 * push down a pushable qual, because it'd result in a worse plan?
1250 if (rel->baserestrictinfo != NIL &&
1251 subquery_is_pushdown_safe(subquery, subquery, &safetyInfo))
1253 /* OK to consider pushing down individual quals */
1254 List *upperrestrictlist = NIL;
1257 foreach(l, rel->baserestrictinfo)
1259 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
1260 Node *clause = (Node *) rinfo->clause;
1262 if (!rinfo->pseudoconstant &&
1263 qual_is_pushdown_safe(subquery, rti, clause, &safetyInfo))
1266 subquery_push_qual(subquery, rte, rti, clause);
1270 /* Keep it in the upper query */
1271 upperrestrictlist = lappend(upperrestrictlist, rinfo);
1274 rel->baserestrictinfo = upperrestrictlist;
1277 pfree(safetyInfo.unsafeColumns);
1280 * The upper query might not use all the subquery's output columns; if
1281 * not, we can simplify.
1283 remove_unused_subquery_outputs(subquery, rel);
1286 * We can safely pass the outer tuple_fraction down to the subquery if the
1287 * outer level has no joining, aggregation, or sorting to do. Otherwise
1288 * we'd better tell the subquery to plan for full retrieval. (XXX This
1289 * could probably be made more intelligent ...)
1291 if (parse->hasAggs ||
1292 parse->groupClause ||
1293 parse->havingQual ||
1294 parse->distinctClause ||
1295 parse->sortClause ||
1296 has_multiple_baserels(root))
1297 tuple_fraction = 0.0; /* default case */
1299 tuple_fraction = root->tuple_fraction;
1301 /* plan_params should not be in use in current query level */
1302 Assert(root->plan_params == NIL);
1304 /* Generate the plan for the subquery */
1305 rel->subplan = subquery_planner(root->glob, subquery,
1307 false, tuple_fraction,
1309 rel->subroot = subroot;
1311 /* Isolate the params needed by this specific subplan */
1312 rel->subplan_params = root->plan_params;
1313 root->plan_params = NIL;
1316 * It's possible that constraint exclusion proved the subquery empty. If
1317 * so, it's convenient to turn it back into a dummy path so that we will
1318 * recognize appropriate optimizations at this query level.
1320 if (is_dummy_plan(rel->subplan))
/* presumably followed by an early return, elided here -- TODO confirm */
1322 set_dummy_rel_pathlist(rel);
1326 /* Mark rel with estimated output rows, width, etc */
1327 set_subquery_size_estimates(root, rel);
1329 /* Convert subquery pathkeys to outer representation */
1330 pathkeys = convert_subquery_pathkeys(root, rel, subroot->query_pathkeys);
1332 /* Generate appropriate path */
1333 add_path(rel, create_subqueryscan_path(root, rel, pathkeys, required_outer));
/*
 * NOTE(review): lossy extract -- numbering gaps show elided lines (braces,
 * the `lc` declaration, the loop-break on a match, and most of the
 * build_expression_pathkey() argument list). Restore from upstream.
 *
 * Visible flow: a function RTE gets one FunctionScan path; if WITH
 * ORDINALITY was used and the ordinal column is referenced, try to claim
 * a sort ordering on it via build_expression_pathkey.
 */
1337 * set_function_pathlist
1338 * Build the (single) access path for a function RTE
1341 set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
1343 Relids required_outer;
1344 List *pathkeys = NIL;
1347 * We don't support pushing join clauses into the quals of a function
1348 * scan, but it could still have required parameterization due to LATERAL
1349 * refs in the function expression.
1351 required_outer = rel->lateral_relids;
1354 * The result is considered unordered unless ORDINALITY was used, in which
1355 * case it is ordered by the ordinal column (the last one). See if we
1356 * care, by checking for uses of that Var in equivalence classes.
1358 if (rte->funcordinality)
/* ordinality column is always the last attribute */
1360 AttrNumber ordattno = rel->max_attr;
1365 * Is there a Var for it in reltargetlist? If not, the query did not
1366 * reference the ordinality column, or at least not in any way that
1367 * would be interesting for sorting.
1369 foreach(lc, rel->reltargetlist)
1371 Var *node = (Var *) lfirst(lc);
1373 /* checking varno/varlevelsup is just paranoia */
1374 if (IsA(node, Var) &&
1375 node->varattno == ordattno &&
1376 node->varno == rel->relid &&
1377 node->varlevelsup == 0)
1385 * Try to build pathkeys for this Var with int8 sorting. We tell
1386 * build_expression_pathkey not to build any new equivalence class; if
1387 * the Var isn't already mentioned in some EC, it means that nothing
1388 * cares about the ordering.
1391 pathkeys = build_expression_pathkey(root,
1393 NULL, /* below outer joins */
1399 /* Generate appropriate path */
1400 add_path(rel, create_functionscan_path(root, rel,
1401 pathkeys, required_outer));
/*
 * NOTE(review): lossy extract -- braces and blank lines elided (see the
 * numbering gaps). Logic visible here is complete in outline: a VALUES RTE
 * gets exactly one ValuesScan path, parameterized only by lateral refs.
 */
1405 * set_values_pathlist
1406 * Build the (single) access path for a VALUES RTE
1409 set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
1411 Relids required_outer;
1414 * We don't support pushing join clauses into the quals of a values scan,
1415 * but it could still have required parameterization due to LATERAL refs
1416 * in the values expressions.
1418 required_outer = rel->lateral_relids;
1420 /* Generate appropriate path */
1421 add_path(rel, create_valuesscan_path(root, rel, required_outer));
/*
 * NOTE(review): lossy extract -- elided lines include the function-name
 * comment line, declarations (`cteplan`, `plan_id`, `levelsup`, `ndx`,
 * `lc`), the `cteroot = root;` initialization, the loop-break and `ndx++`
 * bookkeeping, and braces. Restore from upstream before compiling.
 *
 * Visible flow: walk up `levelsup` parent_root links to the query level
 * owning the CTE, find it by name in cteList, fetch its previously-built
 * plan from glob->subplans, then add a single CteScan path.
 */
1426 * Build the (single) access path for a non-self-reference CTE RTE
1428 * There's no need for a separate set_cte_size phase, since we don't
1429 * support join-qual-parameterized paths for CTEs.
1432 set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
1435 PlannerInfo *cteroot;
1440 Relids required_outer;
1443 * Find the referenced CTE, and locate the plan previously made for it.
1445 levelsup = rte->ctelevelsup;
1447 while (levelsup-- > 0)
1449 cteroot = cteroot->parent_root;
1450 if (!cteroot) /* shouldn't happen */
1451 elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
1455 * Note: cte_plan_ids can be shorter than cteList, if we are still working
1456 * on planning the CTEs (ie, this is a side-reference from another CTE).
1457 * So we mustn't use forboth here.
1460 foreach(lc, cteroot->parse->cteList)
1462 CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
1464 if (strcmp(cte->ctename, rte->ctename) == 0)
1468 if (lc == NULL) /* shouldn't happen */
1469 elog(ERROR, "could not find CTE \"%s\"", rte->ctename);
1470 if (ndx >= list_length(cteroot->cte_plan_ids))
1471 elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
1472 plan_id = list_nth_int(cteroot->cte_plan_ids, ndx);
1473 Assert(plan_id > 0);
/* plan_id is 1-based; subplans list is 0-based */
1474 cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1);
1476 /* Mark rel with estimated output rows, width, etc */
1477 set_cte_size_estimates(root, rel, cteplan);
1480 * We don't support pushing join clauses into the quals of a CTE scan, but
1481 * it could still have required parameterization due to LATERAL refs in
1484 required_outer = rel->lateral_relids;
1486 /* Generate appropriate path */
1487 add_path(rel, create_ctescan_path(root, rel, required_outer));
/*
 * NOTE(review): lossy extract -- elided lines include declarations
 * (`cteplan`, `levelsup`), the `cteroot = root;` initialization before the
 * walk-up loop, and braces (see numbering gaps). Restore from upstream.
 *
 * Visible flow: for a recursive self-reference, walk up parent_root links
 * to the level running the recursive UNION, take its non_recursive_plan for
 * size estimates, and add a single WorkTableScan path.
 */
1491 * set_worktable_pathlist
1492 * Build the (single) access path for a self-reference CTE RTE
1494 * There's no need for a separate set_worktable_size phase, since we don't
1495 * support join-qual-parameterized paths for CTEs.
1498 set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
1501 PlannerInfo *cteroot;
1503 Relids required_outer;
1506 * We need to find the non-recursive term's plan, which is in the plan
1507 * level that's processing the recursive UNION, which is one level *below*
1508 * where the CTE comes from.
1510 levelsup = rte->ctelevelsup;
1511 if (levelsup == 0) /* shouldn't happen */
1512 elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
1515 while (levelsup-- > 0)
1517 cteroot = cteroot->parent_root;
1518 if (!cteroot) /* shouldn't happen */
1519 elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
1521 cteplan = cteroot->non_recursive_plan;
1522 if (!cteplan) /* shouldn't happen */
1523 elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
1525 /* Mark rel with estimated output rows, width, etc */
1526 set_cte_size_estimates(root, rel, cteplan);
1529 * We don't support pushing join clauses into the quals of a worktable
1530 * scan, but it could still have required parameterization due to LATERAL
1531 * refs in its tlist. (I'm not sure this is actually possible given the
1532 * restrictions on recursive references, but it's easy enough to support.)
1534 required_outer = rel->lateral_relids;
1536 /* Generate appropriate path */
1537 add_path(rel, create_worktablescan_path(root, rel, required_outer));
/*
 * NOTE(review): lossy extract -- elided lines include the declarations of
 * `levels_needed`, `initial_rels` (= NIL), `jl`, the `else` keyword before
 * the unrecognized-node elog, and braces (see numbering gaps). Restore
 * from upstream before compiling.
 *
 * Visible flow: flatten the joinlist into initial_rels (recursing for
 * sub-lists), then dispatch the join-order search to a plugin hook, GEQO,
 * or standard_join_search depending on enable_geqo / geqo_threshold.
 */
1541 * make_rel_from_joinlist
1542 * Build access paths using a "joinlist" to guide the join path search.
1544 * See comments for deconstruct_jointree() for definition of the joinlist
1548 make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
1555 * Count the number of child joinlist nodes. This is the depth of the
1556 * dynamic-programming algorithm we must employ to consider all ways of
1557 * joining the child nodes.
1559 levels_needed = list_length(joinlist);
1561 if (levels_needed <= 0)
1562 return NULL; /* nothing to do? */
1565 * Construct a list of rels corresponding to the child joinlist nodes.
1566 * This may contain both base rels and rels constructed according to
1570 foreach(jl, joinlist)
1572 Node *jlnode = (Node *) lfirst(jl);
1573 RelOptInfo *thisrel;
1575 if (IsA(jlnode, RangeTblRef))
1577 int varno = ((RangeTblRef *) jlnode)->rtindex;
1579 thisrel = find_base_rel(root, varno);
1581 else if (IsA(jlnode, List))
1583 /* Recurse to handle subproblem */
1584 thisrel = make_rel_from_joinlist(root, (List *) jlnode);
1588 elog(ERROR, "unrecognized joinlist node type: %d",
1589 (int) nodeTag(jlnode));
1590 thisrel = NULL; /* keep compiler quiet */
1593 initial_rels = lappend(initial_rels, thisrel);
1596 if (levels_needed == 1)
1599 * Single joinlist node, so we're done.
1601 return (RelOptInfo *) linitial(initial_rels);
1606 * Consider the different orders in which we could join the rels,
1607 * using a plugin, GEQO, or the regular join search code.
1609 * We put the initial_rels list into a PlannerInfo field because
1610 * has_legal_joinclause() needs to look at it (ugly :-().
1612 root->initial_rels = initial_rels;
1614 if (join_search_hook)
1615 return (*join_search_hook) (root, levels_needed, initial_rels);
1616 else if (enable_geqo && levels_needed >= geqo_threshold)
1617 return geqo(root, levels_needed, initial_rels);
1619 return standard_join_search(root, levels_needed, initial_rels);
/*
 * NOTE(review): lossy extract -- elided lines include the declarations of
 * `lev` and `rel`, the set_cheapest(rel) call implied by the "Find and save
 * the cheapest paths" comment, the #endif matching the visible #ifdef, the
 * final `return rel;`, and braces (see numbering gaps). Restore from
 * upstream before compiling.
 */
1624 * standard_join_search
1625 * Find possible joinpaths for a query by successively finding ways
1626 * to join component relations into join relations.
1628 * 'levels_needed' is the number of iterations needed, ie, the number of
1629 * independent jointree items in the query. This is > 1.
1631 * 'initial_rels' is a list of RelOptInfo nodes for each independent
1632 * jointree item. These are the components to be joined together.
1633 * Note that levels_needed == list_length(initial_rels).
1635 * Returns the final level of join relations, i.e., the relation that is
1636 * the result of joining all the original relations together.
1637 * At least one implementation path must be provided for this relation and
1638 * all required sub-relations.
1640 * To support loadable plugins that modify planner behavior by changing the
1641 * join searching algorithm, we provide a hook variable that lets a plugin
1642 * replace or supplement this function. Any such hook must return the same
1643 * final join relation as the standard code would, but it might have a
1644 * different set of implementation paths attached, and only the sub-joinrels
1645 * needed for these paths need have been instantiated.
1647 * Note to plugin authors: the functions invoked during standard_join_search()
1648 * modify root->join_rel_list and root->join_rel_hash. If you want to do more
1649 * than one join-order search, you'll probably need to save and restore the
1650 * original states of those data structures. See geqo_eval() for an example.
1653 standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
1659 * This function cannot be invoked recursively within any one planning
1660 * problem, so join_rel_level[] can't be in use already.
1662 Assert(root->join_rel_level == NULL);
1665 * We employ a simple "dynamic programming" algorithm: we first find all
1666 * ways to build joins of two jointree items, then all ways to build joins
1667 * of three items (from two-item joins and single items), then four-item
1668 * joins, and so on until we have considered all ways to join all the
1669 * items into one rel.
1671 * root->join_rel_level[j] is a list of all the j-item rels. Initially we
1672 * set root->join_rel_level[1] to represent all the single-jointree-item
/* slot 0 unused; slots 1..levels_needed hold per-level rel lists */
1675 root->join_rel_level = (List **) palloc0((levels_needed + 1) * sizeof(List *));
1677 root->join_rel_level[1] = initial_rels;
1679 for (lev = 2; lev <= levels_needed; lev++)
1684 * Determine all possible pairs of relations to be joined at this
1685 * level, and build paths for making each one from every available
1686 * pair of lower-level relations.
1688 join_search_one_level(root, lev);
1691 * Do cleanup work on each just-processed rel.
1693 foreach(lc, root->join_rel_level[lev])
1695 rel = (RelOptInfo *) lfirst(lc);
1697 /* Find and save the cheapest paths for this rel */
1700 #ifdef OPTIMIZER_DEBUG
1701 debug_print_rel(root, rel);
1707 * We should have a single rel at the final level.
1709 if (root->join_rel_level[levels_needed] == NIL)
1710 elog(ERROR, "failed to build any %d-way joins", levels_needed)
1711 Assert(list_length(root->join_rel_level[levels_needed]) == 1);
1713 rel = (RelOptInfo *) linitial(root->join_rel_level[levels_needed]);
1715 root->join_rel_level = NULL;
1720 /*****************************************************************************
1721 * PUSHING QUALS DOWN INTO SUBQUERIES
1722 *****************************************************************************/
/*
 * NOTE(review): lossy extract -- elided lines include the `return false`
 * statements paired with the visible failure checks, the `return true` at
 * the end, the safetyInfo argument of the recurse_pushdown_safe call, the
 * remaining compare_tlist_datatypes arguments, and braces (see numbering
 * gaps). Restore from upstream before compiling.
 */
1725 * subquery_is_pushdown_safe - is a subquery safe for pushing down quals?
1727 * subquery is the particular component query being checked. topquery
1728 * is the top component of a set-operations tree (the same Query if no
1729 * set-op is involved).
1731 * Conditions checked here:
1733 * 1. If the subquery has a LIMIT clause, we must not push down any quals,
1734 * since that could change the set of rows returned.
1736 * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
1737 * quals into it, because that could change the results.
1739 * 3. If the subquery uses DISTINCT, we cannot push volatile quals into it.
1740 * This is because upper-level quals should semantically be evaluated only
1741 * once per distinct row, not once per original row, and if the qual is
1742 * volatile then extra evaluations could change the results. (This issue
1743 * does not apply to other forms of aggregation such as GROUP BY, because
1744 * when those are present we push into HAVING not WHERE, so that the quals
1745 * are still applied after aggregation.)
1747 * 4. If the subquery contains window functions, we cannot push volatile quals
1748 * into it. The issue here is a bit different from DISTINCT: a volatile qual
1749 * might succeed for some rows of a window partition and fail for others,
1750 * thereby changing the partition contents and thus the window functions'
1751 * results for rows that remain.
1753 * In addition, we make several checks on the subquery's output columns to see
1754 * if it is safe to reference them in pushed-down quals. If output column k
1755 * is found to be unsafe to reference, we set safetyInfo->unsafeColumns[k]
1756 * to TRUE, but we don't reject the subquery overall since column k might not
1757 * be referenced by some/all quals. The unsafeColumns[] array will be
1758 * consulted later by qual_is_pushdown_safe(). It's better to do it this way
1759 * than to make the checks directly in qual_is_pushdown_safe(), because when
1760 * the subquery involves set operations we have to check the output
1761 * expressions in each arm of the set op.
1763 * Note: pushing quals into a DISTINCT subquery is theoretically dubious:
1764 * we're effectively assuming that the quals cannot distinguish values that
1765 * the DISTINCT's equality operator sees as equal, yet there are many
1766 * counterexamples to that assumption. However use of such a qual with a
1767 * DISTINCT subquery would be unsafe anyway, since there's no guarantee which
1768 * "equal" value will be chosen as the output value by the DISTINCT operation.
1769 * So we don't worry too much about that. Another objection is that if the
1770 * qual is expensive to evaluate, running it for each original row might cost
1771 * more than we save by eliminating rows before the DISTINCT step. But it
1772 * would be very hard to estimate that at this stage, and in practice pushdown
1773 * seldom seems to make things worse, so we ignore that problem too.
1775 * Note: likewise, pushing quals into a subquery with window functions is a
1776 * bit dubious: the quals might remove some rows of a window partition while
1777 * leaving others, causing changes in the window functions' results for the
1778 * surviving rows. We insist that such a qual reference only partitioning
1779 * columns, but again that only protects us if the qual does not distinguish
1780 * values that the partitioning equality operator sees as equal. The risks
1781 * here are perhaps larger than for DISTINCT, since no de-duplication of rows
1782 * occurs and thus there is no theoretical problem with such a qual. But
1783 * we'll do this anyway because the potential performance benefits are very
1784 * large, and we've seen no field complaints about the longstanding comparable
1785 * behavior with DISTINCT.
1788 subquery_is_pushdown_safe(Query *subquery, Query *topquery,
1789 pushdown_safety_info *safetyInfo)
1791 SetOperationStmt *topop;
/* Check point 1: LIMIT/OFFSET forbids all pushdown (return elided) */
1794 if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
1797 /* Check points 3 and 4 */
1798 if (subquery->distinctClause || subquery->hasWindowFuncs)
1799 safetyInfo->unsafeVolatile = true;
1802 * If we're at a leaf query, check for unsafe expressions in its target
1803 * list, and mark any unsafe ones in unsafeColumns[]. (Non-leaf nodes in
1804 * setop trees have only simple Vars in their tlists, so no need to check
1807 if (subquery->setOperations == NULL)
1808 check_output_expressions(subquery, safetyInfo);
1810 /* Are we at top level, or looking at a setop component? */
1811 if (subquery == topquery)
1813 /* Top level, so check any component queries */
1814 if (subquery->setOperations != NULL)
1815 if (!recurse_pushdown_safe(subquery->setOperations, topquery,
1821 /* Setop component must not have more components (too weird) */
1822 if (subquery->setOperations != NULL)
1824 /* Check whether setop component output types match top level */
1825 topop = (SetOperationStmt *) topquery->setOperations;
1826 Assert(topop && IsA(topop, SetOperationStmt));
1827 compare_tlist_datatypes(subquery->targetList,
/*
 * NOTE(review): lossy extract -- the `return false` lines paired with the
 * EXCEPT check and the failed recursions, the final `return true`, the
 * `else` before the elog, and braces are elided (see numbering gaps).
 * Restore from upstream before compiling.
 */
1835 * Helper routine to recurse through setOperations tree
1838 recurse_pushdown_safe(Node *setOp, Query *topquery,
1839 pushdown_safety_info *safetyInfo)
1841 if (IsA(setOp, RangeTblRef))
1843 RangeTblRef *rtr = (RangeTblRef *) setOp;
1844 RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable);
1845 Query *subquery = rte->subquery;
1847 Assert(subquery != NULL);
1848 return subquery_is_pushdown_safe(subquery, topquery, safetyInfo);
1850 else if (IsA(setOp, SetOperationStmt))
1852 SetOperationStmt *op = (SetOperationStmt *) setOp;
1854 /* EXCEPT is no good (point 2 for subquery_is_pushdown_safe) */
1855 if (op->op == SETOP_EXCEPT)
1858 if (!recurse_pushdown_safe(op->larg, topquery, safetyInfo))
1860 if (!recurse_pushdown_safe(op->rarg, topquery, safetyInfo))
1865 elog(ERROR, "unrecognized node type: %d",
1866 (int) nodeTag(setOp));
/*
 * NOTE(review): lossy extract -- the `lc` declaration, the `if (tle->resjunk)`
 * guard implied by the "ignore resjunk columns" continue, the `continue`
 * statements after each unsafe-marking assignment, and braces are elided
 * (see numbering gaps). Restore from upstream before compiling.
 */
1872 * check_output_expressions - check subquery's output expressions for safety
1874 * There are several cases in which it's unsafe to push down an upper-level
1875 * qual if it references a particular output column of a subquery. We check
1876 * each output column of the subquery and set unsafeColumns[k] to TRUE if
1877 * that column is unsafe for a pushed-down qual to reference. The conditions
1880 * 1. We must not push down any quals that refer to subselect outputs that
1881 * return sets, else we'd introduce functions-returning-sets into the
1882 * subquery's WHERE/HAVING quals.
1884 * 2. We must not push down any quals that refer to subselect outputs that
1885 * contain volatile functions, for fear of introducing strange results due
1886 * to multiple evaluation of a volatile function.
1888 * 3. If the subquery uses DISTINCT ON, we must not push down any quals that
1889 * refer to non-DISTINCT output columns, because that could change the set
1890 * of rows returned. (This condition is vacuous for DISTINCT, because then
1891 * there are no non-DISTINCT output columns, so we needn't check. Note that
1892 * subquery_is_pushdown_safe already reported that we can't use volatile
1893 * quals if there's DISTINCT or DISTINCT ON.)
1895 * 4. If the subquery has any window functions, we must not push down quals
1896 * that reference any output columns that are not listed in all the subquery's
1897 * window PARTITION BY clauses. We can push down quals that use only
1898 * partitioning columns because they should succeed or fail identically for
1899 * every row of any one window partition, and totally excluding some
1900 * partitions will not change a window function's results for remaining
1901 * partitions. (Again, this also requires nonvolatile quals, but
1902 * subquery_is_pushdown_safe handles that.)
1905 check_output_expressions(Query *subquery, pushdown_safety_info *safetyInfo)
1909 foreach(lc, subquery->targetList)
1911 TargetEntry *tle = (TargetEntry *) lfirst(lc);
1914 continue; /* ignore resjunk columns */
1916 /* We need not check further if output col is already known unsafe */
1917 if (safetyInfo->unsafeColumns[tle->resno])
1920 /* Functions returning sets are unsafe (point 1) */
1921 if (expression_returns_set((Node *) tle->expr))
1923 safetyInfo->unsafeColumns[tle->resno] = true;
1927 /* Volatile functions are unsafe (point 2) */
1928 if (contain_volatile_functions((Node *) tle->expr))
1930 safetyInfo->unsafeColumns[tle->resno] = true;
1934 /* If subquery uses DISTINCT ON, check point 3 */
1935 if (subquery->hasDistinctOn &&
1936 !targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
1938 /* non-DISTINCT column, so mark it unsafe */
1939 safetyInfo->unsafeColumns[tle->resno] = true;
1943 /* If subquery uses window functions, check point 4 */
1944 if (subquery->hasWindowFuncs &&
1945 !targetIsInAllPartitionLists(tle, subquery))
1947 /* not present in all PARTITION BY clauses, so mark it unsafe */
1948 safetyInfo->unsafeColumns[tle->resno] = true;
/*
 * NOTE(review): lossy extract -- the function-name comment line, the `l`
 * loop variable declaration, the `foreach(l, tlist)` header, the resjunk
 * guard before the visible `continue`, and braces are elided (see numbering
 * gaps). Restore from upstream before compiling.
 */
1955 * For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
1956 * push quals into each component query, but the quals can only reference
1957 * subquery columns that suffer no type coercions in the set operation.
1958 * Otherwise there are possible semantic gotchas. So, we check the
1959 * component queries to see if any of them have output types different from
1960 * the top-level setop outputs. unsafeColumns[k] is set true if column k
1961 * has different type in any component.
1963 * We don't have to care about typmods here: the only allowed difference
1964 * between set-op input and output typmods is input is a specific typmod
1965 * and output is -1, and that does not require a coercion.
1967 * tlist is a subquery tlist.
1968 * colTypes is an OID list of the top-level setop's output column types.
1969 * safetyInfo->unsafeColumns[] is the result array.
1972 compare_tlist_datatypes(List *tlist, List *colTypes,
1973 pushdown_safety_info *safetyInfo)
1976 ListCell *colType = list_head(colTypes);
1980 TargetEntry *tle = (TargetEntry *) lfirst(l);
1983 continue; /* ignore resjunk columns */
1984 if (colType == NULL)
1985 elog(ERROR, "wrong number of tlist entries");
/* any type mismatch marks the column unsafe for pushdown */
1986 if (exprType((Node *) tle->expr) != lfirst_oid(colType))
1987 safetyInfo->unsafeColumns[tle->resno] = true;
1988 colType = lnext(colType);
1990 if (colType != NULL)
1991 elog(ERROR, "wrong number of tlist entries");
/*
 * NOTE(review): lossy extract -- the `lc` declaration, the `return false`
 * inside the loop, the final `return true`, and braces are elided (see
 * numbering gaps). Restore from upstream before compiling.
 */
1995 * targetIsInAllPartitionLists
1996 * True if the TargetEntry is listed in the PARTITION BY clause
1997 * of every window defined in the query.
1999 * It would be safe to ignore windows not actually used by any window
2000 * function, but it's not easy to get that info at this stage; and it's
2001 * unlikely to be useful to spend any extra cycles getting it, since
2002 * unreferenced window definitions are probably infrequent in practice.
2005 targetIsInAllPartitionLists(TargetEntry *tle, Query *query)
2009 foreach(lc, query->windowClause)
2011 WindowClause *wc = (WindowClause *) lfirst(lc);
2013 if (!targetIsInSortList(tle, InvalidOid, wc->partitionClause))
/*
 * NOTE(review): lossy extract -- elided lines include the declarations of
 * `safe`, `vars`, `vl`, the `return false`/`safe = false; break;` bodies
 * paired with each visible failure check, the PlaceHolderVar IsA test
 * implied by the XXX comment, list_free(vars), the final return, and
 * braces (see numbering gaps). Restore from upstream before compiling.
 */
2020 * qual_is_pushdown_safe - is a particular qual safe to push down?
2022 * qual is a restriction clause applying to the given subquery (whose RTE
2023 * has index rti in the parent query).
2025 * Conditions checked here:
2027 * 1. The qual must not contain any subselects (mainly because I'm not sure
2028 * it will work correctly: sublinks will already have been transformed into
2029 * subplans in the qual, but not in the subquery).
2031 * 2. If unsafeVolatile is set, the qual must not contain any volatile
2034 * 3. If unsafeLeaky is set, the qual must not contain any leaky functions
2035 * that are passed Var nodes, and therefore might reveal values from the
2036 * subquery as side effects.
2038 * 4. The qual must not refer to the whole-row output of the subquery
2039 * (since there is no easy way to name that within the subquery itself).
2041 * 5. The qual must not refer to any subquery output columns that were
2042 * found to be unsafe to reference by subquery_is_pushdown_safe().
2045 qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
2046 pushdown_safety_info *safetyInfo)
2052 /* Refuse subselects (point 1) */
2053 if (contain_subplans(qual))
2056 /* Refuse volatile quals if we found they'd be unsafe (point 2) */
2057 if (safetyInfo->unsafeVolatile &&
2058 contain_volatile_functions(qual))
2061 /* Refuse leaky quals if told to (point 3) */
2062 if (safetyInfo->unsafeLeaky &&
2063 contain_leaked_vars(qual))
2067 * It would be unsafe to push down window function calls, but at least for
2068 * the moment we could never see any in a qual anyhow. (The same applies
2069 * to aggregates, which we check for in pull_var_clause below.)
2071 Assert(!contain_window_function(qual));
2074 * Examine all Vars used in clause; since it's a restriction clause, all
2075 * such Vars must refer to subselect output columns.
2077 vars = pull_var_clause(qual,
2078 PVC_REJECT_AGGREGATES,
2079 PVC_INCLUDE_PLACEHOLDERS);
2082 Var *var = (Var *) lfirst(vl);
2085 * XXX Punt if we find any PlaceHolderVars in the restriction clause.
2086 * It's not clear whether a PHV could safely be pushed down, and even
2087 * less clear whether such a situation could arise in any cases of
2088 * practical interest anyway. So for the moment, just refuse to push
2097 Assert(var->varno == rti);
2098 Assert(var->varattno >= 0);
/* whole-row reference (point 4) -- varattno 0 means the whole row */
2101 if (var->varattno == 0)
/* column previously marked unsafe (point 5) */
2108 if (safetyInfo->unsafeColumns[var->varattno])
/*
 * NOTE(review): lossy extract -- elided lines include the remaining
 * arguments of the recurse_push_qual call, the `return` after it, the
 * `else` between the HAVING and WHERE attachment branches, and braces
 * (see numbering gaps). Restore from upstream before compiling.
 */
2121 * subquery_push_qual - push down a qual that we have determined is safe
2124 subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual)
2126 if (subquery->setOperations != NULL)
2128 /* Recurse to push it separately to each component query */
2129 recurse_push_qual(subquery->setOperations, subquery,
2135 * We need to replace Vars in the qual (which must refer to outputs of
2136 * the subquery) with copies of the subquery's targetlist expressions.
2137 * Note that at this point, any uplevel Vars in the qual should have
2138 * been replaced with Params, so they need no work.
2140 * This step also ensures that when we are pushing into a setop tree,
2141 * each component query gets its own copy of the qual.
2143 qual = ReplaceVarsFromTargetList(qual, rti, 0, rte,
2144 subquery->targetList,
2145 REPLACEVARS_REPORT_ERROR, 0,
2146 &subquery->hasSubLinks);
2149 * Now attach the qual to the proper place: normally WHERE, but if the
2150 * subquery uses grouping or aggregation, put it in HAVING (since the
2151 * qual really refers to the group-result rows).
2153 if (subquery->hasAggs || subquery->groupClause || subquery->havingQual)
2154 subquery->havingQual = make_and_qual(subquery->havingQual, qual);
2156 subquery->jointree->quals =
2157 make_and_qual(subquery->jointree->quals, qual);
2160 * We need not change the subquery's hasAggs or hasSublinks flags,
2161 * since we can't be pushing down any aggregates that weren't there
2162 * before, and we don't push down subselects at all.
/*
 * NOTE(review): lossy extract -- the `else` before the unrecognized-node
 * elog and the braces are elided (see numbering gaps). Mirrors
 * recurse_pushdown_safe but applies the qual instead of checking it.
 * Restore elided lines from upstream before compiling.
 */
2168 * Helper routine to recurse through setOperations tree
2171 recurse_push_qual(Node *setOp, Query *topquery,
2172 RangeTblEntry *rte, Index rti, Node *qual)
2174 if (IsA(setOp, RangeTblRef))
2176 RangeTblRef *rtr = (RangeTblRef *) setOp;
2177 RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable);
2178 Query *subquery = subrte->subquery;
2180 Assert(subquery != NULL);
2181 subquery_push_qual(subquery, rte, rti, qual);
2183 else if (IsA(setOp, SetOperationStmt))
2185 SetOperationStmt *op = (SetOperationStmt *) setOp;
2187 recurse_push_qual(op->larg, topquery, rte, rti, qual);
2188 recurse_push_qual(op->rarg, topquery, rte, rti, qual);
2192 elog(ERROR, "unrecognized node type: %d",
2193 (int) nodeTag(setOp));
2197 /*****************************************************************************
2198 * SIMPLIFYING SUBQUERY TARGETLISTS
2199 *****************************************************************************/
2202 * remove_unused_subquery_outputs
2203 * Remove subquery targetlist items we don't need
2205 * It's possible, even likely, that the upper query does not read all the
2206 * output columns of the subquery. We can remove any such outputs that are
2207 * not needed by the subquery itself (e.g., as sort/group columns) and do not
2208 * affect semantics otherwise (e.g., volatile functions can't be removed).
2209 * This is useful not only because we might be able to remove expensive-to-
2210 * compute expressions, but because deletion of output columns might allow
2211 * optimizations such as join removal to occur within the subquery.
2213 * To avoid affecting column numbering in the targetlist, we don't physically
2214 * remove unused tlist entries, but rather replace their expressions with NULL
2215 * constants. This is implemented by modifying subquery->targetList.
2218 remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel)
2220 Bitmapset *attrs_used = NULL;
2224 * Do nothing if subquery has UNION/INTERSECT/EXCEPT: in principle we
2225 * could update all the child SELECTs' tlists, but it seems not worth the
2226 * trouble presently.
2228 if (subquery->setOperations)
2232 * If subquery has regular DISTINCT (not DISTINCT ON), we're wasting our
2233 * time: all its output columns must be used in the distinctClause.
2235 if (subquery->distinctClause && !subquery->hasDistinctOn)
2239 * Collect a bitmap of all the output column numbers used by the upper
2242 * Add all the attributes needed for joins or final output. Note: we must
2243 * look at reltargetlist, not the attr_needed data, because attr_needed
2244 * isn't computed for inheritance child rels, cf set_append_rel_size().
2245 * (XXX might be worth changing that sometime.)
2247 pull_varattnos((Node *) rel->reltargetlist, rel->relid, &attrs_used);
2249 /* Add all the attributes used by un-pushed-down restriction clauses. */
2250 foreach(lc, rel->baserestrictinfo)
2252 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
2254 pull_varattnos((Node *) rinfo->clause, rel->relid, &attrs_used);
2258 * If there's a whole-row reference to the subquery, we can't remove
2261 if (bms_is_member(0 - FirstLowInvalidHeapAttributeNumber, attrs_used))
2265 * Run through the tlist and zap entries we don't need. It's okay to
2266 * modify the tlist items in-place because set_subquery_pathlist made a
2267 * copy of the subquery.
2269 foreach(lc, subquery->targetList)
2271 TargetEntry *tle = (TargetEntry *) lfirst(lc);
2272 Node *texpr = (Node *) tle->expr;
2275 * If it has a sortgroupref number, it's used in some sort/group
2276 * clause so we'd better not remove it. Also, don't remove any
2277 * resjunk columns, since their reason for being has nothing to do
2278 * with anybody reading the subquery's output. (It's likely that
2279 * resjunk columns in a sub-SELECT would always have ressortgroupref
2280 * set, but even if they don't, it seems imprudent to remove them.)
2282 if (tle->ressortgroupref || tle->resjunk)
2286 * If it's used by the upper query, we can't remove it.
2288 if (bms_is_member(tle->resno - FirstLowInvalidHeapAttributeNumber,
2293 * If it contains a set-returning function, we can't remove it since
2294 * that could change the number of rows returned by the subquery.
2296 if (expression_returns_set(texpr))
2300 * If it contains volatile functions, we daren't remove it for fear
2301 * that the user is expecting their side-effects to happen.
2303 if (contain_volatile_functions(texpr))
2307 * OK, we don't need it. Replace the expression with a NULL constant.
2308 * Preserve the exposed type of the expression, in case something
2309 * looks at the rowtype of the subquery's result.
2311 tle->expr = (Expr *) makeNullConst(exprType(texpr),
2313 exprCollation(texpr));
/*****************************************************************************
 *			DEBUG SUPPORT
 *****************************************************************************/
2321 #ifdef OPTIMIZER_DEBUG
2324 print_relids(Relids relids)
2330 while ((x = bms_next_member(relids, x)) >= 0)
2340 print_restrictclauses(PlannerInfo *root, List *clauses)
2346 RestrictInfo *c = lfirst(l);
2348 print_expr((Node *) c->clause, root->parse->rtable);
2355 print_path(PlannerInfo *root, Path *path, int indent)
2359 Path *subpath = NULL;
2362 switch (nodeTag(path))
2370 case T_BitmapHeapPath:
2371 ptype = "BitmapHeapScan";
2373 case T_BitmapAndPath:
2374 ptype = "BitmapAndPath";
2376 case T_BitmapOrPath:
2377 ptype = "BitmapOrPath";
2383 ptype = "ForeignScan";
2388 case T_MergeAppendPath:
2389 ptype = "MergeAppend";
2394 case T_MaterialPath:
2396 subpath = ((MaterialPath *) path)->subpath;
2400 subpath = ((UniquePath *) path)->subpath;
2407 ptype = "MergeJoin";
2419 for (i = 0; i < indent; i++)
2421 printf("%s", ptype);
2426 print_relids(path->parent->relids);
2427 printf(") rows=%.0f", path->parent->rows);
2429 printf(" cost=%.2f..%.2f\n", path->startup_cost, path->total_cost);
2433 for (i = 0; i < indent; i++)
2435 printf(" pathkeys: ");
2436 print_pathkeys(path->pathkeys, root->parse->rtable);
2441 JoinPath *jp = (JoinPath *) path;
2443 for (i = 0; i < indent; i++)
2445 printf(" clauses: ");
2446 print_restrictclauses(root, jp->joinrestrictinfo);
2449 if (IsA(path, MergePath))
2451 MergePath *mp = (MergePath *) path;
2453 for (i = 0; i < indent; i++)
2455 printf(" sortouter=%d sortinner=%d materializeinner=%d\n",
2456 ((mp->outersortkeys) ? 1 : 0),
2457 ((mp->innersortkeys) ? 1 : 0),
2458 ((mp->materialize_inner) ? 1 : 0));
2461 print_path(root, jp->outerjoinpath, indent + 1);
2462 print_path(root, jp->innerjoinpath, indent + 1);
2466 print_path(root, subpath, indent + 1);
2470 debug_print_rel(PlannerInfo *root, RelOptInfo *rel)
2474 printf("RELOPTINFO (");
2475 print_relids(rel->relids);
2476 printf("): rows=%.0f width=%d\n", rel->rows, rel->width);
2478 if (rel->baserestrictinfo)
2480 printf("\tbaserestrictinfo: ");
2481 print_restrictclauses(root, rel->baserestrictinfo);
2487 printf("\tjoininfo: ");
2488 print_restrictclauses(root, rel->joininfo);
2492 printf("\tpath list:\n");
2493 foreach(l, rel->pathlist)
2494 print_path(root, lfirst(l), 1);
2495 if (rel->cheapest_startup_path)
2497 printf("\n\tcheapest startup path:\n");
2498 print_path(root, rel->cheapest_startup_path, 1);
2500 if (rel->cheapest_total_path)
2502 printf("\n\tcheapest total path:\n");
2503 print_path(root, rel->cheapest_total_path, 1);
2509 #endif /* OPTIMIZER_DEBUG */