X-Git-Url: https://granicus.if.org/sourcecode?a=blobdiff_plain;f=src%2Fbackend%2Foptimizer%2Fpath%2Fallpaths.c;h=458dae0489c029bd743c75c82f8e5102067e89bf;hb=46c508fbcf98ac334f1e831d21021d731c882fbb;hp=aa14deacd0cc831197ff1d03166e5276549949e8;hpb=a7f49252d2476cc5996b1e9dbb253cdc521fbb7f;p=postgresql diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index aa14deacd0..458dae0489 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -3,19 +3,23 @@ * allpaths.c * Routines to find possible search paths for processing a query * - * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.136 2005/08/22 17:34:58 momjian Exp $ + * src/backend/optimizer/path/allpaths.c * *------------------------------------------------------------------------- */ #include "postgres.h" -#include "nodes/makefuncs.h" +#include + +#include "catalog/pg_class.h" +#include "foreign/fdwapi.h" +#include "nodes/nodeFuncs.h" #ifdef OPTIMIZER_DEBUG #include "nodes/print.h" #endif @@ -26,33 +30,57 @@ #include "optimizer/paths.h" #include "optimizer/plancat.h" #include "optimizer/planner.h" -#include "optimizer/predtest.h" #include "optimizer/prep.h" +#include "optimizer/restrictinfo.h" #include "optimizer/var.h" -#include "parser/parsetree.h" #include "parser/parse_clause.h" -#include "parser/parse_expr.h" +#include "parser/parsetree.h" #include "rewrite/rewriteManip.h" +#include "utils/lsyscache.h" /* These parameters are set by GUC */ -bool constraint_exclusion = false; bool enable_geqo = false; /* just in case GUC doesn't set it */ int geqo_threshold; +/* Hook for plugins to replace standard_join_search() */ +join_search_hook_type join_search_hook = NULL; + +static void 
set_base_rel_sizes(PlannerInfo *root); static void set_base_rel_pathlists(PlannerInfo *root); +static void set_rel_size(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte); +static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte); +static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); -static void set_inherited_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte, - List *inheritlist); +static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte); +static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte); +static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, + List *live_childrels, + List *all_child_pathkeys); +static List *accumulate_append_subpath(List *subpaths, Path *path); +static void set_dummy_rel_pathlist(RelOptInfo *rel); static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); -static RelOptInfo *make_one_rel_by_joins(PlannerInfo *root, int levels_needed, - List *initial_rels); +static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist); static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery, bool *differentTypes); 
static bool recurse_pushdown_safe(Node *setOp, Query *topquery, @@ -62,7 +90,7 @@ static void compare_tlist_datatypes(List *tlist, List *colTypes, static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, bool *differentTypes); static void subquery_push_qual(Query *subquery, - RangeTblEntry *rte, Index rti, Node *qual); + RangeTblEntry *rte, Index rti, Node *qual); static void recurse_push_qual(Node *setOp, Query *topquery, RangeTblEntry *rte, Index rti, Node *qual); @@ -73,53 +101,79 @@ static void recurse_push_qual(Node *setOp, Query *topquery, * single rel that represents the join of all base rels in the query. */ RelOptInfo * -make_one_rel(PlannerInfo *root) +make_one_rel(PlannerInfo *root, List *joinlist) { RelOptInfo *rel; + Index rti; + + /* + * Construct the all_baserels Relids set. + */ + root->all_baserels = NULL; + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (brel == NULL) + continue; + + Assert(brel->relid == rti); /* sanity check on array */ + + /* ignore RTEs that are "other rels" */ + if (brel->reloptkind != RELOPT_BASEREL) + continue; + + root->all_baserels = bms_add_member(root->all_baserels, brel->relid); + } /* * Generate access paths for the base rels. */ + set_base_rel_sizes(root); set_base_rel_pathlists(root); /* * Generate access paths for the entire join tree. */ - Assert(root->parse->jointree != NULL && - IsA(root->parse->jointree, FromExpr)); - - rel = make_fromexpr_rel(root, root->parse->jointree); + rel = make_rel_from_joinlist(root, joinlist); /* * The result should join all and only the query's base rels. 
*/ -#ifdef USE_ASSERT_CHECKING - { - int num_base_rels = 0; - Index rti; + Assert(bms_equal(rel->relids, root->all_baserels)); - for (rti = 1; rti < root->base_rel_array_size; rti++) - { - RelOptInfo *brel = root->base_rel_array[rti]; + return rel; +} + +/* + * set_base_rel_sizes + * Set the size estimates (rows and widths) for each base-relation entry. + * + * We do this in a separate pass over the base rels so that rowcount + * estimates are available for parameterized path generation. + */ +static void +set_base_rel_sizes(PlannerInfo *root) +{ + Index rti; - if (brel == NULL) - continue; + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *rel = root->simple_rel_array[rti]; - Assert(brel->relid == rti); /* sanity check on array */ + /* there may be empty slots corresponding to non-baserel RTEs */ + if (rel == NULL) + continue; - /* ignore RTEs that are "other rels" */ - if (brel->reloptkind != RELOPT_BASEREL) - continue; + Assert(rel->relid == rti); /* sanity check on array */ - Assert(bms_is_member(rti, rel->relids)); - num_base_rels++; - } + /* ignore RTEs that are "other rels" */ + if (rel->reloptkind != RELOPT_BASEREL) + continue; - Assert(bms_num_members(rel->relids) == num_base_rels); + set_rel_size(root, rel, rti, root->simple_rte_array[rti]); } -#endif - - return rel; } /* @@ -133,16 +187,9 @@ set_base_rel_pathlists(PlannerInfo *root) { Index rti; - /* - * Note: because we call expand_inherited_rtentry inside the loop, - * it's quite possible for the base_rel_array to be enlarged while - * the loop runs. Hence don't try to optimize the loop. 
- */ - for (rti = 1; rti < root->base_rel_array_size; rti++) + for (rti = 1; rti < root->simple_rel_array_size; rti++) { - RelOptInfo *rel = root->base_rel_array[rti]; - RangeTblEntry *rte; - List *inheritlist; + RelOptInfo *rel = root->simple_rel_array[rti]; /* there may be empty slots corresponding to non-baserel RTEs */ if (rel == NULL) @@ -154,66 +201,194 @@ set_base_rel_pathlists(PlannerInfo *root) if (rel->reloptkind != RELOPT_BASEREL) continue; - rte = rt_fetch(rti, root->parse->rtable); + set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]); + } +} - if (rel->rtekind == RTE_SUBQUERY) - { - /* Subquery --- generate a separate plan for it */ - set_subquery_pathlist(root, rel, rti, rte); - } - else if (rel->rtekind == RTE_FUNCTION) - { - /* RangeFunction --- generate a separate plan for it */ - set_function_pathlist(root, rel, rte); - } - else if ((inheritlist = expand_inherited_rtentry(root, rti)) != NIL) +/* + * set_rel_size + * Set size estimates for a base relation + */ +static void +set_rel_size(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte) +{ + if (rel->reloptkind == RELOPT_BASEREL && + relation_excluded_by_constraints(root, rel, rte)) + { + /* + * We proved we don't need to scan the rel via constraint exclusion, + * so set up a single dummy path for it. Here we only check this for + * regular baserels; if it's an otherrel, CE was already checked in + * set_append_rel_pathlist(). + * + * In this case, we go ahead and set up the relation's path right away + * instead of leaving it for set_rel_pathlist to do. This is because + * we don't have a convention for marking a rel as dummy except by + * assigning a dummy path to it. 
+ */ + set_dummy_rel_pathlist(rel); + } + else if (rte->inh) + { + /* It's an "append relation", process accordingly */ + set_append_rel_size(root, rel, rti, rte); + } + else + { + switch (rel->rtekind) { - /* Relation is root of an inheritance tree, process specially */ - set_inherited_rel_pathlist(root, rel, rti, rte, inheritlist); + case RTE_RELATION: + if (rte->relkind == RELKIND_FOREIGN_TABLE) + { + /* Foreign table */ + set_foreign_size(root, rel, rte); + } + else + { + /* Plain relation */ + set_plain_rel_size(root, rel, rte); + } + break; + case RTE_SUBQUERY: + + /* + * Subqueries don't support making a choice between + * parameterized and unparameterized paths, so just go ahead + * and build their paths immediately. + */ + set_subquery_pathlist(root, rel, rti, rte); + break; + case RTE_FUNCTION: + set_function_size_estimates(root, rel); + break; + case RTE_VALUES: + set_values_size_estimates(root, rel); + break; + case RTE_CTE: + + /* + * CTEs don't support making a choice between parameterized + * and unparameterized paths, so just go ahead and build their + * paths immediately. 
+ */ + if (rte->self_reference) + set_worktable_pathlist(root, rel, rte); + else + set_cte_pathlist(root, rel, rte); + break; + default: + elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind); + break; } - else + } +} + +/* + * set_rel_pathlist + * Build access paths for a base relation + */ +static void +set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte) +{ + if (IS_DUMMY_REL(rel)) + { + /* We already proved the relation empty, so nothing more to do */ + } + else if (rte->inh) + { + /* It's an "append relation", process accordingly */ + set_append_rel_pathlist(root, rel, rti, rte); + } + else + { + switch (rel->rtekind) { - /* Plain relation */ - set_plain_rel_pathlist(root, rel, rte); + case RTE_RELATION: + if (rte->relkind == RELKIND_FOREIGN_TABLE) + { + /* Foreign table */ + set_foreign_pathlist(root, rel, rte); + } + else + { + /* Plain relation */ + set_plain_rel_pathlist(root, rel, rte); + } + break; + case RTE_SUBQUERY: + /* Subquery --- fully handled during set_rel_size */ + break; + case RTE_FUNCTION: + /* RangeFunction */ + set_function_pathlist(root, rel, rte); + break; + case RTE_VALUES: + /* Values list */ + set_values_pathlist(root, rel, rte); + break; + case RTE_CTE: + /* CTE reference --- fully handled during set_rel_size */ + break; + default: + elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind); + break; } + } #ifdef OPTIMIZER_DEBUG - debug_print_rel(root, rel); + debug_print_rel(root, rel); #endif - } } /* - * set_plain_rel_pathlist - * Build access paths for a plain relation (no subquery, no inheritance) + * set_plain_rel_size + * Set size estimates for a plain relation (no subquery, no inheritance) */ static void -set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { + /* + * Test any partial indexes of rel for applicability. 
We must do this + * first since partial unique indexes can affect size estimates. + */ + check_partial_indexes(root, rel); + /* Mark rel with estimated output rows, width, etc */ set_baserel_size_estimates(root, rel); - /* Test any partial indexes of rel for applicability */ - check_partial_indexes(root, rel); - /* * Check to see if we can extract any restriction conditions from join * quals that are OR-of-AND structures. If so, add them to the rel's - * restriction list, and recompute the size estimates. + * restriction list, and redo the above steps. */ if (create_or_index_quals(root, rel)) + { + check_partial_indexes(root, rel); set_baserel_size_estimates(root, rel); + } +} + +/* + * set_plain_rel_pathlist + * Build access paths for a plain relation (no subquery, no inheritance) + */ +static void +set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + Relids required_outer; /* - * Generate paths and add them to the rel's pathlist. - * - * Note: add_path() will discard any paths that are dominated by another - * available path, keeping only those paths that are superior along at - * least one dimension of cost or sortedness. + * We don't support pushing join clauses into the quals of a seqscan, but + * it could still have required parameterization due to LATERAL refs in + * its tlist. (That can only happen if the seqscan is on a relation + * pulled up out of a UNION ALL appendrel.) 
*/ + required_outer = rel->lateral_relids; /* Consider sequential scan */ - add_path(rel, create_seqscan_path(root, rel)); + add_path(rel, create_seqscan_path(root, rel, required_outer)); /* Consider index scans */ create_index_paths(root, rel); @@ -226,162 +401,610 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) } /* - * set_inherited_rel_pathlist - * Build access paths for a inheritance tree rooted at rel - * - * inheritlist is a list of RT indexes of all tables in the inheritance tree, - * including a duplicate of the parent itself. Note we will not come here - * unless there's at least one child in addition to the parent. + * set_foreign_size + * Set size estimates for a foreign table RTE + */ +static void +set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + /* Mark rel with estimated output rows, width, etc */ + set_foreign_size_estimates(root, rel); + + /* Get FDW routine pointers for the rel */ + rel->fdwroutine = GetFdwRoutineByRelId(rte->relid); + + /* Let FDW adjust the size estimates, if it can */ + rel->fdwroutine->GetForeignRelSize(root, rel, rte->relid); +} + +/* + * set_foreign_pathlist + * Build access paths for a foreign table RTE + */ +static void +set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + /* Call the FDW's GetForeignPaths function to generate path(s) */ + rel->fdwroutine->GetForeignPaths(root, rel, rte->relid); + + /* Select cheapest path */ + set_cheapest(rel); +} + +/* + * set_append_rel_size + * Set size estimates for an "append relation" * - * NOTE: the passed-in rel and RTE will henceforth represent the appended - * result of the whole inheritance tree. The members of inheritlist represent - * the individual tables --- in particular, the inheritlist member that is a - * duplicate of the parent RTE represents the parent table alone. 
- * We will generate plans to scan the individual tables that refer to - * the inheritlist RTEs, whereas Vars elsewhere in the plan tree that - * refer to the original RTE are taken to refer to the append output. - * In particular, this means we have separate RelOptInfos for the parent - * table and for the append output, which is a good thing because they're - * not the same size. + * The passed-in rel and RTE represent the entire append relation. The + * relation's contents are computed by appending together the output of + * the individual member relations. Note that in the inheritance case, + * the first member relation is actually the same table as is mentioned in + * the parent RTE ... but it has a different RTE and RelOptInfo. This is + * a good thing because their outputs are not the same size. */ static void -set_inherited_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte, - List *inheritlist) +set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte) { int parentRTindex = rti; - Oid parentOID = rte->relid; - List *subpaths = NIL; - ListCell *il; - - /* - * XXX for now, can't handle inherited expansion of FOR UPDATE/SHARE; - * can we do better? - */ - if (list_member_int(root->parse->rowMarks, parentRTindex)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("SELECT FOR UPDATE/SHARE is not supported for inheritance queries"))); + double parent_rows; + double parent_size; + double *parent_attrsizes; + int nattrs; + ListCell *l; /* - * Initialize to compute size estimates for whole inheritance tree + * Initialize to compute size estimates for whole append relation. + * + * We handle width estimates by weighting the widths of different child + * rels proportionally to their number of rows. This is sensible because + * the use of width estimates is mainly to compute the total relation + * "footprint" if we have to sort or hash it. 
To do this, we sum the + * total equivalent size (in "double" arithmetic) and then divide by the + * total rowcount estimate. This is done separately for the total rel + * width and each attribute. + * + * Note: if you consider changing this logic, beware that child rels could + * have zero rows and/or width, if they were excluded by constraints. */ - rel->rows = 0; - rel->width = 0; + parent_rows = 0; + parent_size = 0; + nattrs = rel->max_attr - rel->min_attr + 1; + parent_attrsizes = (double *) palloc0(nattrs * sizeof(double)); - /* - * Generate access paths for each table in the tree (parent AND - * children), and pick the cheapest path for each table. - */ - foreach(il, inheritlist) + foreach(l, root->append_rel_list) { - int childRTindex = lfirst_int(il); - RangeTblEntry *childrte; - Oid childOID; + AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); + int childRTindex; + RangeTblEntry *childRTE; RelOptInfo *childrel; + List *childquals; + Node *childqual; ListCell *parentvars; ListCell *childvars; - childrte = rt_fetch(childRTindex, root->parse->rtable); - childOID = childrte->relid; + /* append_rel_list contains all append rels; ignore others */ + if (appinfo->parent_relid != parentRTindex) + continue; + + childRTindex = appinfo->child_relid; + childRTE = root->simple_rte_array[childRTindex]; + + /* + * The child rel's RelOptInfo was already created during + * add_base_rels_to_query. + */ + childrel = find_base_rel(root, childRTindex); + Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL); /* - * Make a RelOptInfo for the child so we can do planning. - * Mark it as an "other rel" since it will not be part of the - * main join tree. + * We have to copy the parent's targetlist and quals to the child, + * with appropriate substitution of variables. However, only the + * baserestrictinfo quals are needed before we can check for + * constraint exclusion; so do that first and then check to see if we + * can disregard this child. 
+ * + * As of 8.4, the child rel's targetlist might contain non-Var + * expressions, which means that substitution into the quals could + * produce opportunities for const-simplification, and perhaps even + * pseudoconstant quals. To deal with this, we strip the RestrictInfo + * nodes, do the substitution, do const-simplification, and then + * reconstitute the RestrictInfo layer. */ - childrel = build_other_rel(root, childRTindex); + childquals = get_all_actual_clauses(rel->baserestrictinfo); + childquals = (List *) adjust_appendrel_attrs(root, + (Node *) childquals, + appinfo); + childqual = eval_const_expressions(root, (Node *) + make_ands_explicit(childquals)); + if (childqual && IsA(childqual, Const) && + (((Const *) childqual)->constisnull || + !DatumGetBool(((Const *) childqual)->constvalue))) + { + /* + * Restriction reduces to constant FALSE or constant NULL after + * substitution, so this child need not be scanned. + */ + set_dummy_rel_pathlist(childrel); + continue; + } + childquals = make_ands_implicit((Expr *) childqual); + childquals = make_restrictinfos_from_actual_clauses(root, + childquals); + childrel->baserestrictinfo = childquals; + + if (relation_excluded_by_constraints(root, childrel, childRTE)) + { + /* + * This child need not be scanned, so we can omit it from the + * appendrel. + */ + set_dummy_rel_pathlist(childrel); + continue; + } /* - * Copy the parent's targetlist and restriction quals to the - * child, with attribute-number adjustment as needed. We don't - * bother to copy the join quals, since we can't do any joining of - * the individual tables. Also, we just zap attr_needed rather - * than trying to adjust it; it won't be looked at in the child. + * CE failed, so finish copying/modifying targetlist and join quals. + * + * Note: the resulting childrel->reltargetlist may contain arbitrary + * expressions, which otherwise would not occur in a reltargetlist. + * Code that might be looking at an appendrel child must cope with + * such. 
Note in particular that "arbitrary expression" can include + * "Var belonging to another relation", due to LATERAL references. */ + childrel->joininfo = (List *) + adjust_appendrel_attrs(root, + (Node *) rel->joininfo, + appinfo); childrel->reltargetlist = (List *) - adjust_inherited_attrs((Node *) rel->reltargetlist, - parentRTindex, - parentOID, - childRTindex, - childOID); - childrel->attr_needed = NULL; - childrel->baserestrictinfo = (List *) - adjust_inherited_attrs((Node *) rel->baserestrictinfo, - parentRTindex, - parentOID, - childRTindex, - childOID); + adjust_appendrel_attrs(root, + (Node *) rel->reltargetlist, + appinfo); /* - * If we can prove we don't need to scan this child via constraint - * exclusion, just ignore it. (We have to have converted the - * baserestrictinfo Vars before we can make the test.) + * We have to make child entries in the EquivalenceClass data + * structures as well. This is needed either if the parent + * participates in some eclass joins (because we will want to consider + * inner-indexscan joins on the individual children) or if the parent + * has useful pathkeys (because we should try to build MergeAppend + * paths that produce those sort orderings). */ - if (constraint_exclusion) + if (rel->has_eclass_joins || has_useful_pathkeys(root, rel)) + add_child_rel_equivalences(root, appinfo, rel, childrel); + childrel->has_eclass_joins = rel->has_eclass_joins; + + /* + * Note: we could compute appropriate attr_needed data for the child's + * variables, by transforming the parent's attr_needed through the + * translated_vars mapping. However, currently there's no need + * because attr_needed is only examined for base relations not + * otherrels. So we just leave the child's attr_needed empty. + */ + + /* + * Compute the child's size. 
+ */ + set_rel_size(root, childrel, childRTindex, childRTE); + + /* + * It is possible that constraint exclusion detected a contradiction + * within a child subquery, even though we didn't prove one above. If + * so, we can skip this child. + */ + if (IS_DUMMY_REL(childrel)) + continue; + + /* + * Accumulate size information from each live child. + */ + if (childrel->rows > 0) { - List *constraint_pred; + parent_rows += childrel->rows; + parent_size += childrel->width * childrel->rows; - constraint_pred = get_relation_constraints(childOID, childrel); /* - * We do not currently enforce that CHECK constraints contain - * only immutable functions, so it's necessary to check here. - * We daren't draw conclusions from plan-time evaluation of - * non-immutable functions. + * Accumulate per-column estimates too. We need not do anything + * for PlaceHolderVars in the parent list. If child expression + * isn't a Var, or we didn't record a width estimate for it, we + * have to fall back on a datatype-based estimate. + * + * By construction, child's reltargetlist is 1-to-1 with parent's. */ - if (!contain_mutable_functions((Node *) constraint_pred)) + forboth(parentvars, rel->reltargetlist, + childvars, childrel->reltargetlist) { - /* - * The constraints are effectively ANDed together, so we can - * just try to refute the entire collection at once. This may - * allow us to make proofs that would fail if we took them - * individually. 
- */ - if (predicate_refuted_by(constraint_pred, - childrel->baserestrictinfo)) - continue; + Var *parentvar = (Var *) lfirst(parentvars); + Node *childvar = (Node *) lfirst(childvars); + + if (IsA(parentvar, Var)) + { + int pndx = parentvar->varattno - rel->min_attr; + int32 child_width = 0; + + if (IsA(childvar, Var) && + ((Var *) childvar)->varno == childrel->relid) + { + int cndx = ((Var *) childvar)->varattno - childrel->min_attr; + + child_width = childrel->attr_widths[cndx]; + } + if (child_width <= 0) + child_width = get_typavgwidth(exprType(childvar), + exprTypmod(childvar)); + Assert(child_width > 0); + parent_attrsizes[pndx] += child_width * childrel->rows; + } } } + } + + /* + * Save the finished size estimates. + */ + rel->rows = parent_rows; + if (parent_rows > 0) + { + int i; + + rel->width = rint(parent_size / parent_rows); + for (i = 0; i < nattrs; i++) + rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows); + } + else + rel->width = 0; /* attr_widths should be zero already */ + + /* + * Set "raw tuples" count equal to "rows" for the appendrel; needed + * because some places assume rel->tuples is valid for any baserel. + */ + rel->tuples = parent_rows; + + pfree(parent_attrsizes); +} + +/* + * set_append_rel_pathlist + * Build access paths for an "append relation" + */ +static void +set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte) +{ + int parentRTindex = rti; + List *live_childrels = NIL; + List *subpaths = NIL; + bool subpaths_valid = true; + List *all_child_pathkeys = NIL; + List *all_child_outers = NIL; + ListCell *l; + + /* + * Generate access paths for each member relation, and remember the + * cheapest path for each one. Also, identify all pathkeys (orderings) + * and parameterizations (required_outer sets) available for the member + * relations. 
+ */ + foreach(l, root->append_rel_list) + { + AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); + int childRTindex; + RangeTblEntry *childRTE; + RelOptInfo *childrel; + ListCell *lcp; + + /* append_rel_list contains all append rels; ignore others */ + if (appinfo->parent_relid != parentRTindex) + continue; + + /* Re-locate the child RTE and RelOptInfo */ + childRTindex = appinfo->child_relid; + childRTE = root->simple_rte_array[childRTindex]; + childrel = root->simple_rel_array[childRTindex]; + + /* + * Compute the child's access paths. + */ + set_rel_pathlist(root, childrel, childRTindex, childRTE); /* - * Compute the child's access paths, and save the cheapest. + * If child is dummy, ignore it. */ - set_plain_rel_pathlist(root, childrel, childrte); + if (IS_DUMMY_REL(childrel)) + continue; - subpaths = lappend(subpaths, childrel->cheapest_total_path); + /* + * Child is live, so add it to the live_childrels list for use below. + */ + live_childrels = lappend(live_childrels, childrel); /* - * Propagate size information from the child back to the parent. - * For simplicity, we use the largest widths from any child as the - * parent estimates. + * If child has an unparameterized cheapest-total path, add that to + * the unparameterized Append path we are constructing for the parent. + * If not, there's no workable unparameterized path. */ - rel->rows += childrel->rows; - if (childrel->width > rel->width) - rel->width = childrel->width; + if (childrel->cheapest_total_path->param_info == NULL) + subpaths = accumulate_append_subpath(subpaths, + childrel->cheapest_total_path); + else + subpaths_valid = false; - forboth(parentvars, rel->reltargetlist, - childvars, childrel->reltargetlist) + /* + * Collect lists of all the available path orderings and + * parameterizations for all the children. We use these as a + * heuristic to indicate which sort orderings and parameterizations we + * should build Append and MergeAppend paths for. 
+ */ + foreach(lcp, childrel->pathlist) { - Var *parentvar = (Var *) lfirst(parentvars); - Var *childvar = (Var *) lfirst(childvars); + Path *childpath = (Path *) lfirst(lcp); + List *childkeys = childpath->pathkeys; + Relids childouter = PATH_REQ_OUTER(childpath); - if (IsA(parentvar, Var) &&IsA(childvar, Var)) + /* Unsorted paths don't contribute to pathkey list */ + if (childkeys != NIL) { - int pndx = parentvar->varattno - rel->min_attr; - int cndx = childvar->varattno - childrel->min_attr; + ListCell *lpk; + bool found = false; + + /* Have we already seen this ordering? */ + foreach(lpk, all_child_pathkeys) + { + List *existing_pathkeys = (List *) lfirst(lpk); + + if (compare_pathkeys(existing_pathkeys, + childkeys) == PATHKEYS_EQUAL) + { + found = true; + break; + } + } + if (!found) + { + /* No, so add it to all_child_pathkeys */ + all_child_pathkeys = lappend(all_child_pathkeys, + childkeys); + } + } - if (childrel->attr_widths[cndx] > rel->attr_widths[pndx]) - rel->attr_widths[pndx] = childrel->attr_widths[cndx]; + /* Unparameterized paths don't contribute to param-set list */ + if (childouter) + { + ListCell *lco; + bool found = false; + + /* Have we already seen this param set? */ + foreach(lco, all_child_outers) + { + Relids existing_outers = (Relids) lfirst(lco); + + if (bms_equal(existing_outers, childouter)) + { + found = true; + break; + } + } + if (!found) + { + /* No, so add it to all_child_outers */ + all_child_outers = lappend(all_child_outers, + childouter); + } } } } /* - * Finally, build Append path and install it as the only access path - * for the parent rel. (Note: this is correct even if we have zero - * or one live subpath due to constraint exclusion.) + * If we found unparameterized paths for all children, build an unordered, + * unparameterized Append path for the rel. (Note: this is correct even + * if we have zero or one live subpath due to constraint exclusion.) 
*/ - add_path(rel, (Path *) create_append_path(rel, subpaths)); + if (subpaths_valid) + add_path(rel, (Path *) create_append_path(rel, subpaths, NULL)); + + /* + * Also build unparameterized MergeAppend paths based on the collected + * list of child pathkeys. + */ + if (subpaths_valid) + generate_mergeappend_paths(root, rel, live_childrels, + all_child_pathkeys); + + /* + * Build Append paths for each parameterization seen among the child rels. + * (This may look pretty expensive, but in most cases of practical + * interest, the child rels will expose mostly the same parameterizations, + * so that not that many cases actually get considered here.) + * + * The Append node itself cannot enforce quals, so all qual checking must + * be done in the child paths. This means that to have a parameterized + * Append path, we must have the exact same parameterization for each + * child path; otherwise some children might be failing to check the + * moved-down quals. To make them match up, we can try to increase the + * parameterization of lesser-parameterized paths. 
+ */ + foreach(l, all_child_outers) + { + Relids required_outer = (Relids) lfirst(l); + ListCell *lcr; + + /* Select the child paths for an Append with this parameterization */ + subpaths = NIL; + subpaths_valid = true; + foreach(lcr, live_childrels) + { + RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr); + Path *cheapest_total; + + cheapest_total = + get_cheapest_path_for_pathkeys(childrel->pathlist, + NIL, + required_outer, + TOTAL_COST); + Assert(cheapest_total != NULL); + + /* Children must have exactly the desired parameterization */ + if (!bms_equal(PATH_REQ_OUTER(cheapest_total), required_outer)) + { + cheapest_total = reparameterize_path(root, cheapest_total, + required_outer, 1.0); + if (cheapest_total == NULL) + { + subpaths_valid = false; + break; + } + } + + subpaths = accumulate_append_subpath(subpaths, cheapest_total); + } + + if (subpaths_valid) + add_path(rel, (Path *) + create_append_path(rel, subpaths, required_outer)); + } + + /* Select cheapest paths */ + set_cheapest(rel); +} + +/* + * generate_mergeappend_paths + * Generate MergeAppend paths for an append relation + * + * Generate a path for each ordering (pathkey list) appearing in + * all_child_pathkeys. + * + * We consider both cheapest-startup and cheapest-total cases, ie, for each + * interesting ordering, collect all the cheapest startup subpaths and all the + * cheapest total paths, and build a MergeAppend path for each case. + * + * We don't currently generate any parameterized MergeAppend paths. While + * it would not take much more code here to do so, it's very unclear that it + * is worth the planning cycles to investigate such paths: there's little + * use for an ordered path on the inside of a nestloop. 
In fact, it's likely + * that the current coding of add_path would reject such paths out of hand, + * because add_path gives no credit for sort ordering of parameterized paths, + * and a parameterized MergeAppend is going to be more expensive than the + * corresponding parameterized Append path. If we ever try harder to support + * parameterized mergejoin plans, it might be worth adding support for + * parameterized MergeAppends to feed such joins. (See notes in + * optimizer/README for why that might not ever happen, though.) + */ +static void +generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, + List *live_childrels, + List *all_child_pathkeys) +{ + ListCell *lcp; + + foreach(lcp, all_child_pathkeys) + { + List *pathkeys = (List *) lfirst(lcp); + List *startup_subpaths = NIL; + List *total_subpaths = NIL; + bool startup_neq_total = false; + ListCell *lcr; + + /* Select the child paths for this ordering... */ + foreach(lcr, live_childrels) + { + RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr); + Path *cheapest_startup, + *cheapest_total; + + /* Locate the right paths, if they are available. */ + cheapest_startup = + get_cheapest_path_for_pathkeys(childrel->pathlist, + pathkeys, + NULL, + STARTUP_COST); + cheapest_total = + get_cheapest_path_for_pathkeys(childrel->pathlist, + pathkeys, + NULL, + TOTAL_COST); + + /* + * If we can't find any paths with the right order just use the + * cheapest-total path; we'll have to sort it later. + */ + if (cheapest_startup == NULL || cheapest_total == NULL) + { + cheapest_startup = cheapest_total = + childrel->cheapest_total_path; + /* Assert we do have an unparameterized path for this child */ + Assert(cheapest_total->param_info == NULL); + } + + /* + * Notice whether we actually have different paths for the + * "startup" and "total" cases; frequently there will be no point + * in two create_merge_append_path() calls.
+ */ + if (cheapest_startup != cheapest_total) + startup_neq_total = true; + + startup_subpaths = + accumulate_append_subpath(startup_subpaths, cheapest_startup); + total_subpaths = + accumulate_append_subpath(total_subpaths, cheapest_total); + } + + /* ... and build the MergeAppend paths */ + add_path(rel, (Path *) create_merge_append_path(root, + rel, + startup_subpaths, + pathkeys, + NULL)); + if (startup_neq_total) + add_path(rel, (Path *) create_merge_append_path(root, + rel, + total_subpaths, + pathkeys, + NULL)); + } +} + +/* + * accumulate_append_subpath + * Add a subpath to the list being built for an Append or MergeAppend + * + * It's possible that the child is itself an Append path, in which case + * we can "cut out the middleman" and just add its child paths to our + * own list. (We don't try to do this earlier because we need to + * apply both levels of transformation to the quals.) + */ +static List * +accumulate_append_subpath(List *subpaths, Path *path) +{ + if (IsA(path, AppendPath)) + { + AppendPath *apath = (AppendPath *) path; + + /* list_copy is important here to avoid sharing list substructure */ + return list_concat(subpaths, list_copy(apath->subpaths)); + } + else + return lappend(subpaths, path); +} + +/* + * set_dummy_rel_pathlist + * Build a dummy path for a relation that's been excluded by constraints + * + * Rather than inventing a special "dummy" path type, we represent this as an + * AppendPath with no members (see also IS_DUMMY_PATH/IS_DUMMY_REL macros). + */ +static void +set_dummy_rel_pathlist(RelOptInfo *rel) +{ + /* Set dummy size estimates --- we leave attr_widths[] as zeroes */ + rel->rows = 0; + rel->width = 0; + + /* Discard any pre-existing paths; no further need for them */ + rel->pathlist = NIL; + + add_path(rel, (Path *) create_append_path(rel, NIL, NULL)); /* Select cheapest path (pretty easy in this case...) 
*/ set_cheapest(rel); @@ -394,9 +1017,9 @@ has_multiple_baserels(PlannerInfo *root) int num_base_rels = 0; Index rti; - for (rti = 1; rti < root->base_rel_array_size; rti++) + for (rti = 1; rti < root->simple_rel_array_size; rti++) { - RelOptInfo *brel = root->base_rel_array[rti]; + RelOptInfo *brel = root->simple_rel_array[rti]; if (brel == NULL) continue; @@ -412,6 +1035,14 @@ has_multiple_baserels(PlannerInfo *root) /* * set_subquery_pathlist * Build the (single) access path for a subquery RTE + * + * We don't currently support generating parameterized paths for subqueries + * by pushing join clauses down into them; it seems too expensive to re-plan + * the subquery multiple times to consider different alternatives. So the + * subquery will have exactly one path. (The path will be parameterized + * if the subquery contains LATERAL references, otherwise not.) Since there's + * no freedom of action here, there's no need for a separate set_subquery_size + * phase: we just make the path right away. */ static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, @@ -419,10 +1050,25 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, { Query *parse = root->parse; Query *subquery = rte->subquery; + Relids required_outer; bool *differentTypes; double tuple_fraction; + PlannerInfo *subroot; List *pathkeys; - List *subquery_pathkeys; + + /* + * Must copy the Query so that planning doesn't mess up the RTE contents + * (really really need to fix the planner to not scribble on its input, + * someday). + */ + subquery = copyObject(subquery); + + /* + * If it's a LATERAL subquery, it might contain some Vars of the current + * query level, requiring it to be treated as parameterized, even though + * we don't support pushing down join quals into subqueries. 
+ */ + required_outer = rel->lateral_relids; /* We need a workspace for keeping track of set-op type coercions */ differentTypes = (bool *) @@ -430,15 +1076,21 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, /* * If there are any restriction clauses that have been attached to the - * subquery relation, consider pushing them down to become WHERE or - * HAVING quals of the subquery itself. This transformation is useful - * because it may allow us to generate a better plan for the subquery - * than evaluating all the subquery output rows and then filtering them. + * subquery relation, consider pushing them down to become WHERE or HAVING + * quals of the subquery itself. This transformation is useful because it + * may allow us to generate a better plan for the subquery than evaluating + * all the subquery output rows and then filtering them. + * + * There are several cases where we cannot push down clauses. Restrictions + * involving the subquery are checked by subquery_is_pushdown_safe(). + * Restrictions on individual clauses are checked by + * qual_is_pushdown_safe(). Also, we don't want to push down + * pseudoconstant clauses; better to have the gating node above the + * subquery. * - * There are several cases where we cannot push down clauses. - * Restrictions involving the subquery are checked by - * subquery_is_pushdown_safe(). Restrictions on individual clauses - * are checked by qual_is_pushdown_safe(). + * Also, if the sub-query has the "security_barrier" flag, it means the + * sub-query originated from a view that must enforce row-level security. + * Then we must not push down quals that contain leaky functions. * * Non-pushed-down clauses will get evaluated as qpquals of the * SubqueryScan node. 
@@ -458,7 +1110,10 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); Node *clause = (Node *) rinfo->clause; - if (qual_is_pushdown_safe(subquery, rti, clause, differentTypes)) + if (!rinfo->pseudoconstant && + (!rte->security_barrier || + !contain_leaky_functions(clause)) && + qual_is_pushdown_safe(subquery, rti, clause, differentTypes)) { /* Push it down */ subquery_push_qual(subquery, rte, rti, clause); @@ -475,10 +1130,10 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, pfree(differentTypes); /* - * We can safely pass the outer tuple_fraction down to the subquery - * if the outer level has no joining, aggregation, or sorting to do. - * Otherwise we'd better tell the subquery to plan for full retrieval. - * (XXX This could probably be made more intelligent ...) + * We can safely pass the outer tuple_fraction down to the subquery if the + * outer level has no joining, aggregation, or sorting to do. Otherwise + * we'd better tell the subquery to plan for full retrieval. (XXX This + * could probably be made more intelligent ...) */ if (parse->hasAggs || parse->groupClause || @@ -490,21 +1145,39 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, else tuple_fraction = root->tuple_fraction; + /* plan_params should not be in use in current query level */ + Assert(root->plan_params == NIL); + /* Generate the plan for the subquery */ - rel->subplan = subquery_planner(subquery, tuple_fraction, - &subquery_pathkeys); + rel->subplan = subquery_planner(root->glob, subquery, + root, + false, tuple_fraction, + &subroot); + rel->subroot = subroot; - /* Copy number of output rows from subplan */ - rel->tuples = rel->subplan->plan_rows; + /* Isolate the params needed by this specific subplan */ + rel->subplan_params = root->plan_params; + root->plan_params = NIL; + + /* + * It's possible that constraint exclusion proved the subquery empty. 
If + * so, it's convenient to turn it back into a dummy path so that we will + * recognize appropriate optimizations at this level. + */ + if (is_dummy_plan(rel->subplan)) + { + set_dummy_rel_pathlist(rel); + return; + } /* Mark rel with estimated output rows, width, etc */ - set_baserel_size_estimates(root, rel); + set_subquery_size_estimates(root, rel); /* Convert subquery pathkeys to outer representation */ - pathkeys = convert_subquery_pathkeys(root, rel, subquery_pathkeys); + pathkeys = convert_subquery_pathkeys(root, rel, subroot->query_pathkeys); /* Generate appropriate path */ - add_path(rel, create_subqueryscan_path(rel, pathkeys)); + add_path(rel, create_subqueryscan_path(root, rel, pathkeys, required_outer)); /* Select cheapest path (pretty easy in this case...) */ set_cheapest(rel); @@ -517,54 +1190,230 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { + Relids required_outer; + + /* + * We don't support pushing join clauses into the quals of a function + * scan, but it could still have required parameterization due to LATERAL + * refs in the function expression. + */ + required_outer = rel->lateral_relids; + + /* Generate appropriate path */ + add_path(rel, create_functionscan_path(root, rel, required_outer)); + + /* Select cheapest path (pretty easy in this case...) */ + set_cheapest(rel); +} + +/* + * set_values_pathlist + * Build the (single) access path for a VALUES RTE + */ +static void +set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + Relids required_outer; + + /* + * We don't support pushing join clauses into the quals of a values scan, + * but it could still have required parameterization due to LATERAL refs + * in the values expressions. 
+ */ + required_outer = rel->lateral_relids; + + /* Generate appropriate path */ + add_path(rel, create_valuesscan_path(root, rel, required_outer)); + + /* Select cheapest path (pretty easy in this case...) */ + set_cheapest(rel); +} + +/* + * set_cte_pathlist + * Build the (single) access path for a non-self-reference CTE RTE + * + * There's no need for a separate set_cte_size phase, since we don't + * support join-qual-parameterized paths for CTEs. + */ +static void +set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + Plan *cteplan; + PlannerInfo *cteroot; + Index levelsup; + int ndx; + ListCell *lc; + int plan_id; + Relids required_outer; + + /* + * Find the referenced CTE, and locate the plan previously made for it. + */ + levelsup = rte->ctelevelsup; + cteroot = root; + while (levelsup-- > 0) + { + cteroot = cteroot->parent_root; + if (!cteroot) /* shouldn't happen */ + elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); + } + + /* + * Note: cte_plan_ids can be shorter than cteList, if we are still working + * on planning the CTEs (ie, this is a side-reference from another CTE). + * So we mustn't use forboth here. 
+ */ + ndx = 0; + foreach(lc, cteroot->parse->cteList) + { + CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); + + if (strcmp(cte->ctename, rte->ctename) == 0) + break; + ndx++; + } + if (lc == NULL) /* shouldn't happen */ + elog(ERROR, "could not find CTE \"%s\"", rte->ctename); + if (ndx >= list_length(cteroot->cte_plan_ids)) + elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename); + plan_id = list_nth_int(cteroot->cte_plan_ids, ndx); + Assert(plan_id > 0); + cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1); + /* Mark rel with estimated output rows, width, etc */ - set_function_size_estimates(root, rel); + set_cte_size_estimates(root, rel, cteplan); + + /* + * We don't support pushing join clauses into the quals of a CTE scan, but + * it could still have required parameterization due to LATERAL refs in + * its tlist. (That can only happen if the CTE scan is on a relation + * pulled up out of a UNION ALL appendrel.) + */ + required_outer = rel->lateral_relids; /* Generate appropriate path */ - add_path(rel, create_functionscan_path(root, rel)); + add_path(rel, create_ctescan_path(root, rel, required_outer)); /* Select cheapest path (pretty easy in this case...) */ set_cheapest(rel); } /* - * make_fromexpr_rel - * Build access paths for a FromExpr jointree node. + * set_worktable_pathlist + * Build the (single) access path for a self-reference CTE RTE + * + * There's no need for a separate set_worktable_size phase, since we don't + * support join-qual-parameterized paths for CTEs. */ -RelOptInfo * -make_fromexpr_rel(PlannerInfo *root, FromExpr *from) +static void +set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + Plan *cteplan; + PlannerInfo *cteroot; + Index levelsup; + Relids required_outer; + + /* + * We need to find the non-recursive term's plan, which is in the plan + * level that's processing the recursive UNION, which is one level *below* + * where the CTE comes from. 
+ */ + levelsup = rte->ctelevelsup; + if (levelsup == 0) /* shouldn't happen */ + elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); + levelsup--; + cteroot = root; + while (levelsup-- > 0) + { + cteroot = cteroot->parent_root; + if (!cteroot) /* shouldn't happen */ + elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); + } + cteplan = cteroot->non_recursive_plan; + if (!cteplan) /* shouldn't happen */ + elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename); + + /* Mark rel with estimated output rows, width, etc */ + set_cte_size_estimates(root, rel, cteplan); + + /* + * We don't support pushing join clauses into the quals of a worktable + * scan, but it could still have required parameterization due to LATERAL + * refs in its tlist. (That can only happen if the worktable scan is on a + * relation pulled up out of a UNION ALL appendrel. I'm not sure this is + * actually possible given the restrictions on recursive references, but + * it's easy enough to support.) + */ + required_outer = rel->lateral_relids; + + /* Generate appropriate path */ + add_path(rel, create_worktablescan_path(root, rel, required_outer)); + + /* Select cheapest path (pretty easy in this case...) */ + set_cheapest(rel); +} + +/* + * make_rel_from_joinlist + * Build access paths using a "joinlist" to guide the join path search. + * + * See comments for deconstruct_jointree() for definition of the joinlist + * data structure. + */ +static RelOptInfo * +make_rel_from_joinlist(PlannerInfo *root, List *joinlist) { int levels_needed; - List *initial_rels = NIL; - ListCell *jt; + List *initial_rels; + ListCell *jl; /* - * Count the number of child jointree nodes. This is the depth of the - * dynamic-programming algorithm we must employ to consider all ways - * of joining the child nodes. + * Count the number of child joinlist nodes. This is the depth of the + * dynamic-programming algorithm we must employ to consider all ways of + * joining the child nodes. 
*/ - levels_needed = list_length(from->fromlist); + levels_needed = list_length(joinlist); if (levels_needed <= 0) return NULL; /* nothing to do? */ /* - * Construct a list of rels corresponding to the child jointree nodes. + * Construct a list of rels corresponding to the child joinlist nodes. * This may contain both base rels and rels constructed according to - * explicit JOIN directives. + * sub-joinlists. */ - foreach(jt, from->fromlist) + initial_rels = NIL; + foreach(jl, joinlist) { - Node *jtnode = (Node *) lfirst(jt); + Node *jlnode = (Node *) lfirst(jl); + RelOptInfo *thisrel; + + if (IsA(jlnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jlnode)->rtindex; - initial_rels = lappend(initial_rels, - make_jointree_rel(root, jtnode)); + thisrel = find_base_rel(root, varno); + } + else if (IsA(jlnode, List)) + { + /* Recurse to handle subproblem */ + thisrel = make_rel_from_joinlist(root, (List *) jlnode); + } + else + { + elog(ERROR, "unrecognized joinlist node type: %d", + (int) nodeTag(jlnode)); + thisrel = NULL; /* keep compiler quiet */ + } + + initial_rels = lappend(initial_rels, thisrel); } if (levels_needed == 1) { /* - * Single jointree node, so we're done. + * Single joinlist node, so we're done. */ return (RelOptInfo *) linitial(initial_rels); } @@ -572,18 +1421,25 @@ make_fromexpr_rel(PlannerInfo *root, FromExpr *from) { /* * Consider the different orders in which we could join the rels, - * using either GEQO or regular optimizer. + * using a plugin, GEQO, or the regular join search code. + * + * We put the initial_rels list into a PlannerInfo field because + * has_legal_joinclause() needs to look at it (ugly :-(). 
*/ - if (enable_geqo && levels_needed >= geqo_threshold) + root->initial_rels = initial_rels; + + if (join_search_hook) + return (*join_search_hook) (root, levels_needed, initial_rels); + else if (enable_geqo && levels_needed >= geqo_threshold) return geqo(root, levels_needed, initial_rels); else - return make_one_rel_by_joins(root, levels_needed, initial_rels); + return standard_join_search(root, levels_needed, initial_rels); } } /* - * make_one_rel_by_joins - * Find all possible joinpaths for a query by successively finding ways + * standard_join_search + * Find possible joinpaths for a query by successively finding ways * to join component relations into join relations. * * 'levels_needed' is the number of iterations needed, ie, the number of @@ -591,48 +1447,69 @@ make_fromexpr_rel(PlannerInfo *root, FromExpr *from) * * 'initial_rels' is a list of RelOptInfo nodes for each independent * jointree item. These are the components to be joined together. + * Note that levels_needed == list_length(initial_rels). * * Returns the final level of join relations, i.e., the relation that is * the result of joining all the original relations together. + * At least one implementation path must be provided for this relation and + * all required sub-relations. + * + * To support loadable plugins that modify planner behavior by changing the + * join searching algorithm, we provide a hook variable that lets a plugin + * replace or supplement this function. Any such hook must return the same + * final join relation as the standard code would, but it might have a + * different set of implementation paths attached, and only the sub-joinrels + * needed for these paths need have been instantiated. + * + * Note to plugin authors: the functions invoked during standard_join_search() + * modify root->join_rel_list and root->join_rel_hash. If you want to do more + * than one join-order search, you'll probably need to save and restore the + * original states of those data structures. 
See geqo_eval() for an example. */ -static RelOptInfo * -make_one_rel_by_joins(PlannerInfo *root, int levels_needed, List *initial_rels) +RelOptInfo * +standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) { - List **joinitems; int lev; RelOptInfo *rel; /* - * We employ a simple "dynamic programming" algorithm: we first find - * all ways to build joins of two jointree items, then all ways to - * build joins of three items (from two-item joins and single items), - * then four-item joins, and so on until we have considered all ways - * to join all the items into one rel. + * This function cannot be invoked recursively within any one planning + * problem, so join_rel_level[] can't be in use already. + */ + Assert(root->join_rel_level == NULL); + + /* + * We employ a simple "dynamic programming" algorithm: we first find all + * ways to build joins of two jointree items, then all ways to build joins + * of three items (from two-item joins and single items), then four-item + * joins, and so on until we have considered all ways to join all the + * items into one rel. * - * joinitems[j] is a list of all the j-item rels. Initially we set - * joinitems[1] to represent all the single-jointree-item relations. + * root->join_rel_level[j] is a list of all the j-item rels. Initially we + * set root->join_rel_level[1] to represent all the single-jointree-item + * relations. */ - joinitems = (List **) palloc0((levels_needed + 1) * sizeof(List *)); + root->join_rel_level = (List **) palloc0((levels_needed + 1) * sizeof(List *)); - joinitems[1] = initial_rels; + root->join_rel_level[1] = initial_rels; for (lev = 2; lev <= levels_needed; lev++) { - ListCell *x; + ListCell *lc; /* * Determine all possible pairs of relations to be joined at this * level, and build paths for making each one from every available * pair of lower-level relations. 
*/ - joinitems[lev] = make_rels_by_joins(root, lev, joinitems); + join_search_one_level(root, lev); /* * Do cleanup work on each just-processed rel. */ - foreach(x, joinitems[lev]) + foreach(lc, root->join_rel_level[lev]) { - rel = (RelOptInfo *) lfirst(x); + rel = (RelOptInfo *) lfirst(lc); /* Find and save the cheapest paths for this rel */ set_cheapest(rel); @@ -646,11 +1523,13 @@ make_one_rel_by_joins(PlannerInfo *root, int levels_needed, List *initial_rels) /* * We should have a single rel at the final level. */ - if (joinitems[levels_needed] == NIL) + if (root->join_rel_level[levels_needed] == NIL) elog(ERROR, "failed to build any %d-way joins", levels_needed); - Assert(list_length(joinitems[levels_needed]) == 1); + Assert(list_length(root->join_rel_level[levels_needed]) == 1); + + rel = (RelOptInfo *) linitial(root->join_rel_level[levels_needed]); - rel = (RelOptInfo *) linitial(joinitems[levels_needed]); + root->join_rel_level = NULL; return rel; } @@ -671,10 +1550,13 @@ make_one_rel_by_joins(PlannerInfo *root, int levels_needed, List *initial_rels) * 1. If the subquery has a LIMIT clause, we must not push down any quals, * since that could change the set of rows returned. * - * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push - * quals into it, because that would change the results. + * 2. If the subquery contains any window functions, we can't push quals + * into it, because that could change the results. * - * 3. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can + * 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push + * quals into it, because that could change the results. + * + * 4. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can * push quals into each component query, but the quals can only reference * subquery columns that suffer no type coercions in the set operation. * Otherwise there are possible semantic gotchas. 
So, we check the @@ -692,6 +1574,10 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery, if (subquery->limitOffset != NULL || subquery->limitCount != NULL) return false; + /* Check point 2 */ + if (subquery->hasWindowFuncs) + return false; + /* Are we at top level, or looking at a setop component? */ if (subquery == topquery) { @@ -757,6 +1643,10 @@ recurse_pushdown_safe(Node *setOp, Query *topquery, * Compare tlist's datatypes against the list of set-operation result types. * For any items that are different, mark the appropriate element of * differentTypes[] to show that this column will have type conversions. + * + * We don't have to care about typmods here: the only allowed difference + * between set-op input and output typmods is that the input has a specific + * typmod and the output is -1, and that does not require a coercion. */ static void compare_tlist_datatypes(List *tlist, List *colTypes, @@ -793,21 +1683,29 @@ compare_tlist_datatypes(List *tlist, List *colTypes, * it will work correctly: sublinks will already have been transformed into * subplans in the qual, but not in the subquery). * - * 2. The qual must not refer to any subquery output columns that were + * 2. The qual must not refer to the whole-row output of the subquery + * (since there is no easy way to name that within the subquery itself). + * + * 3. The qual must not refer to any subquery output columns that were * found to have inconsistent types across a set operation tree by * subquery_is_pushdown_safe(). * - * 3. If the subquery uses DISTINCT ON, we must not push down any quals that + * 4. If the subquery uses DISTINCT ON, we must not push down any quals that * refer to non-DISTINCT output columns, because that could change the set - * of rows returned. This condition is vacuous for DISTINCT, because then - * there are no non-DISTINCT output columns, but unfortunately it's fairly - * expensive to tell the difference between DISTINCT and DISTINCT ON in the - * parsetree representation.
It's cheaper to just make sure all the Vars - * in the qual refer to DISTINCT columns. + * of rows returned. (This condition is vacuous for DISTINCT, because then + * there are no non-DISTINCT output columns, so we needn't check. But note + * we are assuming that the qual can't distinguish values that the DISTINCT + * operator sees as equal. This is a bit shaky but we have no way to test + * for the case, and it's unlikely enough that we shouldn't refuse the + * optimization just because it could theoretically happen.) * - * 4. We must not push down any quals that refer to subselect outputs that + * 5. We must not push down any quals that refer to subselect outputs that * return sets, else we'd introduce functions-returning-sets into the * subquery's WHERE/HAVING quals. + * + * 6. We must not push down any quals that refer to subselect outputs that + * contain volatile functions, for fear of introducing strange results due + * to multiple evaluation of a volatile function. */ static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, @@ -823,27 +1721,56 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, return false; /* - * Examine all Vars used in clause; since it's a restriction clause, - * all such Vars must refer to subselect output columns. + * It would be unsafe to push down window function calls, but at least for + * the moment we could never see any in a qual anyhow. (The same applies + * to aggregates, which we check for in pull_var_clause below.) + */ + Assert(!contain_window_function(qual)); + + /* + * Examine all Vars used in clause; since it's a restriction clause, all + * such Vars must refer to subselect output columns. */ - vars = pull_var_clause(qual, false); + vars = pull_var_clause(qual, + PVC_REJECT_AGGREGATES, + PVC_INCLUDE_PLACEHOLDERS); foreach(vl, vars) { Var *var = (Var *) lfirst(vl); TargetEntry *tle; + /* + * XXX Punt if we find any PlaceHolderVars in the restriction clause. 
+ * It's not clear whether a PHV could safely be pushed down, and even + * less clear whether such a situation could arise in any cases of + * practical interest anyway. So for the moment, just refuse to push + * down. + */ + if (!IsA(var, Var)) + { + safe = false; + break; + } + Assert(var->varno == rti); + /* Check point 2 */ + if (var->varattno == 0) + { + safe = false; + break; + } + /* - * We use a bitmapset to avoid testing the same attno more than - * once. (NB: this only works because subquery outputs can't have - * negative attnos.) + * We use a bitmapset to avoid testing the same attno more than once. + * (NB: this only works because subquery outputs can't have negative + * attnos.) */ if (bms_is_member(var->varattno, tested)) continue; tested = bms_add_member(tested, var->varattno); - /* Check point 2 */ + /* Check point 3 */ if (differentTypes[var->varattno]) { safe = false; @@ -855,21 +1782,28 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, Assert(tle != NULL); Assert(!tle->resjunk); - /* If subquery uses DISTINCT or DISTINCT ON, check point 3 */ - if (subquery->distinctClause != NIL && - !targetIsInSortList(tle, subquery->distinctClause)) + /* If subquery uses DISTINCT ON, check point 4 */ + if (subquery->hasDistinctOn && + !targetIsInSortList(tle, InvalidOid, subquery->distinctClause)) { /* non-DISTINCT column, so fail */ safe = false; break; } - /* Refuse functions returning sets (point 4) */ + /* Refuse functions returning sets (point 5) */ if (expression_returns_set((Node *) tle->expr)) { safe = false; break; } + + /* Refuse volatile functions (point 6) */ + if (contain_volatile_functions((Node *) tle->expr)) + { + safe = false; + break; + } } list_free(vars); @@ -893,23 +1827,23 @@ subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual) else { /* - * We need to replace Vars in the qual (which must refer to - * outputs of the subquery) with copies of the subquery's - * targetlist expressions. 
Note that at this point, any uplevel - * Vars in the qual should have been replaced with Params, so they - * need no work. + * We need to replace Vars in the qual (which must refer to outputs of + * the subquery) with copies of the subquery's targetlist expressions. + * Note that at this point, any uplevel Vars in the qual should have + * been replaced with Params, so they need no work. * * This step also ensures that when we are pushing into a setop tree, * each component query gets its own copy of the qual. */ qual = ResolveNew(qual, rti, 0, rte, subquery->targetList, - CMD_SELECT, 0); + CMD_SELECT, 0, + &subquery->hasSubLinks); /* - * Now attach the qual to the proper place: normally WHERE, but - * if the subquery uses grouping or aggregation, put it in HAVING - * (since the qual really refers to the group-result rows). + * Now attach the qual to the proper place: normally WHERE, but if the + * subquery uses grouping or aggregation, put it in HAVING (since the + * qual really refers to the group-result rows). */ if (subquery->hasAggs || subquery->groupClause || subquery->havingQual) subquery->havingQual = make_and_qual(subquery->havingQual, qual); @@ -919,8 +1853,8 @@ subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual) /* * We need not change the subquery's hasAggs or hasSublinks flags, - * since we can't be pushing down any aggregates that weren't - * there before, and we don't push down subselects at all. + * since we can't be pushing down any aggregates that weren't there + * before, and we don't push down subselects at all. 
*/ } } @@ -1022,12 +1956,17 @@ print_path(PlannerInfo *root, Path *path, int indent) case T_TidPath: ptype = "TidScan"; break; + case T_ForeignPath: + ptype = "ForeignScan"; + break; case T_AppendPath: ptype = "Append"; break; + case T_MergeAppendPath: + ptype = "MergeAppend"; + break; case T_ResultPath: ptype = "Result"; - subpath = ((ResultPath *) path)->subpath; break; case T_MaterialPath: ptype = "Material"; @@ -1088,14 +2027,12 @@ print_path(PlannerInfo *root, Path *path, int indent) { MergePath *mp = (MergePath *) path; - if (mp->outersortkeys || mp->innersortkeys) - { - for (i = 0; i < indent; i++) - printf("\t"); - printf(" sortouter=%d sortinner=%d\n", - ((mp->outersortkeys) ? 1 : 0), - ((mp->innersortkeys) ? 1 : 0)); - } + for (i = 0; i < indent; i++) + printf("\t"); + printf(" sortouter=%d sortinner=%d materializeinner=%d\n", + ((mp->outersortkeys) ? 1 : 0), + ((mp->innersortkeys) ? 1 : 0), + ((mp->materialize_inner) ? 1 : 0)); } print_path(root, jp->outerjoinpath, indent + 1); @@ -1132,10 +2069,16 @@ debug_print_rel(PlannerInfo *root, RelOptInfo *rel) printf("\tpath list:\n"); foreach(l, rel->pathlist) print_path(root, lfirst(l), 1); - printf("\n\tcheapest startup path:\n"); - print_path(root, rel->cheapest_startup_path, 1); - printf("\n\tcheapest total path:\n"); - print_path(root, rel->cheapest_total_path, 1); + if (rel->cheapest_startup_path) + { + printf("\n\tcheapest startup path:\n"); + print_path(root, rel->cheapest_startup_path, 1); + } + if (rel->cheapest_total_path) + { + printf("\n\tcheapest total path:\n"); + print_path(root, rel->cheapest_total_path, 1); + } printf("\n"); fflush(stdout); }