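Add a concept of "placeholder" variables to the planner. These are variables that represent some expression that we desire to compute below the top level of the plan, and then let that value "bubble up" as though it were a plain Var (ie, a column value).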

[postgresql] / src / backend / optimizer / path / allpaths.c
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c

index ad55360a85337ec397009ee3576c7a1f43344d83..7d6a3b8d6b58a1c053fb376516f68d4d31d3799a 100644 (file)
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -3,19 +3,21 @@
   * allpaths.c
   *       Routines to find possible search paths for processing a query
   *
- * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.147 2006/07/01 18:38:32 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.175 2008/10/21 20:42:52 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
  
  #include "postgres.h"
  
-#include "nodes/makefuncs.h"
+#include <math.h>
+
+#include "nodes/nodeFuncs.h"
  #ifdef OPTIMIZER_DEBUG
  #include "nodes/print.h"
  #endif
@@ -28,9 +30,8 @@
  #include "optimizer/planner.h"
  #include "optimizer/prep.h"
  #include "optimizer/var.h"
-#include "parser/parsetree.h"
  #include "parser/parse_clause.h"
-#include "parser/parse_expr.h"
+#include "parser/parsetree.h"
  #include "rewrite/rewriteManip.h"
  
  
@@ -38,20 +39,29 @@
  bool           enable_geqo = false;    /* just in case GUC doesn't set it */
  int                    geqo_threshold;
  
+/* Hook for plugins to replace standard_join_search() */
+join_search_hook_type join_search_hook = NULL;
+
  
  static void set_base_rel_pathlists(PlannerInfo *root);
-static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti);
+static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
+                                Index rti, RangeTblEntry *rte);
  static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                                            RangeTblEntry *rte);
  static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
-                                                                       Index rti, RangeTblEntry *rte);
+                                               Index rti, RangeTblEntry *rte);
+static void set_dummy_rel_pathlist(RelOptInfo *rel);
  static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
                                           Index rti, RangeTblEntry *rte);
  static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
                                           RangeTblEntry *rte);
+static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel,
+                                       RangeTblEntry *rte);
+static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel,
+                                                        RangeTblEntry *rte);
+static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
+                                                                  RangeTblEntry *rte);
  static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
-static RelOptInfo *make_one_rel_by_joins(PlannerInfo *root, int levels_needed,
-                                         List *initial_rels);
  static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
                                                   bool *differentTypes);
  static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
@@ -143,7 +153,7 @@ set_base_rel_pathlists(PlannerInfo *root)
                 if (rel->reloptkind != RELOPT_BASEREL)
                         continue;
  
-               set_rel_pathlist(root, rel, rti);
+               set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
         }
  }
  
@@ -152,10 +162,9 @@ set_base_rel_pathlists(PlannerInfo *root)
   *       Build access paths for a base relation
   */
  static void
-set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti)
+set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
+                                Index rti, RangeTblEntry *rte)
  {
-       RangeTblEntry *rte = rt_fetch(rti, root->parse->rtable);
-
         if (rte->inh)
         {
                 /* It's an "append relation", process accordingly */
@@ -168,9 +177,22 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti)
         }
         else if (rel->rtekind == RTE_FUNCTION)
         {
-               /* RangeFunction --- generate a separate plan for it */
+               /* RangeFunction --- generate a suitable path for it */
                 set_function_pathlist(root, rel, rte);
         }
+       else if (rel->rtekind == RTE_VALUES)
+       {
+               /* Values list --- generate a suitable path for it */
+               set_values_pathlist(root, rel, rte);
+       }
+       else if (rel->rtekind == RTE_CTE)
+       {
+               /* CTE reference --- generate a suitable path for it */
+               if (rte->self_reference)
+                       set_worktable_pathlist(root, rel, rte);
+               else
+                       set_cte_pathlist(root, rel, rte);
+       }
         else
         {
                 /* Plain relation */
@@ -190,6 +212,19 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti)
  static void
  set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
  {
+       /*
+        * If we can prove we don't need to scan the rel via constraint exclusion,
+        * set up a single dummy path for it.  We only need to check for regular
+        * baserels; if it's an otherrel, CE was already checked in
+        * set_append_rel_pathlist().
+        */
+       if (rel->reloptkind == RELOPT_BASEREL &&
+               relation_excluded_by_constraints(root, rel, rte))
+       {
+               set_dummy_rel_pathlist(rel);
+               return;
+       }
+
         /* Mark rel with estimated output rows, width, etc */
         set_baserel_size_estimates(root, rel);
  
@@ -204,24 +239,6 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
         if (create_or_index_quals(root, rel))
                 set_baserel_size_estimates(root, rel);
  
-       /*
-        * If we can prove we don't need to scan the rel via constraint exclusion,
-        * set up a single dummy path for it.  (Rather than inventing a special
-        * "dummy" path type, we represent this as an AppendPath with no members.)
-        */
-       if (relation_excluded_by_constraints(rel, rte))
-       {
-               /* Reset output-rows estimate to 0 */
-               rel->rows = 0;
-
-               add_path(rel, (Path *) create_append_path(rel, NIL));
-
-               /* Select cheapest path (pretty easy in this case...) */
-               set_cheapest(rel);
-
-               return;
-       }
-
         /*
          * Generate paths and add them to the rel's pathlist.
          *
@@ -247,7 +264,7 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
   * set_append_rel_pathlist
   *       Build access paths for an "append relation"
   *
- * The passed-in rel and RTE represent the entire append relation.  The
+ * The passed-in rel and RTE represent the entire append relation.     The
   * relation's contents are computed by appending together the output of
   * the individual member relations.  Note that in the inheritance case,
   * the first member relation is actually the same table as is mentioned in
@@ -260,13 +277,17 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
  {
         int                     parentRTindex = rti;
         List       *subpaths = NIL;
+       double          parent_rows;
+       double          parent_size;
+       double     *parent_attrsizes;
+       int                     nattrs;
         ListCell   *l;
  
         /*
          * XXX for now, can't handle inherited expansion of FOR UPDATE/SHARE; can
          * we do better?  (This will take some redesign because the executor
-        * currently supposes that every rowMark relation is involved in every
-        * row returned by the query.)
+        * currently supposes that every rowMark relation is involved in every row
+        * returned by the query.)
          */
         if (get_rowmark(root->parse, parentRTindex))
                 ereport(ERROR,
@@ -274,17 +295,23 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                                  errmsg("SELECT FOR UPDATE/SHARE is not supported for inheritance queries")));
  
         /*
-        * We might have looked up indexes for the parent rel, but they're
-        * really not relevant to the appendrel.  Reset the pointer to avoid
-        * any confusion.
-        */
-       rel->indexlist = NIL;
-
-       /*
-        * Initialize to compute size estimates for whole append relation
+        * Initialize to compute size estimates for whole append relation.
+        *
+        * We handle width estimates by weighting the widths of different
+        * child rels proportionally to their number of rows.  This is sensible
+        * because the use of width estimates is mainly to compute the total
+        * relation "footprint" if we have to sort or hash it.  To do this,
+        * we sum the total equivalent size (in "double" arithmetic) and then
+        * divide by the total rowcount estimate.  This is done separately for
+        * the total rel width and each attribute.
+        *
+        * Note: if you consider changing this logic, beware that child rels could
+        * have zero rows and/or width, if they were excluded by constraints.
          */
-       rel->rows = 0;
-       rel->width = 0;
+       parent_rows = 0;
+       parent_size = 0;
+       nattrs = rel->max_attr - rel->min_attr + 1;
+       parent_attrsizes = (double *) palloc0(nattrs * sizeof(double));
  
         /*
          * Generate access paths for each member relation, and pick the cheapest
@@ -294,6 +321,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
         {
                 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
                 int                     childRTindex;
+               RangeTblEntry *childRTE;
                 RelOptInfo *childrel;
                 Path       *childpath;
                 ListCell   *parentvars;
@@ -304,27 +332,54 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                         continue;
  
                 childRTindex = appinfo->child_relid;
+               childRTE = root->simple_rte_array[childRTindex];
  
                 /*
-                * Make a RelOptInfo for the child so we can do planning. Mark it as
-                * an "other rel" since it will not be part of the main join tree.
+                * The child rel's RelOptInfo was already created during
+                * add_base_rels_to_query.
                  */
-               childrel = build_simple_rel(root, childRTindex,
-                                                                       RELOPT_OTHER_MEMBER_REL);
+               childrel = find_base_rel(root, childRTindex);
+               Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
  
                 /*
-                * Copy the parent's targetlist and quals to the child, with
-                * appropriate substitution of variables.
+                * We have to copy the parent's targetlist and quals to the child,
+                * with appropriate substitution of variables.  However, only the
+                * baserestrictinfo quals are needed before we can check for
+                * constraint exclusion; so do that first and then check to see if we
+                * can disregard this child.
                  */
-               childrel->reltargetlist = (List *)
-                       adjust_appendrel_attrs((Node *) rel->reltargetlist,
-                                                                  appinfo);
                 childrel->baserestrictinfo = (List *)
                         adjust_appendrel_attrs((Node *) rel->baserestrictinfo,
                                                                    appinfo);
+
+               if (relation_excluded_by_constraints(root, childrel, childRTE))
+               {
+                       /*
+                        * This child need not be scanned, so we can omit it from the
+                        * appendrel.  Mark it with a dummy cheapest-path though, in case
+                        * best_appendrel_indexscan() looks at it later.
+                        */
+                       set_dummy_rel_pathlist(childrel);
+                       continue;
+               }
+
+               /* CE failed, so finish copying targetlist and join quals */
                 childrel->joininfo = (List *)
                         adjust_appendrel_attrs((Node *) rel->joininfo,
                                                                    appinfo);
+               childrel->reltargetlist = (List *)
+                       adjust_appendrel_attrs((Node *) rel->reltargetlist,
+                                                                  appinfo);
+
+               /*
+                * We have to make child entries in the EquivalenceClass data
+                * structures as well.
+                */
+               if (rel->has_eclass_joins)
+               {
+                       add_child_rel_equivalences(root, appinfo, rel, childrel);
+                       childrel->has_eclass_joins = true;
+               }
  
                 /*
                  * Copy the parent's attr_needed data as well, with appropriate
@@ -337,18 +392,15 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                                                                                  childrel->max_attr);
  
                 /*
-                * Compute the child's access paths, and add the cheapest one
-                * to the Append path we are constructing for the parent.
+                * Compute the child's access paths, and add the cheapest one to the
+                * Append path we are constructing for the parent.
                  *
-                * It's possible that the child is itself an appendrel, in which
-                * case we can "cut out the middleman" and just add its child
-                * paths to our own list.  (We don't try to do this earlier because
-                * we need to apply both levels of transformation to the quals.)
-                * This test also handles the case where the child rel need not
-                * be scanned because of constraint exclusion: it'll have an
-                * Append path with no subpaths, and will vanish from our list.
+                * It's possible that the child is itself an appendrel, in which case
+                * we can "cut out the middleman" and just add its child paths to our
+                * own list.  (We don't try to do this earlier because we need to
+                * apply both levels of transformation to the quals.)
                  */
-               set_rel_pathlist(root, childrel, childRTindex);
+               set_rel_pathlist(root, childrel, childRTindex, childRTE);
  
                 childpath = childrel->cheapest_total_path;
                 if (IsA(childpath, AppendPath))
@@ -358,32 +410,58 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                         subpaths = lappend(subpaths, childpath);
  
                 /*
-                * Propagate size information from the child back to the parent. For
-                * simplicity, we use the largest widths from any child as the parent
-                * estimates.
+                * Accumulate size information from each child.
                  */
-               rel->rows += childrel->rows;
-               if (childrel->width > rel->width)
-                       rel->width = childrel->width;
-
-               forboth(parentvars, rel->reltargetlist,
-                               childvars, childrel->reltargetlist)
+               if (childrel->rows > 0)
                 {
-                       Var                *parentvar = (Var *) lfirst(parentvars);
-                       Var                *childvar = (Var *) lfirst(childvars);
+                       parent_rows += childrel->rows;
+                       parent_size += childrel->width * childrel->rows;
  
-                       if (IsA(parentvar, Var) &&
-                               IsA(childvar, Var))
+                       forboth(parentvars, rel->reltargetlist,
+                                       childvars, childrel->reltargetlist)
                         {
-                               int                     pndx = parentvar->varattno - rel->min_attr;
-                               int                     cndx = childvar->varattno - childrel->min_attr;
-
-                               if (childrel->attr_widths[cndx] > rel->attr_widths[pndx])
-                                       rel->attr_widths[pndx] = childrel->attr_widths[cndx];
+                               Var                *parentvar = (Var *) lfirst(parentvars);
+                               Var                *childvar = (Var *) lfirst(childvars);
+
+                               /*
+                                * Accumulate per-column estimates too.  Whole-row Vars and
+                                * PlaceHolderVars can be ignored here.
+                                */
+                               if (IsA(parentvar, Var) &&
+                                       IsA(childvar, Var))
+                               {
+                                       int                     pndx = parentvar->varattno - rel->min_attr;
+                                       int                     cndx = childvar->varattno - childrel->min_attr;
+
+                                       parent_attrsizes[pndx] += childrel->attr_widths[cndx] * childrel->rows;
+                               }
                         }
                 }
         }
  
+       /*
+        * Save the finished size estimates.
+        */
+       rel->rows = parent_rows;
+       if (parent_rows > 0)
+       {
+               int             i;
+
+               rel->width = rint(parent_size / parent_rows);
+               for (i = 0; i < nattrs; i++)
+                       rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows);
+       }
+       else
+               rel->width = 0;                 /* attr_widths should be zero already */
+
+       /*
+        * Set "raw tuples" count equal to "rows" for the appendrel; needed
+        * because some places assume rel->tuples is valid for any baserel.
+        */
+       rel->tuples = parent_rows;
+
+       pfree(parent_attrsizes);
+
         /*
          * Finally, build Append path and install it as the only access path for
          * the parent rel.      (Note: this is correct even if we have zero or one
@@ -395,6 +473,26 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
         set_cheapest(rel);
  }
  
+/*
+ * set_dummy_rel_pathlist
+ *       Build a dummy path for a relation that's been excluded by constraints
+ *
+ * The rel's rows and width are zeroed and an AppendPath with no members is
+ * added, instead of inventing a "dummy" path type (see IS_DUMMY_PATH macro).
+ */
+static void
+set_dummy_rel_pathlist(RelOptInfo *rel)
+{
+       /* Set dummy size estimates --- attr_widths[] is not touched here */
+       rel->rows = 0;
+       rel->width = 0;
+
+       add_path(rel, (Path *) create_append_path(rel, NIL));   /* NIL: no members */
+
+       /* Select cheapest path (pretty easy in this case...) */
+       set_cheapest(rel);
+}
+
  /* quick-and-dirty test to see if any joining is needed */
  static bool
  has_multiple_baserels(PlannerInfo *root)
@@ -429,8 +527,8 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
         Query      *subquery = rte->subquery;
         bool       *differentTypes;
         double          tuple_fraction;
+       PlannerInfo *subroot;
         List       *pathkeys;
-       List       *subquery_pathkeys;
  
         /* We need a workspace for keeping track of set-op type coercions */
         differentTypes = (bool *)
@@ -502,8 +600,11 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
                 tuple_fraction = root->tuple_fraction;
  
         /* Generate the plan for the subquery */
-       rel->subplan = subquery_planner(subquery, tuple_fraction,
-                                                                       &subquery_pathkeys);
+       rel->subplan = subquery_planner(root->glob, subquery,
+                                                                       root,
+                                                                       false, tuple_fraction,
+                                                                       &subroot);
+       rel->subrtable = subroot->parse->rtable;
  
         /* Copy number of output rows from subplan */
         rel->tuples = rel->subplan->plan_rows;
@@ -512,7 +613,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
         set_baserel_size_estimates(root, rel);
  
         /* Convert subquery pathkeys to outer representation */
-       pathkeys = convert_subquery_pathkeys(root, rel, subquery_pathkeys);
+       pathkeys = convert_subquery_pathkeys(root, rel, subroot->query_pathkeys);
  
         /* Generate appropriate path */
         add_path(rel, create_subqueryscan_path(rel, pathkeys));
@@ -538,6 +639,121 @@ set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
         set_cheapest(rel);
  }
  
+/*
+ * set_values_pathlist
+ *             Build the (single) access path for a VALUES RTE (rte is unused here)
+ */
+static void
+set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+       /* Mark rel with estimated output rows, width, etc */
+       set_values_size_estimates(root, rel);
+
+       /* Generate the values-scan path and add it to the rel's pathlist */
+       add_path(rel, create_valuesscan_path(root, rel));
+
+       /* Select cheapest path (pretty easy in this case...) */
+       set_cheapest(rel);
+}
+
+/*
+ * set_cte_pathlist
+ *             Build the (single) access path for a non-self-reference CTE RTE
+ */
+static void
+set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+       Plan       *cteplan;
+       PlannerInfo *cteroot;
+       Index           levelsup;
+       int                     ndx;
+       ListCell   *lc;
+       int                     plan_id;
+
+       /*
+        * Climb ctelevelsup parent levels to the PlannerInfo that owns the CTE.
+        */
+       levelsup = rte->ctelevelsup;
+       cteroot = root;
+       while (levelsup-- > 0)
+       {
+               cteroot = cteroot->parent_root;
+               if (!cteroot)                   /* shouldn't happen */
+                       elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
+       }
+       /*
+        * Look up the CTE by name.  cte_plan_ids can be shorter than cteList, if
+        * we are still working on planning the CTEs (ie, this is a
+        * side-reference from another CTE).  So we mustn't use forboth here.
+        */
+       ndx = 0;
+       foreach(lc, cteroot->parse->cteList)
+       {
+               CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
+
+               if (strcmp(cte->ctename, rte->ctename) == 0)
+                       break;
+               ndx++;
+       }
+       if (lc == NULL)                         /* shouldn't happen */
+               elog(ERROR, "could not find CTE \"%s\"", rte->ctename);
+       if (ndx >= list_length(cteroot->cte_plan_ids))
+               elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
+       plan_id = list_nth_int(cteroot->cte_plan_ids, ndx);
+       Assert(plan_id > 0);                    /* list index is plan_id - 1 */
+       cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1);
+
+       /* Mark rel with estimated output rows, width, etc, using the CTE's plan */
+       set_cte_size_estimates(root, rel, cteplan);
+
+       /* Generate the CTE-scan path and add it to the rel's pathlist */
+       add_path(rel, create_ctescan_path(root, rel));
+
+       /* Select cheapest path (pretty easy in this case...) */
+       set_cheapest(rel);
+}
+
+/*
+ * set_worktable_pathlist
+ *             Build the (single) access path for a self-reference CTE RTE
+ */
+static void
+set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+       Plan       *cteplan;
+       PlannerInfo *cteroot;
+       Index           levelsup;
+
+       /*
+        * We need to find the non-recursive term's plan, which is in the plan
+        * level that's processing the recursive UNION, which is one level
+        * *below* where the CTE comes from.
+        */
+       levelsup = rte->ctelevelsup;
+       if (levelsup == 0)                      /* shouldn't happen */
+               elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
+       levelsup--;                             /* stop one level below the CTE's level */
+       cteroot = root;
+       while (levelsup-- > 0)
+       {
+               cteroot = cteroot->parent_root;
+               if (!cteroot)                   /* shouldn't happen */
+                       elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
+       }
+       cteplan = cteroot->non_recursive_plan;
+       if (!cteplan)                           /* shouldn't happen */
+               elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
+
+       /* Mark rel with estimated rows, width, etc, using the non-recursive plan */
+       set_cte_size_estimates(root, rel, cteplan);
+
+       /* Generate the worktable-scan path and add it to the rel's pathlist */
+       add_path(rel, create_worktablescan_path(root, rel));
+
+       /* Select cheapest path (pretty easy in this case...) */
+       set_cheapest(rel);
+}
+
  /*
   * make_rel_from_joinlist
   *       Build access paths using a "joinlist" to guide the join path search.
@@ -605,18 +821,25 @@ make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
         {
                 /*
                  * Consider the different orders in which we could join the rels,
-                * using either GEQO or regular optimizer.
+                * using a plugin, GEQO, or the regular join search code.
+                *
+                * We put the initial_rels list into a PlannerInfo field because
+                * has_legal_joinclause() needs to look at it (ugly :-().
                  */
-               if (enable_geqo && levels_needed >= geqo_threshold)
+               root->initial_rels = initial_rels;
+
+               if (join_search_hook)
+                       return (*join_search_hook) (root, levels_needed, initial_rels);
+               else if (enable_geqo && levels_needed >= geqo_threshold)
                         return geqo(root, levels_needed, initial_rels);
                 else
-                       return make_one_rel_by_joins(root, levels_needed, initial_rels);
+                       return standard_join_search(root, levels_needed, initial_rels);
         }
  }
  
  /*
- * make_one_rel_by_joins
- *       Find all possible joinpaths for a query by successively finding ways
+ * standard_join_search
+ *       Find possible joinpaths for a query by successively finding ways
   *       to join component relations into join relations.
   *
   * 'levels_needed' is the number of iterations needed, ie, the number of
@@ -624,12 +847,27 @@ make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
   *
   * 'initial_rels' is a list of RelOptInfo nodes for each independent
   *             jointree item.  These are the components to be joined together.
+ *             Note that levels_needed == list_length(initial_rels).
   *
   * Returns the final level of join relations, i.e., the relation that is
   * the result of joining all the original relations together.
+ * At least one implementation path must be provided for this relation and
+ * all required sub-relations.
+ *
+ * To support loadable plugins that modify planner behavior by changing the
+ * join searching algorithm, we provide a hook variable that lets a plugin
+ * replace or supplement this function.  Any such hook must return the same
+ * final join relation as the standard code would, but it might have a
+ * different set of implementation paths attached, and only the sub-joinrels
+ * needed for these paths need have been instantiated.
+ *
+ * Note to plugin authors: the functions invoked during standard_join_search()
+ * modify root->join_rel_list and root->join_rel_hash. If you want to do more
+ * than one join-order search, you'll probably need to save and restore the
+ * original states of those data structures.  See geqo_eval() for an example.
   */
-static RelOptInfo *
-make_one_rel_by_joins(PlannerInfo *root, int levels_needed, List *initial_rels)
+RelOptInfo *
+standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
  {
         List      **joinitems;
         int                     lev;
@@ -658,7 +896,7 @@ make_one_rel_by_joins(PlannerInfo *root, int levels_needed, List *initial_rels)
                  * level, and build paths for making each one from every available
                  * pair of lower-level relations.
                  */
-               joinitems[lev] = make_rels_by_joins(root, lev, joinitems);
+               joinitems[lev] = join_search_one_level(root, lev, joinitems);
  
                 /*
                  * Do cleanup work on each just-processed rel.
@@ -790,6 +1028,10 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
   * Compare tlist's datatypes against the list of set-operation result types.
   * For any items that are different, mark the appropriate element of
   * differentTypes[] to show that this column will have type conversions.
+ *
+ * We don't have to care about typmods here: the only allowed difference
+ * between set-op input and output typmods is that the input has a specific
+ * typmod while the output is -1, and that does not require a coercion.
   */
  static void
  compare_tlist_datatypes(List *tlist, List *colTypes,
@@ -835,15 +1077,20 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
   *
   * 4. If the subquery uses DISTINCT ON, we must not push down any quals that
   * refer to non-DISTINCT output columns, because that could change the set
- * of rows returned.  This condition is vacuous for DISTINCT, because then
- * there are no non-DISTINCT output columns, but unfortunately it's fairly
- * expensive to tell the difference between DISTINCT and DISTINCT ON in the
- * parsetree representation.  It's cheaper to just make sure all the Vars
- * in the qual refer to DISTINCT columns.
+ * of rows returned.  (This condition is vacuous for DISTINCT, because then
+ * there are no non-DISTINCT output columns, so we needn't check.  But note
+ * we are assuming that the qual can't distinguish values that the DISTINCT
+ * operator sees as equal.  This is a bit shaky but we have no way to test
+ * for the case, and it's unlikely enough that we shouldn't refuse the
+ * optimization just because it could theoretically happen.)
   *
   * 5. We must not push down any quals that refer to subselect outputs that
   * return sets, else we'd introduce functions-returning-sets into the
   * subquery's WHERE/HAVING quals.
+ *
+ * 6. We must not push down any quals that refer to subselect outputs that
+ * contain volatile functions, for fear of introducing strange results due
+ * to multiple evaluation of a volatile function.
   */
  static bool
  qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
@@ -862,12 +1109,25 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
          * Examine all Vars used in clause; since it's a restriction clause, all
          * such Vars must refer to subselect output columns.
          */
-       vars = pull_var_clause(qual, false);
+       vars = pull_var_clause(qual, true);
         foreach(vl, vars)
         {
                 Var                *var = (Var *) lfirst(vl);
                 TargetEntry *tle;
  
+               /*
+                * XXX Punt if we find any PlaceHolderVars in the restriction clause.
+                * It's not clear whether a PHV could safely be pushed down, and even
+                * less clear whether such a situation could arise in any cases of
+                * practical interest anyway.  So for the moment, just refuse to push
+                * down.
+                */
+               if (!IsA(var, Var))
+               {
+                       safe = false;
+                       break;
+               }
+
                 Assert(var->varno == rti);
  
                 /* Check point 2 */
@@ -898,9 +1158,9 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
                 Assert(tle != NULL);
                 Assert(!tle->resjunk);
  
-               /* If subquery uses DISTINCT or DISTINCT ON, check point 4 */
-               if (subquery->distinctClause != NIL &&
-                       !targetIsInSortList(tle, subquery->distinctClause))
+               /* If subquery uses DISTINCT ON, check point 4 */
+               if (subquery->hasDistinctOn &&
+                       !targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
                 {
                         /* non-DISTINCT column, so fail */
                         safe = false;
@@ -913,6 +1173,13 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
                         safe = false;
                         break;
                 }
+
+               /* Refuse volatile functions (point 6) */
+               if (contain_volatile_functions((Node *) tle->expr))
+               {
+                       safe = false;
+                       break;
+               }
         }
  
         list_free(vars);