Arrange to convert EXISTS subqueries that are equivalent to hashable IN

author Tom Lane <tgl@sss.pgh.pa.us>

Fri, 22 Aug 2008 00:16:04 +0000 (00:16 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Fri, 22 Aug 2008 00:16:04 +0000 (00:16 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 22 Aug 2008 00:16:04 +0000 (00:16 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 22 Aug 2008 00:16:04 +0000 (00:16 +0000)
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c

index 5f5c91366f7d5d32df4f6d7ccdb968679b79e44a..0c85ac785fdf67d361d134371f01b26e2401eda0 100644 (file)
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -8,7 +8,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.78 2008/08/07 01:11:46 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.79 2008/08/22 00:16:03 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1464,7 +1464,7 @@ find_expr_references_walker(Node *node,
                                                    context->addrs);
                 /* fall through to examine arguments */
         }
-       else if (is_subplan(node))
+       else if (IsA(node, SubPlan))
         {
                 /* Extra work needed here if we ever need this case */
                 elog(ERROR, "already-planned subqueries not supported");
diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c

index 7761485d7c1193acfaf1a68d4328435d1ce9837a..57b2df9c00ec1686434141c08cbd0f3ce112bdc6 100644 (file)
--- a/src/backend/executor/execQual.c
+++ b/src/backend/executor/execQual.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/executor/execQual.c,v 1.231 2008/05/15 00:17:39 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/executor/execQual.c,v 1.232 2008/08/22 00:16:03 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -3957,6 +3957,19 @@ ExecInitExpr(Expr *node, PlanState *parent)
                                 state = (ExprState *) sstate;
                         }
                         break;
+               case T_AlternativeSubPlan:
+                       {
+                               AlternativeSubPlan *asplan = (AlternativeSubPlan *) node;
+                               AlternativeSubPlanState *asstate;
+
+                               if (!parent)
+                                       elog(ERROR, "AlternativeSubPlan found with no parent plan");
+
+                               asstate = ExecInitAlternativeSubPlan(asplan, parent);
+
+                               state = (ExprState *) asstate;
+                       }
+                       break;
                 case T_FieldSelect:
                         {
                                 FieldSelect *fselect = (FieldSelect *) node;
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c

index 0c9ec82b01d45ebff1019b30217dbb9070474a8b..3ebb045f922190a362f1830a935105ef3a06b840 100644 (file)
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/executor/nodeSubplan.c,v 1.93 2008/05/12 00:00:49 alvherre Exp $
+ *       $PostgreSQL: pgsql/src/backend/executor/nodeSubplan.c,v 1.94 2008/08/22 00:16:03 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -29,6 +29,14 @@
  #include "utils/memutils.h"
  
  
+static Datum ExecSubPlan(SubPlanState *node,
+                       ExprContext *econtext,
+                       bool *isNull,
+                       ExprDoneCond *isDone);
+static Datum ExecAlternativeSubPlan(AlternativeSubPlanState *node,
+                       ExprContext *econtext,
+                       bool *isNull,
+                       ExprDoneCond *isDone);
  static Datum ExecHashSubPlan(SubPlanState *node,
                                 ExprContext *econtext,
                                 bool *isNull);
@@ -45,7 +53,7 @@ static bool slotNoNulls(TupleTableSlot *slot);
   *             ExecSubPlan
   * ----------------------------------------------------------------
   */
-Datum
+static Datum
  ExecSubPlan(SubPlanState *node,
                         ExprContext *econtext,
                         bool *isNull,
@@ -1066,3 +1074,83 @@ ExecReScanSetParamPlan(SubPlanState *node, PlanState *parent)
                 parent->chgParam = bms_add_member(parent->chgParam, paramid);
         }
  }
+
+
+/*
+ * ExecInitAlternativeSubPlan
+ *
+ * Initialize for execution of one of a set of alternative subplans.
+ */
+AlternativeSubPlanState *
+ExecInitAlternativeSubPlan(AlternativeSubPlan *asplan, PlanState *parent)
+{
+       AlternativeSubPlanState *asstate = makeNode(AlternativeSubPlanState);
+       double          num_calls;
+       SubPlan    *subplan1;
+       SubPlan    *subplan2;
+       Cost            cost1;
+       Cost            cost2;
+
+       asstate->xprstate.evalfunc = (ExprStateEvalFunc) ExecAlternativeSubPlan;
+       asstate->xprstate.expr = (Expr *) asplan;
+
+       /*
+        * Initialize subplans.  (Can we get away with only initializing the
+        * one we're going to use?)
+        */
+       asstate->subplans = (List *) ExecInitExpr((Expr *) asplan->subplans,
+                                                                                         parent);
+
+       /*
+        * Select the one to be used.  For this, we need an estimate of the
+        * number of executions of the subplan.  We use the number of output
+        * rows expected from the parent plan node.  This is a good estimate
+        * if we are in the parent's targetlist, and an underestimate (but
+        * probably not by more than a factor of 2) if we are in the qual.
+        */
+       num_calls = parent->plan->plan_rows;
+
+       /*
+        * The planner saved enough info so that we don't have to work very hard
+        * to estimate the total cost, given the number-of-calls estimate.
+        */
+       Assert(list_length(asplan->subplans) == 2);
+       subplan1 = (SubPlan *) linitial(asplan->subplans);
+       subplan2 = (SubPlan *) lsecond(asplan->subplans);
+
+       cost1 = subplan1->startup_cost + num_calls * subplan1->per_call_cost;
+       cost2 = subplan2->startup_cost + num_calls * subplan2->per_call_cost;
+
+       if (cost1 < cost2)
+               asstate->active = 0;
+       else
+               asstate->active = 1;
+
+       return asstate;
+}
+
+/*
+ * ExecAlternativeSubPlan
+ *
+ * Execute one of a set of alternative subplans.
+ *
+ * Note: in future we might consider changing to different subplans on the
+ * fly, in case the original rowcount estimate turns out to be way off.
+ */
+static Datum
+ExecAlternativeSubPlan(AlternativeSubPlanState *node,
+                                          ExprContext *econtext,
+                                          bool *isNull,
+                                          ExprDoneCond *isDone)
+{
+       /* Just pass control to the active subplan */
+       SubPlanState   *activesp = (SubPlanState *) list_nth(node->subplans,
+                                                                                                                node->active);
+
+       Assert(IsA(activesp, SubPlanState));
+
+       return ExecSubPlan(activesp,
+                                          econtext,
+                                          isNull,
+                                          isDone);
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c

index 83322320b091511d64c8ab5838a84ea45e38ec41..4a30a3d82287952e33f15dc36bfe168a18ba03f0 100644 (file)
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.400 2008/08/14 18:47:58 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.401 2008/08/22 00:16:03 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -969,6 +969,21 @@ _copySubPlan(SubPlan *from)
         COPY_NODE_FIELD(setParam);
         COPY_NODE_FIELD(parParam);
         COPY_NODE_FIELD(args);
+       COPY_SCALAR_FIELD(startup_cost);
+       COPY_SCALAR_FIELD(per_call_cost);
+
+       return newnode;
+}
+
+/*
+ * _copyAlternativeSubPlan
+ */
+static AlternativeSubPlan *
+_copyAlternativeSubPlan(AlternativeSubPlan *from)
+{
+       AlternativeSubPlan *newnode = makeNode(AlternativeSubPlan);
+
+       COPY_NODE_FIELD(subplans);
  
         return newnode;
  }
@@ -3146,6 +3161,9 @@ copyObject(void *from)
                 case T_SubPlan:
                         retval = _copySubPlan(from);
                         break;
+               case T_AlternativeSubPlan:
+                       retval = _copyAlternativeSubPlan(from);
+                       break;
                 case T_FieldSelect:
                         retval = _copyFieldSelect(from);
                         break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c

index 831a5fe19bec1fd33af859d99832a9e2d005d215..3a25111c60c7f776141634d0e930dd657a771e79 100644 (file)
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -18,7 +18,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.327 2008/08/14 18:47:58 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.328 2008/08/22 00:16:03 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -314,6 +314,16 @@ _equalSubPlan(SubPlan *a, SubPlan *b)
         COMPARE_NODE_FIELD(setParam);
         COMPARE_NODE_FIELD(parParam);
         COMPARE_NODE_FIELD(args);
+       COMPARE_SCALAR_FIELD(startup_cost);
+       COMPARE_SCALAR_FIELD(per_call_cost);
+
+       return true;
+}
+
+static bool
+_equalAlternativeSubPlan(AlternativeSubPlan *a, AlternativeSubPlan *b)
+{
+       COMPARE_NODE_FIELD(subplans);
  
         return true;
  }
@@ -2098,6 +2108,9 @@ equal(void *a, void *b)
                 case T_SubPlan:
                         retval = _equalSubPlan(a, b);
                         break;
+               case T_AlternativeSubPlan:
+                       retval = _equalAlternativeSubPlan(a, b);
+                       break;
                 case T_FieldSelect:
                         retval = _equalFieldSelect(a, b);
                         break;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c

index 13c824389ef581c42bdab7c52d4b4488d5a5e033..76f8da1babe7b5b3421296e5f9f184ddf5a247c3 100644 (file)
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.334 2008/08/14 18:47:58 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.335 2008/08/22 00:16:03 tgl Exp $
   *
   * NOTES
   *       Every node type that can appear in stored rules' parsetrees *must*
@@ -846,6 +846,16 @@ _outSubPlan(StringInfo str, SubPlan *node)
         WRITE_NODE_FIELD(setParam);
         WRITE_NODE_FIELD(parParam);
         WRITE_NODE_FIELD(args);
+       WRITE_FLOAT_FIELD(startup_cost, "%.2f");
+       WRITE_FLOAT_FIELD(per_call_cost, "%.2f");
+}
+
+static void
+_outAlternativeSubPlan(StringInfo str, AlternativeSubPlan *node)
+{
+       WRITE_NODE_TYPE("ALTERNATIVESUBPLAN");
+
+       WRITE_NODE_FIELD(subplans);
  }
  
  static void
@@ -2208,6 +2218,9 @@ _outNode(StringInfo str, void *obj)
                         case T_SubPlan:
                                 _outSubPlan(str, obj);
                                 break;
+                       case T_AlternativeSubPlan:
+                               _outAlternativeSubPlan(str, obj);
+                               break;
                         case T_FieldSelect:
                                 _outFieldSelect(str, obj);
                                 break;
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c

index 6db42c5f5860a5edabbe030fa0b0761737d5cca0..9ffdc32e775b63a816f00f17473969cf2b6f3c42 100644 (file)
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.92 2008/08/16 00:01:36 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.93 2008/08/22 00:16:03 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -680,7 +680,8 @@ clause_selectivity(PlannerInfo *root,
                 s1 = (Selectivity) 0.3333333;
         }
  #ifdef NOT_USED
-       else if (is_subplan(clause))
+       else if (IsA(clause, SubPlan) ||
+                        IsA(clause, AlternativeSubPlan))
         {
                 /*
                  * Just for the moment! FIX ME! - vadim 02/04/98
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index 14c3cf1f21d3b87add746a4ec13dbee03c99a981..487de9ee934409894f25cabe188f81455464c782 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -54,7 +54,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.194 2008/08/16 00:01:36 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.195 2008/08/22 00:16:03 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1868,6 +1868,93 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
  }
  
  
+/*
+ * cost_subplan
+ *             Figure the costs for a SubPlan (or initplan).
+ *
+ * Note: we could dig the subplan's Plan out of the root list, but in practice
+ * all callers have it handy already, so we make them pass it.
+ */
+void
+cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan)
+{
+       QualCost        sp_cost;
+
+       /* Figure any cost for evaluating the testexpr */
+       cost_qual_eval(&sp_cost,
+                                  make_ands_implicit((Expr *) subplan->testexpr),
+                                  root);
+
+       if (subplan->useHashTable)
+       {
+               /*
+                * If we are using a hash table for the subquery outputs, then the
+                * cost of evaluating the query is a one-time cost.  We charge one
+                * cpu_operator_cost per tuple for the work of loading the hashtable,
+                * too.
+                */
+               sp_cost.startup += plan->total_cost +
+                       cpu_operator_cost * plan->plan_rows;
+
+               /*
+                * The per-tuple costs include the cost of evaluating the lefthand
+                * expressions, plus the cost of probing the hashtable.  We already
+                * accounted for the lefthand expressions as part of the testexpr,
+                * and will also have counted one cpu_operator_cost for each
+                * comparison operator.  That is probably too low for the probing
+                * cost, but it's hard to make a better estimate, so live with it for
+                * now.
+                */
+       }
+       else
+       {
+               /*
+                * Otherwise we will be rescanning the subplan output on each
+                * evaluation.  We need to estimate how much of the output we will
+                * actually need to scan.  NOTE: this logic should agree with the
+                * tuple_fraction estimates used by make_subplan() in
+                * plan/subselect.c.
+                */
+               Cost            plan_run_cost = plan->total_cost - plan->startup_cost;
+
+               if (subplan->subLinkType == EXISTS_SUBLINK)
+               {
+                       /* we only need to fetch 1 tuple */
+                       sp_cost.per_tuple += plan_run_cost / plan->plan_rows;
+               }
+               else if (subplan->subLinkType == ALL_SUBLINK ||
+                                subplan->subLinkType == ANY_SUBLINK)
+               {
+                       /* assume we need 50% of the tuples */
+                       sp_cost.per_tuple += 0.50 * plan_run_cost;
+                       /* also charge a cpu_operator_cost per row examined */
+                       sp_cost.per_tuple += 0.50 * plan->plan_rows * cpu_operator_cost;
+               }
+               else
+               {
+                       /* assume we need all tuples */
+                       sp_cost.per_tuple += plan_run_cost;
+               }
+
+               /*
+                * Also account for subplan's startup cost. If the subplan is
+                * uncorrelated or undirect correlated, AND its topmost node is a Sort
+                * or Material node, assume that we'll only need to pay its startup
+                * cost once; otherwise assume we pay the startup cost every time.
+                */
+               if (subplan->parParam == NIL &&
+                       (IsA(plan, Sort) ||
+                        IsA(plan, Material)))
+                       sp_cost.startup += plan->startup_cost;
+               else
+                       sp_cost.per_tuple += plan->startup_cost;
+       }
+
+       subplan->startup_cost = sp_cost.startup;
+       subplan->per_call_cost = sp_cost.per_tuple;
+}
+
+
  /*
   * cost_qual_eval
   *             Estimate the CPU costs of evaluating a WHERE clause.
@@ -2066,79 +2153,30 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
                  * subplan will be executed on each evaluation, so charge accordingly.
                  * (Sub-selects that can be executed as InitPlans have already been
                  * removed from the expression.)
-                *
-                * An exception occurs when we have decided we can implement the
-                * subplan by hashing.
                  */
                 SubPlan    *subplan = (SubPlan *) node;
-               Plan       *plan = planner_subplan_get_plan(context->root, subplan);
  
-               if (subplan->useHashTable)
-               {
-                       /*
-                        * If we are using a hash table for the subquery outputs, then the
-                        * cost of evaluating the query is a one-time cost. We charge one
-                        * cpu_operator_cost per tuple for the work of loading the
-                        * hashtable, too.
-                        */
-                       context->total.startup += plan->total_cost +
-                               cpu_operator_cost * plan->plan_rows;
-
-                       /*
-                        * The per-tuple costs include the cost of evaluating the lefthand
-                        * expressions, plus the cost of probing the hashtable. Recursion
-                        * into the testexpr will handle the lefthand expressions
-                        * properly, and will count one cpu_operator_cost for each
-                        * comparison operator.  That is probably too low for the probing
-                        * cost, but it's hard to make a better estimate, so live with it
-                        * for now.
-                        */
-               }
-               else
-               {
-                       /*
-                        * Otherwise we will be rescanning the subplan output on each
-                        * evaluation.  We need to estimate how much of the output we will
-                        * actually need to scan.  NOTE: this logic should agree with
-                        * get_initplan_cost, below, and with the estimates used by
-                        * make_subplan() in plan/subselect.c.
-                        */
-                       Cost            plan_run_cost = plan->total_cost - plan->startup_cost;
+               context->total.startup += subplan->startup_cost;
+               context->total.per_tuple += subplan->per_call_cost;
  
-                       if (subplan->subLinkType == EXISTS_SUBLINK)
-                       {
-                               /* we only need to fetch 1 tuple */
-                               context->total.per_tuple += plan_run_cost / plan->plan_rows;
-                       }
-                       else if (subplan->subLinkType == ALL_SUBLINK ||
-                                        subplan->subLinkType == ANY_SUBLINK)
-                       {
-                               /* assume we need 50% of the tuples */
-                               context->total.per_tuple += 0.50 * plan_run_cost;
-                               /* also charge a cpu_operator_cost per row examined */
-                               context->total.per_tuple +=
-                                       0.50 * plan->plan_rows * cpu_operator_cost;
-                       }
-                       else
-                       {
-                               /* assume we need all tuples */
-                               context->total.per_tuple += plan_run_cost;
-                       }
+               /*
+                * We don't want to recurse into the testexpr, because it was already
+                * counted in the SubPlan node's costs.  So we're done.
+                */
+               return false;
+       }
+       else if (IsA(node, AlternativeSubPlan))
+       {
+               /*
+                * Arbitrarily use the first alternative plan for costing.  (We should
+                * certainly only include one alternative, and we don't yet have
+                * enough information to know which one the executor is most likely
+                * to use.)
+                */
+               AlternativeSubPlan *asplan = (AlternativeSubPlan *) node;
  
-                       /*
-                        * Also account for subplan's startup cost. If the subplan is
-                        * uncorrelated or undirect correlated, AND its topmost node is a
-                        * Sort or Material node, assume that we'll only need to pay its
-                        * startup cost once; otherwise assume we pay the startup cost
-                        * every time.
-                        */
-                       if (subplan->parParam == NIL &&
-                               (IsA(plan, Sort) ||
-                                IsA(plan, Material)))
-                               context->total.startup += plan->startup_cost;
-                       else
-                               context->total.per_tuple += plan->startup_cost;
-               }
+               return cost_qual_eval_walker((Node *) linitial(asplan->subplans),
+                                                                        context);
         }
  
         /* recurse into children */
@@ -2147,43 +2185,6 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
  }
  
  
-/*
- * get_initplan_cost
- *             Get the expected cost of evaluating an initPlan.
- *
- * Keep this in sync with cost_qual_eval_walker's handling of subplans, above,
- * and with the estimates used by make_subplan() in plan/subselect.c.
- */
-Cost
-get_initplan_cost(PlannerInfo *root, SubPlan *subplan)
-{
-       Cost            result;
-       Plan       *plan = planner_subplan_get_plan(root, subplan);
-
-       /* initPlans never use hashtables */
-       Assert(!subplan->useHashTable);
-       /* they are never ALL or ANY, either */
-       Assert(!(subplan->subLinkType == ALL_SUBLINK ||
-                        subplan->subLinkType == ANY_SUBLINK));
-
-       if (subplan->subLinkType == EXISTS_SUBLINK)
-       {
-               /* we only need to fetch 1 tuple */
-               Cost            plan_run_cost = plan->total_cost - plan->startup_cost;
-
-               result = plan->startup_cost;
-               result += plan_run_cost / plan->plan_rows;
-       }
-       else
-       {
-               /* assume we need all tuples */
-               result = plan->total_cost;
-       }
-
-       return result;
-}
-
-
  /*
   * approx_tuple_count
   *             Quick-and-dirty estimation of the number of join rows passing
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c

index 6e6e34075966c2353eb76eaeff121c5bfb6a42c6..ee2d936b3494c081c759b2176af6a40de4aca669 100644 (file)
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.137 2008/08/20 19:58:24 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.138 2008/08/22 00:16:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -52,6 +52,9 @@ typedef struct finalize_primnode_context
  } finalize_primnode_context;
  
  
+static Node *build_subplan(PlannerInfo *root, Plan *plan, List *rtable,
+                         SubLinkType subLinkType, Node *testexpr,
+                         bool adjust_testexpr, bool unknownEqFalse);
  static List *generate_subquery_params(PlannerInfo *root, List *tlist,
                                                                           List **paramIds);
  static List *generate_subquery_vars(PlannerInfo *root, List *tlist,
@@ -61,9 +64,12 @@ static Node *convert_testexpr(PlannerInfo *root,
                                  List *subst_nodes);
  static Node *convert_testexpr_mutator(Node *node,
                                                  convert_testexpr_context *context);
-static bool subplan_is_hashable(SubLink *slink, SubPlan *node, Plan *plan);
+static bool subplan_is_hashable(Plan *plan);
+static bool testexpr_is_hashable(Node *testexpr);
  static bool hash_ok_operator(OpExpr *expr);
  static bool simplify_EXISTS_query(Query *query);
+static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
+                                         Node **testexpr, List **paramIds);
  static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root);
  static Node *process_sublinks_mutator(Node *node,
                                                  process_sublinks_context *context);
@@ -229,9 +235,13 @@ get_first_col_type(Plan *plan)
  /*
   * Convert a SubLink (as created by the parser) into a SubPlan.
   *
- * We are given the original SubLink and the already-processed testexpr
- * (use this instead of the SubLink's own field).  We are also told if
- * this expression appears at top level of a WHERE/HAVING qual.
+ * We are given the SubLink's contained query, type, and testexpr.  We are
+ * also told if this expression appears at top level of a WHERE/HAVING qual.
+ *
+ * Note: we assume that the testexpr has been AND/OR flattened (actually,
+ * it's been through eval_const_expressions), but not converted to
+ * implicit-AND form; and any SubLinks in it should already have been
+ * converted to SubPlans.  The subquery is as yet untouched, however.
   *
   * The result is whatever we need to substitute in place of the SubLink
   * node in the executable expression.  This will be either the SubPlan
@@ -240,16 +250,14 @@ get_first_col_type(Plan *plan)
   * tree containing InitPlan Param nodes.
   */
  static Node *
-make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual)
+make_subplan(PlannerInfo *root, Query *orig_subquery, SubLinkType subLinkType,
+                        Node *testexpr, bool isTopQual)
  {
-       Query      *subquery = (Query *) (slink->subselect);
+       Query      *subquery;
+       bool            simple_exists = false;
         double          tuple_fraction;
-       SubPlan    *splan;
         Plan       *plan;
         PlannerInfo *subroot;
-       bool            isInitPlan;
-       Bitmapset  *tmpset;
-       int                     paramid;
         Node       *result;
  
         /*
@@ -258,38 +266,37 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual)
          * same sub-Query node, but the planner wants to scribble on the Query.
          * Try to clean this up when we do querytree redesign...
          */
-       subquery = (Query *) copyObject(subquery);
+       subquery = (Query *) copyObject(orig_subquery);
  
         /*
          * If it's an EXISTS subplan, we might be able to simplify it.
          */
-       if (slink->subLinkType == EXISTS_SUBLINK)
-               (void) simplify_EXISTS_query(subquery);
+       if (subLinkType == EXISTS_SUBLINK)
+               simple_exists = simplify_EXISTS_query(subquery);
  
         /*
          * For an EXISTS subplan, tell lower-level planner to expect that only the
          * first tuple will be retrieved.  For ALL and ANY subplans, we will be
-        * able to stop evaluating if the test condition fails, so very often not
-        * all the tuples will be retrieved; for lack of a better idea, specify
-        * 50% retrieval.  For EXPR and ROWCOMPARE subplans, use default behavior
-        * (we're only expecting one row out, anyway).
+        * able to stop evaluating if the test condition fails or matches, so very
+        * often not all the tuples will be retrieved; for lack of a better idea,
+        * specify 50% retrieval.  For EXPR and ROWCOMPARE subplans, use default
+        * behavior (we're only expecting one row out, anyway).
          *
-        * NOTE: if you change these numbers, also change cost_qual_eval_walker()
-        * and get_initplan_cost() in path/costsize.c.
+        * NOTE: if you change these numbers, also change cost_subplan() in
+        * path/costsize.c.
          *
-        * XXX If an ALL/ANY subplan is uncorrelated, we may decide to hash or
-        * materialize its result below.  In that case it would've been better to
-        * specify full retrieval.      At present, however, we can only detect
-        * correlation or lack of it after we've made the subplan :-(. Perhaps
-        * detection of correlation should be done as a separate step. Meanwhile,
-        * we don't want to be too optimistic about the percentage of tuples
-        * retrieved, for fear of selecting a plan that's bad for the
-        * materialization case.
-        */
-       if (slink->subLinkType == EXISTS_SUBLINK)
+        * XXX If an ANY subplan is uncorrelated, build_subplan may decide to hash
+        * its output.  In that case it would've been better to specify full
+        * retrieval.  At present, however, we can only check hashability after
+        * we've made the subplan :-(.  (Determining whether it'll fit in work_mem
+        * is the really hard part.)  Therefore, we don't want to be too
+        * optimistic about the percentage of tuples retrieved, for fear of
+        * selecting a plan that's bad for the materialization case.
+        */
+       if (subLinkType == EXISTS_SUBLINK)
                 tuple_fraction = 1.0;   /* just like a LIMIT 1 */
-       else if (slink->subLinkType == ALL_SUBLINK ||
-                        slink->subLinkType == ANY_SUBLINK)
+       else if (subLinkType == ALL_SUBLINK ||
+                        subLinkType == ANY_SUBLINK)
                 tuple_fraction = 0.5;   /* 50% */
         else
                 tuple_fraction = 0.0;   /* default behavior */
@@ -302,24 +309,104 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual)
                                                         tuple_fraction,
                                                         &subroot);
  
+       /* And convert to SubPlan or InitPlan format. */
+       result = build_subplan(root, plan, subroot->parse->rtable,
+                                                  subLinkType, testexpr, true, isTopQual);
+
+       /*
+        * If it's a correlated EXISTS with an unimportant targetlist, we might be
+        * able to transform it to the equivalent of an IN and then implement it
+        * by hashing.  We don't have enough information yet to tell which way
+        * is likely to be better (it depends on the expected number of executions
+        * of the EXISTS qual, and we are much too early in planning the outer
+        * query to be able to guess that).  So we generate both plans, if
+        * possible, and leave it to the executor to decide which to use.
+        */
+       if (simple_exists && IsA(result, SubPlan))
+       {
+               Node       *newtestexpr;
+               List       *paramIds;
+
+               /* Make a second copy of the original subquery */
+               subquery = (Query *) copyObject(orig_subquery);
+               /* and re-simplify */
+               simple_exists = simplify_EXISTS_query(subquery);
+               Assert(simple_exists);
+               /* See if it can be converted to an ANY query */
+               subquery = convert_EXISTS_to_ANY(root, subquery,
+                                                                                &newtestexpr, &paramIds);
+               if (subquery)
+               {
+                       /* Generate the plan for the ANY subquery; we'll need all rows */
+                       plan = subquery_planner(root->glob, subquery,
+                                                                       root->query_level + 1,
+                                                                       0.0,
+                                                                       &subroot);
+
+                       /* Now we can check if it'll fit in work_mem */
+                       if (subplan_is_hashable(plan))
+                       {
+                               SubPlan    *hashplan;
+                               AlternativeSubPlan *asplan;
+
+                               /* OK, convert to SubPlan format. */
+                               hashplan = (SubPlan *) build_subplan(root, plan,
+                                                                                                        subroot->parse->rtable,
+                                                                                                        ANY_SUBLINK, newtestexpr,
+                                                                                                        false, true);
+                               /* Check we got what we expected */
+                               Assert(IsA(hashplan, SubPlan));
+                               Assert(hashplan->parParam == NIL);
+                               Assert(hashplan->useHashTable);
+                               /* build_subplan won't have filled in paramIds */
+                               hashplan->paramIds = paramIds;
+
+                               /* Leave it to the executor to decide which plan to use */
+                               asplan = makeNode(AlternativeSubPlan);
+                               asplan->subplans = list_make2(result, hashplan);
+                               result = (Node *) asplan;
+                       }
+               }
+       }
+
+       return result;
+}
+
+/*
+ * Build a SubPlan node given the raw inputs --- subroutine for make_subplan
+ *
+ * Returns either the SubPlan, or an expression using initplan output Params,
+ * as explained in the comments for make_subplan.
+ */
+static Node *
+build_subplan(PlannerInfo *root, Plan *plan, List *rtable,
+                         SubLinkType subLinkType, Node *testexpr,
+                         bool adjust_testexpr, bool unknownEqFalse)
+{
+       Node       *result;
+       SubPlan    *splan;
+       bool            isInitPlan;
+       Bitmapset  *tmpset;
+       int                     paramid;
+
         /*
-        * Initialize the SubPlan node.  Note plan_id isn't set yet.
+        * Initialize the SubPlan node.  Note plan_id isn't set till further down,
+        * likewise the cost fields.
          */
         splan = makeNode(SubPlan);
-       splan->subLinkType = slink->subLinkType;
+       splan->subLinkType = subLinkType;
         splan->testexpr = NULL;
         splan->paramIds = NIL;
         splan->firstColType = get_first_col_type(plan);
         splan->useHashTable = false;
-       /* At top level of a qual, can treat UNKNOWN the same as FALSE */
-       splan->unknownEqFalse = isTopQual;
+       splan->unknownEqFalse = unknownEqFalse;
         splan->setParam = NIL;
         splan->parParam = NIL;
         splan->args = NIL;
  
         /*
-        * Make parParam list of params that current query level will pass to this
-        * child plan.
+        * Make parParam and args lists of param IDs and expressions that current
+        * query level will pass to this child plan.
          */
         tmpset = bms_copy(plan->extParam);
         while ((paramid = bms_first_member(tmpset)) >= 0)
@@ -327,7 +414,15 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual)
                 PlannerParamItem *pitem = list_nth(root->glob->paramlist, paramid);
  
                 if (pitem->abslevel == root->query_level)
+               {
                         splan->parParam = lappend_int(splan->parParam, paramid);
+                       /*
+                        * The Var or Aggref has already been adjusted to have the correct
+                        * varlevelsup or agglevelsup.  We probably don't even need to
+                        * copy it again, but be safe.
+                        */
+                       splan->args = lappend(splan->args, copyObject(pitem->item));
+               }
         }
         bms_free(tmpset);
  
@@ -339,21 +434,23 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual)
          * PARAM_EXEC Params instead of the PARAM_SUBLINK Params emitted by the
          * parser.
          */
-       if (splan->parParam == NIL && slink->subLinkType == EXISTS_SUBLINK)
+       if (splan->parParam == NIL && subLinkType == EXISTS_SUBLINK)
         {
                 Param      *prm;
  
+               Assert(testexpr == NULL);
                 prm = generate_new_param(root, BOOLOID, -1);
                 splan->setParam = list_make1_int(prm->paramid);
                 isInitPlan = true;
                 result = (Node *) prm;
         }
-       else if (splan->parParam == NIL && slink->subLinkType == EXPR_SUBLINK)
+       else if (splan->parParam == NIL && subLinkType == EXPR_SUBLINK)
         {
                 TargetEntry *te = linitial(plan->targetlist);
                 Param      *prm;
  
                 Assert(!te->resjunk);
+               Assert(testexpr == NULL);
                 prm = generate_new_param(root,
                                                                  exprType((Node *) te->expr),
                                                                  exprTypmod((Node *) te->expr));
@@ -361,13 +458,14 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual)
                 isInitPlan = true;
                 result = (Node *) prm;
         }
-       else if (splan->parParam == NIL && slink->subLinkType == ARRAY_SUBLINK)
+       else if (splan->parParam == NIL && subLinkType == ARRAY_SUBLINK)
         {
                 TargetEntry *te = linitial(plan->targetlist);
                 Oid                     arraytype;
                 Param      *prm;
  
                 Assert(!te->resjunk);
+               Assert(testexpr == NULL);
                 arraytype = get_array_type(exprType((Node *) te->expr));
                 if (!OidIsValid(arraytype))
                         elog(ERROR, "could not find array type for datatype %s",
@@ -379,11 +477,12 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual)
                 isInitPlan = true;
                 result = (Node *) prm;
         }
-       else if (splan->parParam == NIL && slink->subLinkType == ROWCOMPARE_SUBLINK)
+       else if (splan->parParam == NIL && subLinkType == ROWCOMPARE_SUBLINK)
         {
                 /* Adjust the Params */
                 List       *params;
  
+               Assert(testexpr != NULL);
                 params = generate_subquery_params(root,
                                                                                   plan->targetlist,
                                                                                   &splan->paramIds);
@@ -400,14 +499,14 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual)
         }
         else
         {
-               List       *args;
-               ListCell   *l;
-
-               if (testexpr)
+               /*
+                * Adjust the Params in the testexpr, unless caller said it's not
+                * needed.
+                */
+               if (testexpr && adjust_testexpr)
                 {
                         List       *params;
  
-                       /* Adjust the Params in the testexpr */
                         params = generate_subquery_params(root,
                                                                                           plan->targetlist,
                                                                                           &splan->paramIds);
@@ -415,15 +514,20 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual)
                                                                                            testexpr,
                                                                                            params);
                 }
+               else
+                       splan->testexpr = testexpr;
  
                 /*
                  * We can't convert subplans of ALL_SUBLINK or ANY_SUBLINK types to
                  * initPlans, even when they are uncorrelated or undirect correlated,
                  * because we need to scan the output of the subplan for each outer
-                * tuple.  But if it's an IN (= ANY) test, we might be able to use a
-                * hashtable to avoid comparing all the tuples.
+                * tuple.  But if it's a not-direct-correlated IN (= ANY) test, we
+                * might be able to use a hashtable to avoid comparing all the tuples.
                  */
-               if (subplan_is_hashable(slink, splan, plan))
+               if (subLinkType == ANY_SUBLINK &&
+                       splan->parParam == NIL &&
+                       subplan_is_hashable(plan) &&
+                       testexpr_is_hashable(splan->testexpr))
                         splan->useHashTable = true;
  
                 /*
@@ -453,24 +557,6 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual)
                                 plan = materialize_finished_plan(plan);
                 }
  
-               /*
-                * Make splan->args from parParam.
-                */
-               args = NIL;
-               foreach(l, splan->parParam)
-               {
-                       PlannerParamItem *pitem = list_nth(root->glob->paramlist,
-                                                                                          lfirst_int(l));
-
-                       /*
-                        * The Var or Aggref has already been adjusted to have the correct
-                        * varlevelsup or agglevelsup.  We probably don't even need to
-                        * copy it again, but be safe.
-                        */
-                       args = lappend(args, copyObject(pitem->item));
-               }
-               splan->args = args;
-
                 result = (Node *) splan;
                 isInitPlan = false;
         }
@@ -478,10 +564,8 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual)
         /*
          * Add the subplan and its rtable to the global lists.
          */
-       root->glob->subplans = lappend(root->glob->subplans,
-                                                                  plan);
-       root->glob->subrtables = lappend(root->glob->subrtables,
-                                                                        subroot->parse->rtable);
+       root->glob->subplans = lappend(root->glob->subplans, plan);
+       root->glob->subrtables = lappend(root->glob->subrtables, rtable);
         splan->plan_id = list_length(root->glob->subplans);
  
         if (isInitPlan)
@@ -498,6 +582,9 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual)
                 root->glob->rewindPlanIDs = bms_add_member(root->glob->rewindPlanIDs,
                                                                                                    splan->plan_id);
  
+       /* Lastly, fill in the cost estimates for use later */
+       cost_subplan(root, splan, plan);
+
         return result;
  }
  
@@ -620,40 +707,12 @@ convert_testexpr_mutator(Node *node,
  }
  
  /*
- * subplan_is_hashable: decide whether we can implement a subplan by hashing
- *
- * Caution: the SubPlan node is not completely filled in yet.  We can rely
- * on its plan and parParam fields, however.
+ * subplan_is_hashable: can we implement an ANY subplan by hashing?
   */
  static bool
-subplan_is_hashable(SubLink *slink, SubPlan *node, Plan *plan)
+subplan_is_hashable(Plan *plan)
  {
         double          subquery_size;
-       ListCell   *l;
-
-       /*
-        * The sublink type must be "= ANY" --- that is, an IN operator.  We
-        * expect that the test expression will be either a single OpExpr, or an
-        * AND-clause containing OpExprs.  (If it's anything else then the parser
-        * must have determined that the operators have non-equality-like
-        * semantics.  In the OpExpr case we can't be sure what the operator's
-        * semantics are like, but the test below for hashability will reject
-        * anything that's not equality.)
-        */
-       if (slink->subLinkType != ANY_SUBLINK)
-               return false;
-       if (slink->testexpr == NULL ||
-               (!IsA(slink->testexpr, OpExpr) &&
-                !and_clause(slink->testexpr)))
-               return false;
-
-       /*
-        * The subplan must not have any direct correlation vars --- else we'd
-        * have to recompute its output each time, so that the hashtable wouldn't
-        * gain anything.
-        */
-       if (node->parParam != NIL)
-               return false;
  
         /*
          * The estimated size of the subquery result must fit in work_mem. (Note:
@@ -666,7 +725,19 @@ subplan_is_hashable(SubLink *slink, SubPlan *node, Plan *plan)
         if (subquery_size > work_mem * 1024L)
                 return false;
  
+       return true;
+}
+
+/*
+ * testexpr_is_hashable: is an ANY SubLink's test expression hashable?
+ */
+static bool
+testexpr_is_hashable(Node *testexpr)
+{
         /*
+        * The testexpr must be a single OpExpr, or an AND-clause containing
+        * only OpExprs.
+        *
          * The combining operators must be hashable and strict. The need for
          * hashability is obvious, since we want to use hashing. Without
          * strictness, behavior in the presence of nulls is too unpredictable.  We
@@ -674,25 +745,28 @@ subplan_is_hashable(SubLink *slink, SubPlan *node, Plan *plan)
          * NULL for non-null inputs, either (see nodeSubplan.c).  However, hash
          * indexes and hash joins assume that too.
          */
-       if (IsA(slink->testexpr, OpExpr))
+       if (testexpr && IsA(testexpr, OpExpr))
         {
-               if (!hash_ok_operator((OpExpr *) slink->testexpr))
-                       return false;
+               if (hash_ok_operator((OpExpr *) testexpr))
+                       return true;
         }
-       else
+       else if (and_clause(testexpr))
         {
-               foreach(l, ((BoolExpr *) slink->testexpr)->args)
+               ListCell   *l;
+
+               foreach(l, ((BoolExpr *) testexpr)->args)
                 {
                         Node       *andarg = (Node *) lfirst(l);
  
                         if (!IsA(andarg, OpExpr))
-                               return false;   /* probably can't happen */
+                               return false;
                         if (!hash_ok_operator((OpExpr *) andarg))
                                 return false;
                 }
+               return true;
         }
  
-       return true;
+       return false;
  }
  
  static bool
@@ -702,6 +776,10 @@ hash_ok_operator(OpExpr *expr)
         HeapTuple       tup;
         Form_pg_operator optup;
  
+       /* quick out if not a binary operator */
+       if (list_length(expr->args) != 2)
+               return false;
+       /* else must look up the operator properties */
         tup = SearchSysCache(OPEROID,
                                                  ObjectIdGetDatum(opid),
                                                  0, 0, 0);
@@ -838,63 +916,6 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
         return true;
  }
  
-/*
- * simplify_EXISTS_query: remove any useless stuff in an EXISTS's subquery
- *
- * The only thing that matters about an EXISTS query is whether it returns
- * zero or more than zero rows.  Therefore, we can remove certain SQL features
- * that won't affect that.  The only part that is really likely to matter in
- * typical usage is simplifying the targetlist: it's a common habit to write
- * "SELECT * FROM" even though there is no need to evaluate any columns.
- *
- * Note: by suppressing the targetlist we could cause an observable behavioral
- * change, namely that any errors that might occur in evaluating the tlist
- * won't occur, nor will other side-effects of volatile functions.  This seems
- * unlikely to bother anyone in practice.
- *
- * Returns TRUE if was able to discard the targetlist, else FALSE.
- */
-static bool
-simplify_EXISTS_query(Query *query)
-{
-       /*
-        * We don't try to simplify at all if the query uses set operations,
-        * aggregates, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE; none of these
-        * seem likely in normal usage and their possible effects are complex.
-        */
-       if (query->commandType != CMD_SELECT ||
-               query->intoClause ||
-               query->setOperations ||
-               query->hasAggs ||
-               query->havingQual ||
-               query->limitOffset ||
-               query->limitCount ||
-               query->rowMarks)
-               return false;
-
-       /*
-        * Mustn't throw away the targetlist if it contains set-returning
-        * functions; those could affect whether zero rows are returned!
-        */
-       if (expression_returns_set((Node *) query->targetList))
-               return false;
-
-       /*
-        * Otherwise, we can throw away the targetlist, as well as any GROUP,
-        * DISTINCT, and ORDER BY clauses; none of those clauses will change
-        * a nonzero-rows result to zero rows or vice versa.  (Furthermore,
-        * since our parsetree representation of these clauses depends on the
-        * targetlist, we'd better throw them away if we drop the targetlist.)
-        */
-       query->targetList = NIL;
-       query->groupClause = NIL;
-       query->distinctClause = NIL;
-       query->sortClause = NIL;
-       query->hasDistinctOn = false;
-
-       return true;
-}
-
  /*
   * convert_EXISTS_sublink_to_join: can we convert an EXISTS SubLink to a join?
   *
@@ -1044,6 +1065,288 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
         return true;
  }
  
+/*
+ * simplify_EXISTS_query: remove any useless stuff in an EXISTS's subquery
+ *
+ * The only thing that matters about an EXISTS query is whether it returns
+ * zero or more than zero rows.  Therefore, we can remove certain SQL features
+ * that won't affect that.  The only part that is really likely to matter in
+ * typical usage is simplifying the targetlist: it's a common habit to write
+ * "SELECT * FROM" even though there is no need to evaluate any columns.
+ *
+ * Note: by suppressing the targetlist we could cause an observable behavioral
+ * change, namely that any errors that might occur in evaluating the tlist
+ * won't occur, nor will other side-effects of volatile functions.  This seems
+ * unlikely to bother anyone in practice.
+ *
+ * Returns TRUE if was able to discard the targetlist, else FALSE.
+ */
+static bool
+simplify_EXISTS_query(Query *query)
+{
+       /*
+        * We don't try to simplify at all if the query uses set operations,
+        * aggregates, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE; none of these
+        * seem likely in normal usage and their possible effects are complex.
+        */
+       if (query->commandType != CMD_SELECT ||
+               query->intoClause ||
+               query->setOperations ||
+               query->hasAggs ||
+               query->havingQual ||
+               query->limitOffset ||
+               query->limitCount ||
+               query->rowMarks)
+               return false;
+
+       /*
+        * Mustn't throw away the targetlist if it contains set-returning
+        * functions; those could affect whether zero rows are returned!
+        */
+       if (expression_returns_set((Node *) query->targetList))
+               return false;
+
+       /*
+        * Otherwise, we can throw away the targetlist, as well as any GROUP,
+        * DISTINCT, and ORDER BY clauses; none of those clauses will change
+        * a nonzero-rows result to zero rows or vice versa.  (Furthermore,
+        * since our parsetree representation of these clauses depends on the
+        * targetlist, we'd better throw them away if we drop the targetlist.)
+        */
+       query->targetList = NIL;
+       query->groupClause = NIL;
+       query->distinctClause = NIL;
+       query->sortClause = NIL;
+       query->hasDistinctOn = false;
+
+       return true;
+}
+
+/*
+ * convert_EXISTS_to_ANY: try to convert EXISTS to a hashable ANY sublink
+ *
+ * The subselect is expected to be a fresh copy that we can munge up,
+ * and to have been successfully passed through simplify_EXISTS_query.
+ *
+ * On success, the modified subselect is returned, and we store a suitable
+ * upper-level test expression at *testexpr, plus a list of the subselect's
+ * output Params at *paramIds.  (The test expression is already Param-ified
+ * and hence need not go through convert_testexpr, which is why we have to
+ * deal with the Param IDs specially.)
+ *
+ * On failure, returns NULL.
+ */
+static Query *
+convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
+                                         Node **testexpr, List **paramIds)
+{
+       Node       *whereClause;
+       List       *leftargs,
+                          *rightargs,
+                          *opids,
+                          *newWhere,
+                          *tlist,
+                          *testlist,
+                          *paramids;
+       ListCell   *lc,
+                          *rc,
+                          *oc;
+       AttrNumber      resno;
+
+       /*
+        * Query must not require a targetlist, since we have to insert a new one.
+        * Caller should have dealt with the case already.
+        */
+       Assert(subselect->targetList == NIL);
+
+       /*
+        * Separate out the WHERE clause.  (We could theoretically also remove
+        * top-level plain JOIN/ON clauses, but it's probably not worth the
+        * trouble.)
+        */
+       whereClause = subselect->jointree->quals;
+       subselect->jointree->quals = NULL;
+
+       /*
+        * The rest of the sub-select must not refer to any Vars of the parent
+        * query.  (Vars of higher levels should be okay, though.)
+        *
+        * Note: we need not check for Aggs separately because we know the
+        * sub-select is as yet unoptimized; any uplevel Agg must therefore
+        * contain an uplevel Var reference.  This is not the case below ...
+        */
+       if (contain_vars_of_level((Node *) subselect, 1))
+               return NULL;
+
+       /*
+        * We don't risk optimizing if the WHERE clause is volatile, either.
+        */
+       if (contain_volatile_functions(whereClause))
+               return NULL;
+
+       /*
+        * Clean up the WHERE clause by doing const-simplification etc on it.
+        * Aside from simplifying the processing we're about to do, this is
+        * important for being able to pull chunks of the WHERE clause up into
+        * the parent query.  Since we are invoked partway through the parent's
+        * preprocess_expression() work, earlier steps of preprocess_expression()
+        * wouldn't get applied to the pulled-up stuff unless we do them here.
+        * For the parts of the WHERE clause that get put back into the child
+        * query, this work is partially duplicative, but it shouldn't hurt.
+        *
+        * Note: we do not run flatten_join_alias_vars.  This is OK because
+        * any parent aliases were flattened already, and we're not going to
+        * pull any child Vars (of any description) into the parent.
+        *
+        * Note: passing the parent's root to eval_const_expressions is technically
+        * wrong, but we can get away with it since only the boundParams (if any)
+        * are used, and those would be the same in a subroot.
+        */
+       whereClause = eval_const_expressions(root, whereClause);
+       whereClause = (Node *) canonicalize_qual((Expr *) whereClause);
+       whereClause = (Node *) make_ands_implicit((Expr *) whereClause);
+
+       /*
+        * We now have a flattened implicit-AND list of clauses, which we
+        * try to break apart into "outervar = innervar" hash clauses.
+        * Anything that can't be broken apart just goes back into the
+        * newWhere list.  Note that we aren't trying hard yet to ensure
+        * that we have only outer or only inner on each side; we'll check
+        * that if we get to the end.
+        */
+       leftargs = rightargs = opids = newWhere = NIL;
+       foreach(lc, (List *) whereClause)
+       {
+               OpExpr     *expr = (OpExpr *) lfirst(lc);
+
+               if (IsA(expr, OpExpr) &&
+                       hash_ok_operator(expr))
+               {
+                       Node   *leftarg = (Node *) linitial(expr->args);
+                       Node   *rightarg = (Node *) lsecond(expr->args);
+
+                       if (contain_vars_of_level(leftarg, 1))
+                       {
+                               leftargs = lappend(leftargs, leftarg);
+                               rightargs = lappend(rightargs, rightarg);
+                               opids = lappend_oid(opids, expr->opno);
+                               continue;
+                       }
+                       if (contain_vars_of_level(rightarg, 1))
+                       {
+                               /*
+                                * We must commute the clause to put the outer var on the
+                                * left, because the hashing code in nodeSubplan.c expects
+                                * that.  This probably shouldn't ever fail, since hashable
+                                * operators ought to have commutators, but be paranoid.
+                                */
+                               expr->opno = get_commutator(expr->opno);
+                               if (OidIsValid(expr->opno) && hash_ok_operator(expr))
+                               {
+                                       leftargs = lappend(leftargs, rightarg);
+                                       rightargs = lappend(rightargs, leftarg);
+                                       opids = lappend_oid(opids, expr->opno);
+                                       continue;
+                               }
+                               /* If no commutator, no chance to optimize the WHERE clause */
+                               return NULL;
+                       }
+               }
+               /* Couldn't handle it as a hash clause */
+               newWhere = lappend(newWhere, expr);
+       }
+
+       /*
+        * If we didn't find anything we could convert, fail.
+        */
+       if (leftargs == NIL)
+               return NULL;
+
+       /*
+        * There mustn't be any parent Vars or Aggs in the stuff that we intend to
+        * put back into the child query.  Note: you might think we don't need to
+        * check for Aggs separately, because an uplevel Agg must contain an
+        * uplevel Var in its argument.  But it is possible that the uplevel Var
+        * got optimized away by eval_const_expressions.  Consider
+        *
+        * SUM(CASE WHEN false THEN uplevelvar ELSE 0 END)
+        */
+       if (contain_vars_of_level((Node *) newWhere, 1) ||
+               contain_vars_of_level((Node *) rightargs, 1))
+               return NULL;
+       if (root->parse->hasAggs &&
+               (contain_aggs_of_level((Node *) newWhere, 1) ||
+                contain_aggs_of_level((Node *) rightargs, 1)))
+               return NULL;
+
+       /*
+        * And there can't be any child Vars in the stuff we intend to pull up.
+        * (Note: we'd need to check for child Aggs too, except we know the
+        * child has no aggs at all because of simplify_EXISTS_query's check.)
+        */
+       if (contain_vars_of_level((Node *) leftargs, 0))
+               return NULL;
+
+       /*
+        * Also reject sublinks in the stuff we intend to pull up.  (It might be
+        * possible to support this, but doesn't seem worth the complication.)
+        */
+       if (contain_subplans((Node *) leftargs))
+               return NULL;
+
+       /*
+        * Okay, adjust the sublevelsup in the stuff we're pulling up.
+        */
+       IncrementVarSublevelsUp((Node *) leftargs, -1, 1);
+
+       /*
+        * Put back any child-level-only WHERE clauses.
+        */
+       if (newWhere)
+               subselect->jointree->quals = (Node *) make_ands_explicit(newWhere);
+
+       /*
+        * Build a new targetlist for the child that emits the expressions
+        * we need.  Concurrently, build a testexpr for the parent using
+        * Params to reference the child outputs.  (Since we generate Params
+        * directly here, there will be no need to convert the testexpr in
+        * build_subplan.)
+        */
+       tlist = testlist = paramids = NIL;
+       resno = 1;
+       /* there's no "for3" so we have to chase one of the lists manually */
+       oc = list_head(opids);
+       forboth(lc, leftargs, rc, rightargs)
+       {
+               Node       *leftarg = (Node *) lfirst(lc);
+               Node       *rightarg = (Node *) lfirst(rc);
+               Oid                     opid = lfirst_oid(oc);
+               Param      *param;
+
+               oc = lnext(oc);
+               param = generate_new_param(root,
+                                                                  exprType(rightarg),
+                                                                  exprTypmod(rightarg));
+               tlist = lappend(tlist,
+                                               makeTargetEntry((Expr *) rightarg,
+                                                                               resno++,
+                                                                               NULL,
+                                                                               false));
+               testlist = lappend(testlist,
+                                                  make_opclause(opid, BOOLOID, false,
+                                                                                (Expr *) leftarg, (Expr *) param));
+               paramids = lappend_int(paramids, param->paramid);
+       }
+
+       /* Put everything where it should go, and we're done */
+       subselect->targetList = tlist;
+       *testexpr = (Node *) make_ands_explicit(testlist);
+       *paramIds = paramids;
+
+       return subselect;
+}
+
+
  /*
   * Replace correlation vars (uplevel vars) with Params.
   *
@@ -1130,7 +1433,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
                  * Now build the SubPlan node and make the expr to return.
                  */
                 return make_subplan(context->root,
-                                                       sublink,
+                                                       (Query *) sublink->subselect,
+                                                       sublink->subLinkType,
                                                         testexpr,
                                                         context->isTopQual);
         }
@@ -1140,7 +1444,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
          * the very routine that creates 'em to begin with).  We shouldn't find
          * ourselves invoked directly on a Query, either.
          */
-       Assert(!is_subplan(node));
+       Assert(!IsA(node, SubPlan));
+       Assert(!IsA(node, AlternativeSubPlan));
         Assert(!IsA(node, Query));
  
         /*
@@ -1251,7 +1556,7 @@ SS_finalize_plan(PlannerInfo *root, Plan *plan, bool attach_initplans)
                 {
                         initSetParam = bms_add_member(initSetParam, lfirst_int(l2));
                 }
-               initplan_cost += get_initplan_cost(root, initsubplan);
+               initplan_cost += initsubplan->startup_cost + initsubplan->per_call_cost;
         }
  
         /*
@@ -1555,7 +1860,7 @@ finalize_primnode(Node *node, finalize_primnode_context *context)
                 }
                 return false;                   /* no more to do here */
         }
-       if (is_subplan(node))
+       if (IsA(node, SubPlan))
         {
                 SubPlan    *subplan = (SubPlan *) node;
                 Plan       *plan = planner_subplan_get_plan(context->root, subplan);
@@ -1655,6 +1960,8 @@ SS_make_initplan_from_plan(PlannerInfo *root, Plan *plan,
          * parParam and args lists remain empty.
          */
  
+       cost_subplan(root, node, plan);
+
         /*
          * Make a Param that will be the subplan's output.
          */
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c

index ffe31bdff28876f6cbf0a470dc14bfc1f2cd8bbe..6a06b0806c27440c63e87c6111dcb0f0674fdc87 100644 (file)
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.262 2008/08/14 18:47:59 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.263 2008/08/22 00:16:04 tgl Exp $
   *
   * HISTORY
   *       AUTHOR                        DATE                    MAJOR EVENT
@@ -360,7 +360,7 @@ make_ands_implicit(Expr *clause)
   * are no subqueries.  There mustn't be outer-aggregate references either.
   *
   * (If you want something like this but able to deal with subqueries,
- * see rewriteManip.c's checkExprHasAggs().)
+ * see rewriteManip.c's contain_aggs_of_level().)
   */
  bool
  contain_agg_clause(Node *clause)
@@ -566,6 +566,8 @@ expression_returns_set_walker(Node *node, void *context)
                 return false;
         if (IsA(node, SubPlan))
                 return false;
+       if (IsA(node, AlternativeSubPlan))
+               return false;
         if (IsA(node, ArrayExpr))
                 return false;
         if (IsA(node, RowExpr))
@@ -637,6 +639,8 @@ expression_returns_set_rows_walker(Node *node, double *count)
                 return false;
         if (IsA(node, SubPlan))
                 return false;
+       if (IsA(node, AlternativeSubPlan))
+               return false;
         if (IsA(node, ArrayExpr))
                 return false;
         if (IsA(node, RowExpr))
@@ -684,6 +688,7 @@ contain_subplans_walker(Node *node, void *context)
         if (node == NULL)
                 return false;
         if (IsA(node, SubPlan) ||
+               IsA(node, AlternativeSubPlan) ||
                 IsA(node, SubLink))
                 return true;                    /* abort the tree traversal and return true */
         return expression_tree_walker(node, contain_subplans_walker, context);
@@ -1012,6 +1017,8 @@ contain_nonstrict_functions_walker(Node *node, void *context)
         }
         if (IsA(node, SubPlan))
                 return true;
+       if (IsA(node, AlternativeSubPlan))
+               return true;
         /* ArrayCoerceExpr is strict at the array level, regardless of elemfunc */
         if (IsA(node, FieldStore))
                 return true;
@@ -2308,7 +2315,8 @@ eval_const_expressions_mutator(Node *node,
                                 break;
                 }
         }
-       if (IsA(node, SubPlan))
+       if (IsA(node, SubPlan) ||
+               IsA(node, AlternativeSubPlan))
         {
                 /*
                  * Return a SubPlan unchanged --- too late to do anything with it.
@@ -4156,6 +4164,8 @@ expression_tree_walker(Node *node,
                                         return true;
                         }
                         break;
+               case T_AlternativeSubPlan:
+                       return walker(((AlternativeSubPlan *) node)->subplans, context);
                 case T_FieldSelect:
                         return walker(((FieldSelect *) node)->arg, context);
                 case T_FieldStore:
@@ -4630,6 +4640,16 @@ expression_tree_mutator(Node *node,
                                 return (Node *) newnode;
                         }
                         break;
+               case T_AlternativeSubPlan:
+                       {
+                               AlternativeSubPlan *asplan = (AlternativeSubPlan *) node;
+                               AlternativeSubPlan *newnode;
+
+                               FLATCOPY(newnode, asplan, AlternativeSubPlan);
+                               MUTATE(newnode->subplans, asplan->subplans, List *);
+                               return (Node *) newnode;
+                       }
+                       break;
                 case T_FieldSelect:
                         {
                                 FieldSelect *fselect = (FieldSelect *) node;
diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c

index 290f9bb64bfbbf3154065fb22311757ff248d1cd..f9bd59c799bce873b4f2c26cfafa34ef36a2d256 100644 (file)
--- a/src/backend/optimizer/util/var.c
+++ b/src/backend/optimizer/util/var.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/util/var.c,v 1.75 2008/08/14 18:47:59 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/var.c,v 1.76 2008/08/22 00:16:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -171,7 +171,7 @@ pull_varattnos_walker(Node *node, Bitmapset **varattnos)
         }
         /* Should not find a subquery or subplan */
         Assert(!IsA(node, Query));
-       Assert(!is_subplan(node));
+       Assert(!IsA(node, SubPlan));
  
         return expression_tree_walker(node, pull_varattnos_walker,
                                                                   (void *) varattnos);
@@ -677,7 +677,7 @@ flatten_join_alias_vars_mutator(Node *node,
                 return (Node *) newnode;
         }
         /* Already-planned tree not supported */
-       Assert(!is_subplan(node));
+       Assert(!IsA(node, SubPlan));
  
         return expression_tree_mutator(node, flatten_join_alias_vars_mutator,
                                                                    (void *) context);
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c

index 8addb53e51ee5ac765067cb9619f96b46d4f5120..6309525cabf97d0172d1e7be4326ca55dbc5d60f 100644 (file)
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/parser/parse_expr.c,v 1.229 2008/07/16 01:30:22 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/parser/parse_expr.c,v 1.230 2008/08/22 00:16:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1980,6 +1980,15 @@ exprType(Node *expr)
                                 }
                         }
                         break;
+               case T_AlternativeSubPlan:
+                       {
+                               /* As above, supported for the convenience of ruleutils.c */
+                               AlternativeSubPlan *asplan = (AlternativeSubPlan *) expr;
+
+                               /* subplans should all return the same thing */
+                               type = exprType((Node *) linitial(asplan->subplans));
+                       }
+                       break;
                 case T_FieldSelect:
                         type = ((FieldSelect *) expr)->resulttype;
                         break;
diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c

index 1202c31314dd04ae6e5999743bcad3126636c2d1..baa9330016fad13962d062783c11888a4e76e7cb 100644 (file)
--- a/src/backend/rewrite/rewriteManip.c
+++ b/src/backend/rewrite/rewriteManip.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/rewrite/rewriteManip.c,v 1.109 2008/08/14 20:31:29 heikki Exp $
+ *       $PostgreSQL: pgsql/src/backend/rewrite/rewriteManip.c,v 1.110 2008/08/22 00:16:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -25,10 +25,10 @@
  typedef struct
  {
         int                     sublevels_up;
-} checkExprHasAggs_context;
+} contain_aggs_of_level_context;
  
-static bool checkExprHasAggs_walker(Node *node,
-                                               checkExprHasAggs_context *context);
+static bool contain_aggs_of_level_walker(Node *node,
+                                               contain_aggs_of_level_context *context);
  static bool checkExprHasSubLink_walker(Node *node, void *context);
  static Relids offset_relid_set(Relids relids, int offset);
  static Relids adjust_relid_set(Relids relids, int oldrelid, int newrelid);
@@ -37,32 +37,44 @@ static Relids adjust_relid_set(Relids relids, int oldrelid, int newrelid);
  /*
   * checkExprHasAggs -
   *     Check if an expression contains an aggregate function call.
+ */
+bool
+checkExprHasAggs(Node *node)
+{
+       return contain_aggs_of_level(node, 0);
+}
+
+/*
+ * contain_aggs_of_level -
+ *     Check if an expression contains an aggregate function call of a
+ *     specified query level.
   *
   * The objective of this routine is to detect whether there are aggregates
- * belonging to the initial query level.  Aggregates belonging to subqueries
+ * belonging to the given query level.  Aggregates belonging to subqueries
   * or outer queries do NOT cause a true result.  We must recurse into
   * subqueries to detect outer-reference aggregates that logically belong to
- * the initial query level.
+ * the specified query level.
   */
  bool
-checkExprHasAggs(Node *node)
+contain_aggs_of_level(Node *node, int levelsup)
  {
-       checkExprHasAggs_context context;
+       contain_aggs_of_level_context context;
  
-       context.sublevels_up = 0;
+       context.sublevels_up = levelsup;
  
         /*
          * Must be prepared to start with a Query or a bare expression tree; if
          * it's a Query, we don't want to increment sublevels_up.
          */
         return query_or_expression_tree_walker(node,
-                                                                                  checkExprHasAggs_walker,
+                                                                                  contain_aggs_of_level_walker,
                                                                                    (void *) &context,
                                                                                    0);
  }
  
  static bool
-checkExprHasAggs_walker(Node *node, checkExprHasAggs_context *context)
+contain_aggs_of_level_walker(Node *node,
+                                                        contain_aggs_of_level_context *context)
  {
         if (node == NULL)
                 return false;
@@ -79,12 +91,12 @@ checkExprHasAggs_walker(Node *node, checkExprHasAggs_context *context)
  
                 context->sublevels_up++;
                 result = query_tree_walker((Query *) node,
-                                                                  checkExprHasAggs_walker,
+                                                                  contain_aggs_of_level_walker,
                                                                    (void *) context, 0);
                 context->sublevels_up--;
                 return result;
         }
-       return expression_tree_walker(node, checkExprHasAggs_walker,
+       return expression_tree_walker(node, contain_aggs_of_level_walker,
                                                                   (void *) context);
  }
  
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c

index 0120353344a4ca696757751ecab02477d13d4979..3c62e7a972dcd6b6cdf0b2f99c61a19dfe449b69 100644 (file)
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -9,7 +9,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.279 2008/08/02 21:32:00 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.280 2008/08/22 00:16:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -3732,6 +3732,11 @@ get_rule_expr(Node *node, deparse_context *context,
                         }
                         break;
  
+               case T_AlternativeSubPlan:
+                       /* As above, just punt */
+                       appendStringInfo(buf, "(alternative subplans)");
+                       break;
+
                 case T_FieldSelect:
                         {
                                 FieldSelect *fselect = (FieldSelect *) node;
diff --git a/src/include/executor/nodeSubplan.h b/src/include/executor/nodeSubplan.h

index c7c9f4e2dc4168a7b77b7f7346558cce9019618e..7d12a54b6149e969e2076fe2e297731d6325e37e 100644 (file)
--- a/src/include/executor/nodeSubplan.h
+++ b/src/include/executor/nodeSubplan.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/executor/nodeSubplan.h,v 1.27 2008/01/01 19:45:57 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/executor/nodeSubplan.h,v 1.28 2008/08/22 00:16:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -17,10 +17,8 @@
  #include "nodes/execnodes.h"
  
  extern SubPlanState *ExecInitSubPlan(SubPlan *subplan, PlanState *parent);
-extern Datum ExecSubPlan(SubPlanState *node,
-                       ExprContext *econtext,
-                       bool *isNull,
-                       ExprDoneCond *isDone);
+
+extern AlternativeSubPlanState *ExecInitAlternativeSubPlan(AlternativeSubPlan *asplan, PlanState *parent);
  
  extern void ExecReScanSetParamPlan(SubPlanState *node, PlanState *parent);
  
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h

index ce41c750afce3c8e9484154f42679171c5ae4dea..ee8c4a2bef7e6d79985d2b0c249c59cb1d2a0d5a 100644 (file)
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.186 2008/08/07 03:04:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.187 2008/08/22 00:16:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -627,6 +627,17 @@ typedef struct SubPlanState
         FmgrInfo   *cur_eq_funcs;       /* equality functions for LHS vs. table */
  } SubPlanState;
  
+/* ----------------
+ *             AlternativeSubPlanState node
+ * ----------------
+ */
+typedef struct AlternativeSubPlanState
+{
+       ExprState       xprstate;
+       List       *subplans;           /* states of alternative subplans */
+       int                     active;                 /* list index of the one we're using */
+} AlternativeSubPlanState;
+
  /* ----------------
   *             FieldSelectState node
   * ----------------
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h

index 5fdd23c5a919233f8504c5f7ee992219a242bfb7..b9e00bfddea1adae74a2f9acd410cffe05481f0b 100644 (file)
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.208 2008/08/14 18:48:00 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.209 2008/08/22 00:16:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -118,6 +118,7 @@ typedef enum NodeTag
         T_BoolExpr,
         T_SubLink,
         T_SubPlan,
+       T_AlternativeSubPlan,
         T_FieldSelect,
         T_FieldStore,
         T_RelabelType,
@@ -160,6 +161,7 @@ typedef enum NodeTag
         T_ScalarArrayOpExprState,
         T_BoolExprState,
         T_SubPlanState,
+       T_AlternativeSubPlanState,
         T_FieldSelectState,
         T_FieldStoreState,
         T_CoerceViaIOState,
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h

index f0c55112cbcf65cadd97c047c624d05bd9fe9ba8..f6da1adbfa07e79c73b8ddfd9428fd646958e106 100644 (file)
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -10,7 +10,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/nodes/primnodes.h,v 1.138 2008/08/02 21:32:00 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/primnodes.h,v 1.139 2008/08/22 00:16:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -445,7 +445,8 @@ typedef struct SubLink
   *
   * If the sub-select becomes an initplan rather than a subplan, the executable
   * expression is part of the outer plan's expression tree (and the SubPlan
- * node itself is not).  In this case testexpr is NULL to avoid duplication.
+ * node itself is not, but rather is found in the outer plan's initPlan
+ * list).  In this case testexpr is NULL to avoid duplication.
   *
   * The planner also derives lists of the values that need to be passed into
   * and out of the subplan.     Input values are represented as a list "args" of
@@ -457,6 +458,10 @@ typedef struct SubLink
   * is an initplan; they are listed in order by sub-select output column
   * position.  (parParam and setParam are integer Lists, not Bitmapsets,
   * because their ordering is significant.)
+ *
+ * Also, the planner computes startup and per-call costs for use of the
+ * SubPlan.  Note that these include the cost of the subquery proper,
+ * evaluation of the testexpr if any, and any hashtable management overhead.
   */
  typedef struct SubPlan
  {
@@ -482,8 +487,25 @@ typedef struct SubPlan
                                                                  * Params for parent plan */
         List       *parParam;           /* indices of input Params from parent plan */
         List       *args;                       /* exprs to pass as parParam values */
+       /* Estimated execution costs: */
+       Cost            startup_cost;   /* one-time setup cost */
+       Cost            per_call_cost;  /* cost for each subplan evaluation */
  } SubPlan;
  
+/*
+ * AlternativeSubPlan - expression node for a choice among SubPlans
+ *
+ * The subplans are given as a List so that the node definition need not
+ * change if there's ever more than two alternatives.  For the moment,
+ * though, there are always exactly two; and the first one is the fast-start
+ * plan.
+ */
+typedef struct AlternativeSubPlan
+{
+       Expr            xpr;
+       List       *subplans;           /* SubPlan(s) with equivalent results */
+} AlternativeSubPlan;
+
  /* ----------------
   * FieldSelect
   *
diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h

index 41e52d909aa63a605f23a24c327b52a039d14e52..1ea3d701fe9c1edc4f70e40294ab957a83c05784 100644 (file)
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.92 2008/08/14 18:48:00 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.93 2008/08/22 00:16:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -19,7 +19,6 @@
  
  #define is_opclause(clause)            ((clause) != NULL && IsA(clause, OpExpr))
  #define is_funcclause(clause)  ((clause) != NULL && IsA(clause, FuncExpr))
-#define is_subplan(clause)             ((clause) != NULL && IsA(clause, SubPlan))
  
  typedef struct
  {
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h

index 17caeb4ee4187803a449309be01f54cd94058a3b..8ad8f7f0626360e6b180b01588d8bd8952d40a00 100644 (file)
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.91 2008/08/14 18:48:00 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.92 2008/08/22 00:16:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -93,9 +93,9 @@ extern void cost_mergejoin(MergePath *path, PlannerInfo *root,
                                                    SpecialJoinInfo *sjinfo);
  extern void cost_hashjoin(HashPath *path, PlannerInfo *root,
                                                   SpecialJoinInfo *sjinfo);
+extern void cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan);
  extern void cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root);
  extern void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root);
-extern Cost get_initplan_cost(PlannerInfo *root, SubPlan *subplan);
  extern void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel);
  extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
                                                    RelOptInfo *outer_rel,
diff --git a/src/include/rewrite/rewriteManip.h b/src/include/rewrite/rewriteManip.h

index 110d8ac0d119c44cf26b436d5306f4ffe5a96c30..a870ce889b040b9d6b3040f40026b7f25c9579ee 100644 (file)
--- a/src/include/rewrite/rewriteManip.h
+++ b/src/include/rewrite/rewriteManip.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/rewrite/rewriteManip.h,v 1.45 2008/08/14 20:31:29 heikki Exp $
+ * $PostgreSQL: pgsql/src/include/rewrite/rewriteManip.h,v 1.46 2008/08/22 00:16:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -35,6 +35,7 @@ extern Query *getInsertSelectQuery(Query *parsetree, Query ***subquery_ptr);
  extern void AddQual(Query *parsetree, Node *qual);
  extern void AddInvertedQual(Query *parsetree, Node *qual);
  
+extern bool contain_aggs_of_level(Node *node, int levelsup);
  extern bool checkExprHasAggs(Node *node);
  extern bool checkExprHasSubLink(Node *node);
author	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 22 Aug 2008 00:16:04 +0000 (00:16 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 22 Aug 2008 00:16:04 +0000 (00:16 +0000)
src/backend/catalog/dependency.c		patch \| blob \| history
src/backend/executor/execQual.c		patch \| blob \| history
src/backend/executor/nodeSubplan.c		patch \| blob \| history
src/backend/nodes/copyfuncs.c		patch \| blob \| history
src/backend/nodes/equalfuncs.c		patch \| blob \| history
src/backend/nodes/outfuncs.c		patch \| blob \| history
src/backend/optimizer/path/clausesel.c		patch \| blob \| history
src/backend/optimizer/path/costsize.c		patch \| blob \| history
src/backend/optimizer/plan/subselect.c		patch \| blob \| history
src/backend/optimizer/util/clauses.c		patch \| blob \| history
src/backend/optimizer/util/var.c		patch \| blob \| history
src/backend/parser/parse_expr.c		patch \| blob \| history
src/backend/rewrite/rewriteManip.c		patch \| blob \| history
src/backend/utils/adt/ruleutils.c		patch \| blob \| history
src/include/executor/nodeSubplan.h		patch \| blob \| history
src/include/nodes/execnodes.h		patch \| blob \| history
src/include/nodes/nodes.h		patch \| blob \| history
src/include/nodes/primnodes.h		patch \| blob \| history
src/include/optimizer/clauses.h		patch \| blob \| history
src/include/optimizer/cost.h		patch \| blob \| history
src/include/rewrite/rewriteManip.h		patch \| blob \| history