Change the planner-to-executor API so that the planner tells the executor

[postgresql] / src / backend / optimizer / util / pathnode.c
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index e1390aa11122d3ee2d29996c810b3d1dc39423eb..8ee6346e5b18d56c05d2df13f2aa0e5772c6659e 100644 (file)
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.135 2007/01/05 22:19:32 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.136 2007/01/10 18:06:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -28,12 +28,14 @@
  #include "parser/parsetree.h"
  #include "utils/memutils.h"
  #include "utils/selfuncs.h"
+#include "utils/lsyscache.h"
  #include "utils/syscache.h"
  
  
  static List *translate_sub_tlist(List *tlist, int relid);
-static bool query_is_distinct_for(Query *query, List *colnos);
-static bool hash_safe_tlist(List *tlist);
+static bool query_is_distinct_for(Query *query, List *colnos, List *opids);
+static Oid     distinct_col_search(int colno, List *colnos, List *opids);
+static bool hash_safe_operators(List *opids);
  
  
  /*****************************************************************************
@@ -733,6 +735,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
         Path            agg_path;               /* dummy for result of cost_agg */
         MemoryContext oldcontext;
         List       *sub_targetlist;
+       List       *in_operators;
         ListCell   *l;
         int                     numCols;
  
@@ -769,9 +772,11 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
         /*
          * Try to identify the targetlist that will actually be unique-ified. In
          * current usage, this routine is only used for sub-selects of IN clauses,
-        * so we should be able to find the tlist in in_info_list.
+        * so we should be able to find the tlist in in_info_list.  Get the IN
+        * clause's operators, too, because they determine what "unique" means.
          */
         sub_targetlist = NIL;
+       in_operators = NIL;
         foreach(l, root->in_info_list)
         {
                 InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
@@ -779,6 +784,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
                 if (bms_equal(ininfo->righthand, rel->relids))
                 {
                         sub_targetlist = ininfo->sub_targetlist;
+                       in_operators = ininfo->in_operators;
                         break;
                 }
         }
@@ -801,7 +807,8 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
                 sub_tlist_colnos = translate_sub_tlist(sub_targetlist, rel->relid);
  
                 if (sub_tlist_colnos &&
-                       query_is_distinct_for(rte->subquery, sub_tlist_colnos))
+                       query_is_distinct_for(rte->subquery,
+                                                                 sub_tlist_colnos, in_operators))
                 {
                         pathnode->umethod = UNIQUE_PATH_NOOP;
                         pathnode->rows = rel->rows;
@@ -847,11 +854,11 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
  
         /*
          * Is it safe to use a hashed implementation?  If so, estimate and compare
-        * costs.  We only try this if we know the targetlist for sure (else we
-        * can't be sure about the datatypes involved).
+        * costs.  We only try this if we know the IN operators, else we can't
+        * check their hashability.
          */
         pathnode->umethod = UNIQUE_PATH_SORT;
-       if (enable_hashagg && sub_targetlist && hash_safe_tlist(sub_targetlist))
+       if (enable_hashagg && in_operators && hash_safe_operators(in_operators))
         {
                 /*
                  * Estimate the overhead per hashtable entry at 64 bytes (same as in
@@ -924,16 +931,25 @@ translate_sub_tlist(List *tlist, int relid)
   *
   * colnos is an integer list of output column numbers (resno's).  We are
   * interested in whether rows consisting of just these columns are certain
- * to be distinct.
+ * to be distinct.  "Distinctness" is defined according to whether the
+ * corresponding upper-level equality operators listed in opids would think
+ * the values are distinct.  (Note: the opids entries could be cross-type
+ * operators, and thus not exactly the equality operators that the subquery
+ * would use itself.  We assume that the subquery is compatible if these
+ * operators appear in the same btree opfamily as the ones the subquery uses.)
   */
  static bool
-query_is_distinct_for(Query *query, List *colnos)
+query_is_distinct_for(Query *query, List *colnos, List *opids)
  {
         ListCell   *l;
+       Oid                     opid;
+
+       Assert(list_length(colnos) == list_length(opids));
  
         /*
          * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
-        * columns in the DISTINCT clause appear in colnos.
+        * columns in the DISTINCT clause appear in colnos and operator
+        * semantics match.
          */
         if (query->distinctClause)
         {
@@ -943,7 +959,9 @@ query_is_distinct_for(Query *query, List *colnos)
                         TargetEntry *tle = get_sortgroupclause_tle(scl,
                                                                                                            query->targetList);
  
-                       if (!list_member_int(colnos, tle->resno))
+                       opid = distinct_col_search(tle->resno, colnos, opids);
+                       if (!OidIsValid(opid) ||
+                               !ops_in_same_btree_opfamily(opid, scl->sortop))
                                 break;                  /* exit early if no match */
                 }
                 if (l == NULL)                  /* had matches for all? */
@@ -952,7 +970,7 @@ query_is_distinct_for(Query *query, List *colnos)
  
         /*
          * Similarly, GROUP BY guarantees uniqueness if all the grouped columns
-        * appear in colnos.
+        * appear in colnos and operator semantics match.
          */
         if (query->groupClause)
         {
@@ -962,7 +980,9 @@ query_is_distinct_for(Query *query, List *colnos)
                         TargetEntry *tle = get_sortgroupclause_tle(grpcl,
                                                                                                            query->targetList);
  
-                       if (!list_member_int(colnos, tle->resno))
+                       opid = distinct_col_search(tle->resno, colnos, opids);
+                       if (!OidIsValid(opid) ||
+                               !ops_in_same_btree_opfamily(opid, grpcl->sortop))
                                 break;                  /* exit early if no match */
                 }
                 if (l == NULL)                  /* had matches for all? */
@@ -972,7 +992,7 @@ query_is_distinct_for(Query *query, List *colnos)
         {
                 /*
                  * If we have no GROUP BY, but do have aggregates or HAVING, then the
-                * result is at most one row so it's surely unique.
+                * result is at most one row so it's surely unique, for any operators.
                  */
                 if (query->hasAggs || query->havingQual)
                         return true;
@@ -980,7 +1000,13 @@ query_is_distinct_for(Query *query, List *colnos)
  
         /*
          * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
-        * except with ALL
+        * except with ALL.
+        *
+        * XXX this code knows that prepunion.c will adopt the default ordering
+        * operator for each column datatype as the sortop.  It'd probably be
+        * better if these operators were chosen at parse time and stored into
+        * the parsetree, instead of leaving bits of the planner to decide
+        * semantics.
          */
         if (query->setOperations)
         {
@@ -996,8 +1022,13 @@ query_is_distinct_for(Query *query, List *colnos)
                         {
                                 TargetEntry *tle = (TargetEntry *) lfirst(l);
  
-                               if (!tle->resjunk &&
-                                       !list_member_int(colnos, tle->resno))
+                               if (tle->resjunk)
+                                       continue;       /* ignore resjunk columns */
+
+                               opid = distinct_col_search(tle->resno, colnos, opids);
+                               if (!OidIsValid(opid) ||
+                                       !ops_in_same_btree_opfamily(opid,
+                                                                                               ordering_oper_opid(exprType((Node *) tle->expr))))
                                         break;          /* exit early if no match */
                         }
                         if (l == NULL)          /* had matches for all? */
@@ -1014,31 +1045,46 @@ query_is_distinct_for(Query *query, List *colnos)
  }
  
  /*
- * hash_safe_tlist - can datatypes of given tlist be hashed?
+ * distinct_col_search - subroutine for query_is_distinct_for
   *
- * We assume hashed aggregation will work if the datatype's equality operator
- * is marked hashjoinable.
+ * If colno is in colnos, return the corresponding element of opids,
+ * else return InvalidOid.  (We expect colnos does not contain duplicates,
+ * so the result is well-defined.)
+ */
+static Oid
+distinct_col_search(int colno, List *colnos, List *opids)
+{
+       ListCell   *lc1,
+                          *lc2;
+
+       forboth(lc1, colnos, lc2, opids)
+       {
+               if (colno == lfirst_int(lc1))
+                       return lfirst_oid(lc2);
+       }
+       return InvalidOid;
+}
+
+/*
+ * hash_safe_operators - can all the specified IN operators be hashed?
   *
- * XXX this probably should be somewhere else. See also hash_safe_grouping
- * in plan/planner.c.
+ * We assume hashed aggregation will work if each IN operator is marked
+ * hashjoinable.  If the IN operators are cross-type, this could conceivably
+ * fail: the aggregation will need a hashable equality operator for the RHS
+ * datatype --- but it's pretty hard to conceive of a hash opclass that has
+ * cross-type hashing without support for hashing the individual types, so
+ * we don't expend cycles here to support the case.  We could check
+ * get_compatible_hash_operator() instead of just op_hashjoinable(), but the
+ * former is a significantly more expensive test.
   */
  static bool
-hash_safe_tlist(List *tlist)
+hash_safe_operators(List *opids)
  {
-       ListCell   *tl;
+       ListCell   *lc;
  
-       foreach(tl, tlist)
+       foreach(lc, opids)
         {
-               Node       *expr = (Node *) lfirst(tl);
-               Operator        optup;
-               bool            oprcanhash;
-
-               optup = equality_oper(exprType(expr), true);
-               if (!optup)
-                       return false;
-               oprcanhash = ((Form_pg_operator) GETSTRUCT(optup))->oprcanhash;
-               ReleaseSysCache(optup);
-               if (!oprcanhash)
+               if (!op_hashjoinable(lfirst_oid(lc)))
                         return false;
         }
         return true;
@@ -1156,6 +1202,7 @@ create_nestloop_path(PlannerInfo *root,
   *             ordering for each merge clause
   * 'mergestrategies' are the btree operator strategies identifying the merge
   *             ordering for each merge clause
+ * 'mergenullsfirst' are the nulls first/last flags for each merge clause
   * 'outersortkeys' are the sort varkeys for the outer relation
   * 'innersortkeys' are the sort varkeys for the inner relation
   */
@@ -1168,8 +1215,9 @@ create_mergejoin_path(PlannerInfo *root,
                                           List *restrict_clauses,
                                           List *pathkeys,
                                           List *mergeclauses,
-                                         List *mergefamilies,
-                                         List *mergestrategies,
+                                         Oid *mergefamilies,
+                                         int *mergestrategies,
+                                         bool *mergenullsfirst,
                                           List *outersortkeys,
                                           List *innersortkeys)
  {
@@ -1210,8 +1258,9 @@ create_mergejoin_path(PlannerInfo *root,
         pathnode->jpath.joinrestrictinfo = restrict_clauses;
         pathnode->jpath.path.pathkeys = pathkeys;
         pathnode->path_mergeclauses = mergeclauses;
-       pathnode->path_mergefamilies = mergefamilies;
-       pathnode->path_mergestrategies = mergestrategies;
+       pathnode->path_mergeFamilies = mergefamilies;
+       pathnode->path_mergeStrategies = mergestrategies;
+       pathnode->path_mergeNullsFirst = mergenullsfirst;
         pathnode->outersortkeys = outersortkeys;
         pathnode->innersortkeys = innersortkeys;