Modify the definition of window-function PARTITION BY and ORDER BY clauses

author Tom Lane <tgl@sss.pgh.pa.us>

Thu, 27 Aug 2009 20:08:03 +0000 (20:08 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Thu, 27 Aug 2009 20:08:03 +0000 (20:08 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Thu, 27 Aug 2009 20:08:03 +0000 (20:08 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Thu, 27 Aug 2009 20:08:03 +0000 (20:08 +0000)
diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml

index d67d2b4ff764036e3ee8179de7aba6bff0c10cee..f22eeb2804d911482af4172d8d8deb155e558b67 100644 (file)
--- a/doc/src/sgml/ref/select.sgml
+++ b/doc/src/sgml/ref/select.sgml
@@ -1,5 +1,5 @@
  <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/select.sgml,v 1.123 2009/08/18 23:40:20 tgl Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/select.sgml,v 1.124 2009/08/27 20:08:02 tgl Exp $
  PostgreSQL documentation
  -->
  
@@ -595,17 +595,24 @@ WINDOW <replaceable class="parameter">window_name</replaceable> AS ( <replaceabl
  
     <para>
      The elements of the <literal>PARTITION BY</> list are interpreted in
-    the same fashion as elements of a
-    <xref linkend="sql-groupby" endterm="sql-groupby-title">, and
-    the elements of the <literal>ORDER BY</> list are interpreted in the
-    same fashion as elements of an
-    <xref linkend="sql-orderby" endterm="sql-orderby-title">.
-    The only difference is that these expressions can contain aggregate
+    much the same fashion as elements of a
+    <xref linkend="sql-groupby" endterm="sql-groupby-title">, except that
+    they are always simple expressions and never the name or number of an
+    output column.
+    Another difference is that these expressions can contain aggregate
      function calls, which are not allowed in a regular <literal>GROUP BY</>
      clause.  They are allowed here because windowing occurs after grouping
      and aggregation.
     </para>
  
+   <para>
+    Similarly, the elements of the <literal>ORDER BY</> list are interpreted
+    in much the same fashion as elements of an
+    <xref linkend="sql-orderby" endterm="sql-orderby-title">, except that
+    the expressions are always taken as simple expressions and never the name
+    or number of an output column.
+   </para>
+
     <para>
      The optional <replaceable class="parameter">frame_clause</> defines
      the <firstterm>window frame</> for window functions that depend on the
diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml

index bc562e1f97be8f294a9843cf369171d11e6b556b..c2dd31b98d37b591ff64774aeab6ed6d8fd46b8c 100644 (file)
--- a/doc/src/sgml/syntax.sgml
+++ b/doc/src/sgml/syntax.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/syntax.sgml,v 1.133 2009/06/17 21:58:49 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/syntax.sgml,v 1.134 2009/08/27 20:08:02 tgl Exp $ -->
  
  <chapter id="sql-syntax">
   <title>SQL Syntax</title>
@@ -1619,7 +1619,9 @@ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
      expression that does not itself contain window function calls.
      The <literal>PARTITION BY</> and <literal>ORDER BY</> lists have
      essentially the same syntax and semantics as <literal>GROUP BY</>
-    and <literal>ORDER BY</> clauses of the whole query.
+    and <literal>ORDER BY</> clauses of the whole query, except that their
+    expressions are always just expressions and cannot be output-column
+    names or numbers.
      <replaceable>window_name</replaceable> is a reference to a named window
      specification defined in the query's <literal>WINDOW</literal> clause.
      Named window specifications are usually referenced with just
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c

index 97c560b30950fab19e309b0661d2d4bb0af7e1c0..c441be220e2ec65a559a3a0a5674f7e7c4ac933a 100644 (file)
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -17,7 +17,7 @@
   * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- *     $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.389 2009/06/11 14:48:59 momjian Exp $
+ *     $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.390 2009/08/27 20:08:02 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -825,13 +825,14 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
         qry->sortClause = transformSortClause(pstate,
                                                                                   stmt->sortClause,
                                                                                   &qry->targetList,
-                                                                                 true /* fix unknowns */ );
+                                                                                 true /* fix unknowns */,
+                                                                                 false /* not window function */);
  
         qry->groupClause = transformGroupClause(pstate,
                                                                                         stmt->groupClause,
                                                                                         &qry->targetList,
                                                                                         qry->sortClause,
-                                                                                       false);
+                                                                                       false /* not window function */);
  
         if (stmt->distinctClause == NIL)
         {
@@ -1039,7 +1040,8 @@ transformValuesClause(ParseState *pstate, SelectStmt *stmt)
         qry->sortClause = transformSortClause(pstate,
                                                                                   stmt->sortClause,
                                                                                   &qry->targetList,
-                                                                                 true /* fix unknowns */ );
+                                                                                 true /* fix unknowns */,
+                                                                                 false /* not window function */);
  
         qry->limitOffset = transformLimitClause(pstate, stmt->limitOffset,
                                                                                         "OFFSET");
@@ -1291,7 +1293,8 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
         qry->sortClause = transformSortClause(pstate,
                                                                                   sortClause,
                                                                                   &qry->targetList,
-                                                                                 false /* no unknowns expected */ );
+                                                                                 false /* no unknowns expected */,
+                                                                                 false /* not window function */);
  
         pstate->p_rtable = list_truncate(pstate->p_rtable, sv_rtable_length);
         pstate->p_relnamespace = sv_relnamespace;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c

index 139202f66790206478a70ac66c7fd680bf01a611..a63a280a8cbd17e11369cff054d8757b1cfe6651 100644 (file)
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.190 2009/07/16 06:33:43 petere Exp $
+ *       $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.191 2009/08/27 20:08:02 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -37,16 +37,15 @@
  #include "utils/rel.h"
  
  
+/* clause types for findTargetlistEntrySQL92 */
  #define ORDER_CLAUSE 0
  #define GROUP_CLAUSE 1
  #define DISTINCT_ON_CLAUSE 2
-#define PARTITION_CLAUSE 3
  
  static const char *const clauseText[] = {
         "ORDER BY",
         "GROUP BY",
-       "DISTINCT ON",
-       "PARTITION BY"
+       "DISTINCT ON"
  };
  
  static void extractRemainingColumns(List *common_colnames,
@@ -73,8 +72,10 @@ static Node *transformFromClauseItem(ParseState *pstate, Node *n,
                                                 Relids *containedRels);
  static Node *buildMergedJoinVar(ParseState *pstate, JoinType jointype,
                                    Var *l_colvar, Var *r_colvar);
-static TargetEntry *findTargetlistEntry(ParseState *pstate, Node *node,
-                                       List **tlist, int clause);
+static TargetEntry *findTargetlistEntrySQL92(ParseState *pstate, Node *node,
+                                                List **tlist, int clause);
+static TargetEntry *findTargetlistEntrySQL99(ParseState *pstate, Node *node,
+                                                List **tlist);
  static int get_matching_location(int sortgroupref,
                                           List *sortgrouprefs, List *exprs);
  static List *addTargetToSortList(ParseState *pstate, TargetEntry *tle,
@@ -1216,21 +1217,27 @@ transformLimitClause(ParseState *pstate, Node *clause,
  
  
  /*
- *     findTargetlistEntry -
+ *     findTargetlistEntrySQL92 -
   *       Returns the targetlist entry matching the given (untransformed) node.
   *       If no matching entry exists, one is created and appended to the target
   *       list as a "resjunk" node.
   *
+ * This function supports the old SQL92 ORDER BY interpretation, where the
+ * expression is an output column name or number.  If we fail to find a
+ * match of that sort, we fall through to the SQL99 rules.  For historical
+ * reasons, Postgres also allows this interpretation for GROUP BY, though
+ * the standard never did.  However, for GROUP BY we prefer a SQL99 match.
+ * This function is *not* used for WINDOW definitions.
+ *
   * node                the ORDER BY, GROUP BY, or DISTINCT ON expression to be matched
   * tlist       the target list (passed by reference so we can append to it)
   * clause      identifies clause type being processed
   */
  static TargetEntry *
-findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
+findTargetlistEntrySQL92(ParseState *pstate, Node *node, List **tlist,
+                                                int clause)
  {
-       TargetEntry *target_result = NULL;
         ListCell   *tl;
-       Node       *expr;
  
         /*----------
          * Handle two special cases as mandated by the SQL92 spec:
@@ -1258,8 +1265,7 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
          * 2. IntegerConstant
          *        This means to use the n'th item in the existing target list.
          *        Note that it would make no sense to order/group/distinct by an
-        *        actual constant, so this does not create a conflict with our
-        *        extension to order/group by an expression.
+        *        actual constant, so this does not create a conflict with SQL99.
          *        GROUP BY column-number is not allowed by SQL92, but since
          *        the standard has no other behavior defined for this syntax,
          *        we may as well accept this common extension.
@@ -1268,7 +1274,7 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
          * since the user didn't write them in his SELECT list.
          *
          * If neither special case applies, fall through to treat the item as
-        * an expression.
+        * an expression per SQL99.
          *----------
          */
         if (IsA(node, ColumnRef) &&
@@ -1278,15 +1284,15 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
                 char       *name = strVal(linitial(((ColumnRef *) node)->fields));
                 int                     location = ((ColumnRef *) node)->location;
  
-               if (clause == GROUP_CLAUSE || clause == PARTITION_CLAUSE)
+               if (clause == GROUP_CLAUSE)
                 {
                         /*
                          * In GROUP BY, we must prefer a match against a FROM-clause
                          * column to one against the targetlist.  Look to see if there is
-                        * a matching column.  If so, fall through to let transformExpr()
-                        * do the rest.  NOTE: if name could refer ambiguously to more
-                        * than one column name exposed by FROM, colNameToVar will
-                        * ereport(ERROR).      That's just what we want here.
+                        * a matching column.  If so, fall through to use SQL99 rules.
+                        * NOTE: if name could refer ambiguously to more than one column
+                        * name exposed by FROM, colNameToVar will ereport(ERROR). That's
+                        * just what we want here.
                          *
                          * Small tweak for 7.4.3: ignore matches in upper query levels.
                          * This effectively changes the search order for bare names to (1)
@@ -1295,8 +1301,6 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
                          * SQL99 do not allow GROUPing BY an outer reference, so this
                          * breaks no cases that are legal per spec, and it seems a more
                          * self-consistent behavior.
-                        *
-                        * Window PARTITION BY clauses should act exactly like GROUP BY.
                          */
                         if (colNameToVar(pstate, name, true, location) != NULL)
                                 name = NULL;
@@ -1304,6 +1308,8 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
  
                 if (name != NULL)
                 {
+                       TargetEntry *target_result = NULL;
+
                         foreach(tl, *tlist)
                         {
                                 TargetEntry *tle = (TargetEntry *) lfirst(tl);
@@ -1367,12 +1373,36 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
         }
  
         /*
-        * Otherwise, we have an expression (this is a Postgres extension not
-        * found in SQL92).  Convert the untransformed node to a transformed
-        * expression, and search for a match in the tlist. NOTE: it doesn't
-        * really matter whether there is more than one match.  Also, we are
-        * willing to match a resjunk target here, though the above cases must
-        * ignore resjunk targets.
+        * Otherwise, we have an expression, so process it per SQL99 rules.
+        */
+       return findTargetlistEntrySQL99(pstate, node, tlist);
+}
+
+/*
+ *     findTargetlistEntrySQL99 -
+ *       Returns the targetlist entry matching the given (untransformed) node.
+ *       If no matching entry exists, one is created and appended to the target
+ *       list as a "resjunk" node.
+ *
+ * This function supports the SQL99 interpretation, wherein the expression
+ * is just an ordinary expression referencing input column names.
+ *
+ * node                the ORDER BY, GROUP BY, etc expression to be matched
+ * tlist       the target list (passed by reference so we can append to it)
+ */
+static TargetEntry *
+findTargetlistEntrySQL99(ParseState *pstate, Node *node, List **tlist)
+{
+       TargetEntry *target_result;
+       ListCell   *tl;
+       Node       *expr;
+
+       /*
+        * Convert the untransformed node to a transformed expression, and search
+        * for a match in the tlist.  NOTE: it doesn't really matter whether there
+        * is more than one match.  Also, we are willing to match an existing
+        * resjunk target here, though the SQL92 cases above must ignore resjunk
+        * targets.
          */
         expr = transformExpr(pstate, node);
  
@@ -1403,16 +1433,15 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
   * GROUP BY items will be added to the targetlist (as resjunk columns)
   * if not already present, so the targetlist must be passed by reference.
   *
- * This is also used for window PARTITION BY clauses (which actually act
- * just the same, except for the clause name used in error messages).
+ * This is also used for window PARTITION BY clauses (which act almost the
+ * same, but are always interpreted per SQL99 rules).
   */
  List *
  transformGroupClause(ParseState *pstate, List *grouplist,
                                          List **targetlist, List *sortClause,
-                                        bool isPartition)
+                                        bool isWindowFunc)
  {
         List       *result = NIL;
-       int                     clause = isPartition ? PARTITION_CLAUSE : GROUP_CLAUSE;
         ListCell   *gl;
  
         foreach(gl, grouplist)
@@ -1421,7 +1450,11 @@ transformGroupClause(ParseState *pstate, List *grouplist,
                 TargetEntry *tle;
                 bool            found = false;
  
-               tle = findTargetlistEntry(pstate, gexpr, targetlist, clause);
+               if (isWindowFunc)
+                       tle = findTargetlistEntrySQL99(pstate, gexpr, targetlist);
+               else
+                       tle = findTargetlistEntrySQL92(pstate, gexpr, targetlist,
+                                                                                  GROUP_CLAUSE);
  
                 /* Eliminate duplicates (GROUP BY x, x) */
                 if (targetIsInSortList(tle, InvalidOid, result))
@@ -1475,12 +1508,16 @@ transformGroupClause(ParseState *pstate, List *grouplist,
   *
   * ORDER BY items will be added to the targetlist (as resjunk columns)
   * if not already present, so the targetlist must be passed by reference.
+ *
+ * This is also used for window ORDER BY clauses (which act almost the
+ * same, but are always interpreted per SQL99 rules).
   */
  List *
  transformSortClause(ParseState *pstate,
                                         List *orderlist,
                                         List **targetlist,
-                                       bool resolveUnknown)
+                                       bool resolveUnknown,
+                                       bool isWindowFunc)
  {
         List       *sortlist = NIL;
         ListCell   *olitem;
@@ -1490,8 +1527,11 @@ transformSortClause(ParseState *pstate,
                 SortBy     *sortby = (SortBy *) lfirst(olitem);
                 TargetEntry *tle;
  
-               tle = findTargetlistEntry(pstate, sortby->node,
-                                                                 targetlist, ORDER_CLAUSE);
+               if (isWindowFunc)
+                       tle = findTargetlistEntrySQL99(pstate, sortby->node, targetlist);
+               else
+                       tle = findTargetlistEntrySQL92(pstate, sortby->node, targetlist,
+                                                                                  ORDER_CLAUSE);
  
                 sortlist = addTargetToSortList(pstate, tle,
                                                                            sortlist, *targetlist, sortby,
@@ -1550,18 +1590,19 @@ transformWindowDefinitions(ParseState *pstate,
  
                 /*
                  * Transform PARTITION and ORDER specs, if any.  These are treated
-                * exactly like top-level GROUP BY and ORDER BY clauses, including the
-                * special handling of nondefault operator semantics.
+                * almost exactly like top-level GROUP BY and ORDER BY clauses,
+                * including the special handling of nondefault operator semantics.
                  */
                 orderClause = transformSortClause(pstate,
                                                                                   windef->orderClause,
                                                                                   targetlist,
-                                                                                 true);
+                                                                                 true /* fix unknowns */,
+                                                                                 true /* window function */);
                 partitionClause = transformGroupClause(pstate,
                                                                                            windef->partitionClause,
                                                                                            targetlist,
                                                                                            orderClause,
-                                                                                          true);
+                                                                                          true /* window function */);
  
                 /*
                  * And prepare the new WindowClause.
@@ -1736,8 +1777,8 @@ transformDistinctOnClause(ParseState *pstate, List *distinctlist,
                 int                     sortgroupref;
                 TargetEntry *tle;
  
-               tle = findTargetlistEntry(pstate, dexpr,
-                                                                 targetlist, DISTINCT_ON_CLAUSE);
+               tle = findTargetlistEntrySQL92(pstate, dexpr, targetlist,
+                                                                          DISTINCT_ON_CLAUSE);
                 sortgroupref = assignSortGroupRef(tle, *targetlist);
                 sortgrouprefs = lappend_int(sortgrouprefs, sortgroupref);
         }
diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h

index c642761cb812742608b2fb9836ba3924e5df21c3..6bfb021f862d0ac820e9372be2eff08d913e2a83 100644 (file)
--- a/src/include/parser/parse_clause.h
+++ b/src/include/parser/parse_clause.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.55 2009/06/11 14:49:11 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.56 2009/08/27 20:08:02 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -28,9 +28,9 @@ extern Node *transformLimitClause(ParseState *pstate, Node *clause,
                                          const char *constructName);
  extern List *transformGroupClause(ParseState *pstate, List *grouplist,
                                          List **targetlist, List *sortClause,
-                                        bool isPartition);
+                                        bool isWindowFunc);
  extern List *transformSortClause(ParseState *pstate, List *orderlist,
-                                       List **targetlist, bool resolveUnknown);
+                                       List **targetlist, bool resolveUnknown, bool isWindowFunc);
  
  extern List *transformWindowDefinitions(ParseState *pstate,
                                                    List *windowdefs,
diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out

index 5ef3e6c3175d2e1bb6f2ae174c3e121fdc840eb1..c14011ce0e42882ec1af497f906dba97dd4a975f 100644 (file)
--- a/src/test/regress/expected/window.out
+++ b/src/test/regress/expected/window.out
@@ -741,11 +741,11 @@ SELECT rank() OVER (ORDER BY length('abc'));
      1
  (1 row)
  
--- but this draws an error: "ORDER BY 1" means order by first SELECT column
-SELECT rank() OVER (ORDER BY 1);
+-- can't order by another window function
+SELECT rank() OVER (ORDER BY rank() OVER (ORDER BY random()));
  ERROR:  window functions not allowed in window definition
-LINE 1: SELECT rank() OVER (ORDER BY 1);
-               ^
+LINE 1: SELECT rank() OVER (ORDER BY rank() OVER (ORDER BY random())...
+                                     ^
  -- some other errors
  SELECT * FROM empsalary WHERE row_number() OVER (ORDER BY salary) < 10;
  ERROR:  window functions not allowed in WHERE clause
diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql

index 5ecb3e3e6561825a177f55bbf716ae7beef1b649..fc62a6fd6e9d616d6c9a1961d45b801bb4cc449f 100644 (file)
--- a/src/test/regress/sql/window.sql
+++ b/src/test/regress/sql/window.sql
@@ -167,8 +167,8 @@ SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 UNION ALL SEL
  -- ordering by a non-integer constant is allowed
  SELECT rank() OVER (ORDER BY length('abc'));
  
--- but this draws an error: "ORDER BY 1" means order by first SELECT column
-SELECT rank() OVER (ORDER BY 1);
+-- can't order by another window function
+SELECT rank() OVER (ORDER BY rank() OVER (ORDER BY random()));
  
  -- some other errors
  SELECT * FROM empsalary WHERE row_number() OVER (ORDER BY salary) < 10;
author	Tom Lane <tgl@sss.pgh.pa.us>
	Thu, 27 Aug 2009 20:08:03 +0000 (20:08 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Thu, 27 Aug 2009 20:08:03 +0000 (20:08 +0000)
doc/src/sgml/ref/select.sgml		patch \| blob \| history
doc/src/sgml/syntax.sgml		patch \| blob \| history
src/backend/parser/analyze.c		patch \| blob \| history
src/backend/parser/parse_clause.c		patch \| blob \| history
src/include/parser/parse_clause.h		patch \| blob \| history
src/test/regress/expected/window.out		patch \| blob \| history
src/test/regress/sql/window.sql		patch \| blob \| history