]> granicus.if.org Git - postgresql/commitdiff
Modify the definition of window-function PARTITION BY and ORDER BY clauses
authorTom Lane <tgl@sss.pgh.pa.us>
Thu, 27 Aug 2009 20:08:03 +0000 (20:08 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Thu, 27 Aug 2009 20:08:03 +0000 (20:08 +0000)
so that their elements are always taken as simple expressions over the
query's input columns.  It originally seemed like a good idea to make them
act exactly like GROUP BY and ORDER BY, right down to the SQL92-era behavior
of accepting output column names or numbers.  However, that was not such a
great idea, for two reasons:

1. It permits circular references, as exhibited in bug #5018: the output
column could be the one containing the window function itself.  (We actually
had a regression test case illustrating this, but nobody thought twice about
how confusing that would be.)

2. It doesn't seem like a good idea for, eg, "lead(foo) OVER (ORDER BY foo)"
to potentially use two completely different meanings for "foo".

Accordingly, narrow down the behavior of window clauses to use only the
SQL99-compliant interpretation that the expressions are simple expressions.

doc/src/sgml/ref/select.sgml
doc/src/sgml/syntax.sgml
src/backend/parser/analyze.c
src/backend/parser/parse_clause.c
src/include/parser/parse_clause.h
src/test/regress/expected/window.out
src/test/regress/sql/window.sql

index d67d2b4ff764036e3ee8179de7aba6bff0c10cee..f22eeb2804d911482af4172d8d8deb155e558b67 100644 (file)
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/select.sgml,v 1.123 2009/08/18 23:40:20 tgl Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/select.sgml,v 1.124 2009/08/27 20:08:02 tgl Exp $
 PostgreSQL documentation
 -->
 
@@ -595,17 +595,24 @@ WINDOW <replaceable class="parameter">window_name</replaceable> AS ( <replaceabl
 
    <para>
     The elements of the <literal>PARTITION BY</> list are interpreted in
-    the same fashion as elements of a
-    <xref linkend="sql-groupby" endterm="sql-groupby-title">, and
-    the elements of the <literal>ORDER BY</> list are interpreted in the
-    same fashion as elements of an
-    <xref linkend="sql-orderby" endterm="sql-orderby-title">.
-    The only difference is that these expressions can contain aggregate
+    much the same fashion as elements of a
+    <xref linkend="sql-groupby" endterm="sql-groupby-title">, except that
+    they are always simple expressions and never the name or number of an
+    output column.
+    Another difference is that these expressions can contain aggregate
     function calls, which are not allowed in a regular <literal>GROUP BY</>
     clause.  They are allowed here because windowing occurs after grouping
     and aggregation.
    </para>
 
+   <para>
+    Similarly, the elements of the <literal>ORDER BY</> list are interpreted
+    in much the same fashion as elements of an
+    <xref linkend="sql-orderby" endterm="sql-orderby-title">, except that
+    the expressions are always taken as simple expressions and never the name
+    or number of an output column.
+   </para>
+
    <para>
     The optional <replaceable class="parameter">frame_clause</> defines
     the <firstterm>window frame</> for window functions that depend on the
index bc562e1f97be8f294a9843cf369171d11e6b556b..c2dd31b98d37b591ff64774aeab6ed6d8fd46b8c 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/syntax.sgml,v 1.133 2009/06/17 21:58:49 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/syntax.sgml,v 1.134 2009/08/27 20:08:02 tgl Exp $ -->
 
 <chapter id="sql-syntax">
  <title>SQL Syntax</title>
@@ -1619,7 +1619,9 @@ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
     expression that does not itself contain window function calls.
     The <literal>PARTITION BY</> and <literal>ORDER BY</> lists have
     essentially the same syntax and semantics as <literal>GROUP BY</>
-    and <literal>ORDER BY</> clauses of the whole query.
+    and <literal>ORDER BY</> clauses of the whole query, except that their
+    expressions are always just expressions and cannot be output-column
+    names or numbers.
     <replaceable>window_name</replaceable> is a reference to a named window
     specification defined in the query's <literal>WINDOW</literal> clause.
     Named window specifications are usually referenced with just
index 97c560b30950fab19e309b0661d2d4bb0af7e1c0..c441be220e2ec65a559a3a0a5674f7e7c4ac933a 100644 (file)
@@ -17,7 +17,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- *     $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.389 2009/06/11 14:48:59 momjian Exp $
+ *     $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.390 2009/08/27 20:08:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -825,13 +825,14 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
        qry->sortClause = transformSortClause(pstate,
                                                                                  stmt->sortClause,
                                                                                  &qry->targetList,
-                                                                                 true /* fix unknowns */ );
+                                                                                 true /* fix unknowns */,
+                                                                                 false /* not window function */);
 
        qry->groupClause = transformGroupClause(pstate,
                                                                                        stmt->groupClause,
                                                                                        &qry->targetList,
                                                                                        qry->sortClause,
-                                                                                       false);
+                                                                                       false /* not window function */);
 
        if (stmt->distinctClause == NIL)
        {
@@ -1039,7 +1040,8 @@ transformValuesClause(ParseState *pstate, SelectStmt *stmt)
        qry->sortClause = transformSortClause(pstate,
                                                                                  stmt->sortClause,
                                                                                  &qry->targetList,
-                                                                                 true /* fix unknowns */ );
+                                                                                 true /* fix unknowns */,
+                                                                                 false /* not window function */);
 
        qry->limitOffset = transformLimitClause(pstate, stmt->limitOffset,
                                                                                        "OFFSET");
@@ -1291,7 +1293,8 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
        qry->sortClause = transformSortClause(pstate,
                                                                                  sortClause,
                                                                                  &qry->targetList,
-                                                                                 false /* no unknowns expected */ );
+                                                                                 false /* no unknowns expected */,
+                                                                                 false /* not window function */);
 
        pstate->p_rtable = list_truncate(pstate->p_rtable, sv_rtable_length);
        pstate->p_relnamespace = sv_relnamespace;
index 139202f66790206478a70ac66c7fd680bf01a611..a63a280a8cbd17e11369cff054d8757b1cfe6651 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.190 2009/07/16 06:33:43 petere Exp $
+ *       $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.191 2009/08/27 20:08:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "utils/rel.h"
 
 
+/* clause types for findTargetlistEntrySQL92 */
 #define ORDER_CLAUSE 0
 #define GROUP_CLAUSE 1
 #define DISTINCT_ON_CLAUSE 2
-#define PARTITION_CLAUSE 3
 
 static const char *const clauseText[] = {
        "ORDER BY",
        "GROUP BY",
-       "DISTINCT ON",
-       "PARTITION BY"
+       "DISTINCT ON"
 };
 
 static void extractRemainingColumns(List *common_colnames,
@@ -73,8 +72,10 @@ static Node *transformFromClauseItem(ParseState *pstate, Node *n,
                                                Relids *containedRels);
 static Node *buildMergedJoinVar(ParseState *pstate, JoinType jointype,
                                   Var *l_colvar, Var *r_colvar);
-static TargetEntry *findTargetlistEntry(ParseState *pstate, Node *node,
-                                       List **tlist, int clause);
+static TargetEntry *findTargetlistEntrySQL92(ParseState *pstate, Node *node,
+                                                List **tlist, int clause);
+static TargetEntry *findTargetlistEntrySQL99(ParseState *pstate, Node *node,
+                                                List **tlist);
 static int get_matching_location(int sortgroupref,
                                          List *sortgrouprefs, List *exprs);
 static List *addTargetToSortList(ParseState *pstate, TargetEntry *tle,
@@ -1216,21 +1217,27 @@ transformLimitClause(ParseState *pstate, Node *clause,
 
 
 /*
- *     findTargetlistEntry -
+ *     findTargetlistEntrySQL92 -
  *       Returns the targetlist entry matching the given (untransformed) node.
  *       If no matching entry exists, one is created and appended to the target
  *       list as a "resjunk" node.
  *
+ * This function supports the old SQL92 ORDER BY interpretation, where the
+ * expression is an output column name or number.  If we fail to find a
+ * match of that sort, we fall through to the SQL99 rules.  For historical
+ * reasons, Postgres also allows this interpretation for GROUP BY, though
+ * the standard never did.  However, for GROUP BY we prefer a SQL99 match.
+ * This function is *not* used for WINDOW definitions.
+ *
  * node                the ORDER BY, GROUP BY, or DISTINCT ON expression to be matched
  * tlist       the target list (passed by reference so we can append to it)
  * clause      identifies clause type being processed
  */
 static TargetEntry *
-findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
+findTargetlistEntrySQL92(ParseState *pstate, Node *node, List **tlist,
+                                                int clause)
 {
-       TargetEntry *target_result = NULL;
        ListCell   *tl;
-       Node       *expr;
 
        /*----------
         * Handle two special cases as mandated by the SQL92 spec:
@@ -1258,8 +1265,7 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
         * 2. IntegerConstant
         *        This means to use the n'th item in the existing target list.
         *        Note that it would make no sense to order/group/distinct by an
-        *        actual constant, so this does not create a conflict with our
-        *        extension to order/group by an expression.
+        *        actual constant, so this does not create a conflict with SQL99.
         *        GROUP BY column-number is not allowed by SQL92, but since
         *        the standard has no other behavior defined for this syntax,
         *        we may as well accept this common extension.
@@ -1268,7 +1274,7 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
         * since the user didn't write them in his SELECT list.
         *
         * If neither special case applies, fall through to treat the item as
-        * an expression.
+        * an expression per SQL99.
         *----------
         */
        if (IsA(node, ColumnRef) &&
@@ -1278,15 +1284,15 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
                char       *name = strVal(linitial(((ColumnRef *) node)->fields));
                int                     location = ((ColumnRef *) node)->location;
 
-               if (clause == GROUP_CLAUSE || clause == PARTITION_CLAUSE)
+               if (clause == GROUP_CLAUSE)
                {
                        /*
                         * In GROUP BY, we must prefer a match against a FROM-clause
                         * column to one against the targetlist.  Look to see if there is
-                        * a matching column.  If so, fall through to let transformExpr()
-                        * do the rest.  NOTE: if name could refer ambiguously to more
-                        * than one column name exposed by FROM, colNameToVar will
-                        * ereport(ERROR).      That's just what we want here.
+                        * a matching column.  If so, fall through to use SQL99 rules.
+                        * NOTE: if name could refer ambiguously to more than one column
+                        * name exposed by FROM, colNameToVar will ereport(ERROR). That's
+                        * just what we want here.
                         *
                         * Small tweak for 7.4.3: ignore matches in upper query levels.
                         * This effectively changes the search order for bare names to (1)
@@ -1295,8 +1301,6 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
                         * SQL99 do not allow GROUPing BY an outer reference, so this
                         * breaks no cases that are legal per spec, and it seems a more
                         * self-consistent behavior.
-                        *
-                        * Window PARTITION BY clauses should act exactly like GROUP BY.
                         */
                        if (colNameToVar(pstate, name, true, location) != NULL)
                                name = NULL;
@@ -1304,6 +1308,8 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
 
                if (name != NULL)
                {
+                       TargetEntry *target_result = NULL;
+
                        foreach(tl, *tlist)
                        {
                                TargetEntry *tle = (TargetEntry *) lfirst(tl);
@@ -1367,12 +1373,36 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
        }
 
        /*
-        * Otherwise, we have an expression (this is a Postgres extension not
-        * found in SQL92).  Convert the untransformed node to a transformed
-        * expression, and search for a match in the tlist. NOTE: it doesn't
-        * really matter whether there is more than one match.  Also, we are
-        * willing to match a resjunk target here, though the above cases must
-        * ignore resjunk targets.
+        * Otherwise, we have an expression, so process it per SQL99 rules.
+        */
+       return findTargetlistEntrySQL99(pstate, node, tlist);
+}
+
+/*
+ *     findTargetlistEntrySQL99 -
+ *       Returns the targetlist entry matching the given (untransformed) node.
+ *       If no matching entry exists, one is created and appended to the target
+ *       list as a "resjunk" node.
+ *
+ * This function supports the SQL99 interpretation, wherein the expression
+ * is just an ordinary expression referencing input column names.
+ *
+ * node                the ORDER BY, GROUP BY, etc expression to be matched
+ * tlist       the target list (passed by reference so we can append to it)
+ */
+static TargetEntry *
+findTargetlistEntrySQL99(ParseState *pstate, Node *node, List **tlist)
+{
+       TargetEntry *target_result;
+       ListCell   *tl;
+       Node       *expr;
+
+       /*
+        * Convert the untransformed node to a transformed expression, and search
+        * for a match in the tlist.  NOTE: it doesn't really matter whether there
+        * is more than one match.  Also, we are willing to match an existing
+        * resjunk target here, though the SQL92 cases above must ignore resjunk
+        * targets.
         */
        expr = transformExpr(pstate, node);
 
@@ -1403,16 +1433,15 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
  * GROUP BY items will be added to the targetlist (as resjunk columns)
  * if not already present, so the targetlist must be passed by reference.
  *
- * This is also used for window PARTITION BY clauses (which actually act
- * just the same, except for the clause name used in error messages).
+ * This is also used for window PARTITION BY clauses (which act almost the
+ * same, but are always interpreted per SQL99 rules).
  */
 List *
 transformGroupClause(ParseState *pstate, List *grouplist,
                                         List **targetlist, List *sortClause,
-                                        bool isPartition)
+                                        bool isWindowFunc)
 {
        List       *result = NIL;
-       int                     clause = isPartition ? PARTITION_CLAUSE : GROUP_CLAUSE;
        ListCell   *gl;
 
        foreach(gl, grouplist)
@@ -1421,7 +1450,11 @@ transformGroupClause(ParseState *pstate, List *grouplist,
                TargetEntry *tle;
                bool            found = false;
 
-               tle = findTargetlistEntry(pstate, gexpr, targetlist, clause);
+               if (isWindowFunc)
+                       tle = findTargetlistEntrySQL99(pstate, gexpr, targetlist);
+               else
+                       tle = findTargetlistEntrySQL92(pstate, gexpr, targetlist,
+                                                                                  GROUP_CLAUSE);
 
                /* Eliminate duplicates (GROUP BY x, x) */
                if (targetIsInSortList(tle, InvalidOid, result))
@@ -1475,12 +1508,16 @@ transformGroupClause(ParseState *pstate, List *grouplist,
  *
  * ORDER BY items will be added to the targetlist (as resjunk columns)
  * if not already present, so the targetlist must be passed by reference.
+ *
+ * This is also used for window ORDER BY clauses (which act almost the
+ * same, but are always interpreted per SQL99 rules).
  */
 List *
 transformSortClause(ParseState *pstate,
                                        List *orderlist,
                                        List **targetlist,
-                                       bool resolveUnknown)
+                                       bool resolveUnknown,
+                                       bool isWindowFunc)
 {
        List       *sortlist = NIL;
        ListCell   *olitem;
@@ -1490,8 +1527,11 @@ transformSortClause(ParseState *pstate,
                SortBy     *sortby = (SortBy *) lfirst(olitem);
                TargetEntry *tle;
 
-               tle = findTargetlistEntry(pstate, sortby->node,
-                                                                 targetlist, ORDER_CLAUSE);
+               if (isWindowFunc)
+                       tle = findTargetlistEntrySQL99(pstate, sortby->node, targetlist);
+               else
+                       tle = findTargetlistEntrySQL92(pstate, sortby->node, targetlist,
+                                                                                  ORDER_CLAUSE);
 
                sortlist = addTargetToSortList(pstate, tle,
                                                                           sortlist, *targetlist, sortby,
@@ -1550,18 +1590,19 @@ transformWindowDefinitions(ParseState *pstate,
 
                /*
                 * Transform PARTITION and ORDER specs, if any.  These are treated
-                * exactly like top-level GROUP BY and ORDER BY clauses, including the
-                * special handling of nondefault operator semantics.
+                * almost exactly like top-level GROUP BY and ORDER BY clauses,
+                * including the special handling of nondefault operator semantics.
                 */
                orderClause = transformSortClause(pstate,
                                                                                  windef->orderClause,
                                                                                  targetlist,
-                                                                                 true);
+                                                                                 true /* fix unknowns */,
+                                                                                 true /* window function */);
                partitionClause = transformGroupClause(pstate,
                                                                                           windef->partitionClause,
                                                                                           targetlist,
                                                                                           orderClause,
-                                                                                          true);
+                                                                                          true /* window function */);
 
                /*
                 * And prepare the new WindowClause.
@@ -1736,8 +1777,8 @@ transformDistinctOnClause(ParseState *pstate, List *distinctlist,
                int                     sortgroupref;
                TargetEntry *tle;
 
-               tle = findTargetlistEntry(pstate, dexpr,
-                                                                 targetlist, DISTINCT_ON_CLAUSE);
+               tle = findTargetlistEntrySQL92(pstate, dexpr, targetlist,
+                                                                          DISTINCT_ON_CLAUSE);
                sortgroupref = assignSortGroupRef(tle, *targetlist);
                sortgrouprefs = lappend_int(sortgrouprefs, sortgroupref);
        }
index c642761cb812742608b2fb9836ba3924e5df21c3..6bfb021f862d0ac820e9372be2eff08d913e2a83 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.55 2009/06/11 14:49:11 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.56 2009/08/27 20:08:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -28,9 +28,9 @@ extern Node *transformLimitClause(ParseState *pstate, Node *clause,
                                         const char *constructName);
 extern List *transformGroupClause(ParseState *pstate, List *grouplist,
                                         List **targetlist, List *sortClause,
-                                        bool isPartition);
+                                        bool isWindowFunc);
 extern List *transformSortClause(ParseState *pstate, List *orderlist,
-                                       List **targetlist, bool resolveUnknown);
+                                       List **targetlist, bool resolveUnknown, bool isWindowFunc);
 
 extern List *transformWindowDefinitions(ParseState *pstate,
                                                   List *windowdefs,
index 5ef3e6c3175d2e1bb6f2ae174c3e121fdc840eb1..c14011ce0e42882ec1af497f906dba97dd4a975f 100644 (file)
@@ -741,11 +741,11 @@ SELECT rank() OVER (ORDER BY length('abc'));
     1
 (1 row)
 
--- but this draws an error: "ORDER BY 1" means order by first SELECT column
-SELECT rank() OVER (ORDER BY 1);
+-- can't order by another window function
+SELECT rank() OVER (ORDER BY rank() OVER (ORDER BY random()));
 ERROR:  window functions not allowed in window definition
-LINE 1: SELECT rank() OVER (ORDER BY 1);
-               ^
+LINE 1: SELECT rank() OVER (ORDER BY rank() OVER (ORDER BY random())...
+                                     ^
 -- some other errors
 SELECT * FROM empsalary WHERE row_number() OVER (ORDER BY salary) < 10;
 ERROR:  window functions not allowed in WHERE clause
index 5ecb3e3e6561825a177f55bbf716ae7beef1b649..fc62a6fd6e9d616d6c9a1961d45b801bb4cc449f 100644 (file)
@@ -167,8 +167,8 @@ SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 UNION ALL SEL
 -- ordering by a non-integer constant is allowed
 SELECT rank() OVER (ORDER BY length('abc'));
 
--- but this draws an error: "ORDER BY 1" means order by first SELECT column
-SELECT rank() OVER (ORDER BY 1);
+-- can't order by another window function
+SELECT rank() OVER (ORDER BY rank() OVER (ORDER BY random()));
 
 -- some other errors
 SELECT * FROM empsalary WHERE row_number() OVER (ORDER BY salary) < 10;