]> granicus.if.org Git - postgresql/commitdiff
Fix collation assignment for aggregates with ORDER BY.
authorTom Lane <tgl@sss.pgh.pa.us>
Fri, 26 Apr 2013 19:48:24 +0000 (15:48 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Fri, 26 Apr 2013 19:48:53 +0000 (15:48 -0400)
ORDER BY expressions were being treated the same as regular aggregate
arguments for purposes of collation determination, but really they should
not affect the aggregate's collation at all; only collations of the
aggregate's regular arguments should affect it.

In many cases this mistake would lead to incorrectly throwing a "collation
conflict" error; but in some cases the corrected code will silently assign
a different collation to the aggregate than before, for example
agg(foo ORDER BY bar COLLATE "x")
which will now use foo's collation rather than "x" for the aggregate.
Given this risk and the lack of field complaints about the issue, it
doesn't seem prudent to back-patch.

In passing, rearrange code in assign_collations_walker so that we don't
need multiple copies of the standard logic for computing collation of a
node with children.  (Previously, CaseExpr duplicated the standard logic,
and we would have needed a third copy for Aggref without this change.)

Andrew Gierth and David Fetter

src/backend/parser/parse_collate.c
src/test/regress/expected/collate.out
src/test/regress/sql/collate.sql

index 7ed50cd0bac3c587f7508d7b7661cac544acc0ca..80f6ac7c0854a926b551cb4db54b3eeee5cf6c65 100644 (file)
@@ -319,86 +319,6 @@ assign_collations_walker(Node *node, assign_collations_context *context)
                                }
                        }
                        break;
-               case T_CaseExpr:
-                       {
-                               /*
-                                * CaseExpr is a special case because we do not want to
-                                * recurse into the test expression (if any).  It was already
-                                * marked with collations during transformCaseExpr, and
-                                * furthermore its collation is not relevant to the result of
-                                * the CASE --- only the output expressions are. So we can't
-                                * use expression_tree_walker here.
-                                */
-                               CaseExpr   *expr = (CaseExpr *) node;
-                               Oid                     typcollation;
-                               ListCell   *lc;
-
-                               foreach(lc, expr->args)
-                               {
-                                       CaseWhen   *when = (CaseWhen *) lfirst(lc);
-
-                                       Assert(IsA(when, CaseWhen));
-
-                                       /*
-                                        * The condition expressions mustn't affect the CASE's
-                                        * result collation either; but since they are known to
-                                        * yield boolean, it's safe to recurse directly on them
-                                        * --- they won't change loccontext.
-                                        */
-                                       (void) assign_collations_walker((Node *) when->expr,
-                                                                                                       &loccontext);
-                                       (void) assign_collations_walker((Node *) when->result,
-                                                                                                       &loccontext);
-                               }
-                               (void) assign_collations_walker((Node *) expr->defresult,
-                                                                                               &loccontext);
-
-                               /*
-                                * Now determine the CASE's output collation.  This is the
-                                * same as the general case below.
-                                */
-                               typcollation = get_typcollation(exprType(node));
-                               if (OidIsValid(typcollation))
-                               {
-                                       /* Node's result is collatable; what about its input? */
-                                       if (loccontext.strength > COLLATE_NONE)
-                                       {
-                                               /* Collation state bubbles up from children. */
-                                               collation = loccontext.collation;
-                                               strength = loccontext.strength;
-                                               location = loccontext.location;
-                                       }
-                                       else
-                                       {
-                                               /*
-                                                * Collatable output produced without any collatable
-                                                * input.  Use the type's collation (which is usually
-                                                * DEFAULT_COLLATION_OID, but might be different for a
-                                                * domain).
-                                                */
-                                               collation = typcollation;
-                                               strength = COLLATE_IMPLICIT;
-                                               location = exprLocation(node);
-                                       }
-                               }
-                               else
-                               {
-                                       /* Node's result type isn't collatable. */
-                                       collation = InvalidOid;
-                                       strength = COLLATE_NONE;
-                                       location = -1;          /* won't be used */
-                               }
-
-                               /*
-                                * Save the state into the expression node.  We know it
-                                * doesn't care about input collation.
-                                */
-                               if (strength == COLLATE_CONFLICT)
-                                       exprSetCollation(node, InvalidOid);
-                               else
-                                       exprSetCollation(node, collation);
-                       }
-                       break;
                case T_RowExpr:
                        {
                                /*
@@ -630,14 +550,103 @@ assign_collations_walker(Node *node, assign_collations_context *context)
                        {
                                /*
                                 * General case for most expression nodes with children. First
-                                * recurse, then figure out what to assign here.
+                                * recurse, then figure out what to assign to this node.
                                 */
                                Oid                     typcollation;
 
-                               (void) expression_tree_walker(node,
-                                                                                         assign_collations_walker,
-                                                                                         (void *) &loccontext);
+                               /*
+                                * For most node types, we want to treat all the child
+                                * expressions alike; but there are a few exceptions, hence
+                                * this inner switch.
+                                */
+                               switch (nodeTag(node))
+                               {
+                                       case T_Aggref:
+                                               {
+                                                       /*
+                                                        * Aggref is a special case because expressions
+                                                        * used only for ordering shouldn't be taken to
+                                                        * conflict with each other or with regular args.
+                                                        * So we apply assign_expr_collations() to them
+                                                        * rather than passing down our loccontext.
+                                                        *
+                                                        * Note that we recurse to each TargetEntry, not
+                                                        * directly to its contained expression, so that
+                                                        * the case above for T_TargetEntry will apply
+                                                        * appropriate checks to agg ORDER BY items.
+                                                        *
+                                                        * We need not recurse into the aggorder or
+                                                        * aggdistinct lists, because those contain only
+                                                        * SortGroupClause nodes which we need not
+                                                        * process.
+                                                        */
+                                                       Aggref     *aggref = (Aggref *) node;
+                                                       ListCell   *lc;
+
+                                                       foreach(lc, aggref->args)
+                                                       {
+                                                               TargetEntry *tle = (TargetEntry *) lfirst(lc);
+
+                                                               Assert(IsA(tle, TargetEntry));
+                                                               if (tle->resjunk)
+                                                                       assign_expr_collations(context->pstate,
+                                                                                                                  (Node *) tle);
+                                                               else
+                                                                       (void) assign_collations_walker((Node *) tle,
+                                                                                                                               &loccontext);
+                                                       }
+                                               }
+                                               break;
+                                       case T_CaseExpr:
+                                               {
+                                                       /*
+                                                        * CaseExpr is a special case because we do not
+                                                        * want to recurse into the test expression (if
+                                                        * any).  It was already marked with collations
+                                                        * during transformCaseExpr, and furthermore its
+                                                        * collation is not relevant to the result of the
+                                                        * CASE --- only the output expressions are.
+                                                        */
+                                                       CaseExpr   *expr = (CaseExpr *) node;
+                                                       ListCell   *lc;
+
+                                                       foreach(lc, expr->args)
+                                                       {
+                                                               CaseWhen   *when = (CaseWhen *) lfirst(lc);
+
+                                                               Assert(IsA(when, CaseWhen));
+
+                                                               /*
+                                                                * The condition expressions mustn't affect
+                                                                * the CASE's result collation either; but
+                                                                * since they are known to yield boolean, it's
+                                                                * safe to recurse directly on them --- they
+                                                                * won't change loccontext.
+                                                                */
+                                                               (void) assign_collations_walker((Node *) when->expr,
+                                                                                                                               &loccontext);
+                                                               (void) assign_collations_walker((Node *) when->result,
+                                                                                                                               &loccontext);
+                                                       }
+                                                       (void) assign_collations_walker((Node *) expr->defresult,
+                                                                                                                       &loccontext);
+                                               }
+                                               break;
+                                       default:
+
+                                               /*
+                                                * Normal case: all child expressions contribute
+                                                * equally to loccontext.
+                                                */
+                                               (void) expression_tree_walker(node,
+                                                                                                       assign_collations_walker,
+                                                                                                         (void *) &loccontext);
+                                               break;
+                               }
 
+                               /*
+                                * Now figure out what collation to assign to this node.
+                                */
                                typcollation = get_typcollation(exprType(node));
                                if (OidIsValid(typcollation))
                                {
index 4ab9566cd10be8f9faf5bed6cab94d4e3cca9336..91d574dbe4c77a79692eac54a6e470eaa37a422d 100644 (file)
@@ -362,6 +362,28 @@ SELECT array_agg(b ORDER BY b) FROM collate_test2;
  {ABD,Abc,abc,bbc}
 (1 row)
 
+-- In aggregates, ORDER BY expressions don't affect aggregate's collation
+SELECT string_agg(x COLLATE "C", y COLLATE "POSIX") FROM collate_test10;  -- fail
+ERROR:  collation mismatch between explicit collations "C" and "POSIX"
+LINE 1: SELECT string_agg(x COLLATE "C", y COLLATE "POSIX") FROM col...
+                                           ^
+SELECT array_agg(x COLLATE "C" ORDER BY y COLLATE "POSIX") FROM collate_test10;
+ array_agg 
+-----------
+ {HIJ,hij}
+(1 row)
+
+SELECT array_agg(a ORDER BY x COLLATE "C", y COLLATE "POSIX") FROM collate_test10;
+ array_agg 
+-----------
+ {2,1}
+(1 row)
+
+SELECT array_agg(a ORDER BY x||y) FROM collate_test10;  -- fail
+ERROR:  collation mismatch between implicit collations "C" and "POSIX"
+LINE 1: SELECT array_agg(a ORDER BY x||y) FROM collate_test10;
+                                       ^
+HINT:  You can choose the collation by applying the COLLATE clause to one or both expressions.
 SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test1 ORDER BY 2;
  a |  b  
 ---+-----
index 3c960e7ed93209b0c2b9a63472e3cb039c453fa3..63ab590f3a4153d49e4e3299c067fff4df4a67f8 100644 (file)
@@ -128,6 +128,12 @@ SELECT min(b), max(b) FROM collate_test2;
 SELECT array_agg(b ORDER BY b) FROM collate_test1;
 SELECT array_agg(b ORDER BY b) FROM collate_test2;
 
+-- In aggregates, ORDER BY expressions don't affect aggregate's collation
+SELECT string_agg(x COLLATE "C", y COLLATE "POSIX") FROM collate_test10;  -- fail
+SELECT array_agg(x COLLATE "C" ORDER BY y COLLATE "POSIX") FROM collate_test10;
+SELECT array_agg(a ORDER BY x COLLATE "C", y COLLATE "POSIX") FROM collate_test10;
+SELECT array_agg(a ORDER BY x||y) FROM collate_test10;  -- fail
+
 SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test1 ORDER BY 2;
 SELECT a, b FROM collate_test2 UNION SELECT a, b FROM collate_test2 ORDER BY 2;
 SELECT a, b FROM collate_test2 WHERE a < 4 INTERSECT SELECT a, b FROM collate_test2 WHERE a > 1 ORDER BY 2;