Recognize functional dependency on primary keys. This allows a table's

author Tom Lane <tgl@sss.pgh.pa.us>

Sat, 7 Aug 2010 02:44:09 +0000 (02:44 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Sat, 7 Aug 2010 02:44:09 +0000 (02:44 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Sat, 7 Aug 2010 02:44:09 +0000 (02:44 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sat, 7 Aug 2010 02:44:09 +0000 (02:44 +0000)
diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml

index 26bc2243843356b48bd1c8241b97d71f831476c8..9cffbd7071bb546433cd31da3605dbda0bf90bb4 100644 (file)
--- a/doc/src/sgml/queries.sgml
+++ b/doc/src/sgml/queries.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/queries.sgml,v 1.58 2010/07/03 02:57:46 rhaas Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/queries.sgml,v 1.59 2010/08/07 02:44:05 tgl Exp $ -->
  
  <chapter id="queries">
   <title>Queries</title>
@@ -886,10 +886,7 @@ SELECT product_id, p.name, (sum(s.units) * p.price) AS sales
      In this example, the columns <literal>product_id</literal>,
      <literal>p.name</literal>, and <literal>p.price</literal> must be
      in the <literal>GROUP BY</> clause since they are referenced in
-    the query select list.  (Depending on how the products
-    table is set up, name and price might be fully dependent on the
-    product ID, so the additional groupings could theoretically be
-    unnecessary, though this is not implemented.)  The column
+    the query select list (but see below).  The column
      <literal>s.units</> does not have to be in the <literal>GROUP
      BY</> list since it is only used in an aggregate expression
      (<literal>sum(...)</literal>), which represents the sales
@@ -897,6 +894,18 @@ SELECT product_id, p.name, (sum(s.units) * p.price) AS sales
      all sales of the product.
     </para>
  
+   <para>
+    If the products table is set up so that,
+    say, <literal>product_id</literal> is the primary key, then it
+    would be enough to group by <literal>product_id</literal> in the
+    above example, since name and price would
+    be <firstterm>functionally
+    dependent</firstterm><indexterm><primary>functional
+    dependency</primary></indexterm> on the product ID, and so there
+    would be no ambiguity about which name and price value to return
+    for each product ID group.
+   </para>
+
     <para>
      In strict SQL, <literal>GROUP BY</> can only group by columns of
      the source table but <productname>PostgreSQL</productname> extends
diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml

index 15d9b75b59bede312d5b8787bc7484453f598cfb..916146df737558ee8147a54ed9d132ccbe9343bb 100644 (file)
--- a/doc/src/sgml/ref/select.sgml
+++ b/doc/src/sgml/ref/select.sgml
@@ -1,5 +1,5 @@
  <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/select.sgml,v 1.131 2010/06/15 20:04:53 momjian Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/select.sgml,v 1.132 2010/08/07 02:44:05 tgl Exp $
  PostgreSQL documentation
  -->
  
@@ -520,9 +520,12 @@ GROUP BY <replaceable class="parameter">expression</replaceable> [, ...]
      produces a single value computed across all the selected rows).
      When <literal>GROUP BY</literal> is present, it is not valid for
      the <command>SELECT</command> list expressions to refer to
-    ungrouped columns except within aggregate functions, since there
-    would be more than one possible value to return for an ungrouped
-    column.
+    ungrouped columns except within aggregate functions or if the
+    ungrouped column is functionally dependent on the grouped columns,
+    since there would otherwise be more than one possible value to
+    return for an ungrouped column.  A functional dependency exists if
+    the grouped columns (or a subset thereof) are the primary key of
+    the table containing the ungrouped column.
     </para>
    </refsect2>
  
@@ -1590,6 +1593,18 @@ SELECT distributors.* WHERE distributors.name = 'Westward';
     </para>
    </refsect2>
  
+  <refsect2>
+   <title>Functional Dependencies</title>
+
+   <para>
+    <productname>PostgreSQL</productname> recognizes functional dependency
+    (allowing columns to be omitted from <literal>GROUP BY</>) only when
+    a table's primary key is included in the <literal>GROUP BY</> list.
+    The SQL standard specifies additional conditions that should be
+    recognized.
+   </para>
+  </refsect2>
+
    <refsect2>
     <title><literal>WINDOW</literal> Clause Restrictions</title>
  
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c

index 491c402a03b0025e94097d950e07188d07c87e16..aeffbf4d74a2b64d77d877d5c59e75329595b8a0 100644 (file)
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -8,7 +8,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.96 2010/02/26 02:00:36 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.97 2010/08/07 02:44:06 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1594,7 +1594,7 @@ find_expr_references_walker(Node *node,
         {
                 /* Recurse into RTE subquery or not-yet-planned sublink subquery */
                 Query      *query = (Query *) node;
-               ListCell   *rtable;
+               ListCell   *lc;
                 bool            result;
  
                 /*
@@ -1604,9 +1604,9 @@ find_expr_references_walker(Node *node,
                  * of recursing into RTE_FUNCTION RTEs, subqueries, etc, so no need to
                  * do that here.  But keep it from looking at join alias lists.)
                  */
-               foreach(rtable, query->rtable)
+               foreach(lc, query->rtable)
                 {
-                       RangeTblEntry *rte = (RangeTblEntry *) lfirst(rtable);
+                       RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
                         ListCell   *ct;
  
                         switch (rte->rtekind)
@@ -1627,6 +1627,15 @@ find_expr_references_walker(Node *node,
                         }
                 }
  
+               /*
+                * Add dependencies on constraints listed in query's constraintDeps
+                */
+               foreach(lc, query->constraintDeps)
+               {
+                       add_object_address(OCLASS_CONSTRAINT, lfirst_oid(lc), 0,
+                                                          context->addrs);
+               }
+
                 /* query_tree_walker ignores ORDER BY etc, but we need those opers */
                 find_expr_references_walker((Node *) query->sortClause, context);
                 find_expr_references_walker((Node *) query->groupClause, context);
diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c

index 4fb96d2100292475d7dd012947540d6c72fe453d..106b8df6e4c6563aa736ae24a47daea720605ee0 100644 (file)
--- a/src/backend/catalog/pg_constraint.c
+++ b/src/backend/catalog/pg_constraint.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/catalog/pg_constraint.c,v 1.54 2010/08/05 15:25:35 rhaas Exp $
+ *       $PostgreSQL: pgsql/src/backend/catalog/pg_constraint.c,v 1.55 2010/08/07 02:44:06 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -783,3 +783,112 @@ get_constraint_oid(Oid relid, const char *conname, bool missing_ok)
  
         return conOid;
  }
+
+/*
+ * Determine whether a relation can be proven functionally dependent on
+ * a set of grouping columns.  If so, return TRUE and add the pg_constraint
+ * OIDs of the constraints needed for the proof to the *constraintDeps list.
+ *
+ * grouping_columns is a list of grouping expressions, in which columns of
+ * the rel of interest are Vars with the indicated varno/varlevelsup.
+ *
+ * Currently we only check to see if the rel has a primary key that is a
+ * subset of the grouping_columns.  We could also use plain unique constraints
+ * if all their columns are known not null, but there's a problem: we need
+ * to be able to represent the not-null-ness as part of the constraints added
+ * to *constraintDeps.  FIXME whenever not-null constraints get represented
+ * in pg_constraint.
+ */
+bool
+check_functional_grouping(Oid relid,
+                                                 Index varno, Index varlevelsup,
+                                                 List *grouping_columns,
+                                                 List **constraintDeps)
+{
+       bool            result = false;
+       Relation        pg_constraint;
+       HeapTuple       tuple;
+       SysScanDesc scan;
+       ScanKeyData skey[1];
+
+       /* Scan pg_constraint for constraints of the target rel */
+       pg_constraint = heap_open(ConstraintRelationId, AccessShareLock);
+
+       ScanKeyInit(&skey[0],
+                               Anum_pg_constraint_conrelid,
+                               BTEqualStrategyNumber, F_OIDEQ,
+                               ObjectIdGetDatum(relid));
+
+       scan = systable_beginscan(pg_constraint, ConstraintRelidIndexId, true,
+                                                         SnapshotNow, 1, skey);
+
+       while (HeapTupleIsValid(tuple = systable_getnext(scan)))
+       {
+               Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tuple);
+               Datum           adatum;
+               bool            isNull;
+               ArrayType  *arr;
+               int16      *attnums;
+               int                     numkeys;
+               int                     i;
+               bool            found_col;
+
+               /* Only PK constraints are of interest for now, see comment above */
+               if (con->contype != CONSTRAINT_PRIMARY)
+                       continue;
+
+               /* Extract the conkey array, ie, attnums of PK's columns */
+               adatum = heap_getattr(tuple, Anum_pg_constraint_conkey,
+                                                         RelationGetDescr(pg_constraint), &isNull);
+               if (isNull)
+                       elog(ERROR, "null conkey for constraint %u",
+                                HeapTupleGetOid(tuple));
+               arr = DatumGetArrayTypeP(adatum);       /* ensure not toasted */
+               numkeys = ARR_DIMS(arr)[0];
+               if (ARR_NDIM(arr) != 1 ||
+                       numkeys < 0 ||
+                       ARR_HASNULL(arr) ||
+                       ARR_ELEMTYPE(arr) != INT2OID)
+                       elog(ERROR, "conkey is not a 1-D smallint array");
+               attnums = (int16 *) ARR_DATA_PTR(arr);
+
+               found_col = false;
+               for (i = 0; i < numkeys; i++)
+               {
+                       AttrNumber      attnum = attnums[i];
+                       ListCell   *gl;
+
+                       found_col = false;
+                       foreach(gl, grouping_columns)
+                       {
+                               Var                *gvar = (Var *) lfirst(gl);
+
+                               if (IsA(gvar, Var) &&
+                                       gvar->varno == varno &&
+                                       gvar->varlevelsup == varlevelsup &&
+                                       gvar->varattno == attnum)
+                               {
+                                       found_col = true;
+                                       break;
+                               }
+                       }
+                       if (!found_col)
+                               break;
+               }
+
+               if (found_col)
+               {
+                       /* The PK is a subset of grouping_columns, so we win */
+                       *constraintDeps = lappend_oid(*constraintDeps,
+                                                                                 HeapTupleGetOid(tuple));
+                       result = true;
+                       break;
+               }
+       }
+
+       systable_endscan(scan);
+
+       heap_close(pg_constraint, AccessShareLock);
+
+       return result;
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c

index a5862771a900152c1089ea8f8e64414e874d99a7..54af8b62cacb62293f6121084eb7698dd4c26c33 100644 (file)
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.466 2010/07/25 23:21:21 rhaas Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.467 2010/08/07 02:44:06 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -2272,6 +2272,7 @@ _copyQuery(Query *from)
         COPY_NODE_FIELD(limitCount);
         COPY_NODE_FIELD(rowMarks);
         COPY_NODE_FIELD(setOperations);
+       COPY_NODE_FIELD(constraintDeps);
  
         return newnode;
  }
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c

index 7056287c93b77dfd8a1808117b27564ed814894e..0cb87015b6ec4556f7599bfd48287a990ed8f054 100644 (file)
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -22,7 +22,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.386 2010/07/25 23:21:21 rhaas Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.387 2010/08/07 02:44:06 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -877,6 +877,7 @@ _equalQuery(Query *a, Query *b)
         COMPARE_NODE_FIELD(limitCount);
         COMPARE_NODE_FIELD(rowMarks);
         COMPARE_NODE_FIELD(setOperations);
+       COMPARE_NODE_FIELD(constraintDeps);
  
         return true;
  }
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c

index 6089ea3e3aaf573940ac0b1a80e28066fc3991c2..8c69435095071bf89350d4fb461fc7d1337c2a47 100644 (file)
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.387 2010/07/25 23:21:21 rhaas Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.388 2010/08/07 02:44:06 tgl Exp $
   *
   * NOTES
   *       Every node type that can appear in stored rules' parsetrees *must*
@@ -2020,6 +2020,7 @@ _outQuery(StringInfo str, Query *node)
         WRITE_NODE_FIELD(limitCount);
         WRITE_NODE_FIELD(rowMarks);
         WRITE_NODE_FIELD(setOperations);
+       WRITE_NODE_FIELD(constraintDeps);
  }
  
  static void
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c

index f28191d2d00a1b12b69be87bce9824d6ec792e2f..4d85d6769794e11fdb8d21989941d6bb78b3d3f0 100644 (file)
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.232 2010/02/16 22:34:43 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.233 2010/08/07 02:44:07 tgl Exp $
   *
   * NOTES
   *       Path and Plan nodes do not have any readfuncs support, because we
@@ -218,6 +218,7 @@ _readQuery(void)
         READ_NODE_FIELD(limitCount);
         READ_NODE_FIELD(rowMarks);
         READ_NODE_FIELD(setOperations);
+       READ_NODE_FIELD(constraintDeps);
  
         READ_DONE();
  }
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c

index c36c5fb08c6144a932886e1307d5916838a2253d..a7c34bc89307ddd77c506ee76496744a8ea52f19 100644 (file)
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -8,12 +8,13 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/parser/parse_agg.c,v 1.93 2010/03/17 16:52:38 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/parser/parse_agg.c,v 1.94 2010/08/07 02:44:07 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
  #include "postgres.h"
  
+#include "catalog/pg_constraint.h"
  #include "nodes/makefuncs.h"
  #include "nodes/nodeFuncs.h"
  #include "optimizer/tlist.h"
@@ -29,13 +30,16 @@
  typedef struct
  {
         ParseState *pstate;
+       Query      *qry;
         List       *groupClauses;
         bool            have_non_var_grouping;
+       List      **func_grouped_rels;
         int                     sublevels_up;
  } check_ungrouped_columns_context;
  
-static void check_ungrouped_columns(Node *node, ParseState *pstate,
-                                               List *groupClauses, bool have_non_var_grouping);
+static void check_ungrouped_columns(Node *node, ParseState *pstate, Query *qry,
+                                               List *groupClauses, bool have_non_var_grouping,
+                                               List **func_grouped_rels);
  static bool check_ungrouped_columns_walker(Node *node,
                                                            check_ungrouped_columns_context *context);
  
@@ -293,6 +297,7 @@ parseCheckAggregates(ParseState *pstate, Query *qry)
  {
         List       *groupClauses = NIL;
         bool            have_non_var_grouping;
+       List       *func_grouped_rels = NIL;
         ListCell   *l;
         bool            hasJoinRTEs;
         bool            hasSelfRefRTEs;
@@ -408,14 +413,16 @@ parseCheckAggregates(ParseState *pstate, Query *qry)
         clause = (Node *) qry->targetList;
         if (hasJoinRTEs)
                 clause = flatten_join_alias_vars(root, clause);
-       check_ungrouped_columns(clause, pstate,
-                                                       groupClauses, have_non_var_grouping);
+       check_ungrouped_columns(clause, pstate, qry,
+                                                       groupClauses, have_non_var_grouping,
+                                                       &func_grouped_rels);
  
         clause = (Node *) qry->havingQual;
         if (hasJoinRTEs)
                 clause = flatten_join_alias_vars(root, clause);
-       check_ungrouped_columns(clause, pstate,
-                                                       groupClauses, have_non_var_grouping);
+       check_ungrouped_columns(clause, pstate, qry,
+                                                       groupClauses, have_non_var_grouping,
+                                                       &func_grouped_rels);
  
         /*
          * Per spec, aggregates can't appear in a recursive term.
@@ -535,14 +542,17 @@ parseCheckWindowFuncs(ParseState *pstate, Query *qry)
   * way more pain than the feature seems worth.
   */
  static void
-check_ungrouped_columns(Node *node, ParseState *pstate,
-                                               List *groupClauses, bool have_non_var_grouping)
+check_ungrouped_columns(Node *node, ParseState *pstate, Query *qry,
+                                               List *groupClauses, bool have_non_var_grouping,
+                                               List **func_grouped_rels)
  {
         check_ungrouped_columns_context context;
  
         context.pstate = pstate;
+       context.qry = qry;
         context.groupClauses = groupClauses;
         context.have_non_var_grouping = have_non_var_grouping;
+       context.func_grouped_rels = func_grouped_rels;
         context.sublevels_up = 0;
         check_ungrouped_columns_walker(node, &context);
  }
@@ -619,10 +629,43 @@ check_ungrouped_columns_walker(Node *node,
                         }
                 }
  
-               /* Found an ungrouped local variable; generate error message */
+               /*
+                * Check whether the Var is known functionally dependent on the GROUP
+                * BY columns.  If so, we can allow the Var to be used, because the
+                * grouping is really a no-op for this table.  However, this deduction
+                * depends on one or more constraints of the table, so we have to add
+                * those constraints to the query's constraintDeps list, because it's
+                * not semantically valid anymore if the constraint(s) get dropped.
+                * (Therefore, this check must be the last-ditch effort before raising
+                * error: we don't want to add dependencies unnecessarily.)
+                *
+                * Because this is a pretty expensive check, and will have the same
+                * outcome for all columns of a table, we remember which RTEs we've
+                * already proven functional dependency for in the func_grouped_rels
+                * list.  This test also prevents us from adding duplicate entries
+                * to the constraintDeps list.
+                */
+               if (list_member_int(*context->func_grouped_rels, var->varno))
+                       return false;                           /* previously proven acceptable */
+
                 Assert(var->varno > 0 &&
                            (int) var->varno <= list_length(context->pstate->p_rtable));
                 rte = rt_fetch(var->varno, context->pstate->p_rtable);
+               if (rte->rtekind == RTE_RELATION)
+               {
+                       if (check_functional_grouping(rte->relid,
+                                                                                 var->varno,
+                                                                                 0,
+                                                                                 context->groupClauses,
+                                                                                 &context->qry->constraintDeps))
+                       {
+                               *context->func_grouped_rels =
+                                       lappend_int(*context->func_grouped_rels, var->varno);
+                               return false;                   /* acceptable */
+                       }
+               }
+
+               /* Found an ungrouped local variable; generate error message */
                 attname = get_rte_attribute_name(rte, var->varattno);
                 if (context->sublevels_up == 0)
                         ereport(ERROR,
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 69e87bb5850091dc73f90c01943a37a08c232e32..6e498ba104c350c99c41d23fea10d557d9fb6e60 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
   * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.590 2010/08/05 18:21:17 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.591 2010/08/07 02:44:07 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -53,6 +53,6 @@
   */
  
  /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     201008052
+#define CATALOG_VERSION_NO     201008061
  
  #endif
diff --git a/src/include/catalog/pg_constraint.h b/src/include/catalog/pg_constraint.h

index 7a31aadbc1d2377749ca5debd3af8e85b12f439a..63fddc697bc3dfd5e895b2daa85a053a8ce8135d 100644 (file)
--- a/src/include/catalog/pg_constraint.h
+++ b/src/include/catalog/pg_constraint.h
@@ -8,7 +8,7 @@
   * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/catalog/pg_constraint.h,v 1.40 2010/08/05 15:25:36 rhaas Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_constraint.h,v 1.41 2010/08/07 02:44:07 tgl Exp $
   *
   * NOTES
   *       the genbki.pl script reads this file and generates .bki
@@ -239,4 +239,9 @@ extern void AlterConstraintNamespaces(Oid ownerId, Oid oldNspId,
                                                   Oid newNspId, bool isType);
  extern Oid     get_constraint_oid(Oid relid, const char *conname, bool missing_ok);
  
+extern bool check_functional_grouping(Oid relid,
+                                                                         Index varno, Index varlevelsup,
+                                                                         List *grouping_columns,
+                                                                         List **constraintDeps);
+
  #endif   /* PG_CONSTRAINT_H */
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h

index 170822536693cddc121967a48b32ffe27018deb9..f02df2aca5bdf5c4ec2a543e7bc8cd4b7d0e9336 100644 (file)
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -13,7 +13,7 @@
   * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.433 2010/07/25 23:21:22 rhaas Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.434 2010/08/07 02:44:07 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -146,6 +146,9 @@ typedef struct Query
  
         Node       *setOperations;      /* set-operation tree if this is top level of
                                                                  * a UNION/INTERSECT/EXCEPT query */
+
+       List       *constraintDeps;     /* a list of pg_constraint OIDs that the query
+                                                                * depends on to be semantically valid */
  } Query;
  
  
diff --git a/src/test/regress/expected/functional_deps.out b/src/test/regress/expected/functional_deps.out

new file mode 100644 (file)

index 0000000..21e3652
--- /dev/null
+++ b/src/test/regress/expected/functional_deps.out
@@ -0,0 +1,241 @@
+-- from http://www.depesz.com/index.php/2010/04/19/getting-unique-elements/
+CREATE TEMP TABLE articles (
+    id int CONSTRAINT articles_pkey PRIMARY KEY,
+    keywords text,
+    title text UNIQUE NOT NULL,
+    body text UNIQUE,
+    created date
+);
+NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "articles_pkey" for table "articles"
+NOTICE:  CREATE TABLE / UNIQUE will create implicit index "articles_title_key" for table "articles"
+NOTICE:  CREATE TABLE / UNIQUE will create implicit index "articles_body_key" for table "articles"
+CREATE TEMP TABLE articles_in_category (
+    article_id int,
+    category_id int,
+    changed date,
+    PRIMARY KEY (article_id, category_id)
+);
+NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "articles_in_category_pkey" for table "articles_in_category"
+-- test functional dependencies based on primary keys/unique constraints
+-- base tables
+-- group by primary key (OK)
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY id;
+ id | keywords | title | body | created 
+----+----------+-------+------+---------
+(0 rows)
+
+-- group by unique not null (fail/todo)
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY title;
+ERROR:  column "articles.id" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: SELECT id, keywords, title, body, created
+               ^
+-- group by unique nullable (fail)
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY body;
+ERROR:  column "articles.id" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: SELECT id, keywords, title, body, created
+               ^
+-- group by something else (fail)
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY keywords;
+ERROR:  column "articles.id" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: SELECT id, keywords, title, body, created
+               ^
+-- multiple tables
+-- group by primary key (OK)
+SELECT a.id, a.keywords, a.title, a.body, a.created
+FROM articles AS a, articles_in_category AS aic
+WHERE a.id = aic.article_id AND aic.category_id in (14,62,70,53,138)
+GROUP BY a.id;
+ id | keywords | title | body | created 
+----+----------+-------+------+---------
+(0 rows)
+
+-- group by something else (fail)
+SELECT a.id, a.keywords, a.title, a.body, a.created
+FROM articles AS a, articles_in_category AS aic
+WHERE a.id = aic.article_id AND aic.category_id in (14,62,70,53,138)
+GROUP BY aic.article_id, aic.category_id;
+ERROR:  column "a.id" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: SELECT a.id, a.keywords, a.title, a.body, a.created
+               ^
+-- JOIN syntax
+-- group by left table's primary key (OK)
+SELECT a.id, a.keywords, a.title, a.body, a.created
+FROM articles AS a JOIN articles_in_category AS aic ON a.id = aic.article_id
+WHERE aic.category_id in (14,62,70,53,138)
+GROUP BY a.id;
+ id | keywords | title | body | created 
+----+----------+-------+------+---------
+(0 rows)
+
+-- group by something else (fail)
+SELECT a.id, a.keywords, a.title, a.body, a.created
+FROM articles AS a JOIN articles_in_category AS aic ON a.id = aic.article_id
+WHERE aic.category_id in (14,62,70,53,138)
+GROUP BY aic.article_id, aic.category_id;
+ERROR:  column "a.id" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: SELECT a.id, a.keywords, a.title, a.body, a.created
+               ^
+-- group by right table's (composite) primary key (OK)
+SELECT aic.changed
+FROM articles AS a JOIN articles_in_category AS aic ON a.id = aic.article_id
+WHERE aic.category_id in (14,62,70,53,138)
+GROUP BY aic.category_id, aic.article_id;
+ changed 
+---------
+(0 rows)
+
+-- group by right table's partial primary key (fail)
+SELECT aic.changed
+FROM articles AS a JOIN articles_in_category AS aic ON a.id = aic.article_id
+WHERE aic.category_id in (14,62,70,53,138)
+GROUP BY aic.article_id;
+ERROR:  column "aic.changed" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: SELECT aic.changed
+               ^
+-- example from documentation
+CREATE TEMP TABLE products (product_id int, name text, price numeric);
+CREATE TEMP TABLE sales (product_id int, units int);
+-- OK
+SELECT product_id, p.name, (sum(s.units) * p.price) AS sales
+    FROM products p LEFT JOIN sales s USING (product_id)
+    GROUP BY product_id, p.name, p.price;
+ product_id | name | sales 
+------------+------+-------
+(0 rows)
+
+-- fail
+SELECT product_id, p.name, (sum(s.units) * p.price) AS sales
+    FROM products p LEFT JOIN sales s USING (product_id)
+    GROUP BY product_id;
+ERROR:  column "p.name" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: SELECT product_id, p.name, (sum(s.units) * p.price) AS sales
+                           ^
+ALTER TABLE products ADD PRIMARY KEY (product_id);
+NOTICE:  ALTER TABLE / ADD PRIMARY KEY will create implicit index "products_pkey" for table "products"
+-- OK now
+SELECT product_id, p.name, (sum(s.units) * p.price) AS sales
+    FROM products p LEFT JOIN sales s USING (product_id)
+    GROUP BY product_id;
+ product_id | name | sales 
+------------+------+-------
+(0 rows)
+
+-- Drupal example, http://drupal.org/node/555530
+CREATE TEMP TABLE node (
+    nid SERIAL,
+    vid integer NOT NULL default '0',
+    type varchar(32) NOT NULL default '',
+    title varchar(128) NOT NULL default '',
+    uid integer NOT NULL default '0',
+    status integer NOT NULL default '1',
+    created integer NOT NULL default '0',
+    -- snip
+    PRIMARY KEY (nid, vid)
+);
+NOTICE:  CREATE TABLE will create implicit sequence "node_nid_seq" for serial column "node.nid"
+NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "node_pkey" for table "node"
+CREATE TEMP TABLE users (
+    uid integer NOT NULL default '0',
+    name varchar(60) NOT NULL default '',
+    pass varchar(32) NOT NULL default '',
+    -- snip
+    PRIMARY KEY (uid),
+    UNIQUE (name)
+);
+NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "users_pkey" for table "users"
+NOTICE:  CREATE TABLE / UNIQUE will create implicit index "users_name_key" for table "users"
+-- OK
+SELECT u.uid, u.name FROM node n
+INNER JOIN users u ON u.uid = n.uid
+WHERE n.type = 'blog' AND n.status = 1
+GROUP BY u.uid, u.name;
+ uid | name 
+-----+------
+(0 rows)
+
+-- OK
+SELECT u.uid, u.name FROM node n
+INNER JOIN users u ON u.uid = n.uid
+WHERE n.type = 'blog' AND n.status = 1
+GROUP BY u.uid;
+ uid | name 
+-----+------
+(0 rows)
+
+-- Check views and dependencies
+-- fail
+CREATE TEMP VIEW fdv1 AS
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY body;
+ERROR:  column "articles.id" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 2: SELECT id, keywords, title, body, created
+               ^
+-- OK
+CREATE TEMP VIEW fdv1 AS
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY id;
+-- fail
+ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT;
+ERROR:  cannot drop constraint articles_pkey on table articles because other objects depend on it
+DETAIL:  view fdv1 depends on constraint articles_pkey on table articles
+HINT:  Use DROP ... CASCADE to drop the dependent objects too.
+DROP VIEW fdv1;
+-- multiple dependencies
+CREATE TEMP VIEW fdv2 AS
+SELECT a.id, a.keywords, a.title, aic.category_id, aic.changed
+FROM articles AS a JOIN articles_in_category AS aic ON a.id = aic.article_id
+WHERE aic.category_id in (14,62,70,53,138)
+GROUP BY a.id, aic.category_id, aic.article_id;
+ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT; -- fail
+ERROR:  cannot drop constraint articles_pkey on table articles because other objects depend on it
+DETAIL:  view fdv2 depends on constraint articles_pkey on table articles
+HINT:  Use DROP ... CASCADE to drop the dependent objects too.
+ALTER TABLE articles_in_category DROP CONSTRAINT articles_in_category_pkey RESTRICT; --fail
+ERROR:  cannot drop constraint articles_in_category_pkey on table articles_in_category because other objects depend on it
+DETAIL:  view fdv2 depends on constraint articles_in_category_pkey on table articles_in_category
+HINT:  Use DROP ... CASCADE to drop the dependent objects too.
+DROP VIEW fdv2;
+-- nested queries
+CREATE TEMP VIEW fdv3 AS
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY id
+UNION
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY id;
+ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT; -- fail
+ERROR:  cannot drop constraint articles_pkey on table articles because other objects depend on it
+DETAIL:  view fdv3 depends on constraint articles_pkey on table articles
+HINT:  Use DROP ... CASCADE to drop the dependent objects too.
+DROP VIEW fdv3;
+CREATE TEMP VIEW fdv4 AS
+SELECT * FROM articles WHERE title IN (SELECT title FROM articles GROUP BY id);
+ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT; -- fail
+ERROR:  cannot drop constraint articles_pkey on table articles because other objects depend on it
+DETAIL:  view fdv4 depends on constraint articles_pkey on table articles
+HINT:  Use DROP ... CASCADE to drop the dependent objects too.
+DROP VIEW fdv4;
+-- prepared query plans: this results in failure on reuse
+PREPARE foo AS
+  SELECT id, keywords, title, body, created
+  FROM articles
+  GROUP BY id;
+EXECUTE foo;
+ id | keywords | title | body | created 
+----+----------+-------+------+---------
+(0 rows)
+
+ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT;
+EXECUTE foo;  -- fail
+ERROR:  column "articles.keywords" must appear in the GROUP BY clause or be used in an aggregate function
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule

index 7cdf872df593a3e24a789133230d582acbf2ba8c..554ef39e7d122fb90f192161e2a47337472128fd 100644 (file)
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -1,5 +1,5 @@
  # ----------
-# $PostgreSQL: pgsql/src/test/regress/parallel_schedule,v 1.61 2010/02/12 17:33:21 tgl Exp $
+# $PostgreSQL: pgsql/src/test/regress/parallel_schedule,v 1.62 2010/08/07 02:44:08 tgl Exp $
  #
  # By convention, we put no more than twenty tests in any one parallel group;
  # this limits the number of connections needed to run the tests.
@@ -84,7 +84,7 @@ test: rules
  # ----------
  # Another group of parallel tests
  # ----------
-test: select_views portals_p2 foreign_key cluster dependency guc bitmapops combocid tsearch tsdicts foreign_data window xmlmap
+test: select_views portals_p2 foreign_key cluster dependency guc bitmapops combocid tsearch tsdicts foreign_data window xmlmap functional_deps
  
  # ----------
  # Another group of parallel tests
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule

index c404d54206ba8b6c6a12e1a28605ecfe0b78973e..1717fe4b5aeed17f57232fa734ecd99d20754102 100644 (file)
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -1,4 +1,4 @@
-# $PostgreSQL: pgsql/src/test/regress/serial_schedule,v 1.56 2010/02/12 17:33:21 tgl Exp $
+# $PostgreSQL: pgsql/src/test/regress/serial_schedule,v 1.57 2010/08/07 02:44:08 tgl Exp $
  # This should probably be in an order similar to parallel_schedule.
  test: tablespace
  test: boolean
@@ -103,6 +103,7 @@ test: tsdicts
  test: foreign_data
  test: window
  test: xmlmap
+test: functional_deps
  test: plancache
  test: limit
  test: plpgsql
diff --git a/src/test/regress/sql/functional_deps.sql b/src/test/regress/sql/functional_deps.sql

new file mode 100644 (file)

index 0000000..406490b
--- /dev/null
+++ b/src/test/regress/sql/functional_deps.sql
@@ -0,0 +1,210 @@
+-- from http://www.depesz.com/index.php/2010/04/19/getting-unique-elements/
+
+CREATE TEMP TABLE articles (
+    id int CONSTRAINT articles_pkey PRIMARY KEY,
+    keywords text,
+    title text UNIQUE NOT NULL,
+    body text UNIQUE,
+    created date
+);
+
+CREATE TEMP TABLE articles_in_category (
+    article_id int,
+    category_id int,
+    changed date,
+    PRIMARY KEY (article_id, category_id)
+);
+
+-- test functional dependencies based on primary keys/unique constraints
+
+-- base tables
+
+-- group by primary key (OK)
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY id;
+
+-- group by unique not null (fail/todo)
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY title;
+
+-- group by unique nullable (fail)
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY body;
+
+-- group by something else (fail)
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY keywords;
+
+-- multiple tables
+
+-- group by primary key (OK)
+SELECT a.id, a.keywords, a.title, a.body, a.created
+FROM articles AS a, articles_in_category AS aic
+WHERE a.id = aic.article_id AND aic.category_id in (14,62,70,53,138)
+GROUP BY a.id;
+
+-- group by something else (fail)
+SELECT a.id, a.keywords, a.title, a.body, a.created
+FROM articles AS a, articles_in_category AS aic
+WHERE a.id = aic.article_id AND aic.category_id in (14,62,70,53,138)
+GROUP BY aic.article_id, aic.category_id;
+
+-- JOIN syntax
+
+-- group by left table's primary key (OK)
+SELECT a.id, a.keywords, a.title, a.body, a.created
+FROM articles AS a JOIN articles_in_category AS aic ON a.id = aic.article_id
+WHERE aic.category_id in (14,62,70,53,138)
+GROUP BY a.id;
+
+-- group by something else (fail)
+SELECT a.id, a.keywords, a.title, a.body, a.created
+FROM articles AS a JOIN articles_in_category AS aic ON a.id = aic.article_id
+WHERE aic.category_id in (14,62,70,53,138)
+GROUP BY aic.article_id, aic.category_id;
+
+-- group by right table's (composite) primary key (OK)
+SELECT aic.changed
+FROM articles AS a JOIN articles_in_category AS aic ON a.id = aic.article_id
+WHERE aic.category_id in (14,62,70,53,138)
+GROUP BY aic.category_id, aic.article_id;
+
+-- group by right table's partial primary key (fail)
+SELECT aic.changed
+FROM articles AS a JOIN articles_in_category AS aic ON a.id = aic.article_id
+WHERE aic.category_id in (14,62,70,53,138)
+GROUP BY aic.article_id;
+
+
+-- example from documentation
+
+CREATE TEMP TABLE products (product_id int, name text, price numeric);
+CREATE TEMP TABLE sales (product_id int, units int);
+
+-- OK
+SELECT product_id, p.name, (sum(s.units) * p.price) AS sales
+    FROM products p LEFT JOIN sales s USING (product_id)
+    GROUP BY product_id, p.name, p.price;
+
+-- fail
+SELECT product_id, p.name, (sum(s.units) * p.price) AS sales
+    FROM products p LEFT JOIN sales s USING (product_id)
+    GROUP BY product_id;
+
+ALTER TABLE products ADD PRIMARY KEY (product_id);
+
+-- OK now
+SELECT product_id, p.name, (sum(s.units) * p.price) AS sales
+    FROM products p LEFT JOIN sales s USING (product_id)
+    GROUP BY product_id;
+
+
+-- Drupal example, http://drupal.org/node/555530
+
+CREATE TEMP TABLE node (
+    nid SERIAL,
+    vid integer NOT NULL default '0',
+    type varchar(32) NOT NULL default '',
+    title varchar(128) NOT NULL default '',
+    uid integer NOT NULL default '0',
+    status integer NOT NULL default '1',
+    created integer NOT NULL default '0',
+    -- snip
+    PRIMARY KEY (nid, vid)
+);
+
+CREATE TEMP TABLE users (
+    uid integer NOT NULL default '0',
+    name varchar(60) NOT NULL default '',
+    pass varchar(32) NOT NULL default '',
+    -- snip
+    PRIMARY KEY (uid),
+    UNIQUE (name)
+);
+
+-- OK
+SELECT u.uid, u.name FROM node n
+INNER JOIN users u ON u.uid = n.uid
+WHERE n.type = 'blog' AND n.status = 1
+GROUP BY u.uid, u.name;
+
+-- OK
+SELECT u.uid, u.name FROM node n
+INNER JOIN users u ON u.uid = n.uid
+WHERE n.type = 'blog' AND n.status = 1
+GROUP BY u.uid;
+
+
+-- Check views and dependencies
+
+-- fail
+CREATE TEMP VIEW fdv1 AS
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY body;
+
+-- OK
+CREATE TEMP VIEW fdv1 AS
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY id;
+
+-- fail
+ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT;
+
+DROP VIEW fdv1;
+
+
+-- multiple dependencies
+CREATE TEMP VIEW fdv2 AS
+SELECT a.id, a.keywords, a.title, aic.category_id, aic.changed
+FROM articles AS a JOIN articles_in_category AS aic ON a.id = aic.article_id
+WHERE aic.category_id in (14,62,70,53,138)
+GROUP BY a.id, aic.category_id, aic.article_id;
+
+ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT; -- fail
+ALTER TABLE articles_in_category DROP CONSTRAINT articles_in_category_pkey RESTRICT; --fail
+
+DROP VIEW fdv2;
+
+
+-- nested queries
+
+CREATE TEMP VIEW fdv3 AS
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY id
+UNION
+SELECT id, keywords, title, body, created
+FROM articles
+GROUP BY id;
+
+ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT; -- fail
+
+DROP VIEW fdv3;
+
+
+CREATE TEMP VIEW fdv4 AS
+SELECT * FROM articles WHERE title IN (SELECT title FROM articles GROUP BY id);
+
+ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT; -- fail
+
+DROP VIEW fdv4;
+
+
+-- prepared query plans: this results in failure on reuse
+
+PREPARE foo AS
+  SELECT id, keywords, title, body, created
+  FROM articles
+  GROUP BY id;
+
+EXECUTE foo;
+
+ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT;
+
+EXECUTE foo;  -- fail
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 7 Aug 2010 02:44:09 +0000 (02:44 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 7 Aug 2010 02:44:09 +0000 (02:44 +0000)
doc/src/sgml/queries.sgml		patch \| blob \| history
doc/src/sgml/ref/select.sgml		patch \| blob \| history
src/backend/catalog/dependency.c		patch \| blob \| history
src/backend/catalog/pg_constraint.c		patch \| blob \| history
src/backend/nodes/copyfuncs.c		patch \| blob \| history
src/backend/nodes/equalfuncs.c		patch \| blob \| history
src/backend/nodes/outfuncs.c		patch \| blob \| history
src/backend/nodes/readfuncs.c		patch \| blob \| history
src/backend/parser/parse_agg.c		patch \| blob \| history
src/include/catalog/catversion.h		patch \| blob \| history
src/include/catalog/pg_constraint.h		patch \| blob \| history
src/include/nodes/parsenodes.h		patch \| blob \| history
src/test/regress/expected/functional_deps.out	[new file with mode: 0644]	patch \| blob
src/test/regress/parallel_schedule		patch \| blob \| history
src/test/regress/serial_schedule		patch \| blob \| history
src/test/regress/sql/functional_deps.sql	[new file with mode: 0644]	patch \| blob