Give a better error for duplicate entries in VACUUM/ANALYZE column list.
authorTom Lane <tgl@sss.pgh.pa.us>
Thu, 21 Sep 2017 22:13:11 +0000 (18:13 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Thu, 21 Sep 2017 22:13:11 +0000 (18:13 -0400)
Previously, the code didn't think about this case and would just try to
analyze such a column twice.  That would fail at the point of inserting
the second version of the pg_statistic row, with obscure error messsages
like "duplicate key value violates unique constraint" or "tuple already
updated by self", depending on context and PG version.  We could allow
the case by ignoring duplicate column specifications, but it seems better
to reject it explicitly.

The bogus error messages seem like arguably a bug, so back-patch to
all supported versions.

Nathan Bossart, per a report from Michael Paquier, and whacked
around a bit by me.

Discussion: https://postgr.es/m/E061A8E3-5E3D-494D-94F0-E8A9B312BBFC@amazon.com

src/backend/commands/analyze.c
src/test/regress/expected/vacuum.out
src/test/regress/sql/vacuum.sql

index 1afa12e5f5d025bcc974a810eba0321033c30b15..f269e23a28590c6de19abda43d2e033a30acc367 100644 (file)
@@ -367,10 +367,14 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
        /*
         * Determine which columns to analyze
         *
-        * Note that system attributes are never analyzed.
+        * Note that system attributes are never analyzed, so we just reject them
+        * at the lookup stage.  We also reject duplicate column mentions.  (We
+        * could alternatively ignore duplicates, but analyzing a column twice
+        * won't work; we'd end up making a conflicting update in pg_statistic.)
         */
        if (vacstmt->va_cols != NIL)
        {
+               Bitmapset  *unique_cols = NULL;
                ListCell   *le;
 
                vacattrstats = (VacAttrStats **) palloc(list_length(vacstmt->va_cols) *
@@ -386,6 +390,13 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
                                                (errcode(ERRCODE_UNDEFINED_COLUMN),
                                        errmsg("column \"%s\" of relation \"%s\" does not exist",
                                                   col, RelationGetRelationName(onerel))));
+                       if (bms_is_member(i, unique_cols))
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_DUPLICATE_COLUMN),
+                                                errmsg("column \"%s\" of relation \"%s\" is specified twice",
+                                                               col, RelationGetRelationName(onerel))));
+                       unique_cols = bms_add_member(unique_cols, i);
+
                        vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
                        if (vacattrstats[tcnt] != NULL)
                                tcnt++;
index 98c604c2c5599f562c6279adeb1a37b06348ff8e..6ba8ec93e337d153188be15eda29895d7a0e4165 100644 (file)
@@ -80,5 +80,10 @@ ERROR:  ANALYZE cannot be executed from VACUUM or ANALYZE
 CONTEXT:  SQL function "do_analyze" statement 1
 SQL function "wrap_do_analyze" statement 1
 VACUUM FULL vactst;
+-- check behavior with duplicate column mentions
+VACUUM ANALYZE vaccluster(i,i);
+ERROR:  column "i" of relation "vaccluster" is specified twice
+ANALYZE vaccluster(i,i);
+ERROR:  column "i" of relation "vaccluster" is specified twice
 DROP TABLE vaccluster;
 DROP TABLE vactst;
index f8412016cf3fc23bad621f5760e3126ea312e0a0..0d58376b943c0048bba116925dea7af182378d44 100644 (file)
@@ -60,5 +60,9 @@ VACUUM FULL pg_database;
 VACUUM FULL vaccluster;
 VACUUM FULL vactst;
 
+-- check behavior with duplicate column mentions
+VACUUM ANALYZE vaccluster(i,i);
+ANALYZE vaccluster(i,i);
+
 DROP TABLE vaccluster;
 DROP TABLE vactst;