Check for tables with sql_identifier during pg_upgrade

author Tomas Vondra <tomas.vondra@postgresql.org>

Mon, 14 Oct 2019 20:31:56 +0000 (22:31 +0200)

committer Tomas Vondra <tomas.vondra@postgresql.org>

Mon, 14 Oct 2019 20:31:56 +0000 (22:31 +0200)
author Tomas Vondra <tomas.vondra@postgresql.org>
Mon, 14 Oct 2019 20:31:56 +0000 (22:31 +0200)
committer Tomas Vondra <tomas.vondra@postgresql.org>
Mon, 14 Oct 2019 20:31:56 +0000 (22:31 +0200)
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c

index e7bf48a7394dc0afed9cfbe8e41b932b95cea00a..87b9d328d4cc72dbc4091e1833afb7297db405b8 100644 (file)
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -108,6 +108,14 @@ check_and_dump_old_cluster(bool live_check)
         if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1100)
                 check_for_tables_with_oids(&old_cluster);
  
+       /*
+        * PG 12 changed the 'sql_identifier' type storage to be based on name,
+        * not varchar, which breaks on-disk format for existing data. So we need
+        * to prevent upgrade when used in user objects (tables, indexes, ...).
+        */
+       if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1100)
+               old_11_check_for_sql_identifier_data_type_usage(&old_cluster);
+
         /*
          * Pre-PG 10 allowed tables with 'unknown' type columns and non WAL logged
          * hash indexes
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h

index f724ecf9caddb28e08982a29636d7b8fa13332c8..729f86aa32d443a4f873d983adf77e3e4d878dfb 100644 (file)
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -451,6 +451,8 @@ void                old_9_6_check_for_unknown_data_type_usage(ClusterInfo *cluster);
  void           old_9_6_invalidate_hash_indexes(ClusterInfo *cluster,
                                                                                         bool check_mode);
  
+void           old_11_check_for_sql_identifier_data_type_usage(ClusterInfo *cluster);
+
  /* parallel.c */
  void           parallel_exec_prog(const char *log_file, const char *opt_log_file,
                                                            const char *fmt,...) pg_attribute_printf(3, 4);
diff --git a/src/bin/pg_upgrade/version.c b/src/bin/pg_upgrade/version.c

index 42f1ce77571878e5bf1ddcc67e139283f7e1a9db..f3fd8fa27378f275fcc7533e31a525ee7b646aea 100644 (file)
--- a/src/bin/pg_upgrade/version.c
+++ b/src/bin/pg_upgrade/version.c
@@ -399,3 +399,122 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode)
         else
                 check_ok();
  }
+
+/*
+ * old_11_check_for_sql_identifier_data_type_usage()
+ *     11 -> 12
+ *     In 12, the sql_identifier data type was switched from varchar to name,
+ *     which does affect the storage (name is by-ref, but not varlena). This
+ *     means user tables using sql_identifier for columns are broken because
+ *     the on-disk format is different.
+ *
+ *     We need to check all objects that might store sql_identifier on disk,
+ *     i.e. tables, matviews and indexes. Also check domains and composite
+ *     types wrapping it, in case they are used in this context.
+ */
+void
+old_11_check_for_sql_identifier_data_type_usage(ClusterInfo *cluster)
+{
+       int                     dbnum;
+       FILE       *script = NULL;
+       bool            found = false;
+       char            output_path[MAXPGPATH];
+
+       prep_status("Checking for invalid \"sql_identifier\" user columns");
+
+       snprintf(output_path, sizeof(output_path), "tables_using_sql_identifier.txt");
+
+       for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
+       {
+               PGresult   *res;
+               bool            db_used = false;
+               int                     ntups;
+               int                     rowno;
+               int                     i_nspname,
+                                       i_relname,
+                                       i_attname;
+               DbInfo     *active_db = &cluster->dbarr.dbs[dbnum];
+               PGconn     *conn = connectToServer(cluster, active_db->db_name);
+
+               /*
+                * We need the recursive CTE because the sql_identifier may be wrapped
+                * either in a domain or composite type, or both (in arbitrary order).
+                */
+               res = executeQueryOrDie(conn,
+                                                               "WITH RECURSIVE oids AS ( "
+               /* the sql_identifier type itself */
+                                                               "       SELECT 'information_schema.sql_identifier'::regtype AS oid "
+                                                               "       UNION ALL "
+                                                               "       SELECT * FROM ( "
+               /* domains on the type */
+                                                               "               WITH x AS (SELECT oid FROM oids) "
+                                                               "                       SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typbasetype = x.oid AND typtype = 'd' "
+                                                               "                       UNION "
+               /* composite types containing the type */
+                                                               "                       SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_class c, pg_catalog.pg_attribute a, x "
+                                                               "                       WHERE t.typtype = 'c' AND "
+                                                               "                                 t.oid = c.reltype AND "
+                                                               "                                 c.oid = a.attrelid AND "
+                                                               "                                 NOT a.attisdropped AND "
+                                                               "                                 a.atttypid = x.oid "
+                                                               "       ) foo "
+                                                               ") "
+                                                               "SELECT n.nspname, c.relname, a.attname "
+                                                               "FROM   pg_catalog.pg_class c, "
+                                                               "               pg_catalog.pg_namespace n, "
+                                                               "               pg_catalog.pg_attribute a "
+                                                               "WHERE  c.oid = a.attrelid AND "
+                                                               "               NOT a.attisdropped AND "
+                                                               "               a.atttypid IN (SELECT oid FROM oids) AND "
+                                                               "               c.relkind IN ("
+                                                               CppAsString2(RELKIND_RELATION) ", "
+                                                               CppAsString2(RELKIND_MATVIEW) ", "
+                                                               CppAsString2(RELKIND_INDEX) ") AND "
+                                                               "               c.relnamespace = n.oid AND "
+               /* exclude possible orphaned temp tables */
+                                                               "               n.nspname !~ '^pg_temp_' AND "
+                                                               "               n.nspname !~ '^pg_toast_temp_' AND "
+                                                               "               n.nspname NOT IN ('pg_catalog', 'information_schema')");
+
+               ntups = PQntuples(res);
+               i_nspname = PQfnumber(res, "nspname");
+               i_relname = PQfnumber(res, "relname");
+               i_attname = PQfnumber(res, "attname");
+               for (rowno = 0; rowno < ntups; rowno++)
+               {
+                       found = true;
+                       if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL)
+                               pg_fatal("could not open file \"%s\": %s\n", output_path,
+                                                strerror(errno));
+                       if (!db_used)
+                       {
+                               fprintf(script, "Database: %s\n", active_db->db_name);
+                               db_used = true;
+                       }
+                       fprintf(script, "  %s.%s.%s\n",
+                                       PQgetvalue(res, rowno, i_nspname),
+                                       PQgetvalue(res, rowno, i_relname),
+                                       PQgetvalue(res, rowno, i_attname));
+               }
+
+               PQclear(res);
+
+               PQfinish(conn);
+       }
+
+       if (script)
+               fclose(script);
+
+       if (found)
+       {
+               pg_log(PG_REPORT, "fatal\n");
+               pg_fatal("Your installation contains the \"sql_identifier\" data type in user tables\n"
+                                "and/or indexes.  The on-disk format for this data type has changed, so this\n"
+                                "cluster cannot currently be upgraded.  You can remove the problem tables or\n"
+                                "change the data type to \"name\" and restart the upgrade.\n"
+                                "A list of the problem columns is in the file:\n"
+                                "    %s\n\n", output_path);
+       }
+       else
+               check_ok();
+}
author	Tomas Vondra <tomas.vondra@postgresql.org>
	Mon, 14 Oct 2019 20:31:56 +0000 (22:31 +0200)
committer	Tomas Vondra <tomas.vondra@postgresql.org>
	Mon, 14 Oct 2019 20:31:56 +0000 (22:31 +0200)
src/bin/pg_upgrade/check.c		patch \| blob \| history
src/bin/pg_upgrade/pg_upgrade.h		patch \| blob \| history
src/bin/pg_upgrade/version.c		patch \| blob \| history