Change the way encoding and locale checks are done in pg_upgrade.

author Heikki Linnakangas <heikki.linnakangas@iki.fi>
Fri, 10 Oct 2014 06:59:44 +0000 (09:59 +0300)
committer Heikki Linnakangas <heikki.linnakangas@iki.fi>
Fri, 10 Oct 2014 07:39:32 +0000 (10:39 +0300)

diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c

index bbfcab71ce1242fb905ec42e117449024e867254..3629b3961f6a923e6fe3ab99d55ea6e9bfa1777c 100644 (file)
--- a/contrib/pg_upgrade/check.c
+++ b/contrib/pg_upgrade/check.c
@@ -14,12 +14,10 @@
  #include "pg_upgrade.h"
  
  
-static void set_locale_and_encoding(ClusterInfo *cluster);
  static void check_new_cluster_is_empty(void);
-static void check_locale_and_encoding(ControlData *oldctrl,
-                                                 ControlData *newctrl);
-static bool equivalent_locale(const char *loca, const char *locb);
-static bool equivalent_encoding(const char *chara, const char *charb);
+static void check_databases_are_compatible(void);
+static void check_locale_and_encoding(DbInfo *olddb, DbInfo *newdb);
+static bool equivalent_locale(int category, const char *loca, const char *locb);
  static void check_is_install_user(ClusterInfo *cluster);
  static void check_for_prepared_transactions(ClusterInfo *cluster);
  static void check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster);
@@ -81,8 +79,6 @@ check_and_dump_old_cluster(bool live_check)
         if (!live_check)
                 start_postmaster(&old_cluster, true);
  
-       set_locale_and_encoding(&old_cluster);
-
         get_pg_database_relfilenode(&old_cluster);
  
         /* Extract a list of databases and tables from the old cluster */
@@ -127,13 +123,10 @@ check_and_dump_old_cluster(bool live_check)
  void
  check_new_cluster(void)
  {
-       set_locale_and_encoding(&new_cluster);
-
-       check_locale_and_encoding(&old_cluster.controldata, &new_cluster.controldata);
-
         get_db_and_rel_infos(&new_cluster);
  
         check_new_cluster_is_empty();
+       check_databases_are_compatible();
  
         check_loadable_libraries();
  
@@ -278,94 +271,26 @@ check_cluster_compatibility(bool live_check)
  }
  
  
-/*
- * set_locale_and_encoding()
- *
- * query the database to get the template0 locale
- */
-static void
-set_locale_and_encoding(ClusterInfo *cluster)
-{
-       ControlData *ctrl = &cluster->controldata;
-       PGconn     *conn;
-       PGresult   *res;
-       int                     i_encoding;
-       int                     cluster_version = cluster->major_version;
-
-       conn = connectToServer(cluster, "template1");
-
-       /* for pg < 80400, we got the values from pg_controldata */
-       if (cluster_version >= 80400)
-       {
-               int                     i_datcollate;
-               int                     i_datctype;
-
-               res = executeQueryOrDie(conn,
-                                                               "SELECT datcollate, datctype "
-                                                               "FROM   pg_catalog.pg_database "
-                                                               "WHERE  datname = 'template0' ");
-               assert(PQntuples(res) == 1);
-
-               i_datcollate = PQfnumber(res, "datcollate");
-               i_datctype = PQfnumber(res, "datctype");
-
-               if (GET_MAJOR_VERSION(cluster->major_version) < 902)
-               {
-                       /*
-                        * Pre-9.2 did not canonicalize the supplied locale names to match
-                        * what the system returns, while 9.2+ does, so convert pre-9.2 to
-                        * match.
-                        */
-                       ctrl->lc_collate = get_canonical_locale_name(LC_COLLATE,
-                                                               pg_strdup(PQgetvalue(res, 0, i_datcollate)));
-                       ctrl->lc_ctype = get_canonical_locale_name(LC_CTYPE,
-                                                                 pg_strdup(PQgetvalue(res, 0, i_datctype)));
-               }
-               else
-               {
-                       ctrl->lc_collate = pg_strdup(PQgetvalue(res, 0, i_datcollate));
-                       ctrl->lc_ctype = pg_strdup(PQgetvalue(res, 0, i_datctype));
-               }
-
-               PQclear(res);
-       }
-
-       res = executeQueryOrDie(conn,
-                                                       "SELECT pg_catalog.pg_encoding_to_char(encoding) "
-                                                       "FROM   pg_catalog.pg_database "
-                                                       "WHERE  datname = 'template0' ");
-       assert(PQntuples(res) == 1);
-
-       i_encoding = PQfnumber(res, "pg_encoding_to_char");
-       ctrl->encoding = pg_strdup(PQgetvalue(res, 0, i_encoding));
-
-       PQclear(res);
-
-       PQfinish(conn);
-}
-
-
  /*
   * check_locale_and_encoding()
   *
- * Check that old and new locale and encoding match.  Even though the backend
- * tries to canonicalize stored locale names, the platform often doesn't
- * cooperate, so it's entirely possible that one DB thinks its locale is
- * "en_US.UTF-8" while the other says "en_US.utf8".  Try to be forgiving.
+ * Check that locale and encoding of a database in the old and new clusters
+ * are compatible.
   */
  static void
-check_locale_and_encoding(ControlData *oldctrl,
-                                                 ControlData *newctrl)
+check_locale_and_encoding(DbInfo *olddb, DbInfo *newdb)
  {
-       if (!equivalent_locale(oldctrl->lc_collate, newctrl->lc_collate))
-               pg_fatal("lc_collate cluster values do not match:  old \"%s\", new \"%s\"\n",
-                                oldctrl->lc_collate, newctrl->lc_collate);
-       if (!equivalent_locale(oldctrl->lc_ctype, newctrl->lc_ctype))
-               pg_fatal("lc_ctype cluster values do not match:  old \"%s\", new \"%s\"\n",
-                                oldctrl->lc_ctype, newctrl->lc_ctype);
-       if (!equivalent_encoding(oldctrl->encoding, newctrl->encoding))
-               pg_fatal("encoding cluster values do not match:  old \"%s\", new \"%s\"\n",
-                                oldctrl->encoding, newctrl->encoding);
+       if (olddb->db_encoding != newdb->db_encoding)
+               pg_fatal("encodings for database \"%s\" do not match:  old \"%s\", new \"%s\"\n",
+                                olddb->db_name,
+                                pg_encoding_to_char(olddb->db_encoding),
+                                pg_encoding_to_char(newdb->db_encoding));
+       if (!equivalent_locale(LC_COLLATE, olddb->db_collate, newdb->db_collate))
+               pg_fatal("lc_collate values for database \"%s\" do not match:  old \"%s\", new \"%s\"\n",
+                                olddb->db_name, olddb->db_collate, newdb->db_collate);
+       if (!equivalent_locale(LC_CTYPE, olddb->db_ctype, newdb->db_ctype))
+               pg_fatal("lc_ctype values for database \"%s\" do not match:  old \"%s\", new \"%s\"\n",
+                                olddb->db_name, olddb->db_ctype, newdb->db_ctype);
  }
  
  /*
@@ -373,61 +298,46 @@ check_locale_and_encoding(ControlData *oldctrl,
   *
   * Best effort locale-name comparison.  Return false if we are not 100% sure
   * the locales are equivalent.
+ *
+ * Note: The encoding parts of the names are ignored. This function is
+ * currently used to compare locale names stored in pg_database, and
+ * pg_database contains a separate encoding field. That's compared directly
+ * in check_locale_and_encoding().
   */
  static bool
-equivalent_locale(const char *loca, const char *locb)
+equivalent_locale(int category, const char *loca, const char *locb)
  {
         const char *chara = strrchr(loca, '.');
         const char *charb = strrchr(locb, '.');
-       int                     lencmp;
-
-       /* If they don't both contain an encoding part, just do strcasecmp(). */
-       if (!chara || !charb)
-               return (pg_strcasecmp(loca, locb) == 0);
+       char       *canona;
+       char       *canonb;
+       int                     lena;
+       int                     lenb;
  
         /*
-        * Compare the encoding parts.  Windows tends to use code page numbers for
-        * the encoding part, which equivalent_encoding() won't like, so accept if
-        * the strings are case-insensitive equal; otherwise use
-        * equivalent_encoding() to compare.
+        * If the names are equal, the locales are equivalent. Checking this
+        * first avoids calling setlocale() in the common case that the names
+        * are equal. That's a good thing, if setlocale() is buggy, for example.
          */
-       if (pg_strcasecmp(chara + 1, charb + 1) != 0 &&
-               !equivalent_encoding(chara + 1, charb + 1))
-               return false;
+       if (pg_strcasecmp(loca, locb) == 0)
+               return true;
  
         /*
-        * OK, compare the locale identifiers (e.g. en_US part of en_US.utf8).
-        *
-        * It's tempting to ignore non-alphanumeric chars here, but for now it's
-        * not clear that that's necessary; just do case-insensitive comparison.
+        * Not identical. Canonicalize both names, remove the encoding parts,
+        * and try again.
          */
-       lencmp = chara - loca;
-       if (lencmp != charb - locb)
-               return false;
+       canona = get_canonical_locale_name(category, loca);
+       chara = strrchr(canona, '.');
+       lena = chara ? (chara - canona) : strlen(canona);
  
-       return (pg_strncasecmp(loca, locb, lencmp) == 0);
-}
+       canonb = get_canonical_locale_name(category, locb);
+       charb = strrchr(canonb, '.');
+       lenb = charb ? (charb - canonb) : strlen(canonb);
  
-/*
- * equivalent_encoding()
- *
- * Best effort encoding-name comparison.  Return true only if the encodings
- * are valid server-side encodings and known equivalent.
- *
- * Because the lookup in pg_valid_server_encoding() does case folding and
- * ignores non-alphanumeric characters, this will recognize many popular
- * variant spellings as equivalent, eg "utf8" and "UTF-8" will match.
- */
-static bool
-equivalent_encoding(const char *chara, const char *charb)
-{
-       int                     enca = pg_valid_server_encoding(chara);
-       int                     encb = pg_valid_server_encoding(charb);
+       if (lena == lenb && pg_strncasecmp(canona, canonb, lena) == 0)
+               return true;
  
-       if (enca < 0 || encb < 0)
-               return false;
-
-       return (enca == encb);
+       return false;
  }
  
  
@@ -450,7 +360,35 @@ check_new_cluster_is_empty(void)
                                                  new_cluster.dbarr.dbs[dbnum].db_name);
                 }
         }
+}
+
+/*
+ * Check that every database that already exists in the new cluster is
+ * compatible with the corresponding database in the old one.
+ */
+static void
+check_databases_are_compatible(void)
+{
+       int                     newdbnum;
+       int                     olddbnum;
+       DbInfo     *newdbinfo;
+       DbInfo     *olddbinfo;
  
+       for (newdbnum = 0; newdbnum < new_cluster.dbarr.ndbs; newdbnum++)
+       {
+               newdbinfo = &new_cluster.dbarr.dbs[newdbnum];
+
+               /* Find the corresponding database in the old cluster */
+               for (olddbnum = 0; olddbnum < old_cluster.dbarr.ndbs; olddbnum++)
+               {
+                       olddbinfo = &old_cluster.dbarr.dbs[olddbnum];
+                       if (strcmp(newdbinfo->db_name, olddbinfo->db_name) == 0)
+                       {
+                               check_locale_and_encoding(olddbinfo, newdbinfo);
+                               break;
+                       }
+               }
+       }
  }
  
  
diff --git a/contrib/pg_upgrade/controldata.c b/contrib/pg_upgrade/controldata.c

index 8379ebd71be153b0af1216979390257bb6c13ce4..4e9d5948fae010909716ef904a99b035ca2b8d0e 100644 (file)
--- a/contrib/pg_upgrade/controldata.c
+++ b/contrib/pg_upgrade/controldata.c
@@ -122,10 +122,6 @@ get_control_data(ClusterInfo *cluster, bool live_check)
                 pg_fatal("Could not get control data using %s: %s\n",
                                  cmd, getErrorText(errno));
  
-       /* Only pre-8.4 has these so if they are not set below we will check later */
-       cluster->controldata.lc_collate = NULL;
-       cluster->controldata.lc_ctype = NULL;
-
         /* Only in <= 9.2 */
         if (GET_MAJOR_VERSION(cluster->major_version) <= 902)
         {
@@ -404,36 +400,6 @@ get_control_data(ClusterInfo *cluster, bool live_check)
                         cluster->controldata.data_checksum_version = str2uint(p);
                         got_data_checksum_version = true;
                 }
-               /* In pre-8.4 only */
-               else if ((p = strstr(bufin, "LC_COLLATE:")) != NULL)
-               {
-                       p = strchr(p, ':');
-
-                       if (p == NULL || strlen(p) <= 1)
-                               pg_fatal("%d: controldata retrieval problem\n", __LINE__);
-
-                       p++;                            /* remove ':' char */
-                       /* skip leading spaces and remove trailing newline */
-                       p += strspn(p, " ");
-                       if (strlen(p) > 0 && *(p + strlen(p) - 1) == '\n')
-                               *(p + strlen(p) - 1) = '\0';
-                       cluster->controldata.lc_collate = pg_strdup(p);
-               }
-               /* In pre-8.4 only */
-               else if ((p = strstr(bufin, "LC_CTYPE:")) != NULL)
-               {
-                       p = strchr(p, ':');
-
-                       if (p == NULL || strlen(p) <= 1)
-                               pg_fatal("%d: controldata retrieval problem\n", __LINE__);
-
-                       p++;                            /* remove ':' char */
-                       /* skip leading spaces and remove trailing newline */
-                       p += strspn(p, " ");
-                       if (strlen(p) > 0 && *(p + strlen(p) - 1) == '\n')
-                               *(p + strlen(p) - 1) = '\0';
-                       cluster->controldata.lc_ctype = pg_strdup(p);
-               }
         }
  
         if (output)
diff --git a/contrib/pg_upgrade/info.c b/contrib/pg_upgrade/info.c

index a1773aa8e53bdcfacb71a23a3b3017f9e49331f9..c347dfc49326aee3687cc5be7b3c47534d4d2ea7 100644 (file)
--- a/contrib/pg_upgrade/info.c
+++ b/contrib/pg_upgrade/info.c
@@ -239,11 +239,15 @@ get_db_infos(ClusterInfo *cluster)
         DbInfo     *dbinfos;
         int                     i_datname,
                                 i_oid,
+                               i_encoding,
+                               i_datcollate,
+                               i_datctype,
                                 i_spclocation;
         char            query[QUERY_ALLOC];
  
         snprintf(query, sizeof(query),
-                        "SELECT d.oid, d.datname, %s "
+                        "SELECT d.oid, d.datname, d.encoding, d.datcollate, d.datctype, "
+                        "%s AS spclocation "
                          "FROM pg_catalog.pg_database d "
                          " LEFT OUTER JOIN pg_catalog.pg_tablespace t "
                          " ON d.dattablespace = t.oid "
@@ -252,12 +256,15 @@ get_db_infos(ClusterInfo *cluster)
                          "ORDER BY 2",
         /* 9.2 removed the spclocation column */
                          (GET_MAJOR_VERSION(cluster->major_version) <= 901) ?
-                        "t.spclocation" : "pg_catalog.pg_tablespace_location(t.oid) AS spclocation");
+                        "t.spclocation" : "pg_catalog.pg_tablespace_location(t.oid)");
  
         res = executeQueryOrDie(conn, "%s", query);
  
         i_oid = PQfnumber(res, "oid");
         i_datname = PQfnumber(res, "datname");
+       i_encoding = PQfnumber(res, "encoding");
+       i_datcollate = PQfnumber(res, "datcollate");
+       i_datctype = PQfnumber(res, "datctype");
         i_spclocation = PQfnumber(res, "spclocation");
  
         ntups = PQntuples(res);
@@ -267,6 +274,9 @@ get_db_infos(ClusterInfo *cluster)
         {
                 dbinfos[tupnum].db_oid = atooid(PQgetvalue(res, tupnum, i_oid));
                 dbinfos[tupnum].db_name = pg_strdup(PQgetvalue(res, tupnum, i_datname));
+               dbinfos[tupnum].db_encoding = atoi(PQgetvalue(res, tupnum, i_encoding));
+               dbinfos[tupnum].db_collate = pg_strdup(PQgetvalue(res, tupnum, i_datcollate));
+               dbinfos[tupnum].db_ctype = pg_strdup(PQgetvalue(res, tupnum, i_datctype));
                 snprintf(dbinfos[tupnum].db_tablespace, sizeof(dbinfos[tupnum].db_tablespace), "%s",
                                  PQgetvalue(res, tupnum, i_spclocation));
         }
diff --git a/contrib/pg_upgrade/pg_upgrade.h b/contrib/pg_upgrade/pg_upgrade.h

index 56a7505a961b8e065083195d644a1f1e3b6504d9..65dda9780ab91f733ca44f12252166b40023860e 100644 (file)
--- a/contrib/pg_upgrade/pg_upgrade.h
+++ b/contrib/pg_upgrade/pg_upgrade.h
@@ -180,6 +180,9 @@ typedef struct
         char       *db_name;            /* database name */
         char            db_tablespace[MAXPGPATH];               /* database default tablespace
                                                                                                  * path */
+       char       *db_collate;
+       char       *db_ctype;
+       int                     db_encoding;
         RelInfoArr      rel_arr;                /* array of all user relinfos */
  } DbInfo;
  
@@ -218,9 +221,6 @@ typedef struct
         bool            date_is_int;
         bool            float8_pass_by_value;
         bool            data_checksum_version;
-       char       *lc_collate;
-       char       *lc_ctype;
-       char       *encoding;
  } ControlData;
  
  /*
author	Heikki Linnakangas <heikki.linnakangas@iki.fi>
	Fri, 10 Oct 2014 06:59:44 +0000 (09:59 +0300)
committer	Heikki Linnakangas <heikki.linnakangas@iki.fi>
	Fri, 10 Oct 2014 07:39:32 +0000 (10:39 +0300)
contrib/pg_upgrade/check.c		patch \| blob \| history
contrib/pg_upgrade/controldata.c		patch \| blob \| history
contrib/pg_upgrade/info.c		patch \| blob \| history
contrib/pg_upgrade/pg_upgrade.h		patch \| blob \| history