Be forgiving of variant spellings of locale names in pg_upgrade.

author Tom Lane <tgl@sss.pgh.pa.us>

Fri, 31 Jan 2014 00:07:06 +0000 (19:07 -0500)

committer Tom Lane <tgl@sss.pgh.pa.us>

Fri, 31 Jan 2014 00:07:06 +0000 (19:07 -0500)
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 31 Jan 2014 00:07:06 +0000 (19:07 -0500)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 31 Jan 2014 00:07:06 +0000 (19:07 -0500)
diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c

index e395c7c7f629694328d88912d51b473c07df1721..d52051a6e9981ece81ec66655182c2bb800d0bde 100644 (file)
--- a/contrib/pg_upgrade/check.c
+++ b/contrib/pg_upgrade/check.c
@@ -9,6 +9,7 @@
  
  #include "postgres_fe.h"
  
+#include "mb/pg_wchar.h"
  #include "pg_upgrade.h"
  
  
@@ -16,6 +17,8 @@ static void set_locale_and_encoding(ClusterInfo *cluster);
  static void check_new_cluster_is_empty(void);
  static void check_locale_and_encoding(ControlData *oldctrl,
                                                   ControlData *newctrl);
+static bool equivalent_locale(const char *loca, const char *locb);
+static bool equivalent_encoding(const char *chara, const char *charb);
  static void check_is_super_user(ClusterInfo *cluster);
  static void check_for_prepared_transactions(ClusterInfo *cluster);
  static void check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster);
@@ -397,27 +400,80 @@ set_locale_and_encoding(ClusterInfo *cluster)
  /*
   * check_locale_and_encoding()
   *
- *     locale is not in pg_controldata in 8.4 and later so
- *     we probably had to get via a database query.
+ * Check that old and new locale and encoding match, reporting any mismatch
+ * via pg_fatal().  Even though the backend tries to canonicalize stored
+ * locale names, the platform often doesn't cooperate, so one DB may say
+ * "en_US.UTF-8" while the other says "en_US.utf8".  Try to be forgiving.
   */
  static void
  check_locale_and_encoding(ControlData *oldctrl,
                                                   ControlData *newctrl)
  {
-       /*
-        * These are often defined with inconsistent case, so use pg_strcasecmp().
-        * They also often use inconsistent hyphenation, which we cannot fix, e.g.
-        * UTF-8 vs. UTF8, so at least we display the mismatching values.
-        */
-       if (pg_strcasecmp(oldctrl->lc_collate, newctrl->lc_collate) != 0)
+       if (!equivalent_locale(oldctrl->lc_collate, newctrl->lc_collate))
                 pg_fatal("lc_collate cluster values do not match:  old \"%s\", new \"%s\"\n",
-                          oldctrl->lc_collate, newctrl->lc_collate);
-       if (pg_strcasecmp(oldctrl->lc_ctype, newctrl->lc_ctype) != 0)
+                                oldctrl->lc_collate, newctrl->lc_collate);
+       if (!equivalent_locale(oldctrl->lc_ctype, newctrl->lc_ctype))
                 pg_fatal("lc_ctype cluster values do not match:  old \"%s\", new \"%s\"\n",
-                          oldctrl->lc_ctype, newctrl->lc_ctype);
-       if (pg_strcasecmp(oldctrl->encoding, newctrl->encoding) != 0)
+                                oldctrl->lc_ctype, newctrl->lc_ctype);
+       if (!equivalent_encoding(oldctrl->encoding, newctrl->encoding))
                 pg_fatal("encoding cluster values do not match:  old \"%s\", new \"%s\"\n",
-                          oldctrl->encoding, newctrl->encoding);
+                                oldctrl->encoding, newctrl->encoding);
+}
+
+/*
+ * equivalent_locale()
+ *
+ * Best effort locale-name comparison.  Return false if we are not 100% sure
+ * the locales are equivalent.  Names equal ignoring case always match.
+ */
+static bool
+equivalent_locale(const char *loca, const char *locb)
+{
+       const char *chara = strrchr(loca, '.');
+       const char *charb = strrchr(locb, '.');
+       int                     lencmp;
+
+       /*
+        * Names equal ignoring case match, even if the suffix after the last '.'
+        * is not a recognized encoding; with no suffix, that is the only way.
+        */
+       if (pg_strcasecmp(loca, locb) == 0)
+               return true;
+       if (!chara || !charb)
+               return false;
+
+       /* Spellings differ: the encoding parts must be known equivalent ... */
+       if (!equivalent_encoding(chara + 1, charb + 1))
+               return false;
+
+       /* ... and the identifiers (e.g. en_US) must match ignoring case. */
+       lencmp = chara - loca;
+       if (lencmp != charb - locb)
+               return false;
+
+       return (pg_strncasecmp(loca, locb, lencmp) == 0);
+}
+
+/*
+ * equivalent_encoding()
+ *
+ * Best effort encoding-name comparison.  True only when both names resolve
+ * via pg_valid_server_encoding() to the same non-negative encoding ID.
+ *
+ * Because that lookup does case folding and ignores non-alphanumeric
+ * characters, many popular variant spellings are recognized as equivalent,
+ * eg "utf8" and "UTF-8" will match.
+ */
+static bool
+equivalent_encoding(const char *chara, const char *charb)
+{
+       int                     id_a;
+       int                     id_b;
+
+       id_a = pg_valid_server_encoding(chara);
+       id_b = pg_valid_server_encoding(charb);
+
+       return (id_a >= 0 && id_b >= 0 && id_a == id_b);
  }
author	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 31 Jan 2014 00:07:06 +0000 (19:07 -0500)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 31 Jan 2014 00:07:06 +0000 (19:07 -0500)