granicus.if.org Git - postgresql/commitdiff
Add function to import operating system collations
author: Peter Eisentraut <peter_e@gmx.net>
Wed, 18 Jan 2017 17:00:00 +0000 (12:00 -0500)
committer: Peter Eisentraut <peter_e@gmx.net>
Wed, 18 Jan 2017 14:35:56 +0000 (09:35 -0500)
Move this logic out of initdb into a user-callable function.  This
simplifies the code and makes it possible to update the standard
collations later on if additional operating system collations appear.

Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Euler Taveira <euler@timbira.com.br>
doc/src/sgml/charset.sgml
doc/src/sgml/func.sgml
src/backend/catalog/pg_collation.c
src/backend/commands/collationcmds.c
src/bin/initdb/initdb.c
src/include/catalog/catversion.h
src/include/catalog/pg_collation_fn.h
src/include/catalog/pg_proc.h

index f8c7ac3b1694cdad624c9926a365d8d2860c3d00..2aba0fc5282fc35263d50f58601f352f819b5d05 100644 (file)
@@ -496,7 +496,7 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR";
    </para>
   </sect2>
 
-  <sect2>
+  <sect2 id="collation-managing">
    <title>Managing Collations</title>
 
    <para>
index 10e31868baf24908a3112e36502c1bb3803bdda2..eb1b6984bf05871d2438339efb271620540eb394 100644 (file)
@@ -19190,6 +19190,46 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
     in the database's default tablespace, the tablespace can be specified as 0.
    </para>
 
+   <para>
+    <xref linkend="functions-admin-collation"> lists functions used to manage
+    collations.
+   </para>
+
+   <table id="functions-admin-collation">
+    <title>Collation Management Functions</title>
+    <tgroup cols="3">
+     <thead>
+      <row><entry>Name</entry> <entry>Return Type</entry> <entry>Description</entry></row>
+     </thead>
+
+     <tbody>
+      <row>
+       <entry>
+        <indexterm><primary>pg_import_system_collations</primary></indexterm>
+        <literal><function>pg_import_system_collations(<parameter>if_not_exists</> <type>boolean</>, <parameter>schema</> <type>regnamespace</>)</function></literal>
+       </entry>
+       <entry><type>void</type></entry>
+       <entry>Import operating system collations</entry>
+      </row>
+     </tbody>
+    </tgroup>
+   </table>
+
+   <para>
+    <function>pg_import_system_collations</> populates the system
+    catalog <literal>pg_collation</literal> with collations based on all the
+    locales it finds on the operating system.  This is
+    what <command>initdb</command> uses;
+    see <xref linkend="collation-managing"> for more details.  If additional
+    locales are installed into the operating system later on, this function
+    can be run again to add collations for the new locales.  In that case, the
+    parameter <parameter>if_not_exists</parameter> should be set to true to
+    skip over existing collations.  The <parameter>schema</parameter>
+    parameter would typically be <literal>pg_catalog</literal>, but that is
+    not a requirement.  (Collation objects based on locales that are no longer
+    present on the operating system are never removed by this function.)
+   </para>
+
   </sect2>
 
   <sect2 id="functions-admin-index">
index fa42ad5ec1b50b3d26e84ec1eb0f1d44e4e147f8..694c0f67f55432f18d100b05d8fc81907871412f 100644 (file)
@@ -41,7 +41,8 @@ Oid
 CollationCreate(const char *collname, Oid collnamespace,
                                Oid collowner,
                                int32 collencoding,
-                               const char *collcollate, const char *collctype)
+                               const char *collcollate, const char *collctype,
+                               bool if_not_exists)
 {
        Relation        rel;
        TupleDesc       tupDesc;
@@ -72,10 +73,21 @@ CollationCreate(const char *collname, Oid collnamespace,
                                                          PointerGetDatum(collname),
                                                          Int32GetDatum(collencoding),
                                                          ObjectIdGetDatum(collnamespace)))
-               ereport(ERROR,
+       {
+               if (if_not_exists)
+               {
+                       ereport(NOTICE,
                                (errcode(ERRCODE_DUPLICATE_OBJECT),
-                                errmsg("collation \"%s\" for encoding \"%s\" already exists",
+                                errmsg("collation \"%s\" for encoding \"%s\" already exists, skipping",
                                                collname, pg_encoding_to_char(collencoding))));
+                       return InvalidOid;
+               }
+               else
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_DUPLICATE_OBJECT),
+                                        errmsg("collation \"%s\" for encoding \"%s\" already exists",
+                                                       collname, pg_encoding_to_char(collencoding))));
+       }
 
        /*
         * Also forbid matching an any-encoding entry.  This test of course is not
@@ -86,10 +98,21 @@ CollationCreate(const char *collname, Oid collnamespace,
                                                          PointerGetDatum(collname),
                                                          Int32GetDatum(-1),
                                                          ObjectIdGetDatum(collnamespace)))
-               ereport(ERROR,
+       {
+               if (if_not_exists)
+               {
+                       ereport(NOTICE,
+                               (errcode(ERRCODE_DUPLICATE_OBJECT),
+                                errmsg("collation \"%s\" already exists, skipping",
+                                               collname)));
+                       return InvalidOid;
+               }
+               else
+                       ereport(ERROR,
                                (errcode(ERRCODE_DUPLICATE_OBJECT),
                                 errmsg("collation \"%s\" already exists",
                                                collname)));
+       }
 
        /* open pg_collation */
        rel = heap_open(CollationRelationId, RowExclusiveLock);
index ccadfc2e47476658c0b75cb53fa0af106e89834f..5cb3e2bb282e39bd453ad0dbff15f0cd2e3d5d72 100644 (file)
@@ -136,7 +136,11 @@ DefineCollation(ParseState *pstate, List *names, List *parameters)
                                                         GetUserId(),
                                                         GetDatabaseEncoding(),
                                                         collcollate,
-                                                        collctype);
+                                                        collctype,
+                                                        false);
+
+       if (!OidIsValid(newoid))
+               return InvalidObjectAddress;
 
        ObjectAddressSet(address, CollationRelationId, newoid);
 
@@ -177,3 +181,151 @@ IsThereCollationInNamespace(const char *collname, Oid nspOid)
                                 errmsg("collation \"%s\" already exists in schema \"%s\"",
                                                collname, get_namespace_name(nspOid))));
 }
+
+
+/*
+ * "Normalize" a locale name, stripping off encoding tags such as
+ * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
+ * -> "br_FR@euro").
+ *
+ * "old" is the NUL-terminated input name.  The result is written to "new",
+ * which must have room for at least strlen(old) + 1 bytes, because a name
+ * without any '.' is copied unchanged.
+ *
+ * Return true if at least one '.' was seen, i.e. a new, different name was
+ * generated; return false if "new" is merely a copy of "old".
+ *
+ * NOTE(review): pg_attribute_unused() is presumably needed because the only
+ * caller visible here sits inside "#if defined(HAVE_LOCALE_T) &&
+ * !defined(WIN32)" and is therefore not compiled on every platform.
+ */
+pg_attribute_unused()
+static bool
+normalize_locale_name(char *new, const char *old)
+{
+       char       *n = new;
+       const char *o = old;
+       bool            changed = false;
+
+       while (*o)
+       {
+               if (*o == '.')
+               {
+                       /*
+                        * Skip the '.' itself and the encoding tag after it, such as
+                        * ".utf8" or ".UTF-8".  The tag is taken to be the run of ASCII
+                        * letters, digits and '-' that follows the dot.
+                        */
+                       o++;
+                       while ((*o >= 'A' && *o <= 'Z')
+                                  || (*o >= 'a' && *o <= 'z')
+                                  || (*o >= '0' && *o <= '9')
+                                  || (*o == '-'))
+                               o++;
+                       changed = true;
+               }
+               else
+                       *n++ = *o++;            /* anything else, e.g. "@euro", is kept */
+       }
+       *n = '\0';
+
+       return changed;
+}
+
+/*
+ * pg_import_system_collations(if_not_exists boolean, schema regnamespace)
+ *
+ * SQL-callable function that creates collations from the locale names
+ * printed by the command "locale -a".  Only superusers may call it.
+ *
+ * For every name that fits into a NAMEDATALEN buffer, contains no high-bit
+ * characters, and maps to a valid backend encoding other than SQL_ASCII, a
+ * collation of that name is created in the given schema, owned by the
+ * current user, with the locale name used for both collate and ctype.  If
+ * stripping the encoding tag yields a different name, a collation with that
+ * alias is created as well.
+ *
+ * if_not_exists is passed on to CollationCreate() for the full locale
+ * names; aliases are always created in "if not exists" mode.
+ *
+ * An error is raised if no usable locale was found.  Where HAVE_LOCALE_T is
+ * not defined, or on WIN32, the function does nothing beyond the superuser
+ * check.
+ */
+Datum
+pg_import_system_collations(PG_FUNCTION_ARGS)
+{
+#if defined(HAVE_LOCALE_T) && !defined(WIN32)
+       bool            if_not_exists = PG_GETARG_BOOL(0);
+       Oid         nspid = PG_GETARG_OID(1);
+
+       FILE       *locale_a_handle;
+       char            localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */
+       int                     count = 0;      /* locales that passed all filters below */
+#endif
+
+       if (!superuser())
+               ereport(ERROR,
+                               (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+                                (errmsg("must be superuser to import system collations"))));
+
+#if defined(HAVE_LOCALE_T) && !defined(WIN32)
+       locale_a_handle = OpenPipeStream("locale -a", "r");
+       if (locale_a_handle == NULL)
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not execute command \"%s\": %m",
+                                               "locale -a")));
+
+       while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
+       {
+               int                     i;
+               size_t          len;
+               int                     enc;
+               bool            skip;
+               char            alias[NAMEDATALEN];
+
+               len = strlen(localebuf);
+
+               if (len == 0 || localebuf[len - 1] != '\n')     /* line did not fit */
+               {
+                       elog(DEBUG1, "locale name too long, skipped: \"%s\"", localebuf);
+                       continue;       /* NOTE(review): the next fgets() returns the rest
+                                                * of this over-long line, which is then processed
+                                                * as if it were a separate locale name */
+               }
+               localebuf[len - 1] = '\0';
+
+               /*
+                * Some systems have locale names that don't consist entirely of ASCII
+                * letters (such as "bokm&aring;l" or "fran&ccedil;ais").  This is
+                * pretty silly, since we need the locale itself to interpret the
+                * non-ASCII characters. We can't do much with those, so we filter
+                * them out.
+                */
+               skip = false;
+               for (i = 0; i < len; i++)
+               {
+                       if (IS_HIGHBIT_SET(localebuf[i]))
+                       {
+                               skip = true;
+                               break;
+                       }
+               }
+               if (skip)
+               {
+                       elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf);
+                       continue;
+               }
+
+               enc = pg_get_encoding_from_locale(localebuf, false);
+               if (enc < 0)
+               {
+                       /*
+                        * Encoding could not be determined; skip this locale.
+                        * NOTE(review): the call above passes false as its second
+                        * argument; if that flag controls whether
+                        * pg_get_encoding_from_locale() prints a message, nothing is
+                        * reported here -- confirm.
+                        */
+                       continue;
+               }
+               if (!PG_VALID_BE_ENCODING(enc))
+                       continue;                       /* ignore locales for client-only encodings */
+               if (enc == PG_SQL_ASCII)
+                       continue;                       /* C/POSIX are already in the catalog */
+
+               count++;        /* counted before creation, so skipped duplicates count too */
+
+               CollationCreate(localebuf, nspid, GetUserId(), enc,
+                                               localebuf, localebuf, if_not_exists);
+
+               CommandCounterIncrement();      /* presumably so that later duplicate
+                                                                        * checks see the row just added */
+
+               /*
+                * Generate aliases such as "en_US" in addition to "en_US.utf8" for
+                * ease of use.  Note that collation names are unique per encoding
+                * only, so this doesn't clash with "en_US" for LATIN1, say.
+                *
+                * This always runs in "if not exists" mode, to skip aliases that
+                * conflict with an existing locale name for the same encoding.  For
+                * example, "br_FR.iso88591" is normalized to "br_FR", both for
+                * encoding LATIN1.  But the unnormalized locale "br_FR" already
+                * exists for LATIN1.
+                */
+               if (normalize_locale_name(alias, localebuf))
+               {
+                       CollationCreate(alias, nspid, GetUserId(), enc,
+                                                       localebuf, localebuf, true);
+                       CommandCounterIncrement();
+               }
+       }
+
+       ClosePipeStream(locale_a_handle);
+
+       if (count == 0)
+               ereport(ERROR,
+                               (errmsg("no usable system locales were found")));
+#endif /* HAVE_LOCALE_T && not WIN32 */
+
+       PG_RETURN_VOID();
+}
index 1e7d67724478205e9a483b89b1b187a7ddff6510..eb1be100c85326d4c4002a7e89d9d7eb5157f4d2 100644 (file)
@@ -1608,178 +1608,16 @@ setup_description(FILE *cmdfd)
        PG_CMD_PUTS("DROP TABLE tmp_pg_shdescription;\n\n");
 }
 
-#ifdef HAVE_LOCALE_T
-/*
- * "Normalize" a locale name, stripping off encoding tags such as
- * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
- * -> "br_FR@euro").  Return true if a new, different name was
- * generated.
- */
-static bool
-normalize_locale_name(char *new, const char *old)
-{
-       char       *n = new;
-       const char *o = old;
-       bool            changed = false;
-
-       while (*o)
-       {
-               if (*o == '.')
-               {
-                       /* skip over encoding tag such as ".utf8" or ".UTF-8" */
-                       o++;
-                       while ((*o >= 'A' && *o <= 'Z')
-                                  || (*o >= 'a' && *o <= 'z')
-                                  || (*o >= '0' && *o <= '9')
-                                  || (*o == '-'))
-                               o++;
-                       changed = true;
-               }
-               else
-                       *n++ = *o++;
-       }
-       *n = '\0';
-
-       return changed;
-}
-#endif   /* HAVE_LOCALE_T */
-
 /*
  * populate pg_collation
  */
 static void
 setup_collation(FILE *cmdfd)
 {
-#if defined(HAVE_LOCALE_T) && !defined(WIN32)
-       int                     i;
-       FILE       *locale_a_handle;
-       char            localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */
-       int                     count = 0;
-
-       locale_a_handle = popen_check("locale -a", "r");
-       if (!locale_a_handle)
-               return;                                 /* complaint already printed */
-
-       PG_CMD_PUTS("CREATE TEMP TABLE tmp_pg_collation ( "
-                               "       collname name, "
-                               "       locale name, "
-                               "       encoding int) WITHOUT OIDS;\n\n");
-
-       while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
-       {
-               size_t          len;
-               int                     enc;
-               bool            skip;
-               char       *quoted_locale;
-               char            alias[NAMEDATALEN];
-
-               len = strlen(localebuf);
-
-               if (len == 0 || localebuf[len - 1] != '\n')
-               {
-                       if (debug)
-                               fprintf(stderr, _("%s: locale name too long, skipped: \"%s\"\n"),
-                                               progname, localebuf);
-                       continue;
-               }
-               localebuf[len - 1] = '\0';
-
-               /*
-                * Some systems have locale names that don't consist entirely of ASCII
-                * letters (such as "bokm&aring;l" or "fran&ccedil;ais").  This is
-                * pretty silly, since we need the locale itself to interpret the
-                * non-ASCII characters. We can't do much with those, so we filter
-                * them out.
-                */
-               skip = false;
-               for (i = 0; i < len; i++)
-               {
-                       if (IS_HIGHBIT_SET(localebuf[i]))
-                       {
-                               skip = true;
-                               break;
-                       }
-               }
-               if (skip)
-               {
-                       if (debug)
-                               fprintf(stderr, _("%s: locale name has non-ASCII characters, skipped: \"%s\"\n"),
-                                               progname, localebuf);
-                       continue;
-               }
-
-               enc = pg_get_encoding_from_locale(localebuf, debug);
-               if (enc < 0)
-               {
-                       /* error message printed by pg_get_encoding_from_locale() */
-                       continue;
-               }
-               if (!PG_VALID_BE_ENCODING(enc))
-                       continue;                       /* ignore locales for client-only encodings */
-               if (enc == PG_SQL_ASCII)
-                       continue;                       /* C/POSIX are already in the catalog */
-
-               count++;
-
-               quoted_locale = escape_quotes(localebuf);
-
-               PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n\n",
-                                          quoted_locale, quoted_locale, enc);
-
-               /*
-                * Generate aliases such as "en_US" in addition to "en_US.utf8" for
-                * ease of use.  Note that collation names are unique per encoding
-                * only, so this doesn't clash with "en_US" for LATIN1, say.
-                */
-               if (normalize_locale_name(alias, localebuf))
-               {
-                       char       *quoted_alias = escape_quotes(alias);
-
-                       PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n\n",
-                                                  quoted_alias, quoted_locale, enc);
-                       free(quoted_alias);
-               }
-               free(quoted_locale);
-       }
+       PG_CMD_PUTS("SELECT pg_import_system_collations(if_not_exists => false, schema => 'pg_catalog');\n\n");
 
        /* Add an SQL-standard name */
-       PG_CMD_PRINTF1("INSERT INTO tmp_pg_collation VALUES ('ucs_basic', 'C', %d);\n\n", PG_UTF8);
-
-       /*
-        * When copying collations to the final location, eliminate aliases that
-        * conflict with an existing locale name for the same encoding.  For
-        * example, "br_FR.iso88591" is normalized to "br_FR", both for encoding
-        * LATIN1.  But the unnormalized locale "br_FR" already exists for LATIN1.
-        * Prefer the alias that matches the OS locale name, else the first locale
-        * name by sort order (arbitrary choice to be deterministic).
-        *
-        * Also, eliminate any aliases that conflict with pg_collation's
-        * hard-wired entries for "C" etc.
-        */
-       PG_CMD_PUTS("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) "
-                               " SELECT DISTINCT ON (collname, encoding)"
-                               "   collname, "
-                               "   (SELECT oid FROM pg_namespace WHERE nspname = 'pg_catalog') AS collnamespace, "
-                               "   (SELECT relowner FROM pg_class WHERE relname = 'pg_collation') AS collowner, "
-                               "   encoding, locale, locale "
-                               "  FROM tmp_pg_collation"
-                               "  WHERE NOT EXISTS (SELECT 1 FROM pg_collation WHERE collname = tmp_pg_collation.collname)"
-        "  ORDER BY collname, encoding, (collname = locale) DESC, locale;\n\n");
-
-       /*
-        * Even though the table is temp, drop it explicitly so it doesn't get
-        * copied into template0/postgres databases.
-        */
-       PG_CMD_PUTS("DROP TABLE tmp_pg_collation;\n\n");
-
-       pclose(locale_a_handle);
-
-       if (count == 0 && !debug)
-       {
-               printf(_("No usable system locales were found.\n"));
-               printf(_("Use the option \"--debug\" to see details.\n"));
-       }
-#endif   /* not HAVE_LOCALE_T  && not WIN32 */
+       PG_CMD_PRINTF2("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) VALUES ('ucs_basic', 'pg_catalog'::regnamespace, '%s'::regrole, %d, 'C', 'C');\n\n", escape_quotes(username), PG_UTF8);
 }
 
 /*
index 54211f5618d2dacaf51feaa05f26cb481e953b5f..7d33f39bea599c8ca189feaa2a918b966bf5fede 100644 (file)
@@ -53,6 +53,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     201701172
+#define CATALOG_VERSION_NO     201701181
 
 #endif
index 1ea757f1505558668559e4a9c98ce50a09a01b92..482ba7920e5e3c8f8078802491e598df09a1539e 100644 (file)
@@ -17,7 +17,8 @@
 extern Oid CollationCreate(const char *collname, Oid collnamespace,
                                Oid collowner,
                                int32 collencoding,
-                               const char *collcollate, const char *collctype);
+                               const char *collcollate, const char *collctype,
+                               bool if_not_exists);
 extern void RemoveCollationById(Oid collationOid);
 
 #endif   /* PG_COLLATION_FN_H */
index 42f36891af582507d18767ca832108d5975610da..1a0eba3ca1b807d5e5c5b216dbb2099817bcf443 100644 (file)
@@ -5349,6 +5349,9 @@ DESCR("pg_controldata recovery state information as a function");
 DATA(insert OID = 3444 ( pg_control_init PGNSP PGUID 12 1 0 0 0 f f f f t f v s 0 0 2249 "" "{23,23,23,23,23,23,23,23,23,16,16,16,23}" "{o,o,o,o,o,o,o,o,o,o,o,o,o}" "{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,bigint_timestamps,float4_pass_by_value,float8_pass_by_value,data_page_checksum_version}" _null_ _null_ pg_control_init _null_ _null_ _null_ ));
 DESCR("pg_controldata init state information as a function");
 
+DATA(insert OID = 3445 ( pg_import_system_collations PGNSP PGUID 12 100 0 0 0 f f f f t f v r 2 0 2278 "16 4089" _null_ _null_ "{if_not_exists,schema}" _null_ _null_ pg_import_system_collations _null_ _null_ _null_ ));
+DESCR("import collations from operating system");
+
 /*
  * Symbolic values for provolatile column: these indicate whether the result
  * of a function is dependent *only* on the values of its explicit arguments,