From aa17c06fb58533d09c79c68a4d34a6f56687ee38 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 18 Jan 2017 12:00:00 -0500 Subject: [PATCH] Add function to import operating system collations Move this logic out of initdb into a user-callable function. This simplifies the code and makes it possible to update the standard collations later on if additional operating system collations appear. Reviewed-by: Andres Freund Reviewed-by: Euler Taveira --- doc/src/sgml/charset.sgml | 2 +- doc/src/sgml/func.sgml | 40 +++++++ src/backend/catalog/pg_collation.c | 31 ++++- src/backend/commands/collationcmds.c | 154 +++++++++++++++++++++++- src/bin/initdb/initdb.c | 166 +------------------------- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_collation_fn.h | 3 +- src/include/catalog/pg_proc.h | 3 + 8 files changed, 229 insertions(+), 172 deletions(-) diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index f8c7ac3b16..2aba0fc528 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -496,7 +496,7 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR"; - + Managing Collations diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 10e31868ba..eb1b6984bf 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -19190,6 +19190,46 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup()); in the database's default tablespace, the tablespace can be specified as 0. + + <xref linkend="functions-admin-collation"> lists functions used to manage + collations. + + + + Collation Management Functions + + + Name Return Type Description + + + + + + pg_import_system_collations + pg_import_system_collations(if_not_exists boolean, schema regnamespace) + + void + Import operating system collations + + + +
+ + + pg_import_system_collations populates the system + catalog pg_collation with collations based on all the + locales it finds on the operating system. This is + what initdb uses; + see <xref linkend="collation-managing"> for more details. If additional + locales are installed into the operating system later on, this function + can be run again to add collations for the new locales. In that case, the + parameter if_not_exists should be set to true to + skip over existing collations. The schema + parameter would typically be pg_catalog, but that is + not a requirement. (Collation objects based on locales that are no longer + present on the operating system are never removed by this function.) + +
diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c index fa42ad5ec1..694c0f67f5 100644 --- a/src/backend/catalog/pg_collation.c +++ b/src/backend/catalog/pg_collation.c @@ -41,7 +41,8 @@ Oid CollationCreate(const char *collname, Oid collnamespace, Oid collowner, int32 collencoding, - const char *collcollate, const char *collctype) + const char *collcollate, const char *collctype, + bool if_not_exists) { Relation rel; TupleDesc tupDesc; @@ -72,10 +73,21 @@ CollationCreate(const char *collname, Oid collnamespace, PointerGetDatum(collname), Int32GetDatum(collencoding), ObjectIdGetDatum(collnamespace))) - ereport(ERROR, + { + if (if_not_exists) + { + ereport(NOTICE, (errcode(ERRCODE_DUPLICATE_OBJECT), - errmsg("collation \"%s\" for encoding \"%s\" already exists", + errmsg("collation \"%s\" for encoding \"%s\" already exists, skipping", collname, pg_encoding_to_char(collencoding)))); + return InvalidOid; + } + else + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("collation \"%s\" for encoding \"%s\" already exists", + collname, pg_encoding_to_char(collencoding)))); + } /* * Also forbid matching an any-encoding entry. 
This test of course is not @@ -86,10 +98,21 @@ CollationCreate(const char *collname, Oid collnamespace, PointerGetDatum(collname), Int32GetDatum(-1), ObjectIdGetDatum(collnamespace))) - ereport(ERROR, + { + if (if_not_exists) + { + ereport(NOTICE, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("collation \"%s\" already exists, skipping", + collname))); + return InvalidOid; + } + else + ereport(ERROR, (errcode(ERRCODE_DUPLICATE_OBJECT), errmsg("collation \"%s\" already exists", collname))); + } /* open pg_collation */ rel = heap_open(CollationRelationId, RowExclusiveLock); diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index ccadfc2e47..5cb3e2bb28 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -136,7 +136,11 @@ DefineCollation(ParseState *pstate, List *names, List *parameters) GetUserId(), GetDatabaseEncoding(), collcollate, - collctype); + collctype, + false); + + if (!OidIsValid(newoid)) + return InvalidObjectAddress; ObjectAddressSet(address, CollationRelationId, newoid); @@ -177,3 +181,151 @@ IsThereCollationInNamespace(const char *collname, Oid nspOid) errmsg("collation \"%s\" already exists in schema \"%s\"", collname, get_namespace_name(nspOid)))); } + + +/* + * "Normalize" a locale name, stripping off encoding tags such as + * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro" + * -> "br_FR@euro"). Return true if a new, different name was + * generated. 
+ */ +pg_attribute_unused() +static bool +normalize_locale_name(char *new, const char *old) +{ + char *n = new; + const char *o = old; + bool changed = false; + + while (*o) + { + if (*o == '.') + { + /* skip over encoding tag such as ".utf8" or ".UTF-8" */ + o++; + while ((*o >= 'A' && *o <= 'Z') + || (*o >= 'a' && *o <= 'z') + || (*o >= '0' && *o <= '9') + || (*o == '-')) + o++; + changed = true; + } + else + *n++ = *o++; + } + *n = '\0'; + + return changed; +} + + +Datum +pg_import_system_collations(PG_FUNCTION_ARGS) +{ +#if defined(HAVE_LOCALE_T) && !defined(WIN32) + bool if_not_exists = PG_GETARG_BOOL(0); + Oid nspid = PG_GETARG_OID(1); + + FILE *locale_a_handle; + char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */ + int count = 0; +#endif + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + (errmsg("must be superuser to import system collations")))); + +#if defined(HAVE_LOCALE_T) && !defined(WIN32) + locale_a_handle = OpenPipeStream("locale -a", "r"); + if (locale_a_handle == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not execute command \"%s\": %m", + "locale -a"))); + + while (fgets(localebuf, sizeof(localebuf), locale_a_handle)) + { + int i; + size_t len; + int enc; + bool skip; + char alias[NAMEDATALEN]; + + len = strlen(localebuf); + + if (len == 0 || localebuf[len - 1] != '\n') + { + elog(DEBUG1, "locale name too long, skipped: \"%s\"", localebuf); + continue; + } + localebuf[len - 1] = '\0'; + + /* + * Some systems have locale names that don't consist entirely of ASCII + * letters (such as "bokmål" or "français"). This is + * pretty silly, since we need the locale itself to interpret the + * non-ASCII characters. We can't do much with those, so we filter + * them out. 
+ */ + skip = false; + for (i = 0; i < len; i++) + { + if (IS_HIGHBIT_SET(localebuf[i])) + { + skip = true; + break; + } + } + if (skip) + { + elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf); + continue; + } + + enc = pg_get_encoding_from_locale(localebuf, false); + if (enc < 0) + { + /* error message printed by pg_get_encoding_from_locale() */ + continue; + } + if (!PG_VALID_BE_ENCODING(enc)) + continue; /* ignore locales for client-only encodings */ + if (enc == PG_SQL_ASCII) + continue; /* C/POSIX are already in the catalog */ + + count++; + + CollationCreate(localebuf, nspid, GetUserId(), enc, + localebuf, localebuf, if_not_exists); + + CommandCounterIncrement(); + + /* + * Generate aliases such as "en_US" in addition to "en_US.utf8" for + * ease of use. Note that collation names are unique per encoding + * only, so this doesn't clash with "en_US" for LATIN1, say. + * + * This always runs in "if not exists" mode, to skip aliases that + * conflict with an existing locale name for the same encoding. For + * example, "br_FR.iso88591" is normalized to "br_FR", both for + * encoding LATIN1. But the unnormalized locale "br_FR" already + * exists for LATIN1. 
+ */ + if (normalize_locale_name(alias, localebuf)) + { + CollationCreate(alias, nspid, GetUserId(), enc, + localebuf, localebuf, true); + CommandCounterIncrement(); + } + } + + ClosePipeStream(locale_a_handle); + + if (count == 0) + ereport(ERROR, + (errmsg("no usable system locales were found"))); +#endif /* not HAVE_LOCALE_T && not WIN32 */ + + PG_RETURN_VOID(); +} diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 1e7d677244..eb1be100c8 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -1608,178 +1608,16 @@ setup_description(FILE *cmdfd) PG_CMD_PUTS("DROP TABLE tmp_pg_shdescription;\n\n"); } -#ifdef HAVE_LOCALE_T -/* - * "Normalize" a locale name, stripping off encoding tags such as - * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro" - * -> "br_FR@euro"). Return true if a new, different name was - * generated. - */ -static bool -normalize_locale_name(char *new, const char *old) -{ - char *n = new; - const char *o = old; - bool changed = false; - - while (*o) - { - if (*o == '.') - { - /* skip over encoding tag such as ".utf8" or ".UTF-8" */ - o++; - while ((*o >= 'A' && *o <= 'Z') - || (*o >= 'a' && *o <= 'z') - || (*o >= '0' && *o <= '9') - || (*o == '-')) - o++; - changed = true; - } - else - *n++ = *o++; - } - *n = '\0'; - - return changed; -} -#endif /* HAVE_LOCALE_T */ - /* * populate pg_collation */ static void setup_collation(FILE *cmdfd) { -#if defined(HAVE_LOCALE_T) && !defined(WIN32) - int i; - FILE *locale_a_handle; - char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */ - int count = 0; - - locale_a_handle = popen_check("locale -a", "r"); - if (!locale_a_handle) - return; /* complaint already printed */ - - PG_CMD_PUTS("CREATE TEMP TABLE tmp_pg_collation ( " - " collname name, " - " locale name, " - " encoding int) WITHOUT OIDS;\n\n"); - - while (fgets(localebuf, sizeof(localebuf), locale_a_handle)) - { - size_t len; - int enc; - bool skip; - char *quoted_locale; - char 
alias[NAMEDATALEN]; - - len = strlen(localebuf); - - if (len == 0 || localebuf[len - 1] != '\n') - { - if (debug) - fprintf(stderr, _("%s: locale name too long, skipped: \"%s\"\n"), - progname, localebuf); - continue; - } - localebuf[len - 1] = '\0'; - - /* - * Some systems have locale names that don't consist entirely of ASCII - * letters (such as "bokmål" or "français"). This is - * pretty silly, since we need the locale itself to interpret the - * non-ASCII characters. We can't do much with those, so we filter - * them out. - */ - skip = false; - for (i = 0; i < len; i++) - { - if (IS_HIGHBIT_SET(localebuf[i])) - { - skip = true; - break; - } - } - if (skip) - { - if (debug) - fprintf(stderr, _("%s: locale name has non-ASCII characters, skipped: \"%s\"\n"), - progname, localebuf); - continue; - } - - enc = pg_get_encoding_from_locale(localebuf, debug); - if (enc < 0) - { - /* error message printed by pg_get_encoding_from_locale() */ - continue; - } - if (!PG_VALID_BE_ENCODING(enc)) - continue; /* ignore locales for client-only encodings */ - if (enc == PG_SQL_ASCII) - continue; /* C/POSIX are already in the catalog */ - - count++; - - quoted_locale = escape_quotes(localebuf); - - PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n\n", - quoted_locale, quoted_locale, enc); - - /* - * Generate aliases such as "en_US" in addition to "en_US.utf8" for - * ease of use. Note that collation names are unique per encoding - * only, so this doesn't clash with "en_US" for LATIN1, say. 
- */ - if (normalize_locale_name(alias, localebuf)) - { - char *quoted_alias = escape_quotes(alias); - - PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n\n", - quoted_alias, quoted_locale, enc); - free(quoted_alias); - } - free(quoted_locale); - } + PG_CMD_PUTS("SELECT pg_import_system_collations(if_not_exists => false, schema => 'pg_catalog');\n\n"); /* Add an SQL-standard name */ - PG_CMD_PRINTF1("INSERT INTO tmp_pg_collation VALUES ('ucs_basic', 'C', %d);\n\n", PG_UTF8); - - /* - * When copying collations to the final location, eliminate aliases that - * conflict with an existing locale name for the same encoding. For - * example, "br_FR.iso88591" is normalized to "br_FR", both for encoding - * LATIN1. But the unnormalized locale "br_FR" already exists for LATIN1. - * Prefer the alias that matches the OS locale name, else the first locale - * name by sort order (arbitrary choice to be deterministic). - * - * Also, eliminate any aliases that conflict with pg_collation's - * hard-wired entries for "C" etc. - */ - PG_CMD_PUTS("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) " - " SELECT DISTINCT ON (collname, encoding)" - " collname, " - " (SELECT oid FROM pg_namespace WHERE nspname = 'pg_catalog') AS collnamespace, " - " (SELECT relowner FROM pg_class WHERE relname = 'pg_collation') AS collowner, " - " encoding, locale, locale " - " FROM tmp_pg_collation" - " WHERE NOT EXISTS (SELECT 1 FROM pg_collation WHERE collname = tmp_pg_collation.collname)" - " ORDER BY collname, encoding, (collname = locale) DESC, locale;\n\n"); - - /* - * Even though the table is temp, drop it explicitly so it doesn't get - * copied into template0/postgres databases. 
- */ - PG_CMD_PUTS("DROP TABLE tmp_pg_collation;\n\n"); - - pclose(locale_a_handle); - - if (count == 0 && !debug) - { - printf(_("No usable system locales were found.\n")); - printf(_("Use the option \"--debug\" to see details.\n")); - } -#endif /* not HAVE_LOCALE_T && not WIN32 */ + PG_CMD_PRINTF2("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) VALUES ('ucs_basic', 'pg_catalog'::regnamespace, '%s'::regrole, %d, 'C', 'C');\n\n", escape_quotes(username), PG_UTF8); } /* diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 54211f5618..7d33f39bea 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201701172 +#define CATALOG_VERSION_NO 201701181 #endif diff --git a/src/include/catalog/pg_collation_fn.h b/src/include/catalog/pg_collation_fn.h index 1ea757f150..482ba7920e 100644 --- a/src/include/catalog/pg_collation_fn.h +++ b/src/include/catalog/pg_collation_fn.h @@ -17,7 +17,8 @@ extern Oid CollationCreate(const char *collname, Oid collnamespace, Oid collowner, int32 collencoding, - const char *collcollate, const char *collctype); + const char *collcollate, const char *collctype, + bool if_not_exists); extern void RemoveCollationById(Oid collationOid); #endif /* PG_COLLATION_FN_H */ diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 42f36891af..1a0eba3ca1 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -5349,6 +5349,9 @@ DESCR("pg_controldata recovery state information as a function"); DATA(insert OID = 3444 ( pg_control_init PGNSP PGUID 12 1 0 0 0 f f f f t f v s 0 0 2249 "" "{23,23,23,23,23,23,23,23,23,16,16,16,23}" "{o,o,o,o,o,o,o,o,o,o,o,o,o}" 
"{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,bigint_timestamps,float4_pass_by_value,float8_pass_by_value,data_page_checksum_version}" _null_ _null_ pg_control_init _null_ _null_ _null_ )); DESCR("pg_controldata init state information as a function"); +DATA(insert OID = 3445 ( pg_import_system_collations PGNSP PGUID 12 100 0 0 0 f f f f t f v r 2 0 2278 "16 4089" _null_ _null_ "{if_not_exists,schema}" _null_ _null_ pg_import_system_collations _null_ _null_ _null_ )); +DESCR("import collations from operating system"); + /* * Symbolic values for provolatile column: these indicate whether the result * of a function is dependent *only* on the values of its explicit arguments, -- 2.40.0