1 /*-------------------------------------------------------------------------
4 * collation-related commands support code
6 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/commands/collationcmds.c
13 *-------------------------------------------------------------------------
17 #include "access/heapam.h"
18 #include "access/htup_details.h"
19 #include "access/xact.h"
20 #include "catalog/dependency.h"
21 #include "catalog/indexing.h"
22 #include "catalog/namespace.h"
23 #include "catalog/objectaccess.h"
24 #include "catalog/pg_collation.h"
25 #include "catalog/pg_collation_fn.h"
26 #include "commands/alter.h"
27 #include "commands/collationcmds.h"
28 #include "commands/comment.h"
29 #include "commands/dbcommands.h"
30 #include "commands/defrem.h"
31 #include "mb/pg_wchar.h"
32 #include "miscadmin.h"
33 #include "utils/builtins.h"
34 #include "utils/lsyscache.h"
35 #include "utils/pg_locale.h"
36 #include "utils/rel.h"
37 #include "utils/syscache.h"
42 char *localename; /* name of locale, as per "locale -a"; this is the sort key used by cmpaliases */
43 char *alias; /* shortened alias for same, as written by normalize_libc_locale_name() */
44 int enc; /* encoding, as returned by pg_get_encoding_from_locale() for localename */
/*
 * DefineCollation
 *
 * Create a new collation from the option list in "parameters".
 *
 * What the visible code establishes:
 *	- the target schema and the bare collation name are derived from "names",
 *	  and the current user must pass the ACL_CREATE check on that schema;
 *	- the recognized option names are "from", "locale", "lc_collate",
 *	  "lc_ctype", "provider" and "version"; any other name is reported with
 *	  ERRCODE_SYNTAX_ERROR at the option location;
 *	- "locale" cannot be combined with "lc_collate" or "lc_ctype", and
 *	  "from" must be the only option given;
 *	- with "from", collate, ctype, provider and encoding are copied from the
 *	  existing collation, and the "default" collation cannot be copied;
 *	- provider names are matched case-insensitively against "icu" and "libc";
 *	- the catalog entry is made by CollationCreate(); when that yields an
 *	  invalid OID the function returns InvalidObjectAddress, otherwise the
 *	  address is built from CollationRelationId and the new OID.
 *
 * NOTE(review): several statements of this function (braces, some
 * declarations and some conditions) are not visible in this listing, so the
 * exact nesting of the steps below should be confirmed against the full file.
 */
52 DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_exists)
58 DefElem *fromEl = NULL;
59 DefElem *localeEl = NULL;
60 DefElem *lccollateEl = NULL;
61 DefElem *lcctypeEl = NULL;
62 DefElem *providerEl = NULL;
63 DefElem *versionEl = NULL;
64 char *collcollate = NULL;
65 char *collctype = NULL;
66 char *collproviderstr = NULL;
68 char collprovider = 0;
69 char *collversion = NULL;
71 ObjectAddress address;
/* Resolve the creation schema; collName receives the unqualified name. */
73 collNamespace = QualifiedNameGetCreationNamespace(names, &collName);
/* The current user needs CREATE permission on that schema. */
75 aclresult = pg_namespace_aclcheck(collNamespace, GetUserId(), ACL_CREATE);
76 if (aclresult != ACLCHECK_OK)
77 aclcheck_error(aclresult, OBJECT_SCHEMA,
78 get_namespace_name(collNamespace));
/* Walk the option list and match each option by name. */
80 foreach(pl, parameters)
82 DefElem *defel = lfirst_node(DefElem, pl);
85 if (strcmp(defel->defname, "from") == 0)
87 else if (strcmp(defel->defname, "locale") == 0)
89 else if (strcmp(defel->defname, "lc_collate") == 0)
90 defelp = &lccollateEl;
91 else if (strcmp(defel->defname, "lc_ctype") == 0)
93 else if (strcmp(defel->defname, "provider") == 0)
95 else if (strcmp(defel->defname, "version") == 0)
100 (errcode(ERRCODE_SYNTAX_ERROR),
101 errmsg("collation attribute \"%s\" not recognized",
103 parser_errposition(pstate, defel->location)));
/* "locale" excludes "lc_collate"/"lc_ctype"; "from" must stand alone. */
110 if ((localeEl && (lccollateEl || lcctypeEl))
111 || (fromEl && list_length(parameters) != 1))
113 (errcode(ERRCODE_SYNTAX_ERROR),
114 errmsg("conflicting or redundant options")));
/* "from": look up the source collation and copy its settings. */
121 collid = get_collation_oid(defGetQualifiedName(fromEl), false);
122 tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
123 if (!HeapTupleIsValid(tp))
124 elog(ERROR, "cache lookup failed for collation %u", collid);
/* The two name fields are duplicated with pstrdup rather than referenced in place. */
126 collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate));
127 collctype = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype));
128 collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
129 collencoding = ((Form_pg_collation) GETSTRUCT(tp))->collencoding;
134 * Copying the "default" collation is not allowed because most code
135 * checks for DEFAULT_COLLATION_OID instead of COLLPROVIDER_DEFAULT,
136 * and so having a second collation with COLLPROVIDER_DEFAULT would
137 * not work and potentially confuse or crash some code. This could be
138 * fixed with some legwork.
140 if (collprovider == COLLPROVIDER_DEFAULT)
142 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
143 errmsg("collation \"default\" cannot be copied")));
/* "locale" supplies both the collate and the ctype value. */
148 collcollate = defGetString(localeEl);
149 collctype = defGetString(localeEl);
/* Individual options; the guarding conditions are not visible in this listing. */
153 collcollate = defGetString(lccollateEl);
156 collctype = defGetString(lcctypeEl);
159 collproviderstr = defGetString(providerEl);
162 collversion = defGetString(versionEl);
/* Map the provider name, compared case-insensitively, to its provider code. */
166 if (pg_strcasecmp(collproviderstr, "icu") == 0)
167 collprovider = COLLPROVIDER_ICU;
168 else if (pg_strcasecmp(collproviderstr, "libc") == 0)
169 collprovider = COLLPROVIDER_LIBC;
172 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
173 errmsg("unrecognized collation provider: %s",
/* libc fallback; NOTE(review): the condition selecting it is not visible here. */
177 collprovider = COLLPROVIDER_LIBC;
/* Both the collate and the ctype value are mandatory by this point. */
181 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
182 errmsg("parameter \"lc_collate\" must be specified")));
186 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
187 errmsg("parameter \"lc_ctype\" must be specified")));
/* NOTE(review): the ICU branch body is not visible; the lines below use the database encoding. */
191 if (collprovider == COLLPROVIDER_ICU)
195 collencoding = GetDatabaseEncoding();
/* presumably rejects locales that do not fit that encoding -- confirm in the helper */
196 check_encoding_locale_matches(collencoding, collcollate, collctype);
/* Version string reported by the provider for the collate value. */
201 collversion = get_collation_actual_version(collprovider, collcollate);
203 newoid = CollationCreate(collName,
212 false); /* not quiet */
/* An invalid OID means no collation was created; report that to the caller. */
214 if (!OidIsValid(newoid))
215 return InvalidObjectAddress;
218 * Check that the locales can be loaded. NB: pg_newlocale_from_collation
219 * is only supposed to be called on non-C-equivalent locales.
221 CommandCounterIncrement();
222 if (!lc_collate_is_c(newoid) || !lc_ctype_is_c(newoid))
223 (void) pg_newlocale_from_collation(newoid);
225 ObjectAddressSet(address, CollationRelationId, newoid);
231 * Subroutine for ALTER COLLATION SET SCHEMA and RENAME
233 * Is there a collation with the same name as the given collation already in
234 * the given namespace? If so, raise an appropriate error message.
/*
 * IsThereCollationInNamespace
 *
 * collname: collation name to look for
 * nspOid:   OID of the schema to search
 *
 * Two COLLNAMEENCNSP syscache probes are made.  The first uses the current
 * database encoding; the second covers the any-encoding entry (its encoding
 * key is not visible in this listing).  A hit in either probe is reported
 * with ERRCODE_DUPLICATE_OBJECT; only the first message names the encoding.
 */
237 IsThereCollationInNamespace(const char *collname, Oid nspOid)
239 /* make sure the name doesn't already exist in new schema */
240 if (SearchSysCacheExists3(COLLNAMEENCNSP,
241 CStringGetDatum(collname),
242 Int32GetDatum(GetDatabaseEncoding()),
243 ObjectIdGetDatum(nspOid)))
245 (errcode(ERRCODE_DUPLICATE_OBJECT),
246 errmsg("collation \"%s\" for encoding \"%s\" already exists in schema \"%s\"",
247 collname, GetDatabaseEncodingName(),
248 get_namespace_name(nspOid))));
250 /* mustn't match an any-encoding entry, either */
251 if (SearchSysCacheExists3(COLLNAMEENCNSP,
252 CStringGetDatum(collname),
254 ObjectIdGetDatum(nspOid)))
256 (errcode(ERRCODE_DUPLICATE_OBJECT),
257 errmsg("collation \"%s\" already exists in schema \"%s\"",
258 collname, get_namespace_name(nspOid))));
/*
 * AlterCollation
 *
 * Compare the version stored in the pg_collation row of stmt->collname with
 * the version that get_collation_actual_version() reports now, and put the
 * new value into the row when the two strings differ.
 *
 *	- The current user must own the collation.
 *	- A change between a NULL and a non-NULL version is rejected with
 *	  "invalid collation version change".
 *	- The messages "changing version from %s to %s" and "version has not
 *	  changed" report the outcome (their severity is not visible in this
 *	  listing).
 *
 * The address built at the end identifies the collation
 * (CollationRelationId, collOid).
 */
265 AlterCollation(AlterCollationStmt *stmt)
270 Form_pg_collation collForm;
275 ObjectAddress address;
/* Open pg_collation with a lock level suitable for updating a row. */
277 rel = heap_open(CollationRelationId, RowExclusiveLock);
278 collOid = get_collation_oid(stmt->collname, false);
280 if (!pg_collation_ownercheck(collOid, GetUserId()))
281 aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_COLLATION,
282 NameListToString(stmt->collname));
/* Fetch a copy of the tuple, since it may be modified below. */
284 tup = SearchSysCacheCopy1(COLLOID, ObjectIdGetDatum(collOid));
285 if (!HeapTupleIsValid(tup))
286 elog(ERROR, "cache lookup failed for collation %u", collOid);
288 collForm = (Form_pg_collation) GETSTRUCT(tup);
/* collversion can be NULL, so it is read through SysCacheGetAttr. */
289 collversion = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion,
291 oldversion = isnull ? NULL : TextDatumGetCString(collversion);
/* Version reported right now for the same provider and collate value. */
293 newversion = get_collation_actual_version(collForm->collprovider, NameStr(collForm->collcollate));
295 /* cannot change from NULL to non-NULL or vice versa */
296 if ((!oldversion && newversion) || (oldversion && !newversion))
297 elog(ERROR, "invalid collation version change");
298 else if (oldversion && newversion && strcmp(newversion, oldversion) != 0)
300 bool nulls[Natts_pg_collation];
301 bool replaces[Natts_pg_collation];
302 Datum values[Natts_pg_collation];
305 (errmsg("changing version from %s to %s",
306 oldversion, newversion)));
/* Start from all-false arrays so that only collversion gets replaced. */
308 memset(values, 0, sizeof(values));
309 memset(nulls, false, sizeof(nulls));
310 memset(replaces, false, sizeof(replaces));
312 values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(newversion);
313 replaces[Anum_pg_collation_collversion - 1] = true;
315 tup = heap_modify_tuple(tup, RelationGetDescr(rel),
316 values, nulls, replaces);
320 (errmsg("version has not changed")));
/* Write the tuple back at its existing location. */
322 CatalogTupleUpdate(rel, &tup->t_self, tup);
324 InvokeObjectPostAlterHook(CollationRelationId, collOid, 0);
326 ObjectAddressSet(address, CollationRelationId, collOid);
/* NoLock is passed here; presumably the lock from heap_open stays held -- confirm. */
329 heap_close(rel, NoLock);
/*
 * pg_collation_actual_version
 *
 * Function-manager entry point.  Argument 0 is a collation OID.  The
 * pg_collation row is looked up through the COLLOID syscache, and an OID
 * with no row is reported with ERRCODE_UNDEFINED_OBJECT.  The result is the
 * string from get_collation_actual_version() for the provider and collate
 * value of that row, converted to text.
 *
 * NOTE(review): how a missing version is handled is not visible in this
 * listing.
 */
336 pg_collation_actual_version(PG_FUNCTION_ARGS)
338 Oid collid = PG_GETARG_OID(0);
344 tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
345 if (!HeapTupleIsValid(tp))
347 (errcode(ERRCODE_UNDEFINED_OBJECT),
348 errmsg("collation with OID %u does not exist", collid)));
/* The collate name is duplicated with pstrdup; the provider is a plain char. */
350 collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate));
351 collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
355 version = get_collation_actual_version(collprovider, collcollate);
358 PG_RETURN_TEXT_P(cstring_to_text(version));
364 /* will we use "locale -a" in pg_import_system_collations? (only with HAVE_LOCALE_T and outside WIN32) */
365 #if defined(HAVE_LOCALE_T) && !defined(WIN32)
366 #define READ_LOCALE_A_OUTPUT
/* READ_LOCALE_A_OUTPUT guards the libc import code and its helpers further down. */
369 #if defined(READ_LOCALE_A_OUTPUT) || defined(USE_ICU)
371 * Check a string to see if it is pure ASCII
/*
 * is_all_ascii: the bytes of "str" are tested with IS_HIGHBIT_SET.  The
 * function is compiled only when the libc ("locale -a") import code or the
 * ICU import code is built.  The libc import loop skips a locale name for
 * which this test fails.
 *
 * NOTE(review): the loop and the return statements are not visible in this
 * listing.
 */
374 is_all_ascii(const char *str)
378 if (IS_HIGHBIT_SET(*str)) /* presumably true for a byte outside 7-bit ASCII -- confirm macro */
384 #endif /* READ_LOCALE_A_OUTPUT || USE_ICU */
386 #ifdef READ_LOCALE_A_OUTPUT
388 * "Normalize" a libc locale name, stripping off encoding tags such as
389 * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
390 * -> "br_FR@euro"). Return true if a new, different name was generated.
/*
 * normalize_libc_locale_name
 *
 * old: locale name to normalize
 * new: output buffer; the visible caller passes a char array of
 *      NAMEDATALEN bytes
 *
 * The visible caller records an alias only when the result is true.
 *
 * NOTE(review): only the part that skips an encoding tag is visible in this
 * listing; the copy loop and the return statement are not.
 */
394 normalize_libc_locale_name(char *new, const char *old)
398 bool changed = false;
404 /* skip over encoding tag such as ".utf8" or ".UTF-8" */
/* The tag is a run of ASCII letters and digits; further accepted characters may follow in lines not shown. */
406 while ((*o >= 'A' && *o <= 'Z')
407 || (*o >= 'a' && *o <= 'z')
408 || (*o >= '0' && *o <= '9')
422 * qsort comparator for CollAliasData items
/*
 * cmpaliases: comparator passed to qsort() over the array of CollAliasData
 * items.  a and b point to two items; the result is that of strcmp() on
 * their localename fields.
 */
425 cmpaliases(const void *a, const void *b)
427 const CollAliasData *ca = (const CollAliasData *) a;
428 const CollAliasData *cb = (const CollAliasData *) b;
430 /* comparing localename is enough because other fields are derived */
431 return strcmp(ca->localename, cb->localename);
433 #endif /* READ_LOCALE_A_OUTPUT */
438 * Get the ICU language tag for a locale name.
439 * The result is a palloc'd string.
/*
 * get_icu_language_tag
 *
 * localename is handed to uloc_toLanguageTag(), which writes into a stack
 * buffer of ULOC_FULLNAME_CAPACITY bytes.  A failing ICU status is reported
 * with the message below, which includes the ICU error name.
 *
 * NOTE(review): the severity of that report and the statement that copies
 * the buffer into the result are not visible in this listing.
 */
442 get_icu_language_tag(const char *localename)
444 char buf[ULOC_FULLNAME_CAPACITY];
/* ICU expects the status variable to be cleared before the call. */
447 status = U_ZERO_ERROR;
/* TRUE is presumably the strict flag of uloc_toLanguageTag -- confirm against uloc.h */
448 uloc_toLanguageTag(localename, buf, sizeof(buf), TRUE, &status);
449 if (U_FAILURE(status))
451 (errmsg("could not convert locale name \"%s\" to language tag: %s",
452 localename, u_errorName(status))));
458 * Get a comment (specifically, the display name) for an ICU locale.
459 * The result is a palloc'd string, or NULL if we can't get a comment
460 * or find that it's not all ASCII. (We can *not* accept non-ASCII
461 * comments, because the contents of template0 must be encoding-agnostic.)
/*
 * get_icu_locale_comment
 *
 * The display name of localename is requested from uloc_getDisplayName()
 * with "en" as the display locale, into a buffer of 128 UChar elements.
 * On an ICU failure the function returns NULL.  Otherwise each UChar is
 * checked against 127 and then stored, one per byte, into a buffer of
 * len_uchar + 1 bytes obtained from palloc, followed by a terminating NUL.
 *
 * NOTE(review): the statement executed when a code unit is above 127 is not
 * visible in this listing.
 */
464 get_icu_locale_comment(const char *localename)
467 UChar displayname[128];
472 status = U_ZERO_ERROR;
473 len_uchar = uloc_getDisplayName(localename, "en",
474 displayname, lengthof(displayname),
476 if (U_FAILURE(status))
477 return NULL; /* no good reason to raise an error */
479 /* Check for non-ASCII comment (can't use is_all_ascii for this) */
480 for (i = 0; i < len_uchar; i++)
482 if (displayname[i] > 127)
/* Every element passed the check above, so the narrowing to char below loses nothing. */
487 result = palloc(len_uchar + 1);
488 for (i = 0; i < len_uchar; i++)
489 result[i] = displayname[i];
490 result[len_uchar] = '\0';
498 * pg_import_system_collations: add known system collations to pg_collation
/*
 * pg_import_system_collations
 *
 * Function-manager entry point.  Argument 0 (nspid) is the OID of the
 * schema in which the collations are created.  The error text below states
 * that only a superuser may run the import.
 *
 * Two sources are visible:
 *	- libc, when READ_LOCALE_A_OUTPUT is defined: the output of the command
 *	  "locale -a" is read line by line, each usable name becomes a
 *	  COLLPROVIDER_LIBC collation, and aliases without the encoding tag are
 *	  collected, sorted by locale name and created afterwards;
 *	- ICU: the root locale plus every locale from uloc_getAvailable()
 *	  becomes a COLLPROVIDER_ICU collation named "<language tag>-x-icu",
 *	  and the locale display name is attached as its comment.
 *
 * The result is ncreated as an int32, presumably the number of collations
 * actually created.
 *
 * NOTE(review): many statements (braces, declarations, counters and several
 * conditions) are not visible in this listing; confirm nesting and counter
 * updates against the full file.
 */
501 pg_import_system_collations(PG_FUNCTION_ARGS)
503 Oid nspid = PG_GETARG_OID(0);
506 /* silence compiler warning if we have no locale implementation at all */
511 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
512 (errmsg("must be superuser to import system collations"))));
514 /* Load collations known to libc, using "locale -a" to enumerate them */
515 #ifdef READ_LOCALE_A_OUTPUT
517 FILE *locale_a_handle;
518 char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */
521 CollAliasData *aliases;
526 /* expansible array of aliases */
528 aliases = (CollAliasData *) palloc(maxaliases * sizeof(CollAliasData));
/* Run the command and read its standard output through a pipe. */
531 locale_a_handle = OpenPipeStream("locale -a", "r");
532 if (locale_a_handle == NULL)
534 (errcode_for_file_access(),
535 errmsg("could not execute command \"%s\": %m",
/* One locale name per line of output. */
538 while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
542 char alias[NAMEDATALEN];
544 len = strlen(localebuf);
/* A line with no trailing newline did not fit into localebuf. */
546 if (len == 0 || localebuf[len - 1] != '\n')
548 elog(DEBUG1, "locale name too long, skipped: \"%s\"", localebuf);
/* Cut off the newline. */
551 localebuf[len - 1] = '\0';
554 * Some systems have locale names that don't consist entirely of
555 * ASCII letters (such as "bokmål" or "français").
556 * This is pretty silly, since we need the locale itself to
557 * interpret the non-ASCII characters. We can't do much with
558 * those, so we filter them out.
560 if (!is_all_ascii(localebuf))
562 elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf);
/* Ask which encoding this locale uses. */
566 enc = pg_get_encoding_from_locale(localebuf, false);
569 /* error message printed by pg_get_encoding_from_locale() */
572 if (!PG_VALID_BE_ENCODING(enc))
573 continue; /* ignore locales for client-only encodings */
574 if (enc == PG_SQL_ASCII)
575 continue; /* C/POSIX are already in the catalog */
577 /* count valid locales found in operating system */
581 * Create a collation named the same as the locale, but quietly
582 * doing nothing if it already exists. This is the behavior we
583 * need even at initdb time, because some versions of "locale -a"
584 * can report the same locale name more than once. And it's
585 * convenient for later import runs, too, since you just about
586 * always want to add on new locales without a lot of chatter
587 * about existing ones.
/* The locale name serves as collation name, collate value and ctype value. */
589 collid = CollationCreate(localebuf, nspid, GetUserId(),
590 COLLPROVIDER_LIBC, enc,
591 localebuf, localebuf,
592 get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
594 if (OidIsValid(collid))
598 /* Must do CCI between inserts to handle duplicates correctly */
599 CommandCounterIncrement();
603 * Generate aliases such as "en_US" in addition to "en_US.utf8"
604 * for ease of use. Note that collation names are unique per
605 * encoding only, so this doesn't clash with "en_US" for LATIN1,
608 * However, it might conflict with a name we'll see later in the
609 * "locale -a" output. So save up the aliases and try to add them
610 * after we've read all the output.
/* A true result means "alias" now holds a name that differs from localebuf. */
612 if (normalize_libc_locale_name(alias, localebuf))
/* The array is enlarged when it is full; the growth step is not visible here. */
614 if (naliases >= maxaliases)
617 aliases = (CollAliasData *)
618 repalloc(aliases, maxaliases * sizeof(CollAliasData));
/* Both strings are duplicated because the two buffers are reused for the next line. */
620 aliases[naliases].localename = pstrdup(localebuf);
621 aliases[naliases].alias = pstrdup(alias);
622 aliases[naliases].enc = enc;
627 ClosePipeStream(locale_a_handle);
630 * Before processing the aliases, sort them by locale name. The point
631 * here is that if "locale -a" gives us multiple locale names with the
632 * same encoding and base name, say "en_US.utf8" and "en_US.utf-8", we
633 * want to pick a deterministic one of them. First in ASCII sort
634 * order is a good enough rule. (Before PG 10, the code corresponding
635 * to this logic in initdb.c had an additional ordering rule, to
636 * prefer the locale name exactly matching the alias, if any. We
637 * don't need to consider that here, because we would have already
638 * created such a pg_collation entry above, and that one will win.)
641 qsort((void *) aliases, naliases, sizeof(CollAliasData), cmpaliases);
643 /* Now add aliases, ignoring any that match pre-existing entries */
644 for (i = 0; i < naliases; i++)
646 char *locale = aliases[i].localename;
647 char *alias = aliases[i].alias;
648 int enc = aliases[i].enc;
650 collid = CollationCreate(alias, nspid, GetUserId(),
651 COLLPROVIDER_LIBC, enc,
653 get_collation_actual_version(COLLPROVIDER_LIBC, locale),
655 if (OidIsValid(collid))
659 CommandCounterIncrement();
663 /* Give a warning if "locale -a" seems to be malfunctioning */
666 (errmsg("no usable system locales were found")));
668 #endif /* READ_LOCALE_A_OUTPUT */
671 * Load collations known to ICU
673 * We use uloc_countAvailable()/uloc_getAvailable() rather than
674 * ucol_countAvailable()/ucol_getAvailable(). The former returns a full
675 * set of language+region combinations, whereas the latter only returns
676 * language+region combinations of they are distinct from the language's
677 * base collation. So there might not be a de-DE or en-GB, which would be
685 * Start the loop at -1 to sneak in the root locale without too much
688 for (i = -1; i < uloc_countAvailable(); i++)
693 const char *collcollate;
697 name = ""; /* ICU root locale */
698 /* other iterations: the locale name ICU reports for index i */
699 name = uloc_getAvailable(i);
/* The language tag is used in the collation name below. */
701 langtag = get_icu_language_tag(name);
/* From ICU major version 54 on the language tag is stored as collate value, before that the ICU name. */
702 collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name;
705 * Be paranoid about not allowing any non-ASCII strings into
708 if (!is_all_ascii(langtag) || !is_all_ascii(collcollate))
/* ICU collations get the encoding value -1 where the libc calls pass enc. */
711 collid = CollationCreate(psprintf("%s-x-icu", langtag),
713 COLLPROVIDER_ICU, -1,
714 collcollate, collcollate,
715 get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
717 if (OidIsValid(collid))
721 CommandCounterIncrement();
/* The display name of the locale becomes the comment of the new collation. */
723 icucomment = get_icu_locale_comment(name);
725 CreateComments(collid, CollationRelationId, 0,
732 PG_RETURN_INT32(ncreated);