]> granicus.if.org Git - postgresql/commitdiff
ICU support
authorPeter Eisentraut <peter_e@gmx.net>
Thu, 23 Mar 2017 19:25:34 +0000 (15:25 -0400)
committerPeter Eisentraut <peter_e@gmx.net>
Thu, 23 Mar 2017 19:28:48 +0000 (15:28 -0400)
Add a column collprovider to pg_collation that determines which library
provides the collation data.  The existing choices are default and libc,
and this adds an icu choice, which uses the ICU4C library.

The pg_locale_t type is changed to a union that contains the
provider-specific locale handles.  Users of locale information are
changed to look into that struct for the appropriate handle to use.

Also add a collversion column that records the version of the collation
when it is created, and check at run time whether it is still the same.
This detects potentially incompatible library upgrades that can corrupt
indexes and other structures.  This is currently only supported by
ICU-provided collations.

initdb initializes the default collation set as before from the `locale
-a` output but also adds all available ICU locales with a "-x-icu"
appended.

Currently, ICU-provided collations can only be explicitly named
collations.  The global database locales are still always libc-provided.

ICU support is enabled by configure --with-icu.

Reviewed-by: Thomas Munro <thomas.munro@enterprisedb.com>
Reviewed-by: Andreas Karlsson <andreas@proxel.se>
45 files changed:
aclocal.m4
config/pkg.m4 [new file with mode: 0644]
configure
configure.in
doc/src/sgml/catalogs.sgml
doc/src/sgml/charset.sgml
doc/src/sgml/func.sgml
doc/src/sgml/installation.sgml
doc/src/sgml/mvcc.sgml
doc/src/sgml/ref/alter_collation.sgml
doc/src/sgml/ref/create_collation.sgml
src/Makefile.global.in
src/backend/Makefile
src/backend/catalog/pg_collation.c
src/backend/commands/collationcmds.c
src/backend/common.mk
src/backend/nodes/copyfuncs.c
src/backend/nodes/equalfuncs.c
src/backend/parser/gram.y
src/backend/regex/regc_pg_locale.c
src/backend/tcop/utility.c
src/backend/utils/adt/formatting.c
src/backend/utils/adt/like.c
src/backend/utils/adt/pg_locale.c
src/backend/utils/adt/selfuncs.c
src/backend/utils/adt/varlena.c
src/backend/utils/mb/encnames.c
src/bin/initdb/initdb.c
src/bin/pg_dump/pg_dump.c
src/bin/pg_dump/t/002_pg_dump.pl
src/bin/psql/describe.c
src/include/catalog/pg_collation.h
src/include/catalog/pg_collation_fn.h
src/include/catalog/pg_proc.h
src/include/commands/collationcmds.h
src/include/mb/pg_wchar.h
src/include/nodes/nodes.h
src/include/nodes/parsenodes.h
src/include/pg_config.h.in
src/include/utils/pg_locale.h
src/test/regress/GNUmakefile
src/test/regress/expected/collate.icu.out [new file with mode: 0644]
src/test/regress/expected/collate.linux.utf8.out
src/test/regress/sql/collate.icu.sql [new file with mode: 0644]
src/test/regress/sql/collate.linux.utf8.sql

index 6f930b6fc1be2129a3c20f8e5dec9e9e181fcc3b..5ca902b6a24287d780be3e71a531a8c2b41aee90 100644 (file)
@@ -7,6 +7,7 @@ m4_include([config/docbook.m4])
 m4_include([config/general.m4])
 m4_include([config/libtool.m4])
 m4_include([config/perl.m4])
+m4_include([config/pkg.m4])
 m4_include([config/programs.m4])
 m4_include([config/python.m4])
 m4_include([config/tcl.m4])
diff --git a/config/pkg.m4 b/config/pkg.m4
new file mode 100644 (file)
index 0000000..13a8890
--- /dev/null
@@ -0,0 +1,275 @@
+# pkg.m4 - Macros to locate and utilise pkg-config.   -*- Autoconf -*-
+# serial 12 (pkg-config-0.29.2)
+
+dnl Copyright © 2004 Scott James Remnant <scott@netsplit.com>.
+dnl Copyright © 2012-2015 Dan Nicholson <dbn.lists@gmail.com>
+dnl
+dnl This program is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU General Public License as published by
+dnl the Free Software Foundation; either version 2 of the License, or
+dnl (at your option) any later version.
+dnl
+dnl This program is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU General Public License
+dnl along with this program; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+dnl 02111-1307, USA.
+dnl
+dnl As a special exception to the GNU General Public License, if you
+dnl distribute this file as part of a program that contains a
+dnl configuration script generated by Autoconf, you may include it under
+dnl the same distribution terms that you use for the rest of that
+dnl program.
+
+dnl PKG_PREREQ(MIN-VERSION)
+dnl -----------------------
+dnl Since: 0.29
+dnl
+dnl Verify that the version of the pkg-config macros is at least
+dnl MIN-VERSION. Unlike PKG_PROG_PKG_CONFIG, which checks the user's
+dnl installed version of pkg-config, this checks the developer's version
+dnl of pkg.m4 when generating configure.
+dnl
+dnl To ensure that this macro is defined, also add:
+dnl m4_ifndef([PKG_PREREQ],
+dnl     [m4_fatal([must install pkg-config 0.29 or later before running autoconf/autogen])])
+dnl
+dnl See the "Since" comment for each macro you use to see what version
+dnl of the macros you require.
+m4_defun([PKG_PREREQ],
+[m4_define([PKG_MACROS_VERSION], [0.29.2])
+m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1,
+    [m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])])
+])dnl PKG_PREREQ
+
+dnl PKG_PROG_PKG_CONFIG([MIN-VERSION])
+dnl ----------------------------------
+dnl Since: 0.16
+dnl
+dnl Search for the pkg-config tool and set the PKG_CONFIG variable to the
+dnl first one found in the path. Checks that the version of pkg-config found
+dnl is at least MIN-VERSION. If MIN-VERSION is not specified, 0.9.0 is
+dnl used since that's the first version where most current features of
+dnl pkg-config existed.
+AC_DEFUN([PKG_PROG_PKG_CONFIG],
+[m4_pattern_forbid([^_?PKG_[A-Z_]+$])
+m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$])
+m4_pattern_allow([^PKG_CONFIG_(DISABLE_UNINSTALLED|TOP_BUILD_DIR|DEBUG_SPEW)$])
+AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])
+AC_ARG_VAR([PKG_CONFIG_PATH], [directories to add to pkg-config's search path])
+AC_ARG_VAR([PKG_CONFIG_LIBDIR], [path overriding pkg-config's built-in search path])
+
+if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
+       AC_PATH_TOOL([PKG_CONFIG], [pkg-config])
+fi
+if test -n "$PKG_CONFIG"; then
+       _pkg_min_version=m4_default([$1], [0.9.0])
+       AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version])
+       if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then
+               AC_MSG_RESULT([yes])
+       else
+               AC_MSG_RESULT([no])
+               PKG_CONFIG=""
+       fi
+fi[]dnl
+])dnl PKG_PROG_PKG_CONFIG
+
+dnl PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
+dnl -------------------------------------------------------------------
+dnl Since: 0.18
+dnl
+dnl Check to see whether a particular set of modules exists. Similar to
+dnl PKG_CHECK_MODULES(), but does not set variables or print errors.
+dnl
+dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG])
+dnl only at the first occurrence in configure.ac, so if the first place
+dnl it's called might be skipped (such as if it is within an "if"), you
+dnl have to call PKG_CHECK_EXISTS manually
+AC_DEFUN([PKG_CHECK_EXISTS],
+[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
+if test -n "$PKG_CONFIG" && \
+    AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then
+  m4_default([$2], [:])
+m4_ifvaln([$3], [else
+  $3])dnl
+fi])
+
+dnl _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES])
+dnl ---------------------------------------------
+dnl Internal wrapper calling pkg-config via PKG_CONFIG and setting
+dnl pkg_failed based on the result.
+m4_define([_PKG_CONFIG],
+[if test -n "$$1"; then
+    pkg_cv_[]$1="$$1"
+ elif test -n "$PKG_CONFIG"; then
+    PKG_CHECK_EXISTS([$3],
+                     [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`
+                     test "x$?" != "x0" && pkg_failed=yes ],
+                    [pkg_failed=yes])
+ else
+    pkg_failed=untried
+fi[]dnl
+])dnl _PKG_CONFIG
+
+dnl _PKG_SHORT_ERRORS_SUPPORTED
+dnl ---------------------------
+dnl Internal check to see if pkg-config supports short errors.
+AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED],
+[AC_REQUIRE([PKG_PROG_PKG_CONFIG])
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+        _pkg_short_errors_supported=yes
+else
+        _pkg_short_errors_supported=no
+fi[]dnl
+])dnl _PKG_SHORT_ERRORS_SUPPORTED
+
+
+dnl PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
+dnl   [ACTION-IF-NOT-FOUND])
+dnl --------------------------------------------------------------
+dnl Since: 0.4.0
+dnl
+dnl Note that if there is a possibility the first call to
+dnl PKG_CHECK_MODULES might not happen, you should be sure to include an
+dnl explicit call to PKG_PROG_PKG_CONFIG in your configure.ac
+AC_DEFUN([PKG_CHECK_MODULES],
+[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
+AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
+AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl
+
+pkg_failed=no
+AC_MSG_CHECKING([for $2])
+
+_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2])
+_PKG_CONFIG([$1][_LIBS], [libs], [$2])
+
+m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS
+and $1[]_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.])
+
+if test $pkg_failed = yes; then
+        AC_MSG_RESULT([no])
+        _PKG_SHORT_ERRORS_SUPPORTED
+        if test $_pkg_short_errors_supported = yes; then
+               $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1`
+        else
+               $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1`
+        fi
+       # Put the nasty error message in config.log where it belongs
+       echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD
+
+       m4_default([$4], [AC_MSG_ERROR(
+[Package requirements ($2) were not met:
+
+$$1_PKG_ERRORS
+
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
+
+_PKG_TEXT])[]dnl
+        ])
+elif test $pkg_failed = untried; then
+        AC_MSG_RESULT([no])
+       m4_default([$4], [AC_MSG_FAILURE(
+[The pkg-config script could not be found or is too old.  Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
+
+_PKG_TEXT
+
+To get pkg-config, see <http://pkg-config.freedesktop.org/>.])[]dnl
+        ])
+else
+       $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
+       $1[]_LIBS=$pkg_cv_[]$1[]_LIBS
+        AC_MSG_RESULT([yes])
+       $3
+fi[]dnl
+])dnl PKG_CHECK_MODULES
+
+
+dnl PKG_CHECK_MODULES_STATIC(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
+dnl   [ACTION-IF-NOT-FOUND])
+dnl ---------------------------------------------------------------------
+dnl Since: 0.29
+dnl
+dnl Checks for existence of MODULES and gathers its build flags with
+dnl static libraries enabled. Sets VARIABLE-PREFIX_CFLAGS from --cflags
+dnl and VARIABLE-PREFIX_LIBS from --libs.
+dnl
+dnl Note that if there is a possibility the first call to
+dnl PKG_CHECK_MODULES_STATIC might not happen, you should be sure to
+dnl include an explicit call to PKG_PROG_PKG_CONFIG in your
+dnl configure.ac.
+AC_DEFUN([PKG_CHECK_MODULES_STATIC],
+[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
+_save_PKG_CONFIG=$PKG_CONFIG
+PKG_CONFIG="$PKG_CONFIG --static"
+PKG_CHECK_MODULES($@)
+PKG_CONFIG=$_save_PKG_CONFIG[]dnl
+])dnl PKG_CHECK_MODULES_STATIC
+
+
+dnl PKG_INSTALLDIR([DIRECTORY])
+dnl -------------------------
+dnl Since: 0.27
+dnl
+dnl Substitutes the variable pkgconfigdir as the location where a module
+dnl should install pkg-config .pc files. By default the directory is
+dnl $libdir/pkgconfig, but the default can be changed by passing
+dnl DIRECTORY. The user can override through the --with-pkgconfigdir
+dnl parameter.
+AC_DEFUN([PKG_INSTALLDIR],
+[m4_pushdef([pkg_default], [m4_default([$1], ['${libdir}/pkgconfig'])])
+m4_pushdef([pkg_description],
+    [pkg-config installation directory @<:@]pkg_default[@:>@])
+AC_ARG_WITH([pkgconfigdir],
+    [AS_HELP_STRING([--with-pkgconfigdir], pkg_description)],,
+    [with_pkgconfigdir=]pkg_default)
+AC_SUBST([pkgconfigdir], [$with_pkgconfigdir])
+m4_popdef([pkg_default])
+m4_popdef([pkg_description])
+])dnl PKG_INSTALLDIR
+
+
+dnl PKG_NOARCH_INSTALLDIR([DIRECTORY])
+dnl --------------------------------
+dnl Since: 0.27
+dnl
+dnl Substitutes the variable noarch_pkgconfigdir as the location where a
+dnl module should install arch-independent pkg-config .pc files. By
+dnl default the directory is $datadir/pkgconfig, but the default can be
+dnl changed by passing DIRECTORY. The user can override through the
+dnl --with-noarch-pkgconfigdir parameter.
+AC_DEFUN([PKG_NOARCH_INSTALLDIR],
+[m4_pushdef([pkg_default], [m4_default([$1], ['${datadir}/pkgconfig'])])
+m4_pushdef([pkg_description],
+    [pkg-config arch-independent installation directory @<:@]pkg_default[@:>@])
+AC_ARG_WITH([noarch-pkgconfigdir],
+    [AS_HELP_STRING([--with-noarch-pkgconfigdir], pkg_description)],,
+    [with_noarch_pkgconfigdir=]pkg_default)
+AC_SUBST([noarch_pkgconfigdir], [$with_noarch_pkgconfigdir])
+m4_popdef([pkg_default])
+m4_popdef([pkg_description])
+])dnl PKG_NOARCH_INSTALLDIR
+
+
+dnl PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE,
+dnl [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
+dnl -------------------------------------------
+dnl Since: 0.28
+dnl
+dnl Retrieves the value of the pkg-config variable for the given module.
+AC_DEFUN([PKG_CHECK_VAR],
+[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
+AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl
+
+_PKG_CONFIG([$1], [variable="][$3]["], [$2])
+AS_VAR_COPY([$1], [pkg_cv_][$1])
+
+AS_VAR_IF([$1], [""], [$5], [$4])dnl
+])dnl PKG_CHECK_VAR
index 9528622421ed863e897e51f68ba9c212adc8cb41..4b8229e959fa9d03eaeb198c63be6b8ba989db42 100755 (executable)
--- a/configure
+++ b/configure
@@ -715,6 +715,12 @@ krb_srvtab
 with_python
 with_perl
 with_tcl
+ICU_LIBS
+ICU_CFLAGS
+PKG_CONFIG_LIBDIR
+PKG_CONFIG_PATH
+PKG_CONFIG
+with_icu
 enable_thread_safety
 INCLUDES
 autodepend
@@ -821,6 +827,7 @@ with_CC
 enable_depend
 enable_cassert
 enable_thread_safety
+with_icu
 with_tcl
 with_tclconfig
 with_perl
@@ -856,6 +863,11 @@ LDFLAGS
 LIBS
 CPPFLAGS
 CPP
+PKG_CONFIG
+PKG_CONFIG_PATH
+PKG_CONFIG_LIBDIR
+ICU_CFLAGS
+ICU_LIBS
 LDFLAGS_EX
 LDFLAGS_SL
 DOCBOOKSTYLE'
@@ -1511,6 +1523,7 @@ Optional Packages:
   --with-wal-segsize=SEGSIZE
                           set WAL segment size in MB [16]
   --with-CC=CMD           set compiler (deprecated)
+  --with-icu              build with ICU support
   --with-tcl              build Tcl modules (PL/Tcl)
   --with-tclconfig=DIR    tclConfig.sh is in DIR
   --with-perl             build Perl modules (PL/Perl)
@@ -1546,6 +1559,13 @@ Some influential environment variables:
   CPPFLAGS    (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if
               you have headers in a nonstandard directory <include dir>
   CPP         C preprocessor
+  PKG_CONFIG  path to pkg-config utility
+  PKG_CONFIG_PATH
+              directories to add to pkg-config's search path
+  PKG_CONFIG_LIBDIR
+              path overriding pkg-config's built-in search path
+  ICU_CFLAGS  C compiler flags for ICU, overriding pkg-config
+  ICU_LIBS    linker flags for ICU, overriding pkg-config
   LDFLAGS_EX  extra linker flags for linking executables only
   LDFLAGS_SL  extra linker flags for linking shared libraries only
   DOCBOOKSTYLE
@@ -5361,6 +5381,255 @@ fi
 $as_echo "$enable_thread_safety" >&6; }
 
 
+#
+# ICU
+#
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build with ICU support" >&5
+$as_echo_n "checking whether to build with ICU support... " >&6; }
+
+
+
+# Check whether --with-icu was given.
+if test "${with_icu+set}" = set; then :
+  withval=$with_icu;
+  case $withval in
+    yes)
+
+$as_echo "#define USE_ICU 1" >>confdefs.h
+
+      ;;
+    no)
+      :
+      ;;
+    *)
+      as_fn_error $? "no argument expected for --with-icu option" "$LINENO" 5
+      ;;
+  esac
+
+else
+  with_icu=no
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_icu" >&5
+$as_echo "$with_icu" >&6; }
+
+
+if test "$with_icu" = yes; then
+
+
+
+
+
+
+
+if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
+       if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}pkg-config", so it can be a program name with args.
+set dummy ${ac_tool_prefix}pkg-config; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_PKG_CONFIG+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  case $PKG_CONFIG in
+  [\\/]* | ?:[\\/]*)
+  ac_cv_path_PKG_CONFIG="$PKG_CONFIG" # Let the user override the test with a path.
+  ;;
+  *)
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_path_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+  ;;
+esac
+fi
+PKG_CONFIG=$ac_cv_path_PKG_CONFIG
+if test -n "$PKG_CONFIG"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PKG_CONFIG" >&5
+$as_echo "$PKG_CONFIG" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_path_PKG_CONFIG"; then
+  ac_pt_PKG_CONFIG=$PKG_CONFIG
+  # Extract the first word of "pkg-config", so it can be a program name with args.
+set dummy pkg-config; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_ac_pt_PKG_CONFIG+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  case $ac_pt_PKG_CONFIG in
+  [\\/]* | ?:[\\/]*)
+  ac_cv_path_ac_pt_PKG_CONFIG="$ac_pt_PKG_CONFIG" # Let the user override the test with a path.
+  ;;
+  *)
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_path_ac_pt_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+  ;;
+esac
+fi
+ac_pt_PKG_CONFIG=$ac_cv_path_ac_pt_PKG_CONFIG
+if test -n "$ac_pt_PKG_CONFIG"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_PKG_CONFIG" >&5
+$as_echo "$ac_pt_PKG_CONFIG" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_pt_PKG_CONFIG" = x; then
+    PKG_CONFIG=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    PKG_CONFIG=$ac_pt_PKG_CONFIG
+  fi
+else
+  PKG_CONFIG="$ac_cv_path_PKG_CONFIG"
+fi
+
+fi
+if test -n "$PKG_CONFIG"; then
+       _pkg_min_version=0.9.0
+       { $as_echo "$as_me:${as_lineno-$LINENO}: checking pkg-config is at least version $_pkg_min_version" >&5
+$as_echo_n "checking pkg-config is at least version $_pkg_min_version... " >&6; }
+       if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then
+               { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+       else
+               { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+               PKG_CONFIG=""
+       fi
+fi
+
+pkg_failed=no
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for icu-uc icu-i18n" >&5
+$as_echo_n "checking for icu-uc icu-i18n... " >&6; }
+
+if test -n "$ICU_CFLAGS"; then
+    pkg_cv_ICU_CFLAGS="$ICU_CFLAGS"
+ elif test -n "$PKG_CONFIG"; then
+    if test -n "$PKG_CONFIG" && \
+    { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"icu-uc icu-i18n\""; } >&5
+  ($PKG_CONFIG --exists --print-errors "icu-uc icu-i18n") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  pkg_cv_ICU_CFLAGS=`$PKG_CONFIG --cflags "icu-uc icu-i18n" 2>/dev/null`
+                     test "x$?" != "x0" && pkg_failed=yes
+else
+  pkg_failed=yes
+fi
+ else
+    pkg_failed=untried
+fi
+if test -n "$ICU_LIBS"; then
+    pkg_cv_ICU_LIBS="$ICU_LIBS"
+ elif test -n "$PKG_CONFIG"; then
+    if test -n "$PKG_CONFIG" && \
+    { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"icu-uc icu-i18n\""; } >&5
+  ($PKG_CONFIG --exists --print-errors "icu-uc icu-i18n") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  pkg_cv_ICU_LIBS=`$PKG_CONFIG --libs "icu-uc icu-i18n" 2>/dev/null`
+                     test "x$?" != "x0" && pkg_failed=yes
+else
+  pkg_failed=yes
+fi
+ else
+    pkg_failed=untried
+fi
+
+
+
+if test $pkg_failed = yes; then
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+        _pkg_short_errors_supported=yes
+else
+        _pkg_short_errors_supported=no
+fi
+        if test $_pkg_short_errors_supported = yes; then
+               ICU_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "icu-uc icu-i18n" 2>&1`
+        else
+               ICU_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "icu-uc icu-i18n" 2>&1`
+        fi
+       # Put the nasty error message in config.log where it belongs
+       echo "$ICU_PKG_ERRORS" >&5
+
+       as_fn_error $? "Package requirements (icu-uc icu-i18n) were not met:
+
+$ICU_PKG_ERRORS
+
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
+
+Alternatively, you may set the environment variables ICU_CFLAGS
+and ICU_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details." "$LINENO" 5
+elif test $pkg_failed = untried; then
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+       { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "The pkg-config script could not be found or is too old.  Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
+
+Alternatively, you may set the environment variables ICU_CFLAGS
+and ICU_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.
+
+To get pkg-config, see <http://pkg-config.freedesktop.org/>.
+See \`config.log' for more details" "$LINENO" 5; }
+else
+       ICU_CFLAGS=$pkg_cv_ICU_CFLAGS
+       ICU_LIBS=$pkg_cv_ICU_LIBS
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+fi
+fi
+
 #
 # Optionally build Tcl modules (PL/Tcl)
 #
 done
 
 
+if test "$with_icu" = yes; then
+  # ICU functions are macros, so we need to do this the long way.
+
+  # ucol_strcollUTF8() appeared in ICU 50.
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ucol_strcollUTF8" >&5
+$as_echo_n "checking for ucol_strcollUTF8... " >&6; }
+if ${pgac_cv_func_ucol_strcollUTF8+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_save_CPPFLAGS=$CPPFLAGS
+CPPFLAGS="$ICU_CFLAGS $CPPFLAGS"
+ac_save_LIBS=$LIBS
+LIBS="$ICU_LIBS $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <unicode/ucol.h>
+
+int
+main ()
+{
+ucol_strcollUTF8(NULL, NULL, 0, NULL, 0, NULL);
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv_func_ucol_strcollUTF8=yes
+else
+  pgac_cv_func_ucol_strcollUTF8=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+CPPFLAGS=$ac_save_CPPFLAGS
+LIBS=$ac_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_func_ucol_strcollUTF8" >&5
+$as_echo "$pgac_cv_func_ucol_strcollUTF8" >&6; }
+  if test "$pgac_cv_func_ucol_strcollUTF8" = yes ; then
+
+$as_echo "#define HAVE_UCOL_STRCOLLUTF8 1" >>confdefs.h
+
+  fi
+fi
+
 # Lastly, restore full LIBS list and check for readline/libedit symbols
 LIBS="$LIBS_including_readline"
 
index 7f234f543dae53ab975297e88cef077b8f99907f..6c7421417112bbb971f7076b7a0c43e09507d802 100644 (file)
@@ -613,6 +613,19 @@ fi
 AC_MSG_RESULT([$enable_thread_safety])
 AC_SUBST(enable_thread_safety)
 
+#
+# ICU
+#
+AC_MSG_CHECKING([whether to build with ICU support])
+PGAC_ARG_BOOL(with, icu, no, [build with ICU support],
+              [AC_DEFINE([USE_ICU], 1, [Define to build with ICU support. (--with-icu)])])
+AC_MSG_RESULT([$with_icu])
+AC_SUBST(with_icu)
+
+if test "$with_icu" = yes; then
+  PKG_CHECK_MODULES(ICU, icu-uc icu-i18n)
+fi
+
 #
 # Optionally build Tcl modules (PL/Tcl)
 #
@@ -1635,6 +1648,28 @@ fi
 AC_CHECK_FUNCS([strtoll strtoq], [break])
 AC_CHECK_FUNCS([strtoull strtouq], [break])
 
+if test "$with_icu" = yes; then
+  # ICU functions are macros, so we need to do this the long way.
+
+  # ucol_strcollUTF8() appeared in ICU 50.
+  AC_CACHE_CHECK([for ucol_strcollUTF8], [pgac_cv_func_ucol_strcollUTF8],
+[ac_save_CPPFLAGS=$CPPFLAGS
+CPPFLAGS="$ICU_CFLAGS $CPPFLAGS"
+ac_save_LIBS=$LIBS
+LIBS="$ICU_LIBS $LIBS"
+AC_LINK_IFELSE([AC_LANG_PROGRAM(
+[#include <unicode/ucol.h>
+],
+[ucol_strcollUTF8(NULL, NULL, 0, NULL, 0, NULL);])],
+[pgac_cv_func_ucol_strcollUTF8=yes],
+[pgac_cv_func_ucol_strcollUTF8=no])
+CPPFLAGS=$ac_save_CPPFLAGS
+LIBS=$ac_save_LIBS])
+  if test "$pgac_cv_func_ucol_strcollUTF8" = yes ; then
+    AC_DEFINE([HAVE_UCOL_STRCOLLUTF8], 1, [Define to 1 if you have the `ucol_strcollUTF8' function.])
+  fi
+fi
+
 # Lastly, restore full LIBS list and check for readline/libedit symbols
 LIBS="$LIBS_including_readline"
 
index 228ec7803189447e40c04355b4cb6117fbe74066..c531c73aac9f661cad9e3c03e0f16197932edfd4 100644 (file)
       <entry>Owner of the collation</entry>
      </row>
 
+     <row>
+      <entry><structfield>collprovider</structfield></entry>
+      <entry><type>char</type></entry>
+      <entry></entry>
+      <entry>Provider of the collation: <literal>d</literal> = database
+       default, <literal>c</literal> = libc, <literal>i</literal> = icu</entry>
+     </row>
+
      <row>
       <entry><structfield>collencoding</structfield></entry>
       <entry><type>int4</type></entry>
       <entry></entry>
       <entry><symbol>LC_CTYPE</> for this collation object</entry>
      </row>
+
+     <row>
+      <entry><structfield>collversion</structfield></entry>
+      <entry><type>text</type></entry>
+      <entry></entry>
+      <entry>
+       Provider-specific version of the collation.  This is recorded when the
+       collation is created and then checked when it is used, to detect
+       changes in the collation definition that could lead to data corruption.
+      </entry>
+     </row>
     </tbody>
    </tgroup>
   </table>
index 2aba0fc5282fc35263d50f58601f352f819b5d05..5c55f397f8ec238dbaee572c24d71f843cf50fd2 100644 (file)
@@ -500,20 +500,46 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR";
    <title>Managing Collations</title>
 
    <para>
-    A collation is an SQL schema object that maps an SQL name to
-    operating system locales.  In particular, it maps to a combination
-    of <symbol>LC_COLLATE</symbol> and <symbol>LC_CTYPE</symbol>.  (As
+    A collation is an SQL schema object that maps an SQL name to locales
+    provided by libraries installed in the operating system.  A collation
+    definition has a <firstterm>provider</firstterm> that specifies which
+    library supplies the locale data.  One standard provider name
+    is <literal>libc</literal>, which uses the locales provided by the
+    operating system C library.  These are the locales that most tools
+    provided by the operating system use.  Another provider
+    is <literal>icu</literal>, which uses the external
+    ICU<indexterm><primary>ICU</></> library.  Support for ICU has to be
+    configured when PostgreSQL is built.
+   </para>
+
+   <para>
+    A collation object provided by <literal>libc</literal> maps to a
+    combination of <symbol>LC_COLLATE</symbol> and <symbol>LC_CTYPE</symbol>
+    settings.  (As
     the name would suggest, the main purpose of a collation is to set
     <symbol>LC_COLLATE</symbol>, which controls the sort order.  But
     it is rarely necessary in practice to have an
     <symbol>LC_CTYPE</symbol> setting that is different from
     <symbol>LC_COLLATE</symbol>, so it is more convenient to collect
     these under one concept than to create another infrastructure for
-    setting <symbol>LC_CTYPE</symbol> per expression.)  Also, a collation
+    setting <symbol>LC_CTYPE</symbol> per expression.)  Also,
+    a <literal>libc</literal> collation
     is tied to a character set encoding (see <xref linkend="multibyte">).
     The same collation name may exist for different encodings.
    </para>
 
+   <para>
+    A collation provided by <literal>icu</literal> maps to a named collator
+    provided by the ICU library.  ICU does not support
+    separate <quote>collate</quote> and <quote>ctype</quote> settings, so they
+    are always the same.  Also, ICU collations are independent of the
+    encoding, so there is always only one ICU collation for a given name in a
+    database.
+   </para>
+
+   <sect3>
+    <title>Standard Collations</title>
+
    <para>
     On all platforms, the collations named <literal>default</>,
     <literal>C</>, and <literal>POSIX</> are available.  Additional
@@ -527,13 +553,37 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR";
     code byte values.
    </para>
 
+   <para>
+    Additionally, the SQL standard collation name <literal>ucs_basic</literal>
+    is available for encoding <literal>UTF8</literal>.  It is equivalent
+    to <literal>C</literal> and sorts by Unicode code point.
+   </para>
+  </sect3>
+
+  <sect3>
+   <title>Predefined Collations</title>
+
    <para>
     If the operating system provides support for using multiple locales
     within a single program (<function>newlocale</> and related functions),
+    or support for ICU is configured,
     then when a database cluster is initialized, <command>initdb</command>
     populates the system catalog <literal>pg_collation</literal> with
     collations based on all the locales it finds on the operating
-    system at the time.  For example, the operating system might
+    system at the time.
+   </para>
+
+   <para>
+    To inspect the currently available locales, use the query <literal>SELECT
+    * FROM pg_collation</literal>, or the command <command>\dOS+</command>
+    in <application>psql</application>.
+   </para>
+
+  <sect4>
+   <title>libc collations</title>
+
+   <para>
+    For example, the operating system might
     provide a locale named <literal>de_DE.utf8</literal>.
     <command>initdb</command> would then create a collation named
     <literal>de_DE.utf8</literal> for encoding <literal>UTF8</literal>
@@ -548,13 +598,14 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR";
    </para>
 
    <para>
-    In case a collation is needed that has different values for
-    <symbol>LC_COLLATE</symbol> and <symbol>LC_CTYPE</symbol>, a new
-    collation may be created using
-    the <xref linkend="sql-createcollation"> command.  That command
-    can also be used to create a new collation from an existing
-    collation, which can be useful to be able to use
-    operating-system-independent collation names in applications.
+    The default set of collations provided by <literal>libc</literal> maps
+    directly to the locales installed in the operating system, which can be
+    listed using the command <literal>locale -a</literal>.  In case
+    a <literal>libc</literal> collation is needed that has different values
+    for <symbol>LC_COLLATE</symbol> and <symbol>LC_CTYPE</symbol>, or new
+    locales are installed in the operating system after the database system
+    was initialized, then a new collation may be created using
+    the <xref linkend="sql-createcollation"> command.
    </para>
 
    <para>
@@ -566,8 +617,8 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR";
     Use of the stripped collation names is recommended, since it will
     make one less thing you need to change if you decide to change to
     another database encoding.  Note however that the <literal>default</>,
-    <literal>C</>, and <literal>POSIX</> collations can be used
-    regardless of the database encoding.
+    <literal>C</>, and <literal>POSIX</> collations, as well as all collations
+    provided by ICU, can be used regardless of the database encoding.
    </para>
 
    <para>
@@ -581,6 +632,104 @@ SELECT a COLLATE "C" &lt; b COLLATE "POSIX" FROM test1;
     collations have identical behaviors.  Mixing stripped and non-stripped
     collation names is therefore not recommended.
    </para>
+  </sect4>
+
+  <sect4>
+   <title>ICU collations</title>
+
+   <para>
+    Collations provided by ICU are created with names in BCP 47 language tag
+    format, with a <quote>private use</quote>
+    extension <literal>-x-icu</literal> appended, to distinguish them from
+    libc locales.  So <literal>de-x-icu</literal> would be an example.
+   </para>
+
+   <para>
+    With ICU, it is not sensible to enumerate all possible locale names.  ICU
+    uses a particular naming system for locales, but there are many more ways
+    to name a locale than there are actually distinct locales.  (In fact, any
+    string will be accepted as a locale name.)
+    See <ulink url="http://userguide.icu-project.org/locale"></ulink> for
+    information on ICU locale naming.  <command>initdb</command> uses the ICU
+    APIs to extract a set of locales with distinct collation rules to populate
+    the initial set of collations.  Here are some example collations that
+    might be created:
+
+    <variablelist>
+     <varlistentry>
+      <term><literal>de-x-icu</literal></term>
+      <listitem>
+       <para>German collation, default variant</para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><literal>de-u-co-phonebk-x-icu</literal></term>
+      <listitem>
+       <para>German collation, phone book variant</para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><literal>de-AT-x-icu</literal></term>
+      <listitem>
+       <para>German collation for Austria, default variant</para>
+       <para>
+        (Note that as of this writing, there is no,
+        say, <literal>de-DE-x-icu</literal> or <literal>de-CH-x-icu</literal>,
+        because those are equivalent to <literal>de-x-icu</literal>.)
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><literal>de-AT-u-co-phonebk-x-icu</literal></term>
+      <listitem>
+       <para>German collation for Austria, phone book variant</para>
+      </listitem>
+     </varlistentry>
+     <varlistentry>
+      <term><literal>und-x-icu</literal> (for <quote>undetermined</quote>)</term>
+      <listitem>
+       <para>
+        ICU <quote>root</quote> collation.  Use this to get a reasonable
+        language-agnostic sort order.
+       </para>
+      </listitem>
+     </varlistentry>
+    </variablelist>
+   </para>
+
+   <para>
+    Some (less frequently used) encodings are not supported by ICU.  If the
+    database cluster was initialized with such an encoding, no ICU collations
+    will be predefined.
+   </para>
+   </sect4>
+   </sect3>
+
+   <sect3>
+   <title>Copying Collations</title>
+
+   <para>
+    The command <xref linkend="sql-createcollation"> can also be used to
+    create a new collation from an existing collation, which can be useful to
+    be able to use operating-system-independent collation names in
+    applications, create compatibility names, or use an ICU-provided collation
+    under a more readable name.  For example:
+<programlisting>
+CREATE COLLATION german FROM "de_DE";
+CREATE COLLATION french FROM "fr-x-icu";
+CREATE COLLATION "de-DE-x-icu" FROM "de-x-icu";
+</programlisting>
+   </para>
+
+   <para>
+    The standard and predefined collations are in the
+    schema <literal>pg_catalog</literal>, like all predefined objects.
+    User-defined collations should be created in user schemas.  This also
+    ensures that they are saved by <command>pg_dump</command>.
+   </para>
   </sect2>
  </sect1>
 
index 4dc30caccb63d422fcbdc7c8dc1fb33904ebd1c6..64f86ce6806ae41b5b0300c1e8f3c91b39a2c873 100644 (file)
@@ -19545,6 +19545,14 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
      </thead>
 
      <tbody>
+      <row>
+       <entry>
+        <indexterm><primary>pg_collation_actual_version</primary></indexterm>
+        <literal><function>pg_collation_actual_version(<type>oid</>)</function></literal>
+       </entry>
+       <entry><type>text</type></entry>
+       <entry>Return actual version of collation from operating system</entry>
+      </row>
       <row>
        <entry>
         <indexterm><primary>pg_import_system_collations</primary></indexterm>
@@ -19557,6 +19565,15 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
     </tgroup>
    </table>
 
+   <para>
+    <function>pg_collation_actual_version</function> returns the actual
+    version of the collation object as it is currently installed in the
+    operating system.  If this is different from the value
+    in <literal>pg_collation.collversion</literal>, then objects depending on
+    the collation might need to be rebuilt.  See also
+    <xref linkend="sql-altercollation">.
+   </para>
+
    <para>
     <function>pg_import_system_collations</> populates the system
     catalog <literal>pg_collation</literal> with collations based on all the
index f8a222e6379ca699eb6f652c4426dce2c9136e26..39774a058ac1afce52501fc1f8fdabb1bc8efe41 100644 (file)
@@ -766,6 +766,21 @@ su - postgres
        </listitem>
       </varlistentry>
 
+      <varlistentry>
+       <term><option>--with-icu</option></term>
+       <listitem>
+        <para>
+         Build with support for
+         the <productname>ICU</productname><indexterm><primary>ICU</></>
+         library.  This requires the <productname>ICU4C</productname> package
+         as well
+         as <productname>pkg-config</productname><indexterm><primary>pkg-config</></>
+         to be installed.  The minimum required version
+         of <productname>ICU4C</productname> is currently 4.6.
+        </para>
+       </listitem>
+      </varlistentry>
+
       <varlistentry>
        <term><option>--with-openssl</option>
        <indexterm>
index 306def4a15a0425942878580ec11da19f591db74..82e69fe2d277e9fb58e3347b02bf4f33a342b0ca 100644 (file)
@@ -967,7 +967,8 @@ ERROR:  could not serialize access due to read/write dependencies among transact
         </para>
 
         <para>
-         Acquired by <command>CREATE TRIGGER</command> and many forms of
+         Acquired by <command>CREATE COLLATION</command>,
+         <command>CREATE TRIGGER</command>, and many forms of
          <command>ALTER TABLE</command> (see <xref linkend="SQL-ALTERTABLE">).
         </para>
        </listitem>
index 6708c7e10e896964ecba85c5cb7a8a41bcef4d12..bf934ce75f1f37325a17023c56077a6145ab71fc 100644 (file)
@@ -21,6 +21,8 @@ PostgreSQL documentation
 
  <refsynopsisdiv>
 <synopsis>
+ALTER COLLATION <replaceable>name</replaceable> REFRESH VERSION
+
 ALTER COLLATION <replaceable>name</replaceable> RENAME TO <replaceable>new_name</replaceable>
 ALTER COLLATION <replaceable>name</replaceable> OWNER TO { <replaceable>new_owner</replaceable> | CURRENT_USER | SESSION_USER }
 ALTER COLLATION <replaceable>name</replaceable> SET SCHEMA <replaceable>new_schema</replaceable>
@@ -85,9 +87,62 @@ ALTER COLLATION <replaceable>name</replaceable> SET SCHEMA <replaceable>new_sche
      </para>
     </listitem>
    </varlistentry>
+
+   <varlistentry>
+    <term><literal>REFRESH VERSION</literal></term>
+    <listitem>
+     <para>
+      Update the collation version.
+      See <xref linkend="sql-altercollation-notes"> below.
+     </para>
+    </listitem>
+   </varlistentry>
   </variablelist>
  </refsect1>
 
+ <refsect1 id="sql-altercollation-notes">
+  <title>Notes</title>
+
+  <para>
+   When using collations provided by the ICU library, the ICU-specific version
+   of the collator is recorded in the system catalog when the collation object
+   is created.  When the collation is then used, the current version is
+   checked against the recorded version, and a warning is issued when there is
+   a mismatch, for example:
+<screen>
+WARNING:  ICU collator version mismatch
+DETAIL:  The database was created using version 1.2.3.4, the library provides version 2.3.4.5.
+HINT:  Rebuild all objects affected by this collation and run ALTER COLLATION pg_catalog."xx-x-icu" REFRESH VERSION, or build PostgreSQL with the right version of ICU.
+</screen>
+   A change in collation definitions can lead to corrupt indexes and other
+   problems where the database system relies on stored objects having a
+   certain sort order.  Generally, this should be avoided, but it can happen
+   in legitimate circumstances, such as when
+   using <command>pg_upgrade</command> to upgrade to server binaries linked
+   with a newer version of ICU.  When this happens, all objects depending on
+   the collation should be rebuilt, for example,
+   using <command>REINDEX</command>.  When that is done, the collation version
+   can be refreshed using the command <literal>ALTER COLLATION ... REFRESH
+   VERSION</literal>.  This will update the system catalog to record the
+   current collator version and will make the warning go away.  Note that this
+   does not actually check whether all affected objects have been rebuilt
+   correctly.
+  </para>
+
+  <para>
+   The following query can be used to identify all collations in the current
+   database that need to be refreshed and the objects that depend on them:
+<programlisting><![CDATA[
+SELECT pg_describe_object(refclassid, refobjid, refobjsubid) AS "Collation",
+       pg_describe_object(classid, objid, objsubid) AS "Object"
+  FROM pg_depend d JOIN pg_collation c
+       ON refclassid = 'pg_collation'::regclass AND refobjid = c.oid
+  WHERE c.collversion <> pg_collation_actual_version(c.oid)
+  ORDER BY 1, 2;
+]]></programlisting>
+  </para>
+ </refsect1>
+
  <refsect1>
   <title>Examples</title>
 
index c09e5bd6d4079b6d4c8961bb6a207426a6e3b52d..47de9a09b69edcf383e2197f76e18b654ce6e15c 100644 (file)
@@ -21,7 +21,9 @@
 CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> (
     [ LOCALE = <replaceable>locale</replaceable>, ]
     [ LC_COLLATE = <replaceable>lc_collate</replaceable>, ]
-    [ LC_CTYPE = <replaceable>lc_ctype</replaceable> ]
+    [ LC_CTYPE = <replaceable>lc_ctype</replaceable>, ]
+    [ PROVIDER = <replaceable>provider</replaceable>, ]
+    [ VERSION = <replaceable>version</replaceable> ]
 )
 CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> FROM <replaceable>existing_collation</replaceable>
 </synopsis>
@@ -113,6 +115,39 @@ CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> FROM <replace
      </listitem>
     </varlistentry>
 
+    <varlistentry>
+     <term><replaceable>provider</replaceable></term>
+
+     <listitem>
+      <para>
+       Specifies the provider to use for locale services associated with this
+       collation.  Possible values
+       are: <literal>icu</literal>,<indexterm><primary>ICU</></> <literal>libc</literal>.
+       The available choices depend on the operating system and build options.
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+     <term><replaceable>version</replaceable></term>
+
+     <listitem>
+      <para>
+       Specifies the version string to store with the collation.  Normally,
+       this should be omitted, which will cause the version to be computed
+       from the actual version of the collation as provided by the operating
+       system.  This option is intended to be used
+       by <command>pg_upgrade</command> for copying the version from an
+       existing installation.
+      </para>
+
+      <para>
+       See also <xref linkend="sql-altercollation"> for how to handle
+       collation version mismatches.
+      </para>
+     </listitem>
+    </varlistentry>
+
     <varlistentry>
      <term><replaceable>existing_collation</replaceable></term>
 
index 8e1d6e3bd4439f79e9811a1e30cf890bd64e79ee..4acf7d2f060812d95fb3ee1a3751230b12b744c3 100644 (file)
@@ -179,6 +179,7 @@ pgxsdir = $(pkglibdir)/pgxs
 #
 # Records the choice of the various --enable-xxx and --with-xxx options.
 
+with_icu       = @with_icu@
 with_perl      = @with_perl@
 with_python    = @with_python@
 with_tcl       = @with_tcl@
@@ -208,6 +209,9 @@ python_version              = @python_version@
 
 krb_srvtab = @krb_srvtab@
 
+ICU_CFLAGS             = @ICU_CFLAGS@
+ICU_LIBS               = @ICU_LIBS@
+
 TCLSH                  = @TCLSH@
 TCL_LIBS               = @TCL_LIBS@
 TCL_LIB_SPEC           = @TCL_LIB_SPEC@
index 7a0bbb29424d145f3ec5b67a299f7dfcd8d8054d..fffb0d95bad3ab27c1f06f3c7fbd8157b0f5f82c 100644 (file)
@@ -58,7 +58,7 @@ ifneq ($(PORTNAME), win32)
 ifneq ($(PORTNAME), aix)
 
 postgres: $(OBJS)
-       $(CC) $(CFLAGS) $(LDFLAGS) $(LDFLAGS_EX) $(export_dynamic) $(call expand_subsys,$^) $(LIBS) -o $@
+       $(CC) $(CFLAGS) $(LDFLAGS) $(LDFLAGS_EX) $(export_dynamic) $(call expand_subsys,$^) $(LIBS) $(ICU_LIBS) -o $@
 
 endif
 endif
index 65b6051c0d197cdca5323fa5017a7439c99322d9..ede920955d75f1176a8bc3dff3e2d5ada1a14d68 100644 (file)
@@ -27,6 +27,7 @@
 #include "mb/pg_wchar.h"
 #include "utils/builtins.h"
 #include "utils/fmgroids.h"
+#include "utils/pg_locale.h"
 #include "utils/rel.h"
 #include "utils/syscache.h"
 #include "utils/tqual.h"
 Oid
 CollationCreate(const char *collname, Oid collnamespace,
                                Oid collowner,
+                               char collprovider,
                                int32 collencoding,
                                const char *collcollate, const char *collctype,
+                               const char *collversion,
                                bool if_not_exists)
 {
        Relation        rel;
@@ -78,29 +81,47 @@ CollationCreate(const char *collname, Oid collnamespace,
                {
                        ereport(NOTICE,
                                (errcode(ERRCODE_DUPLICATE_OBJECT),
-                                errmsg("collation \"%s\" for encoding \"%s\" already exists, skipping",
-                                               collname, pg_encoding_to_char(collencoding))));
+                                collencoding == -1
+                                ? errmsg("collation \"%s\" already exists, skipping",
+                                                 collname)
+                                : errmsg("collation \"%s\" for encoding \"%s\" already exists, skipping",
+                                                 collname, pg_encoding_to_char(collencoding))));
                        return InvalidOid;
                }
                else
                        ereport(ERROR,
                                        (errcode(ERRCODE_DUPLICATE_OBJECT),
-                                        errmsg("collation \"%s\" for encoding \"%s\" already exists",
-                                                       collname, pg_encoding_to_char(collencoding))));
+                                        collencoding == -1
+                                        ? errmsg("collation \"%s\" already exists",
+                                                         collname)
+                                        : errmsg("collation \"%s\" for encoding \"%s\" already exists",
+                                                         collname, pg_encoding_to_char(collencoding))));
        }
 
+       /* open pg_collation; see below about the lock level */
+       rel = heap_open(CollationRelationId, ShareRowExclusiveLock);
+
        /*
-        * Also forbid matching an any-encoding entry.  This test of course is not
-        * backed up by the unique index, but it's not a problem since we don't
-        * support adding any-encoding entries after initdb.
+        * Also forbid a specific-encoding collation shadowing an any-encoding
+        * collation, or an any-encoding collation being shadowed (see
+        * get_collation_name()).  This test is not backed up by the unique index,
+        * so we take a ShareRowExclusiveLock earlier, to protect against
+        * concurrent changes fooling this check.
         */
-       if (SearchSysCacheExists3(COLLNAMEENCNSP,
-                                                         PointerGetDatum(collname),
-                                                         Int32GetDatum(-1),
-                                                         ObjectIdGetDatum(collnamespace)))
+       if ((collencoding == -1 &&
+                SearchSysCacheExists3(COLLNAMEENCNSP,
+                                                          PointerGetDatum(collname),
+                                                          Int32GetDatum(GetDatabaseEncoding()),
+                                                          ObjectIdGetDatum(collnamespace))) ||
+               (collencoding != -1 &&
+                SearchSysCacheExists3(COLLNAMEENCNSP,
+                                                          PointerGetDatum(collname),
+                                                          Int32GetDatum(-1),
+                                                          ObjectIdGetDatum(collnamespace))))
        {
                if (if_not_exists)
                {
+                       heap_close(rel, NoLock);
                        ereport(NOTICE,
                                (errcode(ERRCODE_DUPLICATE_OBJECT),
                                 errmsg("collation \"%s\" already exists, skipping",
@@ -114,8 +135,6 @@ CollationCreate(const char *collname, Oid collnamespace,
                                                collname)));
        }
 
-       /* open pg_collation */
-       rel = heap_open(CollationRelationId, RowExclusiveLock);
        tupDesc = RelationGetDescr(rel);
 
        /* form a tuple */
@@ -125,11 +144,16 @@ CollationCreate(const char *collname, Oid collnamespace,
        values[Anum_pg_collation_collname - 1] = NameGetDatum(&name_name);
        values[Anum_pg_collation_collnamespace - 1] = ObjectIdGetDatum(collnamespace);
        values[Anum_pg_collation_collowner - 1] = ObjectIdGetDatum(collowner);
+       values[Anum_pg_collation_collprovider - 1] = CharGetDatum(collprovider);
        values[Anum_pg_collation_collencoding - 1] = Int32GetDatum(collencoding);
        namestrcpy(&name_collate, collcollate);
        values[Anum_pg_collation_collcollate - 1] = NameGetDatum(&name_collate);
        namestrcpy(&name_ctype, collctype);
        values[Anum_pg_collation_collctype - 1] = NameGetDatum(&name_ctype);
+       if (collversion)
+               values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(collversion);
+       else
+               nulls[Anum_pg_collation_collversion - 1] = true;
 
        tup = heap_form_tuple(tupDesc, values, nulls);
 
@@ -159,7 +183,7 @@ CollationCreate(const char *collname, Oid collnamespace,
        InvokeObjectPostCreateHook(CollationRelationId, oid, 0);
 
        heap_freetuple(tup);
-       heap_close(rel, RowExclusiveLock);
+       heap_close(rel, NoLock);
 
        return oid;
 }
index 919cfc6a0679ac80418aea5ce871278dc09b3c94..835cb263db3aa1220dc7bbc126277662ddd9dab5 100644 (file)
  */
 #include "postgres.h"
 
+#include "access/heapam.h"
 #include "access/htup_details.h"
 #include "access/xact.h"
 #include "catalog/dependency.h"
 #include "catalog/indexing.h"
 #include "catalog/namespace.h"
+#include "catalog/objectaccess.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_collation_fn.h"
 #include "commands/alter.h"
 #include "commands/collationcmds.h"
+#include "commands/comment.h"
 #include "commands/dbcommands.h"
 #include "commands/defrem.h"
 #include "mb/pg_wchar.h"
@@ -33,6 +36,7 @@
 #include "utils/rel.h"
 #include "utils/syscache.h"
 
+
 /*
  * CREATE COLLATION
  */
@@ -47,8 +51,14 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
        DefElem    *localeEl = NULL;
        DefElem    *lccollateEl = NULL;
        DefElem    *lcctypeEl = NULL;
+       DefElem    *providerEl = NULL;
+       DefElem    *versionEl = NULL;
        char       *collcollate = NULL;
        char       *collctype = NULL;
+       char       *collproviderstr = NULL;
+       int                     collencoding;
+       char            collprovider = 0;
+       char       *collversion = NULL;
        Oid                     newoid;
        ObjectAddress address;
 
@@ -72,6 +82,10 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
                        defelp = &lccollateEl;
                else if (pg_strcasecmp(defel->defname, "lc_ctype") == 0)
                        defelp = &lcctypeEl;
+               else if (pg_strcasecmp(defel->defname, "provider") == 0)
+                       defelp = &providerEl;
+               else if (pg_strcasecmp(defel->defname, "version") == 0)
+                       defelp = &versionEl;
                else
                {
                        ereport(ERROR,
@@ -103,6 +117,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
 
                collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate));
                collctype = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype));
+               collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
 
                ReleaseSysCache(tp);
        }
@@ -119,6 +134,27 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
        if (lcctypeEl)
                collctype = defGetString(lcctypeEl);
 
+       if (providerEl)
+               collproviderstr = defGetString(providerEl);
+
+       if (versionEl)
+               collversion = defGetString(versionEl);
+
+       if (collproviderstr)
+       {
+               if (pg_strcasecmp(collproviderstr, "icu") == 0)
+                       collprovider = COLLPROVIDER_ICU;
+               else if (pg_strcasecmp(collproviderstr, "libc") == 0)
+                       collprovider = COLLPROVIDER_LIBC;
+               else
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                                        errmsg("unrecognized collation provider: %s",
+                                                       collproviderstr)));
+       }
+       else if (!fromEl)
+               collprovider = COLLPROVIDER_LIBC;
+
        if (!collcollate)
                ereport(ERROR,
                                (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
@@ -129,14 +165,25 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
                                (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                                 errmsg("parameter \"lc_ctype\" must be specified")));
 
-       check_encoding_locale_matches(GetDatabaseEncoding(), collcollate, collctype);
+       if (collprovider == COLLPROVIDER_ICU)
+               collencoding = -1;
+       else
+       {
+               collencoding = GetDatabaseEncoding();
+               check_encoding_locale_matches(collencoding, collcollate, collctype);
+       }
+
+       if (!collversion)
+               collversion = get_collation_actual_version(collprovider, collcollate);
 
        newoid = CollationCreate(collName,
                                                         collNamespace,
                                                         GetUserId(),
-                                                        GetDatabaseEncoding(),
+                                                        collprovider,
+                                                        collencoding,
                                                         collcollate,
                                                         collctype,
+                                                        collversion,
                                                         if_not_exists);
 
        if (!OidIsValid(newoid))
@@ -182,16 +229,118 @@ IsThereCollationInNamespace(const char *collname, Oid nspOid)
                                                collname, get_namespace_name(nspOid))));
 }
 
+/*
+ * ALTER COLLATION
+ *
+ * Refreshes the version recorded for a collation.  The collation named by
+ * stmt->collname is looked up (erroring out if it does not exist), the
+ * current user must own it, and the version stored in
+ * pg_collation.collversion is compared with the one returned by
+ * get_collation_actual_version() for the collation's collprovider and
+ * collcollate values.
+ *
+ * If both versions are non-NULL and differ, a NOTICE is emitted and the
+ * stored version is replaced.  If they are equal, or both NULL, a NOTICE
+ * reports that the version has not changed.  A transition between NULL and
+ * non-NULL is rejected with an error.  Note that CatalogTupleUpdate() and
+ * the post-alter hook are invoked in every non-error case, whether or not
+ * the version actually changed.
+ *
+ * Returns the ObjectAddress of the collation.  pg_collation is opened with
+ * RowExclusiveLock and closed with NoLock, so the lock is not released by
+ * this function.
+ */
+ObjectAddress
+AlterCollation(AlterCollationStmt *stmt)
+{
+       Relation        rel;
+       Oid                     collOid;
+       HeapTuple       tup;
+       Form_pg_collation collForm;
+       Datum           collversion;
+       bool            isnull;
+       char       *oldversion;
+       char       *newversion;
+       ObjectAddress address;
+
+       rel = heap_open(CollationRelationId, RowExclusiveLock);
+       collOid = get_collation_oid(stmt->collname, false);
+
+       if (!pg_collation_ownercheck(collOid, GetUserId()))
+               aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_COLLATION,
+                                          NameListToString(stmt->collname));
+
+       tup = SearchSysCacheCopy1(COLLOID, ObjectIdGetDatum(collOid));
+       if (!HeapTupleIsValid(tup))
+               elog(ERROR, "cache lookup failed for collation %u", collOid);
+
+       collForm = (Form_pg_collation) GETSTRUCT(tup);
+       collversion = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion,
+                                                                 &isnull);
+       oldversion = isnull ? NULL : TextDatumGetCString(collversion);
+
+       newversion = get_collation_actual_version(collForm->collprovider, NameStr(collForm->collcollate));
+
+       /* cannot change from NULL to non-NULL or vice versa */
+       if ((!oldversion && newversion) || (oldversion && !newversion))
+               elog(ERROR, "invalid collation version change");
+       else if (oldversion && newversion && strcmp(newversion, oldversion) != 0)
+       {
+               bool        nulls[Natts_pg_collation];
+               bool        replaces[Natts_pg_collation];
+               Datum       values[Natts_pg_collation];
+
+               ereport(NOTICE,
+                               (errmsg("changing version from %s to %s",
+                                               oldversion, newversion)));
+
+               memset(values, 0, sizeof(values));
+               memset(nulls, false, sizeof(nulls));
+               memset(replaces, false, sizeof(replaces));
+
+               values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(newversion);
+               replaces[Anum_pg_collation_collversion - 1] = true;
+
+               tup = heap_modify_tuple(tup, RelationGetDescr(rel),
+                                                               values, nulls, replaces);
+       }
+       else
+               ereport(NOTICE,
+                               (errmsg("version has not changed")));
+
+       CatalogTupleUpdate(rel, &tup->t_self, tup);
+
+       InvokeObjectPostAlterHook(CollationRelationId, collOid, 0);
+
+       ObjectAddressSet(address, CollationRelationId, collOid);
+
+       heap_freetuple(tup);
+       heap_close(rel, NoLock);
+
+       return address;
+}
+
+
+/*
+ * SQL-callable function: report the version that
+ * get_collation_actual_version() currently yields for the collation whose
+ * OID is passed as argument 0.
+ *
+ * Raises an error if no pg_collation entry with that OID is found.  Returns
+ * the version as text, or SQL NULL when no version string is available.
+ */
+Datum
+pg_collation_actual_version(PG_FUNCTION_ARGS)
+{
+       Oid                     collationOid = PG_GETARG_OID(0);
+       HeapTuple       cacheEntry;
+       Form_pg_collation collationForm;
+       char            provider;
+       char       *localeName;
+       char       *actualVersion;
+
+       cacheEntry = SearchSysCache1(COLLOID, ObjectIdGetDatum(collationOid));
+       if (!HeapTupleIsValid(cacheEntry))
+               ereport(ERROR,
+                               (errcode(ERRCODE_UNDEFINED_OBJECT),
+                                errmsg("collation with OID %u does not exist", collationOid)));
+
+       /* Copy what is needed out of the tuple before releasing the cache entry. */
+       collationForm = (Form_pg_collation) GETSTRUCT(cacheEntry);
+       localeName = pstrdup(NameStr(collationForm->collcollate));
+       provider = collationForm->collprovider;
+       ReleaseSysCache(cacheEntry);
+
+       actualVersion = get_collation_actual_version(provider, localeName);
+       if (actualVersion == NULL)
+               PG_RETURN_NULL();
+
+       PG_RETURN_TEXT_P(cstring_to_text(actualVersion));
+}
+
 
 /*
- * "Normalize" a locale name, stripping off encoding tags such as
+ * "Normalize" a libc locale name, stripping off encoding tags such as
  * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
  * -> "br_FR@euro").  Return true if a new, different name was
  * generated.
  */
 pg_attribute_unused()
 static bool
-normalize_locale_name(char *new, const char *old)
+normalize_libc_locale_name(char *new, const char *old)
 {
        char       *n = new;
        const char *o = old;
@@ -219,6 +368,46 @@ normalize_locale_name(char *new, const char *old)
 }
 
 
+#ifdef USE_ICU
+/*
+ * Convert an ICU locale name into a language tag using
+ * uloc_toLanguageTag() in strict mode.
+ *
+ * localename: the ICU locale identifier to convert.
+ *
+ * Returns a pstrdup'd copy of the resulting tag.  Raises an error if ICU
+ * reports a failure, or if the tag fills the local buffer so completely
+ * that no terminating NUL could be written.
+ */
+static char *
+get_icu_language_tag(const char *localename)
+{
+       char            buf[ULOC_FULLNAME_CAPACITY];
+       UErrorCode      status;
+
+       status = U_ZERO_ERROR;
+       uloc_toLanguageTag(localename, buf, sizeof(buf), TRUE, &status);
+
+       /*
+        * U_STRING_NOT_TERMINATED_WARNING is only a warning, so U_FAILURE() does
+        * not catch it, but it means buf holds no terminating NUL and must not
+        * be passed to pstrdup().
+        */
+       if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
+               ereport(ERROR,
+                               (errmsg("could not convert locale name \"%s\" to language tag: %s",
+                                               localename, u_errorName(status))));
+
+       return pstrdup(buf);
+}
+
+
+/*
+ * Build a human-readable description of an ICU locale: its display name
+ * as rendered for the "en" display locale.
+ *
+ * localename: the ICU locale identifier to describe.
+ *
+ * Returns the string produced by icu_from_uchar() from the UChar display
+ * name (presumably in the database encoding -- confirm against
+ * icu_from_uchar()).  Raises an error if ICU cannot supply the display
+ * name, including when it does not fit into the local buffer.
+ */
+static char *
+get_icu_locale_comment(const char *localename)
+{
+       UErrorCode      status;
+       UChar           displayname[128];
+       int32           len_uchar;
+       char       *result;
+
+       status = U_ZERO_ERROR;
+       /* The capacity argument is counted in UChars, not bytes. */
+       len_uchar = uloc_getDisplayName(localename, "en", &displayname[0], lengthof(displayname), &status);
+       if (U_FAILURE(status))
+               ereport(ERROR,
+                               (errmsg("could not get display name for locale \"%s\": %s",
+                                               localename, u_errorName(status))));
+
+       icu_from_uchar(&result, displayname, len_uchar);
+
+       return result;
+}
+#endif /* USE_ICU */
+
+
 Datum
 pg_import_system_collations(PG_FUNCTION_ARGS)
 {
@@ -302,8 +491,10 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
 
                count++;
 
-               CollationCreate(localebuf, nspid, GetUserId(), enc,
-                                               localebuf, localebuf, if_not_exists);
+               CollationCreate(localebuf, nspid, GetUserId(), COLLPROVIDER_LIBC, enc,
+                                               localebuf, localebuf,
+                                               get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
+                                               if_not_exists);
 
                CommandCounterIncrement();
 
@@ -316,7 +507,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
                 * "locale -a" output.  So save up the aliases and try to add them
                 * after we've read all the output.
                 */
-               if (normalize_locale_name(alias, localebuf))
+               if (normalize_libc_locale_name(alias, localebuf))
                {
                        aliaslist = lappend(aliaslist, pstrdup(alias));
                        localelist = lappend(localelist, pstrdup(localebuf));
@@ -333,8 +524,10 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
                char       *locale = (char *) lfirst(lcl);
                int                     enc = lfirst_int(lce);
 
-               CollationCreate(alias, nspid, GetUserId(), enc,
-                                               locale, locale, true);
+               CollationCreate(alias, nspid, GetUserId(), COLLPROVIDER_LIBC, enc,
+                                               locale, locale,
+                                               get_collation_actual_version(COLLPROVIDER_LIBC, locale),
+                                               true);
                CommandCounterIncrement();
        }
 
@@ -343,5 +536,82 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
                                (errmsg("no usable system locales were found")));
 #endif   /* not HAVE_LOCALE_T && not WIN32 */
 
+#ifdef USE_ICU
+       /*
+        * Create collations for the locales ICU reports as available (via
+        * ucol_countAvailable()/ucol_getAvailable()), plus the root locale, plus
+        * one per "collation" keyword value of each locale.  Each collation is
+        * named after its language tag with "-x-icu" appended and gets the
+        * locale's display name as its comment.  If ICU does not support the
+        * database encoding, only a NOTICE is issued and nothing is created.
+        */
+       if (!is_encoding_supported_by_icu(GetDatabaseEncoding()))
+       {
+               ereport(NOTICE,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                errmsg("encoding \"%s\" not supported by ICU",
+                                               pg_encoding_to_char(GetDatabaseEncoding()))));
+       }
+       else
+       {
+               int i;
+
+               /*
+                * Start the loop at -1 to sneak in the root locale without too much
+                * code duplication.
+                */
+               for (i = -1; i < ucol_countAvailable(); i++)
+               {
+                       const char *name;
+                       char       *langtag;
+                       const char *collcollate;
+                       UEnumeration *en;
+                       UErrorCode      status;
+                       const char *val;
+                       Oid                     collid;
+
+                       if (i == -1)
+                               name = "";  /* ICU root locale */
+                       else
+                               name = ucol_getAvailable(i);
+
+                       langtag = get_icu_language_tag(name);
+                       /*
+                        * With ICU major version 54 or later the language tag is what
+                        * gets passed to CollationCreate() as the locale; with older
+                        * versions the plain ICU locale name is used instead.
+                        *
+                        * -1 is passed in the argument position where the libc calls
+                        * above pass the encoding (presumably "any encoding" -- confirm
+                        * against CollationCreate()).
+                        */
+                       collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name;
+                       collid = CollationCreate(psprintf("%s-x-icu", langtag),
+                                                                        nspid, GetUserId(), COLLPROVIDER_ICU, -1,
+                                                                        collcollate, collcollate,
+                                                                        get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
+                                                                        if_not_exists);
+
+                       /*
+                        * NOTE(review): collid is used without a validity check.  If
+                        * CollationCreate() can return InvalidOid when if_not_exists is
+                        * set and the collation already exists, this would attach a
+                        * comment to an invalid OID -- confirm.  Also, unlike the libc
+                        * loops above, there is no CommandCounterIncrement() after each
+                        * creation here -- confirm that is intended.
+                        */
+                       CreateComments(collid, CollationRelationId, 0,
+                                                  get_icu_locale_comment(name));
+
+                       /*
+                        * Add keyword variants
+                        */
+                       status = U_ZERO_ERROR;
+                       en = ucol_getKeywordValuesForLocale("collation", name, TRUE, &status);
+                       if (U_FAILURE(status))
+                               ereport(ERROR,
+                                               (errmsg("could not get keyword values for locale \"%s\": %s",
+                                                               name, u_errorName(status))));
+
+                       status = U_ZERO_ERROR;
+                       uenum_reset(en, &status);
+                       /* one collation per value, built as "<name>@collation=<value>" */
+                       while ((val = uenum_next(en, NULL, &status)))
+                       {
+                               char *localeid = psprintf("%s@collation=%s", name, val);
+
+                               langtag =  get_icu_language_tag(localeid);
+                               collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : localeid;
+                               collid = CollationCreate(psprintf("%s-x-icu", langtag),
+                                                                                nspid, GetUserId(), COLLPROVIDER_ICU, -1,
+                                                                                collcollate, collcollate,
+                                                                                get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
+                                                                                if_not_exists);
+                               /* NOTE(review): same unchecked-collid question as above */
+                               CreateComments(collid, CollationRelationId, 0,
+                                                          get_icu_locale_comment(localeid));
+                       }
+                       /* status is only inspected once uenum_next() has returned NULL */
+                       if (U_FAILURE(status))
+                               ereport(ERROR,
+                                               (errmsg("could not get keyword values for locale \"%s\": %s",
+                                                               name, u_errorName(status))));
+                       uenum_close(en);
+               }
+       }
+#endif
+
        PG_RETURN_VOID();
 }
index 5d599dbd0ca2519436702a934e2fb666aa53aaf3..0b57543bc4a1672690dd7aa24d37fce12888a8ec 100644 (file)
@@ -8,6 +8,8 @@
 # this directory and SUBDIRS to subdirectories containing more things
 # to build.
 
+override CPPFLAGS := $(CPPFLAGS) $(ICU_CFLAGS)
+
 ifdef PARTIAL_LINKING
 # old style: linking using SUBSYS.o
 subsysfilename = SUBSYS.o
index 93d4eb207f1f8f748e3f72dd3667e6f5cbb4d0aa..93bda42715310b260b741766ffad51efb2e790f0 100644 (file)
@@ -3046,6 +3046,16 @@ _copyAlterTableCmd(const AlterTableCmd *from)
        return newnode;
 }
 
+/*
+ * Copy an AlterCollationStmt node.  collname is the only field copied here.
+ */
+static AlterCollationStmt *
+_copyAlterCollationStmt(const AlterCollationStmt *from)
+{
+       AlterCollationStmt *newnode = makeNode(AlterCollationStmt);
+
+       COPY_NODE_FIELD(collname);
+
+       return newnode;
+}
+
 static AlterDomainStmt *
 _copyAlterDomainStmt(const AlterDomainStmt *from)
 {
@@ -4986,6 +4996,9 @@ copyObject(const void *from)
                case T_AlterTableCmd:
                        retval = _copyAlterTableCmd(from);
                        break;
+               case T_AlterCollationStmt:
+                       retval = _copyAlterCollationStmt(from);
+                       break;
                case T_AlterDomainStmt:
                        retval = _copyAlterDomainStmt(from);
                        break;
index 6b40b56f71ee35f989f8e07cd32d934927b895a6..0d12636d92cf86a2255af7bb4b890a45a15d238f 100644 (file)
@@ -1095,6 +1095,14 @@ _equalAlterTableCmd(const AlterTableCmd *a, const AlterTableCmd *b)
        return true;
 }
 
+/*
+ * Compare two AlterCollationStmt nodes.  collname is the only field
+ * compared here.
+ */
+static bool
+_equalAlterCollationStmt(const AlterCollationStmt *a, const AlterCollationStmt *b)
+{
+       COMPARE_NODE_FIELD(collname);
+
+       return true;
+}
+
 static bool
 _equalAlterDomainStmt(const AlterDomainStmt *a, const AlterDomainStmt *b)
 {
@@ -3174,6 +3182,9 @@ equal(const void *a, const void *b)
                case T_AlterTableCmd:
                        retval = _equalAlterTableCmd(a, b);
                        break;
+               case T_AlterCollationStmt:
+                       retval = _equalAlterCollationStmt(a, b);
+                       break;
                case T_AlterDomainStmt:
                        retval = _equalAlterDomainStmt(a, b);
                        break;
index 50126baacf6431e379b53d5d27d856d1bcec3333..82844a0399d7ad6da9fd7df51cb32877eef9d2a4 100644 (file)
@@ -244,7 +244,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 }
 
 %type <node>   stmt schema_stmt
-               AlterEventTrigStmt
+               AlterEventTrigStmt AlterCollationStmt
                AlterDatabaseStmt AlterDatabaseSetStmt AlterDomainStmt AlterEnumStmt
                AlterFdwStmt AlterForeignServerStmt AlterGroupStmt
                AlterObjectDependsStmt AlterObjectSchemaStmt AlterOwnerStmt
@@ -812,6 +812,7 @@ stmtmulti:  stmtmulti ';' stmt
 
 stmt :
                        AlterEventTrigStmt
+                       | AlterCollationStmt
                        | AlterDatabaseStmt
                        | AlterDatabaseSetStmt
                        | AlterDefaultPrivilegesStmt
@@ -9705,6 +9706,21 @@ DropdbStmt: DROP DATABASE database_name
                ;
 
 
+/*****************************************************************************
+ *
+ *             ALTER COLLATION
+ *
+ *             The only form accepted by this production is
+ *             ALTER COLLATION any_name REFRESH VERSION_P; it yields an
+ *             AlterCollationStmt node carrying the collation name.
+ *
+ *****************************************************************************/
+
+AlterCollationStmt: ALTER COLLATION any_name REFRESH VERSION_P
+                               {
+                                       AlterCollationStmt *n = makeNode(AlterCollationStmt);
+                                       /* $3 is the any_name that names the collation */
+                                       n->collname = $3;
+                                       $$ = (Node *)n;
+                               }
+               ;
+
+
 /*****************************************************************************
  *
  *             ALTER SYSTEM
index 0121cbb2ada73d0d8afa1a4814a8725546cb15bc..4bdcb4fd6ae52b74e498c0b87a5b95d13d74c486 100644 (file)
@@ -68,7 +68,8 @@ typedef enum
        PG_REGEX_LOCALE_WIDE,           /* Use <wctype.h> functions */
        PG_REGEX_LOCALE_1BYTE,          /* Use <ctype.h> functions */
        PG_REGEX_LOCALE_WIDE_L,         /* Use locale_t <wctype.h> functions */
-       PG_REGEX_LOCALE_1BYTE_L         /* Use locale_t <ctype.h> functions */
+       PG_REGEX_LOCALE_1BYTE_L,        /* Use locale_t <ctype.h> functions */
+       PG_REGEX_LOCALE_ICU                     /* Use ICU uchar.h functions */
 } PG_Locale_Strategy;
 
 static PG_Locale_Strategy pg_regex_strategy;
@@ -262,6 +263,11 @@ pg_set_regex_collation(Oid collation)
                                         errhint("Use the COLLATE clause to set the collation explicitly.")));
                }
 
+#ifdef USE_ICU
+               if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_ICU)
+                       pg_regex_strategy = PG_REGEX_LOCALE_ICU;
+               else
+#endif
 #ifdef USE_WIDE_UPPER_LOWER
                if (GetDatabaseEncoding() == PG_UTF8)
                {
@@ -303,13 +309,18 @@ pg_wc_isdigit(pg_wchar c)
                case PG_REGEX_LOCALE_WIDE_L:
 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-                               return iswdigit_l((wint_t) c, pg_regex_locale);
+                               return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
 #endif
                        /* FALL THRU */
                case PG_REGEX_LOCALE_1BYTE_L:
 #ifdef HAVE_LOCALE_T
                        return (c <= (pg_wchar) UCHAR_MAX &&
-                                       isdigit_l((unsigned char) c, pg_regex_locale));
+                                       isdigit_l((unsigned char) c, pg_regex_locale->info.lt));
+#endif
+                       break;
+               case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+                       return u_isdigit(c);
 #endif
                        break;
        }
@@ -336,13 +347,18 @@ pg_wc_isalpha(pg_wchar c)
                case PG_REGEX_LOCALE_WIDE_L:
 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-                               return iswalpha_l((wint_t) c, pg_regex_locale);
+                               return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
 #endif
                        /* FALL THRU */
                case PG_REGEX_LOCALE_1BYTE_L:
 #ifdef HAVE_LOCALE_T
                        return (c <= (pg_wchar) UCHAR_MAX &&
-                                       isalpha_l((unsigned char) c, pg_regex_locale));
+                                       isalpha_l((unsigned char) c, pg_regex_locale->info.lt));
+#endif
+                       break;
+               case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+                       return u_isalpha(c);
 #endif
                        break;
        }
@@ -369,13 +385,18 @@ pg_wc_isalnum(pg_wchar c)
                case PG_REGEX_LOCALE_WIDE_L:
 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-                               return iswalnum_l((wint_t) c, pg_regex_locale);
+                               return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
 #endif
                        /* FALL THRU */
                case PG_REGEX_LOCALE_1BYTE_L:
 #ifdef HAVE_LOCALE_T
                        return (c <= (pg_wchar) UCHAR_MAX &&
-                                       isalnum_l((unsigned char) c, pg_regex_locale));
+                                       isalnum_l((unsigned char) c, pg_regex_locale->info.lt));
+#endif
+                       break;
+               case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+                       return u_isalnum(c);
 #endif
                        break;
        }
@@ -402,13 +423,18 @@ pg_wc_isupper(pg_wchar c)
                case PG_REGEX_LOCALE_WIDE_L:
 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-                               return iswupper_l((wint_t) c, pg_regex_locale);
+                               return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
 #endif
                        /* FALL THRU */
                case PG_REGEX_LOCALE_1BYTE_L:
 #ifdef HAVE_LOCALE_T
                        return (c <= (pg_wchar) UCHAR_MAX &&
-                                       isupper_l((unsigned char) c, pg_regex_locale));
+                                       isupper_l((unsigned char) c, pg_regex_locale->info.lt));
+#endif
+                       break;
+               case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+                       return u_isupper(c);
 #endif
                        break;
        }
@@ -435,13 +461,18 @@ pg_wc_islower(pg_wchar c)
                case PG_REGEX_LOCALE_WIDE_L:
 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-                               return iswlower_l((wint_t) c, pg_regex_locale);
+                               return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
 #endif
                        /* FALL THRU */
                case PG_REGEX_LOCALE_1BYTE_L:
 #ifdef HAVE_LOCALE_T
                        return (c <= (pg_wchar) UCHAR_MAX &&
-                                       islower_l((unsigned char) c, pg_regex_locale));
+                                       islower_l((unsigned char) c, pg_regex_locale->info.lt));
+#endif
+                       break;
+               case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+                       return u_islower(c);
 #endif
                        break;
        }
@@ -468,13 +499,18 @@ pg_wc_isgraph(pg_wchar c)
                case PG_REGEX_LOCALE_WIDE_L:
 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-                               return iswgraph_l((wint_t) c, pg_regex_locale);
+                               return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
 #endif
                        /* FALL THRU */
                case PG_REGEX_LOCALE_1BYTE_L:
 #ifdef HAVE_LOCALE_T
                        return (c <= (pg_wchar) UCHAR_MAX &&
-                                       isgraph_l((unsigned char) c, pg_regex_locale));
+                                       isgraph_l((unsigned char) c, pg_regex_locale->info.lt));
+#endif
+                       break;
+               case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+                       return u_isgraph(c);
 #endif
                        break;
        }
@@ -501,13 +537,18 @@ pg_wc_isprint(pg_wchar c)
                case PG_REGEX_LOCALE_WIDE_L:
 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-                               return iswprint_l((wint_t) c, pg_regex_locale);
+                               return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
 #endif
                        /* FALL THRU */
                case PG_REGEX_LOCALE_1BYTE_L:
 #ifdef HAVE_LOCALE_T
                        return (c <= (pg_wchar) UCHAR_MAX &&
-                                       isprint_l((unsigned char) c, pg_regex_locale));
+                                       isprint_l((unsigned char) c, pg_regex_locale->info.lt));
+#endif
+                       break;
+               case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+                       return u_isprint(c);
 #endif
                        break;
        }
@@ -534,13 +575,18 @@ pg_wc_ispunct(pg_wchar c)
                case PG_REGEX_LOCALE_WIDE_L:
 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-                               return iswpunct_l((wint_t) c, pg_regex_locale);
+                               return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
 #endif
                        /* FALL THRU */
                case PG_REGEX_LOCALE_1BYTE_L:
 #ifdef HAVE_LOCALE_T
                        return (c <= (pg_wchar) UCHAR_MAX &&
-                                       ispunct_l((unsigned char) c, pg_regex_locale));
+                                       ispunct_l((unsigned char) c, pg_regex_locale->info.lt));
+#endif
+                       break;
+               case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+                       return u_ispunct(c);
 #endif
                        break;
        }
@@ -567,13 +613,18 @@ pg_wc_isspace(pg_wchar c)
                case PG_REGEX_LOCALE_WIDE_L:
 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-                               return iswspace_l((wint_t) c, pg_regex_locale);
+                               return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
 #endif
                        /* FALL THRU */
                case PG_REGEX_LOCALE_1BYTE_L:
 #ifdef HAVE_LOCALE_T
                        return (c <= (pg_wchar) UCHAR_MAX &&
-                                       isspace_l((unsigned char) c, pg_regex_locale));
+                                       isspace_l((unsigned char) c, pg_regex_locale->info.lt));
+#endif
+                       break;
+               case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+                       return u_isspace(c);
 #endif
                        break;
        }
@@ -608,15 +659,20 @@ pg_wc_toupper(pg_wchar c)
                case PG_REGEX_LOCALE_WIDE_L:
 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-                               return towupper_l((wint_t) c, pg_regex_locale);
+                               return towupper_l((wint_t) c, pg_regex_locale->info.lt);
 #endif
                        /* FALL THRU */
                case PG_REGEX_LOCALE_1BYTE_L:
 #ifdef HAVE_LOCALE_T
                        if (c <= (pg_wchar) UCHAR_MAX)
-                               return toupper_l((unsigned char) c, pg_regex_locale);
+                               return toupper_l((unsigned char) c, pg_regex_locale->info.lt);
 #endif
                        return c;
+               case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+                       return u_toupper(c);
+#endif
+                       break;
        }
        return 0;                                       /* can't get here, but keep compiler quiet */
 }
@@ -649,15 +705,20 @@ pg_wc_tolower(pg_wchar c)
                case PG_REGEX_LOCALE_WIDE_L:
 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-                               return towlower_l((wint_t) c, pg_regex_locale);
+                               return towlower_l((wint_t) c, pg_regex_locale->info.lt);
 #endif
                        /* FALL THRU */
                case PG_REGEX_LOCALE_1BYTE_L:
 #ifdef HAVE_LOCALE_T
                        if (c <= (pg_wchar) UCHAR_MAX)
-                               return tolower_l((unsigned char) c, pg_regex_locale);
+                               return tolower_l((unsigned char) c, pg_regex_locale->info.lt);
 #endif
                        return c;
+               case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+                       return u_tolower(c);
+#endif
+                       break;
        }
        return 0;                                       /* can't get here, but keep compiler quiet */
 }
@@ -808,6 +869,9 @@ pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
                        max_chr = (pg_wchar) MAX_SIMPLE_CHR;
 #endif
                        break;
+               case PG_REGEX_LOCALE_ICU:
+                       max_chr = (pg_wchar) MAX_SIMPLE_CHR;
+                       break;
                default:
                        max_chr = 0;            /* can't get here, but keep compiler quiet */
                        break;
index 20b527340543b3003caf540c25af764fa674d148..c8d20fffeafc4ce301136815cbe11f0acca29e2e 100644 (file)
@@ -1623,6 +1623,10 @@ ProcessUtilitySlow(ParseState *pstate,
                                commandCollected = true;
                                break;
 
+                       case T_AlterCollationStmt:
+                               address = AlterCollation((AlterCollationStmt *) parsetree);
+                               break;
+
                        default:
                                elog(ERROR, "unrecognized node type: %d",
                                         (int) nodeTag(parsetree));
@@ -2673,6 +2677,10 @@ CreateCommandTag(Node *parsetree)
                        tag = "DROP SUBSCRIPTION";
                        break;
 
+               case T_AlterCollationStmt:
+                       tag = "ALTER COLLATION";
+                       break;
+
                case T_PrepareStmt:
                        tag = "PREPARE";
                        break;
index c16bfbca9338a5ac85a05aefb7f8efbff97735ea..0566abd314de711d22a51964ecba0a47211101d4 100644 (file)
 #include <wctype.h>
 #endif
 
+#ifdef USE_ICU
+#include <unicode/ustring.h>
+#endif
+
 #include "catalog/pg_collation.h"
 #include "mb/pg_wchar.h"
 #include "utils/builtins.h"
@@ -1443,6 +1447,42 @@ str_numth(char *dest, char *num, int type)
  *                     upper/lower/initcap functions
  *****************************************************************************/
 
+#ifdef USE_ICU
+/*
+ * icu_convert_case
+ *
+ * Run an ICU case-mapping function over a UChar string.
+ *
+ * func is a function with the signature of u_strToLower()/u_strToUpper();
+ * it is called with the ICU locale name stored in mylocale.  The converted
+ * string is written to a palloc'd buffer returned in *buff_dest, and the
+ * function result is its length in UChars as reported by func.
+ *
+ * The first attempt uses a destination buffer as long as the source.  If ICU
+ * answers U_BUFFER_OVERFLOW_ERROR, the buffer is reallocated with the length
+ * func asked for and the conversion is retried once.  Any remaining failure
+ * is raised as an ERROR.
+ */
+static int32_t
+icu_convert_case(int32_t (*func)(UChar *, int32_t, const UChar *, int32_t, const char *, UErrorCode *),
+                                pg_locale_t mylocale, UChar **buff_dest, UChar *buff_source, int32_t len_source)
+{
+       UErrorCode      status;
+       int32_t         len_dest;
+
+       len_dest = len_source;  /* try first with same length */
+       *buff_dest = palloc(len_dest * sizeof(**buff_dest));
+       status = U_ZERO_ERROR;
+       len_dest = func(*buff_dest, len_dest, buff_source, len_source, mylocale->info.icu.locale, &status);
+       if (status == U_BUFFER_OVERFLOW_ERROR)
+       {
+               /*
+                * try again with adjusted length
+                *
+                * It is the buffer the caller's pointer refers to that must be
+                * released and replaced, not the caller's pointer variable itself.
+                */
+               pfree(*buff_dest);
+               *buff_dest = palloc(len_dest * sizeof(**buff_dest));
+               status = U_ZERO_ERROR;
+               len_dest = func(*buff_dest, len_dest, buff_source, len_source, mylocale->info.icu.locale, &status);
+       }
+       if (U_FAILURE(status))
+               ereport(ERROR,
+                               (errmsg("case conversion failed: %s", u_errorName(status))));
+       return len_dest;
+}
+
+/*
+ * Adapter around u_strToTitle() whose parameter list matches the function
+ * pointer type taken by icu_convert_case().  u_strToTitle() has one extra
+ * argument (the break iterator), for which NULL is always passed here;
+ * judging by the name, that selects ICU's default break iterator -- see the
+ * ICU documentation to confirm.
+ */
+static int32_t
+u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
+                                               const UChar *src, int32_t srcLength,
+                                               const char *locale,
+                                               UErrorCode *pErrorCode)
+{
+       return u_strToTitle(dest, destCapacity, src, srcLength, NULL, locale, pErrorCode);
+}
+#endif
+
 /*
  * If the system provides the needed functions for wide-character manipulation
  * (which are all standardized by C99), then we implement upper/lower/initcap
@@ -1479,12 +1519,9 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
                result = asc_tolower(buff, nbytes);
        }
 #ifdef USE_WIDE_UPPER_LOWER
-       else if (pg_database_encoding_max_length() > 1)
+       else
        {
                pg_locale_t mylocale = 0;
-               wchar_t    *workspace;
-               size_t          curr_char;
-               size_t          result_size;
 
                if (collid != DEFAULT_COLLATION_OID)
                {
@@ -1502,77 +1539,79 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
                        mylocale = pg_newlocale_from_collation(collid);
                }
 
-               /* Overflow paranoia */
-               if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_OUT_OF_MEMORY),
-                                        errmsg("out of memory")));
+#ifdef USE_ICU
+               if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
+               {
+                       int32_t         len_uchar;
+                       int32_t         len_conv;
+                       UChar      *buff_uchar;
+                       UChar      *buff_conv;
+
+                       len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
+                       len_conv = icu_convert_case(u_strToLower, mylocale, &buff_conv, buff_uchar, len_uchar);
+                       icu_from_uchar(&result, buff_conv, len_conv);
+               }
+               else
+#endif
+               {
+                       if (pg_database_encoding_max_length() > 1)
+                       {
+                               wchar_t    *workspace;
+                               size_t          curr_char;
+                               size_t          result_size;
 
-               /* Output workspace cannot have more codes than input bytes */
-               workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
+                               /* Overflow paranoia */
+                               if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_OUT_OF_MEMORY),
+                                                        errmsg("out of memory")));
 
-               char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
+                               /* Output workspace cannot have more codes than input bytes */
+                               workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
 
-               for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
-               {
+                               char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
+
+                               for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+                               {
 #ifdef HAVE_LOCALE_T
-                       if (mylocale)
-                               workspace[curr_char] = towlower_l(workspace[curr_char], mylocale);
-                       else
+                                       if (mylocale)
+                                               workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
+                                       else
 #endif
-                               workspace[curr_char] = towlower(workspace[curr_char]);
-               }
+                                               workspace[curr_char] = towlower(workspace[curr_char]);
+                               }
 
-               /* Make result large enough; case change might change number of bytes */
-               result_size = curr_char * pg_database_encoding_max_length() + 1;
-               result = palloc(result_size);
+                               /* Make result large enough; case change might change number of bytes */
+                               result_size = curr_char * pg_database_encoding_max_length() + 1;
+                               result = palloc(result_size);
 
-               wchar2char(result, workspace, result_size, mylocale);
-               pfree(workspace);
-       }
+                               wchar2char(result, workspace, result_size, mylocale);
+                               pfree(workspace);
+                       }
 #endif   /* USE_WIDE_UPPER_LOWER */
-       else
-       {
-#ifdef HAVE_LOCALE_T
-               pg_locale_t mylocale = 0;
-#endif
-               char       *p;
-
-               if (collid != DEFAULT_COLLATION_OID)
-               {
-                       if (!OidIsValid(collid))
+                       else
                        {
-                               /*
-                                * This typically means that the parser could not resolve a
-                                * conflict of implicit collations, so report it that way.
-                                */
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INDETERMINATE_COLLATION),
-                                                errmsg("could not determine which collation to use for lower() function"),
-                                                errhint("Use the COLLATE clause to set the collation explicitly.")));
-                       }
-#ifdef HAVE_LOCALE_T
-                       mylocale = pg_newlocale_from_collation(collid);
-#endif
-               }
+                               char       *p;
 
-               result = pnstrdup(buff, nbytes);
+                               result = pnstrdup(buff, nbytes);
 
-               /*
-                * Note: we assume that tolower_l() will not be so broken as to need
-                * an isupper_l() guard test.  When using the default collation, we
-                * apply the traditional Postgres behavior that forces ASCII-style
-                * treatment of I/i, but in non-default collations you get exactly
-                * what the collation says.
-                */
-               for (p = result; *p; p++)
-               {
+                               /*
+                                * Note: we assume that tolower_l() will not be so broken as to need
+                                * an isupper_l() guard test.  When using the default collation, we
+                                * apply the traditional Postgres behavior that forces ASCII-style
+                                * treatment of I/i, but in non-default collations you get exactly
+                                * what the collation says.
+                                */
+                               for (p = result; *p; p++)
+                               {
 #ifdef HAVE_LOCALE_T
-                       if (mylocale)
-                               *p = tolower_l((unsigned char) *p, mylocale);
-                       else
+                                       if (mylocale)
+                                               *p = tolower_l((unsigned char) *p, mylocale->info.lt);
+                                       else
 #endif
-                               *p = pg_tolower((unsigned char) *p);
+                                               *p = pg_tolower((unsigned char) *p);
+                               }
+                       }
                }
        }
 
@@ -1599,12 +1638,9 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
                result = asc_toupper(buff, nbytes);
        }
 #ifdef USE_WIDE_UPPER_LOWER
-       else if (pg_database_encoding_max_length() > 1)
+       else
        {
                pg_locale_t mylocale = 0;
-               wchar_t    *workspace;
-               size_t          curr_char;
-               size_t          result_size;
 
                if (collid != DEFAULT_COLLATION_OID)
                {
@@ -1622,77 +1658,78 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
                        mylocale = pg_newlocale_from_collation(collid);
                }
 
-               /* Overflow paranoia */
-               if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_OUT_OF_MEMORY),
-                                        errmsg("out of memory")));
+#ifdef USE_ICU
+               if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
+               {
+                       int32_t         len_uchar, len_conv;
+                       UChar      *buff_uchar;
+                       UChar      *buff_conv;
 
-               /* Output workspace cannot have more codes than input bytes */
-               workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
+                       len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
+                       len_conv = icu_convert_case(u_strToUpper, mylocale, &buff_conv, buff_uchar, len_uchar);
+                       icu_from_uchar(&result, buff_conv, len_conv);
+               }
+               else
+#endif
+               {
+                       if (pg_database_encoding_max_length() > 1)
+                       {
+                               wchar_t    *workspace;
+                               size_t          curr_char;
+                               size_t          result_size;
 
-               char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
+                               /* Overflow paranoia */
+                               if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_OUT_OF_MEMORY),
+                                                        errmsg("out of memory")));
 
-               for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
-               {
-#ifdef HAVE_LOCALE_T
-                       if (mylocale)
-                               workspace[curr_char] = towupper_l(workspace[curr_char], mylocale);
-                       else
-#endif
-                               workspace[curr_char] = towupper(workspace[curr_char]);
-               }
+                               /* Output workspace cannot have more codes than input bytes */
+                               workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
 
-               /* Make result large enough; case change might change number of bytes */
-               result_size = curr_char * pg_database_encoding_max_length() + 1;
-               result = palloc(result_size);
+                               char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
 
-               wchar2char(result, workspace, result_size, mylocale);
-               pfree(workspace);
-       }
-#endif   /* USE_WIDE_UPPER_LOWER */
-       else
-       {
+                               for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+                               {
 #ifdef HAVE_LOCALE_T
-               pg_locale_t mylocale = 0;
+                                       if (mylocale)
+                                               workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
+                                       else
 #endif
-               char       *p;
+                                               workspace[curr_char] = towupper(workspace[curr_char]);
+                               }
 
-               if (collid != DEFAULT_COLLATION_OID)
-               {
-                       if (!OidIsValid(collid))
-                       {
-                               /*
-                                * This typically means that the parser could not resolve a
-                                * conflict of implicit collations, so report it that way.
-                                */
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INDETERMINATE_COLLATION),
-                                                errmsg("could not determine which collation to use for upper() function"),
-                                                errhint("Use the COLLATE clause to set the collation explicitly.")));
+                               /* Make result large enough; case change might change number of bytes */
+                               result_size = curr_char * pg_database_encoding_max_length() + 1;
+                               result = palloc(result_size);
+
+                               wchar2char(result, workspace, result_size, mylocale);
+                               pfree(workspace);
                        }
-#ifdef HAVE_LOCALE_T
-                       mylocale = pg_newlocale_from_collation(collid);
-#endif
-               }
+#endif   /* USE_WIDE_UPPER_LOWER */
+                       else
+                       {
+                               char       *p;
 
-               result = pnstrdup(buff, nbytes);
+                               result = pnstrdup(buff, nbytes);
 
-               /*
-                * Note: we assume that toupper_l() will not be so broken as to need
-                * an islower_l() guard test.  When using the default collation, we
-                * apply the traditional Postgres behavior that forces ASCII-style
-                * treatment of I/i, but in non-default collations you get exactly
-                * what the collation says.
-                */
-               for (p = result; *p; p++)
-               {
+                               /*
+                                * Note: we assume that toupper_l() will not be so broken as to need
+                                * an islower_l() guard test.  When using the default collation, we
+                                * apply the traditional Postgres behavior that forces ASCII-style
+                                * treatment of I/i, but in non-default collations you get exactly
+                                * what the collation says.
+                                */
+                               for (p = result; *p; p++)
+                               {
 #ifdef HAVE_LOCALE_T
-                       if (mylocale)
-                               *p = toupper_l((unsigned char) *p, mylocale);
-                       else
+                                       if (mylocale)
+                                               *p = toupper_l((unsigned char) *p, mylocale->info.lt);
+                                       else
 #endif
-                               *p = pg_toupper((unsigned char) *p);
+                                               *p = pg_toupper((unsigned char) *p);
+                               }
+                       }
                }
        }
 
@@ -1720,12 +1757,9 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
                result = asc_initcap(buff, nbytes);
        }
 #ifdef USE_WIDE_UPPER_LOWER
-       else if (pg_database_encoding_max_length() > 1)
+       else
        {
                pg_locale_t mylocale = 0;
-               wchar_t    *workspace;
-               size_t          curr_char;
-               size_t          result_size;
 
                if (collid != DEFAULT_COLLATION_OID)
                {
@@ -1743,100 +1777,101 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
                        mylocale = pg_newlocale_from_collation(collid);
                }
 
-               /* Overflow paranoia */
-               if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_OUT_OF_MEMORY),
-                                        errmsg("out of memory")));
-
-               /* Output workspace cannot have more codes than input bytes */
-               workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
-
-               char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
-
-               for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+#ifdef USE_ICU
+               if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
                {
-#ifdef HAVE_LOCALE_T
-                       if (mylocale)
-                       {
-                               if (wasalnum)
-                                       workspace[curr_char] = towlower_l(workspace[curr_char], mylocale);
-                               else
-                                       workspace[curr_char] = towupper_l(workspace[curr_char], mylocale);
-                               wasalnum = iswalnum_l(workspace[curr_char], mylocale);
-                       }
-                       else
+                       int32_t         len_uchar, len_conv;
+                       UChar      *buff_uchar;
+                       UChar      *buff_conv;
+
+                       len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
+                       len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale, &buff_conv, buff_uchar, len_uchar);
+                       icu_from_uchar(&result, buff_conv, len_conv);
+               }
+               else
 #endif
+               {
+                       if (pg_database_encoding_max_length() > 1)
                        {
-                               if (wasalnum)
-                                       workspace[curr_char] = towlower(workspace[curr_char]);
-                               else
-                                       workspace[curr_char] = towupper(workspace[curr_char]);
-                               wasalnum = iswalnum(workspace[curr_char]);
-                       }
-               }
+                               wchar_t    *workspace;
+                               size_t          curr_char;
+                               size_t          result_size;
 
-               /* Make result large enough; case change might change number of bytes */
-               result_size = curr_char * pg_database_encoding_max_length() + 1;
-               result = palloc(result_size);
+                               /* Overflow paranoia */
+                               if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_OUT_OF_MEMORY),
+                                                        errmsg("out of memory")));
 
-               wchar2char(result, workspace, result_size, mylocale);
-               pfree(workspace);
-       }
-#endif   /* USE_WIDE_UPPER_LOWER */
-       else
-       {
-#ifdef HAVE_LOCALE_T
-               pg_locale_t mylocale = 0;
-#endif
-               char       *p;
+                               /* Output workspace cannot have more codes than input bytes */
+                               workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
 
-               if (collid != DEFAULT_COLLATION_OID)
-               {
-                       if (!OidIsValid(collid))
-                       {
-                               /*
-                                * This typically means that the parser could not resolve a
-                                * conflict of implicit collations, so report it that way.
-                                */
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INDETERMINATE_COLLATION),
-                                                errmsg("could not determine which collation to use for initcap() function"),
-                                                errhint("Use the COLLATE clause to set the collation explicitly.")));
-                       }
+                               char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
+
+                               for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+                               {
 #ifdef HAVE_LOCALE_T
-                       mylocale = pg_newlocale_from_collation(collid);
+                                       if (mylocale)
+                                       {
+                                               if (wasalnum)
+                                                       workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
+                                               else
+                                                       workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
+                                               wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
+                                       }
+                                       else
 #endif
-               }
+                                       {
+                                               if (wasalnum)
+                                                       workspace[curr_char] = towlower(workspace[curr_char]);
+                                               else
+                                                       workspace[curr_char] = towupper(workspace[curr_char]);
+                                               wasalnum = iswalnum(workspace[curr_char]);
+                                       }
+                               }
 
-               result = pnstrdup(buff, nbytes);
+                               /* Make result large enough; case change might change number of bytes */
+                               result_size = curr_char * pg_database_encoding_max_length() + 1;
+                               result = palloc(result_size);
 
-               /*
-                * Note: we assume that toupper_l()/tolower_l() will not be so broken
-                * as to need guard tests.  When using the default collation, we apply
-                * the traditional Postgres behavior that forces ASCII-style treatment
-                * of I/i, but in non-default collations you get exactly what the
-                * collation says.
-                */
-               for (p = result; *p; p++)
-               {
-#ifdef HAVE_LOCALE_T
-                       if (mylocale)
-                       {
-                               if (wasalnum)
-                                       *p = tolower_l((unsigned char) *p, mylocale);
-                               else
-                                       *p = toupper_l((unsigned char) *p, mylocale);
-                               wasalnum = isalnum_l((unsigned char) *p, mylocale);
+                               wchar2char(result, workspace, result_size, mylocale);
+                               pfree(workspace);
                        }
+#endif   /* USE_WIDE_UPPER_LOWER */
                        else
-#endif
                        {
-                               if (wasalnum)
-                                       *p = pg_tolower((unsigned char) *p);
-                               else
-                                       *p = pg_toupper((unsigned char) *p);
-                               wasalnum = isalnum((unsigned char) *p);
+                               char       *p;
+
+                               result = pnstrdup(buff, nbytes);
+
+                               /*
+                                * Note: we assume that toupper_l()/tolower_l() will not be so broken
+                                * as to need guard tests.  When using the default collation, we apply
+                                * the traditional Postgres behavior that forces ASCII-style treatment
+                                * of I/i, but in non-default collations you get exactly what the
+                                * collation says.
+                                */
+                               for (p = result; *p; p++)
+                               {
+#ifdef HAVE_LOCALE_T
+                                       if (mylocale)
+                                       {
+                                               if (wasalnum)
+                                                       *p = tolower_l((unsigned char) *p, mylocale->info.lt);
+                                               else
+                                                       *p = toupper_l((unsigned char) *p, mylocale->info.lt);
+                                               wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
+                                       }
+                                       else
+#endif
+                                       {
+                                               if (wasalnum)
+                                                       *p = pg_tolower((unsigned char) *p);
+                                               else
+                                                       *p = pg_toupper((unsigned char) *p);
+                                               wasalnum = isalnum((unsigned char) *p);
+                                       }
+                               }
                        }
                }
        }
index 8d9d285fb55d51321d3ca86c603c39691042b2ce..1f683ccd0f7acc905d9f1b2c32dcd130ade566c5 100644 (file)
@@ -96,7 +96,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
                return pg_ascii_tolower(c);
 #ifdef HAVE_LOCALE_T
        else if (locale)
-               return tolower_l(c, locale);
+               return tolower_l(c, locale->info.lt);
 #endif
        else
                return pg_tolower(c);
@@ -165,14 +165,36 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
                           *p;
        int                     slen,
                                plen;
+       pg_locale_t locale = 0;
+       bool            locale_is_c = false;
+
+       if (lc_ctype_is_c(collation))
+               locale_is_c = true;
+       else if (collation != DEFAULT_COLLATION_OID)
+       {
+               if (!OidIsValid(collation))
+               {
+                       /*
+                        * This typically means that the parser could not resolve a
+                        * conflict of implicit collations, so report it that way.
+                        */
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INDETERMINATE_COLLATION),
+                                        errmsg("could not determine which collation to use for ILIKE"),
+                                        errhint("Use the COLLATE clause to set the collation explicitly.")));
+               }
+               locale = pg_newlocale_from_collation(collation);
+       }
 
        /*
         * For efficiency reasons, in the single byte case we don't call lower()
         * on the pattern and text, but instead call SB_lower_char on each
-        * character.  In the multi-byte case we don't have much choice :-(
+        * character.  In the multi-byte case we don't have much choice :-(.
+        * Also, ICU does not support single-character case folding, so we go the
+        * long way.  locale stays NULL for the default and C-locale collations.
         */
 
-       if (pg_database_encoding_max_length() > 1)
+       if (pg_database_encoding_max_length() > 1 || (locale && locale->provider == COLLPROVIDER_ICU))
        {
                /* lower's result is never packed, so OK to use old macros here */
                pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
@@ -190,31 +212,6 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
        }
        else
        {
-               /*
-                * Here we need to prepare locale information for SB_lower_char. This
-                * should match the methods used in str_tolower().
-                */
-               pg_locale_t locale = 0;
-               bool            locale_is_c = false;
-
-               if (lc_ctype_is_c(collation))
-                       locale_is_c = true;
-               else if (collation != DEFAULT_COLLATION_OID)
-               {
-                       if (!OidIsValid(collation))
-                       {
-                               /*
-                                * This typically means that the parser could not resolve a
-                                * conflict of implicit collations, so report it that way.
-                                */
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INDETERMINATE_COLLATION),
-                                                errmsg("could not determine which collation to use for ILIKE"),
-                                                errhint("Use the COLLATE clause to set the collation explicitly.")));
-                       }
-                       locale = pg_newlocale_from_collation(collation);
-               }
-
                p = VARDATA_ANY(pat);
                plen = VARSIZE_ANY_EXHDR(pat);
                s = VARDATA_ANY(str);
index ab197025f8183be4d302f41182ccf3c62f5bc9f2..2a2c9bc504683abdcc495a7f32fc521272f7a908 100644 (file)
 #include "catalog/pg_collation.h"
 #include "catalog/pg_control.h"
 #include "mb/pg_wchar.h"
+#include "utils/builtins.h"
 #include "utils/hsearch.h"
+#include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/pg_locale.h"
 #include "utils/syscache.h"
 
+#ifdef USE_ICU
+#include <unicode/ucnv.h>
+#endif
+
 #ifdef WIN32
 /*
  * This Windows file defines StrNCpy. We don't need it here, so we undefine
@@ -1272,12 +1278,13 @@ pg_newlocale_from_collation(Oid collid)
        if (cache_entry->locale == 0)
        {
                /* We haven't computed this yet in this session, so do it */
-#ifdef HAVE_LOCALE_T
                HeapTuple       tp;
                Form_pg_collation collform;
                const char *collcollate;
-               const char *collctype;
-               locale_t        result;
+               const char *collctype pg_attribute_unused();
+               pg_locale_t     result;
+               Datum           collversion;
+               bool            isnull;
 
                tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
                if (!HeapTupleIsValid(tp))
@@ -1287,61 +1294,230 @@ pg_newlocale_from_collation(Oid collid)
                collcollate = NameStr(collform->collcollate);
                collctype = NameStr(collform->collctype);
 
-               if (strcmp(collcollate, collctype) == 0)
+               /* cached for the whole session; ERRORs on OOM instead of yielding NULL */
+               result = MemoryContextAllocZero(TopMemoryContext, sizeof(*result));
+               result->provider = collform->collprovider;
+
+               if (collform->collprovider == COLLPROVIDER_LIBC)
                {
-                       /* Normal case where they're the same */
+#ifdef HAVE_LOCALE_T
+                       locale_t        loc;
+
+                       if (strcmp(collcollate, collctype) == 0)
+                       {
+                               /* Normal case where they're the same */
 #ifndef WIN32
-                       result = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
-                                                          NULL);
+                               loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
+                                                                  NULL);
 #else
-                       result = _create_locale(LC_ALL, collcollate);
+                               loc = _create_locale(LC_ALL, collcollate);
 #endif
-                       if (!result)
-                               report_newlocale_failure(collcollate);
-               }
-               else
-               {
+                               if (!loc)
+                                       report_newlocale_failure(collcollate);
+                       }
+                       else
+                       {
 #ifndef WIN32
-                       /* We need two newlocale() steps */
-                       locale_t        loc1;
-
-                       loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
-                       if (!loc1)
-                               report_newlocale_failure(collcollate);
-                       result = newlocale(LC_CTYPE_MASK, collctype, loc1);
-                       if (!result)
-                               report_newlocale_failure(collctype);
+                               /* We need two newlocale() steps */
+                               locale_t        loc1;
+
+                               loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
+                               if (!loc1)
+                                       report_newlocale_failure(collcollate);
+                               loc = newlocale(LC_CTYPE_MASK, collctype, loc1);
+                               if (!loc)
+                                       report_newlocale_failure(collctype);
 #else
 
-                       /*
-                        * XXX The _create_locale() API doesn't appear to support this.
-                        * Could perhaps be worked around by changing pg_locale_t to
-                        * contain two separate fields.
-                        */
+                               /*
+                                * XXX The _create_locale() API doesn't appear to support this.
+                                * Could perhaps be worked around by changing pg_locale_t to
+                                * contain two separate fields.
+                                */
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                                errmsg("collations with different collate and ctype values are not supported on this platform")));
+#endif
+                       }
+
+                       result->info.lt = loc;
+#else                                                  /* not HAVE_LOCALE_T */
+                       /* platform that doesn't support locale_t */
                        ereport(ERROR,
                                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                                        errmsg("collations with different collate and ctype values are not supported on this platform")));
-#endif
+                                        errmsg("collation provider LIBC is not supported on this platform")));
+#endif   /* not HAVE_LOCALE_T */
+               }
+               else if (collform->collprovider == COLLPROVIDER_ICU)
+               {
+#ifdef USE_ICU
+                       UCollator  *collator;
+                       UErrorCode      status;
+
+                       status = U_ZERO_ERROR;
+                       collator = ucol_open(collcollate, &status);
+                       if (U_FAILURE(status))
+                               ereport(ERROR,
+                                               (errmsg("could not open collator for locale \"%s\": %s",
+                                                               collcollate, u_errorName(status))));
+
+                       result->info.icu.locale = MemoryContextStrdup(TopMemoryContext, collcollate); /* ERRORs on OOM; never stores NULL */
+                       result->info.icu.ucol = collator;
+#else /* not USE_ICU */
+                       /* could get here if a collation was created by a build with ICU */
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                        errmsg("ICU is not supported in this build"),
+                                        errhint("You need to rebuild PostgreSQL using --with-icu.")));
+#endif /* not USE_ICU */
                }
 
-               cache_entry->locale = result;
+               collversion = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
+                                                                         &isnull);
+               if (!isnull)
+               {
+                       char       *actual_versionstr;
+                       char       *collversionstr;
+
+                       actual_versionstr = get_collation_actual_version(collform->collprovider, collcollate);
+                       if (!actual_versionstr)
+                               /* This could happen when specifying a version in CREATE
+                                * COLLATION for a libc locale, or manually creating a mess
+                                * in the catalogs. */
+                               ereport(ERROR,
+                                               (errmsg("collation \"%s\" has no actual version, but a version was specified",
+                                                               NameStr(collform->collname))));
+                       collversionstr = TextDatumGetCString(collversion);
+
+                       if (strcmp(actual_versionstr, collversionstr) != 0)
+                               ereport(WARNING,
+                                               (errmsg("collation \"%s\" has version mismatch",
+                                                               NameStr(collform->collname)),
+                                                errdetail("The collation in the database was created using version %s, "
+                                                                  "but the operating system provides version %s.",
+                                                                  collversionstr, actual_versionstr),
+                                                errhint("Rebuild all objects affected by this collation and run "
+                                                                "ALTER COLLATION %s REFRESH VERSION, "
+                                                                "or build PostgreSQL with the right library version.",
+                                                                quote_qualified_identifier(get_namespace_name(collform->collnamespace),
+                                                                                                                       NameStr(collform->collname)))));
+               }
 
                ReleaseSysCache(tp);
-#else                                                  /* not HAVE_LOCALE_T */
 
-               /*
-                * For platforms that don't support locale_t, we can't do anything
-                * with non-default collations.
-                */
-               ereport(ERROR,
-                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-               errmsg("nondefault collations are not supported on this platform")));
-#endif   /* not HAVE_LOCALE_T */
+               cache_entry->locale = result;
        }
 
        return cache_entry->locale;
 }
 
+/*
+ * Get provider-specific collation version string for the given collation from
+ * the operating system/library.  The result is a palloc'd string for ICU
+ * collations, and NULL for other providers or when built without ICU.
+ * A particular provider must always either return a non-NULL string or return
+ * NULL (if it doesn't support versions).  It must not return NULL for some
+ * collcollate and not NULL for others.
+ */
+char *
+get_collation_actual_version(char collprovider, const char *collcollate)
+{
+       char       *collversion;
+
+#ifdef USE_ICU
+       if (collprovider == COLLPROVIDER_ICU)
+       {
+               UCollator  *collator;
+               UErrorCode      status;
+               UVersionInfo versioninfo;
+               char            buf[U_MAX_VERSION_STRING_LENGTH];
+
+               status = U_ZERO_ERROR;
+               collator = ucol_open(collcollate, &status);
+               if (U_FAILURE(status))
+                       ereport(ERROR,
+                                       (errmsg("could not open collator for locale \"%s\": %s",
+                                                       collcollate, u_errorName(status))));
+               ucol_getVersion(collator, versioninfo);
+               ucol_close(collator);   /* opened only to query its version */
+
+               u_versionToString(versioninfo, buf);
+               collversion = pstrdup(buf);
+       }
+       else
+#endif
+               collversion = NULL;     /* no version available for this provider */
+
+       return collversion;
+}
+
+
+#ifdef USE_ICU
+/*
+ * Converter object for converting between ICU's UChar strings and C strings
+ * in database encoding.  Since the database encoding doesn't change, we only
+ * need one of these per session.
+ */
+static UConverter *icu_converter = NULL;
+
+static void
+init_icu_converter(void)
+{
+       const char *icu_encoding_name;
+       UErrorCode      status;
+       UConverter *conv;
+       /* Open icu_converter for the database encoding on first call only. */
+       if (icu_converter)
+               return;         /* already opened earlier in this session */
+
+       icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
+
+       status = U_ZERO_ERROR;
+       conv = ucnv_open(icu_encoding_name, &status);
+       if (U_FAILURE(status))
+               ereport(ERROR,
+                               (errmsg("could not open ICU converter for encoding \"%s\": %s",
+                                               icu_encoding_name, u_errorName(status))));
+       /* store the converter only after the failure check above has passed */
+       icu_converter = conv;
+}
+
+int32_t
+icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
+{
+       UErrorCode      status;
+       int32_t         len_uchar;
+
+       init_icu_converter();
+       /* *buff_uchar is palloc'd here; the return value is its length in UChars */
+       len_uchar = 2 * nbytes + 1;  /* max length per docs, plus NUL terminator */
+       *buff_uchar = palloc(len_uchar * sizeof(**buff_uchar));
+       status = U_ZERO_ERROR;
+       len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar, buff, nbytes, &status);
+       if (U_FAILURE(status))
+               ereport(ERROR,
+                               (errmsg("ucnv_toUChars failed: %s", u_errorName(status))));
+       return len_uchar;
+}
+
+int32_t
+icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar)
+{
+       UErrorCode      status;
+       int32_t         len_result;
+
+       init_icu_converter();
+       /* *result is palloc'd at worst-case size; return the bytes actually written */
+       len_result = UCNV_GET_MAX_BYTES_FOR_STRING(len_uchar, ucnv_getMaxCharSize(icu_converter));
+       *result = palloc(len_result + 1);
+       status = U_ZERO_ERROR;
+       len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1, buff_uchar, len_uchar, &status);
+       if (U_FAILURE(status))
+               ereport(ERROR,
+                               (errmsg("ucnv_fromUChars failed: %s", u_errorName(status))));
+       return len_result;
+}
+#endif
 
 /*
  * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
@@ -1362,6 +1538,8 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
 {
        size_t          result;
 
+       Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+
        if (tolen == 0)
                return 0;
 
@@ -1398,10 +1576,10 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
 #ifdef HAVE_LOCALE_T
 #ifdef HAVE_WCSTOMBS_L
                /* Use wcstombs_l for nondefault locales */
-               result = wcstombs_l(to, from, tolen, locale);
+               result = wcstombs_l(to, from, tolen, locale->info.lt);
 #else                                                  /* !HAVE_WCSTOMBS_L */
                /* We have to temporarily set the locale as current ... ugh */
-               locale_t        save_locale = uselocale(locale);
+               locale_t        save_locale = uselocale(locale->info.lt);
 
                result = wcstombs(to, from, tolen);
 
@@ -1432,6 +1610,8 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
 {
        size_t          result;
 
+       Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+
        if (tolen == 0)
                return 0;
 
@@ -1473,10 +1653,10 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
 #ifdef HAVE_LOCALE_T
 #ifdef HAVE_MBSTOWCS_L
                        /* Use mbstowcs_l for nondefault locales */
-                       result = mbstowcs_l(to, str, tolen, locale);
+                       result = mbstowcs_l(to, str, tolen, locale->info.lt);
 #else                                                  /* !HAVE_MBSTOWCS_L */
                        /* We have to temporarily set the locale as current ... ugh */
-                       locale_t        save_locale = uselocale(locale);
+                       locale_t        save_locale = uselocale(locale->info.lt);
 
                        result = mbstowcs(to, str, tolen);
 
index bb9a5446861bde72e1caee147ef7e0c382b8de16..f8b28fe0e612da3376cd95f6848f84de25fc5d39 100644 (file)
@@ -5259,7 +5259,7 @@ find_join_input_rel(PlannerInfo *root, Relids relids)
 /*
  * Check whether char is a letter (and, hence, subject to case-folding)
  *
- * In multibyte character sets, we can't use isalpha, and it does not seem
+ * In multibyte character sets or with ICU, we can't use isalpha, and it does not seem
  * worth trying to convert to wchar_t to use iswalpha.  Instead, just assume
  * any multibyte char is potentially case-varying.
  */
@@ -5271,9 +5271,11 @@ pattern_char_isalpha(char c, bool is_multibyte,
                return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
        else if (is_multibyte && IS_HIGHBIT_SET(c))
                return true;
+       else if (locale && locale->provider == COLLPROVIDER_ICU)
+               return IS_HIGHBIT_SET(c) ? true : false;
 #ifdef HAVE_LOCALE_T
-       else if (locale)
-               return isalpha_l((unsigned char) c, locale);
+       else if (locale && locale->provider == COLLPROVIDER_LIBC)
+               return isalpha_l((unsigned char) c, locale->info.lt);
 #endif
        else
                return isalpha((unsigned char) c);
index cd036afc004d569d7f8c00a9605b0e70b3bca812..aa556aa5deb4663ab0d183bc2da7b6195475d32f 100644 (file)
@@ -73,9 +73,7 @@ typedef struct
        hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
        hyperLogLogState full_card; /* Full key cardinality state */
        double          prop_card;              /* Required cardinality proportion */
-#ifdef HAVE_LOCALE_T
        pg_locale_t locale;
-#endif
 } VarStringSortSupport;
 
 /*
@@ -1403,10 +1401,7 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
                char            a2buf[TEXTBUFLEN];
                char       *a1p,
                                   *a2p;
-
-#ifdef HAVE_LOCALE_T
                pg_locale_t mylocale = 0;
-#endif
 
                if (collid != DEFAULT_COLLATION_OID)
                {
@@ -1421,9 +1416,7 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
                                                 errmsg("could not determine which collation to use for string comparison"),
                                                 errhint("Use the COLLATE clause to set the collation explicitly.")));
                        }
-#ifdef HAVE_LOCALE_T
                        mylocale = pg_newlocale_from_collation(collid);
-#endif
                }
 
                /*
@@ -1542,11 +1535,54 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
                memcpy(a2p, arg2, len2);
                a2p[len2] = '\0';
 
-#ifdef HAVE_LOCALE_T
                if (mylocale)
-                       result = strcoll_l(a1p, a2p, mylocale);
-               else
+               {
+                       if (mylocale->provider == COLLPROVIDER_ICU)
+                       {
+#ifdef USE_ICU
+#ifdef HAVE_UCOL_STRCOLLUTF8
+                               if (GetDatabaseEncoding() == PG_UTF8)
+                               {
+                                       UErrorCode      status;
+
+                                       status = U_ZERO_ERROR;
+                                       result = ucol_strcollUTF8(mylocale->info.icu.ucol,
+                                                                                         arg1, len1,
+                                                                                         arg2, len2,
+                                                                                         &status);
+                                       if (U_FAILURE(status))
+                                               ereport(ERROR,
+                                                               (errmsg("collation failed: %s", u_errorName(status))));
+                               }
+                               else
+#endif
+                               {
+                                       int32_t ulen1, ulen2;
+                                       UChar *uchar1, *uchar2;
+
+                                       /* Convert both inputs to UChar strings and compare those. */
+                                       ulen1 = icu_to_uchar(&uchar1, arg1, len1);
+                                       ulen2 = icu_to_uchar(&uchar2, arg2, len2);
+
+                                       result = ucol_strcoll(mylocale->info.icu.ucol,
+                                                                                 uchar1, ulen1,
+                                                                                 uchar2, ulen2);
+
+                                       /*
+                                        * icu_to_uchar() palloc's its output; free it so that
+                                        * repeated comparisons do not accumulate memory.
+                                        */
+                                       pfree(uchar1);
+                                       pfree(uchar2);
+                               }
+#else  /* not USE_ICU */
+                               /* shouldn't happen */
+                               elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+#endif /* not USE_ICU */
+                       }
+                       else
+                       {
+#ifdef HAVE_LOCALE_T
+                               result = strcoll_l(a1p, a2p, mylocale->info.lt);
+#else
+                               /* shouldn't happen */
+                               elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
 #endif
+                       }
+               }
+               else
                        result = strcoll(a1p, a2p);
 
                /*
@@ -1768,10 +1804,7 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
        bool            abbreviate = ssup->abbreviate;
        bool            collate_c = false;
        VarStringSortSupport *sss;
-
-#ifdef HAVE_LOCALE_T
        pg_locale_t locale = 0;
-#endif
 
        /*
         * If possible, set ssup->comparator to a function which can be used to
@@ -1826,9 +1859,7 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
                                                 errmsg("could not determine which collation to use for string comparison"),
                                                 errhint("Use the COLLATE clause to set the collation explicitly.")));
                        }
-#ifdef HAVE_LOCALE_T
                        locale = pg_newlocale_from_collation(collid);
-#endif
                }
        }
 
@@ -1854,7 +1885,7 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
         * platforms.
         */
 #ifndef TRUST_STRXFRM
-       if (!collate_c)
+       if (!collate_c && !(locale && locale->provider == COLLPROVIDER_ICU))
                abbreviate = false;
 #endif
 
@@ -1877,9 +1908,7 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
                sss->last_len2 = -1;
                /* Initialize */
                sss->last_returned = 0;
-#ifdef HAVE_LOCALE_T
                sss->locale = locale;
-#endif
 
                /*
                 * To avoid somehow confusing a strxfrm() blob and an original string,
@@ -2090,11 +2119,54 @@ varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
                goto done;
        }
 
-#ifdef HAVE_LOCALE_T
        if (sss->locale)
-               result = strcoll_l(sss->buf1, sss->buf2, sss->locale);
-       else
+       {
+               if (sss->locale->provider == COLLPROVIDER_ICU)
+               {
+#ifdef USE_ICU
+#ifdef HAVE_UCOL_STRCOLLUTF8
+                       if (GetDatabaseEncoding() == PG_UTF8)
+                       {
+                               UErrorCode      status;
+
+                               status = U_ZERO_ERROR;
+                               result = ucol_strcollUTF8(sss->locale->info.icu.ucol,
+                                                                                 a1p, len1,
+                                                                                 a2p, len2,
+                                                                                 &status);
+                               if (U_FAILURE(status))
+                                       ereport(ERROR,
+                                                       (errmsg("collation failed: %s", u_errorName(status))));
+                       }
+                       else
 #endif
+                       {
+                               int32_t ulen1, ulen2;
+                               UChar *uchar1, *uchar2;
+
+                               /* Convert both inputs to UChar strings and compare those. */
+                               ulen1 = icu_to_uchar(&uchar1, a1p, len1);
+                               ulen2 = icu_to_uchar(&uchar2, a2p, len2);
+
+                               result = ucol_strcoll(sss->locale->info.icu.ucol,
+                                                                         uchar1, ulen1,
+                                                                         uchar2, ulen2);
+
+                               /*
+                                * icu_to_uchar() palloc's its output; free it so that a sort
+                                * does not leak two buffers per comparison.
+                                */
+                               pfree(uchar1);
+                               pfree(uchar2);
+                       }
+#else  /* not USE_ICU */
+                       /* shouldn't happen */
+                       elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
+#endif /* not USE_ICU */
+               }
+               else
+               {
+#ifdef HAVE_LOCALE_T
+                       result = strcoll_l(sss->buf1, sss->buf2, sss->locale->info.lt);
+#else
+                       /* shouldn't happen */
+                       elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
+#endif
+               }
+       }
+       else
                result = strcoll(sss->buf1, sss->buf2);
 
        /*
@@ -2200,9 +2272,14 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
        else
        {
                Size            bsize;
+#ifdef USE_ICU
+               int32_t         ulen = -1;
+               UChar      *uchar;
+#endif
 
                /*
-                * We're not using the C collation, so fall back on strxfrm.
+                * We're not using the C collation, so fall back on strxfrm or ICU
+                * analogs.
                 */
 
                /* By convention, we use buffer 1 to store and NUL-terminate */
@@ -2222,17 +2299,66 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
                        goto done;
                }
 
-               /* Just like strcoll(), strxfrm() expects a NUL-terminated string */
                memcpy(sss->buf1, authoritative_data, len);
+               /* Just like strcoll(), strxfrm() expects a NUL-terminated string.
+                * Not necessary for ICU, but doesn't hurt. */
                sss->buf1[len] = '\0';
                sss->last_len1 = len;
 
+#ifdef USE_ICU
+               /* When using ICU and not UTF8, convert string to UChar. */
+               if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU &&
+                       GetDatabaseEncoding() != PG_UTF8)
+                       ulen = icu_to_uchar(&uchar, sss->buf1, len);
+#endif
+
+               /*
+                * Loop: Call strxfrm() or ucol_getSortKey(), possibly enlarge buffer,
+                * and try again.  Both of these functions have the result buffer
+                * content undefined if the result did not fit, so we need to retry
+                * until everything fits, even though we only need the first few bytes
+                * in the end.  When using ucol_nextSortKeyPart(), however, we only
+                * ask for as many bytes as we actually need.
+                */
                for (;;)
                {
+#ifdef USE_ICU
+                       if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU)
+                       {
+                               /*
+                                * When using UTF8, use the iteration interface so we only
+                                * need to produce as many bytes as we actually need.
+                                */
+                               if (GetDatabaseEncoding() == PG_UTF8)
+                               {
+                                       UCharIterator iter;
+                                       uint32_t        state[2];
+                                       UErrorCode      status;
+
+                                       uiter_setUTF8(&iter, sss->buf1, len);
+                                       state[0] = state[1] = 0;  /* won't need that again */
+                                       status = U_ZERO_ERROR;
+                                       bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol,
+                                                                                                &iter,
+                                                                                                state,
+                                                                                                (uint8_t *) sss->buf2,
+                                                                                                Min(sizeof(Datum), sss->buflen2),
+                                                                                                &status);
+                                       if (U_FAILURE(status))
+                                               ereport(ERROR,
+                                                               (errmsg("sort key generation failed: %s", u_errorName(status))));
+                               }
+                               else
+                                       bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
+                                                                                       uchar, ulen,
+                                                                                       (uint8_t *) sss->buf2, sss->buflen2);
+                       }
+                       else
+#endif
 #ifdef HAVE_LOCALE_T
-                       if (sss->locale)
+                       if (sss->locale && sss->locale->provider == COLLPROVIDER_LIBC)
                                bsize = strxfrm_l(sss->buf2, sss->buf1,
-                                                                 sss->buflen2, sss->locale);
+                                                                 sss->buflen2, sss->locale->info.lt);
                        else
 #endif
                                bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2);
@@ -2242,8 +2368,7 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
                                break;
 
                        /*
-                        * The C standard states that the contents of the buffer is now
-                        * unspecified.  Grow buffer, and retry.
+                        * Grow buffer and retry.
                         */
                        pfree(sss->buf2);
                        sss->buflen2 = Max(bsize + 1,
index 11099b844f46135a3001592c4b471dbd193707c3..444eec25b50caf8e692cc29d6f11db7a72731dc7 100644 (file)
@@ -403,6 +403,82 @@ const pg_enc2gettext pg_enc2gettext_tbl[] =
 };
 
 
+#ifndef FRONTEND
+
+/*
+ * Table of encoding names for ICU
+ *
+ * Reference: <https://ssl.icu-project.org/icu-bin/convexp>
+ *
+ * NULL entries are not supported by ICU, or their mapping is unclear.
+ *
+ * The table is indexed directly by encoding ID, so each entry must sit at
+ * the position of the encoding named in its trailing comment.  The static
+ * assertion in get_encoding_name_for_icu() checks that the table has
+ * exactly PG_ENCODING_BE_LAST + 1 entries; it cannot check their order.
+ */
+static const char * const pg_enc2icu_tbl[] =
+{
+       NULL,                                   /* PG_SQL_ASCII */
+       "EUC-JP",                               /* PG_EUC_JP */
+       "EUC-CN",                               /* PG_EUC_CN */
+       "EUC-KR",                               /* PG_EUC_KR */
+       "EUC-TW",                               /* PG_EUC_TW */
+       NULL,                                   /* PG_EUC_JIS_2004 */
+       "UTF-8",                                /* PG_UTF8 */
+       NULL,                                   /* PG_MULE_INTERNAL */
+       "ISO-8859-1",                   /* PG_LATIN1 */
+       "ISO-8859-2",                   /* PG_LATIN2 */
+       "ISO-8859-3",                   /* PG_LATIN3 */
+       "ISO-8859-4",                   /* PG_LATIN4 */
+       "ISO-8859-9",                   /* PG_LATIN5 */
+       "ISO-8859-10",                  /* PG_LATIN6 */
+       "ISO-8859-13",                  /* PG_LATIN7 */
+       "ISO-8859-14",                  /* PG_LATIN8 */
+       "ISO-8859-15",                  /* PG_LATIN9 */
+       NULL,                                   /* PG_LATIN10 */
+       "CP1256",                               /* PG_WIN1256 */
+       "CP1258",                               /* PG_WIN1258 */
+       "CP866",                                /* PG_WIN866 */
+       NULL,                                   /* PG_WIN874 */
+       "KOI8-R",                               /* PG_KOI8R */
+       "CP1251",                               /* PG_WIN1251 */
+       "CP1252",                               /* PG_WIN1252 */
+       "ISO-8859-5",                   /* PG_ISO_8859_5 */
+       "ISO-8859-6",                   /* PG_ISO_8859_6 */
+       "ISO-8859-7",                   /* PG_ISO_8859_7 */
+       "ISO-8859-8",                   /* PG_ISO_8859_8 */
+       "CP1250",                               /* PG_WIN1250 */
+       "CP1253",                               /* PG_WIN1253 */
+       "CP1254",                               /* PG_WIN1254 */
+       "CP1255",                               /* PG_WIN1255 */
+       "CP1257",                               /* PG_WIN1257 */
+       "KOI8-U",                               /* PG_KOI8U */
+};
+
+/*
+ * Report whether ICU has a converter name for the given encoding ID.
+ *
+ * Returns false both for IDs that fall outside pg_enc2icu_tbl and for
+ * entries that are NULL in the table.
+ */
+bool
+is_encoding_supported_by_icu(int encoding)
+{
+       /* Guard the lookup: the table only covers IDs 0 .. PG_ENCODING_BE_LAST. */
+       if (encoding < 0 || encoding >= (int) lengthof(pg_enc2icu_tbl))
+               return false;
+
+       return (pg_enc2icu_tbl[encoding] != NULL);
+}
+
+/*
+ * Return the ICU converter name for the given encoding ID.
+ *
+ * Raises ERROR (ERRCODE_FEATURE_NOT_SUPPORTED) if the ID is outside
+ * pg_enc2icu_tbl or its table entry is NULL.
+ */
+const char *
+get_encoding_name_for_icu(int encoding)
+{
+       const char *icu_encoding_name;
+
+       StaticAssertStmt(lengthof(pg_enc2icu_tbl) == PG_ENCODING_BE_LAST + 1,
+                                        "pg_enc2icu_tbl incomplete");
+
+       /* Treat IDs the table does not cover as unsupported, not as an index. */
+       if (encoding < 0 || encoding >= (int) lengthof(pg_enc2icu_tbl))
+               icu_encoding_name = NULL;
+       else
+               icu_encoding_name = pg_enc2icu_tbl[encoding];
+
+       if (!icu_encoding_name)
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                errmsg("encoding \"%s\" not supported by ICU",
+                                               pg_encoding_to_char(encoding))));
+
+       return icu_encoding_name;
+}
+
+#endif /* not FRONTEND */
+
+
 /* ----------
  * Encoding checks, for error returns -1 else encoding id
  * ----------
index e0c72fbb8003b4e1954e3e87ecdcf5c2bb3bbc87..8dde1e8f9d4fd9e1759aef21602400de9c6dc651 100644 (file)
@@ -62,6 +62,7 @@
 #include "catalog/catalog.h"
 #include "catalog/pg_authid.h"
 #include "catalog/pg_class.h"
+#include "catalog/pg_collation.h"
 #include "common/file_utils.h"
 #include "common/restricted_token.h"
 #include "common/username.h"
@@ -1629,7 +1630,7 @@ setup_collation(FILE *cmdfd)
        PG_CMD_PUTS("SELECT pg_import_system_collations(if_not_exists => false, schema => 'pg_catalog');\n\n");
 
        /* Add an SQL-standard name */
-       PG_CMD_PRINTF2("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) VALUES ('ucs_basic', 'pg_catalog'::regnamespace, %u, %d, 'C', 'C');\n\n", BOOTSTRAP_SUPERUSERID, PG_UTF8);
+       PG_CMD_PRINTF3("INSERT INTO pg_collation (collname, collnamespace, collowner, collprovider, collencoding, collcollate, collctype) VALUES ('ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', %d, 'C', 'C');\n\n", BOOTSTRAP_SUPERUSERID, COLLPROVIDER_LIBC, PG_UTF8);
 }
 
 /*
index a98747d89a461952ee3780620d4e5d6e046ada51..b3d95d7f6ee3a3fa50a900b8131f8727f3900fd1 100644 (file)
@@ -12834,8 +12834,10 @@ dumpCollation(Archive *fout, CollInfo *collinfo)
        PQExpBuffer delq;
        PQExpBuffer labelq;
        PGresult   *res;
+       int                     i_collprovider;
        int                     i_collcollate;
        int                     i_collctype;
+       const char *collprovider;
        const char *collcollate;
        const char *collctype;
 
@@ -12852,18 +12854,32 @@ dumpCollation(Archive *fout, CollInfo *collinfo)
        selectSourceSchema(fout, collinfo->dobj.namespace->dobj.name);
 
        /* Get collation-specific details */
-       appendPQExpBuffer(query, "SELECT "
-                                         "collcollate, "
-                                         "collctype "
-                                         "FROM pg_catalog.pg_collation c "
-                                         "WHERE c.oid = '%u'::pg_catalog.oid",
-                                         collinfo->dobj.catId.oid);
+       if (fout->remoteVersion >= 100000)
+               appendPQExpBuffer(query, "SELECT "
+                                                 "collprovider, "
+                                                 "collcollate, "
+                                                 "collctype, "
+                                                 "collversion "
+                                                 "FROM pg_catalog.pg_collation c "
+                                                 "WHERE c.oid = '%u'::pg_catalog.oid",
+                                                 collinfo->dobj.catId.oid);
+       else
+       {
+               /*
+                * Older servers have no collprovider or collversion column.
+                * Report the provider as 'c' (libc): the provider check further
+                * down accepts only 'c' and 'i' and exits on anything else.
+                */
+               appendPQExpBuffer(query, "SELECT "
+                                                 "'c'::char AS collprovider, "
+                                                 "collcollate, "
+                                                 "collctype, "
+                                                 "NULL AS collversion "
+                                                 "FROM pg_catalog.pg_collation c "
+                                                 "WHERE c.oid = '%u'::pg_catalog.oid",
+                                                 collinfo->dobj.catId.oid);
+       }
 
        res = ExecuteSqlQueryForSingleRow(fout, query->data);
 
+       i_collprovider = PQfnumber(res, "collprovider");
        i_collcollate = PQfnumber(res, "collcollate");
        i_collctype = PQfnumber(res, "collctype");
 
+       collprovider = PQgetvalue(res, 0, i_collprovider);
        collcollate = PQgetvalue(res, 0, i_collcollate);
        collctype = PQgetvalue(res, 0, i_collctype);
 
@@ -12875,11 +12891,50 @@ dumpCollation(Archive *fout, CollInfo *collinfo)
        appendPQExpBuffer(delq, ".%s;\n",
                                          fmtId(collinfo->dobj.name));
 
-       appendPQExpBuffer(q, "CREATE COLLATION %s (lc_collate = ",
+       appendPQExpBuffer(q, "CREATE COLLATION %s (",
                                          fmtId(collinfo->dobj.name));
-       appendStringLiteralAH(q, collcollate, fout);
-       appendPQExpBufferStr(q, ", lc_ctype = ");
-       appendStringLiteralAH(q, collctype, fout);
+
+       appendPQExpBufferStr(q, "provider = ");
+       if (collprovider[0] == 'c')
+               appendPQExpBufferStr(q, "libc");
+       else if (collprovider[0] == 'i')
+               appendPQExpBufferStr(q, "icu");
+       else
+               exit_horribly(NULL,
+                                         "unrecognized collation provider: %s\n",
+                                         collprovider);
+
+       if (strcmp(collcollate, collctype) == 0)
+       {
+               appendPQExpBufferStr(q, ", locale = ");
+               appendStringLiteralAH(q, collcollate, fout);
+       }
+       else
+       {
+               appendPQExpBufferStr(q, ", lc_collate = ");
+               appendStringLiteralAH(q, collcollate, fout);
+               appendPQExpBufferStr(q, ", lc_ctype = ");
+               appendStringLiteralAH(q, collctype, fout);
+       }
+
+       /*
+        * For binary upgrade, carry over the collation version.  For normal
+        * dump/restore, omit the version, so that it is computed upon restore.
+        */
+       if (dopt->binary_upgrade)
+       {
+               int                     i_collversion;
+
+               i_collversion = PQfnumber(res, "collversion");
+               if (!PQgetisnull(res, 0, i_collversion))
+               {
+                       appendPQExpBufferStr(q, ", version = ");
+                       appendStringLiteralAH(q,
+                                                                 PQgetvalue(res, 0, i_collversion),
+                                                                 fout);
+               }
+       }
+
        appendPQExpBufferStr(q, ");\n");
 
        appendPQExpBuffer(labelq, "COLLATION %s", fmtId(collinfo->dobj.name));
index 021f4bf081a7a43a51824a532783ea4d3d4f3683..366737440ce31ac3b09ffdfe0e00279823320695 100644 (file)
@@ -2424,7 +2424,7 @@ qr/^\QINSERT INTO test_fifth_table (col1, col2, col3, col4, col5) VALUES (NULL,
                  'CREATE COLLATION test0 FROM "C";',
                regexp =>
                  qr/^
-                 \QCREATE COLLATION test0 (lc_collate = 'C', lc_ctype = 'C');\E/xm,
+                 \QCREATE COLLATION test0 (provider = libc, locale = 'C');\E/xm,
            collation => 1,
                like => {
                        binary_upgrade           => 1,
index 61a3e2a848330ab6febfb4812f5bebe2403d38a9..8c583127fdd351aa0acf166a46cfac47b91560f1 100644 (file)
@@ -3738,7 +3738,7 @@ listCollations(const char *pattern, bool verbose, bool showSystem)
        PQExpBufferData buf;
        PGresult   *res;
        printQueryOpt myopt = pset.popt;
-       static const bool translate_columns[] = {false, false, false, false, false};
+       static const bool translate_columns[] = {false, false, false, false, false, false};
 
        if (pset.sversion < 90100)
        {
@@ -3762,6 +3762,11 @@ listCollations(const char *pattern, bool verbose, bool showSystem)
                                          gettext_noop("Collate"),
                                          gettext_noop("Ctype"));
 
+       if (pset.sversion >= 100000)
+               appendPQExpBuffer(&buf,
+                                                 ",\n       CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\"",
+                                                 gettext_noop("Provider"));
+
        if (verbose)
                appendPQExpBuffer(&buf,
                                                  ",\n       pg_catalog.obj_description(c.oid, 'pg_collation') AS \"%s\"",
index 30c87e004ec4ea5ab09ea6b00cd8072ff94f7d92..8edd8aa0662442f7f40ab4bf13181d1f7074a7de 100644 (file)
@@ -34,9 +34,13 @@ CATALOG(pg_collation,3456)
        NameData        collname;               /* collation name */
        Oid                     collnamespace;  /* OID of namespace containing collation */
        Oid                     collowner;              /* owner of collation */
+       char            collprovider;   /* see constants below */
        int32           collencoding;   /* encoding for this collation; -1 = "all" */
        NameData        collcollate;    /* LC_COLLATE setting */
        NameData        collctype;              /* LC_CTYPE setting */
+#ifdef CATALOG_VARLEN                  /* variable-length fields start here */
+       text            collversion;    /* provider-dependent version of collation data */
+#endif
 } FormData_pg_collation;
 
 /* ----------------
@@ -50,27 +54,34 @@ typedef FormData_pg_collation *Form_pg_collation;
  *             compiler constants for pg_collation
  * ----------------
  */
-#define Natts_pg_collation                             6
+#define Natts_pg_collation                             8
 #define Anum_pg_collation_collname             1
 #define Anum_pg_collation_collnamespace 2
 #define Anum_pg_collation_collowner            3
-#define Anum_pg_collation_collencoding 4
-#define Anum_pg_collation_collcollate  5
-#define Anum_pg_collation_collctype            6
+#define Anum_pg_collation_collprovider 4
+#define Anum_pg_collation_collencoding 5
+#define Anum_pg_collation_collcollate  6
+#define Anum_pg_collation_collctype            7
+#define Anum_pg_collation_collversion  8
 
 /* ----------------
  *             initial contents of pg_collation
  * ----------------
  */
 
-DATA(insert OID = 100 ( default                PGNSP PGUID -1 "" "" ));
+DATA(insert OID = 100 ( default                PGNSP PGUID d -1 "" "" 0 ));
 DESCR("database's default collation");
 #define DEFAULT_COLLATION_OID  100
-DATA(insert OID = 950 ( C                      PGNSP PGUID -1 "C" "C" ));
+DATA(insert OID = 950 ( C                      PGNSP PGUID c -1 "C" "C" 0 ));
 DESCR("standard C collation");
 #define C_COLLATION_OID                        950
-DATA(insert OID = 951 ( POSIX          PGNSP PGUID -1 "POSIX" "POSIX" ));
+DATA(insert OID = 951 ( POSIX          PGNSP PGUID c -1 "POSIX" "POSIX" 0 ));
 DESCR("standard POSIX collation");
 #define POSIX_COLLATION_OID            951
 
+
+#define COLLPROVIDER_DEFAULT   'd'
+#define COLLPROVIDER_ICU               'i'
+#define COLLPROVIDER_LIBC              'c'
+
 #endif   /* PG_COLLATION_H */
index 482ba7920e5e3c8f8078802491e598df09a1539e..dfebdbaa0bbb7fddf5296ae737f6f2e9c95dca25 100644 (file)
 
 extern Oid CollationCreate(const char *collname, Oid collnamespace,
                                Oid collowner,
+                               char collprovider,
                                int32 collencoding,
                                const char *collcollate, const char *collctype,
+                               const char *collversion,
                                bool if_not_exists);
 extern void RemoveCollationById(Oid collationOid);
 
index a5b415346b7a65b2bc39d06527e209509825e05f..0d18ab8c0dcf61d3b61601779114e475f284644e 100644 (file)
@@ -5401,6 +5401,9 @@ DESCR("pg_controldata init state information as a function");
 DATA(insert OID = 3445 ( pg_import_system_collations PGNSP PGUID 12 100 0 0 0 f f f f t f v r 2 0 2278 "16 4089" _null_ _null_ "{if_not_exists,schema}" _null_ _null_ pg_import_system_collations _null_ _null_ _null_ ));
 DESCR("import collations from operating system");
 
+DATA(insert OID = 3448 ( pg_collation_actual_version PGNSP PGUID 12 100 0 0 0 f f f f t f v s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_collation_actual_version _null_ _null_ _null_ ));
+DESCR("get actual version of collation from operating system");
+
 /* system management/monitoring related functions */
 DATA(insert OID = 3353 (  pg_ls_logdir               PGNSP PGUID 12 10 20 0 0 f f f f t t v s 0 0 2249 "" "{25,20,1184}" "{o,o,o}" "{name,size,modification}" _null_ _null_ pg_ls_logdir _null_ _null_ _null_ ));
 DESCR("list files in the log directory");
index 3b2fcb8271139f1b8c8ffe51ab66e1a999d76ff3..df5623ccb6d5dc227f272bf82b554de7ee7a0880 100644 (file)
@@ -20,5 +20,6 @@
 
 extern ObjectAddress DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_exists);
 extern void IsThereCollationInNamespace(const char *collname, Oid nspOid);
+extern ObjectAddress AlterCollation(AlterCollationStmt *stmt);
 
 #endif   /* COLLATIONCMDS_H */
index 5f546973028ca4597acdcadec6bda79329d472dc..9c5e749c9e7c2a08912e8d2977c51071af27f1b2 100644 (file)
@@ -332,6 +332,12 @@ typedef struct pg_enc2gettext
 
 extern const pg_enc2gettext pg_enc2gettext_tbl[];
 
+/*
+ * Encoding names for ICU
+ */
+extern bool is_encoding_supported_by_icu(int encoding);
+extern const char *get_encoding_name_for_icu(int encoding);
+
 /*
  * pg_wchar stuff
  */
index 9a4221a9e7bbca1ce897af3bff0f485d2c11c52a..b2d8514f895fe96a53b5b3a889d2c02d78bda71e 100644 (file)
@@ -424,6 +424,7 @@ typedef enum NodeTag
        T_CreateSubscriptionStmt,
        T_AlterSubscriptionStmt,
        T_DropSubscriptionStmt,
+       T_AlterCollationStmt,
 
        /*
         * TAGS FOR PARSE TREE NODES (parsenodes.h)
index 582e0e0ebe94a9c1d0ccf97b3342fcb478b1dbcb..f3773ca9294ec58f3b8dd4b2bf470f4c55e564c8 100644 (file)
@@ -1732,6 +1732,17 @@ typedef struct AlterTableCmd     /* one subcommand of an ALTER TABLE */
 } AlterTableCmd;
 
 
+/* ----------------------
+ * Alter Collation
+ * ----------------------
+ */
+typedef struct AlterCollationStmt
+{
+       NodeTag         type;
+       List       *collname;	/* name of the collation to alter; presumably a possibly-qualified name list -- confirm in gram.y */
+} AlterCollationStmt;
+
+
 /* ----------------------
  *     Alter Domain
  *
index 6a8176b323e38150a9598a453cd3a4179fef3420..e1c1c9e9b4787c95412bd0be61bcfa41f1b72704 100644 (file)
 /* Define to 1 if you have the external array `tzname'. */
 #undef HAVE_TZNAME
 
+/* Define to 1 if you have the `ucol_strcollUTF8' function. */
+#undef HAVE_UCOL_STRCOLLUTF8
+
 /* Define to 1 if you have the <ucred.h> header file. */
 #undef HAVE_UCRED_H
 
    (--enable-float8-byval) */
 #undef USE_FLOAT8_BYVAL
 
+/* Define to build with ICU support. (--with-icu) */
+#undef USE_ICU
+
 /* Define to 1 to build with LDAP support. (--with-ldap) */
 #undef USE_LDAP
 
index cb509e2b6b0ce93952d6b378027b19347cf52014..12d7547413847a3d7efbeb5575108e20b3358fe0 100644 (file)
@@ -15,6 +15,9 @@
 #if defined(LOCALE_T_IN_XLOCALE) || defined(WCSTOMBS_L_IN_XLOCALE)
 #include <xlocale.h>
 #endif
+#ifdef USE_ICU
+#include <unicode/ucol.h>
+#endif
 
 #include "utils/guc.h"
 
@@ -61,17 +64,36 @@ extern void cache_locale_time(void);
  * We define our own wrapper around locale_t so we can keep the same
  * function signatures for all builds, while not having to create a
  * fake version of the standard type locale_t in the global namespace.
- * The fake version of pg_locale_t can be checked for truth; that's
- * about all it will be needed for.
+ * pg_locale_t is occasionally checked for truth, so make it a pointer.
  */
+struct pg_locale_t
+{
+       char    provider;	/* which library supplies this locale; presumably a COLLPROVIDER_* code -- confirm */
+       union
+       {
 #ifdef HAVE_LOCALE_T
-typedef locale_t pg_locale_t;
-#else
-typedef int pg_locale_t;
+               locale_t lt;	/* libc locale handle */
+#endif
+#ifdef USE_ICU
+               struct {
+                       const char *locale;	/* presumably the ICU locale name -- TODO confirm */
+                       UCollator *ucol;	/* ICU collator handle (unicode/ucol.h) */
+               } icu;
 #endif
+       } info;	/* provider-specific locale handle(s) */
+};
+
+typedef struct pg_locale_t *pg_locale_t;
 
 extern pg_locale_t pg_newlocale_from_collation(Oid collid);
 
+extern char *get_collation_actual_version(char collprovider, const char *collcollate);
+
+#ifdef USE_ICU
+extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
+extern int32_t icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar);
+#endif
+
 /* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */
 #ifdef USE_WIDE_UPPER_LOWER
 extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen,
index b923ea142031052e561d7fad3ded467d997609d2..a747facb9af146fb0e0dcde7ae3df237c41ed8fa 100644 (file)
@@ -125,6 +125,9 @@ tablespace-setup:
 ##
 
 REGRESS_OPTS = --dlpath=. $(EXTRA_REGRESS_OPTS)
+ifeq ($(with_icu),yes)
+override EXTRA_TESTS := collate.icu $(EXTRA_TESTS)
+endif
 
 check: all tablespace-setup
        $(pg_regress_check) $(REGRESS_OPTS) --schedule=$(srcdir)/parallel_schedule $(MAXCONNOPT) $(EXTRA_TESTS)
diff --git a/src/test/regress/expected/collate.icu.out b/src/test/regress/expected/collate.icu.out
new file mode 100644 (file)
index 0000000..e1fc998
--- /dev/null
@@ -0,0 +1,1126 @@
+/*
+ * This test is for ICU collations.
+ */
+SET client_encoding TO UTF8;
+CREATE SCHEMA collate_tests;
+SET search_path = collate_tests;
+CREATE TABLE collate_test1 (
+    a int,
+    b text COLLATE "en-x-icu" NOT NULL
+);
+\d collate_test1
+        Table "collate_tests.collate_test1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | text    | en-x-icu  | not null | 
+
+CREATE TABLE collate_test_fail (
+    a int,
+    b text COLLATE "ja_JP.eucjp-x-icu"
+);
+ERROR:  collation "ja_JP.eucjp-x-icu" for encoding "UTF8" does not exist
+LINE 3:     b text COLLATE "ja_JP.eucjp-x-icu"
+                   ^
+CREATE TABLE collate_test_fail (
+    a int,
+    b text COLLATE "foo-x-icu"
+);
+ERROR:  collation "foo-x-icu" for encoding "UTF8" does not exist
+LINE 3:     b text COLLATE "foo-x-icu"
+                   ^
+CREATE TABLE collate_test_fail (
+    a int COLLATE "en-x-icu",
+    b text
+);
+ERROR:  collations are not supported by type integer
+LINE 2:     a int COLLATE "en-x-icu",
+                  ^
+CREATE TABLE collate_test_like (
+    LIKE collate_test1
+);
+\d collate_test_like
+      Table "collate_tests.collate_test_like"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | text    | en-x-icu  | not null | 
+
+CREATE TABLE collate_test2 (
+    a int,
+    b text COLLATE "sv-x-icu"
+);
+CREATE TABLE collate_test3 (
+    a int,
+    b text COLLATE "C"
+);
+INSERT INTO collate_test1 VALUES (1, 'abc'), (2, 'äbc'), (3, 'bbc'), (4, 'ABC');
+INSERT INTO collate_test2 SELECT * FROM collate_test1;
+INSERT INTO collate_test3 SELECT * FROM collate_test1;
+SELECT * FROM collate_test1 WHERE b >= 'bbc';
+ a |  b  
+---+-----
+ 3 | bbc
+(1 row)
+
+SELECT * FROM collate_test2 WHERE b >= 'bbc';
+ a |  b  
+---+-----
+ 2 | äbc
+ 3 | bbc
+(2 rows)
+
+SELECT * FROM collate_test3 WHERE b >= 'bbc';
+ a |  b  
+---+-----
+ 2 | äbc
+ 3 | bbc
+(2 rows)
+
+SELECT * FROM collate_test3 WHERE b >= 'BBC';
+ a |  b  
+---+-----
+ 1 | abc
+ 2 | äbc
+ 3 | bbc
+(3 rows)
+
+SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc';
+ a |  b  
+---+-----
+ 2 | äbc
+ 3 | bbc
+(2 rows)
+
+SELECT * FROM collate_test1 WHERE b >= 'bbc' COLLATE "C";
+ a |  b  
+---+-----
+ 2 | äbc
+ 3 | bbc
+(2 rows)
+
+SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "C";
+ a |  b  
+---+-----
+ 2 | äbc
+ 3 | bbc
+(2 rows)
+
+SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en-x-icu";
+ERROR:  collation mismatch between explicit collations "C" and "en-x-icu"
+LINE 1: ...* FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "e...
+                                                             ^
+CREATE DOMAIN testdomain_sv AS text COLLATE "sv-x-icu";
+CREATE DOMAIN testdomain_i AS int COLLATE "sv-x-icu"; -- fails
+ERROR:  collations are not supported by type integer
+CREATE TABLE collate_test4 (
+    a int,
+    b testdomain_sv
+);
+INSERT INTO collate_test4 SELECT * FROM collate_test1;
+SELECT a, b FROM collate_test4 ORDER BY b;
+ a |  b  
+---+-----
+ 1 | abc
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+CREATE TABLE collate_test5 (
+    a int,
+    b testdomain_sv COLLATE "en-x-icu"
+);
+INSERT INTO collate_test5 SELECT * FROM collate_test1;
+SELECT a, b FROM collate_test5 ORDER BY b;
+ a |  b  
+---+-----
+ 1 | abc
+ 4 | ABC
+ 2 | äbc
+ 3 | bbc
+(4 rows)
+
+SELECT a, b FROM collate_test1 ORDER BY b;
+ a |  b  
+---+-----
+ 1 | abc
+ 4 | ABC
+ 2 | äbc
+ 3 | bbc
+(4 rows)
+
+SELECT a, b FROM collate_test2 ORDER BY b;
+ a |  b  
+---+-----
+ 1 | abc
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, b FROM collate_test3 ORDER BY b;
+ a |  b  
+---+-----
+ 4 | ABC
+ 1 | abc
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C";
+ a |  b  
+---+-----
+ 4 | ABC
+ 1 | abc
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+-- star expansion
+SELECT * FROM collate_test1 ORDER BY b;
+ a |  b  
+---+-----
+ 1 | abc
+ 4 | ABC
+ 2 | äbc
+ 3 | bbc
+(4 rows)
+
+SELECT * FROM collate_test2 ORDER BY b;
+ a |  b  
+---+-----
+ 1 | abc
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT * FROM collate_test3 ORDER BY b;
+ a |  b  
+---+-----
+ 4 | ABC
+ 1 | abc
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+-- constant expression folding
+SELECT 'bbc' COLLATE "en-x-icu" > 'äbc' COLLATE "en-x-icu" AS "true";
+ true 
+------
+ t
+(1 row)
+
+SELECT 'bbc' COLLATE "sv-x-icu" > 'äbc' COLLATE "sv-x-icu" AS "false";
+ false 
+-------
+ f
+(1 row)
+
+-- upper/lower
+CREATE TABLE collate_test10 (
+    a int,
+    x text COLLATE "en-x-icu",
+    y text COLLATE "tr-x-icu"
+);
+INSERT INTO collate_test10 VALUES (1, 'hij', 'hij'), (2, 'HIJ', 'HIJ');
+SELECT a, lower(x), lower(y), upper(x), upper(y), initcap(x), initcap(y) FROM collate_test10;
+ a | lower | lower | upper | upper | initcap | initcap 
+---+-------+-------+-------+-------+---------+---------
+ 1 | hij   | hij   | HIJ   | HİJ   | Hij     | Hij
+ 2 | hij   | hıj   | HIJ   | HIJ   | Hij     | Hıj
+(2 rows)
+
+SELECT a, lower(x COLLATE "C"), lower(y COLLATE "C") FROM collate_test10;
+ a | lower | lower 
+---+-------+-------
+ 1 | hij   | hij
+ 2 | hij   | hij
+(2 rows)
+
+SELECT a, x, y FROM collate_test10 ORDER BY lower(y), a;
+ a |  x  |  y  
+---+-----+-----
+ 2 | HIJ | HIJ
+ 1 | hij | hij
+(2 rows)
+
+-- LIKE/ILIKE
+SELECT * FROM collate_test1 WHERE b LIKE 'abc';
+ a |  b  
+---+-----
+ 1 | abc
+(1 row)
+
+SELECT * FROM collate_test1 WHERE b LIKE 'abc%';
+ a |  b  
+---+-----
+ 1 | abc
+(1 row)
+
+SELECT * FROM collate_test1 WHERE b LIKE '%bc%';
+ a |  b  
+---+-----
+ 1 | abc
+ 2 | äbc
+ 3 | bbc
+(3 rows)
+
+SELECT * FROM collate_test1 WHERE b ILIKE 'abc';
+ a |  b  
+---+-----
+ 1 | abc
+ 4 | ABC
+(2 rows)
+
+SELECT * FROM collate_test1 WHERE b ILIKE 'abc%';
+ a |  b  
+---+-----
+ 1 | abc
+ 4 | ABC
+(2 rows)
+
+SELECT * FROM collate_test1 WHERE b ILIKE '%bc%';
+ a |  b  
+---+-----
+ 1 | abc
+ 2 | äbc
+ 3 | bbc
+ 4 | ABC
+(4 rows)
+
+SELECT 'Türkiye' COLLATE "en-x-icu" ILIKE '%KI%' AS "true";
+ true 
+------
+ t
+(1 row)
+
+SELECT 'Türkiye' COLLATE "tr-x-icu" ILIKE '%KI%' AS "false";
+ false 
+-------
+ f
+(1 row)
+
+SELECT 'bıt' ILIKE 'BIT' COLLATE "en-x-icu" AS "false";
+ false 
+-------
+ f
+(1 row)
+
+SELECT 'bıt' ILIKE 'BIT' COLLATE "tr-x-icu" AS "true";
+ true 
+------
+ t
+(1 row)
+
+-- The following actually exercises the selectivity estimation for ILIKE.
+SELECT relname FROM pg_class WHERE relname ILIKE 'abc%';
+ relname 
+---------
+(0 rows)
+
+-- regular expressions
+SELECT * FROM collate_test1 WHERE b ~ '^abc$';
+ a |  b  
+---+-----
+ 1 | abc
+(1 row)
+
+SELECT * FROM collate_test1 WHERE b ~ '^abc';
+ a |  b  
+---+-----
+ 1 | abc
+(1 row)
+
+SELECT * FROM collate_test1 WHERE b ~ 'bc';
+ a |  b  
+---+-----
+ 1 | abc
+ 2 | äbc
+ 3 | bbc
+(3 rows)
+
+SELECT * FROM collate_test1 WHERE b ~* '^abc$';
+ a |  b  
+---+-----
+ 1 | abc
+ 4 | ABC
+(2 rows)
+
+SELECT * FROM collate_test1 WHERE b ~* '^abc';
+ a |  b  
+---+-----
+ 1 | abc
+ 4 | ABC
+(2 rows)
+
+SELECT * FROM collate_test1 WHERE b ~* 'bc';
+ a |  b  
+---+-----
+ 1 | abc
+ 2 | äbc
+ 3 | bbc
+ 4 | ABC
+(4 rows)
+
+CREATE TABLE collate_test6 (
+    a int,
+    b text COLLATE "en-x-icu"
+);
+INSERT INTO collate_test6 VALUES (1, 'abc'), (2, 'ABC'), (3, '123'), (4, 'ab1'),
+                                 (5, 'a1!'), (6, 'a c'), (7, '!.;'), (8, '   '),
+                                 (9, 'äbç'), (10, 'ÄBÇ');
+SELECT b,
+       b ~ '^[[:alpha:]]+$' AS is_alpha,
+       b ~ '^[[:upper:]]+$' AS is_upper,
+       b ~ '^[[:lower:]]+$' AS is_lower,
+       b ~ '^[[:digit:]]+$' AS is_digit,
+       b ~ '^[[:alnum:]]+$' AS is_alnum,
+       b ~ '^[[:graph:]]+$' AS is_graph,
+       b ~ '^[[:print:]]+$' AS is_print,
+       b ~ '^[[:punct:]]+$' AS is_punct,
+       b ~ '^[[:space:]]+$' AS is_space
+FROM collate_test6;
+  b  | is_alpha | is_upper | is_lower | is_digit | is_alnum | is_graph | is_print | is_punct | is_space 
+-----+----------+----------+----------+----------+----------+----------+----------+----------+----------
+ abc | t        | f        | t        | f        | t        | t        | t        | f        | f
+ ABC | t        | t        | f        | f        | t        | t        | t        | f        | f
+ 123 | f        | f        | f        | t        | t        | t        | t        | f        | f
+ ab1 | f        | f        | f        | f        | t        | t        | t        | f        | f
+ a1! | f        | f        | f        | f        | f        | t        | t        | f        | f
+ a c | f        | f        | f        | f        | f        | f        | t        | f        | f
+ !.; | f        | f        | f        | f        | f        | t        | t        | t        | f
+     | f        | f        | f        | f        | f        | f        | t        | f        | t
+ äbç | t        | f        | t        | f        | t        | t        | t        | f        | f
+ ÄBÇ | t        | t        | f        | f        | t        | t        | t        | f        | f
+(10 rows)
+
+SELECT 'Türkiye' COLLATE "en-x-icu" ~* 'KI' AS "true";
+ true 
+------
+ t
+(1 row)
+
+SELECT 'Türkiye' COLLATE "tr-x-icu" ~* 'KI' AS "true";  -- true with ICU
+ true 
+------
+ t
+(1 row)
+
+SELECT 'bıt' ~* 'BIT' COLLATE "en-x-icu" AS "false";
+ false 
+-------
+ f
+(1 row)
+
+SELECT 'bıt' ~* 'BIT' COLLATE "tr-x-icu" AS "false";  -- false with ICU
+ false 
+-------
+ f
+(1 row)
+
+-- The following actually exercises the selectivity estimation for ~*.
+SELECT relname FROM pg_class WHERE relname ~* '^abc';
+ relname 
+---------
+(0 rows)
+
+/* not run by default because it requires tr_TR system locale
+-- to_char
+
+SET lc_time TO 'tr_TR';
+SELECT to_char(date '2010-04-01', 'DD TMMON YYYY');
+SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr-x-icu");
+*/
+-- backwards parsing
+CREATE VIEW collview1 AS SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc';
+CREATE VIEW collview2 AS SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C";
+CREATE VIEW collview3 AS SELECT a, lower((x || x) COLLATE "C") FROM collate_test10;
+SELECT table_name, view_definition FROM information_schema.views
+  WHERE table_name LIKE 'collview%' ORDER BY 1;
+ table_name |                             view_definition                              
+------------+--------------------------------------------------------------------------
+ collview1  |  SELECT collate_test1.a,                                                +
+            |     collate_test1.b                                                     +
+            |    FROM collate_test1                                                   +
+            |   WHERE ((collate_test1.b COLLATE "C") >= 'bbc'::text);
+ collview2  |  SELECT collate_test1.a,                                                +
+            |     collate_test1.b                                                     +
+            |    FROM collate_test1                                                   +
+            |   ORDER BY (collate_test1.b COLLATE "C");
+ collview3  |  SELECT collate_test10.a,                                               +
+            |     lower(((collate_test10.x || collate_test10.x) COLLATE "C")) AS lower+
+            |    FROM collate_test10;
+(3 rows)
+
+-- collation propagation in various expression types
+SELECT a, coalesce(b, 'foo') FROM collate_test1 ORDER BY 2;
+ a | coalesce 
+---+----------
+ 1 | abc
+ 4 | ABC
+ 2 | äbc
+ 3 | bbc
+(4 rows)
+
+SELECT a, coalesce(b, 'foo') FROM collate_test2 ORDER BY 2;
+ a | coalesce 
+---+----------
+ 1 | abc
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, coalesce(b, 'foo') FROM collate_test3 ORDER BY 2;
+ a | coalesce 
+---+----------
+ 4 | ABC
+ 1 | abc
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, lower(coalesce(x, 'foo')), lower(coalesce(y, 'foo')) FROM collate_test10;
+ a | lower | lower 
+---+-------+-------
+ 1 | hij   | hij
+ 2 | hij   | hıj
+(2 rows)
+
+SELECT a, b, greatest(b, 'CCC') FROM collate_test1 ORDER BY 3;
+ a |  b  | greatest 
+---+-----+----------
+ 1 | abc | CCC
+ 2 | äbc | CCC
+ 3 | bbc | CCC
+ 4 | ABC | CCC
+(4 rows)
+
+SELECT a, b, greatest(b, 'CCC') FROM collate_test2 ORDER BY 3;
+ a |  b  | greatest 
+---+-----+----------
+ 1 | abc | CCC
+ 3 | bbc | CCC
+ 4 | ABC | CCC
+ 2 | äbc | äbc
+(4 rows)
+
+SELECT a, b, greatest(b, 'CCC') FROM collate_test3 ORDER BY 3;
+ a |  b  | greatest 
+---+-----+----------
+ 4 | ABC | CCC
+ 1 | abc | abc
+ 3 | bbc | bbc
+ 2 | äbc | äbc
+(4 rows)
+
+SELECT a, x, y, lower(greatest(x, 'foo')), lower(greatest(y, 'foo')) FROM collate_test10;
+ a |  x  |  y  | lower | lower 
+---+-----+-----+-------+-------
+ 1 | hij | hij | hij   | hij
+ 2 | HIJ | HIJ | hij   | hıj
+(2 rows)
+
+SELECT a, nullif(b, 'abc') FROM collate_test1 ORDER BY 2;
+ a | nullif 
+---+--------
+ 4 | ABC
+ 2 | äbc
+ 3 | bbc
+ 1 | 
+(4 rows)
+
+SELECT a, nullif(b, 'abc') FROM collate_test2 ORDER BY 2;
+ a | nullif 
+---+--------
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+ 1 | 
+(4 rows)
+
+SELECT a, nullif(b, 'abc') FROM collate_test3 ORDER BY 2;
+ a | nullif 
+---+--------
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+ 1 | 
+(4 rows)
+
+SELECT a, lower(nullif(x, 'foo')), lower(nullif(y, 'foo')) FROM collate_test10;
+ a | lower | lower 
+---+-------+-------
+ 1 | hij   | hij
+ 2 | hij   | hıj
+(2 rows)
+
+SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test1 ORDER BY 2;
+ a |  b   
+---+------
+ 4 | ABC
+ 2 | äbc
+ 1 | abcd
+ 3 | bbc
+(4 rows)
+
+SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test2 ORDER BY 2;
+ a |  b   
+---+------
+ 4 | ABC
+ 1 | abcd
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test3 ORDER BY 2;
+ a |  b   
+---+------
+ 4 | ABC
+ 1 | abcd
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+CREATE DOMAIN testdomain AS text;
+SELECT a, b::testdomain FROM collate_test1 ORDER BY 2;
+ a |  b  
+---+-----
+ 1 | abc
+ 4 | ABC
+ 2 | äbc
+ 3 | bbc
+(4 rows)
+
+SELECT a, b::testdomain FROM collate_test2 ORDER BY 2;
+ a |  b  
+---+-----
+ 1 | abc
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, b::testdomain FROM collate_test3 ORDER BY 2;
+ a |  b  
+---+-----
+ 4 | ABC
+ 1 | abc
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, b::testdomain_sv FROM collate_test3 ORDER BY 2;
+ a |  b  
+---+-----
+ 1 | abc
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, lower(x::testdomain), lower(y::testdomain) FROM collate_test10;
+ a | lower | lower 
+---+-------+-------
+ 1 | hij   | hij
+ 2 | hij   | hıj
+(2 rows)
+
+SELECT min(b), max(b) FROM collate_test1;
+ min | max 
+-----+-----
+ abc | bbc
+(1 row)
+
+SELECT min(b), max(b) FROM collate_test2;
+ min | max 
+-----+-----
+ abc | äbc
+(1 row)
+
+SELECT min(b), max(b) FROM collate_test3;
+ min | max 
+-----+-----
+ ABC | äbc
+(1 row)
+
+SELECT array_agg(b ORDER BY b) FROM collate_test1;
+     array_agg     
+-------------------
+ {abc,ABC,äbc,bbc}
+(1 row)
+
+SELECT array_agg(b ORDER BY b) FROM collate_test2;
+     array_agg     
+-------------------
+ {abc,ABC,bbc,äbc}
+(1 row)
+
+SELECT array_agg(b ORDER BY b) FROM collate_test3;
+     array_agg     
+-------------------
+ {ABC,abc,bbc,äbc}
+(1 row)
+
+SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test1 ORDER BY 2;
+ a |  b  
+---+-----
+ 1 | abc
+ 1 | abc
+ 4 | ABC
+ 4 | ABC
+ 2 | äbc
+ 2 | äbc
+ 3 | bbc
+ 3 | bbc
+(8 rows)
+
+SELECT a, b FROM collate_test2 UNION SELECT a, b FROM collate_test2 ORDER BY 2;
+ a |  b  
+---+-----
+ 1 | abc
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, b FROM collate_test3 WHERE a < 4 INTERSECT SELECT a, b FROM collate_test3 WHERE a > 1 ORDER BY 2;
+ a |  b  
+---+-----
+ 3 | bbc
+ 2 | äbc
+(2 rows)
+
+SELECT a, b FROM collate_test3 EXCEPT SELECT a, b FROM collate_test3 WHERE a < 2 ORDER BY 2;
+ a |  b  
+---+-----
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+(3 rows)
+
+SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
+ERROR:  could not determine which collation to use for string comparison
+HINT:  Use the COLLATE clause to set the collation explicitly.
+SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- ok
+ a |  b  
+---+-----
+ 1 | abc
+ 2 | äbc
+ 3 | bbc
+ 4 | ABC
+ 1 | abc
+ 2 | äbc
+ 3 | bbc
+ 4 | ABC
+(8 rows)
+
+SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
+ERROR:  collation mismatch between implicit collations "en-x-icu" and "C"
+LINE 1: SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collat...
+                                                       ^
+HINT:  You can choose the collation by applying the COLLATE clause to one or both expressions.
+SELECT a, b COLLATE "C" FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- ok
+ a |  b  
+---+-----
+ 4 | ABC
+ 1 | abc
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
+ERROR:  collation mismatch between implicit collations "en-x-icu" and "C"
+LINE 1: ...ELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM col...
+                                                             ^
+HINT:  You can choose the collation by applying the COLLATE clause to one or both expressions.
+SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
+ERROR:  collation mismatch between implicit collations "en-x-icu" and "C"
+LINE 1: SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM colla...
+                                                        ^
+HINT:  You can choose the collation by applying the COLLATE clause to one or both expressions.
+CREATE TABLE test_u AS SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- fail
+ERROR:  no collation was derived for column "b" with collatable type text
+HINT:  Use the COLLATE clause to set the collation explicitly.
+-- ideally this would be a parse-time error, but for now it must be run-time:
+select x < y from collate_test10; -- fail
+ERROR:  could not determine which collation to use for string comparison
+HINT:  Use the COLLATE clause to set the collation explicitly.
+select x || y from collate_test10; -- ok, because || is not collation aware
+ ?column? 
+----------
+ hijhij
+ HIJHIJ
+(2 rows)
+
+select x, y from collate_test10 order by x || y; -- not so ok
+ERROR:  collation mismatch between implicit collations "en-x-icu" and "tr-x-icu"
+LINE 1: select x, y from collate_test10 order by x || y;
+                                                      ^
+HINT:  You can choose the collation by applying the COLLATE clause to one or both expressions.
+-- collation mismatch between recursive and non-recursive term
+WITH RECURSIVE foo(x) AS
+   (SELECT x FROM (VALUES('a' COLLATE "en-x-icu"),('b')) t(x)
+   UNION ALL
+   SELECT (x || 'c') COLLATE "de-x-icu" FROM foo WHERE length(x) < 10)
+SELECT * FROM foo;
+ERROR:  recursive query "foo" column 1 has collation "en-x-icu" in non-recursive term but collation "de-x-icu" overall
+LINE 2:    (SELECT x FROM (VALUES('a' COLLATE "en-x-icu"),('b')) t(x...
+                   ^
+HINT:  Use the COLLATE clause to set the collation of the non-recursive term.
+-- casting
+SELECT CAST('42' AS text COLLATE "C");
+ERROR:  syntax error at or near "COLLATE"
+LINE 1: SELECT CAST('42' AS text COLLATE "C");
+                                 ^
+SELECT a, CAST(b AS varchar) FROM collate_test1 ORDER BY 2;
+ a |  b  
+---+-----
+ 1 | abc
+ 4 | ABC
+ 2 | äbc
+ 3 | bbc
+(4 rows)
+
+SELECT a, CAST(b AS varchar) FROM collate_test2 ORDER BY 2;
+ a |  b  
+---+-----
+ 1 | abc
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, CAST(b AS varchar) FROM collate_test3 ORDER BY 2;
+ a |  b  
+---+-----
+ 4 | ABC
+ 1 | abc
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+-- propagation of collation in SQL functions (inlined and non-inlined cases)
+-- and plpgsql functions too
+CREATE FUNCTION mylt (text, text) RETURNS boolean LANGUAGE sql
+    AS $$ select $1 < $2 $$;
+CREATE FUNCTION mylt_noninline (text, text) RETURNS boolean LANGUAGE sql
+    AS $$ select $1 < $2 limit 1 $$;
+CREATE FUNCTION mylt_plpgsql (text, text) RETURNS boolean LANGUAGE plpgsql
+    AS $$ begin return $1 < $2; end $$;
+SELECT a.b AS a, b.b AS b, a.b < b.b AS lt,
+       mylt(a.b, b.b), mylt_noninline(a.b, b.b), mylt_plpgsql(a.b, b.b)
+FROM collate_test1 a, collate_test1 b
+ORDER BY a.b, b.b;
+  a  |  b  | lt | mylt | mylt_noninline | mylt_plpgsql 
+-----+-----+----+------+----------------+--------------
+ abc | abc | f  | f    | f              | f
+ abc | ABC | t  | t    | t              | t
+ abc | äbc | t  | t    | t              | t
+ abc | bbc | t  | t    | t              | t
+ ABC | abc | f  | f    | f              | f
+ ABC | ABC | f  | f    | f              | f
+ ABC | äbc | t  | t    | t              | t
+ ABC | bbc | t  | t    | t              | t
+ äbc | abc | f  | f    | f              | f
+ äbc | ABC | f  | f    | f              | f
+ äbc | äbc | f  | f    | f              | f
+ äbc | bbc | t  | t    | t              | t
+ bbc | abc | f  | f    | f              | f
+ bbc | ABC | f  | f    | f              | f
+ bbc | äbc | f  | f    | f              | f
+ bbc | bbc | f  | f    | f              | f
+(16 rows)
+
+SELECT a.b AS a, b.b AS b, a.b < b.b COLLATE "C" AS lt,
+       mylt(a.b, b.b COLLATE "C"), mylt_noninline(a.b, b.b COLLATE "C"),
+       mylt_plpgsql(a.b, b.b COLLATE "C")
+FROM collate_test1 a, collate_test1 b
+ORDER BY a.b, b.b;
+  a  |  b  | lt | mylt | mylt_noninline | mylt_plpgsql 
+-----+-----+----+------+----------------+--------------
+ abc | abc | f  | f    | f              | f
+ abc | ABC | f  | f    | f              | f
+ abc | äbc | t  | t    | t              | t
+ abc | bbc | t  | t    | t              | t
+ ABC | abc | t  | t    | t              | t
+ ABC | ABC | f  | f    | f              | f
+ ABC | äbc | t  | t    | t              | t
+ ABC | bbc | t  | t    | t              | t
+ äbc | abc | f  | f    | f              | f
+ äbc | ABC | f  | f    | f              | f
+ äbc | äbc | f  | f    | f              | f
+ äbc | bbc | f  | f    | f              | f
+ bbc | abc | f  | f    | f              | f
+ bbc | ABC | f  | f    | f              | f
+ bbc | äbc | t  | t    | t              | t
+ bbc | bbc | f  | f    | f              | f
+(16 rows)
+
+-- collation override in plpgsql
+CREATE FUNCTION mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$
+declare
+  xx text := x;
+  yy text := y;
+begin
+  return xx < yy;
+end
+$$;
+SELECT mylt2('a', 'B' collate "en-x-icu") as t, mylt2('a', 'B' collate "C") as f;
+ t | f 
+---+---
+ t | f
+(1 row)
+
+CREATE OR REPLACE FUNCTION
+  mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$
+declare
+  xx text COLLATE "POSIX" := x;
+  yy text := y;
+begin
+  return xx < yy;
+end
+$$;
+SELECT mylt2('a', 'B') as f;
+ f 
+---
+ f
+(1 row)
+
+SELECT mylt2('a', 'B' collate "C") as fail; -- conflicting collations
+ERROR:  could not determine which collation to use for string comparison
+HINT:  Use the COLLATE clause to set the collation explicitly.
+CONTEXT:  PL/pgSQL function mylt2(text,text) line 6 at RETURN
+SELECT mylt2('a', 'B' collate "POSIX") as f;
+ f 
+---
+ f
+(1 row)
+
+-- polymorphism
+SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test1)) ORDER BY 1;
+ unnest 
+--------
+ abc
+ ABC
+ äbc
+ bbc
+(4 rows)
+
+SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test2)) ORDER BY 1;
+ unnest 
+--------
+ abc
+ ABC
+ bbc
+ äbc
+(4 rows)
+
+SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test3)) ORDER BY 1;
+ unnest 
+--------
+ ABC
+ abc
+ bbc
+ äbc
+(4 rows)
+
+CREATE FUNCTION dup (anyelement) RETURNS anyelement
+    AS 'select $1' LANGUAGE sql;
+SELECT a, dup(b) FROM collate_test1 ORDER BY 2;
+ a | dup 
+---+-----
+ 1 | abc
+ 4 | ABC
+ 2 | äbc
+ 3 | bbc
+(4 rows)
+
+SELECT a, dup(b) FROM collate_test2 ORDER BY 2;
+ a | dup 
+---+-----
+ 1 | abc
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, dup(b) FROM collate_test3 ORDER BY 2;
+ a | dup 
+---+-----
+ 4 | ABC
+ 1 | abc
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+-- indexes
+CREATE INDEX collate_test1_idx1 ON collate_test1 (b);
+CREATE INDEX collate_test1_idx2 ON collate_test1 (b COLLATE "C");
+CREATE INDEX collate_test1_idx3 ON collate_test1 ((b COLLATE "C")); -- this is different grammatically
+CREATE INDEX collate_test1_idx4 ON collate_test1 (((b||'foo') COLLATE "POSIX"));
+CREATE INDEX collate_test1_idx5 ON collate_test1 (a COLLATE "C"); -- fail
+ERROR:  collations are not supported by type integer
+CREATE INDEX collate_test1_idx6 ON collate_test1 ((a COLLATE "C")); -- fail
+ERROR:  collations are not supported by type integer
+LINE 1: ...ATE INDEX collate_test1_idx6 ON collate_test1 ((a COLLATE "C...
+                                                             ^
+SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_test%_idx%' ORDER BY 1;
+      relname       |                                           pg_get_indexdef                                           
+--------------------+-----------------------------------------------------------------------------------------------------
+ collate_test1_idx1 | CREATE INDEX collate_test1_idx1 ON collate_test1 USING btree (b)
+ collate_test1_idx2 | CREATE INDEX collate_test1_idx2 ON collate_test1 USING btree (b COLLATE "C")
+ collate_test1_idx3 | CREATE INDEX collate_test1_idx3 ON collate_test1 USING btree (b COLLATE "C")
+ collate_test1_idx4 | CREATE INDEX collate_test1_idx4 ON collate_test1 USING btree (((b || 'foo'::text)) COLLATE "POSIX")
+(4 rows)
+
+-- schema manipulation commands
+CREATE ROLE regress_test_role;
+CREATE SCHEMA test_schema;
+-- We need to do this this way to cope with varying names for encodings:
+do $$
+BEGIN
+  EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' ||
+          quote_literal(current_setting('lc_collate')) || ');';
+END
+$$;
+CREATE COLLATION test0 FROM "C"; -- fail, duplicate name
+ERROR:  collation "test0" already exists
+do $$
+BEGIN
+  EXECUTE 'CREATE COLLATION test1 (provider = icu, lc_collate = ' ||
+          quote_literal(current_setting('lc_collate')) ||
+          ', lc_ctype = ' ||
+          quote_literal(current_setting('lc_ctype')) || ');';
+END
+$$;
+CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, need lc_ctype
+ERROR:  parameter "lc_ctype" must be specified
+CREATE COLLATION testx (provider = icu, locale = 'nonsense'); /* never fails with ICU */  DROP COLLATION testx;
+CREATE COLLATION test4 FROM nonsense;
+ERROR:  collation "nonsense" for encoding "UTF8" does not exist
+CREATE COLLATION test5 FROM test0;
+SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1;
+ collname 
+----------
+ test0
+ test1
+ test5
+(3 rows)
+
+ALTER COLLATION test1 RENAME TO test11;
+ALTER COLLATION test0 RENAME TO test11; -- fail
+ERROR:  collation "test11" already exists in schema "collate_tests"
+ALTER COLLATION test1 RENAME TO test22; -- fail
+ERROR:  collation "test1" for encoding "UTF8" does not exist
+ALTER COLLATION test11 OWNER TO regress_test_role;
+ALTER COLLATION test11 OWNER TO nonsense;
+ERROR:  role "nonsense" does not exist
+ALTER COLLATION test11 SET SCHEMA test_schema;
+COMMENT ON COLLATION test0 IS 'US English';
+SELECT collname, nspname, obj_description(pg_collation.oid, 'pg_collation')
+    FROM pg_collation JOIN pg_namespace ON (collnamespace = pg_namespace.oid)
+    WHERE collname LIKE 'test%'
+    ORDER BY 1;
+ collname |    nspname    | obj_description 
+----------+---------------+-----------------
+ test0    | collate_tests | US English
+ test11   | test_schema   | 
+ test5    | collate_tests | 
+(3 rows)
+
+DROP COLLATION test0, test_schema.test11, test5;
+DROP COLLATION test0; -- fail
+ERROR:  collation "test0" for encoding "UTF8" does not exist
+DROP COLLATION IF EXISTS test0;
+NOTICE:  collation "test0" does not exist, skipping
+SELECT collname FROM pg_collation WHERE collname LIKE 'test%';
+ collname 
+----------
+(0 rows)
+
+DROP SCHEMA test_schema;
+DROP ROLE regress_test_role;
+-- ALTER
+ALTER COLLATION "en-x-icu" REFRESH VERSION;
+NOTICE:  version has not changed
+-- dependencies
+CREATE COLLATION test0 FROM "C";
+CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0);
+CREATE DOMAIN collate_dep_dom1 AS text COLLATE test0;
+CREATE TYPE collate_dep_test2 AS (x int, y text COLLATE test0);
+CREATE VIEW collate_dep_test3 AS SELECT text 'foo' COLLATE test0 AS foo;
+CREATE TABLE collate_dep_test4t (a int, b text);
+CREATE INDEX collate_dep_test4i ON collate_dep_test4t (b COLLATE test0);
+DROP COLLATION test0 RESTRICT; -- fail
+ERROR:  cannot drop collation test0 because other objects depend on it
+DETAIL:  table collate_dep_test1 column b depends on collation test0
+type collate_dep_dom1 depends on collation test0
+composite type collate_dep_test2 column y depends on collation test0
+view collate_dep_test3 depends on collation test0
+index collate_dep_test4i depends on collation test0
+HINT:  Use DROP ... CASCADE to drop the dependent objects too.
+DROP COLLATION test0 CASCADE;
+NOTICE:  drop cascades to 5 other objects
+DETAIL:  drop cascades to table collate_dep_test1 column b
+drop cascades to type collate_dep_dom1
+drop cascades to composite type collate_dep_test2 column y
+drop cascades to view collate_dep_test3
+drop cascades to index collate_dep_test4i
+\d collate_dep_test1
+      Table "collate_tests.collate_dep_test1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+
+\d collate_dep_test2
+ Composite type "collate_tests.collate_dep_test2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ x      | integer |           |          | 
+
+DROP TABLE collate_dep_test1, collate_dep_test4t;
+DROP TYPE collate_dep_test2;
+-- test range types and collations
+create type textrange_c as range(subtype=text, collation="C");
+create type textrange_en_us as range(subtype=text, collation="en-x-icu");
+select textrange_c('A','Z') @> 'b'::text;
+ ?column? 
+----------
+ f
+(1 row)
+
+select textrange_en_us('A','Z') @> 'b'::text;
+ ?column? 
+----------
+ t
+(1 row)
+
+drop type textrange_c;
+drop type textrange_en_us;
+-- cleanup
+DROP SCHEMA collate_tests CASCADE;
+NOTICE:  drop cascades to 18 other objects
+DETAIL:  drop cascades to table collate_test1
+drop cascades to table collate_test_like
+drop cascades to table collate_test2
+drop cascades to table collate_test3
+drop cascades to type testdomain_sv
+drop cascades to table collate_test4
+drop cascades to table collate_test5
+drop cascades to table collate_test10
+drop cascades to table collate_test6
+drop cascades to view collview1
+drop cascades to view collview2
+drop cascades to view collview3
+drop cascades to type testdomain
+drop cascades to function mylt(text,text)
+drop cascades to function mylt_noninline(text,text)
+drop cascades to function mylt_plpgsql(text,text)
+drop cascades to function mylt2(text,text)
+drop cascades to function dup(anyelement)
+RESET search_path;
+-- leave a collation for pg_upgrade test
+CREATE COLLATION coll_icu_upgrade FROM "und-x-icu";
index 293e78641ecc6bc23520a27125df70b6e1d41f77..26275c3fb3ee35b3315487a0da7fcd743d0806bf 100644 (file)
@@ -4,12 +4,14 @@
  * because other encodings don't support all the characters used.
  */
 SET client_encoding TO UTF8;
+CREATE SCHEMA collate_tests;
+SET search_path = collate_tests;
 CREATE TABLE collate_test1 (
     a int,
     b text COLLATE "en_US" NOT NULL
 );
 \d collate_test1
-           Table "public.collate_test1"
+        Table "collate_tests.collate_test1"
  Column |  Type   | Collation | Nullable | Default 
 --------+---------+-----------+----------+---------
  a      | integer |           |          | 
@@ -40,7 +42,7 @@ CREATE TABLE collate_test_like (
     LIKE collate_test1
 );
 \d collate_test_like
-         Table "public.collate_test_like"
+      Table "collate_tests.collate_test_like"
  Column |  Type   | Collation | Nullable | Default 
 --------+---------+-----------+----------+---------
  a      | integer |           |          | 
@@ -364,6 +366,38 @@ SELECT * FROM collate_test1 WHERE b ~* 'bc';
  4 | ABC
 (4 rows)
 
+CREATE TABLE collate_test6 (
+    a int,
+    b text COLLATE "en_US"
+);
+INSERT INTO collate_test6 VALUES (1, 'abc'), (2, 'ABC'), (3, '123'), (4, 'ab1'),
+                                 (5, 'a1!'), (6, 'a c'), (7, '!.;'), (8, '   '),
+                                 (9, 'äbç'), (10, 'ÄBÇ');
+SELECT b,
+       b ~ '^[[:alpha:]]+$' AS is_alpha,
+       b ~ '^[[:upper:]]+$' AS is_upper,
+       b ~ '^[[:lower:]]+$' AS is_lower,
+       b ~ '^[[:digit:]]+$' AS is_digit,
+       b ~ '^[[:alnum:]]+$' AS is_alnum,
+       b ~ '^[[:graph:]]+$' AS is_graph,
+       b ~ '^[[:print:]]+$' AS is_print,
+       b ~ '^[[:punct:]]+$' AS is_punct,
+       b ~ '^[[:space:]]+$' AS is_space
+FROM collate_test6;
+  b  | is_alpha | is_upper | is_lower | is_digit | is_alnum | is_graph | is_print | is_punct | is_space 
+-----+----------+----------+----------+----------+----------+----------+----------+----------+----------
+ abc | t        | f        | t        | f        | t        | t        | t        | f        | f
+ ABC | t        | t        | f        | f        | t        | t        | t        | f        | f
+ 123 | f        | f        | f        | t        | t        | t        | t        | f        | f
+ ab1 | f        | f        | f        | f        | t        | t        | t        | f        | f
+ a1! | f        | f        | f        | f        | f        | t        | t        | f        | f
+ a c | f        | f        | f        | f        | f        | f        | t        | f        | f
+ !.; | f        | f        | f        | f        | f        | t        | t        | t        | f
+     | f        | f        | f        | f        | f        | f        | t        | f        | t
+ äbç | t        | f        | t        | f        | t        | t        | t        | f        | f
+ ÄBÇ | t        | t        | f        | f        | t        | t        | t        | f        | f
+(10 rows)
+
 SELECT 'Türkiye' COLLATE "en_US" ~* 'KI' AS "true";
  true 
 ------
@@ -980,6 +1014,8 @@ ERROR:  parameter "lc_ctype" must be specified
 CREATE COLLATION testx (locale = 'nonsense'); -- fail
 ERROR:  could not create locale "nonsense": No such file or directory
 DETAIL:  The operating system could not find any locale data for the locale name "nonsense".
+CREATE COLLATION testy (locale = 'en_US.utf8', version = 'foo'); -- fail, no versions for libc
+ERROR:  collation "testy" has no actual version, but a version was specified
 CREATE COLLATION test4 FROM nonsense;
 ERROR:  collation "nonsense" for encoding "UTF8" does not exist
 CREATE COLLATION test5 FROM test0;
@@ -993,7 +1029,7 @@ SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1;
 
 ALTER COLLATION test1 RENAME TO test11;
 ALTER COLLATION test0 RENAME TO test11; -- fail
-ERROR:  collation "test11" for encoding "UTF8" already exists in schema "public"
+ERROR:  collation "test11" for encoding "UTF8" already exists in schema "collate_tests"
 ALTER COLLATION test1 RENAME TO test22; -- fail
 ERROR:  collation "test1" for encoding "UTF8" does not exist
 ALTER COLLATION test11 OWNER TO regress_test_role;
@@ -1005,11 +1041,11 @@ SELECT collname, nspname, obj_description(pg_collation.oid, 'pg_collation')
     FROM pg_collation JOIN pg_namespace ON (collnamespace = pg_namespace.oid)
     WHERE collname LIKE 'test%'
     ORDER BY 1;
- collname |   nspname   | obj_description 
-----------+-------------+-----------------
- test0    | public      | US English
- test11   | test_schema | 
- test5    | public      | 
+ collname |    nspname    | obj_description 
+----------+---------------+-----------------
+ test0    | collate_tests | US English
+ test11   | test_schema   | 
+ test5    | collate_tests | 
 (3 rows)
 
 DROP COLLATION test0, test_schema.test11, test5;
@@ -1024,6 +1060,9 @@ SELECT collname FROM pg_collation WHERE collname LIKE 'test%';
 
 DROP SCHEMA test_schema;
 DROP ROLE regress_test_role;
+-- ALTER
+ALTER COLLATION "en_US" REFRESH VERSION;
+NOTICE:  version has not changed
 -- dependencies
 CREATE COLLATION test0 FROM "C";
 CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0);
@@ -1048,13 +1087,13 @@ drop cascades to composite type collate_dep_test2 column y
 drop cascades to view collate_dep_test3
 drop cascades to index collate_dep_test4i
 \d collate_dep_test1
-         Table "public.collate_dep_test1"
+      Table "collate_tests.collate_dep_test1"
  Column |  Type   | Collation | Nullable | Default 
 --------+---------+-----------+----------+---------
  a      | integer |           |          | 
 
 \d collate_dep_test2
-     Composite type "public.collate_dep_test2"
+ Composite type "collate_tests.collate_dep_test2"
  Column |  Type   | Collation | Nullable | Default 
 --------+---------+-----------+----------+---------
  x      | integer |           |          | 
@@ -1078,3 +1117,24 @@ select textrange_en_us('A','Z') @> 'b'::text;
 
 drop type textrange_c;
 drop type textrange_en_us;
+-- cleanup
+DROP SCHEMA collate_tests CASCADE;
+NOTICE:  drop cascades to 18 other objects
+DETAIL:  drop cascades to table collate_test1
+drop cascades to table collate_test_like
+drop cascades to table collate_test2
+drop cascades to table collate_test3
+drop cascades to type testdomain_sv
+drop cascades to table collate_test4
+drop cascades to table collate_test5
+drop cascades to table collate_test10
+drop cascades to table collate_test6
+drop cascades to view collview1
+drop cascades to view collview2
+drop cascades to view collview3
+drop cascades to type testdomain
+drop cascades to function mylt(text,text)
+drop cascades to function mylt_noninline(text,text)
+drop cascades to function mylt_plpgsql(text,text)
+drop cascades to function mylt2(text,text)
+drop cascades to function dup(anyelement)
diff --git a/src/test/regress/sql/collate.icu.sql b/src/test/regress/sql/collate.icu.sql
new file mode 100644 (file)
index 0000000..ef39445
--- /dev/null
@@ -0,0 +1,433 @@
+/*
+ * This test is for ICU collations.
+ */
+
+SET client_encoding TO UTF8;
+
+CREATE SCHEMA collate_tests;
+SET search_path = collate_tests;
+
+
+CREATE TABLE collate_test1 (
+    a int,
+    b text COLLATE "en-x-icu" NOT NULL
+);
+
+\d collate_test1
+
+CREATE TABLE collate_test_fail (
+    a int,
+    b text COLLATE "ja_JP.eucjp-x-icu"
+);
+
+CREATE TABLE collate_test_fail (
+    a int,
+    b text COLLATE "foo-x-icu"
+);
+
+CREATE TABLE collate_test_fail (
+    a int COLLATE "en-x-icu",
+    b text
+);
+
+CREATE TABLE collate_test_like (
+    LIKE collate_test1
+);
+
+\d collate_test_like
+
+CREATE TABLE collate_test2 (
+    a int,
+    b text COLLATE "sv-x-icu"
+);
+
+CREATE TABLE collate_test3 (
+    a int,
+    b text COLLATE "C"
+);
+
+INSERT INTO collate_test1 VALUES (1, 'abc'), (2, 'äbc'), (3, 'bbc'), (4, 'ABC');
+INSERT INTO collate_test2 SELECT * FROM collate_test1;
+INSERT INTO collate_test3 SELECT * FROM collate_test1;
+
+SELECT * FROM collate_test1 WHERE b >= 'bbc';
+SELECT * FROM collate_test2 WHERE b >= 'bbc';
+SELECT * FROM collate_test3 WHERE b >= 'bbc';
+SELECT * FROM collate_test3 WHERE b >= 'BBC';
+
+SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc';
+SELECT * FROM collate_test1 WHERE b >= 'bbc' COLLATE "C";
+SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "C";
+SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en-x-icu";
+
+
+CREATE DOMAIN testdomain_sv AS text COLLATE "sv-x-icu";
+CREATE DOMAIN testdomain_i AS int COLLATE "sv-x-icu"; -- fails
+CREATE TABLE collate_test4 (
+    a int,
+    b testdomain_sv
+);
+INSERT INTO collate_test4 SELECT * FROM collate_test1;
+SELECT a, b FROM collate_test4 ORDER BY b;
+
+CREATE TABLE collate_test5 (
+    a int,
+    b testdomain_sv COLLATE "en-x-icu"
+);
+INSERT INTO collate_test5 SELECT * FROM collate_test1;
+SELECT a, b FROM collate_test5 ORDER BY b;
+
+
+SELECT a, b FROM collate_test1 ORDER BY b;
+SELECT a, b FROM collate_test2 ORDER BY b;
+SELECT a, b FROM collate_test3 ORDER BY b;
+
+SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C";
+
+-- star expansion
+SELECT * FROM collate_test1 ORDER BY b;
+SELECT * FROM collate_test2 ORDER BY b;
+SELECT * FROM collate_test3 ORDER BY b;
+
+-- constant expression folding
+SELECT 'bbc' COLLATE "en-x-icu" > 'äbc' COLLATE "en-x-icu" AS "true";
+SELECT 'bbc' COLLATE "sv-x-icu" > 'äbc' COLLATE "sv-x-icu" AS "false";
+
+-- upper/lower
+
+CREATE TABLE collate_test10 (
+    a int,
+    x text COLLATE "en-x-icu",
+    y text COLLATE "tr-x-icu"
+);
+
+INSERT INTO collate_test10 VALUES (1, 'hij', 'hij'), (2, 'HIJ', 'HIJ');
+
+SELECT a, lower(x), lower(y), upper(x), upper(y), initcap(x), initcap(y) FROM collate_test10;
+SELECT a, lower(x COLLATE "C"), lower(y COLLATE "C") FROM collate_test10;
+
+SELECT a, x, y FROM collate_test10 ORDER BY lower(y), a;
+
+-- LIKE/ILIKE
+
+SELECT * FROM collate_test1 WHERE b LIKE 'abc';
+SELECT * FROM collate_test1 WHERE b LIKE 'abc%';
+SELECT * FROM collate_test1 WHERE b LIKE '%bc%';
+SELECT * FROM collate_test1 WHERE b ILIKE 'abc';
+SELECT * FROM collate_test1 WHERE b ILIKE 'abc%';
+SELECT * FROM collate_test1 WHERE b ILIKE '%bc%';
+
+SELECT 'Türkiye' COLLATE "en-x-icu" ILIKE '%KI%' AS "true";
+SELECT 'Türkiye' COLLATE "tr-x-icu" ILIKE '%KI%' AS "false";
+
+SELECT 'bıt' ILIKE 'BIT' COLLATE "en-x-icu" AS "false";
+SELECT 'bıt' ILIKE 'BIT' COLLATE "tr-x-icu" AS "true";
+
+-- The following actually exercises the selectivity estimation for ILIKE.
+SELECT relname FROM pg_class WHERE relname ILIKE 'abc%';
+
+-- regular expressions
+
+SELECT * FROM collate_test1 WHERE b ~ '^abc$';
+SELECT * FROM collate_test1 WHERE b ~ '^abc';
+SELECT * FROM collate_test1 WHERE b ~ 'bc';
+SELECT * FROM collate_test1 WHERE b ~* '^abc$';
+SELECT * FROM collate_test1 WHERE b ~* '^abc';
+SELECT * FROM collate_test1 WHERE b ~* 'bc';
+
+CREATE TABLE collate_test6 (
+    a int,
+    b text COLLATE "en-x-icu"
+);
+INSERT INTO collate_test6 VALUES (1, 'abc'), (2, 'ABC'), (3, '123'), (4, 'ab1'),
+                                 (5, 'a1!'), (6, 'a c'), (7, '!.;'), (8, '   '),
+                                 (9, 'äbç'), (10, 'ÄBÇ');
+SELECT b,
+       b ~ '^[[:alpha:]]+$' AS is_alpha,
+       b ~ '^[[:upper:]]+$' AS is_upper,
+       b ~ '^[[:lower:]]+$' AS is_lower,
+       b ~ '^[[:digit:]]+$' AS is_digit,
+       b ~ '^[[:alnum:]]+$' AS is_alnum,
+       b ~ '^[[:graph:]]+$' AS is_graph,
+       b ~ '^[[:print:]]+$' AS is_print,
+       b ~ '^[[:punct:]]+$' AS is_punct,
+       b ~ '^[[:space:]]+$' AS is_space
+FROM collate_test6;
+
+SELECT 'Türkiye' COLLATE "en-x-icu" ~* 'KI' AS "true";
+SELECT 'Türkiye' COLLATE "tr-x-icu" ~* 'KI' AS "true";  -- true with ICU
+
+SELECT 'bıt' ~* 'BIT' COLLATE "en-x-icu" AS "false";
+SELECT 'bıt' ~* 'BIT' COLLATE "tr-x-icu" AS "false";  -- false with ICU
+
+-- The following actually exercises the selectivity estimation for ~*.
+SELECT relname FROM pg_class WHERE relname ~* '^abc';
+
+
+/* not run by default because it requires tr_TR system locale
+-- to_char
+
+SET lc_time TO 'tr_TR';
+SELECT to_char(date '2010-04-01', 'DD TMMON YYYY');
+SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr-x-icu");
+*/
+
+
+-- backwards parsing
+
+CREATE VIEW collview1 AS SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc';
+CREATE VIEW collview2 AS SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C";
+CREATE VIEW collview3 AS SELECT a, lower((x || x) COLLATE "C") FROM collate_test10;
+
+SELECT table_name, view_definition FROM information_schema.views
+  WHERE table_name LIKE 'collview%' ORDER BY 1;
+
+
+-- collation propagation in various expression types
+
+SELECT a, coalesce(b, 'foo') FROM collate_test1 ORDER BY 2;
+SELECT a, coalesce(b, 'foo') FROM collate_test2 ORDER BY 2;
+SELECT a, coalesce(b, 'foo') FROM collate_test3 ORDER BY 2;
+SELECT a, lower(coalesce(x, 'foo')), lower(coalesce(y, 'foo')) FROM collate_test10;
+
+SELECT a, b, greatest(b, 'CCC') FROM collate_test1 ORDER BY 3;
+SELECT a, b, greatest(b, 'CCC') FROM collate_test2 ORDER BY 3;
+SELECT a, b, greatest(b, 'CCC') FROM collate_test3 ORDER BY 3;
+SELECT a, x, y, lower(greatest(x, 'foo')), lower(greatest(y, 'foo')) FROM collate_test10;
+
+SELECT a, nullif(b, 'abc') FROM collate_test1 ORDER BY 2;
+SELECT a, nullif(b, 'abc') FROM collate_test2 ORDER BY 2;
+SELECT a, nullif(b, 'abc') FROM collate_test3 ORDER BY 2;
+SELECT a, lower(nullif(x, 'foo')), lower(nullif(y, 'foo')) FROM collate_test10;
+
+SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test1 ORDER BY 2;
+SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test2 ORDER BY 2;
+SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test3 ORDER BY 2;
+
+CREATE DOMAIN testdomain AS text;
+SELECT a, b::testdomain FROM collate_test1 ORDER BY 2;
+SELECT a, b::testdomain FROM collate_test2 ORDER BY 2;
+SELECT a, b::testdomain FROM collate_test3 ORDER BY 2;
+SELECT a, b::testdomain_sv FROM collate_test3 ORDER BY 2;
+SELECT a, lower(x::testdomain), lower(y::testdomain) FROM collate_test10;
+
+SELECT min(b), max(b) FROM collate_test1;
+SELECT min(b), max(b) FROM collate_test2;
+SELECT min(b), max(b) FROM collate_test3;
+
+SELECT array_agg(b ORDER BY b) FROM collate_test1;
+SELECT array_agg(b ORDER BY b) FROM collate_test2;
+SELECT array_agg(b ORDER BY b) FROM collate_test3;
+
+SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test1 ORDER BY 2;
+SELECT a, b FROM collate_test2 UNION SELECT a, b FROM collate_test2 ORDER BY 2;
+SELECT a, b FROM collate_test3 WHERE a < 4 INTERSECT SELECT a, b FROM collate_test3 WHERE a > 1 ORDER BY 2;
+SELECT a, b FROM collate_test3 EXCEPT SELECT a, b FROM collate_test3 WHERE a < 2 ORDER BY 2;
+
+SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
+SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- ok
+SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
+SELECT a, b COLLATE "C" FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- ok
+SELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
+SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
+
+CREATE TABLE test_u AS SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- fail
+
+-- ideally this would be a parse-time error, but for now it must be run-time:
+select x < y from collate_test10; -- fail
+select x || y from collate_test10; -- ok, because || is not collation aware
+select x, y from collate_test10 order by x || y; -- not so ok
+
+-- collation mismatch between recursive and non-recursive term
+WITH RECURSIVE foo(x) AS
+   (SELECT x FROM (VALUES('a' COLLATE "en-x-icu"),('b')) t(x)
+   UNION ALL
+   SELECT (x || 'c') COLLATE "de-x-icu" FROM foo WHERE length(x) < 10)
+SELECT * FROM foo;
+
+
+-- casting
+
+SELECT CAST('42' AS text COLLATE "C");
+
+SELECT a, CAST(b AS varchar) FROM collate_test1 ORDER BY 2;
+SELECT a, CAST(b AS varchar) FROM collate_test2 ORDER BY 2;
+SELECT a, CAST(b AS varchar) FROM collate_test3 ORDER BY 2;
+
+
+-- propagation of collation in SQL functions (inlined and non-inlined cases)
+-- and plpgsql functions too
+
+CREATE FUNCTION mylt (text, text) RETURNS boolean LANGUAGE sql
+    AS $$ select $1 < $2 $$;
+
+CREATE FUNCTION mylt_noninline (text, text) RETURNS boolean LANGUAGE sql
+    AS $$ select $1 < $2 limit 1 $$;
+
+CREATE FUNCTION mylt_plpgsql (text, text) RETURNS boolean LANGUAGE plpgsql
+    AS $$ begin return $1 < $2; end $$;
+
+SELECT a.b AS a, b.b AS b, a.b < b.b AS lt,
+       mylt(a.b, b.b), mylt_noninline(a.b, b.b), mylt_plpgsql(a.b, b.b)
+FROM collate_test1 a, collate_test1 b
+ORDER BY a.b, b.b;
+
+SELECT a.b AS a, b.b AS b, a.b < b.b COLLATE "C" AS lt,
+       mylt(a.b, b.b COLLATE "C"), mylt_noninline(a.b, b.b COLLATE "C"),
+       mylt_plpgsql(a.b, b.b COLLATE "C")
+FROM collate_test1 a, collate_test1 b
+ORDER BY a.b, b.b;
+
+
+-- collation override in plpgsql
+
+CREATE FUNCTION mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$
+declare
+  xx text := x;
+  yy text := y;
+begin
+  return xx < yy;
+end
+$$;
+
+SELECT mylt2('a', 'B' collate "en-x-icu") as t, mylt2('a', 'B' collate "C") as f;
+
+CREATE OR REPLACE FUNCTION
+  mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$
+declare
+  xx text COLLATE "POSIX" := x;
+  yy text := y;
+begin
+  return xx < yy;
+end
+$$;
+
+SELECT mylt2('a', 'B') as f;
+SELECT mylt2('a', 'B' collate "C") as fail; -- conflicting collations
+SELECT mylt2('a', 'B' collate "POSIX") as f;
+
+
+-- polymorphism
+
+SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test1)) ORDER BY 1;
+SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test2)) ORDER BY 1;
+SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test3)) ORDER BY 1;
+
+CREATE FUNCTION dup (anyelement) RETURNS anyelement
+    AS 'select $1' LANGUAGE sql;
+
+SELECT a, dup(b) FROM collate_test1 ORDER BY 2;
+SELECT a, dup(b) FROM collate_test2 ORDER BY 2;
+SELECT a, dup(b) FROM collate_test3 ORDER BY 2;
+
+
+-- indexes
+
+CREATE INDEX collate_test1_idx1 ON collate_test1 (b);
+CREATE INDEX collate_test1_idx2 ON collate_test1 (b COLLATE "C");
+CREATE INDEX collate_test1_idx3 ON collate_test1 ((b COLLATE "C")); -- this is different grammatically
+CREATE INDEX collate_test1_idx4 ON collate_test1 (((b||'foo') COLLATE "POSIX"));
+
+CREATE INDEX collate_test1_idx5 ON collate_test1 (a COLLATE "C"); -- fail
+CREATE INDEX collate_test1_idx6 ON collate_test1 ((a COLLATE "C")); -- fail
+
+SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_test%_idx%' ORDER BY 1;
+
+
+-- schema manipulation commands
+
+CREATE ROLE regress_test_role;
+CREATE SCHEMA test_schema;
+
+-- We need to do this this way to cope with varying names for encodings:
+do $$
+BEGIN
+  EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' ||
+          quote_literal(current_setting('lc_collate')) || ');';
+END
+$$;
+CREATE COLLATION test0 FROM "C"; -- fail, duplicate name
+do $$
+BEGIN
+  EXECUTE 'CREATE COLLATION test1 (provider = icu, lc_collate = ' ||
+          quote_literal(current_setting('lc_collate')) ||
+          ', lc_ctype = ' ||
+          quote_literal(current_setting('lc_ctype')) || ');';
+END
+$$;
+CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, need lc_ctype
+CREATE COLLATION testx (provider = icu, locale = 'nonsense'); /* never fails with ICU */  DROP COLLATION testx;
+
+CREATE COLLATION test4 FROM nonsense;
+CREATE COLLATION test5 FROM test0;
+
+SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1;
+
+ALTER COLLATION test1 RENAME TO test11;
+ALTER COLLATION test0 RENAME TO test11; -- fail
+ALTER COLLATION test1 RENAME TO test22; -- fail
+
+ALTER COLLATION test11 OWNER TO regress_test_role;
+ALTER COLLATION test11 OWNER TO nonsense;
+ALTER COLLATION test11 SET SCHEMA test_schema;
+
+COMMENT ON COLLATION test0 IS 'US English';
+
+SELECT collname, nspname, obj_description(pg_collation.oid, 'pg_collation')
+    FROM pg_collation JOIN pg_namespace ON (collnamespace = pg_namespace.oid)
+    WHERE collname LIKE 'test%'
+    ORDER BY 1;
+
+DROP COLLATION test0, test_schema.test11, test5;
+DROP COLLATION test0; -- fail
+DROP COLLATION IF EXISTS test0;
+
+SELECT collname FROM pg_collation WHERE collname LIKE 'test%';
+
+DROP SCHEMA test_schema;
+DROP ROLE regress_test_role;
+
+
+-- ALTER
+
+ALTER COLLATION "en-x-icu" REFRESH VERSION;
+
+
+-- dependencies
+
+CREATE COLLATION test0 FROM "C";
+
+CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0);
+CREATE DOMAIN collate_dep_dom1 AS text COLLATE test0;
+CREATE TYPE collate_dep_test2 AS (x int, y text COLLATE test0);
+CREATE VIEW collate_dep_test3 AS SELECT text 'foo' COLLATE test0 AS foo;
+CREATE TABLE collate_dep_test4t (a int, b text);
+CREATE INDEX collate_dep_test4i ON collate_dep_test4t (b COLLATE test0);
+
+DROP COLLATION test0 RESTRICT; -- fail
+DROP COLLATION test0 CASCADE;
+
+\d collate_dep_test1
+\d collate_dep_test2
+
+DROP TABLE collate_dep_test1, collate_dep_test4t;
+DROP TYPE collate_dep_test2;
+
+-- test range types and collations
+
+create type textrange_c as range(subtype=text, collation="C");
+create type textrange_en_us as range(subtype=text, collation="en-x-icu");
+
+select textrange_c('A','Z') @> 'b'::text;
+select textrange_en_us('A','Z') @> 'b'::text;
+
+drop type textrange_c;
+drop type textrange_en_us;
+
+
+-- cleanup
+DROP SCHEMA collate_tests CASCADE;
+RESET search_path;
+
+-- leave a collation for pg_upgrade test
+CREATE COLLATION coll_icu_upgrade FROM "und-x-icu";
index c349cbde2b90ab6723eb7ba9d63c683be4f6b065..b51162e3a1fe960e39245e80fd19e117910b6dd4 100644 (file)
@@ -6,6 +6,9 @@
 
 SET client_encoding TO UTF8;
 
+CREATE SCHEMA collate_tests;
+SET search_path = collate_tests;
+
 
 CREATE TABLE collate_test1 (
     a int,
@@ -134,6 +137,25 @@ SELECT * FROM collate_test1 WHERE b ~* '^abc$';
 SELECT * FROM collate_test1 WHERE b ~* '^abc';
 SELECT * FROM collate_test1 WHERE b ~* 'bc';
 
+CREATE TABLE collate_test6 (
+    a int,
+    b text COLLATE "en_US"
+);
+INSERT INTO collate_test6 VALUES (1, 'abc'), (2, 'ABC'), (3, '123'), (4, 'ab1'),
+                                 (5, 'a1!'), (6, 'a c'), (7, '!.;'), (8, '   '),
+                                 (9, 'äbç'), (10, 'ÄBÇ');
+SELECT b,
+       b ~ '^[[:alpha:]]+$' AS is_alpha,
+       b ~ '^[[:upper:]]+$' AS is_upper,
+       b ~ '^[[:lower:]]+$' AS is_lower,
+       b ~ '^[[:digit:]]+$' AS is_digit,
+       b ~ '^[[:alnum:]]+$' AS is_alnum,
+       b ~ '^[[:graph:]]+$' AS is_graph,
+       b ~ '^[[:print:]]+$' AS is_print,
+       b ~ '^[[:punct:]]+$' AS is_punct,
+       b ~ '^[[:space:]]+$' AS is_space
+FROM collate_test6;
+
 SELECT 'Türkiye' COLLATE "en_US" ~* 'KI' AS "true";
 SELECT 'Türkiye' COLLATE "tr_TR" ~* 'KI' AS "false";
 
@@ -337,6 +359,7 @@ END
 $$;
 CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail, need lc_ctype
 CREATE COLLATION testx (locale = 'nonsense'); -- fail
+CREATE COLLATION testy (locale = 'en_US.utf8', version = 'foo'); -- fail, no versions for libc
 
 CREATE COLLATION test4 FROM nonsense;
 CREATE COLLATION test5 FROM test0;
@@ -368,6 +391,11 @@ DROP SCHEMA test_schema;
 DROP ROLE regress_test_role;
 
 
+-- ALTER
+
+ALTER COLLATION "en_US" REFRESH VERSION;
+
+
 -- dependencies
 
 CREATE COLLATION test0 FROM "C";
@@ -398,3 +426,7 @@ select textrange_en_us('A','Z') @> 'b'::text;
 
 drop type textrange_c;
 drop type textrange_en_us;
+
+
+-- cleanup
+DROP SCHEMA collate_tests CASCADE;