From f89e4dfa755b9c9be9c19f7c65cff50d47b4f33a Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 9 Apr 2011 16:24:36 -0400 Subject: [PATCH] Remove collate.linux.utf8.sql's assumptions about ".utf8" in locale names. Tweak the test so that it does not depend on the platform using ".utf8" as the extension signifying that a locale uses UTF8 encoding. For the most part this just requires using the abbreviated collation names "en_US" etc, though I had to work a bit harder on the collation creation tests. This opens the door to using the test on platforms that spell locales differently, for example ".utf-8" or ".UTF-8". Also, the test is now somewhat useful with server encodings other than UTF8; though depending on which encoding is selected, different subsets of it will fail for lack of character set support. --- .../regress/expected/collate.linux.utf8.out | 108 ++++++++++-------- src/test/regress/sql/collate.linux.utf8.sql | 58 ++++++---- 2 files changed, 99 insertions(+), 67 deletions(-) diff --git a/src/test/regress/expected/collate.linux.utf8.out b/src/test/regress/expected/collate.linux.utf8.out index fe4d27e34b..25c543c2bb 100644 --- a/src/test/regress/expected/collate.linux.utf8.out +++ b/src/test/regress/expected/collate.linux.utf8.out @@ -5,14 +5,14 @@ SET client_encoding TO UTF8; CREATE TABLE collate_test1 ( a int, - b text COLLATE "en_US.utf8" NOT NULL + b text COLLATE "en_US" NOT NULL ); \d collate_test1 - Table "public.collate_test1" - Column | Type | Modifiers ---------+---------+----------------------------- + Table "public.collate_test1" + Column | Type | Modifiers +--------+---------+------------------------ a | integer | - b | text | collate en_US.utf8 not null + b | text | collate en_US not null CREATE TABLE collate_test_fail ( a int, @@ -29,25 +29,25 @@ ERROR: collation "foo" for encoding "UTF8" does not exist LINE 3: b text COLLATE "foo" ^ CREATE TABLE collate_test_fail ( - a int COLLATE "en_US.utf8", + a int COLLATE "en_US", b text ); ERROR: collations are not supported by type integer -LINE 2: a int COLLATE "en_US.utf8", +LINE 2: a int COLLATE "en_US", ^ CREATE TABLE collate_test_like ( LIKE collate_test1 ); \d collate_test_like - Table "public.collate_test_like" - Column | Type | Modifiers ---------+---------+----------------------------- + Table "public.collate_test_like" + Column | Type | Modifiers +--------+---------+------------------------ a | integer | - b | text | collate en_US.utf8 not null + b | text | collate en_US not null CREATE TABLE collate_test2 ( a int, - b text COLLATE "sv_SE.utf8" + b text COLLATE "sv_SE" ); CREATE TABLE collate_test3 ( a int, @@ -105,16 +105,12 @@ SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "C"; 3 | bbc (2 rows) -SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US.utf8"; -ERROR: collation mismatch between explicit collations "C" and "en_US.utf8" -LINE 1: ...* FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "e... - ^ SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US"; ERROR: collation mismatch between explicit collations "C" and "en_US" LINE 1: ...* FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "e... ^ -CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE.utf8"; -CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE.utf8"; -- fails +CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE"; +CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE"; -- fails ERROR: collations are not supported by type integer CREATE TABLE collate_test4 ( a int, @@ -132,7 +128,7 @@ SELECT a, b FROM collate_test4 ORDER BY b; CREATE TABLE collate_test5 ( a int, - b testdomain_sv COLLATE "en_US.utf8" + b testdomain_sv COLLATE "en_US" ); INSERT INTO collate_test5 SELECT * FROM collate_test1; SELECT a, b FROM collate_test5 ORDER BY b; @@ -209,13 +205,13 @@ SELECT * FROM collate_test3 ORDER BY b; (4 rows) -- constant expression folding -SELECT 'bbc' COLLATE "en_US.utf8" > 'äbc' COLLATE "en_US.utf8" AS "true"; +SELECT 'bbc' COLLATE "en_US" > 'äbc' COLLATE "en_US" AS "true"; true ------ t (1 row) -SELECT 'bbc' COLLATE "sv_SE.utf8" > 'äbc' COLLATE "sv_SE.utf8" AS "false"; +SELECT 'bbc' COLLATE "sv_SE" > 'äbc' COLLATE "sv_SE" AS "false"; false ------- f @@ -224,8 +220,8 @@ SELECT 'bbc' COLLATE "sv_SE.utf8" > 'äbc' COLLATE "sv_SE.utf8" AS "false"; -- upper/lower CREATE TABLE collate_test10 ( a int, - x text COLLATE "en_US.utf8", - y text COLLATE "tr_TR.utf8" + x text COLLATE "en_US", + y text COLLATE "tr_TR" ); INSERT INTO collate_test10 VALUES (1, 'hij', 'hij'), (2, 'HIJ', 'HIJ'); SELECT a, lower(x), lower(y), upper(x), upper(y), initcap(x), initcap(y) FROM collate_test10; @@ -293,18 +289,30 @@ SELECT * FROM collate_test1 WHERE b ILIKE '%bc%'; 4 | ABC (4 rows) -SELECT 'Türkiye' COLLATE "en_US.utf8" ILIKE '%KI%' AS "true"; +SELECT 'Türkiye' COLLATE "en_US" ILIKE '%KI%' AS "true"; true ------ t (1 row) -SELECT 'Türkiye' COLLATE "tr_TR.utf8" ILIKE '%KI%' AS "false"; +SELECT 'Türkiye' COLLATE "tr_TR" ILIKE '%KI%' AS "false"; + false +------- + f +(1 row) + +SELECT 'bıt' ILIKE 'BIT' COLLATE "en_US" AS "false"; false ------- f (1 row) +SELECT 'bıt' ILIKE 'BIT' COLLATE "tr_TR" AS "true"; + true +------ + t +(1 row) + -- The following actually exercises the selectivity estimation for ILIKE. SELECT relname FROM pg_class WHERE relname ILIKE 'abc%'; relname @@ -312,14 +320,14 @@ SELECT relname FROM pg_class WHERE relname ILIKE 'abc%'; (0 rows) -- to_char -SET lc_time TO 'tr_TR.utf8'; +SET lc_time TO 'tr_TR'; SELECT to_char(date '2010-04-01', 'DD TMMON YYYY'); to_char ------------- 01 NIS 2010 (1 row) -SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR.utf8"); +SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR"); to_char ------------- 01 NİS 2010 @@ -602,7 +610,7 @@ SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- ok (8 rows) SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- fail -ERROR: collation mismatch between implicit collations "en_US.utf8" and "C" +ERROR: collation mismatch between implicit collations "en_US" and "C" LINE 1: SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collat... ^ HINT: You can choose the collation by applying the COLLATE clause to one or both expressions. @@ -616,12 +624,12 @@ SELECT a, b COLLATE "C" FROM collate_test1 UNION SELECT a, b FROM collate_test3 (4 rows) SELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail -ERROR: collation mismatch between implicit collations "en_US.utf8" and "C" +ERROR: collation mismatch between implicit collations "en_US" and "C" LINE 1: ...ELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM col... ^ HINT: You can choose the collation by applying the COLLATE clause to one or both expressions. SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail -ERROR: collation mismatch between implicit collations "en_US.utf8" and "C" +ERROR: collation mismatch between implicit collations "en_US" and "C" LINE 1: SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM colla... ^ HINT: You can choose the collation by applying the COLLATE clause to one or both expressions. @@ -640,7 +648,7 @@ select x || y from collate_test10; -- ok, because || is not collation aware (2 rows) select x, y from collate_test10 order by x || y; -- not so ok -ERROR: collation mismatch between implicit collations "en_US.utf8" and "tr_TR.utf8" +ERROR: collation mismatch between implicit collations "en_US" and "tr_TR" LINE 1: select x, y from collate_test10 order by x || y; ^ HINT: You can choose the collation by applying the COLLATE clause to one or both expressions. @@ -823,26 +831,36 @@ SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_t -- schema manipulation commands CREATE ROLE regress_test_role; CREATE SCHEMA test_schema; -CREATE COLLATION test0 (locale = 'en_US.utf8'); -CREATE COLLATION test0 (locale = 'en_US.utf8'); -- fail +-- We need to do this this way to cope with varying names for encodings: +do $$ +BEGIN + EXECUTE 'CREATE COLLATION test0 (locale = ' || + quote_literal(current_setting('lc_collate')) || ');'; +END +$$; +CREATE COLLATION test0 FROM "C"; -- fail, duplicate name ERROR: collation "test0" for encoding "UTF8" already exists -CREATE COLLATION test1 (lc_collate = 'en_US.utf8', lc_ctype = 'de_DE.utf8'); -CREATE COLLATION test2 (locale = 'en_US'); -- fail -ERROR: encoding UTF8 does not match locale en_US -DETAIL: The chosen LC_CTYPE setting requires encoding LATIN1. -CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail +do $$ +BEGIN + EXECUTE 'CREATE COLLATION test1 (lc_collate = ' || + quote_literal(current_setting('lc_collate')) || + ', lc_ctype = ' || + quote_literal(current_setting('lc_ctype')) || ');'; +END +$$; +CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail, need lc_ctype ERROR: parameter "lc_ctype" must be specified CREATE COLLATION testx (locale = 'nonsense'); -- fail ERROR: could not create locale "nonsense": No such file or directory CREATE COLLATION test4 FROM nonsense; ERROR: collation "nonsense" for encoding "UTF8" does not exist CREATE COLLATION test5 FROM test0; -SELECT collname, collencoding, collcollate, collctype FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1; - collname | collencoding | collcollate | collctype -----------+--------------+-------------+------------ - test0 | 6 | en_US.utf8 | en_US.utf8 - test1 | 6 | en_US.utf8 | de_DE.utf8 - test5 | 6 | en_US.utf8 | en_US.utf8 +SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1; + collname +---------- + test0 + test1 + test5 (3 rows) ALTER COLLATION test1 RENAME TO test11; @@ -879,7 +897,7 @@ SELECT collname FROM pg_collation WHERE collname LIKE 'test%'; DROP SCHEMA test_schema; DROP ROLE regress_test_role; -- dependencies -CREATE COLLATION test0 (locale = 'en_US.utf8'); +CREATE COLLATION test0 FROM "C"; CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0); CREATE DOMAIN collate_dep_dom1 AS text COLLATE test0; CREATE TYPE collate_dep_test2 AS (x int, y text COLLATE test0); diff --git a/src/test/regress/sql/collate.linux.utf8.sql b/src/test/regress/sql/collate.linux.utf8.sql index 57f5947aa9..b6d9368a06 100644 --- a/src/test/regress/sql/collate.linux.utf8.sql +++ b/src/test/regress/sql/collate.linux.utf8.sql @@ -8,7 +8,7 @@ SET client_encoding TO UTF8; CREATE TABLE collate_test1 ( a int, - b text COLLATE "en_US.utf8" NOT NULL + b text COLLATE "en_US" NOT NULL ); \d collate_test1 @@ -24,7 +24,7 @@ CREATE TABLE collate_test_fail ( ); CREATE TABLE collate_test_fail ( - a int COLLATE "en_US.utf8", + a int COLLATE "en_US", b text ); @@ -36,7 +36,7 @@ CREATE TABLE collate_test_like ( CREATE TABLE collate_test2 ( a int, - b text COLLATE "sv_SE.utf8" + b text COLLATE "sv_SE" ); CREATE TABLE collate_test3 ( @@ -56,12 +56,11 @@ SELECT * FROM collate_test3 WHERE b >= 'BBC'; SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc'; SELECT * FROM collate_test1 WHERE b >= 'bbc' COLLATE "C"; SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "C"; -SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US.utf8"; SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US"; -CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE.utf8"; -CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE.utf8"; -- fails +CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE"; +CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE"; -- fails CREATE TABLE collate_test4 ( a int, b testdomain_sv @@ -71,7 +70,7 @@ SELECT a, b FROM collate_test4 ORDER BY b; CREATE TABLE collate_test5 ( a int, - b testdomain_sv COLLATE "en_US.utf8" + b testdomain_sv COLLATE "en_US" ); INSERT INTO collate_test5 SELECT * FROM collate_test1; SELECT a, b FROM collate_test5 ORDER BY b; @@ -89,15 +88,15 @@ SELECT * FROM collate_test2 ORDER BY b; SELECT * FROM collate_test3 ORDER BY b; -- constant expression folding -SELECT 'bbc' COLLATE "en_US.utf8" > 'äbc' COLLATE "en_US.utf8" AS "true"; -SELECT 'bbc' COLLATE "sv_SE.utf8" > 'äbc' COLLATE "sv_SE.utf8" AS "false"; +SELECT 'bbc' COLLATE "en_US" > 'äbc' COLLATE "en_US" AS "true"; +SELECT 'bbc' COLLATE "sv_SE" > 'äbc' COLLATE "sv_SE" AS "false"; -- upper/lower CREATE TABLE collate_test10 ( a int, - x text COLLATE "en_US.utf8", - y text COLLATE "tr_TR.utf8" + x text COLLATE "en_US", + y text COLLATE "tr_TR" ); INSERT INTO collate_test10 VALUES (1, 'hij', 'hij'), (2, 'HIJ', 'HIJ'); @@ -116,8 +115,11 @@ SELECT * FROM collate_test1 WHERE b ILIKE 'abc'; SELECT * FROM collate_test1 WHERE b ILIKE 'abc%'; SELECT * FROM collate_test1 WHERE b ILIKE '%bc%'; -SELECT 'Türkiye' COLLATE "en_US.utf8" ILIKE '%KI%' AS "true"; -SELECT 'Türkiye' COLLATE "tr_TR.utf8" ILIKE '%KI%' AS "false"; +SELECT 'Türkiye' COLLATE "en_US" ILIKE '%KI%' AS "true"; +SELECT 'Türkiye' COLLATE "tr_TR" ILIKE '%KI%' AS "false"; + +SELECT 'bıt' ILIKE 'BIT' COLLATE "en_US" AS "false"; +SELECT 'bıt' ILIKE 'BIT' COLLATE "tr_TR" AS "true"; -- The following actually exercises the selectivity estimation for ILIKE. SELECT relname FROM pg_class WHERE relname ILIKE 'abc%'; @@ -125,9 +127,9 @@ SELECT relname FROM pg_class WHERE relname ILIKE 'abc%'; -- to_char -SET lc_time TO 'tr_TR.utf8'; +SET lc_time TO 'tr_TR'; SELECT to_char(date '2010-04-01', 'DD TMMON YYYY'); -SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR.utf8"); +SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR"); -- backwards parsing @@ -268,17 +270,29 @@ SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_t CREATE ROLE regress_test_role; CREATE SCHEMA test_schema; -CREATE COLLATION test0 (locale = 'en_US.utf8'); -CREATE COLLATION test0 (locale = 'en_US.utf8'); -- fail -CREATE COLLATION test1 (lc_collate = 'en_US.utf8', lc_ctype = 'de_DE.utf8'); -CREATE COLLATION test2 (locale = 'en_US'); -- fail -CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail +-- We need to do this this way to cope with varying names for encodings: +do $$ +BEGIN + EXECUTE 'CREATE COLLATION test0 (locale = ' || + quote_literal(current_setting('lc_collate')) || ');'; +END +$$; +CREATE COLLATION test0 FROM "C"; -- fail, duplicate name +do $$ +BEGIN + EXECUTE 'CREATE COLLATION test1 (lc_collate = ' || + quote_literal(current_setting('lc_collate')) || + ', lc_ctype = ' || + quote_literal(current_setting('lc_ctype')) || ');'; +END +$$; +CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail, need lc_ctype CREATE COLLATION testx (locale = 'nonsense'); -- fail CREATE COLLATION test4 FROM nonsense; CREATE COLLATION test5 FROM test0; -SELECT collname, collencoding, collcollate, collctype FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1; +SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1; ALTER COLLATION test1 RENAME TO test11; ALTER COLLATION test0 RENAME TO test11; -- fail @@ -307,7 +321,7 @@ DROP ROLE regress_test_role; -- dependencies -CREATE COLLATION test0 (locale = 'en_US.utf8'); +CREATE COLLATION test0 FROM "C"; CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0); CREATE DOMAIN collate_dep_dom1 AS text COLLATE test0; -- 2.40.0