Commit f89e4dfa authored by Tom Lane's avatar Tom Lane

Remove collate.linux.utf8.sql's assumptions about ".utf8" in locale names.

Tweak the test so that it does not depend on the platform using ".utf8" as
the extension signifying that a locale uses UTF8 encoding.  For the most
part this just requires using the abbreviated collation names "en_US" etc,
though I had to work a bit harder on the collation creation tests.

This opens the door to using the test on platforms that spell locales
differently, for example ".utf-8" or ".UTF-8".  Also, the test is now
somewhat useful with server encodings other than UTF8; though depending on
which encoding is selected, different subsets of it will fail for lack of
character set support.
parent a19002d4
......@@ -5,14 +5,14 @@
SET client_encoding TO UTF8;
CREATE TABLE collate_test1 (
a int,
b text COLLATE "en_US.utf8" NOT NULL
b text COLLATE "en_US" NOT NULL
);
\d collate_test1
Table "public.collate_test1"
Column | Type | Modifiers
--------+---------+-----------------------------
Table "public.collate_test1"
Column | Type | Modifiers
--------+---------+------------------------
a | integer |
b | text | collate en_US.utf8 not null
b | text | collate en_US not null
CREATE TABLE collate_test_fail (
a int,
......@@ -29,25 +29,25 @@ ERROR: collation "foo" for encoding "UTF8" does not exist
LINE 3: b text COLLATE "foo"
^
CREATE TABLE collate_test_fail (
a int COLLATE "en_US.utf8",
a int COLLATE "en_US",
b text
);
ERROR: collations are not supported by type integer
LINE 2: a int COLLATE "en_US.utf8",
LINE 2: a int COLLATE "en_US",
^
CREATE TABLE collate_test_like (
LIKE collate_test1
);
\d collate_test_like
Table "public.collate_test_like"
Column | Type | Modifiers
--------+---------+-----------------------------
Table "public.collate_test_like"
Column | Type | Modifiers
--------+---------+------------------------
a | integer |
b | text | collate en_US.utf8 not null
b | text | collate en_US not null
CREATE TABLE collate_test2 (
a int,
b text COLLATE "sv_SE.utf8"
b text COLLATE "sv_SE"
);
CREATE TABLE collate_test3 (
a int,
......@@ -105,16 +105,12 @@ SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "C";
3 | bbc
(2 rows)
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US.utf8";
ERROR: collation mismatch between explicit collations "C" and "en_US.utf8"
LINE 1: ...* FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "e...
^
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US";
ERROR: collation mismatch between explicit collations "C" and "en_US"
LINE 1: ...* FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "e...
^
CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE.utf8";
CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE.utf8"; -- fails
CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE";
CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE"; -- fails
ERROR: collations are not supported by type integer
CREATE TABLE collate_test4 (
a int,
......@@ -132,7 +128,7 @@ SELECT a, b FROM collate_test4 ORDER BY b;
CREATE TABLE collate_test5 (
a int,
b testdomain_sv COLLATE "en_US.utf8"
b testdomain_sv COLLATE "en_US"
);
INSERT INTO collate_test5 SELECT * FROM collate_test1;
SELECT a, b FROM collate_test5 ORDER BY b;
......@@ -209,13 +205,13 @@ SELECT * FROM collate_test3 ORDER BY b;
(4 rows)
-- constant expression folding
SELECT 'bbc' COLLATE "en_US.utf8" > 'äbc' COLLATE "en_US.utf8" AS "true";
SELECT 'bbc' COLLATE "en_US" > 'äbc' COLLATE "en_US" AS "true";
true
------
t
(1 row)
SELECT 'bbc' COLLATE "sv_SE.utf8" > 'äbc' COLLATE "sv_SE.utf8" AS "false";
SELECT 'bbc' COLLATE "sv_SE" > 'äbc' COLLATE "sv_SE" AS "false";
false
-------
f
......@@ -224,8 +220,8 @@ SELECT 'bbc' COLLATE "sv_SE.utf8" > 'äbc' COLLATE "sv_SE.utf8" AS "false";
-- upper/lower
CREATE TABLE collate_test10 (
a int,
x text COLLATE "en_US.utf8",
y text COLLATE "tr_TR.utf8"
x text COLLATE "en_US",
y text COLLATE "tr_TR"
);
INSERT INTO collate_test10 VALUES (1, 'hij', 'hij'), (2, 'HIJ', 'HIJ');
SELECT a, lower(x), lower(y), upper(x), upper(y), initcap(x), initcap(y) FROM collate_test10;
......@@ -293,18 +289,30 @@ SELECT * FROM collate_test1 WHERE b ILIKE '%bc%';
4 | ABC
(4 rows)
SELECT 'Türkiye' COLLATE "en_US.utf8" ILIKE '%KI%' AS "true";
SELECT 'Türkiye' COLLATE "en_US" ILIKE '%KI%' AS "true";
true
------
t
(1 row)
SELECT 'Türkiye' COLLATE "tr_TR.utf8" ILIKE '%KI%' AS "false";
SELECT 'Türkiye' COLLATE "tr_TR" ILIKE '%KI%' AS "false";
false
-------
f
(1 row)
SELECT 'bıt' ILIKE 'BIT' COLLATE "en_US" AS "false";
false
-------
f
(1 row)
SELECT 'bıt' ILIKE 'BIT' COLLATE "tr_TR" AS "true";
true
------
t
(1 row)
-- The following actually exercises the selectivity estimation for ILIKE.
SELECT relname FROM pg_class WHERE relname ILIKE 'abc%';
relname
......@@ -312,14 +320,14 @@ SELECT relname FROM pg_class WHERE relname ILIKE 'abc%';
(0 rows)
-- to_char
SET lc_time TO 'tr_TR.utf8';
SET lc_time TO 'tr_TR';
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY');
to_char
-------------
01 NIS 2010
(1 row)
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR.utf8");
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR");
to_char
-------------
01 NİS 2010
......@@ -602,7 +610,7 @@ SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- ok
(8 rows)
SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
ERROR: collation mismatch between implicit collations "en_US.utf8" and "C"
ERROR: collation mismatch between implicit collations "en_US" and "C"
LINE 1: SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collat...
^
HINT: You can choose the collation by applying the COLLATE clause to one or both expressions.
......@@ -616,12 +624,12 @@ SELECT a, b COLLATE "C" FROM collate_test1 UNION SELECT a, b FROM collate_test3
(4 rows)
SELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
ERROR: collation mismatch between implicit collations "en_US.utf8" and "C"
ERROR: collation mismatch between implicit collations "en_US" and "C"
LINE 1: ...ELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM col...
^
HINT: You can choose the collation by applying the COLLATE clause to one or both expressions.
SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
ERROR: collation mismatch between implicit collations "en_US.utf8" and "C"
ERROR: collation mismatch between implicit collations "en_US" and "C"
LINE 1: SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM colla...
^
HINT: You can choose the collation by applying the COLLATE clause to one or both expressions.
......@@ -640,7 +648,7 @@ select x || y from collate_test10; -- ok, because || is not collation aware
(2 rows)
select x, y from collate_test10 order by x || y; -- not so ok
ERROR: collation mismatch between implicit collations "en_US.utf8" and "tr_TR.utf8"
ERROR: collation mismatch between implicit collations "en_US" and "tr_TR"
LINE 1: select x, y from collate_test10 order by x || y;
^
HINT: You can choose the collation by applying the COLLATE clause to one or both expressions.
......@@ -823,26 +831,36 @@ SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_t
-- schema manipulation commands
CREATE ROLE regress_test_role;
CREATE SCHEMA test_schema;
CREATE COLLATION test0 (locale = 'en_US.utf8');
CREATE COLLATION test0 (locale = 'en_US.utf8'); -- fail
-- We need to do this this way to cope with varying names for encodings:
do $$
BEGIN
EXECUTE 'CREATE COLLATION test0 (locale = ' ||
quote_literal(current_setting('lc_collate')) || ');';
END
$$;
CREATE COLLATION test0 FROM "C"; -- fail, duplicate name
ERROR: collation "test0" for encoding "UTF8" already exists
CREATE COLLATION test1 (lc_collate = 'en_US.utf8', lc_ctype = 'de_DE.utf8');
CREATE COLLATION test2 (locale = 'en_US'); -- fail
ERROR: encoding UTF8 does not match locale en_US
DETAIL: The chosen LC_CTYPE setting requires encoding LATIN1.
CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail
do $$
BEGIN
EXECUTE 'CREATE COLLATION test1 (lc_collate = ' ||
quote_literal(current_setting('lc_collate')) ||
', lc_ctype = ' ||
quote_literal(current_setting('lc_ctype')) || ');';
END
$$;
CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail, need lc_ctype
ERROR: parameter "lc_ctype" must be specified
CREATE COLLATION testx (locale = 'nonsense'); -- fail
ERROR: could not create locale "nonsense": No such file or directory
CREATE COLLATION test4 FROM nonsense;
ERROR: collation "nonsense" for encoding "UTF8" does not exist
CREATE COLLATION test5 FROM test0;
SELECT collname, collencoding, collcollate, collctype FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1;
collname | collencoding | collcollate | collctype
----------+--------------+-------------+------------
test0 | 6 | en_US.utf8 | en_US.utf8
test1 | 6 | en_US.utf8 | de_DE.utf8
test5 | 6 | en_US.utf8 | en_US.utf8
SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1;
collname
----------
test0
test1
test5
(3 rows)
ALTER COLLATION test1 RENAME TO test11;
......@@ -879,7 +897,7 @@ SELECT collname FROM pg_collation WHERE collname LIKE 'test%';
DROP SCHEMA test_schema;
DROP ROLE regress_test_role;
-- dependencies
CREATE COLLATION test0 (locale = 'en_US.utf8');
CREATE COLLATION test0 FROM "C";
CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0);
CREATE DOMAIN collate_dep_dom1 AS text COLLATE test0;
CREATE TYPE collate_dep_test2 AS (x int, y text COLLATE test0);
......
......@@ -8,7 +8,7 @@ SET client_encoding TO UTF8;
CREATE TABLE collate_test1 (
a int,
b text COLLATE "en_US.utf8" NOT NULL
b text COLLATE "en_US" NOT NULL
);
\d collate_test1
......@@ -24,7 +24,7 @@ CREATE TABLE collate_test_fail (
);
CREATE TABLE collate_test_fail (
a int COLLATE "en_US.utf8",
a int COLLATE "en_US",
b text
);
......@@ -36,7 +36,7 @@ CREATE TABLE collate_test_like (
CREATE TABLE collate_test2 (
a int,
b text COLLATE "sv_SE.utf8"
b text COLLATE "sv_SE"
);
CREATE TABLE collate_test3 (
......@@ -56,12 +56,11 @@ SELECT * FROM collate_test3 WHERE b >= 'BBC';
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc';
SELECT * FROM collate_test1 WHERE b >= 'bbc' COLLATE "C";
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "C";
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US.utf8";
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US";
CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE.utf8";
CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE.utf8"; -- fails
CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE";
CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE"; -- fails
CREATE TABLE collate_test4 (
a int,
b testdomain_sv
......@@ -71,7 +70,7 @@ SELECT a, b FROM collate_test4 ORDER BY b;
CREATE TABLE collate_test5 (
a int,
b testdomain_sv COLLATE "en_US.utf8"
b testdomain_sv COLLATE "en_US"
);
INSERT INTO collate_test5 SELECT * FROM collate_test1;
SELECT a, b FROM collate_test5 ORDER BY b;
......@@ -89,15 +88,15 @@ SELECT * FROM collate_test2 ORDER BY b;
SELECT * FROM collate_test3 ORDER BY b;
-- constant expression folding
SELECT 'bbc' COLLATE "en_US.utf8" > 'äbc' COLLATE "en_US.utf8" AS "true";
SELECT 'bbc' COLLATE "sv_SE.utf8" > 'äbc' COLLATE "sv_SE.utf8" AS "false";
SELECT 'bbc' COLLATE "en_US" > 'äbc' COLLATE "en_US" AS "true";
SELECT 'bbc' COLLATE "sv_SE" > 'äbc' COLLATE "sv_SE" AS "false";
-- upper/lower
CREATE TABLE collate_test10 (
a int,
x text COLLATE "en_US.utf8",
y text COLLATE "tr_TR.utf8"
x text COLLATE "en_US",
y text COLLATE "tr_TR"
);
INSERT INTO collate_test10 VALUES (1, 'hij', 'hij'), (2, 'HIJ', 'HIJ');
......@@ -116,8 +115,11 @@ SELECT * FROM collate_test1 WHERE b ILIKE 'abc';
SELECT * FROM collate_test1 WHERE b ILIKE 'abc%';
SELECT * FROM collate_test1 WHERE b ILIKE '%bc%';
SELECT 'Türkiye' COLLATE "en_US.utf8" ILIKE '%KI%' AS "true";
SELECT 'Türkiye' COLLATE "tr_TR.utf8" ILIKE '%KI%' AS "false";
SELECT 'Türkiye' COLLATE "en_US" ILIKE '%KI%' AS "true";
SELECT 'Türkiye' COLLATE "tr_TR" ILIKE '%KI%' AS "false";
SELECT 'bıt' ILIKE 'BIT' COLLATE "en_US" AS "false";
SELECT 'bıt' ILIKE 'BIT' COLLATE "tr_TR" AS "true";
-- The following actually exercises the selectivity estimation for ILIKE.
SELECT relname FROM pg_class WHERE relname ILIKE 'abc%';
......@@ -125,9 +127,9 @@ SELECT relname FROM pg_class WHERE relname ILIKE 'abc%';
-- to_char
SET lc_time TO 'tr_TR.utf8';
SET lc_time TO 'tr_TR';
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY');
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR.utf8");
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR");
-- backwards parsing
......@@ -268,17 +270,29 @@ SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_t
CREATE ROLE regress_test_role;
CREATE SCHEMA test_schema;
CREATE COLLATION test0 (locale = 'en_US.utf8');
CREATE COLLATION test0 (locale = 'en_US.utf8'); -- fail
CREATE COLLATION test1 (lc_collate = 'en_US.utf8', lc_ctype = 'de_DE.utf8');
CREATE COLLATION test2 (locale = 'en_US'); -- fail
CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail
-- We need to do this this way to cope with varying names for encodings:
do $$
BEGIN
EXECUTE 'CREATE COLLATION test0 (locale = ' ||
quote_literal(current_setting('lc_collate')) || ');';
END
$$;
CREATE COLLATION test0 FROM "C"; -- fail, duplicate name
do $$
BEGIN
EXECUTE 'CREATE COLLATION test1 (lc_collate = ' ||
quote_literal(current_setting('lc_collate')) ||
', lc_ctype = ' ||
quote_literal(current_setting('lc_ctype')) || ');';
END
$$;
CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail, need lc_ctype
CREATE COLLATION testx (locale = 'nonsense'); -- fail
CREATE COLLATION test4 FROM nonsense;
CREATE COLLATION test5 FROM test0;
SELECT collname, collencoding, collcollate, collctype FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1;
SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1;
ALTER COLLATION test1 RENAME TO test11;
ALTER COLLATION test0 RENAME TO test11; -- fail
......@@ -307,7 +321,7 @@ DROP ROLE regress_test_role;
-- dependencies
CREATE COLLATION test0 (locale = 'en_US.utf8');
CREATE COLLATION test0 FROM "C";
CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0);
CREATE DOMAIN collate_dep_dom1 AS text COLLATE test0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment