Commit 2bfd1b1e authored by Peter Eisentraut

Don't install ICU collation keyword variants

Users can still create such collations themselves.  To help with that,
document the Unicode TR 35 collation options for ICU.
Reviewed-by: Peter Geoghegan <pg@bowt.ie>
parent 51e225da
......@@ -664,13 +664,6 @@ SELECT a COLLATE "C" &lt; b COLLATE "POSIX" FROM test1;
</listitem>
</varlistentry>
<varlistentry>
<term><literal>de-u-co-phonebk-x-icu</literal></term>
<listitem>
<para>German collation, phone book variant</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>de-AT-x-icu</literal></term>
<listitem>
......@@ -683,13 +676,6 @@ SELECT a COLLATE "C" &lt; b COLLATE "POSIX" FROM test1;
</listitem>
</varlistentry>
<varlistentry>
<term><literal>de-AT-u-co-phonebk-x-icu</literal></term>
<listitem>
<para>German collation for Austria, phone book variant</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>und-x-icu</literal> (for <quote>undefined</quote>)</term>
<listitem>
......@@ -709,6 +695,90 @@ SELECT a COLLATE "C" &lt; b COLLATE "POSIX" FROM test1;
will draw an error along the lines of <quote>collation "de-x-icu" for
encoding "WIN874" does not exist</>.
</para>
<para>
ICU allows collations to be customized beyond the basic language+country
set that is preloaded by <command>initdb</command>. Users are encouraged
to define their own collation objects that make use of these facilities to
suit the sorting behavior to their requirements. Here are some examples:
<variablelist>
<varlistentry>
<term><literal>CREATE COLLATION "de-u-co-phonebk-x-icu" (provider = icu, locale = 'de-u-co-phonebk')</literal></term>
<listitem>
<para>German collation with phone book collation type</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>CREATE COLLATION "und-u-co-emoji-x-icu" (provider = icu, locale = 'und-u-co-emoji')</literal></term>
<listitem>
<para>
Root collation with Emoji collation type, per Unicode Technical Standard #51
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>CREATE COLLATION digitslast (provider = icu, locale = 'en-u-kr-latn-digit')</literal></term>
<listitem>
<para>
Sort digits after Latin letters. (The default is digits before letters.)
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>CREATE COLLATION upperfirst (provider = icu, locale = 'en-u-kf-upper')</literal></term>
<listitem>
<para>
Sort upper-case letters before lower-case letters. (The default is
lower-case letters first.)
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>CREATE COLLATION special (provider = icu, locale = 'en-u-kf-upper-kr-latn-digit')</literal></term>
<listitem>
<para>
Combines both of the above options.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>CREATE COLLATION numeric (provider = icu, locale = 'en-u-kn-true')</literal></term>
<listitem>
<para>
Numeric ordering: sequences of digits are sorted by their numeric value,
for example, <literal>A-21</literal> &lt; <literal>A-123</literal>
(also known as natural sort).
</para>
</listitem>
</varlistentry>
</variablelist>
See <ulink url="http://unicode.org/reports/tr35/tr35-collation.html">Unicode
Technical Standard #35</ulink>
and <ulink url="https://tools.ietf.org/html/bcp47">BCP 47</ulink> for
details. The list of possible collation types (<literal>co</literal>
subtag) can be found in
the <ulink url="http://www.unicode.org/repos/cldr/trunk/common/bcp47/collation.xml">CLDR
repository</ulink>.
The <ulink url="https://ssl.icu-project.org/icu-bin/locexp">ICU Locale
Explorer</ulink> can be used to check the details of a particular locale
definition.
</para>
<para>
Note that while this system allows creating collations that <quote>ignore
case</quote> or <quote>ignore accents</quote> or similar (using
the <literal>ks</literal> key), PostgreSQL does not at the moment allow
such collations to act in a truly case- or accent-insensitive manner. Any
strings that compare equal according to the collation but are not
byte-wise equal will be sorted according to their byte values.
</para>
</sect4>
</sect3>
......
......@@ -687,30 +687,11 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
*/
for (i = -1; i < uloc_countAvailable(); i++)
{
/*
* In ICU 4.2, ucol_getKeywordValuesForLocale() sometimes returns
* values that will not be accepted by uloc_toLanguageTag(). Skip
* loading keyword variants in that version. (Both
* ucol_getKeywordValuesForLocale() and uloc_toLanguageTag() are
* new in ICU 4.2, so older versions are not supported at all.)
*
* XXX We have no information about ICU 4.3 through 4.7, but we
* know the code below works with 4.8.
*/
#if U_ICU_VERSION_MAJOR_NUM > 4 || (U_ICU_VERSION_MAJOR_NUM == 4 && U_ICU_VERSION_MINOR_NUM > 2)
#define LOAD_ICU_KEYWORD_VARIANTS
#endif
const char *name;
char *langtag;
char *icucomment;
const char *collcollate;
Oid collid;
#ifdef LOAD_ICU_KEYWORD_VARIANTS
UEnumeration *en;
UErrorCode status;
const char *val;
#endif
if (i == -1)
name = ""; /* ICU root locale */
......@@ -744,58 +725,6 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
CreateComments(collid, CollationRelationId, 0,
icucomment);
}
/*
* Add keyword variants, if enabled.
*/
#ifdef LOAD_ICU_KEYWORD_VARIANTS
status = U_ZERO_ERROR;
en = ucol_getKeywordValuesForLocale("collation", name, TRUE, &status);
if (U_FAILURE(status))
ereport(ERROR,
(errmsg("could not get keyword values for locale \"%s\": %s",
name, u_errorName(status))));
status = U_ZERO_ERROR;
uenum_reset(en, &status);
while ((val = uenum_next(en, NULL, &status)))
{
char *localeid = psprintf("%s@collation=%s", name, val);
langtag = get_icu_language_tag(localeid);
collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : localeid;
/*
* Be paranoid about not allowing any non-ASCII strings into
* pg_collation
*/
if (!is_all_ascii(langtag) || !is_all_ascii(collcollate))
continue;
collid = CollationCreate(psprintf("%s-x-icu", langtag),
nspid, GetUserId(),
COLLPROVIDER_ICU, -1,
collcollate, collcollate,
get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
true, true);
if (OidIsValid(collid))
{
ncreated++;
CommandCounterIncrement();
icucomment = get_icu_locale_comment(localeid);
if (icucomment)
CreateComments(collid, CollationRelationId, 0,
icucomment);
}
}
if (U_FAILURE(status))
ereport(ERROR,
(errmsg("could not get keyword values for locale \"%s\": %s",
name, u_errorName(status))));
uenum_close(en);
#endif /* LOAD_ICU_KEYWORD_VARIANTS */
}
}
#endif /* USE_ICU */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment