Commit b8f9a2a6 authored by Peter Eisentraut's avatar Peter Eisentraut

Add support for collation attributes on older ICU versions

Starting in ICU 54, collation customization attributes can be
specified in the locale string, for example
"@colStrength=primary;colCaseLevel=yes".  Add support for this for
older ICU versions as well, by adding some minimal parsing of the
attributes in the locale string and calling ucol_setAttribute() on
them.  This is essentially what never ICU versions do internally in
ucol_open().  This was we can offer this functionality in a consistent
way in all ICU versions supported by PostgreSQL.

Also add some tests for ICU collation customization.
Reported-by: default avatarDaniel Verite <daniel@manitou-mail.org>
Discussion: https://www.postgresql.org/message-id/0270ebd4-f67c-8774-1a5a-91adfb9bb41f@2ndquadrant.com
parent 042162d6
......@@ -58,6 +58,7 @@
#include "catalog/pg_control.h"
#include "mb/pg_wchar.h"
#include "utils/builtins.h"
#include "utils/formatting.h"
#include "utils/hsearch.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
......@@ -132,6 +133,9 @@ static HTAB *collation_cache = NULL;
static char *IsoLocaleName(const char *); /* MSVC specific */
#endif
#ifdef USE_ICU
static void icu_set_collation_attributes(UCollator *collator, const char *loc);
#endif
/*
* pg_perm_setlocale
......@@ -1380,6 +1384,9 @@ pg_newlocale_from_collation(Oid collid)
(errmsg("could not open collator for locale \"%s\": %s",
collcollate, u_errorName(status))));
if (U_ICU_VERSION_MAJOR_NUM < 54)
icu_set_collation_attributes(collator, collcollate);
/* We will leak this string if we get an error below :-( */
result.info.icu.locale = MemoryContextStrdup(TopMemoryContext,
collcollate);
......@@ -1588,6 +1595,103 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
return len_result;
}
/*
* Parse collation attributes and apply them to the open collator. This takes
* a string like "und@colStrength=primary;colCaseLevel=yes" and parses and
* applies the key-value arguments.
*
* Starting with ICU version 54, the attributes are processed automatically by
* ucol_open(), so this is only necessary for emulating this behavior on older
* versions.
*/
pg_attribute_unused()
static void
icu_set_collation_attributes(UCollator *collator, const char *loc)
{
char *str = asc_tolower(loc, strlen(loc));
str = strchr(str, '@');
if (!str)
return;
str++;
for (char *token = strtok(str, ";"); token; token = strtok(NULL, ";"))
{
char *e = strchr(token, '=');
if (e)
{
char *name;
char *value;
UColAttribute uattr = -1;
UColAttributeValue uvalue = -1;
UErrorCode status;
status = U_ZERO_ERROR;
*e = '\0';
name = token;
value = e + 1;
/*
* See attribute name and value lists in ICU i18n/coll.cpp
*/
if (strcmp(name, "colstrength") == 0)
uattr = UCOL_STRENGTH;
else if (strcmp(name, "colbackwards") == 0)
uattr = UCOL_FRENCH_COLLATION;
else if (strcmp(name, "colcaselevel") == 0)
uattr = UCOL_CASE_LEVEL;
else if (strcmp(name, "colcasefirst") == 0)
uattr = UCOL_CASE_FIRST;
else if (strcmp(name, "colalternate") == 0)
uattr = UCOL_ALTERNATE_HANDLING;
else if (strcmp(name, "colnormalization") == 0)
uattr = UCOL_NORMALIZATION_MODE;
else if (strcmp(name, "colnumeric") == 0)
uattr = UCOL_NUMERIC_COLLATION;
/* ignore if unknown */
if (strcmp(value, "primary") == 0)
uvalue = UCOL_PRIMARY;
else if (strcmp(value, "secondary") == 0)
uvalue = UCOL_SECONDARY;
else if (strcmp(value, "tertiary") == 0)
uvalue = UCOL_TERTIARY;
else if (strcmp(value, "quaternary") == 0)
uvalue = UCOL_QUATERNARY;
else if (strcmp(value, "identical") == 0)
uvalue = UCOL_IDENTICAL;
else if (strcmp(value, "no") == 0)
uvalue = UCOL_OFF;
else if (strcmp(value, "yes") == 0)
uvalue = UCOL_ON;
else if (strcmp(value, "shifted") == 0)
uvalue = UCOL_SHIFTED;
else if (strcmp(value, "non-ignorable") == 0)
uvalue = UCOL_NON_IGNORABLE;
else if (strcmp(value, "lower") == 0)
uvalue = UCOL_LOWER_FIRST;
else if (strcmp(value, "upper") == 0)
uvalue = UCOL_UPPER_FIRST;
else
status = U_ILLEGAL_ARGUMENT_ERROR;
if (uattr != -1 && uvalue != -1)
ucol_setAttribute(collator, uattr, uvalue, &status);
/*
* Pretend the error came from ucol_open(), for consistent error
* message across ICU versions.
*/
if (U_FAILURE(status))
ereport(ERROR,
(errmsg("could not open collator for locale \"%s\": %s",
loc, u_errorName(status))));
}
}
}
#endif /* USE_ICU */
/*
......
......@@ -1100,6 +1100,45 @@ select textrange_en_us('A','Z') @> 'b'::text;
drop type textrange_c;
drop type textrange_en_us;
-- test ICU collation customization
CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes');
SELECT 'aaá' > 'AAA' COLLATE "und-x-icu", 'aaá' < 'AAA' COLLATE testcoll_ignore_accents;
?column? | ?column?
----------+----------
t | t
(1 row)
CREATE COLLATION testcoll_backwards (provider = icu, locale = '@colBackwards=yes');
SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll_backwards;
?column? | ?column?
----------+----------
t | t
(1 row)
CREATE COLLATION testcoll_lower_first (provider = icu, locale = '@colCaseFirst=lower');
CREATE COLLATION testcoll_upper_first (provider = icu, locale = '@colCaseFirst=upper');
SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcoll_upper_first;
?column? | ?column?
----------+----------
t | t
(1 row)
CREATE COLLATION testcoll_shifted (provider = icu, locale = '@colAlternate=shifted');
SELECT 'de-luge' < 'deanza' COLLATE "und-x-icu", 'de-luge' > 'deanza' COLLATE testcoll_shifted;
?column? | ?column?
----------+----------
t | t
(1 row)
CREATE COLLATION testcoll_numeric (provider = icu, locale = '@colNumeric=yes');
SELECT 'A-21' > 'A-123' COLLATE "und-x-icu", 'A-21' < 'A-123' COLLATE testcoll_numeric;
?column? | ?column?
----------+----------
t | t
(1 row)
CREATE COLLATION testcoll_error1 (provider = icu, locale = '@colNumeric=lower');
ERROR: could not open collator for locale "@colNumeric=lower": U_ILLEGAL_ARGUMENT_ERROR
-- cleanup
SET client_min_messages TO warning;
DROP SCHEMA collate_tests CASCADE;
......
......@@ -425,6 +425,27 @@ drop type textrange_c;
drop type textrange_en_us;
-- test ICU collation customization
CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes');
SELECT 'aaá' > 'AAA' COLLATE "und-x-icu", 'aaá' < 'AAA' COLLATE testcoll_ignore_accents;
CREATE COLLATION testcoll_backwards (provider = icu, locale = '@colBackwards=yes');
SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll_backwards;
CREATE COLLATION testcoll_lower_first (provider = icu, locale = '@colCaseFirst=lower');
CREATE COLLATION testcoll_upper_first (provider = icu, locale = '@colCaseFirst=upper');
SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcoll_upper_first;
CREATE COLLATION testcoll_shifted (provider = icu, locale = '@colAlternate=shifted');
SELECT 'de-luge' < 'deanza' COLLATE "und-x-icu", 'de-luge' > 'deanza' COLLATE testcoll_shifted;
CREATE COLLATION testcoll_numeric (provider = icu, locale = '@colNumeric=yes');
SELECT 'A-21' > 'A-123' COLLATE "und-x-icu", 'A-21' < 'A-123' COLLATE testcoll_numeric;
CREATE COLLATION testcoll_error1 (provider = icu, locale = '@colNumeric=lower');
-- cleanup
SET client_min_messages TO warning;
DROP SCHEMA collate_tests CASCADE;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment