Commit aa17c06f authored by Peter Eisentraut's avatar Peter Eisentraut

Add function to import operating system collations

Move this logic out of initdb into a user-callable function.  This
simplifies the code and makes it possible to update the standard
collations later on if additional operating system collations appear.
Reviewed-by: default avatarAndres Freund <andres@anarazel.de>
Reviewed-by: default avatarEuler Taveira <euler@timbira.com.br>
parent 193a7d79
......@@ -496,7 +496,7 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR";
</para>
</sect2>
<sect2>
<sect2 id="collation-managing">
<title>Managing Collations</title>
<para>
......
......@@ -19190,6 +19190,46 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
in the database's default tablespace, the tablespace can be specified as 0.
</para>
<para>
<xref linkend="functions-admin-collation"> lists functions used to manage
collations.
</para>
<table id="functions-admin-collation">
<title>Collation Management Functions</title>
<tgroup cols="3">
<thead>
<row><entry>Name</entry> <entry>Return Type</entry> <entry>Description</entry></row>
</thead>
<tbody>
<row>
<entry>
<indexterm><primary>pg_import_system_collations</primary></indexterm>
<literal><function>pg_import_system_collations(<parameter>if_not_exists</> <type>boolean</>, <parameter>schema</> <type>regnamespace</>)</function></literal>
</entry>
<entry><type>void</type></entry>
<entry>Import operating system collations</entry>
</row>
</tbody>
</tgroup>
</table>
<para>
<function>pg_import_system_collations</> populates the system
catalog <literal>pg_collation</literal> with collations based on all the
locales it finds on the operating system. This is
what <command>initdb</command> uses;
see <xref linkend="collation-managing"> for more details. If additional
locales are installed into the operating system later on, this function
can be run again to add collations for the new locales. In that case, the
parameter <parameter>if_not_exists</parameter> should be set to true to
skip over existing collations. The <parameter>schema</parameter>
parameter would typically be <literal>pg_catalog</literal>, but that is
not a requirement. (Collation objects based on locales that are no longer
present on the operating system are never removed by this function.)
</para>
</sect2>
<sect2 id="functions-admin-index">
......
......@@ -41,7 +41,8 @@ Oid
CollationCreate(const char *collname, Oid collnamespace,
Oid collowner,
int32 collencoding,
const char *collcollate, const char *collctype)
const char *collcollate, const char *collctype,
bool if_not_exists)
{
Relation rel;
TupleDesc tupDesc;
......@@ -72,10 +73,21 @@ CollationCreate(const char *collname, Oid collnamespace,
PointerGetDatum(collname),
Int32GetDatum(collencoding),
ObjectIdGetDatum(collnamespace)))
ereport(ERROR,
{
if (if_not_exists)
{
ereport(NOTICE,
(errcode(ERRCODE_DUPLICATE_OBJECT),
errmsg("collation \"%s\" for encoding \"%s\" already exists",
errmsg("collation \"%s\" for encoding \"%s\" already exists, skipping",
collname, pg_encoding_to_char(collencoding))));
return InvalidOid;
}
else
ereport(ERROR,
(errcode(ERRCODE_DUPLICATE_OBJECT),
errmsg("collation \"%s\" for encoding \"%s\" already exists",
collname, pg_encoding_to_char(collencoding))));
}
/*
* Also forbid matching an any-encoding entry. This test of course is not
......@@ -86,10 +98,21 @@ CollationCreate(const char *collname, Oid collnamespace,
PointerGetDatum(collname),
Int32GetDatum(-1),
ObjectIdGetDatum(collnamespace)))
ereport(ERROR,
{
if (if_not_exists)
{
ereport(NOTICE,
(errcode(ERRCODE_DUPLICATE_OBJECT),
errmsg("collation \"%s\" already exists, skipping",
collname)));
return InvalidOid;
}
else
ereport(ERROR,
(errcode(ERRCODE_DUPLICATE_OBJECT),
errmsg("collation \"%s\" already exists",
collname)));
}
/* open pg_collation */
rel = heap_open(CollationRelationId, RowExclusiveLock);
......
......@@ -136,7 +136,11 @@ DefineCollation(ParseState *pstate, List *names, List *parameters)
GetUserId(),
GetDatabaseEncoding(),
collcollate,
collctype);
collctype,
false);
if (!OidIsValid(newoid))
return InvalidObjectAddress;
ObjectAddressSet(address, CollationRelationId, newoid);
......@@ -177,3 +181,151 @@ IsThereCollationInNamespace(const char *collname, Oid nspOid)
errmsg("collation \"%s\" already exists in schema \"%s\"",
collname, get_namespace_name(nspOid))));
}
/*
* "Normalize" a locale name, stripping off encoding tags such as
* ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
* -> "br_FR@euro"). Return true if a new, different name was
* generated.
*/
pg_attribute_unused()
static bool
normalize_locale_name(char *new, const char *old)
{
char *n = new;
const char *o = old;
bool changed = false;
while (*o)
{
if (*o == '.')
{
/* skip over encoding tag such as ".utf8" or ".UTF-8" */
o++;
while ((*o >= 'A' && *o <= 'Z')
|| (*o >= 'a' && *o <= 'z')
|| (*o >= '0' && *o <= '9')
|| (*o == '-'))
o++;
changed = true;
}
else
*n++ = *o++;
}
*n = '\0';
return changed;
}
Datum
pg_import_system_collations(PG_FUNCTION_ARGS)
{
#if defined(HAVE_LOCALE_T) && !defined(WIN32)
bool if_not_exists = PG_GETARG_BOOL(0);
Oid nspid = PG_GETARG_OID(1);
FILE *locale_a_handle;
char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */
int count = 0;
#endif
if (!superuser())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
(errmsg("must be superuser to import system collations"))));
#if defined(HAVE_LOCALE_T) && !defined(WIN32)
locale_a_handle = OpenPipeStream("locale -a", "r");
if (locale_a_handle == NULL)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not execute command \"%s\": %m",
"locale -a")));
while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
{
int i;
size_t len;
int enc;
bool skip;
char alias[NAMEDATALEN];
len = strlen(localebuf);
if (len == 0 || localebuf[len - 1] != '\n')
{
elog(DEBUG1, "locale name too long, skipped: \"%s\"", localebuf);
continue;
}
localebuf[len - 1] = '\0';
/*
* Some systems have locale names that don't consist entirely of ASCII
* letters (such as "bokm&aring;l" or "fran&ccedil;ais"). This is
* pretty silly, since we need the locale itself to interpret the
* non-ASCII characters. We can't do much with those, so we filter
* them out.
*/
skip = false;
for (i = 0; i < len; i++)
{
if (IS_HIGHBIT_SET(localebuf[i]))
{
skip = true;
break;
}
}
if (skip)
{
elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf);
continue;
}
enc = pg_get_encoding_from_locale(localebuf, false);
if (enc < 0)
{
/* error message printed by pg_get_encoding_from_locale() */
continue;
}
if (!PG_VALID_BE_ENCODING(enc))
continue; /* ignore locales for client-only encodings */
if (enc == PG_SQL_ASCII)
continue; /* C/POSIX are already in the catalog */
count++;
CollationCreate(localebuf, nspid, GetUserId(), enc,
localebuf, localebuf, if_not_exists);
CommandCounterIncrement();
/*
* Generate aliases such as "en_US" in addition to "en_US.utf8" for
* ease of use. Note that collation names are unique per encoding
* only, so this doesn't clash with "en_US" for LATIN1, say.
*
* This always runs in "if not exists" mode, to skip aliases that
* conflict with an existing locale name for the same encoding. For
* example, "br_FR.iso88591" is normalized to "br_FR", both for
* encoding LATIN1. But the unnormalized locale "br_FR" already
* exists for LATIN1.
*/
if (normalize_locale_name(alias, localebuf))
{
CollationCreate(alias, nspid, GetUserId(), enc,
localebuf, localebuf, true);
CommandCounterIncrement();
}
}
ClosePipeStream(locale_a_handle);
if (count == 0)
ereport(ERROR,
(errmsg("no usable system locales were found")));
#endif /* not HAVE_LOCALE_T && not WIN32 */
PG_RETURN_VOID();
}
......@@ -1608,178 +1608,16 @@ setup_description(FILE *cmdfd)
PG_CMD_PUTS("DROP TABLE tmp_pg_shdescription;\n\n");
}
#ifdef HAVE_LOCALE_T
/*
* "Normalize" a locale name, stripping off encoding tags such as
* ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
* -> "br_FR@euro"). Return true if a new, different name was
* generated.
*/
static bool
normalize_locale_name(char *new, const char *old)
{
char *n = new;
const char *o = old;
bool changed = false;
while (*o)
{
if (*o == '.')
{
/* skip over encoding tag such as ".utf8" or ".UTF-8" */
o++;
while ((*o >= 'A' && *o <= 'Z')
|| (*o >= 'a' && *o <= 'z')
|| (*o >= '0' && *o <= '9')
|| (*o == '-'))
o++;
changed = true;
}
else
*n++ = *o++;
}
*n = '\0';
return changed;
}
#endif /* HAVE_LOCALE_T */
/*
* populate pg_collation
*/
static void
setup_collation(FILE *cmdfd)
{
#if defined(HAVE_LOCALE_T) && !defined(WIN32)
int i;
FILE *locale_a_handle;
char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */
int count = 0;
locale_a_handle = popen_check("locale -a", "r");
if (!locale_a_handle)
return; /* complaint already printed */
PG_CMD_PUTS("CREATE TEMP TABLE tmp_pg_collation ( "
" collname name, "
" locale name, "
" encoding int) WITHOUT OIDS;\n\n");
while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
{
size_t len;
int enc;
bool skip;
char *quoted_locale;
char alias[NAMEDATALEN];
len = strlen(localebuf);
if (len == 0 || localebuf[len - 1] != '\n')
{
if (debug)
fprintf(stderr, _("%s: locale name too long, skipped: \"%s\"\n"),
progname, localebuf);
continue;
}
localebuf[len - 1] = '\0';
/*
* Some systems have locale names that don't consist entirely of ASCII
* letters (such as "bokm&aring;l" or "fran&ccedil;ais"). This is
* pretty silly, since we need the locale itself to interpret the
* non-ASCII characters. We can't do much with those, so we filter
* them out.
*/
skip = false;
for (i = 0; i < len; i++)
{
if (IS_HIGHBIT_SET(localebuf[i]))
{
skip = true;
break;
}
}
if (skip)
{
if (debug)
fprintf(stderr, _("%s: locale name has non-ASCII characters, skipped: \"%s\"\n"),
progname, localebuf);
continue;
}
enc = pg_get_encoding_from_locale(localebuf, debug);
if (enc < 0)
{
/* error message printed by pg_get_encoding_from_locale() */
continue;
}
if (!PG_VALID_BE_ENCODING(enc))
continue; /* ignore locales for client-only encodings */
if (enc == PG_SQL_ASCII)
continue; /* C/POSIX are already in the catalog */
count++;
quoted_locale = escape_quotes(localebuf);
PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n\n",
quoted_locale, quoted_locale, enc);
/*
* Generate aliases such as "en_US" in addition to "en_US.utf8" for
* ease of use. Note that collation names are unique per encoding
* only, so this doesn't clash with "en_US" for LATIN1, say.
*/
if (normalize_locale_name(alias, localebuf))
{
char *quoted_alias = escape_quotes(alias);
PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n\n",
quoted_alias, quoted_locale, enc);
free(quoted_alias);
}
free(quoted_locale);
}
PG_CMD_PUTS("SELECT pg_import_system_collations(if_not_exists => false, schema => 'pg_catalog');\n\n");
/* Add an SQL-standard name */
PG_CMD_PRINTF1("INSERT INTO tmp_pg_collation VALUES ('ucs_basic', 'C', %d);\n\n", PG_UTF8);
/*
* When copying collations to the final location, eliminate aliases that
* conflict with an existing locale name for the same encoding. For
* example, "br_FR.iso88591" is normalized to "br_FR", both for encoding
* LATIN1. But the unnormalized locale "br_FR" already exists for LATIN1.
* Prefer the alias that matches the OS locale name, else the first locale
* name by sort order (arbitrary choice to be deterministic).
*
* Also, eliminate any aliases that conflict with pg_collation's
* hard-wired entries for "C" etc.
*/
PG_CMD_PUTS("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) "
" SELECT DISTINCT ON (collname, encoding)"
" collname, "
" (SELECT oid FROM pg_namespace WHERE nspname = 'pg_catalog') AS collnamespace, "
" (SELECT relowner FROM pg_class WHERE relname = 'pg_collation') AS collowner, "
" encoding, locale, locale "
" FROM tmp_pg_collation"
" WHERE NOT EXISTS (SELECT 1 FROM pg_collation WHERE collname = tmp_pg_collation.collname)"
" ORDER BY collname, encoding, (collname = locale) DESC, locale;\n\n");
/*
* Even though the table is temp, drop it explicitly so it doesn't get
* copied into template0/postgres databases.
*/
PG_CMD_PUTS("DROP TABLE tmp_pg_collation;\n\n");
pclose(locale_a_handle);
if (count == 0 && !debug)
{
printf(_("No usable system locales were found.\n"));
printf(_("Use the option \"--debug\" to see details.\n"));
}
#endif /* not HAVE_LOCALE_T && not WIN32 */
PG_CMD_PRINTF2("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) VALUES ('ucs_basic', 'pg_catalog'::regnamespace, '%s'::regrole, %d, 'C', 'C');\n\n", escape_quotes(username), PG_UTF8);
}
/*
......
......@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 201701172
#define CATALOG_VERSION_NO 201701181
#endif
......@@ -17,7 +17,8 @@
extern Oid CollationCreate(const char *collname, Oid collnamespace,
Oid collowner,
int32 collencoding,
const char *collcollate, const char *collctype);
const char *collcollate, const char *collctype,
bool if_not_exists);
extern void RemoveCollationById(Oid collationOid);
#endif /* PG_COLLATION_FN_H */
......@@ -5349,6 +5349,9 @@ DESCR("pg_controldata recovery state information as a function");
DATA(insert OID = 3444 ( pg_control_init PGNSP PGUID 12 1 0 0 0 f f f f t f v s 0 0 2249 "" "{23,23,23,23,23,23,23,23,23,16,16,16,23}" "{o,o,o,o,o,o,o,o,o,o,o,o,o}" "{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,bigint_timestamps,float4_pass_by_value,float8_pass_by_value,data_page_checksum_version}" _null_ _null_ pg_control_init _null_ _null_ _null_ ));
DESCR("pg_controldata init state information as a function");
DATA(insert OID = 3445 ( pg_import_system_collations PGNSP PGUID 12 100 0 0 0 f f f f t f v r 2 0 2278 "16 4089" _null_ _null_ "{if_not_exists,schema}" _null_ _null_ pg_import_system_collations _null_ _null_ _null_ ));
DESCR("import collations from operating system");
/*
* Symbolic values for provolatile column: these indicate whether the result
* of a function is dependent *only* on the values of its explicit arguments,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment