Commit c2d45264 authored by Heikki Linnakangas's avatar Heikki Linnakangas

Tighten the check in initdb and CREATE DATABASE that the chosen encoding
matches the encoding of the locale. LC_COLLATE is now checked in addition
to LC_CTYPE.
parent 61d96749
<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.88 2008/09/23 09:20:34 heikki Exp $ --> <!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.89 2008/09/23 10:58:03 heikki Exp $ -->
<chapter id="charset"> <chapter id="charset">
<title>Localization</> <title>Localization</>
...@@ -320,10 +320,10 @@ initdb --locale=sv_SE ...@@ -320,10 +320,10 @@ initdb --locale=sv_SE
<para> <para>
An important restriction, however, is that each database's character set An important restriction, however, is that each database's character set
must be compatible with the database's <envar>LC_CTYPE</> setting. must be compatible with the database's <envar>LC_CTYPE</> and
When <envar>LC_CTYPE</> is <literal>C</> or <literal>POSIX</>, any <envar>LC_COLLATE</> locale settings. For <literal>C</> or
character set is allowed, but for other settings of <envar>LC_CTYPE</> <literal>POSIX</> locale, any character set is allowed, but for other
there is only one character set that will work correctly. locales there is only one character set that will work correctly.
</para> </para>
<sect2 id="multibyte-charset-supported"> <sect2 id="multibyte-charset-supported">
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.211 2008/09/23 09:20:35 heikki Exp $ * $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.212 2008/09/23 10:58:03 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -118,6 +118,7 @@ createdb(const CreatedbStmt *stmt) ...@@ -118,6 +118,7 @@ createdb(const CreatedbStmt *stmt)
int encoding = -1; int encoding = -1;
int dbconnlimit = -1; int dbconnlimit = -1;
int ctype_encoding; int ctype_encoding;
int collate_encoding;
int notherbackends; int notherbackends;
int npreparedxacts; int npreparedxacts;
createdb_failure_params fparms; createdb_failure_params fparms;
...@@ -334,6 +335,7 @@ createdb(const CreatedbStmt *stmt) ...@@ -334,6 +335,7 @@ createdb(const CreatedbStmt *stmt)
* Note: if you change this policy, fix initdb to match. * Note: if you change this policy, fix initdb to match.
*/ */
ctype_encoding = pg_get_encoding_from_locale(dbctype); ctype_encoding = pg_get_encoding_from_locale(dbctype);
collate_encoding = pg_get_encoding_from_locale(dbcollate);
if (!(ctype_encoding == encoding || if (!(ctype_encoding == encoding ||
ctype_encoding == PG_SQL_ASCII || ctype_encoding == PG_SQL_ASCII ||
...@@ -345,9 +347,22 @@ createdb(const CreatedbStmt *stmt) ...@@ -345,9 +347,22 @@ createdb(const CreatedbStmt *stmt)
(errmsg("encoding %s does not match locale %s", (errmsg("encoding %s does not match locale %s",
pg_encoding_to_char(encoding), pg_encoding_to_char(encoding),
dbctype), dbctype),
errdetail("The chosen LC_CTYPE setting requires encoding %s.", errdetail("The chosen CTYPE setting requires encoding %s.",
pg_encoding_to_char(ctype_encoding)))); pg_encoding_to_char(ctype_encoding))));
if (!(collate_encoding == encoding ||
collate_encoding == PG_SQL_ASCII ||
#ifdef WIN32
encoding == PG_UTF8 ||
#endif
(encoding == PG_SQL_ASCII && superuser())))
ereport(ERROR,
(errmsg("encoding %s does not match locale %s",
pg_encoding_to_char(encoding),
dbcollate),
errdetail("The chosen COLLATE setting requires encoding %s.",
pg_encoding_to_char(collate_encoding))));
/* /*
* Check that the new locale is compatible with the source database. * Check that the new locale is compatible with the source database.
* *
......
...@@ -42,7 +42,7 @@ ...@@ -42,7 +42,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* Portions taken from FreeBSD. * Portions taken from FreeBSD.
* *
* $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.160 2008/09/23 09:20:37 heikki Exp $ * $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.161 2008/09/23 10:58:03 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -188,7 +188,8 @@ static void trapsig(int signum); ...@@ -188,7 +188,8 @@ static void trapsig(int signum);
static void check_ok(void); static void check_ok(void);
static char *escape_quotes(const char *src); static char *escape_quotes(const char *src);
static int locale_date_order(const char *locale); static int locale_date_order(const char *locale);
static bool chklocale(const char *locale); static bool check_locale_name(const char *locale);
static bool check_locale_encoding(const char *locale, int encoding);
static void setlocales(void); static void setlocales(void);
static void usage(const char *progname); static void usage(const char *progname);
...@@ -2187,7 +2188,7 @@ locale_date_order(const char *locale) ...@@ -2187,7 +2188,7 @@ locale_date_order(const char *locale)
* this should match the backend check_locale() function * this should match the backend check_locale() function
*/ */
static bool static bool
chklocale(const char *locale) check_locale_name(const char *locale)
{ {
bool ret; bool ret;
int category = LC_CTYPE; int category = LC_CTYPE;
...@@ -2211,6 +2212,50 @@ chklocale(const char *locale) ...@@ -2211,6 +2212,50 @@ chklocale(const char *locale)
return ret; return ret;
} }
/*
 * Verify that the user-selected encoding can be used with a locale.
 *
 * locale	 - locale name whose encoding is looked up via
 *			   pg_get_encoding_from_locale()
 * user_enc  - encoding ID chosen by the user
 *
 * Returns true when the combination is acceptable.  Otherwise an
 * explanatory message is written to stderr and false is returned; the
 * caller decides what to do about the failure.
 *
 * Keep the acceptance rules in sync with the similar check in the backend
 * createdb() function.
 */
static bool
check_locale_encoding(const char *locale, int user_enc)
{
	int			locale_enc = pg_get_encoding_from_locale(locale);

	/* The encodings agree, nothing more to verify */
	if (locale_enc == user_enc)
		return true;

	/* We allow selection of SQL_ASCII --- see notes in createdb() */
	if (locale_enc == PG_SQL_ASCII || user_enc == PG_SQL_ASCII)
		return true;

#ifdef WIN32

	/*
	 * On win32, if the encoding chosen is UTF8, all locales are OK
	 * (assuming the actual locale name passed the checks above). This is
	 * because UTF8 is a pseudo-codepage, that we convert to UTF16 before
	 * doing any operations on, and UTF16 supports all locales.
	 */
	if (user_enc == PG_UTF8)
		return true;
#endif

	/* None of the exemptions applied: report the mismatch */
	fprintf(stderr, _("%s: encoding mismatch\n"), progname);
	fprintf(stderr,
			_("The encoding you selected (%s) and the encoding that the\n"
			  "selected locale uses (%s) do not match. This would lead to\n"
			  "misbehavior in various character string processing functions.\n"
			  "Rerun %s and either do not specify an encoding explicitly,\n"
			  "or choose a matching combination.\n"),
			pg_encoding_to_char(user_enc),
			pg_encoding_to_char(locale_enc),
			progname);

	return false;
}
/* /*
* set up the locale variables * set up the locale variables
* *
...@@ -2241,17 +2286,17 @@ setlocales(void) ...@@ -2241,17 +2286,17 @@ setlocales(void)
* override absent/invalid config settings from initdb's locale settings * override absent/invalid config settings from initdb's locale settings
*/ */
if (strlen(lc_ctype) == 0 || !chklocale(lc_ctype)) if (strlen(lc_ctype) == 0 || !check_locale_name(lc_ctype))
lc_ctype = xstrdup(setlocale(LC_CTYPE, NULL)); lc_ctype = xstrdup(setlocale(LC_CTYPE, NULL));
if (strlen(lc_collate) == 0 || !chklocale(lc_collate)) if (strlen(lc_collate) == 0 || !check_locale_name(lc_collate))
lc_collate = xstrdup(setlocale(LC_COLLATE, NULL)); lc_collate = xstrdup(setlocale(LC_COLLATE, NULL));
if (strlen(lc_numeric) == 0 || !chklocale(lc_numeric)) if (strlen(lc_numeric) == 0 || !check_locale_name(lc_numeric))
lc_numeric = xstrdup(setlocale(LC_NUMERIC, NULL)); lc_numeric = xstrdup(setlocale(LC_NUMERIC, NULL));
if (strlen(lc_time) == 0 || !chklocale(lc_time)) if (strlen(lc_time) == 0 || !check_locale_name(lc_time))
lc_time = xstrdup(setlocale(LC_TIME, NULL)); lc_time = xstrdup(setlocale(LC_TIME, NULL));
if (strlen(lc_monetary) == 0 || !chklocale(lc_monetary)) if (strlen(lc_monetary) == 0 || !check_locale_name(lc_monetary))
lc_monetary = xstrdup(setlocale(LC_MONETARY, NULL)); lc_monetary = xstrdup(setlocale(LC_MONETARY, NULL));
if (strlen(lc_messages) == 0 || !chklocale(lc_messages)) if (strlen(lc_messages) == 0 || !check_locale_name(lc_messages))
#if defined(LC_MESSAGES) && !defined(WIN32) #if defined(LC_MESSAGES) && !defined(WIN32)
{ {
/* when available get the current locale setting */ /* when available get the current locale setting */
...@@ -2452,6 +2497,7 @@ main(int argc, char *argv[]) ...@@ -2452,6 +2497,7 @@ main(int argc, char *argv[])
* environment */ * environment */
char bin_dir[MAXPGPATH]; char bin_dir[MAXPGPATH];
char *pg_data_native; char *pg_data_native;
int user_enc;
#ifdef WIN32 #ifdef WIN32
char *restrict_env; char *restrict_env;
...@@ -2868,44 +2914,12 @@ main(int argc, char *argv[]) ...@@ -2868,44 +2914,12 @@ main(int argc, char *argv[])
} }
} }
else else
{
int user_enc;
int ctype_enc;
encodingid = get_encoding_id(encoding); encodingid = get_encoding_id(encoding);
user_enc = atoi(encodingid);
ctype_enc = pg_get_encoding_from_locale(lc_ctype);
/* We allow selection of SQL_ASCII --- see notes in createdb() */ user_enc = atoi(encodingid);
if (!(ctype_enc == user_enc || if (!check_locale_encoding(lc_ctype, user_enc) ||
ctype_enc == PG_SQL_ASCII || !check_locale_encoding(lc_collate, user_enc))
user_enc == PG_SQL_ASCII exit(1); /* check_locale_encoding printed the error */
#ifdef WIN32
/*
* On win32, if the encoding chosen is UTF8, all locales are OK
* (assuming the actual locale name passed the checks above). This is
* because UTF8 is a pseudo-codepage, that we convert to UTF16 before
* doing any operations on, and UTF16 supports all locales.
*/
|| user_enc == PG_UTF8
#endif
))
{
fprintf(stderr, _("%s: encoding mismatch\n"), progname);
fprintf(stderr,
_("The encoding you selected (%s) and the encoding that the\n"
"selected locale uses (%s) do not match. This would lead to\n"
"misbehavior in various character string processing functions.\n"
"Rerun %s and either do not specify an encoding explicitly,\n"
"or choose a matching combination.\n"),
pg_encoding_to_char(user_enc),
pg_encoding_to_char(ctype_enc),
progname);
exit(1);
}
}
if (strlen(default_text_search_config) == 0) if (strlen(default_text_search_config) == 0)
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment