Commit a837ed88 authored by Peter Eisentraut's avatar Peter Eisentraut

Detect locale/encoding mismatch in initdb, or pick a suitable encoding automatically if none was specified.
parent bbcee544
...@@ -6989,7 +6989,8 @@ done ...@@ -6989,7 +6989,8 @@ done
for ac_header in crypt.h dld.h endian.h fp_class.h getopt.h ieeefp.h poll.h pwd.h sys/ipc.h sys/poll.h sys/pstat.h sys/select.h sys/sem.h sys/socket.h sys/shm.h sys/time.h sys/un.h termios.h utime.h wchar.h wctype.h kernel/OS.h kernel/image.h SupportDefs.h
for ac_header in crypt.h dld.h endian.h fp_class.h getopt.h ieeefp.h langinfo.h poll.h pwd.h sys/ipc.h sys/poll.h sys/pstat.h sys/select.h sys/sem.h sys/socket.h sys/shm.h sys/time.h sys/un.h termios.h utime.h wchar.h wctype.h kernel/OS.h kernel/image.h SupportDefs.h
do do
as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
if eval "test \"\${$as_ac_Header+set}\" = set"; then if eval "test \"\${$as_ac_Header+set}\" = set"; then
......
dnl Process this file with autoconf to produce a configure script. dnl Process this file with autoconf to produce a configure script.
dnl $PostgreSQL: pgsql/configure.in,v 1.366 2004/07/10 01:24:29 momjian Exp $ dnl $PostgreSQL: pgsql/configure.in,v 1.367 2004/07/14 17:55:09 petere Exp $
dnl dnl
dnl Developers, please strive to achieve this order: dnl Developers, please strive to achieve this order:
dnl dnl
...@@ -675,7 +675,7 @@ fi ...@@ -675,7 +675,7 @@ fi
## ##
dnl sys/socket.h is required by AC_FUNC_ACCEPT_ARGTYPES dnl sys/socket.h is required by AC_FUNC_ACCEPT_ARGTYPES
AC_CHECK_HEADERS([crypt.h dld.h endian.h fp_class.h getopt.h ieeefp.h poll.h pwd.h sys/ipc.h sys/poll.h sys/pstat.h sys/select.h sys/sem.h sys/socket.h sys/shm.h sys/time.h sys/un.h termios.h utime.h wchar.h wctype.h kernel/OS.h kernel/image.h SupportDefs.h]) AC_CHECK_HEADERS([crypt.h dld.h endian.h fp_class.h getopt.h ieeefp.h langinfo.h poll.h pwd.h sys/ipc.h sys/poll.h sys/pstat.h sys/select.h sys/sem.h sys/socket.h sys/shm.h sys/time.h sys/un.h termios.h utime.h wchar.h wctype.h kernel/OS.h kernel/image.h SupportDefs.h])
# At least on IRIX, cpp test for netinet/tcp.h will fail unless # At least on IRIX, cpp test for netinet/tcp.h will fail unless
# netinet/in.h is included first. # netinet/in.h is included first.
......
<!-- <!--
$PostgreSQL: pgsql/doc/src/sgml/ref/initdb.sgml,v 1.30 2004/06/24 19:26:54 tgl Exp $ $PostgreSQL: pgsql/doc/src/sgml/ref/initdb.sgml,v 1.31 2004/07/14 17:55:09 petere Exp $
PostgreSQL documentation PostgreSQL documentation
--> -->
...@@ -121,8 +121,8 @@ PostgreSQL documentation ...@@ -121,8 +121,8 @@ PostgreSQL documentation
<para> <para>
Selects the encoding of the template database. This will also Selects the encoding of the template database. This will also
be the default encoding of any database you create later, be the default encoding of any database you create later,
unless you override it there. The default is unless you override it there. The default is derived from the locale, or
<literal>SQL_ASCII</literal>. The character sets supported by <literal>SQL_ASCII</literal> if that does not work. The character sets supported by
the <productname>PostgreSQL</productname> server are described the <productname>PostgreSQL</productname> server are described
in <xref linkend="multibyte-charset-supported">. in <xref linkend="multibyte-charset-supported">.
</para> </para>
......
...@@ -39,7 +39,7 @@ ...@@ -39,7 +39,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* Portions taken from FreeBSD. * Portions taken from FreeBSD.
* *
* $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.42 2004/07/12 01:54:10 momjian Exp $ * $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.43 2004/07/14 17:55:10 petere Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -52,6 +52,9 @@ ...@@ -52,6 +52,9 @@
#include <locale.h> #include <locale.h>
#include <signal.h> #include <signal.h>
#include <errno.h> #include <errno.h>
#ifdef HAVE_LANGINFO_H
# include <langinfo.h>
#endif
#include "libpq/pqsignal.h" #include "libpq/pqsignal.h"
#include "mb/pg_wchar.h" #include "mb/pg_wchar.h"
...@@ -600,6 +603,15 @@ get_id(void) ...@@ -600,6 +603,15 @@ get_id(void)
return xstrdup(pw->pw_name); return xstrdup(pw->pw_name);
} }
/*
 * Format an encoding ID as a decimal string.
 *
 * The text is built in a local buffer and a copy made with xstrdup() is
 * returned.
 */
static char *
encodingid_to_string(int enc)
{
	char		buf[20];

	snprintf(buf, sizeof(buf), "%d", enc);
	return xstrdup(buf);
}
/* /*
* get the encoding id for a given encoding name * get the encoding id for a given encoding name
*/ */
...@@ -607,15 +619,13 @@ static char * ...@@ -607,15 +619,13 @@ static char *
get_encoding_id(char *encoding_name) get_encoding_id(char *encoding_name)
{ {
int enc; int enc;
char result[20];
if (encoding_name && *encoding_name) if (encoding_name && *encoding_name)
{ {
if ((enc = pg_char_to_encoding(encoding_name)) >= 0 && if ((enc = pg_char_to_encoding(encoding_name)) >= 0 &&
pg_valid_server_encoding(encoding_name) >= 0) pg_valid_server_encoding(encoding_name) >= 0)
{ {
sprintf(result, "%d", enc); return encodingid_to_string(enc);
return xstrdup(result);
} }
} }
fprintf(stderr, _("%s: \"%s\" is not a valid server encoding name\n"), fprintf(stderr, _("%s: \"%s\" is not a valid server encoding name\n"),
...@@ -623,6 +633,191 @@ get_encoding_id(char *encoding_name) ...@@ -623,6 +633,191 @@ get_encoding_id(char *encoding_name)
exit(1); exit(1);
} }
#ifdef HAVE_LANGINFO_H
/*
* Checks whether the encoding selected for PostgreSQL and the
* encoding used by the system locale match.
*/
/*
 * One row of the lookup table that pairs a PostgreSQL encoding with a
 * codeset name as reported by the system for a locale.
 */
struct encoding_match
{
	enum pg_enc pg_enc_code;		/* PostgreSQL encoding identifier */
	const char *system_enc_name;	/* system codeset name; NULL marks the
									 * end of the table */
};
/*
 * Known pairings of PostgreSQL encodings and system codeset names.
 *
 * Several spellings are listed per encoding because the codeset string is
 * compared against the system-reported name (case-insensitively) by the
 * lookup code.  The table is read-only and is terminated by an entry whose
 * system_enc_name is NULL.
 */
static const struct encoding_match encoding_match_list[] = {
	{ PG_EUC_JP, "EUC-JP" },
	{ PG_EUC_JP, "eucJP" },
	{ PG_EUC_JP, "IBM-eucJP" },
	{ PG_EUC_JP, "sdeckanji" },

	{ PG_EUC_CN, "EUC-CN" },
	{ PG_EUC_CN, "eucCN" },
	{ PG_EUC_CN, "IBM-eucCN" },
	{ PG_EUC_CN, "GB2312" },
	{ PG_EUC_CN, "dechanzi" },

	{ PG_EUC_KR, "EUC-KR" },
	{ PG_EUC_KR, "eucKR" },
	{ PG_EUC_KR, "IBM-eucKR" },
	{ PG_EUC_KR, "deckorean" },
	{ PG_EUC_KR, "5601" },

	{ PG_EUC_TW, "EUC-TW" },
	{ PG_EUC_TW, "eucTW" },
	{ PG_EUC_TW, "IBM-eucTW" },
	{ PG_EUC_TW, "cns11643" },

#ifdef NOT_VERIFIED
	{ PG_JOHAB, "???" },
#endif

	{ PG_UTF8, "UTF-8" },
	{ PG_UTF8, "utf8" },

	{ PG_LATIN1, "ISO-8859-1" },
	{ PG_LATIN1, "ISO8859-1" },
	{ PG_LATIN1, "iso88591" },

	{ PG_LATIN2, "ISO-8859-2" },
	{ PG_LATIN2, "ISO8859-2" },
	{ PG_LATIN2, "iso88592" },

	{ PG_LATIN3, "ISO-8859-3" },
	{ PG_LATIN3, "ISO8859-3" },
	{ PG_LATIN3, "iso88593" },

	{ PG_LATIN4, "ISO-8859-4" },
	{ PG_LATIN4, "ISO8859-4" },
	{ PG_LATIN4, "iso88594" },

	{ PG_LATIN5, "ISO-8859-9" },
	{ PG_LATIN5, "ISO8859-9" },
	{ PG_LATIN5, "iso88599" },

	{ PG_LATIN6, "ISO-8859-10" },
	{ PG_LATIN6, "ISO8859-10" },
	{ PG_LATIN6, "iso885910" },

	{ PG_LATIN7, "ISO-8859-13" },
	{ PG_LATIN7, "ISO8859-13" },
	{ PG_LATIN7, "iso885913" },

	{ PG_LATIN8, "ISO-8859-14" },
	{ PG_LATIN8, "ISO8859-14" },
	{ PG_LATIN8, "iso885914" },

	{ PG_LATIN9, "ISO-8859-15" },
	{ PG_LATIN9, "ISO8859-15" },
	{ PG_LATIN9, "iso885915" },

	{ PG_LATIN10, "ISO-8859-16" },
	{ PG_LATIN10, "ISO8859-16" },
	{ PG_LATIN10, "iso885916" },

	{ PG_WIN1256, "CP1256" },
	{ PG_TCVN, "CP1258" },
#ifdef NOT_VERIFIED
	{ PG_WIN874, "???" },
#endif
	{ PG_KOI8R, "KOI8-R" },
	{ PG_WIN1251, "CP1251" },
	{ PG_ALT, "CP866" },

	{ PG_ISO_8859_5, "ISO-8859-5" },
	{ PG_ISO_8859_5, "ISO8859-5" },
	{ PG_ISO_8859_5, "iso88595" },

	{ PG_ISO_8859_6, "ISO-8859-6" },
	{ PG_ISO_8859_6, "ISO8859-6" },
	{ PG_ISO_8859_6, "iso88596" },

	{ PG_ISO_8859_7, "ISO-8859-7" },
	{ PG_ISO_8859_7, "ISO8859-7" },
	{ PG_ISO_8859_7, "iso88597" },

	{ PG_ISO_8859_8, "ISO-8859-8" },
	{ PG_ISO_8859_8, "ISO8859-8" },
	{ PG_ISO_8859_8, "iso88598" },

	{ PG_SQL_ASCII, NULL } /* end marker */
};
/*
 * Return the codeset name the system reports for the locale "ctype".
 *
 * LC_CTYPE is switched to "ctype" only long enough to query
 * nl_langinfo(CODESET) and is then set back to its previous value.
 *
 * The result is a copy made with xstrdup() which the caller must free().
 * NULL is returned when the current LC_CTYPE setting cannot be read, when
 * "ctype" cannot be activated, or when no codeset name is available;
 * callers must check for NULL before using the result.
 */
static char *
get_encoding_from_locale(const char *ctype)
{
	char	   *save;
	char	   *sys;

	save = setlocale(LC_CTYPE, NULL);
	if (!save)
		return NULL;
	/* copy it: the string may be overwritten by later setlocale() calls */
	save = xstrdup(save);

	if (!setlocale(LC_CTYPE, ctype))
	{
		/*
		 * A failed setlocale() leaves the locale unchanged, so querying
		 * CODESET now would describe the old locale, not "ctype".
		 */
		free(save);
		return NULL;
	}

	/* copy before restoring the locale, which may invalidate the string */
	sys = nl_langinfo(CODESET);
	if (sys)
		sys = xstrdup(sys);

	setlocale(LC_CTYPE, save);
	free(save);

	return sys;
}
/*
 * Warn on stderr if the PostgreSQL encoding "pg_enc" is not listed in
 * encoding_match_list as a match for the codeset of locale "ctype".
 *
 * Nothing is printed when a matching table entry exists.  If the codeset of
 * the locale cannot be determined, a separate warning is printed and no
 * comparison is attempted.  The function only warns; it never exits.
 */
static void
check_encodings_match(int pg_enc, const char *ctype)
{
	char	   *sys;
	int			i;

	sys = get_encoding_from_locale(ctype);
	if (!sys)
	{
		/* get_encoding_from_locale() returns NULL on failure */
		fprintf(stderr,
				_("%s: warning: could not determine encoding for locale \"%s\"\n"),
				progname, ctype);
		return;
	}

	for (i = 0; encoding_match_list[i].system_enc_name; i++)
	{
		if (pg_enc == encoding_match_list[i].pg_enc_code
			&& strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0)
		{
			free(sys);
			return;
		}
	}

	fprintf(stderr,
			_("%s: warning: encoding mismatch\n"), progname);
	fprintf(stderr,
			_("The encoding you selected (%s) and the encoding that the selected\n"
			  "locale uses (%s) are not known to match. This may lead to\n"
			  "misbehavior in various character string processing functions. To fix\n"
			  "this situation, rerun %s and either do not specify an encoding\n"
			  "explicitly, or choose a matching combination.\n"),
			pg_encoding_to_char(pg_enc), sys, progname);

	free(sys);
}
/*
 * Look up the PostgreSQL encoding that corresponds to the codeset of locale
 * "ctype".
 *
 * Returns the pg_enc_code of the first encoding_match_list entry whose
 * system name equals the locale's codeset (compared case-insensitively), or
 * -1 if there is no such entry or the codeset cannot be determined.
 */
static int
find_matching_encoding(const char *ctype)
{
	char	   *sys;
	int			result = -1;
	int			i;

	sys = get_encoding_from_locale(ctype);
	if (!sys)
		return -1;				/* get_encoding_from_locale() failed */

	for (i = 0; encoding_match_list[i].system_enc_name; i++)
	{
		if (strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0)
		{
			result = encoding_match_list[i].pg_enc_code;
			break;
		}
	}

	free(sys);
	return result;
}
#endif /* HAVE_LANGINFO_H */
/* /*
* get short version of VERSION * get short version of VERSION
*/ */
...@@ -2027,13 +2222,11 @@ main(int argc, char *argv[]) ...@@ -2027,13 +2222,11 @@ main(int argc, char *argv[])
fprintf(stderr, fprintf(stderr,
"VERSION=%s\n" "VERSION=%s\n"
"PGDATA=%s\nshare_path=%s\nPGPATH=%s\n" "PGDATA=%s\nshare_path=%s\nPGPATH=%s\n"
"ENCODING=%s\nENCODINGID=%s\n"
"POSTGRES_SUPERUSERNAME=%s\nPOSTGRES_BKI=%s\n" "POSTGRES_SUPERUSERNAME=%s\nPOSTGRES_BKI=%s\n"
"POSTGRES_DESCR=%s\nPOSTGRESQL_CONF_SAMPLE=%s\n" "POSTGRES_DESCR=%s\nPOSTGRESQL_CONF_SAMPLE=%s\n"
"PG_HBA_SAMPLE=%s\nPG_IDENT_SAMPLE=%s\n", "PG_HBA_SAMPLE=%s\nPG_IDENT_SAMPLE=%s\n",
PG_VERSION, PG_VERSION,
pg_data, share_path, bin_path, pg_data, share_path, bin_path,
encoding, encodingid,
effective_user, bki_file, effective_user, bki_file,
desc_file, conf_file, desc_file, conf_file,
hba_file, ident_file); hba_file, ident_file);
...@@ -2051,21 +2244,20 @@ main(int argc, char *argv[]) ...@@ -2051,21 +2244,20 @@ main(int argc, char *argv[])
check_input(features_file); check_input(features_file);
check_input(system_views_file); check_input(system_views_file);
setlocales();
printf(_("The files belonging to this database system will be owned " printf(_("The files belonging to this database system will be owned "
"by user \"%s\".\n" "by user \"%s\".\n"
"This user must also own the server process.\n\n"), "This user must also own the server process.\n\n"),
effective_user); effective_user);
setlocales();
if (strcmp(lc_ctype, lc_collate) == 0 && if (strcmp(lc_ctype, lc_collate) == 0 &&
strcmp(lc_ctype, lc_time) == 0 && strcmp(lc_ctype, lc_time) == 0 &&
strcmp(lc_ctype, lc_numeric) == 0 && strcmp(lc_ctype, lc_numeric) == 0 &&
strcmp(lc_ctype, lc_monetary) == 0 && strcmp(lc_ctype, lc_monetary) == 0 &&
strcmp(lc_ctype, lc_messages) == 0) strcmp(lc_ctype, lc_messages) == 0)
{ {
printf(_("The database cluster will be initialized with locale %s.\n\n"), printf(_("The database cluster will be initialized with locale %s.\n"), lc_ctype);
lc_ctype);
} }
else else
{ {
...@@ -2075,7 +2267,7 @@ main(int argc, char *argv[]) ...@@ -2075,7 +2267,7 @@ main(int argc, char *argv[])
" MESSAGES: %s\n" " MESSAGES: %s\n"
" MONETARY: %s\n" " MONETARY: %s\n"
" NUMERIC: %s\n" " NUMERIC: %s\n"
" TIME: %s\n\n"), " TIME: %s\n"),
lc_collate, lc_collate,
lc_ctype, lc_ctype,
lc_messages, lc_messages,
...@@ -2084,6 +2276,34 @@ main(int argc, char *argv[]) ...@@ -2084,6 +2276,34 @@ main(int argc, char *argv[])
lc_time); lc_time);
} }
#ifdef HAVE_LANGINFO_H
if (strcmp(lc_ctype, "C") != 0 && strcmp(lc_ctype, "POSIX") != 0)
{
if (strlen(encoding) == 0)
{
int tmp;
tmp = find_matching_encoding(lc_ctype);
if (tmp == -1)
{
fprintf(stderr, _("%s: could not find suitable encoding for locale \"%s\"\n"), progname, lc_ctype);
fprintf(stderr, _("Rerun %s with the -E option.\n"), progname);
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
}
else
{
encodingid = encodingid_to_string(tmp);
printf(_("The default database encoding has accordingly been set to %s.\n"),
pg_encoding_to_char(tmp));
}
}
else
check_encodings_match(atoi(encodingid), lc_ctype);
}
#endif /* HAVE_LANGINFO_H */
printf("\n");
umask(077); umask(077);
/* /*
......
...@@ -200,6 +200,9 @@ ...@@ -200,6 +200,9 @@
/* Define to 1 if `enc_part2' is member of `krb5_ticket'. */ /* Define to 1 if `enc_part2' is member of `krb5_ticket'. */
#undef HAVE_KRB5_TICKET_ENC_PART2 #undef HAVE_KRB5_TICKET_ENC_PART2
/* Define to 1 if you have the <langinfo.h> header file. */
#undef HAVE_LANGINFO_H
/* Define to 1 if you have the `bind' library (-lbind). */ /* Define to 1 if you have the `bind' library (-lbind). */
#undef HAVE_LIBBIND #undef HAVE_LIBBIND
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment