Commit 32032d42 authored by Teodor Sigaev

Fix usage of char2wchar/wchar2char. Changes:

- pg_wchar and wchar_t can have different sizes, so char2wchar
  no longer calls pg_mb2wchar_with_len, to prevent an
  out-of-bounds memory bug
- make char2wchar/wchar2char symmetric; they should no longer be
  called with the C locale, because mbstowcs/wcstombs often do not
  work correctly with the C locale.
- Text parser uses pg_mb2wchar_with_len directly in case of
  C-locale and multibyte encoding

Per bug report by Hiroshi Inoue <inoue@tpf.co.jp> and
following discussion.

Backpatch back to 8.2, when multibyte support was implemented in tsearch.
parent 876b37d5
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.20 2009/01/15 16:33:59 teodor Exp $ * $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.21 2009/03/02 15:10:09 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -240,12 +240,12 @@ typedef struct TParser ...@@ -240,12 +240,12 @@ typedef struct TParser
int lenstr; /* length of mbstring */ int lenstr; /* length of mbstring */
#ifdef USE_WIDE_UPPER_LOWER #ifdef USE_WIDE_UPPER_LOWER
wchar_t *wstr; /* wide character string */ wchar_t *wstr; /* wide character string */
int lenwstr; /* length of wsting */ pg_wchar *pgwstr; /* wide character string for C-locale */
bool usewide;
#endif #endif
/* State of parse */ /* State of parse */
int charmaxlen; int charmaxlen;
bool usewide;
TParserPosition *state; TParserPosition *state;
bool ignore; bool ignore;
bool wanthost; bool wanthost;
...@@ -299,13 +299,24 @@ TParserInit(char *str, int len) ...@@ -299,13 +299,24 @@ TParserInit(char *str, int len)
if (prs->charmaxlen > 1) if (prs->charmaxlen > 1)
{ {
prs->usewide = true; prs->usewide = true;
if ( lc_ctype_is_c() )
{
/*
* char2wchar doesn't work for C-locale and
* sizeof(pg_wchar) could be not equal to sizeof(wchar_t)
*/
prs->pgwstr = (pg_wchar*) palloc(sizeof(pg_wchar) * (prs->lenstr + 1));
pg_mb2wchar_with_len(prs->str, prs->pgwstr, prs->lenstr);
}
else
{
prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr + 1)); prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr + 1));
prs->lenwstr = char2wchar(prs->wstr, prs->lenstr + 1, char2wchar(prs->wstr, prs->lenstr + 1, prs->str, prs->lenstr);
prs->str, prs->lenstr); }
} }
else else
#endif
prs->usewide = false; prs->usewide = false;
#endif
prs->state = newTParserPosition(NULL); prs->state = newTParserPosition(NULL);
prs->state->state = TPS_Base; prs->state->state = TPS_Base;
...@@ -331,6 +342,8 @@ TParserClose(TParser *prs) ...@@ -331,6 +342,8 @@ TParserClose(TParser *prs)
#ifdef USE_WIDE_UPPER_LOWER #ifdef USE_WIDE_UPPER_LOWER
if (prs->wstr) if (prs->wstr)
pfree(prs->wstr); pfree(prs->wstr);
if (prs->pgwstr)
pfree(prs->pgwstr);
#endif #endif
pfree(prs); pfree(prs);
...@@ -338,10 +351,12 @@ TParserClose(TParser *prs) ...@@ -338,10 +351,12 @@ TParserClose(TParser *prs)
/* /*
* Character-type support functions, equivalent to is* macros, but * Character-type support functions, equivalent to is* macros, but
* working with any possible encodings and locales. Note, * working with any possible encodings and locales. Notes:
* that with multibyte encoding and C-locale isw* function may fail * - with multibyte encoding and C-locale isw* function may fail
* or give wrong result. Note 2: multibyte encoding and C-locale * or give wrong result.
* often are used for Asian languages * - multibyte encoding and C-locale often are used for
* Asian languages.
* - if locale is C then we use pgwstr instead of wstr
*/ */
#ifdef USE_WIDE_UPPER_LOWER #ifdef USE_WIDE_UPPER_LOWER
...@@ -352,8 +367,8 @@ p_is##type(TParser *prs) { \ ...@@ -352,8 +367,8 @@ p_is##type(TParser *prs) { \
Assert( prs->state ); \ Assert( prs->state ); \
if ( prs->usewide ) \ if ( prs->usewide ) \
{ \ { \
if ( lc_ctype_is_c() ) \ if ( prs->pgwstr ) \
return is##type( 0xff & *( prs->wstr + prs->state->poschar) ); \ return is##type( 0xff & *( prs->pgwstr + prs->state->poschar) );\
\ \
return isw##type( *(wint_t*)( prs->wstr + prs->state->poschar ) ); \ return isw##type( *(wint_t*)( prs->wstr + prs->state->poschar ) ); \
} \ } \
...@@ -373,9 +388,9 @@ p_isalnum(TParser *prs) ...@@ -373,9 +388,9 @@ p_isalnum(TParser *prs)
if (prs->usewide) if (prs->usewide)
{ {
if (lc_ctype_is_c()) if (prs->pgwstr)
{ {
unsigned int c = *(prs->wstr + prs->state->poschar); unsigned int c = *(prs->pgwstr + prs->state->poschar);
/* /*
* any non-ascii symbol with multibyte encoding with C-locale is * any non-ascii symbol with multibyte encoding with C-locale is
...@@ -405,9 +420,9 @@ p_isalpha(TParser *prs) ...@@ -405,9 +420,9 @@ p_isalpha(TParser *prs)
if (prs->usewide) if (prs->usewide)
{ {
if (lc_ctype_is_c()) if (prs->pgwstr)
{ {
unsigned int c = *(prs->wstr + prs->state->poschar); unsigned int c = *(prs->pgwstr + prs->state->poschar);
/* /*
* any non-ascii symbol with multibyte encoding with C-locale is * any non-ascii symbol with multibyte encoding with C-locale is
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
* (currently mule internal code (mic) is used) * (currently mule internal code (mic) is used)
* Tatsuo Ishii * Tatsuo Ishii
* *
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.78 2009/01/22 10:09:48 mha Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.79 2009/03/02 15:10:09 teodor Exp $
*/ */
#include "postgres.h" #include "postgres.h"
...@@ -601,7 +601,10 @@ wchar2char(char *to, const wchar_t *from, size_t tolen) ...@@ -601,7 +601,10 @@ wchar2char(char *to, const wchar_t *from, size_t tolen)
} }
else else
#endif /* WIN32 */ #endif /* WIN32 */
{
Assert( !lc_ctype_is_c() );
result = wcstombs(to, from, tolen); result = wcstombs(to, from, tolen);
}
return result; return result;
} }
...@@ -646,24 +649,14 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen) ...@@ -646,24 +649,14 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
} }
else else
#endif /* WIN32 */ #endif /* WIN32 */
{
if (lc_ctype_is_c())
{
/*
* pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
* allocated with sufficient space
*/
result = pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
}
else
{ {
/* mbstowcs requires ending '\0' */ /* mbstowcs requires ending '\0' */
char *str = pnstrdup(from, fromlen); char *str = pnstrdup(from, fromlen);
Assert( !lc_ctype_is_c() );
result = mbstowcs(to, str, tolen); result = mbstowcs(to, str, tolen);
pfree(str); pfree(str);
} }
}
if (result == -1) if (result == -1)
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment