Commit 9de09c08 authored by Bruce Momjian

Move wchar2char() and char2wchar() from tsearch into /mb so that they are
easier to use from other modules; also move pnstrdup().

Clean up code slightly.
parent 3eb9da52
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.8 2008/06/17 16:09:06 momjian Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.9 2008/06/18 18:42:54 momjian Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -16,125 +16,8 @@
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#ifdef USE_WIDE_UPPER_LOWER
/*
 * wchar2char
 *		Turn a zero-terminated wide-character string into multibyte form.
 *
 * The calling convention deliberately matches the standard wcstombs():
 * "from" has to be zero-terminated, no more than "tolen" bytes are stored
 * at "to", and a terminating zero byte is stored only if it still fits.
 *
 * The return value is the number of bytes produced, not counting any
 * terminator.  With tolen == 0 the function returns 0 without looking at
 * either buffer.  On Windows with a UTF8 database, a failed conversion is
 * reported as (size_t) -1; everywhere else the result is simply whatever
 * wcstombs() returns.
 */
size_t
wchar2char(char *to, const wchar_t *from, size_t tolen)
{
	/* No room at all in the output: nothing to do. */
	if (tolen == 0)
		return 0;

#ifdef WIN32
	if (GetDatabaseEncoding() == PG_UTF8)
	{
		int			nbytes = WideCharToMultiByte(CP_UTF8, 0, from, -1,
												 to, tolen, NULL, NULL);

		/* A non-positive count from the Windows API is reported as failure. */
		if (nbytes <= 0)
			return (size_t) -1;

		Assert(nbytes <= tolen);

		/* The Microsoft count includes the zero terminator; ours must not. */
		return nbytes - 1;
	}
#endif   /* WIN32 */

	/* Everything else is handled by the C library. */
	return wcstombs(to, from, tolen);
}
/*
 * char2wchar
 *		Turn a counted multibyte string into wide characters.
 *
 * This is close to mbstowcs(), with two differences: the input does not
 * have to be null-terminated (its size in bytes is given by "fromlen"), and
 * an invalid input encoding is reported through ereport() instead of a -1
 * return value.
 *
 * "tolen" is the maximum number of wchar_t's that may be stored at "to".
 * The output is zero-terminated only when there is room for the terminator.
 * The return value is the number of wide characters produced; with
 * tolen == 0 it is 0 and nothing is stored.
 */
size_t
char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
{
	char	   *terminated;
	size_t		nwchars;

	/* No room at all in the output: nothing to do. */
	if (tolen == 0)
		return 0;

#ifdef WIN32
	if (GetDatabaseEncoding() == PG_UTF8)
	{
		int			nconv = 0;

		/* The Windows API does not work for zero-length input; skip it then. */
		if (fromlen != 0)
		{
			/* Offer only tolen - 1 slots so the terminator below always fits. */
			nconv = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
			if (nconv <= 0)
			{
				/* see notes in oracle_compat.c about error reporting */
				pg_verifymbstr(from, fromlen, false);
				ereport(ERROR,
						(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
						 errmsg("invalid multibyte character for locale"),
						 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
			}
		}

		Assert(nconv < tolen);
		to[nconv] = 0;
		return nconv;
	}
#endif   /* WIN32 */

	if (lc_ctype_is_c())
	{
		/*
		 * pg_mb2wchar_with_len always stores a trailing '\0' and is not told
		 * about tolen, so the caller must have made 'to' large enough.
		 */
		return pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
	}

	/* mbstowcs needs a '\0'-terminated input, so work on a terminated copy. */
	terminated = pnstrdup(from, fromlen);
	nwchars = mbstowcs(to, terminated, tolen);
	pfree(terminated);

	if (nwchars == (size_t) -1)
	{
		/* Let pg_verifymbstr examine the input before the generic report. */
		pg_verifymbstr(from, fromlen, false);
		ereport(ERROR,
				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
				 errmsg("invalid multibyte character for locale"),
				 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
	}

	/* Terminate the output whenever there is a free slot for it. */
	if (nwchars < tolen)
		to[nwchars] = 0;

	return nwchars;
}
int
t_isdigit(const char *ptr)
{
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.9 2008/01/01 19:45:52 momjian Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.10 2008/06/18 18:42:54 momjian Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -153,13 +153,3 @@ searchstoplist(StopList *s, char *key)
bsearch(&key, s->stop, s->len,
sizeof(char *), comparestr)) ? true : false;
}
/*
 * pnstrdup
 *		Copy the first "len" bytes of "in" into a freshly palloc'd buffer
 *		and terminate the copy with a zero byte.
 *
 * Exactly "len" bytes are copied, so "in" does not itself have to be
 * zero-terminated.  The returned buffer is len + 1 bytes long.
 */
char *
pnstrdup(const char *in, int len)
{
	char	   *copy;

	/* one extra byte for the terminator */
	copy = palloc(len + 1);

	memcpy(copy, in, len);
	copy[len] = '\0';

	return copy;
}
......@@ -4,7 +4,7 @@
* (currently mule internal code (mic) is used)
* Tatsuo Ishii
*
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.71 2008/05/27 12:24:42 mha Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.72 2008/06/18 18:42:54 momjian Exp $
*/
#include "postgres.h"
......@@ -555,6 +555,134 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_
return result;
}
#ifdef USE_WIDE_UPPER_LOWER
/*
 * wchar2char
 *		Turn a zero-terminated wide-character string into multibyte form.
 *
 * The calling convention is that of the standard wcstombs() function:
 * "from" must be zero-terminated, at most "tolen" bytes are stored at "to",
 * and the output gets a zero terminator only if there is room for one.
 *
 * Returns the number of bytes produced, not counting any terminator, or
 * (size_t) -1 when the conversion fails.  With tolen == 0 the function
 * returns 0 without looking at either buffer.
 */
size_t
wchar2char(char *to, const wchar_t *from, size_t tolen)
{
	/* No room at all in the output: nothing to do. */
	if (tolen == 0)
		return 0;

#ifdef WIN32

	/*
	 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
	 * wcstombs will not do the UTF8 conversion for us, so call
	 * WideCharToMultiByte() directly when the database encoding is UTF8.
	 */
	if (GetDatabaseEncoding() == PG_UTF8)
	{
		size_t		nbytes;

		nbytes = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
									 NULL, NULL);

		/* A zero return is failure */
		if (nbytes <= 0)
			return (size_t) -1;

		Assert(nbytes <= tolen);

		/* Microsoft counts the zero terminator in the result; we do not */
		return nbytes - 1;
	}
#endif   /* WIN32 */

	/* Everything else is handled by the C library. */
	return wcstombs(to, from, tolen);
}
/*
 * char2wchar
 *		Turn a counted multibyte string into wide characters.
 *
 * This is close to mbstowcs(), with two differences: the input does not
 * have to be null-terminated (its size in bytes is given by "fromlen"), and
 * an invalid input encoding is reported through ereport() instead of a -1
 * return value.
 *
 * "tolen" is the maximum number of wchar_t's that may be stored at "to".
 * The output is zero-terminated only when there is room for the terminator.
 * The return value is the number of wide characters produced; with
 * tolen == 0 it is 0 and nothing is stored.
 */
size_t
char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
{
	/* value every conversion path uses to signal "conversion failed" */
	const size_t conv_failed = (size_t) -1;
	size_t		nwchars;

	/* No room at all in the output: nothing to do. */
	if (tolen == 0)
		return 0;

#ifdef WIN32

	/*
	 * UTF8 databases on Windows go through the Win32 API rather than the C
	 * library; see the WIN32 "Unicode" comment in wchar2char().
	 */
	if (GetDatabaseEncoding() == PG_UTF8)
	{
		/* Win32 API does not work for zero-length input, so skip it then */
		nwchars = 0;
		if (fromlen != 0)
		{
			/* Offer only tolen - 1 slots so the terminator below fits. */
			nwchars = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);

			/* A zero return is failure */
			if (nwchars == 0)
				nwchars = conv_failed;
		}

		if (nwchars != conv_failed)
		{
			Assert(nwchars < tolen);
			/* Append trailing null wchar (MultiByteToWideChar() does not) */
			to[nwchars] = 0;
		}
	}
	else
#endif   /* WIN32 */
	if (lc_ctype_is_c())
	{
		/*
		 * pg_mb2wchar_with_len always adds trailing '\0' and is not told
		 * about tolen, so 'to' should be allocated with sufficient space.
		 */
		nwchars = pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
	}
	else
	{
		/* mbstowcs requires ending '\0', so convert a terminated copy */
		char	   *terminated = pnstrdup(from, fromlen);

		nwchars = mbstowcs(to, terminated, tolen);
		pfree(terminated);
	}

	if (nwchars == conv_failed)
	{
		/*
		 * An invalid multibyte character was found.  pg_verifymbstr gets the
		 * first chance to complain, since it may produce a more useful
		 * message.  It can however consider the string valid even though the
		 * conversion rejected it, which suggests that the LC_CTYPE locale
		 * does not match the database encoding; in that case fall back to a
		 * generic error.
		 */
		pg_verifymbstr(from, fromlen, false);	/* might not return */

		/* but if it does ... */
		ereport(ERROR,
				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
				 errmsg("invalid multibyte character for locale"),
				 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
	}

	return nwchars;
}
#endif
/* convert a multibyte string to a wchar */
int
pg_mb2wchar(const char *from, pg_wchar *to)
......
......@@ -14,7 +14,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mmgr/mcxt.c,v 1.63 2008/01/01 19:45:55 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mmgr/mcxt.c,v 1.64 2008/06/18 18:42:54 momjian Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -624,6 +624,18 @@ repalloc(void *pointer, Size size)
pointer, size);
}
/*
 * pnstrdup
 *		Copy the first "len" bytes of "in" into a freshly palloc'd buffer
 *		and terminate the copy with a zero byte.
 *
 * Exactly "len" bytes are copied, so "in" does not itself have to be
 * zero-terminated.  The returned buffer is len + 1 bytes long.
 */
char *
pnstrdup(const char *in, int len)
{
	char	   *copy;

	/* one extra byte for the terminator */
	copy = palloc(len + 1);

	memcpy(copy, in, len);
	copy[len] = '\0';

	return copy;
}
/*
* MemoryContextSwitchTo
* Returns the current context; installs the given context.
......
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.78 2008/01/01 19:45:58 momjian Exp $
* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.79 2008/06/18 18:42:54 momjian Exp $
*
* NOTES
* This is used both by the backend and by libpq, but should not be
......@@ -362,6 +362,11 @@ extern int pg_mbcharcliplen(const char *mbstr, int len, int imit);
extern int pg_encoding_max_length(int encoding);
extern int pg_database_encoding_max_length(void);
/*
 * Conversions between multibyte strings and wchar_t strings.  These are
 * declared only when USE_WIDE_UPPER_LOWER is defined.
 */
#ifdef USE_WIDE_UPPER_LOWER
/* from must be zero-terminated; tolen is the maximum number of bytes to store at *to */
extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen);
/* fromlen is the number of input bytes; tolen is the maximum number of wchar_t's to store at *to */
extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen);
#endif
extern void SetDefaultClientEncoding(void);
extern int SetClientEncoding(int encoding, bool doit);
extern void InitializeClientEncoding(void);
......
......@@ -5,7 +5,7 @@
*
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.6 2008/06/17 16:09:06 momjian Exp $
* $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.7 2008/06/18 18:42:54 momjian Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -33,9 +33,6 @@
#ifdef USE_WIDE_UPPER_LOWER
extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen);
extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen);
extern int t_isdigit(const char *ptr);
extern int t_isspace(const char *ptr);
extern int t_isalpha(const char *ptr);
......
......@@ -6,7 +6,7 @@
*
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.9 2008/05/16 16:31:02 tgl Exp $
* $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.10 2008/06/18 18:42:54 momjian Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -62,8 +62,6 @@ typedef struct
extern char *get_tsearch_config_filename(const char *basename,
const char *extension);
extern char *pnstrdup(const char *in, int len);
/*
* Often useful stopword list management
*/
......
......@@ -21,7 +21,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/utils/palloc.h,v 1.38 2008/01/01 19:45:59 momjian Exp $
* $PostgreSQL: pgsql/src/include/utils/palloc.h,v 1.39 2008/06/18 18:42:54 momjian Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -70,6 +70,8 @@ extern void pfree(void *pointer);
extern void *repalloc(void *pointer, Size size);
/* Copy the first len bytes of in into palloc'd memory and append a zero byte */
extern char *pnstrdup(const char *in, int len);
/*
* MemoryContextSwitchTo can't be a macro in standard C compilers.
* But we can make it an inline function when using GCC.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment