Commit c61a2f58 authored by Tom Lane

Change the backend to reject strings containing invalidly-encoded multibyte
characters in all cases.  Formerly we mostly just threw warnings for invalid
input, and failed to detect it at all if no encoding conversion was required.
The tighter check is needed to defend against SQL-injection attacks as per
CVE-2006-2313 (further details will be published after release).  Embedded
zero (null) bytes will be rejected as well.  The checks are applied during
input to the backend (receipt from client or COPY IN), so it no longer seems
necessary to check in textin() and related routines; any string arriving at
those functions will already have been validated.  Conversion failure
reporting (for characters with no equivalent in the destination encoding)
has been cleaned up and made consistent while at it.

Also, fix a few longstanding errors in little-used encoding conversion
routines: win1251_to_iso, win866_to_iso, euc_tw_to_big5, euc_tw_to_mic,
mic_to_euc_tw were all broken to varying extents.

Patches by Tatsuo Ishii and Tom Lane.  Thanks to Akio Ishida and Yasuo Ohgaki
for identifying the security issues.
parent 1f219cf4
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.263 2006/04/05 22:11:54 tgl Exp $ * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.264 2006/05/21 20:05:19 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1023,9 +1023,15 @@ DoCopy(const CopyStmt *stmt) ...@@ -1023,9 +1023,15 @@ DoCopy(const CopyStmt *stmt)
cstate->raw_buf_index = cstate->raw_buf_len = 0; cstate->raw_buf_index = cstate->raw_buf_len = 0;
cstate->processed = 0; cstate->processed = 0;
/* Set up encoding conversion info */ /*
* Set up encoding conversion info. Even if the client and server
* encodings are the same, we must apply pg_client_to_server() to
* validate data in multibyte encodings.
*/
cstate->client_encoding = pg_get_client_encoding(); cstate->client_encoding = pg_get_client_encoding();
cstate->need_transcoding = (cstate->client_encoding != GetDatabaseEncoding()); cstate->need_transcoding =
(cstate->client_encoding != GetDatabaseEncoding() ||
pg_database_encoding_max_length() > 1);
/* See Multibyte encoding comment above */ /* See Multibyte encoding comment above */
cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->client_encoding); cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->client_encoding);
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/name.c,v 1.57 2006/03/05 15:58:43 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/name.c,v 1.58 2006/05/21 20:05:19 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -49,10 +49,7 @@ namein(PG_FUNCTION_ARGS) ...@@ -49,10 +49,7 @@ namein(PG_FUNCTION_ARGS)
NameData *result; NameData *result;
int len; int len;
/* verify encoding */
len = strlen(s); len = strlen(s);
pg_verifymbstr(s, len, false);
len = pg_mbcliplen(s, len, NAMEDATALEN - 1); len = pg_mbcliplen(s, len, NAMEDATALEN - 1);
result = (NameData *) palloc0(NAMEDATALEN); result = (NameData *) palloc0(NAMEDATALEN);
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/varchar.c,v 1.115 2006/03/05 15:58:44 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/varchar.c,v 1.116 2006/05/21 20:05:19 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -73,9 +73,6 @@ bpchar_input(const char *s, size_t len, int32 atttypmod) ...@@ -73,9 +73,6 @@ bpchar_input(const char *s, size_t len, int32 atttypmod)
char *r; char *r;
size_t maxlen; size_t maxlen;
/* verify encoding */
pg_verifymbstr(s, len, false);
/* If typmod is -1 (or invalid), use the actual string length */ /* If typmod is -1 (or invalid), use the actual string length */
if (atttypmod < (int32) VARHDRSZ) if (atttypmod < (int32) VARHDRSZ)
maxlen = len; maxlen = len;
...@@ -393,9 +390,6 @@ varchar_input(const char *s, size_t len, int32 atttypmod) ...@@ -393,9 +390,6 @@ varchar_input(const char *s, size_t len, int32 atttypmod)
VarChar *result; VarChar *result;
size_t maxlen; size_t maxlen;
/* verify encoding */
pg_verifymbstr(s, len, false);
maxlen = atttypmod - VARHDRSZ; maxlen = atttypmod - VARHDRSZ;
if (atttypmod >= (int32) VARHDRSZ && len > maxlen) if (atttypmod >= (int32) VARHDRSZ && len > maxlen)
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.146 2006/04/04 19:35:36 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.147 2006/05/21 20:05:19 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -256,10 +256,7 @@ textin(PG_FUNCTION_ARGS) ...@@ -256,10 +256,7 @@ textin(PG_FUNCTION_ARGS)
text *result; text *result;
int len; int len;
/* verify encoding */
len = strlen(inputText); len = strlen(inputText);
pg_verifymbstr(inputText, len, false);
result = (text *) palloc(len + VARHDRSZ); result = (text *) palloc(len + VARHDRSZ);
VARATT_SIZEP(result) = len + VARHDRSZ; VARATT_SIZEP(result) = len + VARHDRSZ;
...@@ -299,9 +296,6 @@ textrecv(PG_FUNCTION_ARGS) ...@@ -299,9 +296,6 @@ textrecv(PG_FUNCTION_ARGS)
str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
/* verify encoding */
pg_verifymbstr(str, nbytes, false);
result = (text *) palloc(nbytes + VARHDRSZ); result = (text *) palloc(nbytes + VARHDRSZ);
VARATT_SIZEP(result) = nbytes + VARHDRSZ; VARATT_SIZEP(result) = nbytes + VARHDRSZ;
memcpy(VARDATA(result), str, nbytes); memcpy(VARDATA(result), str, nbytes);
......
This diff is collapsed.
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c,v 1.12 2006/03/05 15:58:47 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c,v 1.13 2006/05/21 20:05:19 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -70,14 +70,14 @@ extern Datum win866_to_iso(PG_FUNCTION_ARGS); ...@@ -70,14 +70,14 @@ extern Datum win866_to_iso(PG_FUNCTION_ARGS);
* ---------- * ----------
*/ */
static void koi8r2mic(unsigned char *l, unsigned char *p, int len); static void koi8r2mic(const unsigned char *l, unsigned char *p, int len);
static void mic2koi8r(unsigned char *mic, unsigned char *p, int len); static void mic2koi8r(const unsigned char *mic, unsigned char *p, int len);
static void iso2mic(unsigned char *l, unsigned char *p, int len); static void iso2mic(const unsigned char *l, unsigned char *p, int len);
static void mic2iso(unsigned char *mic, unsigned char *p, int len); static void mic2iso(const unsigned char *mic, unsigned char *p, int len);
static void win12512mic(unsigned char *l, unsigned char *p, int len); static void win12512mic(const unsigned char *l, unsigned char *p, int len);
static void mic2win1251(unsigned char *mic, unsigned char *p, int len); static void mic2win1251(const unsigned char *mic, unsigned char *p, int len);
static void win8662mic(unsigned char *l, unsigned char *p, int len); static void win8662mic(const unsigned char *l, unsigned char *p, int len);
static void mic2win866(unsigned char *mic, unsigned char *p, int len); static void mic2win866(const unsigned char *mic, unsigned char *p, int len);
Datum Datum
koi8r_to_mic(PG_FUNCTION_ARGS) koi8r_to_mic(PG_FUNCTION_ARGS)
...@@ -401,7 +401,7 @@ win1251_to_iso(PG_FUNCTION_ARGS) ...@@ -401,7 +401,7 @@ win1251_to_iso(PG_FUNCTION_ARGS)
buf = palloc(len * ENCODING_GROWTH_RATE); buf = palloc(len * ENCODING_GROWTH_RATE);
win12512mic(src, buf, len); win12512mic(src, buf, len);
mic2win1251(buf, dest, strlen((char *) buf)); mic2iso(buf, dest, strlen((char *) buf));
pfree(buf); pfree(buf);
PG_RETURN_VOID(); PG_RETURN_VOID();
...@@ -441,7 +441,7 @@ win866_to_iso(PG_FUNCTION_ARGS) ...@@ -441,7 +441,7 @@ win866_to_iso(PG_FUNCTION_ARGS)
buf = palloc(len * ENCODING_GROWTH_RATE); buf = palloc(len * ENCODING_GROWTH_RATE);
win8662mic(src, buf, len); win8662mic(src, buf, len);
mic2win866(buf, dest, strlen((char *) buf)); mic2iso(buf, dest, strlen((char *) buf));
pfree(buf); pfree(buf);
PG_RETURN_VOID(); PG_RETURN_VOID();
...@@ -460,23 +460,23 @@ win866_to_iso(PG_FUNCTION_ARGS) ...@@ -460,23 +460,23 @@ win866_to_iso(PG_FUNCTION_ARGS)
/* koi8r2mic: KOI8-R to Mule internal code */ /* koi8r2mic: KOI8-R to Mule internal code */
static void static void
koi8r2mic(unsigned char *l, unsigned char *p, int len) koi8r2mic(const unsigned char *l, unsigned char *p, int len)
{ {
latin2mic(l, p, len, LC_KOI8_R); latin2mic(l, p, len, LC_KOI8_R, PG_KOI8R);
} }
/* mic2koi8r: Mule internal code to KOI8-R */ /* mic2koi8r: Mule internal code to KOI8-R */
static void static void
mic2koi8r(unsigned char *mic, unsigned char *p, int len) mic2koi8r(const unsigned char *mic, unsigned char *p, int len)
{ {
mic2latin(mic, p, len, LC_KOI8_R); mic2latin(mic, p, len, LC_KOI8_R, PG_KOI8R);
} }
/* iso2mic: ISO-8859-5 to Mule internal code */ /* iso2mic: ISO-8859-5 to Mule internal code */
static void static void
iso2mic(unsigned char *l, unsigned char *p, int len) iso2mic(const unsigned char *l, unsigned char *p, int len)
{ {
static unsigned char iso2koi[] = { static const unsigned char iso2koi[] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
...@@ -495,14 +495,14 @@ iso2mic(unsigned char *l, unsigned char *p, int len) ...@@ -495,14 +495,14 @@ iso2mic(unsigned char *l, unsigned char *p, int len)
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
}; };
latin2mic_with_table(l, p, len, LC_KOI8_R, iso2koi); latin2mic_with_table(l, p, len, LC_KOI8_R, PG_ISO_8859_5, iso2koi);
} }
/* mic2iso: Mule internal code to ISO8859-5 */ /* mic2iso: Mule internal code to ISO8859-5 */
static void static void
mic2iso(unsigned char *mic, unsigned char *p, int len) mic2iso(const unsigned char *mic, unsigned char *p, int len)
{ {
static unsigned char koi2iso[] = { static const unsigned char koi2iso[] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
...@@ -521,14 +521,14 @@ mic2iso(unsigned char *mic, unsigned char *p, int len) ...@@ -521,14 +521,14 @@ mic2iso(unsigned char *mic, unsigned char *p, int len)
0xcc, 0xcb, 0xb7, 0xc8, 0xcd, 0xc9, 0xc7, 0xca 0xcc, 0xcb, 0xb7, 0xc8, 0xcd, 0xc9, 0xc7, 0xca
}; };
mic2latin_with_table(mic, p, len, LC_KOI8_R, koi2iso); mic2latin_with_table(mic, p, len, LC_KOI8_R, PG_ISO_8859_5, koi2iso);
} }
/* win2mic: CP1251 to Mule internal code */ /* win2mic: CP1251 to Mule internal code */
static void static void
win12512mic(unsigned char *l, unsigned char *p, int len) win12512mic(const unsigned char *l, unsigned char *p, int len)
{ {
static unsigned char win2koi[] = { static const unsigned char win2koi[] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
...@@ -547,14 +547,14 @@ win12512mic(unsigned char *l, unsigned char *p, int len) ...@@ -547,14 +547,14 @@ win12512mic(unsigned char *l, unsigned char *p, int len)
0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1 0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1
}; };
latin2mic_with_table(l, p, len, LC_KOI8_R, win2koi); latin2mic_with_table(l, p, len, LC_KOI8_R, PG_WIN1251, win2koi);
} }
/* mic2win: Mule internal code to CP1251 */ /* mic2win: Mule internal code to CP1251 */
static void static void
mic2win1251(unsigned char *mic, unsigned char *p, int len) mic2win1251(const unsigned char *mic, unsigned char *p, int len)
{ {
static unsigned char koi2win[] = { static const unsigned char koi2win[] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
...@@ -573,14 +573,14 @@ mic2win1251(unsigned char *mic, unsigned char *p, int len) ...@@ -573,14 +573,14 @@ mic2win1251(unsigned char *mic, unsigned char *p, int len)
0xdc, 0xdb, 0xc7, 0xd8, 0xdd, 0xd9, 0xd7, 0xda 0xdc, 0xdb, 0xc7, 0xd8, 0xdd, 0xd9, 0xd7, 0xda
}; };
mic2latin_with_table(mic, p, len, LC_KOI8_R, koi2win); mic2latin_with_table(mic, p, len, LC_KOI8_R, PG_WIN1251, koi2win);
} }
/* win8662mic: CP866 to Mule internal code */ /* win8662mic: CP866 to Mule internal code */
static void static void
win8662mic(unsigned char *l, unsigned char *p, int len) win8662mic(const unsigned char *l, unsigned char *p, int len)
{ {
static unsigned char win8662koi[] = { static const unsigned char win8662koi[] = {
0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa, 0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa,
0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0,
0xf2, 0xf3, 0xf4, 0xf5, 0xe6, 0xe8, 0xe3, 0xfe, 0xf2, 0xf3, 0xf4, 0xf5, 0xe6, 0xe8, 0xe3, 0xfe,
...@@ -599,14 +599,14 @@ win8662mic(unsigned char *l, unsigned char *p, int len) ...@@ -599,14 +599,14 @@ win8662mic(unsigned char *l, unsigned char *p, int len)
0xb6, 0xa6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 0xb6, 0xa6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
}; };
latin2mic_with_table(l, p, len, LC_KOI8_R, win8662koi); latin2mic_with_table(l, p, len, LC_KOI8_R, PG_WIN866, win8662koi);
} }
/* mic2win866: Mule internal code to CP866 */ /* mic2win866: Mule internal code to CP866 */
static void static void
mic2win866(unsigned char *mic, unsigned char *p, int len) mic2win866(const unsigned char *mic, unsigned char *p, int len)
{ {
static unsigned char koi2win866[] = { static const unsigned char koi2win866[] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
...@@ -625,5 +625,5 @@ mic2win866(unsigned char *mic, unsigned char *p, int len) ...@@ -625,5 +625,5 @@ mic2win866(unsigned char *mic, unsigned char *p, int len)
0x9c, 0x9b, 0x87, 0x98, 0x9d, 0x99, 0x97, 0x9a 0x9c, 0x9b, 0x87, 0x98, 0x9d, 0x99, 0x97, 0x9a
}; };
mic2latin_with_table(mic, p, len, LC_KOI8_R, koi2win866); mic2latin_with_table(mic, p, len, LC_KOI8_R, PG_WIN866, koi2win866);
} }
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c,v 1.13 2006/03/05 15:58:47 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c,v 1.14 2006/05/21 20:05:19 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -32,8 +32,8 @@ extern Datum mic_to_euc_cn(PG_FUNCTION_ARGS); ...@@ -32,8 +32,8 @@ extern Datum mic_to_euc_cn(PG_FUNCTION_ARGS);
* ---------- * ----------
*/ */
static void euc_cn2mic(unsigned char *euc, unsigned char *p, int len); static void euc_cn2mic(const unsigned char *euc, unsigned char *p, int len);
static void mic2euc_cn(unsigned char *mic, unsigned char *p, int len); static void mic2euc_cn(const unsigned char *mic, unsigned char *p, int len);
Datum Datum
euc_cn_to_mic(PG_FUNCTION_ARGS) euc_cn_to_mic(PG_FUNCTION_ARGS)
...@@ -71,23 +71,30 @@ mic_to_euc_cn(PG_FUNCTION_ARGS) ...@@ -71,23 +71,30 @@ mic_to_euc_cn(PG_FUNCTION_ARGS)
* EUC_CN ---> MIC * EUC_CN ---> MIC
*/ */
static void static void
euc_cn2mic(unsigned char *euc, unsigned char *p, int len) euc_cn2mic(const unsigned char *euc, unsigned char *p, int len)
{ {
int c1; int c1;
while (len >= 0 && (c1 = *euc++)) while (len > 0)
{ {
c1 = *euc;
if (IS_HIGHBIT_SET(c1)) if (IS_HIGHBIT_SET(c1))
{ {
len -= 2; if (len < 2 || !IS_HIGHBIT_SET(euc[1]))
report_invalid_encoding(PG_EUC_CN, (const char *) euc, len);
*p++ = LC_GB2312_80; *p++ = LC_GB2312_80;
*p++ = c1; *p++ = c1;
*p++ = *euc++; *p++ = euc[1];
euc += 2;
len -= 2;
} }
else else
{ /* should be ASCII */ { /* should be ASCII */
len--; if (c1 == 0)
report_invalid_encoding(PG_EUC_CN, (const char *) euc, len);
*p++ = c1; *p++ = c1;
euc++;
len--;
} }
} }
*p = '\0'; *p = '\0';
...@@ -97,26 +104,35 @@ euc_cn2mic(unsigned char *euc, unsigned char *p, int len) ...@@ -97,26 +104,35 @@ euc_cn2mic(unsigned char *euc, unsigned char *p, int len)
* MIC ---> EUC_CN * MIC ---> EUC_CN
*/ */
static void static void
mic2euc_cn(unsigned char *mic, unsigned char *p, int len) mic2euc_cn(const unsigned char *mic, unsigned char *p, int len)
{ {
int c1; int c1;
while (len >= 0 && (c1 = *mic)) while (len > 0)
{ {
len -= pg_mic_mblen(mic++); c1 = *mic;
if (IS_HIGHBIT_SET(c1))
if (c1 == LC_GB2312_80)
{ {
if (c1 != LC_GB2312_80)
report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_CN,
(const char *) mic, len);
if (len < 3 || !IS_HIGHBIT_SET(mic[1]) || !IS_HIGHBIT_SET(mic[2]))
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
mic++;
*p++ = *mic++; *p++ = *mic++;
*p++ = *mic++; *p++ = *mic++;
} len -= 3;
else if (IS_HIGHBIT_SET(c1))
{ /* cannot convert to EUC_CN! */
mic--;
pg_print_bogus_char(&mic, &p);
} }
else else
*p++ = c1; /* should be ASCII */ { /* should be ASCII */
if (c1 == 0)
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
*p++ = c1;
mic++;
len--;
}
} }
*p = '\0'; *p = '\0';
} }
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c,v 1.13 2006/03/05 15:58:47 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c,v 1.14 2006/05/21 20:05:19 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -32,8 +32,8 @@ extern Datum mic_to_euc_kr(PG_FUNCTION_ARGS); ...@@ -32,8 +32,8 @@ extern Datum mic_to_euc_kr(PG_FUNCTION_ARGS);
* ---------- * ----------
*/ */
static void euc_kr2mic(unsigned char *euc, unsigned char *p, int len); static void euc_kr2mic(const unsigned char *euc, unsigned char *p, int len);
static void mic2euc_kr(unsigned char *mic, unsigned char *p, int len); static void mic2euc_kr(const unsigned char *mic, unsigned char *p, int len);
Datum Datum
euc_kr_to_mic(PG_FUNCTION_ARGS) euc_kr_to_mic(PG_FUNCTION_ARGS)
...@@ -71,23 +71,34 @@ mic_to_euc_kr(PG_FUNCTION_ARGS) ...@@ -71,23 +71,34 @@ mic_to_euc_kr(PG_FUNCTION_ARGS)
* EUC_KR ---> MIC * EUC_KR ---> MIC
*/ */
static void static void
euc_kr2mic(unsigned char *euc, unsigned char *p, int len) euc_kr2mic(const unsigned char *euc, unsigned char *p, int len)
{ {
int c1; int c1;
int l;
while (len >= 0 && (c1 = *euc++)) while (len > 0)
{ {
c1 = *euc;
if (IS_HIGHBIT_SET(c1)) if (IS_HIGHBIT_SET(c1))
{ {
len -= 2; l = pg_encoding_verifymb(PG_EUC_KR, (const char *) euc, len);
if (l != 2)
report_invalid_encoding(PG_EUC_KR,
(const char *) euc, len);
*p++ = LC_KS5601; *p++ = LC_KS5601;
*p++ = c1; *p++ = c1;
*p++ = *euc++; *p++ = euc[1];
euc += 2;
len -= 2;
} }
else else
{ /* should be ASCII */ { /* should be ASCII */
len--; if (c1 == 0)
report_invalid_encoding(PG_EUC_KR,
(const char *) euc, len);
*p++ = c1; *p++ = c1;
euc++;
len--;
} }
} }
*p = '\0'; *p = '\0';
...@@ -97,26 +108,39 @@ euc_kr2mic(unsigned char *euc, unsigned char *p, int len) ...@@ -97,26 +108,39 @@ euc_kr2mic(unsigned char *euc, unsigned char *p, int len)
* MIC ---> EUC_KR * MIC ---> EUC_KR
*/ */
static void static void
mic2euc_kr(unsigned char *mic, unsigned char *p, int len) mic2euc_kr(const unsigned char *mic, unsigned char *p, int len)
{ {
int c1; int c1;
int l;
while (len >= 0 && (c1 = *mic)) while (len > 0)
{ {
len -= pg_mic_mblen(mic++); c1 = *mic;
if (!IS_HIGHBIT_SET(c1))
if (c1 == LC_KS5601)
{ {
*p++ = *mic++; /* ASCII */
*p++ = *mic++; if (c1 == 0)
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
*p++ = c1;
mic++;
len--;
continue;
} }
else if (IS_HIGHBIT_SET(c1)) l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
{ /* cannot convert to EUC_KR! */ if (l < 0)
mic--; report_invalid_encoding(PG_MULE_INTERNAL,
pg_print_bogus_char(&mic, &p); (const char *) mic, len);
if (c1 == LC_KS5601)
{
*p++ = mic[1];
*p++ = mic[2];
} }
else else
*p++ = c1; /* should be ASCII */ report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_KR,
(const char *) mic, len);
mic += l;
len -= l;
} }
*p = '\0'; *p = '\0';
} }
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c,v 1.13 2006/03/05 15:58:47 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c,v 1.14 2006/05/21 20:05:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -42,10 +42,10 @@ extern Datum mic_to_big5(PG_FUNCTION_ARGS); ...@@ -42,10 +42,10 @@ extern Datum mic_to_big5(PG_FUNCTION_ARGS);
* ---------- * ----------
*/ */
static void big52mic(unsigned char *big5, unsigned char *p, int len); static void big52mic(const unsigned char *big5, unsigned char *p, int len);
static void mic2big5(unsigned char *mic, unsigned char *p, int len); static void mic2big5(const unsigned char *mic, unsigned char *p, int len);
static void euc_tw2mic(unsigned char *euc, unsigned char *p, int len); static void euc_tw2mic(const unsigned char *euc, unsigned char *p, int len);
static void mic2euc_tw(unsigned char *mic, unsigned char *p, int len); static void mic2euc_tw(const unsigned char *mic, unsigned char *p, int len);
Datum Datum
euc_tw_to_big5(PG_FUNCTION_ARGS) euc_tw_to_big5(PG_FUNCTION_ARGS)
...@@ -114,7 +114,7 @@ mic_to_euc_tw(PG_FUNCTION_ARGS) ...@@ -114,7 +114,7 @@ mic_to_euc_tw(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_EUC_TW); Assert(PG_GETARG_INT32(1) == PG_EUC_TW);
Assert(len >= 0); Assert(len >= 0);
mic2big5(src, dest, len); mic2euc_tw(src, dest, len);
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
...@@ -155,16 +155,23 @@ mic_to_big5(PG_FUNCTION_ARGS) ...@@ -155,16 +155,23 @@ mic_to_big5(PG_FUNCTION_ARGS)
* EUC_TW ---> MIC * EUC_TW ---> MIC
*/ */
static void static void
euc_tw2mic(unsigned char *euc, unsigned char *p, int len) euc_tw2mic(const unsigned char *euc, unsigned char *p, int len)
{ {
int c1; int c1;
int l;
while (len >= 0 && (c1 = *euc++)) while (len > 0)
{
c1 = *euc;
if (IS_HIGHBIT_SET(c1))
{ {
l = pg_encoding_verifymb(PG_EUC_TW, (const char *) euc, len);
if (l < 0)
report_invalid_encoding(PG_EUC_TW,
(const char *) euc, len);
if (c1 == SS2) if (c1 == SS2)
{ {
len -= 4; c1 = euc[1]; /* plane No. */
c1 = *euc++; /* plane No. */
if (c1 == 0xa1) if (c1 == 0xa1)
*p++ = LC_CNS11643_1; *p++ = LC_CNS11643_1;
else if (c1 == 0xa2) else if (c1 == 0xa2)
...@@ -172,22 +179,28 @@ euc_tw2mic(unsigned char *euc, unsigned char *p, int len) ...@@ -172,22 +179,28 @@ euc_tw2mic(unsigned char *euc, unsigned char *p, int len)
else else
{ {
*p++ = 0x9d; /* LCPRV2 */ *p++ = 0x9d; /* LCPRV2 */
*p++ = 0xa3 - c1 + LC_CNS11643_3; *p++ = c1 - 0xa3 + LC_CNS11643_3;
} }
*p++ = *euc++; *p++ = euc[2];
*p++ = *euc++; *p++ = euc[3];
} }
else if (IS_HIGHBIT_SET(c1)) else
{ /* CNS11643-1 */ { /* CNS11643-1 */
len -= 2;
*p++ = LC_CNS11643_1; *p++ = LC_CNS11643_1;
*p++ = c1; *p++ = c1;
*p++ = *euc++; *p++ = euc[1];
}
euc += l;
len -= l;
} }
else else
{ /* should be ASCII */ { /* should be ASCII */
len--; if (c1 == 0)
report_invalid_encoding(PG_EUC_TW,
(const char *) euc, len);
*p++ = c1; *p++ = c1;
euc++;
len--;
} }
} }
*p = '\0'; *p = '\0';
...@@ -197,40 +210,54 @@ euc_tw2mic(unsigned char *euc, unsigned char *p, int len) ...@@ -197,40 +210,54 @@ euc_tw2mic(unsigned char *euc, unsigned char *p, int len)
* MIC ---> EUC_TW * MIC ---> EUC_TW
*/ */
static void static void
mic2euc_tw(unsigned char *mic, unsigned char *p, int len) mic2euc_tw(const unsigned char *mic, unsigned char *p, int len)
{ {
int c1; int c1;
int l;
while (len >= 0 && (c1 = *mic)) while (len > 0)
{ {
len -= pg_mic_mblen(mic++); c1 = *mic;
if (!IS_HIGHBIT_SET(c1))
{
/* ASCII */
if (c1 == 0)
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
*p++ = c1;
mic++;
len--;
continue;
}
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
if (l < 0)
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
if (c1 == LC_CNS11643_1) if (c1 == LC_CNS11643_1)
{ {
*p++ = *mic++; *p++ = mic[1];
*p++ = *mic++; *p++ = mic[2];
} }
else if (c1 == LC_CNS11643_2) else if (c1 == LC_CNS11643_2)
{ {
*p++ = SS2; *p++ = SS2;
*p++ = 0xa2; *p++ = 0xa2;
*p++ = *mic++; *p++ = mic[1];
*p++ = *mic++; *p++ = mic[2];
} }
else if (c1 == 0x9d) else if (c1 == 0x9d &&
mic[1] >= LC_CNS11643_3 && mic[1] <= LC_CNS11643_7)
{ /* LCPRV2? */ { /* LCPRV2? */
*p++ = SS2; *p++ = SS2;
*p++ = *mic++ - LC_CNS11643_3 + 0xa3; *p++ = mic[1] - LC_CNS11643_3 + 0xa3;
*p++ = *mic++; *p++ = mic[2];
*p++ = *mic++; *p++ = mic[3];
}
else if (IS_HIGHBIT_SET(c1))
{ /* cannot convert to EUC_TW! */
mic--;
pg_print_bogus_char(&mic, &p);
} }
else else
*p++ = c1; /* should be ASCII */ report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_TW,
(const char *) mic, len);
mic += l;
len -= l;
} }
*p = '\0'; *p = '\0';
} }
...@@ -239,28 +266,33 @@ mic2euc_tw(unsigned char *mic, unsigned char *p, int len) ...@@ -239,28 +266,33 @@ mic2euc_tw(unsigned char *mic, unsigned char *p, int len)
* Big5 ---> MIC * Big5 ---> MIC
*/ */
static void static void
big52mic(unsigned char *big5, unsigned char *p, int len) big52mic(const unsigned char *big5, unsigned char *p, int len)
{ {
unsigned short c1; unsigned short c1;
unsigned short big5buf, unsigned short big5buf,
cnsBuf; cnsBuf;
unsigned char lc; unsigned char lc;
char bogusBuf[3]; int l;
int i;
while (len >= 0 && (c1 = *big5++)) while (len > 0)
{ {
c1 = *big5;
if (!IS_HIGHBIT_SET(c1)) if (!IS_HIGHBIT_SET(c1))
{ /* ASCII */ {
len--; /* ASCII */
if (c1 == 0)
report_invalid_encoding(PG_BIG5,
(const char *) big5, len);
*p++ = c1; *p++ = c1;
big5++;
len--;
continue;
} }
else l = pg_encoding_verifymb(PG_BIG5, (const char *) big5, len);
{ if (l < 0)
len -= 2; report_invalid_encoding(PG_BIG5,
big5buf = c1 << 8; (const char *) big5, len);
c1 = *big5++; big5buf = (c1 << 8) | big5[1];
big5buf |= c1;
cnsBuf = BIG5toCNS(big5buf, &lc); cnsBuf = BIG5toCNS(big5buf, &lc);
if (lc != 0) if (lc != 0)
{ {
...@@ -273,18 +305,10 @@ big52mic(unsigned char *big5, unsigned char *p, int len) ...@@ -273,18 +305,10 @@ big52mic(unsigned char *big5, unsigned char *p, int len)
*p++ = cnsBuf & 0x00ff; *p++ = cnsBuf & 0x00ff;
} }
else else
{ /* cannot convert */ report_untranslatable_char(PG_BIG5, PG_MULE_INTERNAL,
big5 -= 2; (const char *) big5, len);
*p++ = '('; big5 += l;
for (i = 0; i < 2; i++) len -= l;
{
sprintf(bogusBuf, "%02x", *big5++);
*p++ = bogusBuf[0];
*p++ = bogusBuf[1];
}
*p++ = ')';
}
}
} }
*p = '\0'; *p = '\0';
} }
...@@ -293,46 +317,55 @@ big52mic(unsigned char *big5, unsigned char *p, int len) ...@@ -293,46 +317,55 @@ big52mic(unsigned char *big5, unsigned char *p, int len)
* MIC ---> Big5 * MIC ---> Big5
*/ */
static void static void
mic2big5(unsigned char *mic, unsigned char *p, int len) mic2big5(const unsigned char *mic, unsigned char *p, int len)
{ {
int l;
unsigned short c1; unsigned short c1;
unsigned short big5buf, unsigned short big5buf,
cnsBuf; cnsBuf;
int l;
while (len >= 0 && (c1 = *mic)) while (len > 0)
{ {
l = pg_mic_mblen(mic++); c1 = *mic;
len -= l; if (!IS_HIGHBIT_SET(c1))
{
/* ASCII */
if (c1 == 0)
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
*p++ = c1;
mic++;
len--;
continue;
}
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
if (l < 0)
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
/* 0x9d means LCPRV2 */ /* 0x9d means LCPRV2 */
if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == 0x9d) if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == 0x9d)
{ {
if (c1 == 0x9d) if (c1 == 0x9d)
{ {
c1 = *mic++; /* get plane no. */ c1 = mic[1]; /* get plane no. */
} cnsBuf = (mic[2] << 8) | mic[3];
cnsBuf = (*mic++) << 8;
cnsBuf |= (*mic++) & 0x00ff;
big5buf = CNStoBIG5(cnsBuf, c1);
if (big5buf == 0)
{ /* cannot convert to Big5! */
mic -= l;
pg_print_bogus_char(&mic, &p);
} }
else else
{ {
cnsBuf = (mic[1] << 8) | mic[2];
}
big5buf = CNStoBIG5(cnsBuf, c1);
if (big5buf == 0)
report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
(const char *) mic, len);
*p++ = (big5buf >> 8) & 0x00ff; *p++ = (big5buf >> 8) & 0x00ff;
*p++ = big5buf & 0x00ff; *p++ = big5buf & 0x00ff;
} }
}
else if (!IS_HIGHBIT_SET(c1)) /* ASCII */
*p++ = c1;
else else
{ /* cannot convert to Big5! */ report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
mic--; (const char *) mic, len);
pg_print_bogus_char(&mic, &p); mic += l;
} len -= l;
} }
*p = '\0'; *p = '\0';
} }
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c,v 1.11 2006/03/05 15:58:47 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c,v 1.12 2006/05/21 20:05:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -42,10 +42,10 @@ extern Datum win1250_to_latin2(PG_FUNCTION_ARGS); ...@@ -42,10 +42,10 @@ extern Datum win1250_to_latin2(PG_FUNCTION_ARGS);
* ---------- * ----------
*/ */
static void latin22mic(unsigned char *l, unsigned char *p, int len); static void latin22mic(const unsigned char *l, unsigned char *p, int len);
static void mic2latin2(unsigned char *mic, unsigned char *p, int len); static void mic2latin2(const unsigned char *mic, unsigned char *p, int len);
static void win12502mic(unsigned char *l, unsigned char *p, int len); static void win12502mic(const unsigned char *l, unsigned char *p, int len);
static void mic2win1250(unsigned char *mic, unsigned char *p, int len); static void mic2win1250(const unsigned char *mic, unsigned char *p, int len);
Datum Datum
latin2_to_mic(PG_FUNCTION_ARGS) latin2_to_mic(PG_FUNCTION_ARGS)
...@@ -152,14 +152,15 @@ win1250_to_latin2(PG_FUNCTION_ARGS) ...@@ -152,14 +152,15 @@ win1250_to_latin2(PG_FUNCTION_ARGS)
} }
static void static void
latin22mic(unsigned char *l, unsigned char *p, int len) latin22mic(const unsigned char *l, unsigned char *p, int len)
{ {
latin2mic(l, p, len, LC_ISO8859_2); latin2mic(l, p, len, LC_ISO8859_2, PG_LATIN2);
} }
static void static void
mic2latin2(unsigned char *mic, unsigned char *p, int len) mic2latin2(const unsigned char *mic, unsigned char *p, int len)
{ {
mic2latin(mic, p, len, LC_ISO8859_2); mic2latin(mic, p, len, LC_ISO8859_2, PG_LATIN2);
} }
/*----------------------------------------------------------------- /*-----------------------------------------------------------------
...@@ -167,9 +168,9 @@ mic2latin2(unsigned char *mic, unsigned char *p, int len) ...@@ -167,9 +168,9 @@ mic2latin2(unsigned char *mic, unsigned char *p, int len)
* Microsoft's CP1250(windows-1250) * Microsoft's CP1250(windows-1250)
*-----------------------------------------------------------------*/ *-----------------------------------------------------------------*/
static void static void
win12502mic(unsigned char *l, unsigned char *p, int len) win12502mic(const unsigned char *l, unsigned char *p, int len)
{ {
static unsigned char win1250_2_iso88592[] = { static const unsigned char win1250_2_iso88592[] = {
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0xA9, 0x8B, 0xA6, 0xAB, 0xAE, 0xAC, 0x88, 0x89, 0xA9, 0x8B, 0xA6, 0xAB, 0xAE, 0xAC,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
...@@ -188,12 +189,14 @@ win12502mic(unsigned char *l, unsigned char *p, int len) ...@@ -188,12 +189,14 @@ win12502mic(unsigned char *l, unsigned char *p, int len)
0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
}; };
latin2mic_with_table(l, p, len, LC_ISO8859_2, win1250_2_iso88592); latin2mic_with_table(l, p, len, LC_ISO8859_2, PG_WIN1250,
win1250_2_iso88592);
} }
static void static void
mic2win1250(unsigned char *mic, unsigned char *p, int len) mic2win1250(const unsigned char *mic, unsigned char *p, int len)
{ {
static unsigned char iso88592_2_win1250[] = { static const unsigned char iso88592_2_win1250[] = {
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x00, 0x8B, 0x00, 0x00, 0x00, 0x00, 0x88, 0x89, 0x00, 0x8B, 0x00, 0x00, 0x00, 0x00,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
...@@ -212,5 +215,6 @@ mic2win1250(unsigned char *mic, unsigned char *p, int len) ...@@ -212,5 +215,6 @@ mic2win1250(unsigned char *mic, unsigned char *p, int len)
0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
}; };
mic2latin_with_table(mic, p, len, LC_ISO8859_2, iso88592_2_win1250); mic2latin_with_table(mic, p, len, LC_ISO8859_2, PG_WIN1250,
iso88592_2_win1250);
} }
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c,v 1.11 2006/03/05 15:58:47 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c,v 1.12 2006/05/21 20:05:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -40,12 +40,12 @@ extern Datum mic_to_latin4(PG_FUNCTION_ARGS); ...@@ -40,12 +40,12 @@ extern Datum mic_to_latin4(PG_FUNCTION_ARGS);
* ---------- * ----------
*/ */
static void latin12mic(unsigned char *l, unsigned char *p, int len); static void latin12mic(const unsigned char *l, unsigned char *p, int len);
static void mic2latin1(unsigned char *mic, unsigned char *p, int len); static void mic2latin1(const unsigned char *mic, unsigned char *p, int len);
static void latin32mic(unsigned char *l, unsigned char *p, int len); static void latin32mic(const unsigned char *l, unsigned char *p, int len);
static void mic2latin3(unsigned char *mic, unsigned char *p, int len); static void mic2latin3(const unsigned char *mic, unsigned char *p, int len);
static void latin42mic(unsigned char *l, unsigned char *p, int len); static void latin42mic(const unsigned char *l, unsigned char *p, int len);
static void mic2latin4(unsigned char *mic, unsigned char *p, int len); static void mic2latin4(const unsigned char *mic, unsigned char *p, int len);
Datum Datum
latin1_to_mic(PG_FUNCTION_ARGS) latin1_to_mic(PG_FUNCTION_ARGS)
...@@ -144,32 +144,37 @@ mic_to_latin4(PG_FUNCTION_ARGS) ...@@ -144,32 +144,37 @@ mic_to_latin4(PG_FUNCTION_ARGS)
} }
static void static void
latin12mic(unsigned char *l, unsigned char *p, int len) latin12mic(const unsigned char *l, unsigned char *p, int len)
{ {
latin2mic(l, p, len, LC_ISO8859_1); latin2mic(l, p, len, LC_ISO8859_1, PG_LATIN1);
} }
static void static void
mic2latin1(unsigned char *mic, unsigned char *p, int len) mic2latin1(const unsigned char *mic, unsigned char *p, int len)
{ {
mic2latin(mic, p, len, LC_ISO8859_1); mic2latin(mic, p, len, LC_ISO8859_1, PG_LATIN1);
} }
static void static void
latin32mic(unsigned char *l, unsigned char *p, int len) latin32mic(const unsigned char *l, unsigned char *p, int len)
{ {
latin2mic(l, p, len, LC_ISO8859_3); latin2mic(l, p, len, LC_ISO8859_3, PG_LATIN3);
} }
static void static void
mic2latin3(unsigned char *mic, unsigned char *p, int len) mic2latin3(const unsigned char *mic, unsigned char *p, int len)
{ {
mic2latin(mic, p, len, LC_ISO8859_3); mic2latin(mic, p, len, LC_ISO8859_3, PG_LATIN3);
} }
static void static void
latin42mic(unsigned char *l, unsigned char *p, int len) latin42mic(const unsigned char *l, unsigned char *p, int len)
{ {
latin2mic(l, p, len, LC_ISO8859_4); latin2mic(l, p, len, LC_ISO8859_4, PG_LATIN4);
} }
static void static void
mic2latin4(unsigned char *mic, unsigned char *p, int len) mic2latin4(const unsigned char *mic, unsigned char *p, int len)
{ {
mic2latin(mic, p, len, LC_ISO8859_4); mic2latin(mic, p, len, LC_ISO8859_4, PG_LATIN4);
} }
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_ascii/utf8_and_ascii.c,v 1.12 2006/03/05 15:58:47 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_ascii/utf8_and_ascii.c,v 1.13 2006/05/21 20:05:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -43,6 +43,7 @@ ascii_to_utf8(PG_FUNCTION_ARGS) ...@@ -43,6 +43,7 @@ ascii_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8); Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0); Assert(len >= 0);
/* this looks wrong, but basically we're just rejecting high-bit-set */
pg_ascii2mic(src, dest, len); pg_ascii2mic(src, dest, len);
PG_RETURN_VOID(); PG_RETURN_VOID();
...@@ -59,6 +60,7 @@ utf8_to_ascii(PG_FUNCTION_ARGS) ...@@ -59,6 +60,7 @@ utf8_to_ascii(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_SQL_ASCII); Assert(PG_GETARG_INT32(1) == PG_SQL_ASCII);
Assert(len >= 0); Assert(len >= 0);
/* this looks wrong, but basically we're just rejecting high-bit-set */
pg_mic2ascii(src, dest, len); pg_mic2ascii(src, dest, len);
PG_RETURN_VOID(); PG_RETURN_VOID();
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c,v 1.12 2006/03/05 15:58:47 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c,v 1.13 2006/05/21 20:05:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -62,7 +62,7 @@ utf8_to_big5(PG_FUNCTION_ARGS) ...@@ -62,7 +62,7 @@ utf8_to_big5(PG_FUNCTION_ARGS)
Assert(len >= 0); Assert(len >= 0);
UtfToLocal(src, dest, ULmapBIG5, UtfToLocal(src, dest, ULmapBIG5,
sizeof(ULmapBIG5) / sizeof(pg_utf_to_local), len); sizeof(ULmapBIG5) / sizeof(pg_utf_to_local), PG_BIG5, len);
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c,v 1.14 2006/03/05 15:58:47 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c,v 1.15 2006/05/21 20:05:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -46,7 +46,7 @@ utf8_to_koi8r(PG_FUNCTION_ARGS) ...@@ -46,7 +46,7 @@ utf8_to_koi8r(PG_FUNCTION_ARGS)
Assert(len >= 0); Assert(len >= 0);
UtfToLocal(src, dest, ULmapKOI8R, UtfToLocal(src, dest, ULmapKOI8R,
sizeof(ULmapKOI8R) / sizeof(pg_utf_to_local), len); sizeof(ULmapKOI8R) / sizeof(pg_utf_to_local), PG_KOI8R, len);
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c,v 1.13 2006/03/05 15:58:47 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c,v 1.14 2006/05/21 20:05:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -62,7 +62,7 @@ utf8_to_euc_cn(PG_FUNCTION_ARGS) ...@@ -62,7 +62,7 @@ utf8_to_euc_cn(PG_FUNCTION_ARGS)
Assert(len >= 0); Assert(len >= 0);
UtfToLocal(src, dest, ULmapEUC_CN, UtfToLocal(src, dest, ULmapEUC_CN,
sizeof(ULmapEUC_CN) / sizeof(pg_utf_to_local), len); sizeof(ULmapEUC_CN) / sizeof(pg_utf_to_local), PG_EUC_CN, len);
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c,v 1.13 2006/03/05 15:58:48 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c,v 1.14 2006/05/21 20:05:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -62,7 +62,7 @@ utf8_to_euc_jp(PG_FUNCTION_ARGS) ...@@ -62,7 +62,7 @@ utf8_to_euc_jp(PG_FUNCTION_ARGS)
Assert(len >= 0); Assert(len >= 0);
UtfToLocal(src, dest, ULmapEUC_JP, UtfToLocal(src, dest, ULmapEUC_JP,
sizeof(ULmapEUC_JP) / sizeof(pg_utf_to_local), len); sizeof(ULmapEUC_JP) / sizeof(pg_utf_to_local), PG_EUC_JP, len);
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c,v 1.13 2006/03/05 15:58:48 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c,v 1.14 2006/05/21 20:05:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -62,7 +62,7 @@ utf8_to_euc_kr(PG_FUNCTION_ARGS) ...@@ -62,7 +62,7 @@ utf8_to_euc_kr(PG_FUNCTION_ARGS)
Assert(len >= 0); Assert(len >= 0);
UtfToLocal(src, dest, ULmapEUC_KR, UtfToLocal(src, dest, ULmapEUC_KR,
sizeof(ULmapEUC_KR) / sizeof(pg_utf_to_local), len); sizeof(ULmapEUC_KR) / sizeof(pg_utf_to_local), PG_EUC_KR, len);
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c,v 1.13 2006/03/05 15:58:48 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c,v 1.14 2006/05/21 20:05:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -62,7 +62,7 @@ utf8_to_euc_tw(PG_FUNCTION_ARGS) ...@@ -62,7 +62,7 @@ utf8_to_euc_tw(PG_FUNCTION_ARGS)
Assert(len >= 0); Assert(len >= 0);
UtfToLocal(src, dest, ULmapEUC_TW, UtfToLocal(src, dest, ULmapEUC_TW,
sizeof(ULmapEUC_TW) / sizeof(pg_utf_to_local), len); sizeof(ULmapEUC_TW) / sizeof(pg_utf_to_local), PG_EUC_TW, len);
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c,v 1.13 2006/03/05 15:58:48 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c,v 1.14 2006/05/21 20:05:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -62,7 +62,7 @@ utf8_to_gb18030(PG_FUNCTION_ARGS) ...@@ -62,7 +62,7 @@ utf8_to_gb18030(PG_FUNCTION_ARGS)
Assert(len >= 0); Assert(len >= 0);
UtfToLocal(src, dest, ULmapGB18030, UtfToLocal(src, dest, ULmapGB18030,
sizeof(ULmapGB18030) / sizeof(pg_utf_to_local), len); sizeof(ULmapGB18030) / sizeof(pg_utf_to_local), PG_GB18030, len);
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c,v 1.12 2006/03/05 15:58:48 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c,v 1.13 2006/05/21 20:05:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -62,7 +62,7 @@ utf8_to_gbk(PG_FUNCTION_ARGS) ...@@ -62,7 +62,7 @@ utf8_to_gbk(PG_FUNCTION_ARGS)
Assert(len >= 0); Assert(len >= 0);
UtfToLocal(src, dest, ULmapGBK, UtfToLocal(src, dest, ULmapGBK,
sizeof(ULmapGBK) / sizeof(pg_utf_to_local), len); sizeof(ULmapGBK) / sizeof(pg_utf_to_local), PG_GBK, len);
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c,v 1.18 2006/03/05 15:58:48 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c,v 1.19 2006/05/21 20:05:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -153,7 +153,7 @@ utf8_to_iso8859(PG_FUNCTION_ARGS) ...@@ -153,7 +153,7 @@ utf8_to_iso8859(PG_FUNCTION_ARGS)
{ {
if (encoding == maps[i].encoding) if (encoding == maps[i].encoding)
{ {
UtfToLocal(src, dest, maps[i].map2, maps[i].size2, len); UtfToLocal(src, dest, maps[i].map2, maps[i].size2, encoding, len);
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
} }
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c,v 1.15 2006/03/05 15:58:48 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c,v 1.16 2006/05/21 20:05:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -44,8 +44,11 @@ iso8859_1_to_utf8(PG_FUNCTION_ARGS) ...@@ -44,8 +44,11 @@ iso8859_1_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8); Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0); Assert(len >= 0);
while (len-- > 0 && (c = *src++)) while (len > 0)
{ {
c = *src;
if (c == 0)
report_invalid_encoding(PG_LATIN1, (const char *) src, len);
if (!IS_HIGHBIT_SET(c)) if (!IS_HIGHBIT_SET(c))
*dest++ = c; *dest++ = c;
else else
...@@ -53,6 +56,8 @@ iso8859_1_to_utf8(PG_FUNCTION_ARGS) ...@@ -53,6 +56,8 @@ iso8859_1_to_utf8(PG_FUNCTION_ARGS)
*dest++ = (c >> 6) | 0xc0; *dest++ = (c >> 6) | 0xc0;
*dest++ = (c & 0x003f) | HIGHBIT; *dest++ = (c & 0x003f) | HIGHBIT;
} }
src++;
len--;
} }
*dest = '\0'; *dest = '\0';
...@@ -66,32 +71,44 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS) ...@@ -66,32 +71,44 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS)
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
unsigned short c, unsigned short c,
c1, c1;
c2;
Assert(PG_GETARG_INT32(0) == PG_UTF8); Assert(PG_GETARG_INT32(0) == PG_UTF8);
Assert(PG_GETARG_INT32(1) == PG_LATIN1); Assert(PG_GETARG_INT32(1) == PG_LATIN1);
Assert(len >= 0); Assert(len >= 0);
while (len >= 0 && (c = *src++)) while (len > 0)
{ {
if ((c & 0xe0) == 0xc0) c = *src;
if (c == 0)
report_invalid_encoding(PG_UTF8, (const char *) src, len);
/* fast path for ASCII-subset characters */
if (!IS_HIGHBIT_SET(c))
{ {
c1 = c & 0x1f; *dest++ = c;
c2 = *src++ & 0x3f; src++;
*dest = c1 << 6; len--;
*dest++ |= c2;
len -= 2;
} }
else if ((c & 0xe0) == 0xe0)
ereport(WARNING,
(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
errmsg("ignoring unconvertible UTF-8 character 0x%04x",
c)));
else else
{ {
*dest++ = c; int l = pg_utf_mblen(src);
len--;
if (l > len || !pg_utf8_islegal(src, l))
report_invalid_encoding(PG_UTF8, (const char *) src, len);
if (l != 2)
report_untranslatable_char(PG_UTF8, PG_LATIN1,
(const char *) src, len);
c1 = src[1] & 0x3f;
c = ((c & 0x1f) << 6) | c1;
if (c >= 0x80 && c <= 0xff)
{
*dest++ = (unsigned char) c;
src += 2;
len -= 2;
}
else
report_untranslatable_char(PG_UTF8, PG_LATIN1,
(const char *) src, len);
} }
} }
*dest = '\0'; *dest = '\0';
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c,v 1.13 2006/03/05 15:58:48 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c,v 1.14 2006/05/21 20:05:21 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -62,7 +62,7 @@ utf8_to_johab(PG_FUNCTION_ARGS) ...@@ -62,7 +62,7 @@ utf8_to_johab(PG_FUNCTION_ARGS)
Assert(len >= 0); Assert(len >= 0);
UtfToLocal(src, dest, ULmapJOHAB, UtfToLocal(src, dest, ULmapJOHAB,
sizeof(ULmapJOHAB) / sizeof(pg_utf_to_local), len); sizeof(ULmapJOHAB) / sizeof(pg_utf_to_local), PG_JOHAB, len);
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c,v 1.12 2006/03/05 15:58:48 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c,v 1.13 2006/05/21 20:05:21 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -62,7 +62,7 @@ utf8_to_sjis(PG_FUNCTION_ARGS) ...@@ -62,7 +62,7 @@ utf8_to_sjis(PG_FUNCTION_ARGS)
Assert(len >= 0); Assert(len >= 0);
UtfToLocal(src, dest, ULmapSJIS, UtfToLocal(src, dest, ULmapSJIS,
sizeof(ULmapSJIS) / sizeof(pg_utf_to_local), len); sizeof(ULmapSJIS) / sizeof(pg_utf_to_local), PG_SJIS, len);
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c,v 1.12 2006/03/05 15:58:48 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c,v 1.13 2006/05/21 20:05:21 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -62,7 +62,7 @@ utf8_to_uhc(PG_FUNCTION_ARGS) ...@@ -62,7 +62,7 @@ utf8_to_uhc(PG_FUNCTION_ARGS)
Assert(len >= 0); Assert(len >= 0);
UtfToLocal(src, dest, ULmapUHC, UtfToLocal(src, dest, ULmapUHC,
sizeof(ULmapUHC) / sizeof(pg_utf_to_local), len); sizeof(ULmapUHC) / sizeof(pg_utf_to_local), PG_UHC, len);
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c,v 1.2 2006/03/05 15:58:48 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c,v 1.3 2006/05/21 20:05:21 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -143,7 +143,7 @@ utf8_to_win(PG_FUNCTION_ARGS) ...@@ -143,7 +143,7 @@ utf8_to_win(PG_FUNCTION_ARGS)
{ {
if (encoding == maps[i].encoding) if (encoding == maps[i].encoding)
{ {
UtfToLocal(src, dest, maps[i].map2, maps[i].size2, len); UtfToLocal(src, dest, maps[i].map2, maps[i].size2, encoding, len);
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
} }
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
* (currently mule internal code (mic) is used) * (currently mule internal code (mic) is used)
* Tatsuo Ishii * Tatsuo Ishii
* *
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.55 2006/01/12 22:04:02 neilc Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.56 2006/05/21 20:05:19 tgl Exp $
*/ */
#include "postgres.h" #include "postgres.h"
...@@ -362,8 +362,49 @@ pg_client_to_server(const char *s, int len) ...@@ -362,8 +362,49 @@ pg_client_to_server(const char *s, int len)
Assert(DatabaseEncoding); Assert(DatabaseEncoding);
Assert(ClientEncoding); Assert(ClientEncoding);
if (ClientEncoding->encoding == DatabaseEncoding->encoding) if (len <= 0)
return (char *) s;
if (ClientEncoding->encoding == DatabaseEncoding->encoding ||
ClientEncoding->encoding == PG_SQL_ASCII)
{
/*
* No conversion is needed, but we must still validate the data.
*/
(void) pg_verify_mbstr(DatabaseEncoding->encoding, s, len, false);
return (char *) s; return (char *) s;
}
if (DatabaseEncoding->encoding == PG_SQL_ASCII)
{
/*
* No conversion is possible, but we must still validate the data,
* because the client-side code might have done string escaping
* using the selected client_encoding. If the client encoding is
* ASCII-safe then we just do a straight validation under that
* encoding. For an ASCII-unsafe encoding we have a problem:
* we dare not pass such data to the parser but we have no way
* to convert it. We compromise by rejecting the data if it
* contains any non-ASCII characters.
*/
if (PG_VALID_BE_ENCODING(ClientEncoding->encoding))
(void) pg_verify_mbstr(ClientEncoding->encoding, s, len, false);
else
{
int i;
for (i = 0; i < len; i++)
{
if (s[i] == '\0' || IS_HIGHBIT_SET(s[i]))
ereport(ERROR,
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
errmsg("invalid byte value for encoding \"%s\": 0x%02x",
pg_enc2name_tbl[PG_SQL_ASCII].name,
(unsigned char) s[i])));
}
}
return (char *) s;
}
return perform_default_encoding_conversion(s, len, true); return perform_default_encoding_conversion(s, len, true);
} }
...@@ -377,9 +418,14 @@ pg_server_to_client(const char *s, int len) ...@@ -377,9 +418,14 @@ pg_server_to_client(const char *s, int len)
Assert(DatabaseEncoding); Assert(DatabaseEncoding);
Assert(ClientEncoding); Assert(ClientEncoding);
if (ClientEncoding->encoding == DatabaseEncoding->encoding) if (len <= 0)
return (char *) s; return (char *) s;
if (ClientEncoding->encoding == DatabaseEncoding->encoding ||
ClientEncoding->encoding == PG_SQL_ASCII ||
DatabaseEncoding->encoding == PG_SQL_ASCII)
return (char *) s; /* assume data is valid */
return perform_default_encoding_conversion(s, len, false); return perform_default_encoding_conversion(s, len, false);
} }
...@@ -398,9 +444,6 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_ ...@@ -398,9 +444,6 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_
dest_encoding; dest_encoding;
FmgrInfo *flinfo; FmgrInfo *flinfo;
if (len <= 0)
return (char *) src;
if (is_client_to_server) if (is_client_to_server)
{ {
src_encoding = ClientEncoding->encoding; src_encoding = ClientEncoding->encoding;
...@@ -417,12 +460,6 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_ ...@@ -417,12 +460,6 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_
if (flinfo == NULL) if (flinfo == NULL)
return (char *) src; return (char *) src;
if (src_encoding == dest_encoding)
return (char *) src;
if (src_encoding == PG_SQL_ASCII || dest_encoding == PG_SQL_ASCII)
return (char *) src;
result = palloc(len * 4 + 1); result = palloc(len * 4 + 1);
FunctionCall5(flinfo, FunctionCall5(flinfo,
......
This diff is collapsed.
/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.67 2006/02/18 16:15:23 petere Exp $ */ /* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.68 2006/05/21 20:05:21 tgl Exp $ */
#ifndef PG_WCHAR_H #ifndef PG_WCHAR_H
#define PG_WCHAR_H #define PG_WCHAR_H
...@@ -23,11 +23,17 @@ typedef unsigned int pg_wchar; ...@@ -23,11 +23,17 @@ typedef unsigned int pg_wchar;
#define SS2 0x8e /* single shift 2 (JIS0201) */ #define SS2 0x8e /* single shift 2 (JIS0201) */
#define SS3 0x8f /* single shift 3 (JIS0212) */ #define SS3 0x8f /* single shift 3 (JIS0212) */
/*
* SJIS validation macros
*/
#define ISSJISHEAD(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))
#define ISSJISTAIL(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
/* /*
* Leading byte types or leading prefix byte for MULE internal code. * Leading byte types or leading prefix byte for MULE internal code.
* See http://www.xemacs.org for more details. (there is a doc titled * See http://www.xemacs.org for more details. (there is a doc titled
* "XEmacs Internals Manual", "MULE Character Sets and Encodings" * "XEmacs Internals Manual", "MULE Character Sets and Encodings"
* section. * section.)
*/ */
/* /*
* Is a leading byte for "official" single byte encodings? * Is a leading byte for "official" single byte encodings?
...@@ -64,7 +70,7 @@ typedef unsigned int pg_wchar; ...@@ -64,7 +70,7 @@ typedef unsigned int pg_wchar;
#define LC_ISO8859_8 0x88 /* Hebrew (not supported yet) */ #define LC_ISO8859_8 0x88 /* Hebrew (not supported yet) */
#define LC_JISX0201K 0x89 /* Japanese 1 byte kana */ #define LC_JISX0201K 0x89 /* Japanese 1 byte kana */
#define LC_JISX0201R 0x8a /* Japanese 1 byte Roman */ #define LC_JISX0201R 0x8a /* Japanese 1 byte Roman */
/* Note that 0x8b seems to be unused in as of Emacs 20.7. /* Note that 0x8b seems to be unused as of Emacs 20.7.
* However, there might be a chance that 0x8b could be used * However, there might be a chance that 0x8b could be used
* in later version of Emacs. * in later version of Emacs.
*/ */
...@@ -135,13 +141,13 @@ typedef unsigned int pg_wchar; ...@@ -135,13 +141,13 @@ typedef unsigned int pg_wchar;
/* #define FREE 0xff free (unused) */ /* #define FREE 0xff free (unused) */
/* /*
* Encoding numeral identificators * PostgreSQL encoding identifiers
* *
* WARNING: the order of this table must be same as order * WARNING: the order of this table must be same as order
* in the pg_enc2name[] (mb/encnames.c) array! * in the pg_enc2name[] (mb/encnames.c) array!
* *
* If you add some encoding don'y forget check * If you add some encoding don't forget to check
* PG_ENCODING_[BE|FE]_LAST macros. * PG_ENCODING_BE_LAST macro.
* *
* The PG_SQL_ASCII is default encoding and must be = 0. * The PG_SQL_ASCII is default encoding and must be = 0.
*/ */
...@@ -208,8 +214,7 @@ typedef enum pg_enc ...@@ -208,8 +214,7 @@ typedef enum pg_enc
#define PG_VALID_ENCODING(_enc) \ #define PG_VALID_ENCODING(_enc) \
((_enc) >= 0 && (_enc) < _PG_LAST_ENCODING_) ((_enc) >= 0 && (_enc) < _PG_LAST_ENCODING_)
/* On FE are possible all encodings /* On FE are possible all encodings */
*/
#define PG_VALID_FE_ENCODING(_enc) PG_VALID_ENCODING(_enc) #define PG_VALID_FE_ENCODING(_enc) PG_VALID_ENCODING(_enc)
/* /*
...@@ -249,18 +254,21 @@ extern const char *pg_encoding_to_char(int encoding); ...@@ -249,18 +254,21 @@ extern const char *pg_encoding_to_char(int encoding);
typedef int (*mb2wchar_with_len_converter) (const unsigned char *from, typedef int (*mb2wchar_with_len_converter) (const unsigned char *from,
pg_wchar *to, pg_wchar *to,
int len); int len);
typedef int (*mblen_converter) (const unsigned char *mbstr); typedef int (*mblen_converter) (const unsigned char *mbstr);
typedef int (*mbdisplaylen_converter) (const unsigned char *mbstr); typedef int (*mbdisplaylen_converter) (const unsigned char *mbstr);
typedef int (*mbverifier) (const unsigned char *mbstr, int len);
typedef struct typedef struct
{ {
mb2wchar_with_len_converter mb2wchar_with_len; /* convert a multibyte mb2wchar_with_len_converter mb2wchar_with_len; /* convert a multibyte
* string to a wchar */ * string to a wchar */
mblen_converter mblen; /* returns the length of a multibyte char */ mblen_converter mblen; /* get byte length of a char */
mbdisplaylen_converter dsplen; /* returns the lenghth of a display mbdisplaylen_converter dsplen; /* get display width of a char */
* length */ mbverifier mbverify; /* verify multibyte sequence */
int maxmblen; /* max bytes for a char in this charset */ int maxmblen; /* max bytes for a char in this encoding */
} pg_wchar_tbl; } pg_wchar_tbl;
extern pg_wchar_tbl pg_wchar_table[]; extern pg_wchar_tbl pg_wchar_table[];
...@@ -293,6 +301,7 @@ extern int pg_mblen(const char *mbstr); ...@@ -293,6 +301,7 @@ extern int pg_mblen(const char *mbstr);
extern int pg_dsplen(const char *mbstr); extern int pg_dsplen(const char *mbstr);
extern int pg_encoding_mblen(int encoding, const char *mbstr); extern int pg_encoding_mblen(int encoding, const char *mbstr);
extern int pg_encoding_dsplen(int encoding, const char *mbstr); extern int pg_encoding_dsplen(int encoding, const char *mbstr);
extern int pg_encoding_verifymb(int encoding, const char *mbstr, int len);
extern int pg_mule_mblen(const unsigned char *mbstr); extern int pg_mule_mblen(const unsigned char *mbstr);
extern int pg_mic_mblen(const unsigned char *mbstr); extern int pg_mic_mblen(const unsigned char *mbstr);
extern int pg_mbstrlen(const char *mbstr); extern int pg_mbstrlen(const char *mbstr);
...@@ -326,21 +335,32 @@ extern char *pg_server_to_client(const char *s, int len); ...@@ -326,21 +335,32 @@ extern char *pg_server_to_client(const char *s, int len);
extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc); extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc);
extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc); extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc);
extern void LocalToUtf(unsigned char *iso, unsigned char *utf, extern void LocalToUtf(const unsigned char *iso, unsigned char *utf,
pg_local_to_utf *map, int size, int encoding, int len); const pg_local_to_utf *map, int size, int encoding, int len);
extern void UtfToLocal(unsigned char *utf, unsigned char *iso, extern void UtfToLocal(const unsigned char *utf, unsigned char *iso,
pg_utf_to_local *map, int size, int len); const pg_utf_to_local *map, int size, int encoding, int len);
extern bool pg_verifymbstr(const char *mbstr, int len, bool noError); extern bool pg_verifymbstr(const char *mbstr, int len, bool noError);
extern bool pg_verify_mbstr(int encoding, const char *mbstr, int len,
extern void pg_ascii2mic(unsigned char *src, unsigned char *dest, int len); bool noError);
extern void pg_mic2ascii(unsigned char *src, unsigned char *dest, int len);
extern void pg_print_bogus_char(unsigned char **mic, unsigned char **p); extern void report_invalid_encoding(int encoding, const char *mbstr, int len);
extern void latin2mic(unsigned char *l, unsigned char *p, int len, int lc); extern void report_untranslatable_char(int src_encoding, int dest_encoding,
extern void mic2latin(unsigned char *mic, unsigned char *p, int len, int lc); const char *mbstr, int len);
extern void latin2mic_with_table(unsigned char *l, unsigned char *p, int len, int lc, unsigned char *tab);
extern void mic2latin_with_table(unsigned char *mic, unsigned char *p, int len, int lc, unsigned char *tab); extern void pg_ascii2mic(const unsigned char *l, unsigned char *p, int len);
extern void pg_mic2ascii(const unsigned char *mic, unsigned char *p, int len);
extern void latin2mic(const unsigned char *l, unsigned char *p, int len,
int lc, int encoding);
extern void mic2latin(const unsigned char *mic, unsigned char *p, int len,
int lc, int encoding);
extern void latin2mic_with_table(const unsigned char *l, unsigned char *p,
int len, int lc, int encoding,
const unsigned char *tab);
extern void mic2latin_with_table(const unsigned char *mic, unsigned char *p,
int len, int lc, int encoding,
const unsigned char *tab);
extern bool pg_utf8_islegal(const unsigned char *source, int length); extern bool pg_utf8_islegal(const unsigned char *source, int length);
......
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment