Commit ea1b99a6 authored by Heikki Linnakangas

Add 'noError' argument to encoding conversion functions.

With the 'noError' argument, you can try to convert a buffer without
knowing the character boundaries beforehand. The functions now need to
return the number of input bytes successfully converted.

This is a backwards-incompatible change if you have created a custom
encoding conversion with CREATE CONVERSION. This adds a check to
pg_upgrade for that, refusing the upgrade if there are any user-defined
encoding conversions. Custom conversions are very rare; there are no
commonly used extensions that I know of that use that feature. No other
objects can depend on conversions, so if you do have one, you can fairly
easily drop it before upgrading, and recreate it after the upgrade with
an updated version.

Add regression tests for built-in encoding conversions. This doesn't cover
every conversion, but it covers all the internal functions in conv.c that
are used to implement the conversions.

Reviewed-by: John Naylor
Discussion: https://www.postgresql.org/message-id/e7861509-3960-538a-9025-b75a61188e01%40iki.fi
parent e2639a76
...@@ -117,9 +117,15 @@ conv_proc( ...@@ -117,9 +117,15 @@ conv_proc(
integer, -- destination encoding ID integer, -- destination encoding ID
cstring, -- source string (null terminated C string) cstring, -- source string (null terminated C string)
internal, -- destination (fill with a null terminated C string) internal, -- destination (fill with a null terminated C string)
integer -- source string length integer, -- source string length
) RETURNS void; boolean -- if true, don't throw an error if conversion fails
</programlisting></para> ) RETURNS integer;
</programlisting>
The return value is the number of source bytes that were successfully
converted. If the last argument is false, the function must throw an
error on invalid input, and the return value is always equal to the
source string length.
</para>
</listitem> </listitem>
</varlistentry> </varlistentry>
</variablelist> </variablelist>
......
...@@ -45,8 +45,9 @@ CreateConversionCommand(CreateConversionStmt *stmt) ...@@ -45,8 +45,9 @@ CreateConversionCommand(CreateConversionStmt *stmt)
const char *from_encoding_name = stmt->for_encoding_name; const char *from_encoding_name = stmt->for_encoding_name;
const char *to_encoding_name = stmt->to_encoding_name; const char *to_encoding_name = stmt->to_encoding_name;
List *func_name = stmt->func_name; List *func_name = stmt->func_name;
static const Oid funcargs[] = {INT4OID, INT4OID, CSTRINGOID, INTERNALOID, INT4OID}; static const Oid funcargs[] = {INT4OID, INT4OID, CSTRINGOID, INTERNALOID, INT4OID, BOOLOID};
char result[1]; char result[1];
Datum funcresult;
/* Convert list of names to a name and namespace */ /* Convert list of names to a name and namespace */
namespaceId = QualifiedNameGetCreationNamespace(stmt->conversion_name, namespaceId = QualifiedNameGetCreationNamespace(stmt->conversion_name,
...@@ -92,12 +93,12 @@ CreateConversionCommand(CreateConversionStmt *stmt) ...@@ -92,12 +93,12 @@ CreateConversionCommand(CreateConversionStmt *stmt)
funcoid = LookupFuncName(func_name, sizeof(funcargs) / sizeof(Oid), funcoid = LookupFuncName(func_name, sizeof(funcargs) / sizeof(Oid),
funcargs, false); funcargs, false);
/* Check it returns VOID, else it's probably the wrong function */ /* Check it returns int4, else it's probably the wrong function */
if (get_func_rettype(funcoid) != VOIDOID) if (get_func_rettype(funcoid) != INT4OID)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION), (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("encoding conversion function %s must return type %s", errmsg("encoding conversion function %s must return type %s",
NameListToString(func_name), "void"))); NameListToString(func_name), "integer")));
/* Check we have EXECUTE rights for the function */ /* Check we have EXECUTE rights for the function */
aclresult = pg_proc_aclcheck(funcoid, GetUserId(), ACL_EXECUTE); aclresult = pg_proc_aclcheck(funcoid, GetUserId(), ACL_EXECUTE);
...@@ -111,12 +112,23 @@ CreateConversionCommand(CreateConversionStmt *stmt) ...@@ -111,12 +112,23 @@ CreateConversionCommand(CreateConversionStmt *stmt)
* string; the conversion function should throw an error if it can't * string; the conversion function should throw an error if it can't
* perform the requested conversion. * perform the requested conversion.
*/ */
OidFunctionCall5(funcoid, funcresult = OidFunctionCall6(funcoid,
Int32GetDatum(from_encoding), Int32GetDatum(from_encoding),
Int32GetDatum(to_encoding), Int32GetDatum(to_encoding),
CStringGetDatum(""), CStringGetDatum(""),
CStringGetDatum(result), CStringGetDatum(result),
Int32GetDatum(0)); Int32GetDatum(0),
BoolGetDatum(false));
/*
* The function should return 0 for empty input. Might as well check that,
* too.
*/
if (DatumGetInt32(funcresult) != 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("encoding conversion function %s returned incorrect result for empty input",
NameListToString(func_name))));
/* /*
* All seem ok, go ahead (possible failure would be a duplicate conversion * All seem ok, go ahead (possible failure would be a duplicate conversion
......
...@@ -2271,6 +2271,8 @@ write_console(const char *line, int len) ...@@ -2271,6 +2271,8 @@ write_console(const char *line, int len)
* Conversion on non-win32 platforms is not implemented yet. It requires * Conversion on non-win32 platforms is not implemented yet. It requires
* non-throw version of pg_do_encoding_conversion(), that converts * non-throw version of pg_do_encoding_conversion(), that converts
* unconvertable characters to '?' without errors. * unconvertable characters to '?' without errors.
*
* XXX: We have a no-throw version now. It doesn't convert to '?' though.
*/ */
#endif #endif
......
...@@ -25,15 +25,20 @@ ...@@ -25,15 +25,20 @@
* tab holds conversion entries for the source charset * tab holds conversion entries for the source charset
* starting from 128 (0x80). each entry in the table holds the corresponding * starting from 128 (0x80). each entry in the table holds the corresponding
* code point for the target charset, or 0 if there is no equivalent code. * code point for the target charset, or 0 if there is no equivalent code.
*
* Returns the number of input bytes consumed. If noError is true, this can
* be less than 'len'.
*/ */
void int
local2local(const unsigned char *l, local2local(const unsigned char *l,
unsigned char *p, unsigned char *p,
int len, int len,
int src_encoding, int src_encoding,
int dest_encoding, int dest_encoding,
const unsigned char *tab) const unsigned char *tab,
bool noError)
{ {
const unsigned char *start = l;
unsigned char c1, unsigned char c1,
c2; c2;
...@@ -41,7 +46,11 @@ local2local(const unsigned char *l, ...@@ -41,7 +46,11 @@ local2local(const unsigned char *l,
{ {
c1 = *l; c1 = *l;
if (c1 == 0) if (c1 == 0)
{
if (noError)
break;
report_invalid_encoding(src_encoding, (const char *) l, len); report_invalid_encoding(src_encoding, (const char *) l, len);
}
if (!IS_HIGHBIT_SET(c1)) if (!IS_HIGHBIT_SET(c1))
*p++ = c1; *p++ = c1;
else else
...@@ -50,13 +59,19 @@ local2local(const unsigned char *l, ...@@ -50,13 +59,19 @@ local2local(const unsigned char *l,
if (c2) if (c2)
*p++ = c2; *p++ = c2;
else else
{
if (noError)
break;
report_untranslatable_char(src_encoding, dest_encoding, report_untranslatable_char(src_encoding, dest_encoding,
(const char *) l, len); (const char *) l, len);
}
} }
l++; l++;
len--; len--;
} }
*p = '\0'; *p = '\0';
return l - start;
} }
/* /*
...@@ -66,18 +81,26 @@ local2local(const unsigned char *l, ...@@ -66,18 +81,26 @@ local2local(const unsigned char *l,
* p is the output area (must be large enough!) * p is the output area (must be large enough!)
* lc is the mule character set id for the local encoding * lc is the mule character set id for the local encoding
* encoding is the PG identifier for the local encoding * encoding is the PG identifier for the local encoding
*
* Returns the number of input bytes consumed. If noError is true, this can
* be less than 'len'.
*/ */
void int
latin2mic(const unsigned char *l, unsigned char *p, int len, latin2mic(const unsigned char *l, unsigned char *p, int len,
int lc, int encoding) int lc, int encoding, bool noError)
{ {
const unsigned char *start = l;
int c1; int c1;
while (len > 0) while (len > 0)
{ {
c1 = *l; c1 = *l;
if (c1 == 0) if (c1 == 0)
{
if (noError)
break;
report_invalid_encoding(encoding, (const char *) l, len); report_invalid_encoding(encoding, (const char *) l, len);
}
if (IS_HIGHBIT_SET(c1)) if (IS_HIGHBIT_SET(c1))
*p++ = lc; *p++ = lc;
*p++ = c1; *p++ = c1;
...@@ -85,6 +108,8 @@ latin2mic(const unsigned char *l, unsigned char *p, int len, ...@@ -85,6 +108,8 @@ latin2mic(const unsigned char *l, unsigned char *p, int len,
len--; len--;
} }
*p = '\0'; *p = '\0';
return l - start;
} }
/* /*
...@@ -94,18 +119,26 @@ latin2mic(const unsigned char *l, unsigned char *p, int len, ...@@ -94,18 +119,26 @@ latin2mic(const unsigned char *l, unsigned char *p, int len,
* p is the output area (must be large enough!) * p is the output area (must be large enough!)
* lc is the mule character set id for the local encoding * lc is the mule character set id for the local encoding
* encoding is the PG identifier for the local encoding * encoding is the PG identifier for the local encoding
*
* Returns the number of input bytes consumed. If noError is true, this can
* be less than 'len'.
*/ */
void int
mic2latin(const unsigned char *mic, unsigned char *p, int len, mic2latin(const unsigned char *mic, unsigned char *p, int len,
int lc, int encoding) int lc, int encoding, bool noError)
{ {
const unsigned char *start = mic;
int c1; int c1;
while (len > 0) while (len > 0)
{ {
c1 = *mic; c1 = *mic;
if (c1 == 0) if (c1 == 0)
{
if (noError)
break;
report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
}
if (!IS_HIGHBIT_SET(c1)) if (!IS_HIGHBIT_SET(c1))
{ {
/* easy for ASCII */ /* easy for ASCII */
...@@ -118,17 +151,27 @@ mic2latin(const unsigned char *mic, unsigned char *p, int len, ...@@ -118,17 +151,27 @@ mic2latin(const unsigned char *mic, unsigned char *p, int len,
int l = pg_mule_mblen(mic); int l = pg_mule_mblen(mic);
if (len < l) if (len < l)
{
if (noError)
break;
report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
len); len);
}
if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1])) if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]))
{
if (noError)
break;
report_untranslatable_char(PG_MULE_INTERNAL, encoding, report_untranslatable_char(PG_MULE_INTERNAL, encoding,
(const char *) mic, len); (const char *) mic, len);
}
*p++ = mic[1]; *p++ = mic[1];
mic += 2; mic += 2;
len -= 2; len -= 2;
} }
} }
*p = '\0'; *p = '\0';
return mic - start;
} }
...@@ -143,15 +186,20 @@ mic2latin(const unsigned char *mic, unsigned char *p, int len, ...@@ -143,15 +186,20 @@ mic2latin(const unsigned char *mic, unsigned char *p, int len,
* tab holds conversion entries for the local charset * tab holds conversion entries for the local charset
* starting from 128 (0x80). each entry in the table holds the corresponding * starting from 128 (0x80). each entry in the table holds the corresponding
* code point for the mule encoding, or 0 if there is no equivalent code. * code point for the mule encoding, or 0 if there is no equivalent code.
*
* Returns the number of input bytes consumed. If noError is true, this can
* be less than 'len'.
*/ */
void int
latin2mic_with_table(const unsigned char *l, latin2mic_with_table(const unsigned char *l,
unsigned char *p, unsigned char *p,
int len, int len,
int lc, int lc,
int encoding, int encoding,
const unsigned char *tab) const unsigned char *tab,
bool noError)
{ {
const unsigned char *start = l;
unsigned char c1, unsigned char c1,
c2; c2;
...@@ -159,7 +207,11 @@ latin2mic_with_table(const unsigned char *l, ...@@ -159,7 +207,11 @@ latin2mic_with_table(const unsigned char *l,
{ {
c1 = *l; c1 = *l;
if (c1 == 0) if (c1 == 0)
{
if (noError)
break;
report_invalid_encoding(encoding, (const char *) l, len); report_invalid_encoding(encoding, (const char *) l, len);
}
if (!IS_HIGHBIT_SET(c1)) if (!IS_HIGHBIT_SET(c1))
*p++ = c1; *p++ = c1;
else else
...@@ -171,13 +223,19 @@ latin2mic_with_table(const unsigned char *l, ...@@ -171,13 +223,19 @@ latin2mic_with_table(const unsigned char *l,
*p++ = c2; *p++ = c2;
} }
else else
{
if (noError)
break;
report_untranslatable_char(encoding, PG_MULE_INTERNAL, report_untranslatable_char(encoding, PG_MULE_INTERNAL,
(const char *) l, len); (const char *) l, len);
}
} }
l++; l++;
len--; len--;
} }
*p = '\0'; *p = '\0';
return l - start;
} }
/* /*
...@@ -191,15 +249,20 @@ latin2mic_with_table(const unsigned char *l, ...@@ -191,15 +249,20 @@ latin2mic_with_table(const unsigned char *l,
* tab holds conversion entries for the mule internal code's second byte, * tab holds conversion entries for the mule internal code's second byte,
* starting from 128 (0x80). each entry in the table holds the corresponding * starting from 128 (0x80). each entry in the table holds the corresponding
* code point for the local charset, or 0 if there is no equivalent code. * code point for the local charset, or 0 if there is no equivalent code.
*
* Returns the number of input bytes consumed. If noError is true, this can
* be less than 'len'.
*/ */
void int
mic2latin_with_table(const unsigned char *mic, mic2latin_with_table(const unsigned char *mic,
unsigned char *p, unsigned char *p,
int len, int len,
int lc, int lc,
int encoding, int encoding,
const unsigned char *tab) const unsigned char *tab,
bool noError)
{ {
const unsigned char *start = mic;
unsigned char c1, unsigned char c1,
c2; c2;
...@@ -207,7 +270,11 @@ mic2latin_with_table(const unsigned char *mic, ...@@ -207,7 +270,11 @@ mic2latin_with_table(const unsigned char *mic,
{ {
c1 = *mic; c1 = *mic;
if (c1 == 0) if (c1 == 0)
{
if (noError)
break;
report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
}
if (!IS_HIGHBIT_SET(c1)) if (!IS_HIGHBIT_SET(c1))
{ {
/* easy for ASCII */ /* easy for ASCII */
...@@ -220,11 +287,17 @@ mic2latin_with_table(const unsigned char *mic, ...@@ -220,11 +287,17 @@ mic2latin_with_table(const unsigned char *mic,
int l = pg_mule_mblen(mic); int l = pg_mule_mblen(mic);
if (len < l) if (len < l)
{
if (noError)
break;
report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
len); len);
}
if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) || if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) ||
(c2 = tab[mic[1] - HIGHBIT]) == 0) (c2 = tab[mic[1] - HIGHBIT]) == 0)
{ {
if (noError)
break;
report_untranslatable_char(PG_MULE_INTERNAL, encoding, report_untranslatable_char(PG_MULE_INTERNAL, encoding,
(const char *) mic, len); (const char *) mic, len);
break; /* keep compiler quiet */ break; /* keep compiler quiet */
...@@ -235,6 +308,8 @@ mic2latin_with_table(const unsigned char *mic, ...@@ -235,6 +308,8 @@ mic2latin_with_table(const unsigned char *mic,
} }
} }
*p = '\0'; *p = '\0';
return mic - start;
} }
/* /*
...@@ -424,18 +499,22 @@ pg_mb_radix_conv(const pg_mb_radix_tree *rt, ...@@ -424,18 +499,22 @@ pg_mb_radix_conv(const pg_mb_radix_tree *rt,
* is applied. An error is raised if no match is found. * is applied. An error is raised if no match is found.
* *
* See pg_wchar.h for more details about the data structures used here. * See pg_wchar.h for more details about the data structures used here.
*
* Returns the number of input bytes consumed. If noError is true, this can
* be less than 'len'.
*/ */
void int
UtfToLocal(const unsigned char *utf, int len, UtfToLocal(const unsigned char *utf, int len,
unsigned char *iso, unsigned char *iso,
const pg_mb_radix_tree *map, const pg_mb_radix_tree *map,
const pg_utf_to_local_combined *cmap, int cmapsize, const pg_utf_to_local_combined *cmap, int cmapsize,
utf_local_conversion_func conv_func, utf_local_conversion_func conv_func,
int encoding) int encoding, bool noError)
{ {
uint32 iutf; uint32 iutf;
int l; int l;
const pg_utf_to_local_combined *cp; const pg_utf_to_local_combined *cp;
const unsigned char *start = utf;
if (!PG_VALID_ENCODING(encoding)) if (!PG_VALID_ENCODING(encoding))
ereport(ERROR, ereport(ERROR,
...@@ -505,10 +584,19 @@ UtfToLocal(const unsigned char *utf, int len, ...@@ -505,10 +584,19 @@ UtfToLocal(const unsigned char *utf, int len,
l = pg_utf_mblen(utf); l = pg_utf_mblen(utf);
if (len < l) if (len < l)
{
/* need more data to decide if this is a combined char */
utf -= l_save;
break; break;
}
if (!pg_utf8_islegal(utf, l)) if (!pg_utf8_islegal(utf, l))
{
if (!noError)
report_invalid_encoding(PG_UTF8, (const char *) utf, len);
utf -= l_save;
break; break;
}
/* We assume ASCII character cannot be in combined map */ /* We assume ASCII character cannot be in combined map */
if (l > 1) if (l > 1)
...@@ -584,15 +672,20 @@ UtfToLocal(const unsigned char *utf, int len, ...@@ -584,15 +672,20 @@ UtfToLocal(const unsigned char *utf, int len,
} }
/* failed to translate this character */ /* failed to translate this character */
utf -= l;
if (noError)
break;
report_untranslatable_char(PG_UTF8, encoding, report_untranslatable_char(PG_UTF8, encoding,
(const char *) (utf - l), len); (const char *) utf, len);
} }
/* if we broke out of loop early, must be invalid input */ /* if we broke out of loop early, must be invalid input */
if (len > 0) if (len > 0 && !noError)
report_invalid_encoding(PG_UTF8, (const char *) utf, len); report_invalid_encoding(PG_UTF8, (const char *) utf, len);
*iso = '\0'; *iso = '\0';
return utf - start;
} }
/* /*
...@@ -616,18 +709,23 @@ UtfToLocal(const unsigned char *utf, int len, ...@@ -616,18 +709,23 @@ UtfToLocal(const unsigned char *utf, int len,
* (if provided) is applied. An error is raised if no match is found. * (if provided) is applied. An error is raised if no match is found.
* *
* See pg_wchar.h for more details about the data structures used here. * See pg_wchar.h for more details about the data structures used here.
*
* Returns the number of input bytes consumed. If noError is true, this can
* be less than 'len'.
*/ */
void int
LocalToUtf(const unsigned char *iso, int len, LocalToUtf(const unsigned char *iso, int len,
unsigned char *utf, unsigned char *utf,
const pg_mb_radix_tree *map, const pg_mb_radix_tree *map,
const pg_local_to_utf_combined *cmap, int cmapsize, const pg_local_to_utf_combined *cmap, int cmapsize,
utf_local_conversion_func conv_func, utf_local_conversion_func conv_func,
int encoding) int encoding,
bool noError)
{ {
uint32 iiso; uint32 iiso;
int l; int l;
const pg_local_to_utf_combined *cp; const pg_local_to_utf_combined *cp;
const unsigned char *start = iso;
if (!PG_VALID_ENCODING(encoding)) if (!PG_VALID_ENCODING(encoding))
ereport(ERROR, ereport(ERROR,
...@@ -723,13 +821,18 @@ LocalToUtf(const unsigned char *iso, int len, ...@@ -723,13 +821,18 @@ LocalToUtf(const unsigned char *iso, int len,
} }
/* failed to translate this character */ /* failed to translate this character */
iso -= l;
if (noError)
break;
report_untranslatable_char(encoding, PG_UTF8, report_untranslatable_char(encoding, PG_UTF8,
(const char *) (iso - l), len); (const char *) iso, len);
} }
/* if we broke out of loop early, must be invalid input */ /* if we broke out of loop early, must be invalid input */
if (len > 0) if (len > 0 && !noError)
report_invalid_encoding(encoding, (const char *) iso, len); report_invalid_encoding(encoding, (const char *) iso, len);
*utf = '\0'; *utf = '\0';
return iso - start;
} }
...@@ -19,8 +19,8 @@ PG_MODULE_MAGIC; ...@@ -19,8 +19,8 @@ PG_MODULE_MAGIC;
PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004); PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004);
PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004); PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004);
static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len); static int euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len, bool noError);
static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len); static int shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len, bool noError);
/* ---------- /* ----------
* conv_proc( * conv_proc(
...@@ -28,8 +28,11 @@ static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char ...@@ -28,8 +28,11 @@ static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char
* INTEGER, -- destination encoding id * INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string) * CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string) * CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length * INTEGER, -- source string length
* ) returns VOID; * BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ---------- * ----------
*/ */
...@@ -39,12 +42,14 @@ euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS) ...@@ -39,12 +42,14 @@ euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_SHIFT_JIS_2004); CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_SHIFT_JIS_2004);
euc_jis_20042shift_jis_2004(src, dest, len); converted = euc_jis_20042shift_jis_2004(src, dest, len, noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -53,20 +58,23 @@ shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS) ...@@ -53,20 +58,23 @@ shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_EUC_JIS_2004); CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_EUC_JIS_2004);
shift_jis_20042euc_jis_2004(src, dest, len); converted = shift_jis_20042euc_jis_2004(src, dest, len, noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
/* /*
* EUC_JIS_2004 -> SHIFT_JIS_2004 * EUC_JIS_2004 -> SHIFT_JIS_2004
*/ */
static void static int
euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len) euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len, bool noError)
{ {
const unsigned char *start = euc;
int c1, int c1,
ku, ku,
ten; ten;
...@@ -79,8 +87,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len) ...@@ -79,8 +87,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
{ {
/* ASCII */ /* ASCII */
if (c1 == 0) if (c1 == 0)
{
if (noError)
break;
report_invalid_encoding(PG_EUC_JIS_2004, report_invalid_encoding(PG_EUC_JIS_2004,
(const char *) euc, len); (const char *) euc, len);
}
*p++ = c1; *p++ = c1;
euc++; euc++;
len--; len--;
...@@ -90,8 +102,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len) ...@@ -90,8 +102,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
l = pg_encoding_verifymbchar(PG_EUC_JIS_2004, (const char *) euc, len); l = pg_encoding_verifymbchar(PG_EUC_JIS_2004, (const char *) euc, len);
if (l < 0) if (l < 0)
{
if (noError)
break;
report_invalid_encoding(PG_EUC_JIS_2004, report_invalid_encoding(PG_EUC_JIS_2004,
(const char *) euc, len); (const char *) euc, len);
}
if (c1 == SS2 && l == 2) /* JIS X 0201 kana? */ if (c1 == SS2 && l == 2) /* JIS X 0201 kana? */
{ {
...@@ -121,8 +137,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len) ...@@ -121,8 +137,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
*p++ = (ku + 0x19b) >> 1; *p++ = (ku + 0x19b) >> 1;
} }
else else
{
if (noError)
break;
report_invalid_encoding(PG_EUC_JIS_2004, report_invalid_encoding(PG_EUC_JIS_2004,
(const char *) euc, len); (const char *) euc, len);
}
} }
if (ku % 2) if (ku % 2)
...@@ -132,8 +152,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len) ...@@ -132,8 +152,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
else if (ten >= 64 && ten <= 94) else if (ten >= 64 && ten <= 94)
*p++ = ten + 0x40; *p++ = ten + 0x40;
else else
{
if (noError)
break;
report_invalid_encoding(PG_EUC_JIS_2004, report_invalid_encoding(PG_EUC_JIS_2004,
(const char *) euc, len); (const char *) euc, len);
}
} }
else else
*p++ = ten + 0x9e; *p++ = ten + 0x9e;
...@@ -149,8 +173,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len) ...@@ -149,8 +173,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
else if (ku >= 63 && ku <= 94) else if (ku >= 63 && ku <= 94)
*p++ = (ku + 0x181) >> 1; *p++ = (ku + 0x181) >> 1;
else else
{
if (noError)
break;
report_invalid_encoding(PG_EUC_JIS_2004, report_invalid_encoding(PG_EUC_JIS_2004,
(const char *) euc, len); (const char *) euc, len);
}
if (ku % 2) if (ku % 2)
{ {
...@@ -159,20 +187,30 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len) ...@@ -159,20 +187,30 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
else if (ten >= 64 && ten <= 94) else if (ten >= 64 && ten <= 94)
*p++ = ten + 0x40; *p++ = ten + 0x40;
else else
{
if (noError)
break;
report_invalid_encoding(PG_EUC_JIS_2004, report_invalid_encoding(PG_EUC_JIS_2004,
(const char *) euc, len); (const char *) euc, len);
}
} }
else else
*p++ = ten + 0x9e; *p++ = ten + 0x9e;
} }
else else
{
if (noError)
break;
report_invalid_encoding(PG_EUC_JIS_2004, report_invalid_encoding(PG_EUC_JIS_2004,
(const char *) euc, len); (const char *) euc, len);
}
euc += l; euc += l;
len -= l; len -= l;
} }
*p = '\0'; *p = '\0';
return euc - start;
} }
/* /*
...@@ -212,9 +250,10 @@ get_ten(int b, int *ku) ...@@ -212,9 +250,10 @@ get_ten(int b, int *ku)
* SHIFT_JIS_2004 ---> EUC_JIS_2004 * SHIFT_JIS_2004 ---> EUC_JIS_2004
*/ */
static void static int
shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len) shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len, bool noError)
{ {
const unsigned char *start = sjis;
int c1; int c1;
int ku, int ku,
ten, ten,
...@@ -230,8 +269,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len ...@@ -230,8 +269,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
{ {
/* ASCII */ /* ASCII */
if (c1 == 0) if (c1 == 0)
{
if (noError)
break;
report_invalid_encoding(PG_SHIFT_JIS_2004, report_invalid_encoding(PG_SHIFT_JIS_2004,
(const char *) sjis, len); (const char *) sjis, len);
}
*p++ = c1; *p++ = c1;
sjis++; sjis++;
len--; len--;
...@@ -241,8 +284,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len ...@@ -241,8 +284,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
l = pg_encoding_verifymbchar(PG_SHIFT_JIS_2004, (const char *) sjis, len); l = pg_encoding_verifymbchar(PG_SHIFT_JIS_2004, (const char *) sjis, len);
if (l < 0 || l > len) if (l < 0 || l > len)
{
if (noError)
break;
report_invalid_encoding(PG_SHIFT_JIS_2004, report_invalid_encoding(PG_SHIFT_JIS_2004,
(const char *) sjis, len); (const char *) sjis, len);
}
if (c1 >= 0xa1 && c1 <= 0xdf && l == 1) if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
{ {
...@@ -266,8 +313,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len ...@@ -266,8 +313,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
ku = (c1 << 1) - 0x100; ku = (c1 << 1) - 0x100;
ten = get_ten(c2, &kubun); ten = get_ten(c2, &kubun);
if (ten < 0) if (ten < 0)
{
if (noError)
break;
report_invalid_encoding(PG_SHIFT_JIS_2004, report_invalid_encoding(PG_SHIFT_JIS_2004,
(const char *) sjis, len); (const char *) sjis, len);
}
ku -= kubun; ku -= kubun;
} }
else if (c1 >= 0xe0 && c1 <= 0xef) /* plane 1 62ku-94ku */ else if (c1 >= 0xe0 && c1 <= 0xef) /* plane 1 62ku-94ku */
...@@ -275,9 +326,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len ...@@ -275,9 +326,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
ku = (c1 << 1) - 0x180; ku = (c1 << 1) - 0x180;
ten = get_ten(c2, &kubun); ten = get_ten(c2, &kubun);
if (ten < 0) if (ten < 0)
{
if (noError)
break;
report_invalid_encoding(PG_SHIFT_JIS_2004, report_invalid_encoding(PG_SHIFT_JIS_2004,
(const char *) sjis, len); (const char *) sjis, len);
}
ku -= kubun; ku -= kubun;
} }
else if (c1 >= 0xf0 && c1 <= 0xf3) /* plane 2 else if (c1 >= 0xf0 && c1 <= 0xf3) /* plane 2
...@@ -286,8 +340,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len ...@@ -286,8 +340,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
plane = 2; plane = 2;
ten = get_ten(c2, &kubun); ten = get_ten(c2, &kubun);
if (ten < 0) if (ten < 0)
{
if (noError)
break;
report_invalid_encoding(PG_SHIFT_JIS_2004, report_invalid_encoding(PG_SHIFT_JIS_2004,
(const char *) sjis, len); (const char *) sjis, len);
}
switch (c1) switch (c1)
{ {
case 0xf0: case 0xf0:
...@@ -309,16 +367,24 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len ...@@ -309,16 +367,24 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
plane = 2; plane = 2;
ten = get_ten(c2, &kubun); ten = get_ten(c2, &kubun);
if (ten < 0) if (ten < 0)
{
if (noError)
break;
report_invalid_encoding(PG_SHIFT_JIS_2004, report_invalid_encoding(PG_SHIFT_JIS_2004,
(const char *) sjis, len); (const char *) sjis, len);
}
if (c1 == 0xf4 && kubun == 1) if (c1 == 0xf4 && kubun == 1)
ku = 15; ku = 15;
else else
ku = (c1 << 1) - 0x19a - kubun; ku = (c1 << 1) - 0x19a - kubun;
} }
else else
{
if (noError)
break;
report_invalid_encoding(PG_SHIFT_JIS_2004, report_invalid_encoding(PG_SHIFT_JIS_2004,
(const char *) sjis, len); (const char *) sjis, len);
}
if (plane == 2) if (plane == 2)
*p++ = SS3; *p++ = SS3;
...@@ -330,4 +396,6 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len ...@@ -330,4 +396,6 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
len -= l; len -= l;
} }
*p = '\0'; *p = '\0';
return sjis - start;
} }
...@@ -26,13 +26,16 @@ PG_FUNCTION_INFO_V1(mic_to_euc_cn); ...@@ -26,13 +26,16 @@ PG_FUNCTION_INFO_V1(mic_to_euc_cn);
* INTEGER, -- destination encoding id * INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string) * CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string) * CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length * INTEGER, -- source string length
* ) returns VOID; * BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ---------- * ----------
*/ */
static void euc_cn2mic(const unsigned char *euc, unsigned char *p, int len); static int euc_cn2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
static void mic2euc_cn(const unsigned char *mic, unsigned char *p, int len); static int mic2euc_cn(const unsigned char *mic, unsigned char *p, int len, bool noError);
Datum Datum
euc_cn_to_mic(PG_FUNCTION_ARGS) euc_cn_to_mic(PG_FUNCTION_ARGS)
...@@ -40,12 +43,14 @@ euc_cn_to_mic(PG_FUNCTION_ARGS) ...@@ -40,12 +43,14 @@ euc_cn_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_CN, PG_MULE_INTERNAL); CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_CN, PG_MULE_INTERNAL);
euc_cn2mic(src, dest, len); converted = euc_cn2mic(src, dest, len, noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -54,20 +59,23 @@ mic_to_euc_cn(PG_FUNCTION_ARGS) ...@@ -54,20 +59,23 @@ mic_to_euc_cn(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_CN); CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_CN);
mic2euc_cn(src, dest, len); converted = mic2euc_cn(src, dest, len, noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
/* /*
* EUC_CN ---> MIC * EUC_CN ---> MIC
*/ */
static void static int
euc_cn2mic(const unsigned char *euc, unsigned char *p, int len) euc_cn2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
{ {
const unsigned char *start = euc;
int c1; int c1;
while (len > 0) while (len > 0)
...@@ -76,7 +84,11 @@ euc_cn2mic(const unsigned char *euc, unsigned char *p, int len) ...@@ -76,7 +84,11 @@ euc_cn2mic(const unsigned char *euc, unsigned char *p, int len)
if (IS_HIGHBIT_SET(c1)) if (IS_HIGHBIT_SET(c1))
{ {
if (len < 2 || !IS_HIGHBIT_SET(euc[1])) if (len < 2 || !IS_HIGHBIT_SET(euc[1]))
{
if (noError)
break;
report_invalid_encoding(PG_EUC_CN, (const char *) euc, len); report_invalid_encoding(PG_EUC_CN, (const char *) euc, len);
}
*p++ = LC_GB2312_80; *p++ = LC_GB2312_80;
*p++ = c1; *p++ = c1;
*p++ = euc[1]; *p++ = euc[1];
...@@ -86,21 +98,28 @@ euc_cn2mic(const unsigned char *euc, unsigned char *p, int len) ...@@ -86,21 +98,28 @@ euc_cn2mic(const unsigned char *euc, unsigned char *p, int len)
else else
{ /* should be ASCII */ { /* should be ASCII */
if (c1 == 0) if (c1 == 0)
{
if (noError)
break;
report_invalid_encoding(PG_EUC_CN, (const char *) euc, len); report_invalid_encoding(PG_EUC_CN, (const char *) euc, len);
}
*p++ = c1; *p++ = c1;
euc++; euc++;
len--; len--;
} }
} }
*p = '\0'; *p = '\0';
return euc - start;
} }
/* /*
* MIC ---> EUC_CN * MIC ---> EUC_CN
*/ */
static void static int
mic2euc_cn(const unsigned char *mic, unsigned char *p, int len) mic2euc_cn(const unsigned char *mic, unsigned char *p, int len, bool noError)
{ {
const unsigned char *start = mic;
int c1; int c1;
while (len > 0) while (len > 0)
...@@ -109,11 +128,19 @@ mic2euc_cn(const unsigned char *mic, unsigned char *p, int len) ...@@ -109,11 +128,19 @@ mic2euc_cn(const unsigned char *mic, unsigned char *p, int len)
if (IS_HIGHBIT_SET(c1)) if (IS_HIGHBIT_SET(c1))
{ {
if (c1 != LC_GB2312_80) if (c1 != LC_GB2312_80)
{
if (noError)
break;
report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_CN, report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_CN,
(const char *) mic, len); (const char *) mic, len);
}
if (len < 3 || !IS_HIGHBIT_SET(mic[1]) || !IS_HIGHBIT_SET(mic[2])) if (len < 3 || !IS_HIGHBIT_SET(mic[1]) || !IS_HIGHBIT_SET(mic[2]))
{
if (noError)
break;
report_invalid_encoding(PG_MULE_INTERNAL, report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len); (const char *) mic, len);
}
mic++; mic++;
*p++ = *mic++; *p++ = *mic++;
*p++ = *mic++; *p++ = *mic++;
...@@ -122,12 +149,18 @@ mic2euc_cn(const unsigned char *mic, unsigned char *p, int len) ...@@ -122,12 +149,18 @@ mic2euc_cn(const unsigned char *mic, unsigned char *p, int len)
else else
{ /* should be ASCII */ { /* should be ASCII */
if (c1 == 0) if (c1 == 0)
{
if (noError)
break;
report_invalid_encoding(PG_MULE_INTERNAL, report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len); (const char *) mic, len);
}
*p++ = c1; *p++ = c1;
mic++; mic++;
len--; len--;
} }
} }
*p = '\0'; *p = '\0';
return mic - start;
} }
...@@ -26,13 +26,16 @@ PG_FUNCTION_INFO_V1(mic_to_euc_kr); ...@@ -26,13 +26,16 @@ PG_FUNCTION_INFO_V1(mic_to_euc_kr);
* INTEGER, -- destination encoding id * INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string) * CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string) * CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length * INTEGER, -- source string length
* ) returns VOID; * BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ---------- * ----------
*/ */
static void euc_kr2mic(const unsigned char *euc, unsigned char *p, int len); static int euc_kr2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
static void mic2euc_kr(const unsigned char *mic, unsigned char *p, int len); static int mic2euc_kr(const unsigned char *mic, unsigned char *p, int len, bool noError);
Datum Datum
euc_kr_to_mic(PG_FUNCTION_ARGS) euc_kr_to_mic(PG_FUNCTION_ARGS)
...@@ -40,12 +43,14 @@ euc_kr_to_mic(PG_FUNCTION_ARGS) ...@@ -40,12 +43,14 @@ euc_kr_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_KR, PG_MULE_INTERNAL); CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_KR, PG_MULE_INTERNAL);
euc_kr2mic(src, dest, len); converted = euc_kr2mic(src, dest, len, noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -54,20 +59,23 @@ mic_to_euc_kr(PG_FUNCTION_ARGS) ...@@ -54,20 +59,23 @@ mic_to_euc_kr(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_KR); CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_KR);
mic2euc_kr(src, dest, len); converted = mic2euc_kr(src, dest, len, noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
/* /*
* EUC_KR ---> MIC * EUC_KR ---> MIC
*/ */
static void static int
euc_kr2mic(const unsigned char *euc, unsigned char *p, int len) euc_kr2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
{ {
const unsigned char *start = euc;
int c1; int c1;
int l; int l;
...@@ -78,8 +86,12 @@ euc_kr2mic(const unsigned char *euc, unsigned char *p, int len) ...@@ -78,8 +86,12 @@ euc_kr2mic(const unsigned char *euc, unsigned char *p, int len)
{ {
l = pg_encoding_verifymbchar(PG_EUC_KR, (const char *) euc, len); l = pg_encoding_verifymbchar(PG_EUC_KR, (const char *) euc, len);
if (l != 2) if (l != 2)
{
if (noError)
break;
report_invalid_encoding(PG_EUC_KR, report_invalid_encoding(PG_EUC_KR,
(const char *) euc, len); (const char *) euc, len);
}
*p++ = LC_KS5601; *p++ = LC_KS5601;
*p++ = c1; *p++ = c1;
*p++ = euc[1]; *p++ = euc[1];
...@@ -89,22 +101,29 @@ euc_kr2mic(const unsigned char *euc, unsigned char *p, int len) ...@@ -89,22 +101,29 @@ euc_kr2mic(const unsigned char *euc, unsigned char *p, int len)
else else
{ /* should be ASCII */ { /* should be ASCII */
if (c1 == 0) if (c1 == 0)
{
if (noError)
break;
report_invalid_encoding(PG_EUC_KR, report_invalid_encoding(PG_EUC_KR,
(const char *) euc, len); (const char *) euc, len);
}
*p++ = c1; *p++ = c1;
euc++; euc++;
len--; len--;
} }
} }
*p = '\0'; *p = '\0';
return euc - start;
} }
/* /*
* MIC ---> EUC_KR * MIC ---> EUC_KR
*/ */
static void static int
mic2euc_kr(const unsigned char *mic, unsigned char *p, int len) mic2euc_kr(const unsigned char *mic, unsigned char *p, int len, bool noError)
{ {
const unsigned char *start = mic;
int c1; int c1;
int l; int l;
...@@ -115,8 +134,12 @@ mic2euc_kr(const unsigned char *mic, unsigned char *p, int len) ...@@ -115,8 +134,12 @@ mic2euc_kr(const unsigned char *mic, unsigned char *p, int len)
{ {
/* ASCII */ /* ASCII */
if (c1 == 0) if (c1 == 0)
{
if (noError)
break;
report_invalid_encoding(PG_MULE_INTERNAL, report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len); (const char *) mic, len);
}
*p++ = c1; *p++ = c1;
mic++; mic++;
len--; len--;
...@@ -124,18 +147,28 @@ mic2euc_kr(const unsigned char *mic, unsigned char *p, int len) ...@@ -124,18 +147,28 @@ mic2euc_kr(const unsigned char *mic, unsigned char *p, int len)
} }
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len); l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
if (l < 0) if (l < 0)
{
if (noError)
break;
report_invalid_encoding(PG_MULE_INTERNAL, report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len); (const char *) mic, len);
}
if (c1 == LC_KS5601) if (c1 == LC_KS5601)
{ {
*p++ = mic[1]; *p++ = mic[1];
*p++ = mic[2]; *p++ = mic[2];
} }
else else
{
if (noError)
break;
report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_KR, report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_KR,
(const char *) mic, len); (const char *) mic, len);
}
mic += l; mic += l;
len -= l; len -= l;
} }
*p = '\0'; *p = '\0';
return mic - start;
} }
...@@ -30,8 +30,11 @@ PG_FUNCTION_INFO_V1(win1250_to_latin2); ...@@ -30,8 +30,11 @@ PG_FUNCTION_INFO_V1(win1250_to_latin2);
* INTEGER, -- destination encoding id * INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string) * CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string) * CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length * INTEGER, -- source string length
* ) returns VOID; * BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ---------- * ----------
*/ */
...@@ -82,12 +85,14 @@ latin2_to_mic(PG_FUNCTION_ARGS) ...@@ -82,12 +85,14 @@ latin2_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN2, PG_MULE_INTERNAL); CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN2, PG_MULE_INTERNAL);
latin2mic(src, dest, len, LC_ISO8859_2, PG_LATIN2); converted = latin2mic(src, dest, len, LC_ISO8859_2, PG_LATIN2, noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -96,12 +101,14 @@ mic_to_latin2(PG_FUNCTION_ARGS) ...@@ -96,12 +101,14 @@ mic_to_latin2(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN2); CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN2);
mic2latin(src, dest, len, LC_ISO8859_2, PG_LATIN2); converted = mic2latin(src, dest, len, LC_ISO8859_2, PG_LATIN2, noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -110,13 +117,15 @@ win1250_to_mic(PG_FUNCTION_ARGS) ...@@ -110,13 +117,15 @@ win1250_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1250, PG_MULE_INTERNAL); CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1250, PG_MULE_INTERNAL);
latin2mic_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250, converted = latin2mic_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250,
win1250_2_iso88592); win1250_2_iso88592, noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -125,13 +134,15 @@ mic_to_win1250(PG_FUNCTION_ARGS) ...@@ -125,13 +134,15 @@ mic_to_win1250(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN1250); CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN1250);
mic2latin_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250, converted = mic2latin_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250,
iso88592_2_win1250); iso88592_2_win1250, noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -140,12 +151,15 @@ latin2_to_win1250(PG_FUNCTION_ARGS) ...@@ -140,12 +151,15 @@ latin2_to_win1250(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN2, PG_WIN1250); CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN2, PG_WIN1250);
local2local(src, dest, len, PG_LATIN2, PG_WIN1250, iso88592_2_win1250); converted = local2local(src, dest, len, PG_LATIN2, PG_WIN1250,
iso88592_2_win1250, noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -154,10 +168,13 @@ win1250_to_latin2(PG_FUNCTION_ARGS) ...@@ -154,10 +168,13 @@ win1250_to_latin2(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1250, PG_LATIN2); CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1250, PG_LATIN2);
local2local(src, dest, len, PG_WIN1250, PG_LATIN2, win1250_2_iso88592); converted = local2local(src, dest, len, PG_WIN1250, PG_LATIN2,
win1250_2_iso88592, noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
...@@ -30,8 +30,11 @@ PG_FUNCTION_INFO_V1(mic_to_latin4); ...@@ -30,8 +30,11 @@ PG_FUNCTION_INFO_V1(mic_to_latin4);
* INTEGER, -- destination encoding id * INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string) * CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string) * CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length * INTEGER, -- source string length
* ) returns VOID; * BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ---------- * ----------
*/ */
...@@ -42,12 +45,14 @@ latin1_to_mic(PG_FUNCTION_ARGS) ...@@ -42,12 +45,14 @@ latin1_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_MULE_INTERNAL); CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_MULE_INTERNAL);
latin2mic(src, dest, len, LC_ISO8859_1, PG_LATIN1); converted = latin2mic(src, dest, len, LC_ISO8859_1, PG_LATIN1, noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -56,12 +61,14 @@ mic_to_latin1(PG_FUNCTION_ARGS) ...@@ -56,12 +61,14 @@ mic_to_latin1(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN1); CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN1);
mic2latin(src, dest, len, LC_ISO8859_1, PG_LATIN1); converted = mic2latin(src, dest, len, LC_ISO8859_1, PG_LATIN1, noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -70,12 +77,14 @@ latin3_to_mic(PG_FUNCTION_ARGS) ...@@ -70,12 +77,14 @@ latin3_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN3, PG_MULE_INTERNAL); CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN3, PG_MULE_INTERNAL);
latin2mic(src, dest, len, LC_ISO8859_3, PG_LATIN3); converted = latin2mic(src, dest, len, LC_ISO8859_3, PG_LATIN3, noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -84,12 +93,14 @@ mic_to_latin3(PG_FUNCTION_ARGS) ...@@ -84,12 +93,14 @@ mic_to_latin3(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN3); CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN3);
mic2latin(src, dest, len, LC_ISO8859_3, PG_LATIN3); converted = mic2latin(src, dest, len, LC_ISO8859_3, PG_LATIN3, noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -98,12 +109,14 @@ latin4_to_mic(PG_FUNCTION_ARGS) ...@@ -98,12 +109,14 @@ latin4_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN4, PG_MULE_INTERNAL); CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN4, PG_MULE_INTERNAL);
latin2mic(src, dest, len, LC_ISO8859_4, PG_LATIN4); converted = latin2mic(src, dest, len, LC_ISO8859_4, PG_LATIN4, noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -112,10 +125,12 @@ mic_to_latin4(PG_FUNCTION_ARGS) ...@@ -112,10 +125,12 @@ mic_to_latin4(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN4); CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN4);
mic2latin(src, dest, len, LC_ISO8859_4, PG_LATIN4); converted = mic2latin(src, dest, len, LC_ISO8859_4, PG_LATIN4, noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_big5); ...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_big5);
* INTEGER, -- destination encoding id * INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string) * CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string) * CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length * INTEGER, -- source string length
* ) returns VOID; * BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ---------- * ----------
*/ */
Datum Datum
...@@ -38,16 +41,19 @@ big5_to_utf8(PG_FUNCTION_ARGS) ...@@ -38,16 +41,19 @@ big5_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_UTF8); CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_UTF8);
LocalToUtf(src, len, dest, converted = LocalToUtf(src, len, dest,
&big5_to_unicode_tree, &big5_to_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_BIG5); PG_BIG5,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -56,14 +62,17 @@ utf8_to_big5(PG_FUNCTION_ARGS) ...@@ -56,14 +62,17 @@ utf8_to_big5(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_BIG5); CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_BIG5);
UtfToLocal(src, len, dest, converted = UtfToLocal(src, len, dest,
&big5_from_unicode_tree, &big5_from_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_BIG5); PG_BIG5,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
...@@ -33,8 +33,11 @@ PG_FUNCTION_INFO_V1(koi8u_to_utf8); ...@@ -33,8 +33,11 @@ PG_FUNCTION_INFO_V1(koi8u_to_utf8);
* INTEGER, -- destination encoding id * INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string) * CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string) * CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length * INTEGER, -- source string length
* ) returns VOID; * BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ---------- * ----------
*/ */
...@@ -44,16 +47,19 @@ utf8_to_koi8r(PG_FUNCTION_ARGS) ...@@ -44,16 +47,19 @@ utf8_to_koi8r(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8R); CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8R);
UtfToLocal(src, len, dest, converted = UtfToLocal(src, len, dest,
&koi8r_from_unicode_tree, &koi8r_from_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_KOI8R); PG_KOI8R,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -62,16 +68,19 @@ koi8r_to_utf8(PG_FUNCTION_ARGS) ...@@ -62,16 +68,19 @@ koi8r_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_UTF8); CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_UTF8);
LocalToUtf(src, len, dest, converted = LocalToUtf(src, len, dest,
&koi8r_to_unicode_tree, &koi8r_to_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_KOI8R); PG_KOI8R,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -80,16 +89,19 @@ utf8_to_koi8u(PG_FUNCTION_ARGS) ...@@ -80,16 +89,19 @@ utf8_to_koi8u(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8U); CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8U);
UtfToLocal(src, len, dest, converted = UtfToLocal(src, len, dest,
&koi8u_from_unicode_tree, &koi8u_from_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_KOI8U); PG_KOI8U,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -98,14 +110,17 @@ koi8u_to_utf8(PG_FUNCTION_ARGS) ...@@ -98,14 +110,17 @@ koi8u_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8U, PG_UTF8); CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8U, PG_UTF8);
LocalToUtf(src, len, dest, converted = LocalToUtf(src, len, dest,
&koi8u_to_unicode_tree, &koi8u_to_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_KOI8U); PG_KOI8U,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_jis_2004); ...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_jis_2004);
* INTEGER, -- destination encoding id * INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string) * CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string) * CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length * INTEGER, -- source string length
* ) returns VOID; * BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ---------- * ----------
*/ */
Datum Datum
...@@ -38,16 +41,19 @@ euc_jis_2004_to_utf8(PG_FUNCTION_ARGS) ...@@ -38,16 +41,19 @@ euc_jis_2004_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_UTF8); CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_UTF8);
LocalToUtf(src, len, dest, converted = LocalToUtf(src, len, dest,
&euc_jis_2004_to_unicode_tree, &euc_jis_2004_to_unicode_tree,
LUmapEUC_JIS_2004_combined, lengthof(LUmapEUC_JIS_2004_combined), LUmapEUC_JIS_2004_combined, lengthof(LUmapEUC_JIS_2004_combined),
NULL, NULL,
PG_EUC_JIS_2004); PG_EUC_JIS_2004,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -56,14 +62,17 @@ utf8_to_euc_jis_2004(PG_FUNCTION_ARGS) ...@@ -56,14 +62,17 @@ utf8_to_euc_jis_2004(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JIS_2004); CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JIS_2004);
UtfToLocal(src, len, dest, converted = UtfToLocal(src, len, dest,
&euc_jis_2004_from_unicode_tree, &euc_jis_2004_from_unicode_tree,
ULmapEUC_JIS_2004_combined, lengthof(ULmapEUC_JIS_2004_combined), ULmapEUC_JIS_2004_combined, lengthof(ULmapEUC_JIS_2004_combined),
NULL, NULL,
PG_EUC_JIS_2004); PG_EUC_JIS_2004,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_cn); ...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_cn);
* INTEGER, -- destination encoding id * INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string) * CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string) * CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length * INTEGER, -- source string length
* ) returns VOID; * BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ---------- * ----------
*/ */
Datum Datum
...@@ -38,16 +41,19 @@ euc_cn_to_utf8(PG_FUNCTION_ARGS) ...@@ -38,16 +41,19 @@ euc_cn_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_CN, PG_UTF8); CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_CN, PG_UTF8);
LocalToUtf(src, len, dest, converted = LocalToUtf(src, len, dest,
&euc_cn_to_unicode_tree, &euc_cn_to_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_EUC_CN); PG_EUC_CN,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -56,14 +62,17 @@ utf8_to_euc_cn(PG_FUNCTION_ARGS) ...@@ -56,14 +62,17 @@ utf8_to_euc_cn(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_CN); CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_CN);
UtfToLocal(src, len, dest, converted = UtfToLocal(src, len, dest,
&euc_cn_from_unicode_tree, &euc_cn_from_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_EUC_CN); PG_EUC_CN,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_jp); ...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_jp);
* INTEGER, -- destination encoding id * INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string) * CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string) * CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length * INTEGER, -- source string length
* ) returns VOID; * BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ---------- * ----------
*/ */
Datum Datum
...@@ -38,16 +41,19 @@ euc_jp_to_utf8(PG_FUNCTION_ARGS) ...@@ -38,16 +41,19 @@ euc_jp_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_UTF8); CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_UTF8);
LocalToUtf(src, len, dest, converted = LocalToUtf(src, len, dest,
&euc_jp_to_unicode_tree, &euc_jp_to_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_EUC_JP); PG_EUC_JP,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -56,14 +62,17 @@ utf8_to_euc_jp(PG_FUNCTION_ARGS) ...@@ -56,14 +62,17 @@ utf8_to_euc_jp(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JP); CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JP);
UtfToLocal(src, len, dest, converted = UtfToLocal(src, len, dest,
&euc_jp_from_unicode_tree, &euc_jp_from_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_EUC_JP); PG_EUC_JP,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_kr); ...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_kr);
* INTEGER, -- destination encoding id * INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string) * CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string) * CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length * INTEGER, -- source string length
* ) returns VOID; * BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ---------- * ----------
*/ */
Datum Datum
...@@ -38,16 +41,19 @@ euc_kr_to_utf8(PG_FUNCTION_ARGS) ...@@ -38,16 +41,19 @@ euc_kr_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_KR, PG_UTF8); CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_KR, PG_UTF8);
LocalToUtf(src, len, dest, converted = LocalToUtf(src, len, dest,
&euc_kr_to_unicode_tree, &euc_kr_to_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_EUC_KR); PG_EUC_KR,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -56,14 +62,17 @@ utf8_to_euc_kr(PG_FUNCTION_ARGS) ...@@ -56,14 +62,17 @@ utf8_to_euc_kr(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_KR); CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_KR);
UtfToLocal(src, len, dest, converted = UtfToLocal(src, len, dest,
&euc_kr_from_unicode_tree, &euc_kr_from_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_EUC_KR); PG_EUC_KR,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_tw); ...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_tw);
* INTEGER, -- destination encoding id * INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string) * CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string) * CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length * INTEGER, -- source string length
* ) returns VOID; * BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ---------- * ----------
*/ */
Datum Datum
...@@ -38,16 +41,19 @@ euc_tw_to_utf8(PG_FUNCTION_ARGS) ...@@ -38,16 +41,19 @@ euc_tw_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_UTF8); CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_UTF8);
LocalToUtf(src, len, dest, converted = LocalToUtf(src, len, dest,
&euc_tw_to_unicode_tree, &euc_tw_to_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_EUC_TW); PG_EUC_TW,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -56,14 +62,17 @@ utf8_to_euc_tw(PG_FUNCTION_ARGS) ...@@ -56,14 +62,17 @@ utf8_to_euc_tw(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_TW); CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_TW);
UtfToLocal(src, len, dest, converted = UtfToLocal(src, len, dest,
&euc_tw_from_unicode_tree, &euc_tw_from_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_EUC_TW); PG_EUC_TW,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
...@@ -183,8 +183,11 @@ conv_utf8_to_18030(uint32 code) ...@@ -183,8 +183,11 @@ conv_utf8_to_18030(uint32 code)
* INTEGER, -- destination encoding id * INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string) * CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string) * CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length * INTEGER, -- source string length
* ) returns VOID; * BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ---------- * ----------
*/ */
Datum Datum
...@@ -193,16 +196,19 @@ gb18030_to_utf8(PG_FUNCTION_ARGS) ...@@ -193,16 +196,19 @@ gb18030_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_GB18030, PG_UTF8); CHECK_ENCODING_CONVERSION_ARGS(PG_GB18030, PG_UTF8);
LocalToUtf(src, len, dest, converted = LocalToUtf(src, len, dest,
&gb18030_to_unicode_tree, &gb18030_to_unicode_tree,
NULL, 0, NULL, 0,
conv_18030_to_utf8, conv_18030_to_utf8,
PG_GB18030); PG_GB18030,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -211,14 +217,17 @@ utf8_to_gb18030(PG_FUNCTION_ARGS) ...@@ -211,14 +217,17 @@ utf8_to_gb18030(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GB18030); CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GB18030);
UtfToLocal(src, len, dest, converted = UtfToLocal(src, len, dest,
&gb18030_from_unicode_tree, &gb18030_from_unicode_tree,
NULL, 0, NULL, 0,
conv_utf8_to_18030, conv_utf8_to_18030,
PG_GB18030); PG_GB18030,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_gbk); ...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_gbk);
* INTEGER, -- destination encoding id * INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string) * CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string) * CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length * INTEGER, -- source string length
* ) returns VOID; * BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ---------- * ----------
*/ */
Datum Datum
...@@ -38,16 +41,19 @@ gbk_to_utf8(PG_FUNCTION_ARGS) ...@@ -38,16 +41,19 @@ gbk_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_GBK, PG_UTF8); CHECK_ENCODING_CONVERSION_ARGS(PG_GBK, PG_UTF8);
LocalToUtf(src, len, dest, converted = LocalToUtf(src, len, dest,
&gbk_to_unicode_tree, &gbk_to_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_GBK); PG_GBK,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -56,14 +62,17 @@ utf8_to_gbk(PG_FUNCTION_ARGS) ...@@ -56,14 +62,17 @@ utf8_to_gbk(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GBK); CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GBK);
UtfToLocal(src, len, dest, converted = UtfToLocal(src, len, dest,
&gbk_from_unicode_tree, &gbk_from_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_GBK); PG_GBK,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
...@@ -52,8 +52,11 @@ PG_FUNCTION_INFO_V1(utf8_to_iso8859); ...@@ -52,8 +52,11 @@ PG_FUNCTION_INFO_V1(utf8_to_iso8859);
* INTEGER, -- destination encoding id * INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string) * CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string) * CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length * INTEGER, -- source string length
* ) returns VOID; * BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ---------- * ----------
*/ */
...@@ -100,6 +103,7 @@ iso8859_to_utf8(PG_FUNCTION_ARGS) ...@@ -100,6 +103,7 @@ iso8859_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int i; int i;
CHECK_ENCODING_CONVERSION_ARGS(-1, PG_UTF8); CHECK_ENCODING_CONVERSION_ARGS(-1, PG_UTF8);
...@@ -108,12 +112,15 @@ iso8859_to_utf8(PG_FUNCTION_ARGS) ...@@ -108,12 +112,15 @@ iso8859_to_utf8(PG_FUNCTION_ARGS)
{ {
if (encoding == maps[i].encoding) if (encoding == maps[i].encoding)
{ {
LocalToUtf(src, len, dest, int converted;
maps[i].map1,
NULL, 0, converted = LocalToUtf(src, len, dest,
NULL, maps[i].map1,
encoding); NULL, 0,
PG_RETURN_VOID(); NULL,
encoding,
noError);
PG_RETURN_INT32(converted);
} }
} }
...@@ -122,7 +129,7 @@ iso8859_to_utf8(PG_FUNCTION_ARGS) ...@@ -122,7 +129,7 @@ iso8859_to_utf8(PG_FUNCTION_ARGS)
errmsg("unexpected encoding ID %d for ISO 8859 character sets", errmsg("unexpected encoding ID %d for ISO 8859 character sets",
encoding))); encoding)));
PG_RETURN_VOID(); PG_RETURN_INT32(0);
} }
Datum Datum
...@@ -132,6 +139,7 @@ utf8_to_iso8859(PG_FUNCTION_ARGS) ...@@ -132,6 +139,7 @@ utf8_to_iso8859(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int i; int i;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, -1); CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, -1);
...@@ -140,12 +148,15 @@ utf8_to_iso8859(PG_FUNCTION_ARGS) ...@@ -140,12 +148,15 @@ utf8_to_iso8859(PG_FUNCTION_ARGS)
{ {
if (encoding == maps[i].encoding) if (encoding == maps[i].encoding)
{ {
UtfToLocal(src, len, dest, int converted;
maps[i].map2,
NULL, 0, converted = UtfToLocal(src, len, dest,
NULL, maps[i].map2,
encoding); NULL, 0,
PG_RETURN_VOID(); NULL,
encoding,
noError);
PG_RETURN_INT32(converted);
} }
} }
...@@ -154,5 +165,5 @@ utf8_to_iso8859(PG_FUNCTION_ARGS) ...@@ -154,5 +165,5 @@ utf8_to_iso8859(PG_FUNCTION_ARGS)
errmsg("unexpected encoding ID %d for ISO 8859 character sets", errmsg("unexpected encoding ID %d for ISO 8859 character sets",
encoding))); encoding)));
PG_RETURN_VOID(); PG_RETURN_INT32(0);
} }
...@@ -26,8 +26,11 @@ PG_FUNCTION_INFO_V1(utf8_to_iso8859_1); ...@@ -26,8 +26,11 @@ PG_FUNCTION_INFO_V1(utf8_to_iso8859_1);
* INTEGER, -- destination encoding id * INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string) * CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string) * CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length * INTEGER, -- source string length
* ) returns VOID; * BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ---------- * ----------
*/ */
...@@ -37,6 +40,8 @@ iso8859_1_to_utf8(PG_FUNCTION_ARGS) ...@@ -37,6 +40,8 @@ iso8859_1_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
unsigned char *start = src;
unsigned short c; unsigned short c;
CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_UTF8); CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_UTF8);
...@@ -45,7 +50,11 @@ iso8859_1_to_utf8(PG_FUNCTION_ARGS) ...@@ -45,7 +50,11 @@ iso8859_1_to_utf8(PG_FUNCTION_ARGS)
{ {
c = *src; c = *src;
if (c == 0) if (c == 0)
{
if (noError)
break;
report_invalid_encoding(PG_LATIN1, (const char *) src, len); report_invalid_encoding(PG_LATIN1, (const char *) src, len);
}
if (!IS_HIGHBIT_SET(c)) if (!IS_HIGHBIT_SET(c))
*dest++ = c; *dest++ = c;
else else
...@@ -58,7 +67,7 @@ iso8859_1_to_utf8(PG_FUNCTION_ARGS) ...@@ -58,7 +67,7 @@ iso8859_1_to_utf8(PG_FUNCTION_ARGS)
} }
*dest = '\0'; *dest = '\0';
PG_RETURN_VOID(); PG_RETURN_INT32(src - start);
} }
Datum Datum
...@@ -67,6 +76,8 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS) ...@@ -67,6 +76,8 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
unsigned char *start = src;
unsigned short c, unsigned short c,
c1; c1;
...@@ -76,7 +87,11 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS) ...@@ -76,7 +87,11 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS)
{ {
c = *src; c = *src;
if (c == 0) if (c == 0)
{
if (noError)
break;
report_invalid_encoding(PG_UTF8, (const char *) src, len); report_invalid_encoding(PG_UTF8, (const char *) src, len);
}
/* fast path for ASCII-subset characters */ /* fast path for ASCII-subset characters */
if (!IS_HIGHBIT_SET(c)) if (!IS_HIGHBIT_SET(c))
{ {
...@@ -89,10 +104,18 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS) ...@@ -89,10 +104,18 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS)
int l = pg_utf_mblen(src); int l = pg_utf_mblen(src);
if (l > len || !pg_utf8_islegal(src, l)) if (l > len || !pg_utf8_islegal(src, l))
{
if (noError)
break;
report_invalid_encoding(PG_UTF8, (const char *) src, len); report_invalid_encoding(PG_UTF8, (const char *) src, len);
}
if (l != 2) if (l != 2)
{
if (noError)
break;
report_untranslatable_char(PG_UTF8, PG_LATIN1, report_untranslatable_char(PG_UTF8, PG_LATIN1,
(const char *) src, len); (const char *) src, len);
}
c1 = src[1] & 0x3f; c1 = src[1] & 0x3f;
c = ((c & 0x1f) << 6) | c1; c = ((c & 0x1f) << 6) | c1;
if (c >= 0x80 && c <= 0xff) if (c >= 0x80 && c <= 0xff)
...@@ -102,11 +125,15 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS) ...@@ -102,11 +125,15 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS)
len -= 2; len -= 2;
} }
else else
{
if (noError)
break;
report_untranslatable_char(PG_UTF8, PG_LATIN1, report_untranslatable_char(PG_UTF8, PG_LATIN1,
(const char *) src, len); (const char *) src, len);
}
} }
} }
*dest = '\0'; *dest = '\0';
PG_RETURN_VOID(); PG_RETURN_INT32(src - start);
} }
...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_johab); ...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_johab);
* INTEGER, -- destination encoding id * INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string) * CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string) * CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length * INTEGER, -- source string length
* ) returns VOID; * BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ---------- * ----------
*/ */
Datum Datum
...@@ -38,16 +41,19 @@ johab_to_utf8(PG_FUNCTION_ARGS) ...@@ -38,16 +41,19 @@ johab_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_JOHAB, PG_UTF8); CHECK_ENCODING_CONVERSION_ARGS(PG_JOHAB, PG_UTF8);
LocalToUtf(src, len, dest, converted = LocalToUtf(src, len, dest,
&johab_to_unicode_tree, &johab_to_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_JOHAB); PG_JOHAB,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -56,14 +62,17 @@ utf8_to_johab(PG_FUNCTION_ARGS) ...@@ -56,14 +62,17 @@ utf8_to_johab(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_JOHAB); CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_JOHAB);
UtfToLocal(src, len, dest, converted = UtfToLocal(src, len, dest,
&johab_from_unicode_tree, &johab_from_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_JOHAB); PG_JOHAB,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_sjis); ...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_sjis);
* INTEGER, -- destination encoding id * INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string) * CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string) * CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length * INTEGER, -- source string length
* ) returns VOID; * BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ---------- * ----------
*/ */
Datum Datum
...@@ -38,16 +41,19 @@ sjis_to_utf8(PG_FUNCTION_ARGS) ...@@ -38,16 +41,19 @@ sjis_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_UTF8); CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_UTF8);
LocalToUtf(src, len, dest, converted = LocalToUtf(src, len, dest,
&sjis_to_unicode_tree, &sjis_to_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_SJIS); PG_SJIS,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -56,14 +62,17 @@ utf8_to_sjis(PG_FUNCTION_ARGS) ...@@ -56,14 +62,17 @@ utf8_to_sjis(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_SJIS); CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_SJIS);
UtfToLocal(src, len, dest, converted = UtfToLocal(src, len, dest,
&sjis_from_unicode_tree, &sjis_from_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_SJIS); PG_SJIS,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_shift_jis_2004); ...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_shift_jis_2004);
* INTEGER, -- destination encoding id * INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string) * CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string) * CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length * INTEGER, -- source string length
* ) returns VOID; * BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ---------- * ----------
*/ */
Datum Datum
...@@ -38,16 +41,19 @@ shift_jis_2004_to_utf8(PG_FUNCTION_ARGS) ...@@ -38,16 +41,19 @@ shift_jis_2004_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_UTF8); CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_UTF8);
LocalToUtf(src, len, dest, converted = LocalToUtf(src, len, dest,
&shift_jis_2004_to_unicode_tree, &shift_jis_2004_to_unicode_tree,
LUmapSHIFT_JIS_2004_combined, lengthof(LUmapSHIFT_JIS_2004_combined), LUmapSHIFT_JIS_2004_combined, lengthof(LUmapSHIFT_JIS_2004_combined),
NULL, NULL,
PG_SHIFT_JIS_2004); PG_SHIFT_JIS_2004,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -56,14 +62,17 @@ utf8_to_shift_jis_2004(PG_FUNCTION_ARGS) ...@@ -56,14 +62,17 @@ utf8_to_shift_jis_2004(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_SHIFT_JIS_2004); CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_SHIFT_JIS_2004);
UtfToLocal(src, len, dest, converted = UtfToLocal(src, len, dest,
&shift_jis_2004_from_unicode_tree, &shift_jis_2004_from_unicode_tree,
ULmapSHIFT_JIS_2004_combined, lengthof(ULmapSHIFT_JIS_2004_combined), ULmapSHIFT_JIS_2004_combined, lengthof(ULmapSHIFT_JIS_2004_combined),
NULL, NULL,
PG_SHIFT_JIS_2004); PG_SHIFT_JIS_2004,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_uhc); ...@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_uhc);
* INTEGER, -- destination encoding id * INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string) * CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string) * CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length * INTEGER, -- source string length
* ) returns VOID; * BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ---------- * ----------
*/ */
Datum Datum
...@@ -38,16 +41,19 @@ uhc_to_utf8(PG_FUNCTION_ARGS) ...@@ -38,16 +41,19 @@ uhc_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UHC, PG_UTF8); CHECK_ENCODING_CONVERSION_ARGS(PG_UHC, PG_UTF8);
LocalToUtf(src, len, dest, converted = LocalToUtf(src, len, dest,
&uhc_to_unicode_tree, &uhc_to_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_UHC); PG_UHC,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
Datum Datum
...@@ -56,14 +62,17 @@ utf8_to_uhc(PG_FUNCTION_ARGS) ...@@ -56,14 +62,17 @@ utf8_to_uhc(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_UHC); CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_UHC);
UtfToLocal(src, len, dest, converted = UtfToLocal(src, len, dest,
&uhc_from_unicode_tree, &uhc_from_unicode_tree,
NULL, 0, NULL, 0,
NULL, NULL,
PG_UHC); PG_UHC,
noError);
PG_RETURN_VOID(); PG_RETURN_INT32(converted);
} }
...@@ -48,8 +48,11 @@ PG_FUNCTION_INFO_V1(utf8_to_win); ...@@ -48,8 +48,11 @@ PG_FUNCTION_INFO_V1(utf8_to_win);
* INTEGER, -- destination encoding id * INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string) * CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string) * CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length * INTEGER, -- source string length
* ) returns VOID; * BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ---------- * ----------
*/ */
...@@ -81,6 +84,7 @@ win_to_utf8(PG_FUNCTION_ARGS) ...@@ -81,6 +84,7 @@ win_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int i; int i;
CHECK_ENCODING_CONVERSION_ARGS(-1, PG_UTF8); CHECK_ENCODING_CONVERSION_ARGS(-1, PG_UTF8);
...@@ -89,12 +93,15 @@ win_to_utf8(PG_FUNCTION_ARGS) ...@@ -89,12 +93,15 @@ win_to_utf8(PG_FUNCTION_ARGS)
{ {
if (encoding == maps[i].encoding) if (encoding == maps[i].encoding)
{ {
LocalToUtf(src, len, dest, int converted;
maps[i].map1,
NULL, 0, converted = LocalToUtf(src, len, dest,
NULL, maps[i].map1,
encoding); NULL, 0,
PG_RETURN_VOID(); NULL,
encoding,
noError);
PG_RETURN_INT32(converted);
} }
} }
...@@ -103,7 +110,7 @@ win_to_utf8(PG_FUNCTION_ARGS) ...@@ -103,7 +110,7 @@ win_to_utf8(PG_FUNCTION_ARGS)
errmsg("unexpected encoding ID %d for WIN character sets", errmsg("unexpected encoding ID %d for WIN character sets",
encoding))); encoding)));
PG_RETURN_VOID(); PG_RETURN_INT32(0);
} }
Datum Datum
...@@ -113,6 +120,7 @@ utf8_to_win(PG_FUNCTION_ARGS) ...@@ -113,6 +120,7 @@ utf8_to_win(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4); int len = PG_GETARG_INT32(4);
bool noError = PG_GETARG_BOOL(5);
int i; int i;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, -1); CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, -1);
...@@ -121,12 +129,15 @@ utf8_to_win(PG_FUNCTION_ARGS) ...@@ -121,12 +129,15 @@ utf8_to_win(PG_FUNCTION_ARGS)
{ {
if (encoding == maps[i].encoding) if (encoding == maps[i].encoding)
{ {
UtfToLocal(src, len, dest, int converted;
maps[i].map2,
NULL, 0, converted = UtfToLocal(src, len, dest,
NULL, maps[i].map2,
encoding); NULL, 0,
PG_RETURN_VOID(); NULL,
encoding,
noError);
PG_RETURN_INT32(converted);
} }
} }
...@@ -135,5 +146,5 @@ utf8_to_win(PG_FUNCTION_ARGS) ...@@ -135,5 +146,5 @@ utf8_to_win(PG_FUNCTION_ARGS)
errmsg("unexpected encoding ID %d for WIN character sets", errmsg("unexpected encoding ID %d for WIN character sets",
encoding))); encoding)));
PG_RETURN_VOID(); PG_RETURN_INT32(0);
} }
...@@ -406,12 +406,13 @@ pg_do_encoding_conversion(unsigned char *src, int len, ...@@ -406,12 +406,13 @@ pg_do_encoding_conversion(unsigned char *src, int len,
MemoryContextAllocHuge(CurrentMemoryContext, MemoryContextAllocHuge(CurrentMemoryContext,
(Size) len * MAX_CONVERSION_GROWTH + 1); (Size) len * MAX_CONVERSION_GROWTH + 1);
OidFunctionCall5(proc, (void) OidFunctionCall6(proc,
Int32GetDatum(src_encoding), Int32GetDatum(src_encoding),
Int32GetDatum(dest_encoding), Int32GetDatum(dest_encoding),
CStringGetDatum(src), CStringGetDatum(src),
CStringGetDatum(result), CStringGetDatum(result),
Int32GetDatum(len)); Int32GetDatum(len),
BoolGetDatum(false));
/* /*
* If the result is large, it's worth repalloc'ing to release any extra * If the result is large, it's worth repalloc'ing to release any extra
...@@ -435,6 +436,62 @@ pg_do_encoding_conversion(unsigned char *src, int len, ...@@ -435,6 +436,62 @@ pg_do_encoding_conversion(unsigned char *src, int len,
return result; return result;
} }
/*
 * Convert src string to another encoding, writing into a caller's buffer.
 *
 * This function has a different API than the other conversion functions.
 * The caller should've looked up the conversion function using
 * FindDefaultConversionProc(). Unlike the other functions, the converted
 * result is not palloc'd. It is written to the caller-supplied buffer
 * instead.
 *
 * proc - OID of the conversion function to call
 * src_encoding - encoding to convert from
 * dest_encoding - encoding to convert to
 * src, srclen - input buffer and its length in bytes
 * dest, destlen - destination buffer and its size in bytes
 * noError - passed through to the conversion function; if true, it should
 *			 not throw an error if conversion fails
 *
 * Returns the conversion function's result, which is the number of source
 * bytes that were successfully converted.
 *
 * The output is null-terminated.
 *
 * If destlen < srclen * MAX_CONVERSION_GROWTH + 1, the converted output
 * wouldn't necessarily fit in the output buffer, and the function will not
 * convert the whole input.  In particular, if destlen is zero or negative
 * there is no room even for the terminating null, so nothing is written and
 * zero is returned.
 *
 * TODO: The conversion function interface is not great.  Firstly, it
 * would be nice to pass through the destination buffer size to the
 * conversion function, so that if you pass a shorter destination buffer, it
 * could still continue to fill up the whole buffer.  Currently, we have to
 * assume worst case expansion and stop the conversion short, even if there
 * is in fact space left in the destination buffer.  Secondly, it would be
 * nice to return the number of bytes written to the caller, to avoid a call
 * to strlen().
 */
int
pg_do_encoding_conversion_buf(Oid proc,
							  int src_encoding,
							  int dest_encoding,
							  unsigned char *src, int srclen,
							  unsigned char *dest, int destlen,
							  bool noError)
{
	Datum		result;
	Size		max_srclen;

	/*
	 * Guard against a destination buffer with no usable space.  Without this
	 * check, (destlen - 1) would be negative and wrap around to a huge value
	 * once converted to Size below, which would disable the input limit and
	 * let the conversion function write past the end of the buffer.
	 */
	if (destlen <= 0)
		return 0;

	/*
	 * If the destination buffer is not large enough to hold the result in the
	 * worst case, limit the input size passed to the conversion function.
	 * One byte of the destination is reserved for the terminating null.
	 */
	max_srclen = (Size) (destlen - 1) / (Size) MAX_CONVERSION_GROWTH;
	if ((Size) srclen >= max_srclen)
		srclen = (int) max_srclen;

	result = OidFunctionCall6(proc,
							  Int32GetDatum(src_encoding),
							  Int32GetDatum(dest_encoding),
							  CStringGetDatum(src),
							  CStringGetDatum(dest),
							  Int32GetDatum(srclen),
							  BoolGetDatum(noError));

	return DatumGetInt32(result);
}
/* /*
* Convert string to encoding encoding_name. The source * Convert string to encoding encoding_name. The source
* encoding is the DB encoding. * encoding is the DB encoding.
...@@ -762,12 +819,13 @@ perform_default_encoding_conversion(const char *src, int len, ...@@ -762,12 +819,13 @@ perform_default_encoding_conversion(const char *src, int len,
MemoryContextAllocHuge(CurrentMemoryContext, MemoryContextAllocHuge(CurrentMemoryContext,
(Size) len * MAX_CONVERSION_GROWTH + 1); (Size) len * MAX_CONVERSION_GROWTH + 1);
FunctionCall5(flinfo, FunctionCall6(flinfo,
Int32GetDatum(src_encoding), Int32GetDatum(src_encoding),
Int32GetDatum(dest_encoding), Int32GetDatum(dest_encoding),
CStringGetDatum(src), CStringGetDatum(src),
CStringGetDatum(result), CStringGetDatum(result),
Int32GetDatum(len)); Int32GetDatum(len),
BoolGetDatum(false));
/* /*
* Release extra space if there might be a lot --- see comments in * Release extra space if there might be a lot --- see comments in
...@@ -849,12 +907,13 @@ pg_unicode_to_server(pg_wchar c, unsigned char *s) ...@@ -849,12 +907,13 @@ pg_unicode_to_server(pg_wchar c, unsigned char *s)
c_as_utf8[c_as_utf8_len] = '\0'; c_as_utf8[c_as_utf8_len] = '\0';
/* Convert, or throw error if we can't */ /* Convert, or throw error if we can't */
FunctionCall5(Utf8ToServerConvProc, FunctionCall6(Utf8ToServerConvProc,
Int32GetDatum(PG_UTF8), Int32GetDatum(PG_UTF8),
Int32GetDatum(server_encoding), Int32GetDatum(server_encoding),
CStringGetDatum(c_as_utf8), CStringGetDatum(c_as_utf8),
CStringGetDatum(s), CStringGetDatum(s),
Int32GetDatum(c_as_utf8_len)); Int32GetDatum(c_as_utf8_len),
BoolGetDatum(false));
} }
......
...@@ -28,6 +28,7 @@ static void check_for_reg_data_type_usage(ClusterInfo *cluster); ...@@ -28,6 +28,7 @@ static void check_for_reg_data_type_usage(ClusterInfo *cluster);
static void check_for_jsonb_9_4_usage(ClusterInfo *cluster); static void check_for_jsonb_9_4_usage(ClusterInfo *cluster);
static void check_for_pg_role_prefix(ClusterInfo *cluster); static void check_for_pg_role_prefix(ClusterInfo *cluster);
static void check_for_new_tablespace_dir(ClusterInfo *new_cluster); static void check_for_new_tablespace_dir(ClusterInfo *new_cluster);
static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster);
static char *get_canonical_locale_name(int category, const char *locale); static char *get_canonical_locale_name(int category, const char *locale);
...@@ -102,6 +103,15 @@ check_and_dump_old_cluster(bool live_check) ...@@ -102,6 +103,15 @@ check_and_dump_old_cluster(bool live_check)
check_for_reg_data_type_usage(&old_cluster); check_for_reg_data_type_usage(&old_cluster);
check_for_isn_and_int8_passing_mismatch(&old_cluster); check_for_isn_and_int8_passing_mismatch(&old_cluster);
/*
* PG 14 changed the function signature of encoding conversion functions.
* Conversions from older versions cannot be upgraded automatically
* because the user-defined functions used by the encoding conversions
* need to be changed to match the new signature.
*/
if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1300)
check_for_user_defined_encoding_conversions(&old_cluster);
/* /*
* Pre-PG 14 allowed user defined postfix operators, which are not * Pre-PG 14 allowed user defined postfix operators, which are not
* supported anymore. Verify there are none, iff applicable. * supported anymore. Verify there are none, iff applicable.
...@@ -1268,6 +1278,91 @@ check_for_pg_role_prefix(ClusterInfo *cluster) ...@@ -1268,6 +1278,91 @@ check_for_pg_role_prefix(ClusterInfo *cluster)
check_ok(); check_ok();
} }
/*
 * check_for_user_defined_encoding_conversions()
 *
 * Connect to every database of the given cluster and look for encoding
 * conversions whose OID is 16384 or higher.  Each one found is listed in the
 * file "encoding_conversions.txt", grouped by database.  If at least one was
 * found, the check ends with pg_fatal(); otherwise it reports success via
 * check_ok().
 */
static void
check_for_user_defined_encoding_conversions(ClusterInfo *cluster)
{
	char		output_path[MAXPGPATH];
	FILE	   *report = NULL;
	bool		found = false;
	int			db_idx;

	prep_status("Checking for user-defined encoding conversions");

	snprintf(output_path, sizeof(output_path),
			 "encoding_conversions.txt");

	/* Visit each database and collect its user-defined conversions */
	for (db_idx = 0; db_idx < cluster->dbarr.ndbs; db_idx++)
	{
		DbInfo	   *active_db = &cluster->dbarr.dbs[db_idx];
		PGconn	   *conn = connectToServer(cluster, active_db->db_name);
		PGresult   *res;
		bool		header_written = false;
		int			nrows;
		int			col_oid;
		int			col_name;
		int			col_nsp;
		int			row;

		/*
		 * The cutoff 16384 is written out literally in the query instead of
		 * being taken from the FirstNormalObjectId #define: should that
		 * #define ever change, the value that matters here is still the one
		 * used by pre-version 14 servers.
		 */
		res = executeQueryOrDie(conn,
								"SELECT c.oid as conoid, c.conname, n.nspname "
								"FROM pg_catalog.pg_conversion c, "
								" pg_catalog.pg_namespace n "
								"WHERE c.connamespace = n.oid AND "
								" c.oid >= 16384");

		nrows = PQntuples(res);
		col_oid = PQfnumber(res, "conoid");
		col_name = PQfnumber(res, "conname");
		col_nsp = PQfnumber(res, "nspname");

		/* Any row at all means the cluster cannot be upgraded as-is */
		if (nrows > 0)
			found = true;

		for (row = 0; row < nrows; row++)
		{
			/* Create the report file lazily, on the first hit only */
			if (report == NULL)
			{
				report = fopen_priv(output_path, "w");
				if (report == NULL)
					pg_fatal("could not open file \"%s\": %s\n",
							 output_path, strerror(errno));
			}

			/* Emit the per-database heading once */
			if (!header_written)
			{
				fprintf(report, "In database: %s\n", active_db->db_name);
				header_written = true;
			}

			fprintf(report, " (oid=%s) %s.%s\n",
					PQgetvalue(res, row, col_oid),
					PQgetvalue(res, row, col_nsp),
					PQgetvalue(res, row, col_name));
		}

		PQclear(res);
		PQfinish(conn);
	}

	if (report != NULL)
		fclose(report);

	if (!found)
	{
		check_ok();
		return;
	}

	pg_log(PG_REPORT, "fatal\n");
	pg_fatal("Your installation contains user-defined encoding conversions.\n"
			 "The conversion function parameters changed in PostgreSQL version 14\n"
			 "so this cluster cannot currently be upgraded. You can remove the\n"
			 "encoding conversions in the old cluster and restart the upgrade.\n"
			 "A list of user-defined encoding conversions is in the file:\n"
			 " %s\n\n", output_path);
}
/* /*
* get_canonical_locale_name * get_canonical_locale_name
......
...@@ -53,6 +53,6 @@ ...@@ -53,6 +53,6 @@
*/ */
/* yyyymmddN */ /* yyyymmddN */
#define CATALOG_VERSION_NO 202103291 #define CATALOG_VERSION_NO 202104011
#endif #endif
This diff is collapsed.
...@@ -616,6 +616,12 @@ extern int pg_bind_textdomain_codeset(const char *domainname); ...@@ -616,6 +616,12 @@ extern int pg_bind_textdomain_codeset(const char *domainname);
extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len, extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len,
int src_encoding, int src_encoding,
int dest_encoding); int dest_encoding);
/*
 * Variant of pg_do_encoding_conversion() that takes the conversion function
 * (proc) from the caller and writes the converted, null-terminated output
 * into a caller-supplied buffer instead of returning an allocated string.
 * The int result is the value returned by the conversion function, i.e. the
 * number of source bytes converted.  With noError = true the conversion
 * function is asked not to throw an error if conversion fails.
 */
extern int pg_do_encoding_conversion_buf(Oid proc,
int src_encoding,
int dest_encoding,
unsigned char *src, int srclen,
unsigned char *dst, int dstlen,
bool noError);
extern char *pg_client_to_server(const char *s, int len); extern char *pg_client_to_server(const char *s, int len);
extern char *pg_server_to_client(const char *s, int len); extern char *pg_server_to_client(const char *s, int len);
...@@ -627,18 +633,18 @@ extern void pg_unicode_to_server(pg_wchar c, unsigned char *s); ...@@ -627,18 +633,18 @@ extern void pg_unicode_to_server(pg_wchar c, unsigned char *s);
extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc); extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc);
extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc); extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc);
extern void UtfToLocal(const unsigned char *utf, int len, extern int UtfToLocal(const unsigned char *utf, int len,
unsigned char *iso, unsigned char *iso,
const pg_mb_radix_tree *map, const pg_mb_radix_tree *map,
const pg_utf_to_local_combined *cmap, int cmapsize, const pg_utf_to_local_combined *cmap, int cmapsize,
utf_local_conversion_func conv_func, utf_local_conversion_func conv_func,
int encoding); int encoding, bool noError);
extern void LocalToUtf(const unsigned char *iso, int len, extern int LocalToUtf(const unsigned char *iso, int len,
unsigned char *utf, unsigned char *utf,
const pg_mb_radix_tree *map, const pg_mb_radix_tree *map,
const pg_local_to_utf_combined *cmap, int cmapsize, const pg_local_to_utf_combined *cmap, int cmapsize,
utf_local_conversion_func conv_func, utf_local_conversion_func conv_func,
int encoding); int encoding, bool noError);
extern bool pg_verifymbstr(const char *mbstr, int len, bool noError); extern bool pg_verifymbstr(const char *mbstr, int len, bool noError);
extern bool pg_verify_mbstr(int encoding, const char *mbstr, int len, extern bool pg_verify_mbstr(int encoding, const char *mbstr, int len,
...@@ -656,18 +662,19 @@ extern void report_invalid_encoding(int encoding, const char *mbstr, int len) pg ...@@ -656,18 +662,19 @@ extern void report_invalid_encoding(int encoding, const char *mbstr, int len) pg
extern void report_untranslatable_char(int src_encoding, int dest_encoding, extern void report_untranslatable_char(int src_encoding, int dest_encoding,
const char *mbstr, int len) pg_attribute_noreturn(); const char *mbstr, int len) pg_attribute_noreturn();
extern void local2local(const unsigned char *l, unsigned char *p, int len, extern int local2local(const unsigned char *l, unsigned char *p, int len,
int src_encoding, int dest_encoding, const unsigned char *tab); int src_encoding, int dest_encoding,
extern void latin2mic(const unsigned char *l, unsigned char *p, int len, const unsigned char *tab, bool noError);
int lc, int encoding); extern int latin2mic(const unsigned char *l, unsigned char *p, int len,
extern void mic2latin(const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, bool noError);
int lc, int encoding); extern int mic2latin(const unsigned char *mic, unsigned char *p, int len,
extern void latin2mic_with_table(const unsigned char *l, unsigned char *p, int lc, int encoding, bool noError);
extern int latin2mic_with_table(const unsigned char *l, unsigned char *p,
int len, int lc, int encoding, int len, int lc, int encoding,
const unsigned char *tab); const unsigned char *tab, bool noError);
extern void mic2latin_with_table(const unsigned char *mic, unsigned char *p, extern int mic2latin_with_table(const unsigned char *mic, unsigned char *p,
int len, int lc, int encoding, int len, int lc, int encoding,
const unsigned char *tab); const unsigned char *tab, bool noError);
#ifdef WIN32 #ifdef WIN32
extern WCHAR *pgwin32_message_to_UTF16(const char *str, int len, int *utf16len); extern WCHAR *pgwin32_message_to_UTF16(const char *str, int len, int *utf16len);
......
This diff is collapsed.
...@@ -1052,13 +1052,14 @@ WHERE p1.conproc = 0 OR ...@@ -1052,13 +1052,14 @@ WHERE p1.conproc = 0 OR
SELECT p.oid, p.proname, c.oid, c.conname SELECT p.oid, p.proname, c.oid, c.conname
FROM pg_proc p, pg_conversion c FROM pg_proc p, pg_conversion c
WHERE p.oid = c.conproc AND WHERE p.oid = c.conproc AND
(p.prorettype != 'void'::regtype OR p.proretset OR (p.prorettype != 'int4'::regtype OR p.proretset OR
p.pronargs != 5 OR p.pronargs != 6 OR
p.proargtypes[0] != 'int4'::regtype OR p.proargtypes[0] != 'int4'::regtype OR
p.proargtypes[1] != 'int4'::regtype OR p.proargtypes[1] != 'int4'::regtype OR
p.proargtypes[2] != 'cstring'::regtype OR p.proargtypes[2] != 'cstring'::regtype OR
p.proargtypes[3] != 'internal'::regtype OR p.proargtypes[3] != 'internal'::regtype OR
p.proargtypes[4] != 'int4'::regtype); p.proargtypes[4] != 'int4'::regtype OR
p.proargtypes[5] != 'bool'::regtype);
oid | proname | oid | conname oid | proname | oid | conname
-----+---------+-----+--------- -----+---------+-----+---------
(0 rows) (0 rows)
......
...@@ -78,6 +78,10 @@ CREATE FUNCTION test_opclass_options_func(internal) ...@@ -78,6 +78,10 @@ CREATE FUNCTION test_opclass_options_func(internal)
AS '@libdir@/regress@DLSUFFIX@', 'test_opclass_options_func' AS '@libdir@/regress@DLSUFFIX@', 'test_opclass_options_func'
LANGUAGE C; LANGUAGE C;
CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)
AS '@libdir@/regress@DLSUFFIX@', 'test_enc_conversion'
LANGUAGE C;
-- Things that shouldn't work: -- Things that shouldn't work:
CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
......
...@@ -68,6 +68,9 @@ CREATE FUNCTION test_opclass_options_func(internal) ...@@ -68,6 +68,9 @@ CREATE FUNCTION test_opclass_options_func(internal)
RETURNS void RETURNS void
AS '@libdir@/regress@DLSUFFIX@', 'test_opclass_options_func' AS '@libdir@/regress@DLSUFFIX@', 'test_opclass_options_func'
LANGUAGE C; LANGUAGE C;
CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)
AS '@libdir@/regress@DLSUFFIX@', 'test_enc_conversion'
LANGUAGE C;
-- Things that shouldn't work: -- Things that shouldn't work:
CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
AS 'SELECT ''not an integer'';'; AS 'SELECT ''not an integer'';';
......
This diff is collapsed.
This diff is collapsed.
...@@ -556,13 +556,14 @@ WHERE p1.conproc = 0 OR ...@@ -556,13 +556,14 @@ WHERE p1.conproc = 0 OR
SELECT p.oid, p.proname, c.oid, c.conname SELECT p.oid, p.proname, c.oid, c.conname
FROM pg_proc p, pg_conversion c FROM pg_proc p, pg_conversion c
WHERE p.oid = c.conproc AND WHERE p.oid = c.conproc AND
(p.prorettype != 'void'::regtype OR p.proretset OR (p.prorettype != 'int4'::regtype OR p.proretset OR
p.pronargs != 5 OR p.pronargs != 6 OR
p.proargtypes[0] != 'int4'::regtype OR p.proargtypes[0] != 'int4'::regtype OR
p.proargtypes[1] != 'int4'::regtype OR p.proargtypes[1] != 'int4'::regtype OR
p.proargtypes[2] != 'cstring'::regtype OR p.proargtypes[2] != 'cstring'::regtype OR
p.proargtypes[3] != 'internal'::regtype OR p.proargtypes[3] != 'internal'::regtype OR
p.proargtypes[4] != 'int4'::regtype); p.proargtypes[4] != 'int4'::regtype OR
p.proargtypes[5] != 'bool'::regtype);
-- Check for conprocs that don't perform the specific conversion that -- Check for conprocs that don't perform the specific conversion that
-- pg_conversion alleges they do, by trying to invoke each conversion -- pg_conversion alleges they do, by trying to invoke each conversion
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment