Commit 7730f48e authored by Tom Lane

Teach UtfToLocal/LocalToUtf to support algorithmic encoding conversions.

Until now, these functions have only supported encoding conversions using
lookup tables, which is fine as long as there aren't too many code points
to convert.  However, GB18030 expects all 1.1 million Unicode code points
to be convertible, which would require a ridiculously-sized lookup table.
Fortunately, a large fraction of those conversions can be expressed through
arithmetic, ie the conversions are one-to-one in certain defined ranges.
To support that, provide a callback function that is used after consulting
the lookup tables.  (This patch doesn't actually change anything about the
GB18030 conversion behavior; it just provides infrastructure for fixing it.)

Since this requires changing the APIs of UtfToLocal/LocalToUtf anyway,
take the opportunity to rearrange their argument lists into what seems
to me a saner order.  And beautify the call sites by using lengthof()
instead of error-prone sizeof() arithmetic.

In passing, also mark all the lookup tables used by these calls "const".
This moves an impressive amount of stuff into the text segment, at least
on my machine, and is safer anyhow.
parent 83e176ec
......@@ -97,7 +97,7 @@ close(FILE);
$file = lc("utf8_to_big5.map");
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_utf_to_local ULmapBIG5[ $count ] = {\n";
print FILE "static const pg_utf_to_local ULmapBIG5[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
......@@ -185,7 +185,7 @@ close(FILE);
$file = lc("big5_to_utf8.map");
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_local_to_utf LUmapBIG5[ $count ] = {\n";
print FILE "static const pg_local_to_utf LUmapBIG5[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
$utf = $array{$index};
......
......@@ -55,7 +55,7 @@ close(FILE);
$file = "utf8_to_euc_cn.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_utf_to_local ULmapEUC_CN[ $count ] = {\n";
print FILE "static const pg_utf_to_local ULmapEUC_CN[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
......@@ -109,7 +109,7 @@ close(FILE);
$file = "euc_cn_to_utf8.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_local_to_utf LUmapEUC_CN[ $count ] = {\n";
print FILE "static const pg_local_to_utf LUmapEUC_CN[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
$utf = $array{$index};
......
......@@ -72,7 +72,7 @@ open(FILE, "> $file") || die("cannot open $file");
print FILE "/*\n";
print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
print FILE " */\n";
print FILE "static pg_utf_to_local ULmapEUC_JIS_2004[] = {\n";
print FILE "static const pg_utf_to_local ULmapEUC_JIS_2004[] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
......@@ -133,7 +133,7 @@ print FILE "/*\n";
print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
print FILE " */\n";
print FILE
"static pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {\n";
"static const pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {\n";
for $index (sort { $a cmp $b } keys(%array1))
{
......@@ -256,7 +256,7 @@ open(FILE, "> $file") || die("cannot open $file");
print FILE "/*\n";
print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
print FILE " */\n";
print FILE "static pg_local_to_utf LUmapEUC_JIS_2004[] = {\n";
print FILE "static const pg_local_to_utf LUmapEUC_JIS_2004[] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
......@@ -283,7 +283,7 @@ print FILE "/*\n";
print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
print FILE " */\n";
print FILE
"static pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {\n";
"static const pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {\n";
for $index (sort { $a <=> $b } keys(%array1))
{
......
......@@ -136,7 +136,7 @@ close(FILE);
$file = "utf8_to_euc_jp.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_utf_to_local ULmapEUC_JP[ $count ] = {\n";
print FILE "static const pg_utf_to_local ULmapEUC_JP[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
......@@ -263,7 +263,7 @@ close(FILE);
$file = "euc_jp_to_utf8.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_local_to_utf LUmapEUC_JP[ $count ] = {\n";
print FILE "static const pg_local_to_utf LUmapEUC_JP[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
$utf = $array{$index};
......
......@@ -55,7 +55,7 @@ close(FILE);
$file = "utf8_to_euc_kr.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_utf_to_local ULmapEUC_KR[ $count ] = {\n";
print FILE "static const pg_utf_to_local ULmapEUC_KR[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
......@@ -109,7 +109,7 @@ close(FILE);
$file = "euc_kr_to_utf8.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_local_to_utf LUmapEUC_KR[ $count ] = {\n";
print FILE "static const pg_local_to_utf LUmapEUC_KR[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
$utf = $array{$index};
......
......@@ -71,7 +71,7 @@ close(FILE);
$file = "utf8_to_euc_tw.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_utf_to_local ULmapEUC_TW[ $count ] = {\n";
print FILE "static const pg_utf_to_local ULmapEUC_TW[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
......@@ -138,7 +138,7 @@ close(FILE);
$file = "euc_tw_to_utf8.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_local_to_utf LUmapEUC_TW[ $count ] = {\n";
print FILE "static const pg_local_to_utf LUmapEUC_TW[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
$utf = $array{$index};
......
......@@ -52,7 +52,7 @@ close(FILE);
$file = "utf8_to_gb18030.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_utf_to_local ULmapGB18030[ $count ] = {\n";
print FILE "static const pg_utf_to_local ULmapGB18030[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
......@@ -106,7 +106,7 @@ close(FILE);
$file = "gb18030_to_utf8.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_local_to_utf LUmapGB18030[ $count ] = {\n";
print FILE "static const pg_local_to_utf LUmapGB18030[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
$utf = $array{$index};
......
......@@ -72,7 +72,7 @@ open(FILE, "> $file") || die("cannot open $file");
print FILE "/*\n";
print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
print FILE " */\n";
print FILE "static pg_utf_to_local ULmapSHIFT_JIS_2004[] = {\n";
print FILE "static const pg_utf_to_local ULmapSHIFT_JIS_2004[] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
......@@ -99,7 +99,7 @@ print FILE "/*\n";
print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
print FILE " */\n";
print FILE
"static pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {\n";
"static const pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {\n";
for $index (sort { $a cmp $b } keys(%array1))
{
......@@ -185,7 +185,7 @@ open(FILE, "> $file") || die("cannot open $file");
print FILE "/*\n";
print FILE " * This file was generated by UCS_to_SHIFTJIS_2004.pl\n";
print FILE " */\n";
print FILE "static pg_local_to_utf LUmapSHIFT_JIS_2004[] = {\n";
print FILE "static const pg_local_to_utf LUmapSHIFT_JIS_2004[] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
......@@ -212,7 +212,7 @@ print FILE "/*\n";
print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
print FILE " */\n";
print FILE
"static pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {\n";
"static const pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {\n";
for $index (sort { $a <=> $b } keys(%array1))
{
......
......@@ -72,7 +72,7 @@ close(FILE);
$file = "utf8_to_sjis.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_utf_to_local ULmapSJIS[ $count ] = {\n";
print FILE "static const pg_utf_to_local ULmapSJIS[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
......@@ -122,7 +122,7 @@ close(FILE);
$file = "sjis_to_utf8.map";
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_local_to_utf LUmapSJIS[ $count ] = {\n";
print FILE "static const pg_local_to_utf LUmapSJIS[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
$utf = $array{$index};
......
......@@ -88,7 +88,7 @@ foreach $charset (@charsets)
$file = lc("utf8_to_${charset}.map");
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_utf_to_local ULmap${charset}[ $count ] = {\n";
print FILE "static const pg_utf_to_local ULmap${charset}[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
......@@ -140,7 +140,7 @@ foreach $charset (@charsets)
$file = lc("${charset}_to_utf8.map");
open(FILE, "> $file") || die("cannot open $file");
print FILE "static pg_local_to_utf LUmap${charset}[ $count ] = {\n";
print FILE "static const pg_local_to_utf LUmap${charset}[ $count ] = {\n";
for $index (sort { $a <=> $b } keys(%array))
{
$utf = $array{$index};
......
static pg_local_to_utf LUmapBIG5[ 13717 ] = {
static const pg_local_to_utf LUmapBIG5[ 13717 ] = {
{0xa140, 0xe38080},
{0xa141, 0xefbc8c},
{0xa142, 0xe38081},
......
/* src/backend/utils/mb/Unicode/euc_cn_to_utf8.map */
static pg_local_to_utf LUmapEUC_CN[ 7445 ] = {
static const pg_local_to_utf LUmapEUC_CN[ 7445 ] = {
{0xa1a1, 0xe38080},
{0xa1a2, 0xe38081},
{0xa1a3, 0xe38082},
......
/*
* This file was generated by UCS_to_EUC_JIS_2004.pl
*/
static pg_local_to_utf LUmapEUC_JIS_2004[] = {
static const pg_local_to_utf LUmapEUC_JIS_2004[] = {
{0x000000, 0x00000000}, /* U+0000 <control> */
{0x000001, 0x00000001}, /* U+0001 <control> */
{0x000002, 0x00000002}, /* U+0002 <control> */
......
/*
* This file was generated by UCS_to_EUC_JIS_2004.pl
*/
static pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {
static const pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {
{0x00a4f7, 0x00e3818b, 0x00e3829a}, /* U+304B+309A [2000] */
{0x00a4f8, 0x00e3818d, 0x00e3829a}, /* U+304D+309A [2000] */
{0x00a4f9, 0x00e3818f, 0x00e3829a}, /* U+304F+309A [2000] */
......
/* src/backend/utils/mb/Unicode/euc_jp_to_utf8.map */
static pg_local_to_utf LUmapEUC_JP[] = {
static const pg_local_to_utf LUmapEUC_JP[] = {
{0x8ea1, 0xefbda1},
{0x8ea2, 0xefbda2},
{0x8ea3, 0xefbda3},
......
static pg_local_to_utf LUmapEUC_KR[ 8227 ] = {
static const pg_local_to_utf LUmapEUC_KR[ 8227 ] = {
{0xa1a1, 0xe38080},
{0xa1a2, 0xe38081},
{0xa1a3, 0xe38082},
......
/* src/backend/utils/mb/Unicode/euc_tw_to_utf8.map */
static pg_local_to_utf LUmapEUC_TW[ 23575 ] = {
static const pg_local_to_utf LUmapEUC_TW[ 23575 ] = {
{0xa1a1, 0xe38080},
{0xa1a2, 0xefbc8c},
{0xa1a3, 0xe38081},
......
/* src/backend/utils/mb/Unicode/gb18030_to_utf8.map */
static pg_local_to_utf LUmapGB18030[ 63360 ] = {
static const pg_local_to_utf LUmapGB18030[ 63360 ] = {
{0x8140, 0xe4b882},
{0x8141, 0xe4b884},
{0x8142, 0xe4b885},
/* src/backend/utils/mb/Unicode/gbk_to_utf8.map */
static pg_local_to_utf LUmapGBK[ 21792 ] = {
static const pg_local_to_utf LUmapGBK[ 21792 ] = {
{0x0080, 0xe282ac},
{0x8140, 0xe4b882},
{0x8141, 0xe4b884},
......
/* src/backend/utils/mb/Unicode/iso8859_10_to_utf8.map */
static pg_local_to_utf LUmapISO8859_10[ 128 ] = {
static const pg_local_to_utf LUmapISO8859_10[ 128 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},
......
/* src/backend/utils/mb/Unicode/iso8859_13_to_utf8.map */
static pg_local_to_utf LUmapISO8859_13[ 128 ] = {
static const pg_local_to_utf LUmapISO8859_13[ 128 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},
......
/* src/backend/utils/mb/Unicode/iso8859_14_to_utf8.map */
static pg_local_to_utf LUmapISO8859_14[ 128 ] = {
static const pg_local_to_utf LUmapISO8859_14[ 128 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},
......
/* src/backend/utils/mb/Unicode/iso8859_15_to_utf8.map */
static pg_local_to_utf LUmapISO8859_15[ 128 ] = {
static const pg_local_to_utf LUmapISO8859_15[ 128 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},
......
/* src/backend/utils/mb/Unicode/iso8859_16_to_utf8.map */
static pg_local_to_utf LUmapISO8859_16[ 128 ] = {
static const pg_local_to_utf LUmapISO8859_16[ 128 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},
......
/* src/backend/utils/mb/Unicode/iso8859_2_to_utf8.map */
static pg_local_to_utf LUmapISO8859_2[ 128 ] = {
static const pg_local_to_utf LUmapISO8859_2[ 128 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},
......
/* src/backend/utils/mb/Unicode/iso8859_3_to_utf8.map */
static pg_local_to_utf LUmapISO8859_3[ 121 ] = {
static const pg_local_to_utf LUmapISO8859_3[ 121 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},
......
/* src/backend/utils/mb/Unicode/iso8859_4_to_utf8.map */
static pg_local_to_utf LUmapISO8859_4[ 128 ] = {
static const pg_local_to_utf LUmapISO8859_4[ 128 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},
......
/* src/backend/utils/mb/Unicode/iso8859_5_to_utf8.map */
static pg_local_to_utf LUmapISO8859_5[ 128 ] = {
static const pg_local_to_utf LUmapISO8859_5[ 128 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},
......
/* src/backend/utils/mb/Unicode/iso8859_6_to_utf8.map */
static pg_local_to_utf LUmapISO8859_6[ 83 ] = {
static const pg_local_to_utf LUmapISO8859_6[ 83 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},
......
/* src/backend/utils/mb/Unicode/iso8859_7_to_utf8.map */
static pg_local_to_utf LUmapISO8859_7[ 125 ] = {
static const pg_local_to_utf LUmapISO8859_7[ 125 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},
......
/* src/backend/utils/mb/Unicode/iso8859_8_to_utf8.map */
static pg_local_to_utf LUmapISO8859_8[ 92 ] = {
static const pg_local_to_utf LUmapISO8859_8[ 92 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},
......
/* src/backend/utils/mb/Unicode/iso8859_9_to_utf8.map */
static pg_local_to_utf LUmapISO8859_9[ 128 ] = {
static const pg_local_to_utf LUmapISO8859_9[ 128 ] = {
{0x0080, 0xc280},
{0x0081, 0xc281},
{0x0082, 0xc282},
......
static pg_local_to_utf LUmapJOHAB[ 17049 ] = {
static const pg_local_to_utf LUmapJOHAB[ 17049 ] = {
{0x8444, 0xe384b3},
{0x8446, 0xe384b5},
{0x8447, 0xe384b6},
......
/* src/backend/utils/mb/Unicode/koi8r_to_utf8.map */
static pg_local_to_utf LUmapKOI8R[ 128 ] = {
static const pg_local_to_utf LUmapKOI8R[ 128 ] = {
{0x0080, 0xe29480},
{0x0081, 0xe29482},
{0x0082, 0xe2948c},
......
static pg_local_to_utf LUmapKOI8U[ 128 ] = {
static const pg_local_to_utf LUmapKOI8U[ 128 ] = {
{0x0080, 0xe29480},
{0x0081, 0xe29482},
{0x0082, 0xe2948c},
......
/*
* This file was generated by UCS_to_SHIFT_JIS_2004.pl
*/
static pg_local_to_utf LUmapSHIFT_JIS_2004[] = {
static const pg_local_to_utf LUmapSHIFT_JIS_2004[] = {
{0x0000, 0x00000000}, /* U+0000 <control> */
{0x0001, 0x00000001}, /* U+0001 <control> */
{0x0002, 0x00000002}, /* U+0002 <control> */
......
/*
* This file was generated by UCS_to_SHIFT_JIS_2004.pl
*/
static pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {
static const pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {
{0x82f5, 0x00e3818b, 0x00e3829a}, /* U+304B+309A [2000] */
{0x82f6, 0x00e3818d, 0x00e3829a}, /* U+304D+309A [2000] */
{0x82f7, 0x00e3818f, 0x00e3829a}, /* U+304F+309A [2000] */
......
static pg_local_to_utf LUmapSJIS[ 7787 ] = {
static const pg_local_to_utf LUmapSJIS[ 7787 ] = {
{0x00a1, 0xefbda1},
{0x00a2, 0xefbda2},
{0x00a3, 0xefbda3},
......
static pg_local_to_utf LUmapUHC[ 17237 ] = {
static const pg_local_to_utf LUmapUHC[ 17237 ] = {
{0x8141, 0xeab082},
{0x8142, 0xeab083},
{0x8143, 0xeab085},
......
static pg_utf_to_local ULmapBIG5[ 13711 ] = {
static const pg_utf_to_local ULmapBIG5[ 13711 ] = {
{0xc2a2, 0xa246},
{0xc2a3, 0xa247},
{0xc2a5, 0xa244},
......
static pg_utf_to_local ULmapEUC_CN[ 7445 ] = {
static const pg_utf_to_local ULmapEUC_CN[ 7445 ] = {
{0xc2a4, 0xa1e8},
{0xc2a7, 0xa1ec},
{0xc2a8, 0xa1a7},
......
/*
* This file was generated by UCS_to_EUC_JIS_2004.pl
*/
static pg_utf_to_local ULmapEUC_JIS_2004[] = {
static const pg_utf_to_local ULmapEUC_JIS_2004[] = {
{0x00000000, 0x000000}, /* U+0000 <control> */
{0x00000001, 0x000001}, /* U+0001 <control> */
{0x00000002, 0x000002}, /* U+0002 <control> */
......
/*
* This file was generated by UCS_to_EUC_JIS_2004.pl
*/
static pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {
static const pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {
{0x0000c3a6, 0x0000cc80, 0x00abc4}, /* U+00E6+0300 [2000] */
{0x0000c994, 0x0000cc80, 0x00abc8}, /* U+0254+0300 [2000] */
{0x0000c994, 0x0000cc81, 0x00abc9}, /* U+0254+0301 [2000] */
......
static pg_utf_to_local ULmapEUC_JP[ 13175 ] = {
static const pg_utf_to_local ULmapEUC_JP[ 13175 ] = {
{0xc2a1, 0x8fa2c2},
{0xc2a4, 0x8fa2f0},
{0xc2a6, 0x8fa2c3},
......
static pg_utf_to_local ULmapEUC_KR[ 8227 ] = {
static const pg_utf_to_local ULmapEUC_KR[ 8227 ] = {
{0xc2a1, 0xa2ae},
{0xc2a4, 0xa2b4},
{0xc2a7, 0xa1d7},
......
static pg_utf_to_local ULmapEUC_TW[ 17711 ] = {
static const pg_utf_to_local ULmapEUC_TW[ 17711 ] = {
{0xc2a7, 0xa1f0},
{0xc2b0, 0xa2f8},
{0xc2b1, 0xa2b4},
......
static pg_utf_to_local ULmapGB18030[ 63360 ] = {
static const pg_utf_to_local ULmapGB18030[ 63360 ] = {
{0xc280, 0x81308130},
{0xc281, 0x81308131},
{0xc282, 0x81308132},
static pg_utf_to_local ULmapGBK[ 21792 ] = {
static const pg_utf_to_local ULmapGBK[ 21792 ] = {
{0xc2a4, 0xa1e8},
{0xc2a7, 0xa1ec},
{0xc2a8, 0xa1a7},
......
static pg_utf_to_local ULmapISO8859_10[ 128 ] = {
static const pg_utf_to_local ULmapISO8859_10[ 128 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},
......
static pg_utf_to_local ULmapISO8859_13[ 128 ] = {
static const pg_utf_to_local ULmapISO8859_13[ 128 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},
......
static pg_utf_to_local ULmapISO8859_14[ 128 ] = {
static const pg_utf_to_local ULmapISO8859_14[ 128 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},
......
static pg_utf_to_local ULmapISO8859_15[ 128 ] = {
static const pg_utf_to_local ULmapISO8859_15[ 128 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},
......
static pg_utf_to_local ULmapISO8859_16[ 128 ] = {
static const pg_utf_to_local ULmapISO8859_16[ 128 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},
......
static pg_utf_to_local ULmapISO8859_2[ 128 ] = {
static const pg_utf_to_local ULmapISO8859_2[ 128 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},
......
static pg_utf_to_local ULmapISO8859_3[ 121 ] = {
static const pg_utf_to_local ULmapISO8859_3[ 121 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},
......
static pg_utf_to_local ULmapISO8859_4[ 128 ] = {
static const pg_utf_to_local ULmapISO8859_4[ 128 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},
......
static pg_utf_to_local ULmapISO8859_5[ 128 ] = {
static const pg_utf_to_local ULmapISO8859_5[ 128 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},
......
static pg_utf_to_local ULmapISO8859_6[ 83 ] = {
static const pg_utf_to_local ULmapISO8859_6[ 83 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},
......
static pg_utf_to_local ULmapISO8859_7[ 125 ] = {
static const pg_utf_to_local ULmapISO8859_7[ 125 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},
......
static pg_utf_to_local ULmapISO8859_8[ 92 ] = {
static const pg_utf_to_local ULmapISO8859_8[ 92 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},
......
static pg_utf_to_local ULmapISO8859_9[ 128 ] = {
static const pg_utf_to_local ULmapISO8859_9[ 128 ] = {
{0xc280, 0x0080},
{0xc281, 0x0081},
{0xc282, 0x0082},
......
static pg_utf_to_local ULmapJOHAB[ 17049 ] = {
static const pg_utf_to_local ULmapJOHAB[ 17049 ] = {
{0xc2a1, 0xd9ae},
{0xc2a4, 0xd9b4},
{0xc2a7, 0xd967},
......
static pg_utf_to_local ULmapKOI8R[ 128 ] = {
static const pg_utf_to_local ULmapKOI8R[ 128 ] = {
{0xc2a0, 0x009a},
{0xc2a9, 0x00bf},
{0xc2b0, 0x009c},
......
static pg_utf_to_local ULmapKOI8U[ 128 ] = {
static const pg_utf_to_local ULmapKOI8U[ 128 ] = {
{0xc2a0, 0x009a},
{0xc2a9, 0x00bf},
{0xc2b0, 0x009c},
......
/*
* This file was generated by UCS_to_SHIFT_JIS_2004.pl
*/
static pg_utf_to_local ULmapSHIFT_JIS_2004[] = {
static const pg_utf_to_local ULmapSHIFT_JIS_2004[] = {
{0x00000000, 0x000000}, /* U+0000 <control> */
{0x00000001, 0x000001}, /* U+0001 <control> */
{0x00000002, 0x000002}, /* U+0002 <control> */
......
/*
* This file was generated by UCS_to_SHIFT_JIS_2004.pl
*/
static pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {
static const pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {
{0x0000c3a6, 0x0000cc80, 0x8663}, /* U+00E6+0300 [2000] */
{0x0000c994, 0x0000cc80, 0x8667}, /* U+0254+0300 [2000] */
{0x0000c994, 0x0000cc81, 0x8668}, /* U+0254+0301 [2000] */
......
static pg_utf_to_local ULmapSJIS[ 7398 ] = {
static const pg_utf_to_local ULmapSJIS[ 7398 ] = {
{0xc19c, 0x815f},
{0xc2a2, 0x8191},
{0xc2a3, 0x8192},
......
static pg_utf_to_local ULmapUHC[ 17237 ] = {
static const pg_utf_to_local ULmapUHC[ 17237 ] = {
{0xc2a1, 0xa2ae},
{0xc2a4, 0xa2b4},
{0xc2a7, 0xa1d7},
......
static pg_utf_to_local ULmapWIN1250[ 123 ] = {
static const pg_utf_to_local ULmapWIN1250[ 123 ] = {
{0xc2a0, 0x00a0},
{0xc2a4, 0x00a4},
{0xc2a6, 0x00a6},
......
static pg_utf_to_local ULmapWIN1251[ 127 ] = {
static const pg_utf_to_local ULmapWIN1251[ 127 ] = {
{0xc2a0, 0x00a0},
{0xc2a4, 0x00a4},
{0xc2a6, 0x00a6},
......
static pg_utf_to_local ULmapWIN1252[ 123 ] = {
static const pg_utf_to_local ULmapWIN1252[ 123 ] = {
{0xc2a0, 0x00a0},
{0xc2a1, 0x00a1},
{0xc2a2, 0x00a2},
......
static pg_utf_to_local ULmapWIN1253[ 111 ] = {
static const pg_utf_to_local ULmapWIN1253[ 111 ] = {
{0xc2a0, 0x00a0},
{0xc2a3, 0x00a3},
{0xc2a4, 0x00a4},
......
static pg_utf_to_local ULmapWIN1254[ 121 ] = {
static const pg_utf_to_local ULmapWIN1254[ 121 ] = {
{0xc2a0, 0x00a0},
{0xc2a1, 0x00a1},
{0xc2a2, 0x00a2},
......
static pg_utf_to_local ULmapWIN1255[ 105 ] = {
static const pg_utf_to_local ULmapWIN1255[ 105 ] = {
{0xc2a0, 0x00a0},
{0xc2a1, 0x00a1},
{0xc2a2, 0x00a2},
......
static pg_utf_to_local ULmapWIN1256[ 128 ] = {
static const pg_utf_to_local ULmapWIN1256[ 128 ] = {
{0xc2a0, 0x00a0},
{0xc2a2, 0x00a2},
{0xc2a3, 0x00a3},
......
static pg_utf_to_local ULmapWIN1257[ 116 ] = {
static const pg_utf_to_local ULmapWIN1257[ 116 ] = {
{0xc2a0, 0x00a0},
{0xc2a2, 0x00a2},
{0xc2a3, 0x00a3},
......
static pg_utf_to_local ULmapWIN1258[ 119 ] = {
static const pg_utf_to_local ULmapWIN1258[ 119 ] = {
{0xc2a0, 0x00a0},
{0xc2a1, 0x00a1},
{0xc2a2, 0x00a2},
......
static pg_utf_to_local ULmapWIN866[ 128 ] = {
static const pg_utf_to_local ULmapWIN866[ 128 ] = {
{0xc2a0, 0x00ff},
{0xc2a4, 0x00fd},
{0xc2b0, 0x00f8},
......
static pg_utf_to_local ULmapWIN874[ 97 ] = {
static const pg_utf_to_local ULmapWIN874[ 97 ] = {
{0xc2a0, 0x00a0},
{0xe0b881, 0x00a1},
{0xe0b882, 0x00a2},
......
static pg_local_to_utf LUmapWIN1250[ 123 ] = {
static const pg_local_to_utf LUmapWIN1250[ 123 ] = {
{0x0080, 0xe282ac},
{0x0082, 0xe2809a},
{0x0084, 0xe2809e},
......
static pg_local_to_utf LUmapWIN1251[ 127 ] = {
static const pg_local_to_utf LUmapWIN1251[ 127 ] = {
{0x0080, 0xd082},
{0x0081, 0xd083},
{0x0082, 0xe2809a},
......
static pg_local_to_utf LUmapWIN1252[ 123 ] = {
static const pg_local_to_utf LUmapWIN1252[ 123 ] = {
{0x0080, 0xe282ac},
{0x0082, 0xe2809a},
{0x0083, 0xc692},
......
static pg_local_to_utf LUmapWIN1253[ 111 ] = {
static const pg_local_to_utf LUmapWIN1253[ 111 ] = {
{0x0080, 0xe282ac},
{0x0082, 0xe2809a},
{0x0083, 0xc692},
......
static pg_local_to_utf LUmapWIN1254[ 121 ] = {
static const pg_local_to_utf LUmapWIN1254[ 121 ] = {
{0x0080, 0xe282ac},
{0x0082, 0xe2809a},
{0x0083, 0xc692},
......
static pg_local_to_utf LUmapWIN1255[ 105 ] = {
static const pg_local_to_utf LUmapWIN1255[ 105 ] = {
{0x0080, 0xe282ac},
{0x0082, 0xe2809a},
{0x0083, 0xc692},
......
static pg_local_to_utf LUmapWIN1256[ 128 ] = {
static const pg_local_to_utf LUmapWIN1256[ 128 ] = {
{0x0080, 0xe282ac},
{0x0081, 0xd9be},
{0x0082, 0xe2809a},
......
static pg_local_to_utf LUmapWIN1257[ 116 ] = {
static const pg_local_to_utf LUmapWIN1257[ 116 ] = {
{0x0080, 0xe282ac},
{0x0082, 0xe2809a},
{0x0084, 0xe2809e},
......
static pg_local_to_utf LUmapWIN1258[ 119 ] = {
static const pg_local_to_utf LUmapWIN1258[ 119 ] = {
{0x0080, 0xe282ac},
{0x0082, 0xe2809a},
{0x0083, 0xc692},
......
static pg_local_to_utf LUmapWIN866[ 128 ] = {
static const pg_local_to_utf LUmapWIN866[ 128 ] = {
{0x0080, 0xd090},
{0x0081, 0xd091},
{0x0082, 0xd092},
......
static pg_local_to_utf LUmapWIN874[ 97 ] = {
static const pg_local_to_utf LUmapWIN874[ 97 ] = {
{0x0080, 0xe282ac},
{0x0085, 0xe280a6},
{0x0091, 0xe28098},
......
......@@ -302,47 +302,62 @@ compare4(const void *p1, const void *p2)
}
/*
* convert 32bit wide character to multibyte stream pointed to by iso
* store 32bit character representation into multibyte stream
*/
static unsigned char *
set_iso_code(unsigned char *iso, uint32 code)
static inline unsigned char *
store_coded_char(unsigned char *dest, uint32 code)
{
if (code & 0xff000000)
*iso++ = code >> 24;
*dest++ = code >> 24;
if (code & 0x00ff0000)
*iso++ = (code & 0x00ff0000) >> 16;
*dest++ = code >> 16;
if (code & 0x0000ff00)
*iso++ = (code & 0x0000ff00) >> 8;
*dest++ = code >> 8;
if (code & 0x000000ff)
*iso++ = code & 0x000000ff;
return iso;
*dest++ = code;
return dest;
}
/*
* UTF8 ---> local code
*
* utf: input UTF8 string (need not be null-terminated).
* utf: input string in UTF8 encoding (need not be null-terminated)
* len: length of input string (in bytes)
* iso: pointer to the output area (must be large enough!)
* map: the conversion map.
* cmap: the conversion map for combined characters.
* (optional)
* size1: the size of the conversion map.
* size2: the size of the conversion map for combined characters
* (optional)
* encoding: the PG identifier for the local encoding.
* len: length of input string.
(output string will be null-terminated)
* map: conversion map for single characters
* mapsize: number of entries in the conversion map
* cmap: conversion map for combined characters
* (optional, pass NULL if none)
* cmapsize: number of entries in the conversion map for combined characters
* (optional, pass 0 if none)
* conv_func: algorithmic encoding conversion function
* (optional, pass NULL if none)
* encoding: PG identifier for the local encoding
*
* For each character, the cmap (if provided) is consulted first; if no match,
* the map is consulted next; if still no match, the conv_func (if provided)
* is applied. An error is raised if no match is found.
*
* See pg_wchar.h for more details about the data structures used here.
*/
void
UtfToLocal(const unsigned char *utf, unsigned char *iso,
const pg_utf_to_local *map, const pg_utf_to_local_combined *cmap,
int size1, int size2, int encoding, int len)
UtfToLocal(const unsigned char *utf, int len,
unsigned char *iso,
const pg_utf_to_local *map, int mapsize,
const pg_utf_to_local_combined *cmap, int cmapsize,
utf_local_conversion_func conv_func,
int encoding)
{
uint32 iutf;
uint32 cutf[2];
uint32 code;
pg_utf_to_local *p;
pg_utf_to_local_combined *cp;
int l;
const pg_utf_to_local *p;
const pg_utf_to_local_combined *cp;
if (!PG_VALID_ENCODING(encoding))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid encoding number: %d", encoding)));
for (; len > 0; len -= l)
{
......@@ -351,7 +366,6 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
break;
l = pg_utf_mblen(utf);
if (len < l)
break;
......@@ -360,11 +374,13 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
if (l == 1)
{
/* ASCII case is easy */
/* ASCII case is easy, assume it's one-to-one conversion */
*iso++ = *utf++;
continue;
}
else if (l == 2)
/* collect coded char of length l */
if (l == 2)
{
iutf = *utf++ << 8;
iutf |= *utf++;
......@@ -388,15 +404,14 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
iutf = 0; /* keep compiler quiet */
}
/*
* first, try with combined map if possible
*/
/* First, try with combined map if possible */
if (cmap && len > l)
{
const unsigned char *utf_save = utf;
int len_save = len;
int l_save = l;
/* collect next character, same as above */
len -= l;
l = pg_utf_mblen(utf);
......@@ -406,83 +421,83 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
if (!pg_utf8_islegal(utf, l))
break;
cutf[0] = iutf;
if (l == 1)
/* We assume ASCII character cannot be in combined map */
if (l > 1)
{
if (len_save > 1)
{
p = bsearch(&cutf[0], map, size1,
sizeof(pg_utf_to_local), compare1);
if (p == NULL)
report_untranslatable_char(PG_UTF8, encoding,
(const char *) (utf_save - l_save), len_save);
iso = set_iso_code(iso, p->code);
}
uint32 iutf2;
uint32 cutf[2];
/* ASCII case is easy */
*iso++ = *utf++;
continue;
}
else if (l == 2)
if (l == 2)
{
iutf = *utf++ << 8;
iutf |= *utf++;
iutf2 = *utf++ << 8;
iutf2 |= *utf++;
}
else if (l == 3)
{
iutf = *utf++ << 16;
iutf |= *utf++ << 8;
iutf |= *utf++;
iutf2 = *utf++ << 16;
iutf2 |= *utf++ << 8;
iutf2 |= *utf++;
}
else if (l == 4)
{
iutf = *utf++ << 24;
iutf |= *utf++ << 16;
iutf |= *utf++ << 8;
iutf |= *utf++;
iutf2 = *utf++ << 24;
iutf2 |= *utf++ << 16;
iutf2 |= *utf++ << 8;
iutf2 |= *utf++;
}
else
{
elog(ERROR, "unsupported character length %d", l);
iutf = 0; /* keep compiler quiet */
iutf2 = 0; /* keep compiler quiet */
}
cutf[1] = iutf;
cp = bsearch(cutf, cmap, size2,
cutf[0] = iutf;
cutf[1] = iutf2;
cp = bsearch(cutf, cmap, cmapsize,
sizeof(pg_utf_to_local_combined), compare3);
if (cp)
code = cp->code;
else
{
/* not found in combined map. try with ordinary map */
p = bsearch(&cutf[0], map, size1,
sizeof(pg_utf_to_local), compare1);
if (p == NULL)
report_untranslatable_char(PG_UTF8, encoding,
(const char *) (utf_save - l_save), len_save);
iso = set_iso_code(iso, p->code);
iso = store_coded_char(iso, cp->code);
continue;
}
}
p = bsearch(&cutf[1], map, size1,
sizeof(pg_utf_to_local), compare1);
if (p == NULL)
report_untranslatable_char(PG_UTF8, encoding,
(const char *) (utf - l), len);
code = p->code;
/* fail, so back up to reprocess second character next time */
utf = utf_save;
len = len_save;
l = l_save;
}
/* Now check ordinary map */
p = bsearch(&iutf, map, mapsize,
sizeof(pg_utf_to_local), compare1);
if (p)
{
iso = store_coded_char(iso, p->code);
continue;
}
else /* no cmap or no remaining data */
/* if there's a conversion function, try that */
if (conv_func)
{
p = bsearch(&iutf, map, size1,
sizeof(pg_utf_to_local), compare1);
if (p == NULL)
uint32 converted = (*conv_func) (iutf);
if (converted)
{
iso = store_coded_char(iso, converted);
continue;
}
}
/* failed to translate this character */
report_untranslatable_char(PG_UTF8, encoding,
(const char *) (utf - l), len);
code = p->code;
}
iso = set_iso_code(iso, code);
}
/* if we broke out of loop early, must be invalid input */
if (len > 0)
report_invalid_encoding(PG_UTF8, (const char *) utf, len);
......@@ -492,26 +507,38 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
/*
* local code ---> UTF8
*
* iso: input local string (need not be null-terminated).
* iso: input string in local encoding (need not be null-terminated)
* len: length of input string (in bytes)
* utf: pointer to the output area (must be large enough!)
* map: the conversion map.
* cmap: the conversion map for combined characters.
* (optional)
* size1: the size of the conversion map.
* size2: the size of the conversion map for combined characters
* (optional)
* encoding: the PG identifier for the local encoding.
* len: length of input string.
(output string will be null-terminated)
* map: conversion map for single characters
* mapsize: number of entries in the conversion map
* cmap: conversion map for combined characters
* (optional, pass NULL if none)
* cmapsize: number of entries in the conversion map for combined characters
* (optional, pass 0 if none)
* conv_func: algorithmic encoding conversion function
* (optional, pass NULL if none)
* encoding: PG identifier for the local encoding
*
* For each character, the map is consulted first; if no match, the cmap
* (if provided) is consulted next; if still no match, the conv_func
* (if provided) is applied. An error is raised if no match is found.
*
* See pg_wchar.h for more details about the data structures used here.
*/
void
LocalToUtf(const unsigned char *iso, unsigned char *utf,
const pg_local_to_utf *map, const pg_local_to_utf_combined *cmap,
int size1, int size2, int encoding, int len)
LocalToUtf(const unsigned char *iso, int len,
unsigned char *utf,
const pg_local_to_utf *map, int mapsize,
const pg_local_to_utf_combined *cmap, int cmapsize,
utf_local_conversion_func conv_func,
int encoding)
{
unsigned int iiso;
uint32 iiso;
int l;
pg_local_to_utf *p;
pg_local_to_utf_combined *cp;
const pg_local_to_utf *p;
const pg_local_to_utf_combined *cp;
if (!PG_VALID_ENCODING(encoding))
ereport(ERROR,
......@@ -526,7 +553,7 @@ LocalToUtf(const unsigned char *iso, unsigned char *utf,
if (!IS_HIGHBIT_SET(*iso))
{
/* ASCII case is easy */
/* ASCII case is easy, assume it's one-to-one conversion */
*utf++ = *iso++;
l = 1;
continue;
......@@ -536,6 +563,7 @@ LocalToUtf(const unsigned char *iso, unsigned char *utf,
if (l < 0)
break;
/* collect coded char of length l */
if (l == 1)
iiso = *iso++;
else if (l == 2)
......@@ -562,61 +590,48 @@ LocalToUtf(const unsigned char *iso, unsigned char *utf,
iiso = 0; /* keep compiler quiet */
}
p = bsearch(&iiso, map, size1,
/* First check ordinary map */
p = bsearch(&iiso, map, mapsize,
sizeof(pg_local_to_utf), compare2);
if (p == NULL)
if (p)
{
/*
* not found in the ordinary map. if there's a combined character
* map, try with it
*/
utf = store_coded_char(utf, p->utf);
continue;
}
/* If there's a combined character map, try that */
if (cmap)
{
cp = bsearch(&iiso, cmap, size2,
cp = bsearch(&iiso, cmap, cmapsize,
sizeof(pg_local_to_utf_combined), compare4);
if (cp)
{
if (cp->utf1 & 0xff000000)
*utf++ = cp->utf1 >> 24;
if (cp->utf1 & 0x00ff0000)
*utf++ = (cp->utf1 & 0x00ff0000) >> 16;
if (cp->utf1 & 0x0000ff00)
*utf++ = (cp->utf1 & 0x0000ff00) >> 8;
if (cp->utf1 & 0x000000ff)
*utf++ = cp->utf1 & 0x000000ff;
if (cp->utf2 & 0xff000000)
*utf++ = cp->utf2 >> 24;
if (cp->utf2 & 0x00ff0000)
*utf++ = (cp->utf2 & 0x00ff0000) >> 16;
if (cp->utf2 & 0x0000ff00)
*utf++ = (cp->utf2 & 0x0000ff00) >> 8;
if (cp->utf2 & 0x000000ff)
*utf++ = cp->utf2 & 0x000000ff;
utf = store_coded_char(utf, cp->utf1);
utf = store_coded_char(utf, cp->utf2);
continue;
}
}
report_untranslatable_char(encoding, PG_UTF8,
(const char *) (iso - l), len);
/* if there's a conversion function, try that */
if (conv_func)
{
uint32 converted = (*conv_func) (iiso);
}
else
if (converted)
{
if (p->utf & 0xff000000)
*utf++ = p->utf >> 24;
if (p->utf & 0x00ff0000)
*utf++ = (p->utf & 0x00ff0000) >> 16;
if (p->utf & 0x0000ff00)
*utf++ = (p->utf & 0x0000ff00) >> 8;
if (p->utf & 0x000000ff)
*utf++ = p->utf & 0x000000ff;
utf = store_coded_char(utf, converted);
continue;
}
}
/* failed to translate this character */
report_untranslatable_char(encoding, PG_UTF8,
(const char *) (iso - l), len);
}
/* if we broke out of loop early, must be invalid input */
if (len > 0)
report_invalid_encoding(encoding, (const char *) iso, len);
......
......@@ -22,7 +22,7 @@ typedef struct
} codes_t;
/* map Big5 Level 1 to CNS 11643-1992 Plane 1 */
static codes_t big5Level1ToCnsPlane1[25] = { /* range */
static const codes_t big5Level1ToCnsPlane1[25] = { /* range */
{0xA140, 0x2121},
{0xA1F6, 0x2258},
{0xA1F7, 0x2257},
......@@ -51,7 +51,7 @@ static codes_t big5Level1ToCnsPlane1[25] = { /* range */
};
/* map CNS 11643-1992 Plane 1 to Big5 Level 1 */
static codes_t cnsPlane1ToBig5Level1[26] = { /* range */
static const codes_t cnsPlane1ToBig5Level1[26] = { /* range */
{0x2121, 0xA140},
{0x2257, 0xA1F7},
{0x2258, 0xA1F6},
......@@ -81,7 +81,7 @@ static codes_t cnsPlane1ToBig5Level1[26] = { /* range */
};
/* map Big5 Level 2 to CNS 11643-1992 Plane 2 */
static codes_t big5Level2ToCnsPlane2[48] = { /* range */
static const codes_t big5Level2ToCnsPlane2[48] = { /* range */
{0xC940, 0x2121},
{0xc94a, 0x0000},
{0xC94B, 0x212B},
......@@ -133,7 +133,7 @@ static codes_t big5Level2ToCnsPlane2[48] = { /* range */
};
/* map CNS 11643-1992 Plane 2 to Big5 Level 2 */
static codes_t cnsPlane2ToBig5Level2[49] = { /* range */
static const codes_t cnsPlane2ToBig5Level2[49] = { /* range */
{0x2121, 0xC940},
{0x212B, 0xC94B},
{0x214C, 0xC9BE},
......@@ -186,7 +186,7 @@ static codes_t cnsPlane2ToBig5Level2[49] = { /* range */
};
/* Big Five Level 1 Correspondence to CNS 11643-1992 Plane 4 */
static unsigned short b1c4[][2] = {
static const unsigned short b1c4[][2] = {
{0xC879, 0x2123},
{0xC87B, 0x2124},
{0xC87D, 0x212A},
......@@ -194,7 +194,7 @@ static unsigned short b1c4[][2] = {
};
/* Big Five Level 2 Correspondence to CNS 11643-1992 Plane 3 */
static unsigned short b2c3[][2] = {
static const unsigned short b2c3[][2] = {
{0xF9D6, 0x4337},
{0xF9D7, 0x4F50},
{0xF9D8, 0x444E},
......@@ -205,7 +205,7 @@ static unsigned short b2c3[][2] = {
};
static unsigned short BinarySearchRange
(codes_t *array, int high, unsigned short code)
(const codes_t *array, int high, unsigned short code)
{
int low,
mid,
......
......@@ -44,8 +44,11 @@ big5_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_UTF8);
LocalToUtf(src, dest, LUmapBIG5, NULL,
sizeof(LUmapBIG5) / sizeof(pg_local_to_utf), 0, PG_BIG5, len);
LocalToUtf(src, len, dest,
LUmapBIG5, lengthof(LUmapBIG5),
NULL, 0,
NULL,
PG_BIG5);
PG_RETURN_VOID();
}
......@@ -59,8 +62,11 @@ utf8_to_big5(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_BIG5);
UtfToLocal(src, dest, ULmapBIG5, NULL,
sizeof(ULmapBIG5) / sizeof(pg_utf_to_local), 0, PG_BIG5, len);
UtfToLocal(src, len, dest,
ULmapBIG5, lengthof(ULmapBIG5),
NULL, 0,
NULL,
PG_BIG5);
PG_RETURN_VOID();
}
......@@ -53,8 +53,11 @@ utf8_to_koi8r(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8R);
UtfToLocal(src, dest, ULmapKOI8R, NULL,
sizeof(ULmapKOI8R) / sizeof(pg_utf_to_local), 0, PG_KOI8R, len);
UtfToLocal(src, len, dest,
ULmapKOI8R, lengthof(ULmapKOI8R),
NULL, 0,
NULL,
PG_KOI8R);
PG_RETURN_VOID();
}
......@@ -68,8 +71,11 @@ koi8r_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_UTF8);
LocalToUtf(src, dest, LUmapKOI8R, NULL,
sizeof(LUmapKOI8R) / sizeof(pg_local_to_utf), 0, PG_KOI8R, len);
LocalToUtf(src, len, dest,
LUmapKOI8R, lengthof(LUmapKOI8R),
NULL, 0,
NULL,
PG_KOI8R);
PG_RETURN_VOID();
}
......@@ -83,8 +89,11 @@ utf8_to_koi8u(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8U);
UtfToLocal(src, dest, ULmapKOI8U, NULL,
sizeof(ULmapKOI8U) / sizeof(pg_utf_to_local), 0, PG_KOI8U, len);
UtfToLocal(src, len, dest,
ULmapKOI8U, lengthof(ULmapKOI8U),
NULL, 0,
NULL,
PG_KOI8U);
PG_RETURN_VOID();
}
......@@ -98,8 +107,11 @@ koi8u_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8U, PG_UTF8);
LocalToUtf(src, dest, LUmapKOI8U, NULL,
sizeof(LUmapKOI8U) / sizeof(pg_local_to_utf), 0, PG_KOI8U, len);
LocalToUtf(src, len, dest,
LUmapKOI8U, lengthof(LUmapKOI8U),
NULL, 0,
NULL,
PG_KOI8U);
PG_RETURN_VOID();
}
......@@ -46,10 +46,11 @@ euc_jis_2004_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_UTF8);
LocalToUtf(src, dest, LUmapEUC_JIS_2004, LUmapEUC_JIS_2004_combined,
sizeof(LUmapEUC_JIS_2004) / sizeof(pg_local_to_utf),
sizeof(LUmapEUC_JIS_2004_combined) / sizeof(pg_local_to_utf_combined),
PG_EUC_JIS_2004, len);
LocalToUtf(src, len, dest,
LUmapEUC_JIS_2004, lengthof(LUmapEUC_JIS_2004),
LUmapEUC_JIS_2004_combined, lengthof(LUmapEUC_JIS_2004_combined),
NULL,
PG_EUC_JIS_2004);
PG_RETURN_VOID();
}
......@@ -63,10 +64,11 @@ utf8_to_euc_jis_2004(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JIS_2004);
UtfToLocal(src, dest, ULmapEUC_JIS_2004, ULmapEUC_JIS_2004_combined,
sizeof(ULmapEUC_JIS_2004) / sizeof(pg_utf_to_local),
sizeof(ULmapEUC_JIS_2004_combined) / sizeof(pg_utf_to_local_combined),
PG_EUC_JIS_2004, len);
UtfToLocal(src, len, dest,
ULmapEUC_JIS_2004, lengthof(ULmapEUC_JIS_2004),
ULmapEUC_JIS_2004_combined, lengthof(ULmapEUC_JIS_2004_combined),
NULL,
PG_EUC_JIS_2004);
PG_RETURN_VOID();
}
......@@ -44,8 +44,11 @@ euc_cn_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_CN, PG_UTF8);
LocalToUtf(src, dest, LUmapEUC_CN, NULL,
sizeof(LUmapEUC_CN) / sizeof(pg_local_to_utf), 0, PG_EUC_CN, len);
LocalToUtf(src, len, dest,
LUmapEUC_CN, lengthof(LUmapEUC_CN),
NULL, 0,
NULL,
PG_EUC_CN);
PG_RETURN_VOID();
}
......@@ -59,8 +62,11 @@ utf8_to_euc_cn(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_CN);
UtfToLocal(src, dest, ULmapEUC_CN, NULL,
sizeof(ULmapEUC_CN) / sizeof(pg_utf_to_local), 0, PG_EUC_CN, len);
UtfToLocal(src, len, dest,
ULmapEUC_CN, lengthof(ULmapEUC_CN),
NULL, 0,
NULL,
PG_EUC_CN);
PG_RETURN_VOID();
}
......@@ -44,8 +44,11 @@ euc_jp_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_UTF8);
LocalToUtf(src, dest, LUmapEUC_JP, NULL,
sizeof(LUmapEUC_JP) / sizeof(pg_local_to_utf), 0, PG_EUC_JP, len);
LocalToUtf(src, len, dest,
LUmapEUC_JP, lengthof(LUmapEUC_JP),
NULL, 0,
NULL,
PG_EUC_JP);
PG_RETURN_VOID();
}
......@@ -59,8 +62,11 @@ utf8_to_euc_jp(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JP);
UtfToLocal(src, dest, ULmapEUC_JP, NULL,
sizeof(ULmapEUC_JP) / sizeof(pg_utf_to_local), 0, PG_EUC_JP, len);
UtfToLocal(src, len, dest,
ULmapEUC_JP, lengthof(ULmapEUC_JP),
NULL, 0,
NULL,
PG_EUC_JP);
PG_RETURN_VOID();
}
......@@ -44,8 +44,11 @@ euc_kr_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_KR, PG_UTF8);
LocalToUtf(src, dest, LUmapEUC_KR, NULL,
sizeof(LUmapEUC_KR) / sizeof(pg_local_to_utf), 0, PG_EUC_KR, len);
LocalToUtf(src, len, dest,
LUmapEUC_KR, lengthof(LUmapEUC_KR),
NULL, 0,
NULL,
PG_EUC_KR);
PG_RETURN_VOID();
}
......@@ -59,8 +62,11 @@ utf8_to_euc_kr(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_KR);
UtfToLocal(src, dest, ULmapEUC_KR, NULL,
sizeof(ULmapEUC_KR) / sizeof(pg_utf_to_local), 0, PG_EUC_KR, len);
UtfToLocal(src, len, dest,
ULmapEUC_KR, lengthof(ULmapEUC_KR),
NULL, 0,
NULL,
PG_EUC_KR);
PG_RETURN_VOID();
}
......@@ -44,8 +44,11 @@ euc_tw_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_UTF8);
LocalToUtf(src, dest, LUmapEUC_TW, NULL,
sizeof(LUmapEUC_TW) / sizeof(pg_local_to_utf), 0, PG_EUC_TW, len);
LocalToUtf(src, len, dest,
LUmapEUC_TW, lengthof(LUmapEUC_TW),
NULL, 0,
NULL,
PG_EUC_TW);
PG_RETURN_VOID();
}
......@@ -59,8 +62,11 @@ utf8_to_euc_tw(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_TW);
UtfToLocal(src, dest, ULmapEUC_TW, NULL,
sizeof(ULmapEUC_TW) / sizeof(pg_utf_to_local), 0, PG_EUC_TW, len);
UtfToLocal(src, len, dest,
ULmapEUC_TW, lengthof(ULmapEUC_TW),
NULL, 0,
NULL,
PG_EUC_TW);
PG_RETURN_VOID();
}
......@@ -44,8 +44,11 @@ gb18030_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_GB18030, PG_UTF8);
LocalToUtf(src, dest, LUmapGB18030, NULL,
sizeof(LUmapGB18030) / sizeof(pg_local_to_utf), 0, PG_GB18030, len);
LocalToUtf(src, len, dest,
LUmapGB18030, lengthof(LUmapGB18030),
NULL, 0,
NULL,
PG_GB18030);
PG_RETURN_VOID();
}
......@@ -59,8 +62,11 @@ utf8_to_gb18030(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GB18030);
UtfToLocal(src, dest, ULmapGB18030, NULL,
sizeof(ULmapGB18030) / sizeof(pg_utf_to_local), 0, PG_GB18030, len);
UtfToLocal(src, len, dest,
ULmapGB18030, lengthof(ULmapGB18030),
NULL, 0,
NULL,
PG_GB18030);
PG_RETURN_VOID();
}
......@@ -44,8 +44,11 @@ gbk_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_GBK, PG_UTF8);
LocalToUtf(src, dest, LUmapGBK, NULL,
sizeof(LUmapGBK) / sizeof(pg_local_to_utf), 0, PG_GBK, len);
LocalToUtf(src, len, dest,
LUmapGBK, lengthof(LUmapGBK),
NULL, 0,
NULL,
PG_GBK);
PG_RETURN_VOID();
}
......@@ -59,8 +62,11 @@ utf8_to_gbk(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GBK);
UtfToLocal(src, dest, ULmapGBK, NULL,
sizeof(ULmapGBK) / sizeof(pg_utf_to_local), 0, PG_GBK, len);
UtfToLocal(src, len, dest,
ULmapGBK, lengthof(ULmapGBK),
NULL, 0,
NULL,
PG_GBK);
PG_RETURN_VOID();
}
......@@ -63,52 +63,52 @@ extern Datum utf8_to_iso8859(PG_FUNCTION_ARGS);
typedef struct
{
pg_enc encoding;
pg_local_to_utf *map1; /* to UTF8 map name */
pg_utf_to_local *map2; /* from UTF8 map name */
const pg_local_to_utf *map1; /* to UTF8 map name */
const pg_utf_to_local *map2; /* from UTF8 map name */
int size1; /* size of map1 */
int size2; /* size of map2 */
} pg_conv_map;
static pg_conv_map maps[] = {
static const pg_conv_map maps[] = {
{PG_LATIN2, LUmapISO8859_2, ULmapISO8859_2,
sizeof(LUmapISO8859_2) / sizeof(pg_local_to_utf),
sizeof(ULmapISO8859_2) / sizeof(pg_utf_to_local)}, /* ISO-8859-2 Latin 2 */
lengthof(LUmapISO8859_2),
lengthof(ULmapISO8859_2)}, /* ISO-8859-2 Latin 2 */
{PG_LATIN3, LUmapISO8859_3, ULmapISO8859_3,
sizeof(LUmapISO8859_3) / sizeof(pg_local_to_utf),
sizeof(ULmapISO8859_3) / sizeof(pg_utf_to_local)}, /* ISO-8859-3 Latin 3 */
lengthof(LUmapISO8859_3),
lengthof(ULmapISO8859_3)}, /* ISO-8859-3 Latin 3 */
{PG_LATIN4, LUmapISO8859_4, ULmapISO8859_4,
sizeof(LUmapISO8859_4) / sizeof(pg_local_to_utf),
sizeof(ULmapISO8859_4) / sizeof(pg_utf_to_local)}, /* ISO-8859-4 Latin 4 */
lengthof(LUmapISO8859_4),
lengthof(ULmapISO8859_4)}, /* ISO-8859-4 Latin 4 */
{PG_LATIN5, LUmapISO8859_9, ULmapISO8859_9,
sizeof(LUmapISO8859_9) / sizeof(pg_local_to_utf),
sizeof(ULmapISO8859_9) / sizeof(pg_utf_to_local)}, /* ISO-8859-9 Latin 5 */
lengthof(LUmapISO8859_9),
lengthof(ULmapISO8859_9)}, /* ISO-8859-9 Latin 5 */
{PG_LATIN6, LUmapISO8859_10, ULmapISO8859_10,
sizeof(LUmapISO8859_10) / sizeof(pg_local_to_utf),
sizeof(ULmapISO8859_10) / sizeof(pg_utf_to_local)}, /* ISO-8859-10 Latin 6 */
lengthof(LUmapISO8859_10),
lengthof(ULmapISO8859_10)}, /* ISO-8859-10 Latin 6 */
{PG_LATIN7, LUmapISO8859_13, ULmapISO8859_13,
sizeof(LUmapISO8859_13) / sizeof(pg_local_to_utf),
sizeof(ULmapISO8859_13) / sizeof(pg_utf_to_local)}, /* ISO-8859-13 Latin 7 */
lengthof(LUmapISO8859_13),
lengthof(ULmapISO8859_13)}, /* ISO-8859-13 Latin 7 */
{PG_LATIN8, LUmapISO8859_14, ULmapISO8859_14,
sizeof(LUmapISO8859_14) / sizeof(pg_local_to_utf),
sizeof(ULmapISO8859_14) / sizeof(pg_utf_to_local)}, /* ISO-8859-14 Latin 8 */
lengthof(LUmapISO8859_14),
lengthof(ULmapISO8859_14)}, /* ISO-8859-14 Latin 8 */
{PG_LATIN9, LUmapISO8859_15, ULmapISO8859_15,
sizeof(LUmapISO8859_15) / sizeof(pg_local_to_utf),
sizeof(ULmapISO8859_15) / sizeof(pg_utf_to_local)}, /* ISO-8859-15 Latin 9 */
lengthof(LUmapISO8859_15),
lengthof(ULmapISO8859_15)}, /* ISO-8859-15 Latin 9 */
{PG_LATIN10, LUmapISO8859_16, ULmapISO8859_16,
sizeof(LUmapISO8859_16) / sizeof(pg_local_to_utf),
sizeof(ULmapISO8859_16) / sizeof(pg_utf_to_local)}, /* ISO-8859-16 Latin 10 */
lengthof(LUmapISO8859_16),
lengthof(ULmapISO8859_16)}, /* ISO-8859-16 Latin 10 */
{PG_ISO_8859_5, LUmapISO8859_5, ULmapISO8859_5,
sizeof(LUmapISO8859_5) / sizeof(pg_local_to_utf),
sizeof(ULmapISO8859_5) / sizeof(pg_utf_to_local)}, /* ISO-8859-5 */
lengthof(LUmapISO8859_5),
lengthof(ULmapISO8859_5)}, /* ISO-8859-5 */
{PG_ISO_8859_6, LUmapISO8859_6, ULmapISO8859_6,
sizeof(LUmapISO8859_6) / sizeof(pg_local_to_utf),
sizeof(ULmapISO8859_6) / sizeof(pg_utf_to_local)}, /* ISO-8859-6 */
lengthof(LUmapISO8859_6),
lengthof(ULmapISO8859_6)}, /* ISO-8859-6 */
{PG_ISO_8859_7, LUmapISO8859_7, ULmapISO8859_7,
sizeof(LUmapISO8859_7) / sizeof(pg_local_to_utf),
sizeof(ULmapISO8859_7) / sizeof(pg_utf_to_local)}, /* ISO-8859-7 */
lengthof(LUmapISO8859_7),
lengthof(ULmapISO8859_7)}, /* ISO-8859-7 */
{PG_ISO_8859_8, LUmapISO8859_8, ULmapISO8859_8,
sizeof(LUmapISO8859_8) / sizeof(pg_local_to_utf),
sizeof(ULmapISO8859_8) / sizeof(pg_utf_to_local)}, /* ISO-8859-8 */
lengthof(LUmapISO8859_8),
lengthof(ULmapISO8859_8)}, /* ISO-8859-8 */
};
Datum
......@@ -122,18 +122,23 @@ iso8859_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(-1, PG_UTF8);
for (i = 0; i < sizeof(maps) / sizeof(pg_conv_map); i++)
for (i = 0; i < lengthof(maps); i++)
{
if (encoding == maps[i].encoding)
{
LocalToUtf(src, dest, maps[i].map1, NULL, maps[i].size1, 0, encoding, len);
LocalToUtf(src, len, dest,
maps[i].map1, maps[i].size1,
NULL, 0,
NULL,
encoding);
PG_RETURN_VOID();
}
}
ereport(ERROR,
(errcode(ERRCODE_INTERNAL_ERROR),
errmsg("unexpected encoding ID %d for ISO 8859 character sets", encoding)));
errmsg("unexpected encoding ID %d for ISO 8859 character sets",
encoding)));
PG_RETURN_VOID();
}
......@@ -149,18 +154,23 @@ utf8_to_iso8859(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, -1);
for (i = 0; i < sizeof(maps) / sizeof(pg_conv_map); i++)
for (i = 0; i < lengthof(maps); i++)
{
if (encoding == maps[i].encoding)
{
UtfToLocal(src, dest, maps[i].map2, NULL, maps[i].size2, 0, encoding, len);
UtfToLocal(src, len, dest,
maps[i].map2, maps[i].size2,
NULL, 0,
NULL,
encoding);
PG_RETURN_VOID();
}
}
ereport(ERROR,
(errcode(ERRCODE_INTERNAL_ERROR),
errmsg("unexpected encoding ID %d for ISO 8859 character sets", encoding)));
errmsg("unexpected encoding ID %d for ISO 8859 character sets",
encoding)));
PG_RETURN_VOID();
}
......@@ -44,8 +44,11 @@ johab_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_JOHAB, PG_UTF8);
LocalToUtf(src, dest, LUmapJOHAB, NULL,
sizeof(LUmapJOHAB) / sizeof(pg_local_to_utf), 0, PG_JOHAB, len);
LocalToUtf(src, len, dest,
LUmapJOHAB, lengthof(LUmapJOHAB),
NULL, 0,
NULL,
PG_JOHAB);
PG_RETURN_VOID();
}
......@@ -59,8 +62,11 @@ utf8_to_johab(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_JOHAB);
UtfToLocal(src, dest, ULmapJOHAB, NULL,
sizeof(ULmapJOHAB) / sizeof(pg_utf_to_local), 0, PG_JOHAB, len);
UtfToLocal(src, len, dest,
ULmapJOHAB, lengthof(ULmapJOHAB),
NULL, 0,
NULL,
PG_JOHAB);
PG_RETURN_VOID();
}
......@@ -44,8 +44,11 @@ sjis_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_UTF8);
LocalToUtf(src, dest, LUmapSJIS, NULL,
sizeof(LUmapSJIS) / sizeof(pg_local_to_utf), 0, PG_SJIS, len);
LocalToUtf(src, len, dest,
LUmapSJIS, lengthof(LUmapSJIS),
NULL, 0,
NULL,
PG_SJIS);
PG_RETURN_VOID();
}
......@@ -59,8 +62,11 @@ utf8_to_sjis(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_SJIS);
UtfToLocal(src, dest, ULmapSJIS, NULL,
sizeof(ULmapSJIS) / sizeof(pg_utf_to_local), 0, PG_SJIS, len);
UtfToLocal(src, len, dest,
ULmapSJIS, lengthof(ULmapSJIS),
NULL, 0,
NULL,
PG_SJIS);
PG_RETURN_VOID();
}
......@@ -46,10 +46,11 @@ shift_jis_2004_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_UTF8);
LocalToUtf(src, dest, LUmapSHIFT_JIS_2004, LUmapSHIFT_JIS_2004_combined,
sizeof(LUmapSHIFT_JIS_2004) / sizeof(pg_local_to_utf),
sizeof(LUmapSHIFT_JIS_2004_combined) / sizeof(pg_local_to_utf_combined),
PG_SHIFT_JIS_2004, len);
LocalToUtf(src, len, dest,
LUmapSHIFT_JIS_2004, lengthof(LUmapSHIFT_JIS_2004),
LUmapSHIFT_JIS_2004_combined, lengthof(LUmapSHIFT_JIS_2004_combined),
NULL,
PG_SHIFT_JIS_2004);
PG_RETURN_VOID();
}
......@@ -63,10 +64,11 @@ utf8_to_shift_jis_2004(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_SHIFT_JIS_2004);
UtfToLocal(src, dest, ULmapSHIFT_JIS_2004, ULmapSHIFT_JIS_2004_combined,
sizeof(ULmapSHIFT_JIS_2004) / sizeof(pg_utf_to_local),
sizeof(ULmapSHIFT_JIS_2004_combined) / sizeof(pg_utf_to_local_combined),
PG_SHIFT_JIS_2004, len);
UtfToLocal(src, len, dest,
ULmapSHIFT_JIS_2004, lengthof(ULmapSHIFT_JIS_2004),
ULmapSHIFT_JIS_2004_combined, lengthof(ULmapSHIFT_JIS_2004_combined),
NULL,
PG_SHIFT_JIS_2004);
PG_RETURN_VOID();
}
......@@ -44,8 +44,11 @@ uhc_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UHC, PG_UTF8);
LocalToUtf(src, dest, LUmapUHC, NULL,
sizeof(LUmapUHC) / sizeof(pg_local_to_utf), 0, PG_UHC, len);
LocalToUtf(src, len, dest,
LUmapUHC, lengthof(LUmapUHC),
NULL, 0,
NULL,
PG_UHC);
PG_RETURN_VOID();
}
......@@ -59,8 +62,11 @@ utf8_to_uhc(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_UHC);
UtfToLocal(src, dest, ULmapUHC, NULL,
sizeof(ULmapUHC) / sizeof(pg_utf_to_local), 0, PG_UHC, len);
UtfToLocal(src, len, dest,
ULmapUHC, lengthof(ULmapUHC),
NULL, 0,
NULL,
PG_UHC);
PG_RETURN_VOID();
}
......@@ -59,46 +59,46 @@ extern Datum utf8_to_win(PG_FUNCTION_ARGS);
typedef struct
{
pg_enc encoding;
pg_local_to_utf *map1; /* to UTF8 map name */
pg_utf_to_local *map2; /* from UTF8 map name */
const pg_local_to_utf *map1; /* to UTF8 map name */
const pg_utf_to_local *map2; /* from UTF8 map name */
int size1; /* size of map1 */
int size2; /* size of map2 */
} pg_conv_map;
static pg_conv_map maps[] = {
static const pg_conv_map maps[] = {
{PG_WIN866, LUmapWIN866, ULmapWIN866,
sizeof(LUmapWIN866) / sizeof(pg_local_to_utf),
sizeof(ULmapWIN866) / sizeof(pg_utf_to_local)},
lengthof(LUmapWIN866),
lengthof(ULmapWIN866)},
{PG_WIN874, LUmapWIN874, ULmapWIN874,
sizeof(LUmapWIN874) / sizeof(pg_local_to_utf),
sizeof(ULmapWIN874) / sizeof(pg_utf_to_local)},
lengthof(LUmapWIN874),
lengthof(ULmapWIN874)},
{PG_WIN1250, LUmapWIN1250, ULmapWIN1250,
sizeof(LUmapWIN1250) / sizeof(pg_local_to_utf),
sizeof(ULmapWIN1250) / sizeof(pg_utf_to_local)},
lengthof(LUmapWIN1250),
lengthof(ULmapWIN1250)},
{PG_WIN1251, LUmapWIN1251, ULmapWIN1251,
sizeof(LUmapWIN1251) / sizeof(pg_local_to_utf),
sizeof(ULmapWIN1251) / sizeof(pg_utf_to_local)},
lengthof(LUmapWIN1251),
lengthof(ULmapWIN1251)},
{PG_WIN1252, LUmapWIN1252, ULmapWIN1252,
sizeof(LUmapWIN1252) / sizeof(pg_local_to_utf),
sizeof(ULmapWIN1252) / sizeof(pg_utf_to_local)},
lengthof(LUmapWIN1252),
lengthof(ULmapWIN1252)},
{PG_WIN1253, LUmapWIN1253, ULmapWIN1253,
sizeof(LUmapWIN1253) / sizeof(pg_local_to_utf),
sizeof(ULmapWIN1253) / sizeof(pg_utf_to_local)},
lengthof(LUmapWIN1253),
lengthof(ULmapWIN1253)},
{PG_WIN1254, LUmapWIN1254, ULmapWIN1254,
sizeof(LUmapWIN1254) / sizeof(pg_local_to_utf),
sizeof(ULmapWIN1254) / sizeof(pg_utf_to_local)},
lengthof(LUmapWIN1254),
lengthof(ULmapWIN1254)},
{PG_WIN1255, LUmapWIN1255, ULmapWIN1255,
sizeof(LUmapWIN1255) / sizeof(pg_local_to_utf),
sizeof(ULmapWIN1255) / sizeof(pg_utf_to_local)},
lengthof(LUmapWIN1255),
lengthof(ULmapWIN1255)},
{PG_WIN1256, LUmapWIN1256, ULmapWIN1256,
sizeof(LUmapWIN1256) / sizeof(pg_local_to_utf),
sizeof(ULmapWIN1256) / sizeof(pg_utf_to_local)},
lengthof(LUmapWIN1256),
lengthof(ULmapWIN1256)},
{PG_WIN1257, LUmapWIN1257, ULmapWIN1257,
sizeof(LUmapWIN1257) / sizeof(pg_local_to_utf),
sizeof(ULmapWIN1257) / sizeof(pg_utf_to_local)},
lengthof(LUmapWIN1257),
lengthof(ULmapWIN1257)},
{PG_WIN1258, LUmapWIN1258, ULmapWIN1258,
sizeof(LUmapWIN1258) / sizeof(pg_local_to_utf),
sizeof(ULmapWIN1258) / sizeof(pg_utf_to_local)},
lengthof(LUmapWIN1258),
lengthof(ULmapWIN1258)},
};
Datum
......@@ -112,18 +112,23 @@ win_to_utf8(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(-1, PG_UTF8);
for (i = 0; i < sizeof(maps) / sizeof(pg_conv_map); i++)
for (i = 0; i < lengthof(maps); i++)
{
if (encoding == maps[i].encoding)
{
LocalToUtf(src, dest, maps[i].map1, NULL, maps[i].size1, 0, encoding, len);
LocalToUtf(src, len, dest,
maps[i].map1, maps[i].size1,
NULL, 0,
NULL,
encoding);
PG_RETURN_VOID();
}
}
ereport(ERROR,
(errcode(ERRCODE_INTERNAL_ERROR),
errmsg("unexpected encoding ID %d for WIN character sets", encoding)));
errmsg("unexpected encoding ID %d for WIN character sets",
encoding)));
PG_RETURN_VOID();
}
......@@ -139,18 +144,23 @@ utf8_to_win(PG_FUNCTION_ARGS)
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, -1);
for (i = 0; i < sizeof(maps) / sizeof(pg_conv_map); i++)
for (i = 0; i < lengthof(maps); i++)
{
if (encoding == maps[i].encoding)
{
UtfToLocal(src, dest, maps[i].map2, NULL, maps[i].size2, 0, encoding, len);
UtfToLocal(src, len, dest,
maps[i].map2, maps[i].size2,
NULL, 0,
NULL,
encoding);
PG_RETURN_VOID();
}
}
ereport(ERROR,
(errcode(ERRCODE_INTERNAL_ERROR),
errmsg("unexpected encoding ID %d for WIN character sets", encoding)));
errmsg("unexpected encoding ID %d for WIN character sets",
encoding)));
PG_RETURN_VOID();
}
......@@ -366,9 +366,16 @@ typedef struct
extern const pg_wchar_tbl pg_wchar_table[];
/*
* Data structures for conversions between UTF-8 and other encodings
* (UtfToLocal() and LocalToUtf()). In these data structures, characters of
* either encoding are represented by uint32 words; hence we can only support
* characters up to 4 bytes long. For example, the byte sequence 0xC2 0x89
* would be represented by 0x0000C289, and 0xE8 0xA2 0xB4 by 0x00E8A2B4.
*
* Maps are arrays of these structs, which must be in order by the lookup key
* (so that bsearch() can be used).
*
* UTF-8 to local code conversion map
* Note that we limit the max length of UTF-8 to 4 bytes,
* which is UCS-4 00010000-001FFFFF range.
*/
typedef struct
{
......@@ -386,7 +393,7 @@ typedef struct
} pg_local_to_utf;
/*
* UTF-8 to local code conversion map(combined characters)
* UTF-8 to local code conversion map (for combined characters)
*/
typedef struct
{
......@@ -396,7 +403,7 @@ typedef struct
} pg_utf_to_local_combined;
/*
* local code to UTF-8 conversion map(combined characters)
* local code to UTF-8 conversion map (for combined characters)
*/
typedef struct
{
......@@ -405,6 +412,13 @@ typedef struct
uint32 utf2; /* UTF-8 code 2 */
} pg_local_to_utf_combined;
/*
 * callback function for algorithmic encoding conversions (in either direction)
 *
 * The function receives one character code packed into a uint32 word and
 * returns the converted code, likewise packed into a uint32 word.
 * UtfToLocal() and LocalToUtf() take such a function as their conv_func
 * argument (NULL if there is none); they store a nonzero result with
 * store_coded_char() and otherwise report the character as untranslatable.
 *
 * if function returns zero, it does not know how to convert the code
 * (consequently zero can never be used as a valid conversion result)
 */
typedef uint32 (*utf_local_conversion_func) (uint32 code);
/*
* Support macro for encoding conversion functions to validate their
* arguments. (This could be made more compact if we included fmgr.h
......@@ -494,13 +508,18 @@ extern char *pg_server_to_any(const char *s, int len, int encoding);
extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc);
extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc);
extern void LocalToUtf(const unsigned char *iso, unsigned char *utf,
const pg_local_to_utf *map, const pg_local_to_utf_combined *cmap,
int size1, int size2, int encoding, int len);
extern void UtfToLocal(const unsigned char *utf, unsigned char *iso,
const pg_utf_to_local *map, const pg_utf_to_local_combined *cmap,
int size1, int size2, int encoding, int len);
/*
 * UTF8 ---> local code
 *
 * utf/len: input string and its length; iso: output area.
 * map/mapsize: conversion map and its number of entries.
 * cmap/cmapsize: map for combined characters (NULL and 0 if none).
 * conv_func: algorithmic conversion callback (NULL if none).
 * encoding: PG identifier for the local encoding.
 */
extern void UtfToLocal(const unsigned char *utf, int len,
unsigned char *iso,
const pg_utf_to_local *map, int mapsize,
const pg_utf_to_local_combined *cmap, int cmapsize,
utf_local_conversion_func conv_func,
int encoding);
/*
 * local code ---> UTF8
 *
 * iso/len: input string and its length in bytes; utf: output area.
 * The remaining arguments mirror UtfToLocal(), using the local-to-UTF8
 * map types.
 */
extern void LocalToUtf(const unsigned char *iso, int len,
unsigned char *utf,
const pg_local_to_utf *map, int mapsize,
const pg_local_to_utf_combined *cmap, int cmapsize,
utf_local_conversion_func conv_func,
int encoding);
extern bool pg_verifymbstr(const char *mbstr, int len, bool noError);
extern bool pg_verify_mbstr(int encoding, const char *mbstr, int len,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment