Commit 75c6519f authored by Tatsuo Ishii's avatar Tatsuo Ishii

Add new encodings EUC_JIS_2004 and SHIFT_JIS_2004,

along with new conversions among EUC_JIS_2004, SHIFT_JIS_2004 and UTF-8.
The catalog version has been bumped.
parent 7b4726e6
<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.81 2007/01/31 20:56:16 momjian Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.82 2007/03/25 11:56:01 ishii Exp $ -->
<chapter id="charset">
<title>Localization</>
......@@ -364,6 +364,14 @@ initdb --locale=sv_SE
<entry>1-3</entry>
<entry></entry>
</row>
<row>
<entry><literal>EUC_JIS_2004</literal></entry>
<entry>Extended UNIX Code-JP, JIS X 0213</entry>
<entry>Japanese</entry>
<entry>Yes</entry>
<entry>1-3</entry>
<entry></entry>
</row>
<row>
<entry><literal>EUC_KR</literal></entry>
<entry>Extended UNIX Code-KR</entry>
......@@ -540,6 +548,14 @@ initdb --locale=sv_SE
<entry>1-2</entry>
<entry><literal>Mskanji</>, <literal>ShiftJIS</>, <literal>WIN932</>, <literal>Windows932</></entry>
</row>
<row>
<entry><literal>SHIFT_JIS_2004</literal></entry>
<entry>Shift JIS, JIS X 0213</entry>
<entry>Japanese</entry>
<entry>No</entry>
<entry>1-2</entry>
<entry></entry>
</row>
<row>
<entry><literal>SQL_ASCII</literal></entry>
<entry>unspecified (see text)</entry>
......
<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.370 2007/03/20 05:44:59 neilc Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.371 2007/03/25 11:56:01 ishii Exp $ -->
<chapter id="functions">
<title>Functions and Operators</title>
......@@ -2394,6 +2394,42 @@
<entry><literal>UTF8</literal></entry>
</row>
<row>
<entry><literal>euc_jis_2004_to_utf8</literal></entry>
<entry><literal>EUC_JIS_2004</literal></entry>
<entry><literal>UTF8</literal></entry>
</row>
<row>
<entry><literal>utf8_to_euc_jis_2004</literal></entry>
<entry><literal>UTF8</literal></entry>
<entry><literal>EUC_JIS_2004</literal></entry>
</row>
<row>
<entry><literal>shift_jis_2004_to_utf8</literal></entry>
<entry><literal>SHIFT_JIS_2004</literal></entry>
<entry><literal>UTF8</literal></entry>
</row>
<row>
<entry><literal>utf8_to_shift_jis_2004</literal></entry>
<entry><literal>UTF8</literal></entry>
<entry><literal>SHIFT_JIS_2004</literal></entry>
</row>
<row>
<entry><literal>euc_jis_2004_to_shift_jis_2004</literal></entry>
<entry><literal>EUC_JIS_2004</literal></entry>
<entry><literal>SHIFT_JIS_2004</literal></entry>
</row>
<row>
<entry><literal>shift_jis_2004_to_euc_jis_2004</literal></entry>
<entry><literal>SHIFT_JIS_2004</literal></entry>
<entry><literal>EUC_JIS_2004</literal></entry>
</row>
</tbody>
</tgroup>
</table>
......
#! /usr/bin/perl
#
# Copyright (c) 2007, PostgreSQL Global Development Group
#
# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl,v 1.1 2007/03/25 11:56:02 ishii Exp $
#
# Generate UTF-8 <--> EUC_JIS_2004 code conversion tables from
# "euc-jis-2004-std.txt" (http://x0213.org)
require "ucs2utf.pl";
$TEST = 1;
# first generate UTF-8 --> EUC_JIS_2004 table
$in_file = "euc-jis-2004-std.txt";
open( FILE, $in_file ) || die( "cannot open $in_file" );
reset 'array';
reset 'array1';
reset 'comment';
reset 'comment1';
while($line = <FILE> ){
if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) {
$c = $1;
$u1 = $2;
$u2 = $3;
$rest = "U+" . $u1 . "+" . $u2 . $4;
$code = hex($c);
$ucs = hex($u1);
$utf1 = &ucs2utf($ucs);
$ucs = hex($u2);
$utf2 = &ucs2utf($ucs);
$str = sprintf "%08x%08x", $utf1, $utf2;
$array1{ $str } = $code;
$comment1{ $str } = $rest;
$count1++;
next;
} elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) {
$c = $1;
$u = $2;
$rest = "U+" . $u . $3;
} else {
next;
}
$ucs = hex($u);
$code = hex($c);
$utf = &ucs2utf($ucs);
if( $array{ $utf } ne "" ){
printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs;
next;
}
$count++;
$array{ $utf } = $code;
$comment{ $code } = $rest;
}
close( FILE );
$file = "utf8_to_euc_jis_2004.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "/*\n";
print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
print FILE " */\n";
print FILE "static pg_utf_to_local ULmapEUC_JIS_2004[] = {\n";
for $index ( sort {$a <=> $b} keys( %array ) ){
$code = $array{ $index };
$count--;
if( $count == 0 ){
printf FILE " {0x%08x, 0x%06x} /* %s */\n", $index, $code, $comment{ $code };
} else {
printf FILE " {0x%08x, 0x%06x}, /* %s */\n", $index, $code, $comment{ $code };
}
}
print FILE "};\n";
close(FILE);
if ($TEST == 1) {
	# Write two parallel test-data files: each line of utf8.data holds the
	# UTF-8 bytes of one character, and the same line of euc_jis_2004.data
	# holds the corresponding EUC_JIS_2004 bytes.
	#
	# FILE1/FILE2 are deliberately left open at the end of this block: the
	# combined-character test block later in this script appends to them
	# and closes them.
	$file1 = "utf8.data";
	$file2 = "euc_jis_2004.data";
	open( FILE1, "> $file1" ) || die( "cannot open $file1" );
	open( FILE2, "> $file2" ) || die( "cannot open $file2" );

	for $index ( sort {$a <=> $b} keys( %array ) ){
		$code = $array{ $index };

		# Skip NUL, TAB, LF, CR and backslash (presumably because they would
		# be ambiguous in a line-oriented data file -- TODO confirm), then
		# accept only codes in one of these ranges:
		#   < 0x80               single byte
		#   0x8ea1   .. 0x8efe   two bytes, lead byte 0x8e
		#   0x8fa1a1 .. 0x8ffefe three bytes, lead byte 0x8f
		#   0xa1a1   .. 0xfefe   two bytes
		# The last upper bound used to read 0x8fefe, which is not a valid
		# two-byte limit; 0xfefe is the intended end of the range.
		if ($code > 0x00 && $code != 0x09 && $code != 0x0a && $code != 0x0d &&
			$code != 0x5c &&
			($code < 0x80 ||
			 ($code >= 0x8ea1 && $code <= 0x8efe) ||
			 ($code >= 0x8fa1a1 && $code <= 0x8ffefe) ||
			 ($code >= 0xa1a1 && $code <= 0xfefe))) {

			# Emit each value most-significant byte first, omitting bytes
			# that are zero.
			for ($i = 3; $i >= 0; $i--) {
				$s = $i * 8;
				$mask = 0xff << $s;
				print FILE1 pack("C", ($index & $mask) >> $s) if $index & $mask;
				print FILE2 pack("C", ($code & $mask) >> $s) if $code & $mask;
			}
			print FILE1 "\n";
			print FILE2 "\n";
		}
	}
}
$file = "utf8_to_euc_jis_2004_combined.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "/*\n";
print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
print FILE " */\n";
print FILE "static pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {\n";
for $index ( sort {$a cmp $b} keys( %array1 ) ){
$code = $array1{ $index };
$count1--;
if( $count1 == 0 ){
printf FILE " {0x%s, 0x%s, 0x%06x} /* %s */\n", substr($index, 0, 8), substr($index, 8, 8), $code, $comment1{ $index };
} else {
printf FILE " {0x%s, 0x%s, 0x%06x}, /* %s */\n", substr($index, 0, 8), substr($index, 8, 8), $code, $comment1{ $index };
}
}
print FILE "};\n";
close(FILE);
if ($TEST == 1) {
	# Append the combined (two code point) characters to the test-data files.
	# FILE1 (UTF-8 side) and FILE2 (EUC_JIS_2004 side) were opened by the
	# single-character test block earlier in this script and are closed here.
	for $index ( sort {$a cmp $b} keys( %array1 ) ){
		$code = $array1{ $index };

		# Same filter as for single characters: skip NUL, TAB, LF, CR and
		# backslash, and accept only codes in a known range.  The final
		# upper bound used to read 0x8fefe, which is not a valid two-byte
		# limit; 0xfefe is the intended end of the range.
		if ($code > 0x00 && $code != 0x09 && $code != 0x0a && $code != 0x0d &&
			$code != 0x5c &&
			($code < 0x80 ||
			 ($code >= 0x8ea1 && $code <= 0x8efe) ||
			 ($code >= 0x8fa1a1 && $code <= 0x8ffefe) ||
			 ($code >= 0xa1a1 && $code <= 0xfefe))) {

			# The hash key is two 8-digit hex strings glued together:
			# first and second UTF-8 encoded code point.
			$v1 = hex(substr($index, 0, 8));
			$v2 = hex(substr($index, 8, 8));

			# First code point and the local code, most-significant byte
			# first, omitting zero bytes.
			for ($i = 3; $i >= 0; $i--) {
				$s = $i * 8;
				$mask = 0xff << $s;
				print FILE1 pack("C", ($v1 & $mask) >> $s) if $v1 & $mask;
				print FILE2 pack("C", ($code & $mask) >> $s) if $code & $mask;
			}

			# Second code point goes to the UTF-8 file only.
			for ($i = 3; $i >= 0; $i--) {
				$s = $i * 8;
				$mask = 0xff << $s;
				print FILE1 pack("C", ($v2 & $mask) >> $s) if $v2 & $mask;
			}
			print FILE1 "\n";
			print FILE2 "\n";
		}
	}
	close(FILE1);
	close(FILE2);
}
# then generate EUC_JIS_2004 --> UTF-8 table
$in_file = "euc-jis-2004-std.txt";
open( FILE, $in_file ) || die( "cannot open $in_file" );
reset 'array';
reset 'array1';
reset 'comment';
reset 'comment1';
while($line = <FILE> ){
	# Lines of the form "0x<code> U+<ucs1>+<ucs2> # comment" map one
	# EUC_JIS_2004 code to a sequence of two UCS code points; they go into
	# the "combined" table (%array1), keyed by the EUC_JIS_2004 code.
	if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) {
		$c = $1;
		$u1 = $2;
		$u2 = $3;
		$rest = "U+" . $u1 . "+" . $u2 . $4;
		$code = hex($c);
		$ucs = hex($u1);
		$utf1 = &ucs2utf($ucs);
		$ucs = hex($u2);
		$utf2 = &ucs2utf($ucs);
		# Value is the two UTF-8 encoded code points as 8 hex digits each.
		$str = sprintf "%08x%08x", $utf1, $utf2;
		$array1{ $code } = $str;
		$comment1{ $code } = $rest;
		$count1++;
		next;
	} elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) {
		# Ordinary one-to-one mapping line.
		$c = $1;
		$u = $2;
		$rest = "U+" . $u . $3;
	} else {
		# Anything else (headers, comments, unmapped codes) is ignored.
		next;
	}

	$ucs = hex($u);
	$code = hex($c);
	$utf = &ucs2utf($ucs);

	# In this direction the table is keyed by the EUC_JIS_2004 code, so a
	# collision means the same local code appeared twice.  Report the code
	# itself (the old message called it a "duplicate UTF8" and printed only
	# the UCS value) and keep the first mapping seen.
	if( $array{ $code } ne "" ){
		printf STDERR "Warning: duplicate EUC_JIS_2004: %06x UCS: %04x\n",$code, $ucs;
		next;
	}
	$count++;

	$array{ $code } = $utf;
	$comment{ $utf } = $rest;
}
close( FILE );
$file = "euc_jis_2004_to_utf8.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "/*\n";
print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
print FILE " */\n";
print FILE "static pg_local_to_utf LUmapEUC_JIS_2004[] = {\n";
for $index ( sort {$a <=> $b} keys( %array ) ){
$code = $array{ $index };
$count--;
if( $count == 0 ){
printf FILE " {0x%06x, 0x%08x} /* %s */\n", $index, $code, $comment{ $code };
} else {
printf FILE " {0x%06x, 0x%08x}, /* %s */\n", $index, $code, $comment{ $code };
}
}
print FILE "};\n";
close(FILE);
$file = "euc_jis_2004_to_utf8_combined.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "/*\n";
print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
print FILE " */\n";
print FILE "static pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {\n";
for $index ( sort {$a <=> $b} keys( %array1 ) ){
$code = $array1{ $index };
$count1--;
if( $count1 == 0 ){
printf FILE " {0x%06x, 0x%s, 0x%s} /* %s */\n", $index, substr($code, 0, 8), substr($code, 8, 8), $comment1{ $index };
} else {
printf FILE " {0x%06x, 0x%s, 0x%s}, /* %s */\n", $index, substr($code, 0, 8), substr($code, 8, 8), $comment1{ $index };
}
}
print FILE "};\n";
close(FILE);
#! /usr/bin/perl
#
# Copyright (c) 2007, PostgreSQL Global Development Group
#
# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl,v 1.1 2007/03/25 11:56:02 ishii Exp $
#
# Generate UTF-8 <--> SHIFT_JIS_2004 code conversion tables from
# "sjis-0213-2004-std.txt" (http://x0213.org)
require "ucs2utf.pl";
# first generate UTF-8 --> SHIFT_JIS_2004 table
$in_file = "sjis-0213-2004-std.txt";
open( FILE, $in_file ) || die( "cannot open $in_file" );
reset 'array';
reset 'array1';
reset 'comment';
reset 'comment1';
while($line = <FILE> ){
if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) {
$c = $1;
$u1 = $2;
$u2 = $3;
$rest = "U+" . $u1 . "+" . $u2 . $4;
$code = hex($c);
$ucs = hex($u1);
$utf1 = &ucs2utf($ucs);
$ucs = hex($u2);
$utf2 = &ucs2utf($ucs);
$str = sprintf "%08x%08x", $utf1, $utf2;
$array1{ $str } = $code;
$comment1{ $str } = $rest;
$count1++;
next;
} elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) {
$c = $1;
$u = $2;
$rest = "U+" . $u . $3;
} else {
next;
}
$ucs = hex($u);
$code = hex($c);
$utf = &ucs2utf($ucs);
if( $array{ $utf } ne "" ){
printf STDERR "Warning: duplicate UTF8: %08x UCS: %04x Shift JIS: %04x\n",$utf, $ucs, $code;
next;
}
$count++;
$array{ $utf } = $code;
$comment{ $code } = $rest;
}
close( FILE );
$file = "utf8_to_shift_jis_2004.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "/*\n";
print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
print FILE " */\n";
print FILE "static pg_utf_to_local ULmapSHIFT_JIS_2004[] = {\n";
for $index ( sort {$a <=> $b} keys( %array ) ){
$code = $array{ $index };
$count--;
if( $count == 0 ){
printf FILE " {0x%08x, 0x%06x} /* %s */\n", $index, $code, $comment{ $code };
} else {
printf FILE " {0x%08x, 0x%06x}, /* %s */\n", $index, $code, $comment{ $code };
}
}
print FILE "};\n";
close(FILE);
$file = "utf8_to_shift_jis_2004_combined.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "/*\n";
print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
print FILE " */\n";
print FILE "static pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {\n";
for $index ( sort {$a cmp $b} keys( %array1 ) ){
$code = $array1{ $index };
$count1--;
if( $count1 == 0 ){
printf FILE " {0x%s, 0x%s, 0x%04x} /* %s */\n", substr($index, 0, 8), substr($index, 8, 8), $code, $comment1{ $index };
} else {
printf FILE " {0x%s, 0x%s, 0x%04x}, /* %s */\n", substr($index, 0, 8), substr($index, 8, 8), $code, $comment1{ $index };
}
}
print FILE "};\n";
close(FILE);
# then generate SHIFT_JIS_2004 --> UTF-8 table
$in_file = "sjis-0213-2004-std.txt";
open( FILE, $in_file ) || die( "cannot open $in_file" );
reset 'array';
reset 'array1';
reset 'comment';
reset 'comment1';
while($line = <FILE> ){
	# Lines of the form "0x<code> U+<ucs1>+<ucs2> # comment" map one
	# SHIFT_JIS_2004 code to a sequence of two UCS code points; they go into
	# the "combined" table (%array1), keyed by the SHIFT_JIS_2004 code.
	if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) {
		$c = $1;
		$u1 = $2;
		$u2 = $3;
		$rest = "U+" . $u1 . "+" . $u2 . $4;
		$code = hex($c);
		$ucs = hex($u1);
		$utf1 = &ucs2utf($ucs);
		$ucs = hex($u2);
		$utf2 = &ucs2utf($ucs);
		# Value is the two UTF-8 encoded code points as 8 hex digits each.
		$str = sprintf "%08x%08x", $utf1, $utf2;
		$array1{ $code } = $str;
		$comment1{ $code } = $rest;
		$count1++;
		next;
	} elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) {
		# Ordinary one-to-one mapping line.
		$c = $1;
		$u = $2;
		$rest = "U+" . $u . $3;
	} else {
		# Anything else (headers, comments, unmapped codes) is ignored.
		next;
	}

	$ucs = hex($u);
	$code = hex($c);
	$utf = &ucs2utf($ucs);

	# In this direction %array is keyed by the SHIFT_JIS_2004 code, so a
	# collision means the same local code appeared twice.  The previously
	# stored UTF-8 value must therefore be looked up with $code; looking it
	# up with $utf (as the code used to) reads an unrelated or missing entry.
	# The first mapping seen is kept.
	if( $array{ $code } ne "" ){
		printf STDERR "Warning: duplicate Shift JIS: %04x UCS: %04x UTF-8: %08x\n",$code, $ucs, $utf;
		printf STDERR "Previous value: UTF-8: %08x\n", $array{ $code };
		next;
	}
	$count++;

	$array{ $code } = $utf;
	$comment{ $utf } = $rest;
}
close( FILE );
# Write the SHIFT_JIS_2004 --> UTF-8 table as a C array initializer, sorted
# by local code.
$file = "shift_jis_2004_to_utf8.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "/*\n";
# The header must name this script the same way the other generated maps do
# (it used to say UCS_to_SHIFTJIS_2004.pl, which is not the script's name).
print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
print FILE " */\n";
print FILE "static pg_local_to_utf LUmapSHIFT_JIS_2004[] = {\n";

for $index ( sort {$a <=> $b} keys( %array ) ){
	$code = $array{ $index };
	# $count was incremented once per stored entry while reading, so it
	# reaches zero on the last entry, which is written without a trailing
	# comma.
	$count--;
	if( $count == 0 ){
		printf FILE "  {0x%04x, 0x%08x}	/* %s */\n", $index, $code, $comment{ $code };
	} else {
		printf FILE "  {0x%04x, 0x%08x},	/* %s */\n", $index, $code, $comment{ $code };
	}
}

print FILE "};\n";
close(FILE);
$file = "shift_jis_2004_to_utf8_combined.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "/*\n";
print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
print FILE " */\n";
print FILE "static pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {\n";
for $index ( sort {$a <=> $b} keys( %array1 ) ){
$code = $array1{ $index };
$count1--;
if( $count1 == 0 ){
printf FILE " {0x%04x, 0x%s, 0x%s} /* %s */\n", $index, substr($code, 0, 8), substr($code, 8, 8), $comment1{ $index };
} else {
printf FILE " {0x%04x, 0x%s, 0x%s}, /* %s */\n", $index, substr($code, 0, 8), substr($code, 8, 8), $comment1{ $index };
}
}
print FILE "};\n";
close(FILE);
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
/*
* This file was generated by UCS_to_EUC_JIS_2004.pl
*/
static pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {
{0x00a4f7, 0x00e3818b, 0x00e3829a}, /* U+304B+309A [2000] */
{0x00a4f8, 0x00e3818d, 0x00e3829a}, /* U+304D+309A [2000] */
{0x00a4f9, 0x00e3818f, 0x00e3829a}, /* U+304F+309A [2000] */
{0x00a4fa, 0x00e38191, 0x00e3829a}, /* U+3051+309A [2000] */
{0x00a4fb, 0x00e38193, 0x00e3829a}, /* U+3053+309A [2000] */
{0x00a5f7, 0x00e382ab, 0x00e3829a}, /* U+30AB+309A [2000] */
{0x00a5f8, 0x00e382ad, 0x00e3829a}, /* U+30AD+309A [2000] */
{0x00a5f9, 0x00e382af, 0x00e3829a}, /* U+30AF+309A [2000] */
{0x00a5fa, 0x00e382b1, 0x00e3829a}, /* U+30B1+309A [2000] */
{0x00a5fb, 0x00e382b3, 0x00e3829a}, /* U+30B3+309A [2000] */
{0x00a5fc, 0x00e382bb, 0x00e3829a}, /* U+30BB+309A [2000] */
{0x00a5fd, 0x00e38384, 0x00e3829a}, /* U+30C4+309A [2000] */
{0x00a5fe, 0x00e38388, 0x00e3829a}, /* U+30C8+309A [2000] */
{0x00a6f8, 0x00e387b7, 0x00e3829a}, /* U+31F7+309A [2000] */
{0x00abc4, 0x0000c3a6, 0x0000cc80}, /* U+00E6+0300 [2000] */
{0x00abc8, 0x0000c994, 0x0000cc80}, /* U+0254+0300 [2000] */
{0x00abc9, 0x0000c994, 0x0000cc81}, /* U+0254+0301 [2000] */
{0x00abca, 0x0000ca8c, 0x0000cc80}, /* U+028C+0300 [2000] */
{0x00abcb, 0x0000ca8c, 0x0000cc81}, /* U+028C+0301 [2000] */
{0x00abcc, 0x0000c999, 0x0000cc80}, /* U+0259+0300 [2000] */
{0x00abcd, 0x0000c999, 0x0000cc81}, /* U+0259+0301 [2000] */
{0x00abce, 0x0000c99a, 0x0000cc80}, /* U+025A+0300 [2000] */
{0x00abcf, 0x0000c99a, 0x0000cc81}, /* U+025A+0301 [2000] */
{0x00abe5, 0x0000cba9, 0x0000cba5}, /* U+02E9+02E5 [2000] */
{0x00abe6, 0x0000cba5, 0x0000cba9} /* U+02E5+02E9 [2000] */
};
This source diff could not be displayed because it is too large. You can view the blob instead.
/*
* This file was generated by UCS_to_SHIFT_JIS_2004.pl
*/
static pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {
{0x82f5, 0x00e3818b, 0x00e3829a}, /* U+304B+309A [2000] */
{0x82f6, 0x00e3818d, 0x00e3829a}, /* U+304D+309A [2000] */
{0x82f7, 0x00e3818f, 0x00e3829a}, /* U+304F+309A [2000] */
{0x82f8, 0x00e38191, 0x00e3829a}, /* U+3051+309A [2000] */
{0x82f9, 0x00e38193, 0x00e3829a}, /* U+3053+309A [2000] */
{0x8397, 0x00e382ab, 0x00e3829a}, /* U+30AB+309A [2000] */
{0x8398, 0x00e382ad, 0x00e3829a}, /* U+30AD+309A [2000] */
{0x8399, 0x00e382af, 0x00e3829a}, /* U+30AF+309A [2000] */
{0x839a, 0x00e382b1, 0x00e3829a}, /* U+30B1+309A [2000] */
{0x839b, 0x00e382b3, 0x00e3829a}, /* U+30B3+309A [2000] */
{0x839c, 0x00e382bb, 0x00e3829a}, /* U+30BB+309A [2000] */
{0x839d, 0x00e38384, 0x00e3829a}, /* U+30C4+309A [2000] */
{0x839e, 0x00e38388, 0x00e3829a}, /* U+30C8+309A [2000] */
{0x83f6, 0x00e387b7, 0x00e3829a}, /* U+31F7+309A [2000] */
{0x8663, 0x0000c3a6, 0x0000cc80}, /* U+00E6+0300 [2000] */
{0x8667, 0x0000c994, 0x0000cc80}, /* U+0254+0300 [2000] */
{0x8668, 0x0000c994, 0x0000cc81}, /* U+0254+0301 [2000] */
{0x8669, 0x0000ca8c, 0x0000cc80}, /* U+028C+0300 [2000] */
{0x866a, 0x0000ca8c, 0x0000cc81}, /* U+028C+0301 [2000] */
{0x866b, 0x0000c999, 0x0000cc80}, /* U+0259+0300 [2000] */
{0x866c, 0x0000c999, 0x0000cc81}, /* U+0259+0301 [2000] */
{0x866d, 0x0000c99a, 0x0000cc80}, /* U+025A+0300 [2000] */
{0x866e, 0x0000c99a, 0x0000cc81}, /* U+025A+0301 [2000] */
{0x8685, 0x0000cba9, 0x0000cba5}, /* U+02E9+02E5 [2000] */
{0x8686, 0x0000cba5, 0x0000cba9} /* U+02E5+02E9 [2000] */
};
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
/*
* This file was generated by UCS_to_EUC_JIS_2004.pl
*/
static pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {
{0x0000c3a6, 0x0000cc80, 0x00abc4}, /* U+00E6+0300 [2000] */
{0x0000c994, 0x0000cc80, 0x00abc8}, /* U+0254+0300 [2000] */
{0x0000c994, 0x0000cc81, 0x00abc9}, /* U+0254+0301 [2000] */
{0x0000c999, 0x0000cc80, 0x00abcc}, /* U+0259+0300 [2000] */
{0x0000c999, 0x0000cc81, 0x00abcd}, /* U+0259+0301 [2000] */
{0x0000c99a, 0x0000cc80, 0x00abce}, /* U+025A+0300 [2000] */
{0x0000c99a, 0x0000cc81, 0x00abcf}, /* U+025A+0301 [2000] */
{0x0000ca8c, 0x0000cc80, 0x00abca}, /* U+028C+0300 [2000] */
{0x0000ca8c, 0x0000cc81, 0x00abcb}, /* U+028C+0301 [2000] */
{0x0000cba5, 0x0000cba9, 0x00abe6}, /* U+02E5+02E9 [2000] */
{0x0000cba9, 0x0000cba5, 0x00abe5}, /* U+02E9+02E5 [2000] */
{0x00e3818b, 0x00e3829a, 0x00a4f7}, /* U+304B+309A [2000] */
{0x00e3818d, 0x00e3829a, 0x00a4f8}, /* U+304D+309A [2000] */
{0x00e3818f, 0x00e3829a, 0x00a4f9}, /* U+304F+309A [2000] */
{0x00e38191, 0x00e3829a, 0x00a4fa}, /* U+3051+309A [2000] */
{0x00e38193, 0x00e3829a, 0x00a4fb}, /* U+3053+309A [2000] */
{0x00e382ab, 0x00e3829a, 0x00a5f7}, /* U+30AB+309A [2000] */
{0x00e382ad, 0x00e3829a, 0x00a5f8}, /* U+30AD+309A [2000] */
{0x00e382af, 0x00e3829a, 0x00a5f9}, /* U+30AF+309A [2000] */
{0x00e382b1, 0x00e3829a, 0x00a5fa}, /* U+30B1+309A [2000] */
{0x00e382b3, 0x00e3829a, 0x00a5fb}, /* U+30B3+309A [2000] */
{0x00e382bb, 0x00e3829a, 0x00a5fc}, /* U+30BB+309A [2000] */
{0x00e38384, 0x00e3829a, 0x00a5fd}, /* U+30C4+309A [2000] */
{0x00e38388, 0x00e3829a, 0x00a5fe}, /* U+30C8+309A [2000] */
{0x00e387b7, 0x00e3829a, 0x00a6f8} /* U+31F7+309A [2000] */
};
This source diff could not be displayed because it is too large. You can view the blob instead.
/*
* This file was generated by UCS_to_SHIFT_JIS_2004.pl
*/
static pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {
{0x0000c3a6, 0x0000cc80, 0x8663}, /* U+00E6+0300 [2000] */
{0x0000c994, 0x0000cc80, 0x8667}, /* U+0254+0300 [2000] */
{0x0000c994, 0x0000cc81, 0x8668}, /* U+0254+0301 [2000] */
{0x0000c999, 0x0000cc80, 0x866b}, /* U+0259+0300 [2000] */
{0x0000c999, 0x0000cc81, 0x866c}, /* U+0259+0301 [2000] */
{0x0000c99a, 0x0000cc80, 0x866d}, /* U+025A+0300 [2000] */
{0x0000c99a, 0x0000cc81, 0x866e}, /* U+025A+0301 [2000] */
{0x0000ca8c, 0x0000cc80, 0x8669}, /* U+028C+0300 [2000] */
{0x0000ca8c, 0x0000cc81, 0x866a}, /* U+028C+0301 [2000] */
{0x0000cba5, 0x0000cba9, 0x8686}, /* U+02E5+02E9 [2000] */
{0x0000cba9, 0x0000cba5, 0x8685}, /* U+02E9+02E5 [2000] */
{0x00e3818b, 0x00e3829a, 0x82f5}, /* U+304B+309A [2000] */
{0x00e3818d, 0x00e3829a, 0x82f6}, /* U+304D+309A [2000] */
{0x00e3818f, 0x00e3829a, 0x82f7}, /* U+304F+309A [2000] */
{0x00e38191, 0x00e3829a, 0x82f8}, /* U+3051+309A [2000] */
{0x00e38193, 0x00e3829a, 0x82f9}, /* U+3053+309A [2000] */
{0x00e382ab, 0x00e3829a, 0x8397}, /* U+30AB+309A [2000] */
{0x00e382ad, 0x00e3829a, 0x8398}, /* U+30AD+309A [2000] */
{0x00e382af, 0x00e3829a, 0x8399}, /* U+30AF+309A [2000] */
{0x00e382b1, 0x00e3829a, 0x839a}, /* U+30B1+309A [2000] */
{0x00e382b3, 0x00e3829a, 0x839b}, /* U+30B3+309A [2000] */
{0x00e382bb, 0x00e3829a, 0x839c}, /* U+30BB+309A [2000] */
{0x00e38384, 0x00e3829a, 0x839d}, /* U+30C4+309A [2000] */
{0x00e38388, 0x00e3829a, 0x839e}, /* U+30C8+309A [2000] */
{0x00e387b7, 0x00e3829a, 0x83f6} /* U+31F7+309A [2000] */
};
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conv.c,v 1.62 2007/01/05 22:19:44 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conv.c,v 1.63 2007/03/25 11:56:02 ishii Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -244,10 +244,10 @@ mic2latin_with_table(const unsigned char *mic,
static int
compare1(const void *p1, const void *p2)
{
unsigned int v1,
uint32 v1,
v2;
v1 = *(unsigned int *) p1;
v1 = *(uint32 *) p1;
v2 = ((pg_utf_to_local *) p2)->utf;
return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
}
......@@ -259,31 +259,86 @@ compare1(const void *p1, const void *p2)
static int
compare2(const void *p1, const void *p2)
{
unsigned int v1,
uint32 v1,
v2;
v1 = *(unsigned int *) p1;
v1 = *(uint32 *) p1;
v2 = ((pg_local_to_utf *) p2)->code;
return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
}
/*
* comparison routine for bsearch()
* this routine is intended for combined UTF8 -> local code
*/
static int
compare3(const void *p1, const void *p2)
{
uint32 s1, s2, d1, d2;
s1 = *(uint32 *)p1;
s2 = *((uint32 *)p1 + 1);
d1 = ((pg_utf_to_local_combined *) p2)->utf1;
d2 = ((pg_utf_to_local_combined *) p2)->utf2;
return (s1 > d1 || (s1 == d1 && s2 > d2)) ? 1 : ((s1 == d1 && s2 == d2) ? 0 : -1);
}
/*
* comparison routine for bsearch()
* this routine is intended for local code -> combined UTF8
*/
static int
compare4(const void *p1, const void *p2)
{
uint32 v1,
v2;
v1 = *(uint32 *) p1;
v2 = ((pg_local_to_utf_combined *) p2)->code;
return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
}
/*
 * Store a 32-bit wide character as a multibyte stream at iso.
 *
 * The four bytes of code are visited from most significant to least
 * significant; every byte that is non-zero is written, zero bytes are
 * skipped.  Returns the position just past the last byte written.
 */
static unsigned char *
set_iso_code(unsigned char *iso, uint32 code)
{
	int			shift;

	for (shift = 24; shift >= 0; shift -= 8)
	{
		unsigned char byte = (unsigned char) ((code >> shift) & 0xff);

		if (byte != 0)
			*iso++ = byte;
	}

	return iso;
}
/*
* UTF8 ---> local code
*
* utf: input UTF8 string (need not be null-terminated).
* iso: pointer to the output area (must be large enough!)
* map: the conversion map.
* size: the size of the conversion map.
* cmap: the conversion map for combined characters.
* (optional)
* size1: the size of the conversion map.
* size2: the size of the conversion map for combined characters
* (optional)
* encoding: the PG identifier for the local encoding.
* len: length of input string.
*/
void
UtfToLocal(const unsigned char *utf, unsigned char *iso,
const pg_utf_to_local *map, int size, int encoding, int len)
const pg_utf_to_local *map, const pg_utf_to_local_combined *cmap,
int size1, int size2, int encoding, int len)
{
unsigned int iutf;
int l;
uint32 iutf;
uint32 cutf[2];
uint32 code;
pg_utf_to_local *p;
pg_utf_to_local_combined *cp;
int l;
for (; len > 0; len -= l)
{
......@@ -324,21 +379,94 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
iutf |= *utf++;
}
p = bsearch(&iutf, map, size,
sizeof(pg_utf_to_local), compare1);
/*
* first, try with combined map if possible
*/
if (cmap && len > l)
{
const unsigned char *utf_save = utf;
int len_save = len;
int l_save = l;
len -= l;
if (p == NULL)
report_untranslatable_char(PG_UTF8, encoding,
(const char *) (utf - l), len);
if (p->code & 0xff000000)
*iso++ = p->code >> 24;
if (p->code & 0x00ff0000)
*iso++ = (p->code & 0x00ff0000) >> 16;
if (p->code & 0x0000ff00)
*iso++ = (p->code & 0x0000ff00) >> 8;
if (p->code & 0x000000ff)
*iso++ = p->code & 0x000000ff;
l = pg_utf_mblen(utf);
if (len < l)
break;
if (!pg_utf8_islegal(utf, l))
break;
cutf[0] = iutf;
if (l == 1)
{
if (len_save > 1)
{
p = bsearch(&cutf[0], map, size1,
sizeof(pg_utf_to_local), compare1);
if (p == NULL)
report_untranslatable_char(PG_UTF8, encoding,
(const char *) (utf_save - l_save), len_save);
iso = set_iso_code(iso, p->code);
}
/* ASCII case is easy */
*iso++ = *utf++;
continue;
}
else if (l == 2)
{
iutf = *utf++ << 8;
iutf |= *utf++;
}
else if (l == 3)
{
iutf = *utf++ << 16;
iutf |= *utf++ << 8;
iutf |= *utf++;
}
else if (l == 4)
{
iutf = *utf++ << 24;
iutf |= *utf++ << 16;
iutf |= *utf++ << 8;
iutf |= *utf++;
}
cutf[1] = iutf;
cp = bsearch(cutf, cmap, size2,
sizeof(pg_utf_to_local_combined), compare3);
if (cp)
code = cp->code;
else
{
/* not found in combined map. try with ordinary map */
p = bsearch(&cutf[0], map, size1,
sizeof(pg_utf_to_local), compare1);
if (p == NULL)
report_untranslatable_char(PG_UTF8, encoding,
(const char *) (utf_save - l_save), len_save);
iso = set_iso_code(iso, p->code);
p = bsearch(&cutf[1], map, size1,
sizeof(pg_utf_to_local), compare1);
if (p == NULL)
report_untranslatable_char(PG_UTF8, encoding,
(const char *) (utf - l), len);
code = p->code;
}
}
else /* no cmap or no remaining data */
{
p = bsearch(&iutf, map, size1,
sizeof(pg_utf_to_local), compare1);
if (p == NULL)
report_untranslatable_char(PG_UTF8, encoding,
(const char *) (utf - l), len);
code = p->code;
}
iso = set_iso_code(iso, code);
}
if (len > 0)
......@@ -353,17 +481,23 @@ UtfToLocal(const unsigned char *utf, unsigned char *iso,
* iso: input local string (need not be null-terminated).
* utf: pointer to the output area (must be large enough!)
* map: the conversion map.
* size: the size of the conversion map.
* cmap: the conversion map for combined characters.
* (optional)
* size1: the size of the conversion map.
* size2: the size of the conversion map for combined characters
* (optional)
* encoding: the PG identifier for the local encoding.
* len: length of input string.
*/
void
LocalToUtf(const unsigned char *iso, unsigned char *utf,
const pg_local_to_utf *map, int size, int encoding, int len)
const pg_local_to_utf *map, const pg_local_to_utf_combined *cmap,
int size1, int size2, int encoding, int len)
{
unsigned int iiso;
int l;
pg_local_to_utf *p;
pg_local_to_utf_combined *cp;
if (!PG_VALID_ENCODING(encoding))
ereport(ERROR,
......@@ -409,20 +543,59 @@ LocalToUtf(const unsigned char *iso, unsigned char *utf,
iiso |= *iso++;
}
p = bsearch(&iiso, map, size,
p = bsearch(&iiso, map, size1,
sizeof(pg_local_to_utf), compare2);
if (p == NULL)
{
/*
* not found in the ordinary map. if there's a combined
* character map, try with it
*/
if (cmap)
{
cp = bsearch(&iiso, cmap, size2,
sizeof(pg_local_to_utf_combined), compare4);
if (cp)
{
if (cp->utf1 & 0xff000000)
*utf++ = cp->utf1 >> 24;
if (cp->utf1 & 0x00ff0000)
*utf++ = (cp->utf1 & 0x00ff0000) >> 16;
if (cp->utf1 & 0x0000ff00)
*utf++ = (cp->utf1 & 0x0000ff00) >> 8;
if (cp->utf1 & 0x000000ff)
*utf++ = cp->utf1 & 0x000000ff;
if (cp->utf2 & 0xff000000)
*utf++ = cp->utf2 >> 24;
if (cp->utf2 & 0x00ff0000)
*utf++ = (cp->utf2 & 0x00ff0000) >> 16;
if (cp->utf2 & 0x0000ff00)
*utf++ = (cp->utf2 & 0x0000ff00) >> 8;
if (cp->utf2 & 0x000000ff)
*utf++ = cp->utf2 & 0x000000ff;
continue;
}
}
report_untranslatable_char(encoding, PG_UTF8,
(const char *) (iso - l), len);
if (p->utf & 0xff000000)
*utf++ = p->utf >> 24;
if (p->utf & 0x00ff0000)
*utf++ = (p->utf & 0x00ff0000) >> 16;
if (p->utf & 0x0000ff00)
*utf++ = (p->utf & 0x0000ff00) >> 8;
if (p->utf & 0x000000ff)
*utf++ = p->utf & 0x000000ff;
}
else
{
if (p->utf & 0xff000000)
*utf++ = p->utf >> 24;
if (p->utf & 0x00ff0000)
*utf++ = (p->utf & 0x00ff0000) >> 16;
if (p->utf & 0x0000ff00)
*utf++ = (p->utf & 0x0000ff00) >> 8;
if (p->utf & 0x000000ff)
*utf++ = p->utf & 0x000000ff;
}
}
if (len > 0)
......
......@@ -4,7 +4,7 @@
# Makefile for utils/mb/conversion_procs
#
# IDENTIFICATION
# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/Makefile,v 1.17 2006/02/27 16:09:49 petere Exp $
# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/Makefile,v 1.18 2007/03/25 11:56:02 ishii Exp $
#
#-------------------------------------------------------------------------
......@@ -23,7 +23,8 @@ DIRS = \
utf8_and_ascii utf8_and_big5 utf8_and_cyrillic utf8_and_euc_cn \
utf8_and_euc_jp utf8_and_euc_kr utf8_and_euc_tw utf8_and_gb18030 \
utf8_and_gbk utf8_and_iso8859 utf8_and_iso8859_1 utf8_and_johab \
utf8_and_sjis utf8_and_win utf8_and_uhc
utf8_and_sjis utf8_and_win utf8_and_uhc \
utf8_and_euc_jis_2004 utf8_and_shift_jis_2004 euc_jis_2004_and_shift_jis_2004
# conversion_name source_encoding destination_encoding function object
CONVERSIONS = \
......@@ -150,8 +151,13 @@ CONVERSIONS = \
sjis_to_utf8 SJIS UTF8 sjis_to_utf8 utf8_and_sjis \
utf8_to_sjis UTF8 SJIS utf8_to_sjis utf8_and_sjis \
uhc_to_utf8 UHC UTF8 uhc_to_utf8 utf8_and_uhc \
utf8_to_uhc UTF8 UHC utf8_to_uhc utf8_and_uhc
utf8_to_uhc UTF8 UHC utf8_to_uhc utf8_and_uhc \
euc_jis_2004_to_utf8 EUC_JIS_2004 UTF8 euc_jis_2004_to_utf8 utf8_and_euc_jis_2004 \
utf8_to_euc_jis_2004 UTF8 EUC_JIS_2004 utf8_to_euc_jis_2004 utf8_and_euc_jis_2004 \
shift_jis_2004_to_utf8 SHIFT_JIS_2004 UTF8 shift_jis_2004_to_utf8 utf8_and_shift_jis_2004 \
utf8_to_shift_jis_2004 UTF8 SHIFT_JIS_2004 utf8_to_shift_jis_2004 utf8_and_shift_jis_2004 \
euc_jis_2004_to_shift_jis_2004 EUC_JIS_2004 SHIFT_JIS_2004 euc_jis_2004_to_shift_jis_2004 euc_jis_2004_and_shift_jis_2004 \
shift_jis_2004_to_euc_jis_2004 SHIFT_JIS_2004 EUC_JIS_2004 shift_jis_2004_to_euc_jis_2004 euc_jis_2004_and_shift_jis_2004
all: $(SQLSCRIPT)
@for dir in $(DIRS); do $(MAKE) -C $$dir $@ || exit; done
......
#-------------------------------------------------------------------------
#
# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jis_2004_and_shift_jis_2004/Makefile,v 1.1 2007/03/25 11:56:02 ishii Exp $
#
#-------------------------------------------------------------------------
# subdir is this directory's path from the top of the source tree; it must
# spell the directory name exactly (it used to read "..._and_shit_jis_2004").
subdir = src/backend/utils/mb/conversion_procs/euc_jis_2004_and_shift_jis_2004
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global
NAME = euc_jis_2004_and_shift_jis_2004
include $(srcdir)/../proc.mk
/*-------------------------------------------------------------------------
*
* EUC_JIS_2004, SHIFT_JIS_2004
*
* Copyright (c) 2007, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jis_2004_and_shift_jis_2004/euc_jis_2004_and_shift_jis_2004.c,v 1.1 2007/03/25 11:56:02 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
PG_MODULE_MAGIC;
PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004);
PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004);
extern Datum euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS);
extern Datum shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS);
static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len);
static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length
* ) returns VOID;
* ----------
*/
/*
 * SQL-callable conversion procedure: EUC_JIS_2004 -> SHIFT_JIS_2004.
 *
 * Arguments 0 and 1 are the source and destination encoding ids (only
 * checked by assertions), argument 2 is the source string, argument 3 the
 * destination buffer and argument 4 the source length.
 */
Datum
euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS)
{
	const unsigned char *input;
	unsigned char *output;
	int			nbytes;

	Assert(PG_GETARG_INT32(0) == PG_EUC_JIS_2004);
	Assert(PG_GETARG_INT32(1) == PG_SHIFT_JIS_2004);

	input = (const unsigned char *) PG_GETARG_CSTRING(2);
	output = (unsigned char *) PG_GETARG_CSTRING(3);
	nbytes = PG_GETARG_INT32(4);
	Assert(nbytes >= 0);

	euc_jis_20042shift_jis_2004(input, output, nbytes);

	PG_RETURN_VOID();
}
/*
 * SQL-callable conversion procedure: SHIFT_JIS_2004 -> EUC_JIS_2004.
 *
 * Arguments 0 and 1 are the source and destination encoding ids (only
 * checked by assertions), argument 2 is the source string, argument 3 the
 * destination buffer and argument 4 the source length.
 */
Datum
shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS)
{
	const unsigned char *input;
	unsigned char *output;
	int			nbytes;

	Assert(PG_GETARG_INT32(0) == PG_SHIFT_JIS_2004);
	Assert(PG_GETARG_INT32(1) == PG_EUC_JIS_2004);

	input = (const unsigned char *) PG_GETARG_CSTRING(2);
	output = (unsigned char *) PG_GETARG_CSTRING(3);
	nbytes = PG_GETARG_INT32(4);
	Assert(nbytes >= 0);

	shift_jis_20042euc_jis_2004(input, output, nbytes);

	PG_RETURN_VOID();
}
/*
 * Append to "out" the SHIFT_JIS_2004 trail byte for cell "ten" of a character
 * located in row "ku", and return the advanced output pointer.
 *
 * Even rows use ten + 0x9e.  Odd rows use ten + 0x3f for ten 1..63 and
 * ten + 0x40 for ten 64..94; for an odd row with any other ten, the
 * character starting at "mbchar" is reported as invalid EUC_JIS_2004.
 */
static unsigned char *
euc_jis_2004_put_sjis_trail(unsigned char *out, int ku, int ten,
							const unsigned char *mbchar, int remaining)
{
	if ((ku % 2) == 0)
		*out++ = ten + 0x9e;
	else if (ten >= 1 && ten <= 63)
		*out++ = ten + 0x3f;
	else if (ten >= 64 && ten <= 94)
		*out++ = ten + 0x40;
	else
		report_invalid_encoding(PG_EUC_JIS_2004,
								(const char *) mbchar, remaining);

	return out;
}

/*
 * EUC_JIS_2004 -> SHIFT_JIS_2004
 *
 * Converts "len" bytes starting at "euc" and writes the result, followed by a
 * terminating NUL, to "p".  ASCII bytes are copied unchanged (a zero byte is
 * rejected), SS2-prefixed characters are emitted as their single second
 * byte, and two-byte (plane 1) or SS3-prefixed three-byte (plane 2)
 * characters are remapped arithmetically from their row/cell numbers.
 * Anything else is passed to report_invalid_encoding().
 */
static void
euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
{
	while (len > 0)
	{
		int			lead = *euc;
		int			row;		/* "ku": byte value minus 0xa0 */
		int			cell;		/* "ten": byte value minus 0xa0 */
		int			chlen;

		if (!IS_HIGHBIT_SET(lead))
		{
			/* ASCII: copy through, but never accept an embedded NUL */
			if (lead == 0)
				report_invalid_encoding(PG_EUC_JIS_2004,
										(const char *) euc, len);
			*p++ = lead;
			euc++;
			len--;
			continue;
		}

		chlen = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len);
		if (chlen < 0)
			report_invalid_encoding(PG_EUC_JIS_2004,
									(const char *) euc, len);

		if (lead == SS2 && chlen == 2)
		{
			/* JIS X 0201 kana: drop the SS2 prefix */
			*p++ = euc[1];
		}
		else if (lead == SS3 && chlen == 3)
		{
			/* JIS X 0213 plane 2 */
			row = euc[1] - 0xa0;
			cell = euc[2] - 0xa0;

			if (row == 1 || (row >= 3 && row <= 5) || row == 8 ||
				(row >= 12 && row <= 15))
			{
				/* rows 1,3,4,5,8,12..15 yield lead bytes 0xf0..0xf4 */
				*p++ = ((row + 0x1df) >> 1) - (row >> 3) * 3;
			}
			else if (row >= 78 && row <= 94)
			{
				/* rows 78..94 yield lead bytes 0xf4..0xfc */
				*p++ = (row + 0x19b) >> 1;
			}
			else
				report_invalid_encoding(PG_EUC_JIS_2004,
										(const char *) euc, len);

			p = euc_jis_2004_put_sjis_trail(p, row, cell, euc, len);
		}
		else if (chlen == 2)
		{
			/* JIS X 0213 plane 1 */
			row = lead - 0xa0;
			cell = euc[1] - 0xa0;

			if (row >= 1 && row <= 62)
			{
				/* rows 1..62 yield lead bytes 0x81..0x9f */
				*p++ = (row + 0x101) >> 1;
			}
			else if (row >= 63 && row <= 94)
			{
				/* rows 63..94 yield lead bytes 0xe0..0xef */
				*p++ = (row + 0x181) >> 1;
			}
			else
				report_invalid_encoding(PG_EUC_JIS_2004,
										(const char *) euc, len);

			p = euc_jis_2004_put_sjis_trail(p, row, cell, euc, len);
		}
		else
			report_invalid_encoding(PG_EUC_JIS_2004,
									(const char *) euc, len);

		euc += chlen;
		len -= chlen;
	}

	*p = '\0';
}
/*
 * Decode the second byte "b" of a two-byte SHIFT_JIS_2004 character.
 *
 * Returns the "ten" (cell) number, 1..94, or -1 if "b" is not a valid
 * second byte (below 0x40, equal to 0x7f, or above 0xfc).
 *
 * On success *ku receives the parity of the row ("ku") implied by "b":
 *	*ku = 0: "ku" = even
 *	*ku = 1: "ku" = odd
 * On failure *ku is left untouched.
 */
static int
get_ten(int b, int *ku)
{
	/* reject everything outside 0x40..0x7e and 0x80..0xfc */
	if (b < 0x40 || b == 0x7f || b > 0xfc)
		return -1;

	/* 0x9f..0xfc: even row */
	if (b >= 0x9f)
	{
		*ku = 0;
		return b - 0x9e;
	}

	/* 0x40..0x7e and 0x80..0x9e: odd row, with a gap at 0x7f */
	*ku = 1;
	return (b <= 0x7e) ? (b - 0x3f) : (b - 0x40);
}
/*
 * SHIFT_JIS_2004 ---> EUC_JIS_2004
 *
 * Converts "len" bytes starting at "sjis" and writes the result, followed by
 * a terminating NUL, to "p".
 *
 * - ASCII bytes are copied unchanged; a zero byte is rejected.
 * - One-byte characters 0xa1..0xdf (JIS X 0201 kana) are emitted as SS2
 *	 followed by the same byte.
 * - Two-byte characters are decoded into plane/ku/ten from the lead byte and
 *	 get_ten() of the second byte, then emitted as ku + 0xa0, ten + 0xa0,
 *	 prefixed with SS3 for plane 2.
 *
 * Every malformed input is passed to report_invalid_encoding().
 */
static void
shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len)
{
	int			c1;
	int			ku,
				ten,
				kubun;
	int			plane;
	int			l;

	while (len > 0)
	{
		c1 = *sjis;

		if (!IS_HIGHBIT_SET(c1))
		{
			/* ASCII */
			if (c1 == 0)
				report_invalid_encoding(PG_SHIFT_JIS_2004,
										(const char *) sjis, len);
			*p++ = c1;
			sjis++;
			len--;
			continue;
		}

		l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len);

		if (l < 0)
			report_invalid_encoding(PG_SHIFT_JIS_2004,
									(const char *) sjis, len);

		if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
		{
			/* JIS X0201 (1 byte kana) */
			*p++ = SS2;
			*p++ = c1;
		}
		else if (l == 2)
		{
			/*
			 * Fetch the second byte only now that the verifier has confirmed
			 * a two-byte character, instead of peeking at sjis[1] on every
			 * iteration before the length is known.
			 */
			int			c2 = sjis[1];

			plane = 1;
			ku = 1;
			ten = 1;
			kubun = 0;

			/*
			 * JIS X 0213
			 */
			if (c1 >= 0x81 && c1 <= 0x9f)	/* plane 1 1ku-62ku */
			{
				ku = (c1 << 1) - 0x100;
				ten = get_ten(c2, &kubun);
				if (ten < 0)
					report_invalid_encoding(PG_SHIFT_JIS_2004,
											(const char *) sjis, len);
				/* the lead byte gives the even row; odd rows are one less */
				ku -= kubun;
			}
			else if (c1 >= 0xe0 && c1 <= 0xef)	/* plane 1 63ku-94ku */
			{
				ku = (c1 << 1) - 0x180;
				ten = get_ten(c2, &kubun);
				if (ten < 0)
					report_invalid_encoding(PG_SHIFT_JIS_2004,
											(const char *) sjis, len);
				ku -= kubun;
			}
			else if (c1 >= 0xf0 && c1 <= 0xf3)	/* plane 2
												 * 1,3,4,5,8,12,13,14 ku */
			{
				plane = 2;
				ten = get_ten(c2, &kubun);
				if (ten < 0)
					report_invalid_encoding(PG_SHIFT_JIS_2004,
											(const char *) sjis, len);
				/* each lead byte covers one even and one odd row */
				switch (c1)
				{
					case 0xf0:
						ku = kubun == 0 ? 8 : 1;
						break;
					case 0xf1:
						ku = kubun == 0 ? 4 : 3;
						break;
					case 0xf2:
						ku = kubun == 0 ? 12 : 5;
						break;
					default:
						ku = kubun == 0 ? 14 : 13;
						break;
				}
			}
			else if (c1 >= 0xf4 && c1 <= 0xfc)	/* plane 2 15ku, 78-94ku */
			{
				plane = 2;
				ten = get_ten(c2, &kubun);
				if (ten < 0)
					report_invalid_encoding(PG_SHIFT_JIS_2004,
											(const char *) sjis, len);
				/* 0xf4 with an odd-row second byte is row 15 */
				if (c1 == 0xf4 && kubun == 1)
					ku = 15;
				else
					ku = (c1 << 1) - 0x19a - kubun;
			}
			else
				report_invalid_encoding(PG_SHIFT_JIS_2004,
										(const char *) sjis, len);

			if (plane == 2)
				*p++ = SS3;

			*p++ = ku + 0xa0;
			*p++ = ten + 0xa0;
		}
		else
		{
			/*
			 * Neither one-byte kana nor a two-byte character.  Reject it
			 * rather than silently dropping the bytes from the output.
			 */
			report_invalid_encoding(PG_SHIFT_JIS_2004,
									(const char *) sjis, len);
		}

		sjis += l;
		len -= l;
	}

	*p = '\0';
}
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c,v 1.15 2007/01/05 22:19:45 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c,v 1.16 2007/03/25 11:56:02 ishii Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -46,8 +46,8 @@ big5_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapBIG5,
sizeof(LUmapBIG5) / sizeof(pg_local_to_utf), PG_BIG5, len);
LocalToUtf(src, dest, LUmapBIG5, NULL,
sizeof(LUmapBIG5) / sizeof(pg_local_to_utf), 0, PG_BIG5, len);
PG_RETURN_VOID();
}
......@@ -63,8 +63,8 @@ utf8_to_big5(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_BIG5);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapBIG5,
sizeof(ULmapBIG5) / sizeof(pg_utf_to_local), PG_BIG5, len);
UtfToLocal(src, dest, ULmapBIG5, NULL,
sizeof(ULmapBIG5) / sizeof(pg_utf_to_local), 0, PG_BIG5, len);
PG_RETURN_VOID();
}
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c,v 1.18 2007/01/05 22:19:45 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c,v 1.19 2007/03/25 11:56:02 ishii Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -47,8 +47,8 @@ utf8_to_koi8r(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_KOI8R);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapKOI8R,
sizeof(ULmapKOI8R) / sizeof(pg_utf_to_local), PG_KOI8R, len);
UtfToLocal(src, dest, ULmapKOI8R, NULL,
sizeof(ULmapKOI8R) / sizeof(pg_utf_to_local), 0, PG_KOI8R, len);
PG_RETURN_VOID();
}
......@@ -64,8 +64,8 @@ koi8r_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapKOI8R,
sizeof(LUmapKOI8R) / sizeof(pg_local_to_utf), PG_KOI8R, len);
LocalToUtf(src, dest, LUmapKOI8R, NULL,
sizeof(LUmapKOI8R) / sizeof(pg_local_to_utf), 0, PG_KOI8R, len);
PG_RETURN_VOID();
}
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c,v 1.16 2007/01/05 22:19:45 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c,v 1.17 2007/03/25 11:56:02 ishii Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -46,8 +46,8 @@ euc_cn_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapEUC_CN,
sizeof(LUmapEUC_CN) / sizeof(pg_local_to_utf), PG_EUC_CN, len);
LocalToUtf(src, dest, LUmapEUC_CN, NULL,
sizeof(LUmapEUC_CN) / sizeof(pg_local_to_utf), 0, PG_EUC_CN, len);
PG_RETURN_VOID();
}
......@@ -63,8 +63,8 @@ utf8_to_euc_cn(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_EUC_CN);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapEUC_CN,
sizeof(ULmapEUC_CN) / sizeof(pg_utf_to_local), PG_EUC_CN, len);
UtfToLocal(src, dest, ULmapEUC_CN, NULL,
sizeof(ULmapEUC_CN) / sizeof(pg_utf_to_local), 0, PG_EUC_CN, len);
PG_RETURN_VOID();
}
#-------------------------------------------------------------------------
#
# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_jis_2004/Makefile,v 1.1 2007/03/25 11:56:02 ishii Exp $
#
#-------------------------------------------------------------------------
# Location of this directory relative to the top of the source tree.
subdir = src/backend/utils/mb/conversion_procs/utf8_and_euc_jis_2004
# Relative path from this directory back up to the top of the build tree.
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global
# Name of this conversion module; matches the directory name above.
# NOTE(review): presumably consumed by ../proc.mk, which is not shown here.
NAME = utf8_and_euc_jis_2004
include $(srcdir)/../proc.mk
/*-------------------------------------------------------------------------
*
* EUC_JIS_2004 <--> UTF8
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_jis_2004/utf8_and_euc_jis_2004.c,v 1.1 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include "../../Unicode/euc_jis_2004_to_utf8.map"
#include "../../Unicode/utf8_to_euc_jis_2004.map"
#include "../../Unicode/euc_jis_2004_to_utf8_combined.map"
#include "../../Unicode/utf8_to_euc_jis_2004_combined.map"
PG_MODULE_MAGIC;
PG_FUNCTION_INFO_V1(euc_jis_2004_to_utf8);
PG_FUNCTION_INFO_V1(utf8_to_euc_jis_2004);
extern Datum euc_jis_2004_to_utf8(PG_FUNCTION_ARGS);
extern Datum utf8_to_euc_jis_2004(PG_FUNCTION_ARGS);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length
* ) returns VOID;
* ----------
*/
/*
 * euc_jis_2004_to_utf8
 *
 * fmgr-callable conversion procedure for EUC_JIS_2004 -> UTF8.  Argument 2 is
 * the source string, argument 3 the destination buffer and argument 4 the
 * source length.  Arguments 0 and 1 (the encoding ids) are only checked by
 * assertions.  The conversion is delegated to LocalToUtf() with both the
 * plain and the combined-character EUC_JIS_2004 maps.
 */
Datum
euc_jis_2004_to_utf8(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);

	/* number of entries in the plain and in the combined map */
	int			map_entries = sizeof(LUmapEUC_JIS_2004) / sizeof(pg_local_to_utf);
	int			cmap_entries = sizeof(LUmapEUC_JIS_2004_combined) / sizeof(pg_local_to_utf_combined);

	Assert(PG_GETARG_INT32(0) == PG_EUC_JIS_2004);
	Assert(PG_GETARG_INT32(1) == PG_UTF8);
	Assert(nbytes >= 0);

	LocalToUtf(input, output,
			   LUmapEUC_JIS_2004, LUmapEUC_JIS_2004_combined,
			   map_entries, cmap_entries,
			   PG_EUC_JIS_2004, nbytes);

	PG_RETURN_VOID();
}
/*
 * utf8_to_euc_jis_2004
 *
 * fmgr-callable conversion procedure for UTF8 -> EUC_JIS_2004.  Argument 2 is
 * the source string, argument 3 the destination buffer and argument 4 the
 * source length.  Arguments 0 and 1 (the encoding ids) are only checked by
 * assertions.  The conversion is delegated to UtfToLocal() with both the
 * plain and the combined-character EUC_JIS_2004 maps.
 */
Datum
utf8_to_euc_jis_2004(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);

	/* number of entries in the plain and in the combined map */
	int			map_entries = sizeof(ULmapEUC_JIS_2004) / sizeof(pg_utf_to_local);
	int			cmap_entries = sizeof(ULmapEUC_JIS_2004_combined) / sizeof(pg_utf_to_local_combined);

	Assert(PG_GETARG_INT32(0) == PG_UTF8);
	Assert(PG_GETARG_INT32(1) == PG_EUC_JIS_2004);
	Assert(nbytes >= 0);

	UtfToLocal(input, output,
			   ULmapEUC_JIS_2004, ULmapEUC_JIS_2004_combined,
			   map_entries, cmap_entries,
			   PG_EUC_JIS_2004, nbytes);

	PG_RETURN_VOID();
}
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c,v 1.16 2007/01/05 22:19:45 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c,v 1.17 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -46,8 +46,8 @@ euc_jp_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapEUC_JP,
sizeof(LUmapEUC_JP) / sizeof(pg_local_to_utf), PG_EUC_JP, len);
LocalToUtf(src, dest, LUmapEUC_JP, NULL,
sizeof(LUmapEUC_JP) / sizeof(pg_local_to_utf), 0, PG_EUC_JP, len);
PG_RETURN_VOID();
}
......@@ -63,8 +63,8 @@ utf8_to_euc_jp(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_EUC_JP);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapEUC_JP,
sizeof(ULmapEUC_JP) / sizeof(pg_utf_to_local), PG_EUC_JP, len);
UtfToLocal(src, dest, ULmapEUC_JP, NULL,
sizeof(ULmapEUC_JP) / sizeof(pg_utf_to_local), 0, PG_EUC_JP, len);
PG_RETURN_VOID();
}
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c,v 1.16 2007/01/05 22:19:45 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c,v 1.17 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -46,8 +46,8 @@ euc_kr_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapEUC_KR,
sizeof(LUmapEUC_KR) / sizeof(pg_local_to_utf), PG_EUC_KR, len);
LocalToUtf(src, dest, LUmapEUC_KR, NULL,
sizeof(LUmapEUC_KR) / sizeof(pg_local_to_utf), 0, PG_EUC_KR, len);
PG_RETURN_VOID();
}
......@@ -63,8 +63,8 @@ utf8_to_euc_kr(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_EUC_KR);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapEUC_KR,
sizeof(ULmapEUC_KR) / sizeof(pg_utf_to_local), PG_EUC_KR, len);
UtfToLocal(src, dest, ULmapEUC_KR, NULL,
sizeof(ULmapEUC_KR) / sizeof(pg_utf_to_local), 0, PG_EUC_KR, len);
PG_RETURN_VOID();
}
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c,v 1.16 2007/01/05 22:19:45 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c,v 1.17 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -46,8 +46,8 @@ euc_tw_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapEUC_TW,
sizeof(LUmapEUC_TW) / sizeof(pg_local_to_utf), PG_EUC_TW, len);
LocalToUtf(src, dest, LUmapEUC_TW, NULL,
sizeof(LUmapEUC_TW) / sizeof(pg_local_to_utf), 0, PG_EUC_TW, len);
PG_RETURN_VOID();
}
......@@ -63,8 +63,8 @@ utf8_to_euc_tw(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_EUC_TW);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapEUC_TW,
sizeof(ULmapEUC_TW) / sizeof(pg_utf_to_local), PG_EUC_TW, len);
UtfToLocal(src, dest, ULmapEUC_TW, NULL,
sizeof(ULmapEUC_TW) / sizeof(pg_utf_to_local), 0, PG_EUC_TW, len);
PG_RETURN_VOID();
}
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c,v 1.17 2007/01/05 22:19:46 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c,v 1.18 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -46,8 +46,8 @@ gb18030_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapGB18030,
sizeof(LUmapGB18030) / sizeof(pg_local_to_utf), PG_GB18030, len);
LocalToUtf(src, dest, LUmapGB18030, NULL,
sizeof(LUmapGB18030) / sizeof(pg_local_to_utf), 0, PG_GB18030, len);
PG_RETURN_VOID();
}
......@@ -63,8 +63,8 @@ utf8_to_gb18030(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_GB18030);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapGB18030,
sizeof(ULmapGB18030) / sizeof(pg_utf_to_local), PG_GB18030, len);
UtfToLocal(src, dest, ULmapGB18030, NULL,
sizeof(ULmapGB18030) / sizeof(pg_utf_to_local), 0, PG_GB18030, len);
PG_RETURN_VOID();
}
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c,v 1.15 2007/01/05 22:19:46 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c,v 1.16 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -46,8 +46,8 @@ gbk_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapGBK,
sizeof(LUmapGBK) / sizeof(pg_local_to_utf), PG_GBK, len);
LocalToUtf(src, dest, LUmapGBK, NULL,
sizeof(LUmapGBK) / sizeof(pg_local_to_utf), 0, PG_GBK, len);
PG_RETURN_VOID();
}
......@@ -63,8 +63,8 @@ utf8_to_gbk(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_GBK);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapGBK,
sizeof(ULmapGBK) / sizeof(pg_utf_to_local), PG_GBK, len);
UtfToLocal(src, dest, ULmapGBK, NULL,
sizeof(ULmapGBK) / sizeof(pg_utf_to_local), 0, PG_GBK, len);
PG_RETURN_VOID();
}
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c,v 1.25 2007/01/05 22:19:46 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c,v 1.26 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -127,7 +127,7 @@ iso8859_to_utf8(PG_FUNCTION_ARGS)
{
if (encoding == maps[i].encoding)
{
LocalToUtf(src, dest, maps[i].map1, maps[i].size1, encoding, len);
LocalToUtf(src, dest, maps[i].map1, NULL, maps[i].size1, 0, encoding, len);
PG_RETURN_VOID();
}
}
......@@ -155,7 +155,7 @@ utf8_to_iso8859(PG_FUNCTION_ARGS)
{
if (encoding == maps[i].encoding)
{
UtfToLocal(src, dest, maps[i].map2, maps[i].size2, encoding, len);
UtfToLocal(src, dest, maps[i].map2, NULL, maps[i].size2, 0, encoding, len);
PG_RETURN_VOID();
}
}
......
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c,v 1.16 2007/01/05 22:19:46 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c,v 1.17 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -46,8 +46,8 @@ johab_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapJOHAB,
sizeof(LUmapJOHAB) / sizeof(pg_local_to_utf), PG_JOHAB, len);
LocalToUtf(src, dest, LUmapJOHAB, NULL,
sizeof(LUmapJOHAB) / sizeof(pg_local_to_utf), 0, PG_JOHAB, len);
PG_RETURN_VOID();
}
......@@ -63,8 +63,8 @@ utf8_to_johab(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_JOHAB);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapJOHAB,
sizeof(ULmapJOHAB) / sizeof(pg_utf_to_local), PG_JOHAB, len);
UtfToLocal(src, dest, ULmapJOHAB, NULL,
sizeof(ULmapJOHAB) / sizeof(pg_utf_to_local), 0, PG_JOHAB, len);
PG_RETURN_VOID();
}
#-------------------------------------------------------------------------
#
# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_shift_jis_2004/Makefile,v 1.1 2007/03/25 11:56:03 ishii Exp $
#
#-------------------------------------------------------------------------
# Location of this directory relative to the top of the source tree.
subdir = src/backend/utils/mb/conversion_procs/utf8_and_shift_jis_2004
# Relative path from this directory back up to the top of the build tree.
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global
# Name of this conversion module; matches the directory name above.
# NOTE(review): presumably consumed by ../proc.mk, which is not shown here.
NAME = utf8_and_shift_jis_2004
include $(srcdir)/../proc.mk
/*-------------------------------------------------------------------------
*
* SHIFT_JIS_2004 <--> UTF8
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_shift_jis_2004/utf8_and_shift_jis_2004.c,v 1.1 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include "../../Unicode/shift_jis_2004_to_utf8.map"
#include "../../Unicode/utf8_to_shift_jis_2004.map"
#include "../../Unicode/shift_jis_2004_to_utf8_combined.map"
#include "../../Unicode/utf8_to_shift_jis_2004_combined.map"
PG_MODULE_MAGIC;
PG_FUNCTION_INFO_V1(shift_jis_2004_to_utf8);
PG_FUNCTION_INFO_V1(utf8_to_shift_jis_2004);
extern Datum shift_jis_2004_to_utf8(PG_FUNCTION_ARGS);
extern Datum utf8_to_shift_jis_2004(PG_FUNCTION_ARGS);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length
* ) returns VOID;
* ----------
*/
/*
 * shift_jis_2004_to_utf8
 *
 * fmgr-callable conversion procedure for SHIFT_JIS_2004 -> UTF8.  Argument 2
 * is the source string, argument 3 the destination buffer and argument 4 the
 * source length.  Arguments 0 and 1 (the encoding ids) are only checked by
 * assertions.  The conversion is delegated to LocalToUtf() with both the
 * plain and the combined-character SHIFT_JIS_2004 maps.
 */
Datum
shift_jis_2004_to_utf8(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);

	/* number of entries in the plain and in the combined map */
	int			map_entries = sizeof(LUmapSHIFT_JIS_2004) / sizeof(pg_local_to_utf);
	int			cmap_entries = sizeof(LUmapSHIFT_JIS_2004_combined) / sizeof(pg_local_to_utf_combined);

	Assert(PG_GETARG_INT32(0) == PG_SHIFT_JIS_2004);
	Assert(PG_GETARG_INT32(1) == PG_UTF8);
	Assert(nbytes >= 0);

	LocalToUtf(input, output,
			   LUmapSHIFT_JIS_2004, LUmapSHIFT_JIS_2004_combined,
			   map_entries, cmap_entries,
			   PG_SHIFT_JIS_2004, nbytes);

	PG_RETURN_VOID();
}
/*
 * utf8_to_shift_jis_2004
 *
 * fmgr-callable conversion procedure for UTF8 -> SHIFT_JIS_2004.  Argument 2
 * is the source string, argument 3 the destination buffer and argument 4 the
 * source length.  Arguments 0 and 1 (the encoding ids) are only checked by
 * assertions.  The conversion is delegated to UtfToLocal() with both the
 * plain and the combined-character SHIFT_JIS_2004 maps.
 */
Datum
utf8_to_shift_jis_2004(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);

	/* number of entries in the plain and in the combined map */
	int			map_entries = sizeof(ULmapSHIFT_JIS_2004) / sizeof(pg_utf_to_local);
	int			cmap_entries = sizeof(ULmapSHIFT_JIS_2004_combined) / sizeof(pg_utf_to_local_combined);

	Assert(PG_GETARG_INT32(0) == PG_UTF8);
	Assert(PG_GETARG_INT32(1) == PG_SHIFT_JIS_2004);
	Assert(nbytes >= 0);

	UtfToLocal(input, output,
			   ULmapSHIFT_JIS_2004, ULmapSHIFT_JIS_2004_combined,
			   map_entries, cmap_entries,
			   PG_SHIFT_JIS_2004, nbytes);

	PG_RETURN_VOID();
}
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c,v 1.15 2007/01/05 22:19:46 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c,v 1.16 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -46,8 +46,8 @@ sjis_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapSJIS,
sizeof(LUmapSJIS) / sizeof(pg_local_to_utf), PG_SJIS, len);
LocalToUtf(src, dest, LUmapSJIS, NULL,
sizeof(LUmapSJIS) / sizeof(pg_local_to_utf), 0, PG_SJIS, len);
PG_RETURN_VOID();
}
......@@ -63,8 +63,8 @@ utf8_to_sjis(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_SJIS);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapSJIS,
sizeof(ULmapSJIS) / sizeof(pg_utf_to_local), PG_SJIS, len);
UtfToLocal(src, dest, ULmapSJIS, NULL,
sizeof(ULmapSJIS) / sizeof(pg_utf_to_local), 0, PG_SJIS, len);
PG_RETURN_VOID();
}
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c,v 1.15 2007/01/05 22:19:46 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c,v 1.16 2007/03/25 11:56:03 ishii Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -46,8 +46,8 @@ uhc_to_utf8(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len >= 0);
LocalToUtf(src, dest, LUmapUHC,
sizeof(LUmapUHC) / sizeof(pg_local_to_utf), PG_UHC, len);
LocalToUtf(src, dest, LUmapUHC, NULL,
sizeof(LUmapUHC) / sizeof(pg_local_to_utf), 0, PG_UHC, len);
PG_RETURN_VOID();
}
......@@ -63,8 +63,8 @@ utf8_to_uhc(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_UHC);
Assert(len >= 0);
UtfToLocal(src, dest, ULmapUHC,
sizeof(ULmapUHC) / sizeof(pg_utf_to_local), PG_UHC, len);
UtfToLocal(src, dest, ULmapUHC, NULL,
sizeof(ULmapUHC) / sizeof(pg_utf_to_local), 0, PG_UHC, len);
PG_RETURN_VOID();
}
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c,v 1.9 2007/01/05 22:19:46 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c,v 1.10 2007/03/25 11:56:04 ishii Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -117,7 +117,7 @@ win_to_utf8(PG_FUNCTION_ARGS)
{
if (encoding == maps[i].encoding)
{
LocalToUtf(src, dest, maps[i].map1, maps[i].size1, encoding, len);
LocalToUtf(src, dest, maps[i].map1, NULL, maps[i].size1, 0, encoding, len);
PG_RETURN_VOID();
}
}
......@@ -145,7 +145,7 @@ utf8_to_win(PG_FUNCTION_ARGS)
{
if (encoding == maps[i].encoding)
{
UtfToLocal(src, dest, maps[i].map2, maps[i].size2, encoding, len);
UtfToLocal(src, dest, maps[i].map2, NULL, maps[i].size2, 0, encoding, len);
PG_RETURN_VOID();
}
}
......
......@@ -2,7 +2,7 @@
* Encoding names and routines for work with it. All
* in this file is shared between FE and BE.
*
* $PostgreSQL: pgsql/src/backend/utils/mb/encnames.c,v 1.31 2006/07/14 14:52:25 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/encnames.c,v 1.32 2007/03/25 11:56:02 ishii Exp $
*/
#ifdef FRONTEND
#include "postgres_fe.h"
......@@ -44,6 +44,10 @@ pg_encname pg_encname_tbl[] =
"euccn", PG_EUC_CN
}, /* EUC-CN; Extended Unix Code for simplified
* Chinese */
{
"eucjis2004", PG_EUC_JIS_2004
}, /* EUC-JIS-2004; Extended UNIX Code fixed Width for
* Japanese, standard JIS X 0213 */
{
"eucjp", PG_EUC_JP
}, /* EUC-JP; Extended UNIX Code fixed Width for
......@@ -156,6 +160,11 @@ pg_encname pg_encname_tbl[] =
{
"shiftjis", PG_SJIS
}, /* Shift_JIS; JIS X 0202-1991 */
{
"shiftjis2004", PG_SHIFT_JIS_2004
}, /* SHIFT-JIS-2004; Shift JIS for
* Japanese, standard JIS X 0213 */
{
"sjis", PG_SJIS
}, /* alias for Shift_JIS */
......@@ -391,6 +400,9 @@ pg_enc2name pg_enc2name_tbl[] =
{
"WIN1257", PG_WIN1257
},
{
"EUC_JIS_2004", PG_EUC_JIS_2004
},
{
"SJIS", PG_SJIS
},
......@@ -405,6 +417,9 @@ pg_enc2name pg_enc2name_tbl[] =
},
{
"GB18030", PG_GB18030
},
{
"SHIFT_JIS_2004", PG_SHIFT_JIS_2004
}
};
......
/*
* conversion functions between pg_wchar and multibyte streams.
* Tatsuo Ishii
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.59 2007/01/24 17:12:17 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.60 2007/03/25 11:56:02 ishii Exp $
*
* WIN1250 client encoding updated by Pavel Behal
*
......@@ -1346,11 +1346,13 @@ pg_wchar_tbl pg_wchar_table[] = {
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 30; PG_WIN1254 */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 31; PG_WIN1255 */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* 32; PG_WIN1257 */
{0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* 33; PG_SJIS */
{0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* 34; PG_BIG5 */
{0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2}, /* 35; PG_GBK */
{0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2}, /* 36; PG_UHC */
{0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 2} /* 37; PG_GB18030 */
{pg_eucjp2wchar_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* 33; PG_EUC_JIS_2004 */
{0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* 34; PG_SJIS */
{0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* 35; PG_BIG5 */
{0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2}, /* 36; PG_GBK */
{0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2}, /* 37; PG_UHC */
{0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 2}, /* 38; PG_GB18030 */
{0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2} /* 39; PG_SHIFT_JIS_2004 */
};
/* returns the byte length of a word for mule internal code */
......
......@@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.393 2007/03/20 05:45:00 neilc Exp $
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.394 2007/03/25 11:56:04 ishii Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 200703201
#define CATALOG_VERSION_NO 200703251
#endif
/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.70 2006/12/24 00:57:48 tgl Exp $ */
/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.71 2007/03/25 11:56:04 ishii Exp $ */
#ifndef PG_WCHAR_H
#define PG_WCHAR_H
......@@ -187,6 +187,7 @@ typedef enum pg_enc
PG_WIN1254, /* windows-1254 */
PG_WIN1255, /* windows-1255 */
PG_WIN1257, /* windows-1257 */
PG_EUC_JIS_2004, /* EUC-JIS-2004 */
/* PG_ENCODING_BE_LAST points to the above entry */
/* followings are for client encoding only */
......@@ -195,11 +196,12 @@ typedef enum pg_enc
PG_GBK, /* GBK (Windows-936) */
PG_UHC, /* UHC (Windows-949) */
PG_GB18030, /* GB18030 */
PG_SHIFT_JIS_2004, /* Shift-JIS-2004 */
_PG_LAST_ENCODING_ /* mark only */
} pg_enc;
#define PG_ENCODING_BE_LAST PG_WIN1257
#define PG_ENCODING_BE_LAST PG_EUC_JIS_2004
/*
* Please use these tests before access to pg_encconv_tbl[]
......@@ -274,23 +276,45 @@ typedef struct
extern pg_wchar_tbl pg_wchar_table[];
/*
* UTF8 to local code conversion map
* UTF-8 to local code conversion map
* Note that we limit the max length of UTF-8 to 4 bytes,
* which is UCS-4 00010000-001FFFFF range.
*/
typedef struct
{
unsigned int utf; /* UTF8 */
unsigned int code; /* local code */
uint32 utf; /* UTF-8 */
uint32 code; /* local code */
} pg_utf_to_local;
/*
* local code to UTF8 conversion map
* local code to UTF-8 conversion map
*/
typedef struct
{
unsigned int code; /* local code */
unsigned int utf; /* UTF8 */
uint32 code; /* local code */
uint32 utf; /* UTF-8 */
} pg_local_to_utf;
/*
 * UTF-8 to local code conversion map (combined characters).
 *
 * Each entry pairs two UTF-8 codes (utf1, utf2) with one local code.
 * NOTE(review): presumably utf1 is a base character and utf2 the combining
 * character that follows it -- confirm against UtfToLocal().
 */
typedef struct
{
uint32 utf1; /* UTF-8 code 1 (first code of the pair) */
uint32 utf2; /* UTF-8 code 2 (second code of the pair) */
uint32 code; /* local code the pair maps to */
} pg_utf_to_local_combined;
/*
 * Local code to UTF-8 conversion map (combined characters).
 *
 * Each entry pairs one local code with two UTF-8 codes (utf1, utf2).
 * NOTE(review): presumably utf1 is a base character and utf2 the combining
 * character that follows it -- confirm against LocalToUtf().
 */
typedef struct
{
uint32 code; /* local code */
uint32 utf1; /* UTF-8 code 1 (first code of the pair) */
uint32 utf2; /* UTF-8 code 2 (second code of the pair) */
} pg_local_to_utf_combined;
extern int pg_mb2wchar(const char *from, pg_wchar *to);
extern int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len);
extern int pg_encoding_mb2wchar_with_len(int encoding,
......@@ -338,10 +362,12 @@ extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc);
extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc);
extern void LocalToUtf(const unsigned char *iso, unsigned char *utf,
const pg_local_to_utf *map, int size, int encoding, int len);
const pg_local_to_utf *map, const pg_local_to_utf_combined *cmap,
int size1, int size2, int encoding, int len);
extern void UtfToLocal(const unsigned char *utf, unsigned char *iso,
const pg_utf_to_local *map, int size, int encoding, int len);
const pg_utf_to_local *map, const pg_utf_to_local_combined *cmap,
int size1, int size2, int encoding, int len);
extern bool pg_verifymbstr(const char *mbstr, int len, bool noError);
extern bool pg_verify_mbstr(int encoding, const char *mbstr, int len,
......
......@@ -1642,6 +1642,84 @@ SELECT CONVERT('foo', 'WIN1257', 'UTF8');
foo
(1 row)
-- UTF8 --> EUC_JIS_2004
SELECT CONVERT('foo' USING utf8_to_euc_jis_2004);
convert_using
---------------
foo
(1 row)
SELECT CONVERT('foo', 'UTF8', 'EUC_JIS_2004');
convert
---------
foo
(1 row)
-- EUC_JIS_2004 --> UTF8
SELECT CONVERT('foo' USING euc_jis_2004_to_utf8);
convert_using
---------------
foo
(1 row)
SELECT CONVERT('foo', 'EUC_JIS_2004', 'UTF8');
convert
---------
foo
(1 row)
-- UTF8 --> SHIFT_JIS_2004
SELECT CONVERT('foo' USING utf8_to_shift_jis_2004);
convert_using
---------------
foo
(1 row)
SELECT CONVERT('foo', 'UTF8', 'SHIFT_JIS_2004');
convert
---------
foo
(1 row)
-- SHIFT_JIS_2004 --> UTF8
SELECT CONVERT('foo' USING shift_jis_2004_to_utf8);
convert_using
---------------
foo
(1 row)
SELECT CONVERT('foo', 'SHIFT_JIS_2004', 'UTF8');
convert
---------
foo
(1 row)
-- EUC_JIS_2004 --> SHIFT_JIS_2004
SELECT CONVERT('foo' USING euc_jis_2004_to_shift_jis_2004);
convert_using
---------------
foo
(1 row)
SELECT CONVERT('foo', 'EUC_JIS_2004', 'SHIFT_JIS_2004');
convert
---------
foo
(1 row)
-- SHIFT_JIS_2004 --> EUC_JIS_2004
SELECT CONVERT('foo' USING shift_jis_2004_to_euc_jis_2004);
convert_using
---------------
foo
(1 row)
SELECT CONVERT('foo', 'SHIFT_JIS_2004', 'EUC_JIS_2004');
convert
---------
foo
(1 row)
--
-- return to the super user
--
......
......@@ -399,6 +399,24 @@ SELECT CONVERT('foo', 'UTF8', 'WIN1257');
-- WIN1257 --> UTF8
SELECT CONVERT('foo' USING windows_1257_to_utf8);
SELECT CONVERT('foo', 'WIN1257', 'UTF8');
-- UTF8 --> EUC_JIS_2004
SELECT CONVERT('foo' USING utf8_to_euc_jis_2004);
SELECT CONVERT('foo', 'UTF8', 'EUC_JIS_2004');
-- EUC_JIS_2004 --> UTF8
SELECT CONVERT('foo' USING euc_jis_2004_to_utf8);
SELECT CONVERT('foo', 'EUC_JIS_2004', 'UTF8');
-- UTF8 --> SHIFT_JIS_2004
SELECT CONVERT('foo' USING utf8_to_shift_jis_2004);
SELECT CONVERT('foo', 'UTF8', 'SHIFT_JIS_2004');
-- SHIFT_JIS_2004 --> UTF8
SELECT CONVERT('foo' USING shift_jis_2004_to_utf8);
SELECT CONVERT('foo', 'SHIFT_JIS_2004', 'UTF8');
-- EUC_JIS_2004 --> SHIFT_JIS_2004
SELECT CONVERT('foo' USING euc_jis_2004_to_shift_jis_2004);
SELECT CONVERT('foo', 'EUC_JIS_2004', 'SHIFT_JIS_2004');
-- SHIFT_JIS_2004 --> EUC_JIS_2004
SELECT CONVERT('foo' USING shift_jis_2004_to_euc_jis_2004);
SELECT CONVERT('foo', 'SHIFT_JIS_2004', 'EUC_JIS_2004');
--
-- return to the super user
--
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment