Commit 1acf6f9c authored by Tatsuo Ishii's avatar Tatsuo Ishii

Add support for code conversion between Unicode and other encodings.

Supported encodings are: EUC_JP, EUC_CN, EUC_KR, EUC_TW, Shift JIS,
Big5, ISO8859-[1-5].
TODO: testing and documentation.
parent 0b10d35e
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -4,7 +4,7 @@
# Makefile for utils/mb
#
# IDENTIFICATION
# $Header: /cvsroot/pgsql/src/backend/utils/mb/Makefile,v 1.13 2000/10/25 19:44:44 tgl Exp $
# $Header: /cvsroot/pgsql/src/backend/utils/mb/Makefile,v 1.14 2000/10/30 10:40:28 ishii Exp $
#
#-------------------------------------------------------------------------
......@@ -30,12 +30,6 @@ liketest: liketest.o palloc.o $(OBJS)
utftest: utftest.o palloc.o common.o wstrcmp.o wstrncmp.o big5.o
$(CC) -o $@ $^ $(CFLAGS) $(LDFLAGS)
uconv: uconv.o palloc.o common.o conv.o wchar.o big5.o mbutils.o
$(CC) -o $@ $^ $(CFLAGS) $(LDFLAGS)
uconv2: uconv2.o palloc.o common.o conv.o wchar.o big5.o mbutils.o
$(CC) -o $@ $^ $(CFLAGS) $(LDFLAGS)
depend dep:
$(CC) -MM $(CFLAGS) *.c >depend
......
This source diff could not be displayed because it is too large. You can view the blob instead.
#-------------------------------------------------------------------------
#
# Makefile for src/backend/utils/mb/Unicode
#
# Copyright 2001 by PostgreSQL Global Development Group
#
# $Header: /cvsroot/pgsql/src/backend/utils/mb/Unicode/Makefile,v 1.1 2000/10/30 10:40:29 ishii Exp $
#
#-------------------------------------------------------------------------
subdir = src/backend/utils/mb/Unicode
top_builddir = ../../../../..
include $(top_builddir)/src/Makefile.global

# Maps for ISO 8859-2 .. 8859-5, in both directions.
ISO8859MAPS=iso8859_2_to_utf8.map iso8859_3_to_utf8.map \
	iso8859_4_to_utf8.map iso8859_5_to_utf8.map \
	utf8_to_iso8859_2.map utf8_to_iso8859_3.map \
	utf8_to_iso8859_4.map utf8_to_iso8859_5.map

# Every generated map.  The ISO 8859 maps are pulled in through
# $(ISO8859MAPS) only, so each file name appears exactly once.
MAPS= $(ISO8859MAPS) \
	big5_to_utf8.map euc_cn_to_utf8.map euc_jp_to_utf8.map \
	euc_kr_to_utf8.map euc_tw_to_utf8.map sjis_to_utf8.map \
	utf8_to_big5.map utf8_to_euc_cn.map utf8_to_euc_jp.map \
	utf8_to_euc_kr.map utf8_to_euc_tw.map \
	utf8_to_sjis.map

# Mapping text files used as prerequisites of the maps.
ISO8859TEXTS= 8859-2.TXT 8859-3.TXT 8859-4.TXT 8859-5.TXT

TEXTS=$(ISO8859TEXTS) \
	BIG5.TXT CNS11643.TXT GB2312.TXT \
	JIS0201.TXT JIS0208.TXT JIS0212.TXT \
	OLD5601.TXT SHIFTJIS.TXT

all: $(MAPS)

$(ISO8859MAPS) : $(ISO8859TEXTS)
	./UCS_to_8859.pl

euc_jp_to_utf8.map utf8_to_euc_jp.map : JIS0201.TXT JIS0208.TXT JIS0212.TXT
	./UCS_to_EUC_JP.pl

euc_cn_to_utf8.map utf8_to_euc_cn.map : GB2312.TXT
	./UCS_to_EUC_CN.pl

euc_kr_to_utf8.map utf8_to_euc_kr.map : OLD5601.TXT
	./UCS_to_EUC_KR.pl

euc_tw_to_utf8.map utf8_to_euc_tw.map : CNS11643.TXT
	./UCS_to_EUC_TW.pl

sjis_to_utf8.map utf8_to_sjis.map : SHIFTJIS.TXT
	./UCS_to_SJIS.pl

big5_to_utf8.map utf8_to_big5.map : BIG5.TXT
	./UCS_to_BIG5.pl

# Removes the generated maps.
clean:
	rm -f $(MAPS)

# Additionally removes the mapping text files listed in $(TEXTS).
distclean: clean
	rm -f $(TEXTS)
#! /usr/bin/perl
#
# Copyright 2001 by PostgreSQL Global Development Group
#
# $Id: UCS_to_8859.pl,v 1.1 2000/10/30 10:40:29 ishii Exp $
#
# Generate UTF-8 <--> ISO8859 code conversion tables from
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
# you have to obtain "8859-[2-5].TXT" from the organization's ftp site.
# We assume the file includes three tab-separated columns:
# ISO/IEC 8859 code in hex
# UCS-2 code in hex
# # and Unicode name (not used in this script)
require "ucs2utf.pl";

# ISO 8859 parts to convert; part N is read from "8859-N.TXT".
@charsets = (2,3,4,5);

foreach $charset (@charsets) {

	$in_file = "8859-${charset}.TXT";

	#
	# first, generate UTF8->ISO8859 table
	#
	open( FILE, $in_file ) || die( "cannot open $in_file" );

	# Start every table from an empty map and a zero entry count.
	%array = ();
	$count = 0;

	while( <FILE> ){
		chomp;
		next if /^#/;

		( $c, $u, $rest ) = split;
		$ucs = hex($u);
		$code = hex($c);

		# Only rows with code >= 0x80 and UCS >= 0x100 go into the table.
		next unless $code >= 0x80 && $ucs >= 0x100;

		$utf = &ucs2utf($ucs);
		if( exists $array{ $utf } ){
			printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
			next;
		}
		$count++;
		$array{ $utf } = $code;
	}
	close( FILE );

	$file = "utf8_to_iso8859_${charset}.map";
	open( FILE, "> $file" ) || die( "cannot open $file" );
	print FILE "static pg_utf_to_local ULmapISO8859_${charset}[ $count ] = {\n";

	# Entries are written in ascending numeric key order; $count is counted
	# back down so that the last entry is written without a trailing comma.
	for $index ( sort {$a <=> $b} keys( %array ) ){
		$code = $array{ $index };
		$count--;
		if( $count == 0 ){
			printf FILE " {0x%04x, 0x%04x}\n", $index, $code;
		} else {
			printf FILE " {0x%04x, 0x%04x},\n", $index, $code;
		}
	}

	print FILE "};\n";
	close(FILE);

	#
	# then generate ISO8859->UTF8 table
	#
	open( FILE, $in_file ) || die( "cannot open $in_file" );

	%array = ();
	$count = 0;

	while( <FILE> ){
		chomp;
		next if /^#/;

		( $c, $u, $rest ) = split;
		$ucs = hex($u);
		$code = hex($c);

		next unless $code >= 0x80 && $ucs >= 0x100;

		$utf = &ucs2utf($ucs);

		# This table is keyed by the ISO 8859 code, so duplicates must be
		# detected on $code (not on $utf as in the first pass).
		if( exists $array{ $code } ){
			printf STDERR "Warning: duplicate code: %04x\n",$code;
			next;
		}
		$count++;
		$array{ $code } = $utf;
	}
	close( FILE );

	$file = "iso8859_${charset}_to_utf8.map";
	open( FILE, "> $file" ) || die( "cannot open $file" );
	print FILE "static pg_local_to_utf LUmapISO8859_${charset}[ $count ] = {\n";

	for $index ( sort {$a <=> $b} keys( %array ) ){
		$utf = $array{ $index };
		$count--;
		if( $count == 0 ){
			printf FILE " {0x%04x, 0x%04x}\n", $index, $utf;
		} else {
			printf FILE " {0x%04x, 0x%04x},\n", $index, $utf;
		}
	}

	print FILE "};\n";
	close(FILE);
}
#! /usr/bin/perl
#
# Copyright 2001 by PostgreSQL Global Development Group
#
# $Id: UCS_to_BIG5.pl,v 1.1 2000/10/30 10:40:29 ishii Exp $
#
# Generate UTF-8 <--> BIG5 code conversion tables from
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
# you have to obtain BIG5.TXT from
# the organization's ftp site.
#
# BIG5.TXT format:
# BIG5 code in hex
# UCS-2 code in hex
# # and Unicode name (not used in this script)
require "ucs2utf.pl";

$in_file = "BIG5.TXT";

#
# first, generate UTF8 --> BIG5 table
#
open( FILE, $in_file ) || die( "cannot open $in_file" );

# Start from an empty map and a zero entry count.
%array = ();
$count = 0;

while( <FILE> ){
	chomp;
	next if /^#/;

	( $c, $u, $rest ) = split;
	$ucs = hex($u);
	$code = hex($c);

	# Only rows with code >= 0x80 and UCS >= 0x100 go into the table.
	next unless $code >= 0x80 && $ucs >= 0x100;

	$utf = &ucs2utf($ucs);
	if( exists $array{ $utf } ){
		printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
		next;
	}
	$count++;
	$array{ $utf } = $code;
}
close( FILE );

$file = "utf8_to_big5.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "static pg_utf_to_local ULmapBIG5[ $count ] = {\n";

# Entries are written in ascending numeric key order; $count is counted
# back down so that the last entry is written without a trailing comma.
for $index ( sort {$a <=> $b} keys( %array ) ){
	$code = $array{ $index };
	$count--;
	if( $count == 0 ){
		printf FILE " {0x%04x, 0x%04x}\n", $index, $code;
	} else {
		printf FILE " {0x%04x, 0x%04x},\n", $index, $code;
	}
}

print FILE "};\n";
close(FILE);

#
# then generate BIG5 --> UTF8 table
#
%array = ();
$count = 0;

open( FILE, $in_file ) || die( "cannot open $in_file" );

while( <FILE> ){
	chomp;
	next if /^#/;

	( $c, $u, $rest ) = split;
	$ucs = hex($u);
	$code = hex($c);

	next unless $code >= 0x80 && $ucs >= 0x100;

	$utf = &ucs2utf($ucs);

	# Keyed by BIG5 code here, so report the offending code itself.
	if( exists $array{ $code } ){
		printf STDERR "Warning: duplicate code: %04x\n",$code;
		next;
	}
	$count++;
	$array{ $code } = $utf;
}
close( FILE );

$file = "big5_to_utf8.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "static pg_local_to_utf LUmapBIG5[ $count ] = {\n";

for $index ( sort {$a <=> $b} keys( %array ) ){
	$utf = $array{ $index };
	$count--;
	if( $count == 0 ){
		printf FILE " {0x%04x, 0x%04x}\n", $index, $utf;
	} else {
		printf FILE " {0x%04x, 0x%04x},\n", $index, $utf;
	}
}

print FILE "};\n";
close(FILE);
#! /usr/bin/perl
#
# Copyright 2001 by PostgreSQL Global Development Group
#
# $Id: UCS_to_EUC_CN.pl,v 1.1 2000/10/30 10:40:29 ishii Exp $
#
# Generate UTF-8 <--> EUC_CN code conversion tables from
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
# you have to obtain GB2312.TXT from
# the organization's ftp site.
#
# GB2312.TXT format:
# GB2312 code in hex
# UCS-2 code in hex
# # and Unicode name (not used in this script)
require "ucs2utf.pl";

$in_file = "GB2312.TXT";

#
# first, generate UTF8 --> EUC_CN table
#
open( FILE, $in_file ) || die( "cannot open $in_file" );

# Start from an empty map and a zero entry count.
%array = ();
$count = 0;

while( <FILE> ){
	chomp;
	next if /^#/;

	( $c, $u, $rest ) = split;
	$ucs = hex($u);
	$code = hex($c);

	# Only rows with code >= 0x80 and UCS >= 0x100 go into the table.
	next unless $code >= 0x80 && $ucs >= 0x100;

	$utf = &ucs2utf($ucs);
	if( exists $array{ $utf } ){
		printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
		next;
	}
	$count++;

	# The stored value is the code with 0x8080 OR-ed in.
	$array{ $utf } = ($code | 0x8080);
}
close( FILE );

$file = "utf8_to_euc_cn.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "static pg_utf_to_local ULmapEUC_CN[ $count ] = {\n";

# Entries are written in ascending numeric key order; $count is counted
# back down so that the last entry is written without a trailing comma.
for $index ( sort {$a <=> $b} keys( %array ) ){
	$code = $array{ $index };
	$count--;
	if( $count == 0 ){
		printf FILE " {0x%04x, 0x%04x}\n", $index, $code;
	} else {
		printf FILE " {0x%04x, 0x%04x},\n", $index, $code;
	}
}

print FILE "};\n";
close(FILE);

#
# then generate EUC_CN --> UTF8 table
#
%array = ();
$count = 0;

open( FILE, $in_file ) || die( "cannot open $in_file" );

while( <FILE> ){
	chomp;
	next if /^#/;

	( $c, $u, $rest ) = split;
	$ucs = hex($u);
	$code = hex($c);

	next unless $code >= 0x80 && $ucs >= 0x100;

	$utf = &ucs2utf($ucs);

	# Build the real table key first: the duplicate check has to look up
	# the same key that is stored, otherwise it can never match and
	# $count would drift away from the number of entries.
	$code |= 0x8080;
	if( exists $array{ $code } ){
		printf STDERR "Warning: duplicate code: %04x\n",$code;
		next;
	}
	$count++;
	$array{ $code } = $utf;
}
close( FILE );

$file = "euc_cn_to_utf8.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "static pg_local_to_utf LUmapEUC_CN[ $count ] = {\n";

for $index ( sort {$a <=> $b} keys( %array ) ){
	$utf = $array{ $index };
	$count--;
	if( $count == 0 ){
		printf FILE " {0x%04x, 0x%04x}\n", $index, $utf;
	} else {
		printf FILE " {0x%04x, 0x%04x},\n", $index, $utf;
	}
}

print FILE "};\n";
close(FILE);
#! /usr/bin/perl
#
# Copyright 2001 by PostgreSQL Global Development Group
#
# $Id: UCS_to_EUC_JP.pl,v 1.1 2000/10/30 10:40:29 ishii Exp $
#
# Generate UTF-8 <--> EUC_JP code conversion tables from
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
# you have to obtain JIS0201.TXT, JIS0208.TXT, JIS0212.TXT from
# the organization's ftp site.
#
# JIS0201.TXT format:
# JIS0201 code in hex
# UCS-2 code in hex
# # and Unicode name (not used in this script)
#
# JIS0208.TXT format:
# JIS0208 shift-JIS code in hex
# JIS0208 code in hex
# UCS-2 code in hex
# # and Unicode name (not used in this script)
#
# JIS0212.TXT format:
# JIS0212 code in hex
# UCS-2 code in hex
# # and Unicode name (not used in this script)
require "ucs2utf.pl";

#
# first, generate UTF8 --> EUC_JP table
#
# The three input files are read one after another into the same map,
# so a Unicode value already seen in an earlier file wins.
#

# Start from an empty map and a zero entry count.
%array = ();
$count = 0;

#
# JIS0201
#
$in_file = "JIS0201.TXT";

open( FILE, $in_file ) || die( "cannot open $in_file" );

while( <FILE> ){
	chomp;
	next if /^#/;

	( $c, $u, $rest ) = split;
	$ucs = hex($u);
	$code = hex($c);

	# Only rows with code >= 0x80 and UCS >= 0x100 go into the table.
	next unless $code >= 0x80 && $ucs >= 0x100;

	$utf = &ucs2utf($ucs);
	if( exists $array{ $utf } ){
		printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
		next;
	}
	$count++;

	# add single shift 2
	$array{ $utf } = ($code | 0x8e00);
}
close( FILE );

#
# JIS0208
#
$in_file = "JIS0208.TXT";

open( FILE, $in_file ) || die( "cannot open $in_file" );

while( <FILE> ){
	chomp;
	next if /^#/;

	# First column (shift-JIS code) is read but not used.
	( $s, $c, $u, $rest ) = split;
	$ucs = hex($u);
	$code = hex($c);

	next unless $code >= 0x80 && $ucs >= 0x100;

	$utf = &ucs2utf($ucs);
	if( exists $array{ $utf } ){
		printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
		next;
	}
	$count++;
	$array{ $utf } = ($code | 0x8080);
}
close( FILE );

#
# JIS0212
#
$in_file = "JIS0212.TXT";

open( FILE, $in_file ) || die( "cannot open $in_file" );

while( <FILE> ){
	chomp;
	next if /^#/;

	( $c, $u, $rest ) = split;
	$ucs = hex($u);
	$code = hex($c);

	next unless $code >= 0x80 && $ucs >= 0x100;

	$utf = &ucs2utf($ucs);
	if( exists $array{ $utf } ){
		printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
		next;
	}
	$count++;
	$array{ $utf } = ($code | 0x8f8080);
}
close( FILE );

$file = "utf8_to_euc_jp.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "static pg_utf_to_local ULmapEUC_JP[ $count ] = {\n";

# Entries are written in ascending numeric key order; $count is counted
# back down so that the last entry is written without a trailing comma.
for $index ( sort {$a <=> $b} keys( %array ) ){
	$code = $array{ $index };
	$count--;
	if( $count == 0 ){
		printf FILE " {0x%04x, 0x%04x}\n", $index, $code;
	} else {
		printf FILE " {0x%04x, 0x%04x},\n", $index, $code;
	}
}

print FILE "};\n";
close(FILE);

#
# then generate EUC_JP --> UTF8 table
#
# In every section below the table key is built BEFORE the duplicate
# check, so that the check looks up the same key that gets stored.
#
%array = ();
$count = 0;

#
# JIS0201
#
$in_file = "JIS0201.TXT";

open( FILE, $in_file ) || die( "cannot open $in_file" );

while( <FILE> ){
	chomp;
	next if /^#/;

	( $c, $u, $rest ) = split;
	$ucs = hex($u);
	$code = hex($c);

	next unless $code >= 0x80 && $ucs >= 0x100;

	$utf = &ucs2utf($ucs);

	# add single shift 2
	$code |= 0x8e00;
	if( exists $array{ $code } ){
		printf STDERR "Warning: duplicate code: %04x\n",$code;
		next;
	}
	$count++;
	$array{ $code } = $utf;
}
close( FILE );

#
# JIS0208
#
$in_file = "JIS0208.TXT";

open( FILE, $in_file ) || die( "cannot open $in_file" );

while( <FILE> ){
	chomp;
	next if /^#/;

	( $s, $c, $u, $rest ) = split;
	$ucs = hex($u);
	$code = hex($c);

	next unless $code >= 0x80 && $ucs >= 0x100;

	$utf = &ucs2utf($ucs);

	$code |= 0x8080;
	if( exists $array{ $code } ){
		printf STDERR "Warning: duplicate code: %04x\n",$code;
		next;
	}
	$count++;
	$array{ $code } = $utf;
}
close( FILE );

#
# JIS0212
#
$in_file = "JIS0212.TXT";

open( FILE, $in_file ) || die( "cannot open $in_file" );

while( <FILE> ){
	chomp;
	next if /^#/;

	( $c, $u, $rest ) = split;
	$ucs = hex($u);
	$code = hex($c);

	next unless $code >= 0x80 && $ucs >= 0x100;

	$utf = &ucs2utf($ucs);

	$code |= 0x8f8080;
	if( exists $array{ $code } ){
		printf STDERR "Warning: duplicate code: %04x\n",$code;
		next;
	}
	$count++;
	$array{ $code } = $utf;
}
close( FILE );

$file = "euc_jp_to_utf8.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "static pg_local_to_utf LUmapEUC_JP[ $count ] = {\n";

for $index ( sort {$a <=> $b} keys( %array ) ){
	$utf = $array{ $index };
	$count--;
	if( $count == 0 ){
		printf FILE " {0x%04x, 0x%04x}\n", $index, $utf;
	} else {
		printf FILE " {0x%04x, 0x%04x},\n", $index, $utf;
	}
}

print FILE "};\n";
close(FILE);
#! /usr/bin/perl
#
# Copyright 2001 by PostgreSQL Global Development Group
#
# $Id: UCS_to_EUC_KR.pl,v 1.1 2000/10/30 10:40:29 ishii Exp $
#
# Generate UTF-8 <--> EUC_KR code conversion tables from
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
# you have to obtain OLD5601.TXT from
# the organization's ftp site.
#
# OLD5601.TXT format:
# KSC5601 code in hex
# UCS-2 code in hex
# # and Unicode name (not used in this script)
require "ucs2utf.pl";

$in_file = "OLD5601.TXT";

#
# first, generate UTF8 --> EUC_KR table
#
open( FILE, $in_file ) || die( "cannot open $in_file" );

# Start from an empty map and a zero entry count.
%array = ();
$count = 0;

while( <FILE> ){
	chomp;
	next if /^#/;

	( $c, $u, $rest ) = split;
	$ucs = hex($u);
	$code = hex($c);

	# Only rows with code >= 0x80 and UCS >= 0x100 go into the table.
	next unless $code >= 0x80 && $ucs >= 0x100;

	$utf = &ucs2utf($ucs);
	if( exists $array{ $utf } ){
		printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
		next;
	}
	$count++;

	# The stored value is the code with 0x8080 OR-ed in.
	$array{ $utf } = ($code | 0x8080);
}
close( FILE );

$file = "utf8_to_euc_kr.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "static pg_utf_to_local ULmapEUC_KR[ $count ] = {\n";

# Entries are written in ascending numeric key order; $count is counted
# back down so that the last entry is written without a trailing comma.
for $index ( sort {$a <=> $b} keys( %array ) ){
	$code = $array{ $index };
	$count--;
	if( $count == 0 ){
		printf FILE " {0x%04x, 0x%04x}\n", $index, $code;
	} else {
		printf FILE " {0x%04x, 0x%04x},\n", $index, $code;
	}
}

print FILE "};\n";
close(FILE);

#
# then generate EUC_KR --> UTF8 table
#
%array = ();
$count = 0;

open( FILE, $in_file ) || die( "cannot open $in_file" );

while( <FILE> ){
	chomp;
	next if /^#/;

	( $c, $u, $rest ) = split;
	$ucs = hex($u);
	$code = hex($c);

	next unless $code >= 0x80 && $ucs >= 0x100;

	$utf = &ucs2utf($ucs);

	# Build the real table key first: the duplicate check has to look up
	# the same key that is stored, otherwise it can never match and
	# $count would drift away from the number of entries.
	$code |= 0x8080;
	if( exists $array{ $code } ){
		printf STDERR "Warning: duplicate code: %04x\n",$code;
		next;
	}
	$count++;
	$array{ $code } = $utf;
}
close( FILE );

$file = "euc_kr_to_utf8.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "static pg_local_to_utf LUmapEUC_KR[ $count ] = {\n";

for $index ( sort {$a <=> $b} keys( %array ) ){
	$utf = $array{ $index };
	$count--;
	if( $count == 0 ){
		printf FILE " {0x%04x, 0x%04x}\n", $index, $utf;
	} else {
		printf FILE " {0x%04x, 0x%04x},\n", $index, $utf;
	}
}

print FILE "};\n";
close(FILE);
#! /usr/bin/perl
#
# Copyright 2001 by PostgreSQL Global Development Group
#
# $Id: UCS_to_EUC_TW.pl,v 1.1 2000/10/30 10:40:29 ishii Exp $
#
# Generate UTF-8 <--> EUC_TW code conversion tables from
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
# you have to obtain CNS11643.TXT from
# the organization's ftp site.
#
# CNS11643.TXT format:
# CNS11643 code in hex (3 bytes)
# (I guess the first byte means the plane No.)
# UCS-2 code in hex
# # and Unicode name (not used in this script)
require "ucs2utf.pl";

$in_file = "CNS11643.TXT";

#
# first, generate UTF8 --> EUC_TW table
#
open( FILE, $in_file ) || die( "cannot open $in_file" );

# Start from an empty map and a zero entry count.
%array = ();
$count = 0;

while( <FILE> ){
	chomp;
	next if /^#/;

	( $c, $u, $rest ) = split;
	$ucs = hex($u);
	$code = hex($c);

	# Only rows with code >= 0x80 and UCS >= 0x100 go into the table.
	next unless $code >= 0x80 && $ucs >= 0x100;

	$utf = &ucs2utf($ucs);
	if( exists $array{ $utf } ){
		printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
		next;
	}

	# Plane number is taken from bits 16..20 of the code.
	$plane = ($code & 0x1f0000) >> 16;
	if ($plane > 16) {
		printf STDERR "Warning: invalid plane No.$plane. ignored\n";
		next;
	}

	# Count the row only now that it is certain to be stored; counting
	# before the plane check would leave $count larger than the table.
	$count++;

	if ($plane == 1) {
		# plane 1: two-byte form
		$array{ $utf } = (($code & 0xffff) | 0x8080);
	} else {
		# other planes: four-byte form, 0x8e then (0xa0 + plane)
		$array{ $utf } = (0x8ea00000 + ($plane << 16)) | (($code & 0xffff) | 0x8080);
	}
}
close( FILE );

$file = "utf8_to_euc_tw.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "static pg_utf_to_local ULmapEUC_TW[ $count ] = {\n";

# Entries are written in ascending numeric key order; $count is counted
# back down so that the last entry is written without a trailing comma.
for $index ( sort {$a <=> $b} keys( %array ) ){
	$code = $array{ $index };
	$count--;
	if( $count == 0 ){
		printf FILE " {0x%04x, 0x%04x}\n", $index, $code;
	} else {
		printf FILE " {0x%04x, 0x%04x},\n", $index, $code;
	}
}

print FILE "};\n";
close(FILE);

#
# then generate EUC_TW --> UTF8 table
#
%array = ();
$count = 0;

open( FILE, $in_file ) || die( "cannot open $in_file" );

while( <FILE> ){
	chomp;
	next if /^#/;

	( $c, $u, $rest ) = split;
	$ucs = hex($u);
	$code = hex($c);

	next unless $code >= 0x80 && $ucs >= 0x100;

	$utf = &ucs2utf($ucs);

	$plane = ($code & 0x1f0000) >> 16;
	if ($plane > 16) {
		printf STDERR "Warning: invalid plane No.$plane. ignored\n";
		next;
	}

	# Build the actual table keys before testing for duplicates, so the
	# lookup uses the same key that is stored.
	$short = (($code & 0xffff) | 0x8080);
	$long = (0x8ea00000 + ($plane << 16)) | $short;

	if( exists $array{ $long } ){
		printf STDERR "Warning: duplicate code: %04x\n",$long;
		next;
	}

	# A plane 1 character gets two entries: the two-byte form and the
	# four-byte form.  Every other plane gets the four-byte form only.
	if ($plane == 1) {
		$array{ $short } = $utf;
		$count++;
	}
	$array{ $long } = $utf;
	$count++;
}
close( FILE );

$file = "euc_tw_to_utf8.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "static pg_local_to_utf LUmapEUC_TW[ $count ] = {\n";

for $index ( sort {$a <=> $b} keys( %array ) ){
	$utf = $array{ $index };
	$count--;
	if( $count == 0 ){
		printf FILE " {0x%04x, 0x%04x}\n", $index, $utf;
	} else {
		printf FILE " {0x%04x, 0x%04x},\n", $index, $utf;
	}
}

print FILE "};\n";
close(FILE);
#! /usr/bin/perl
#
# Copyright 2001 by PostgreSQL Global Development Group
#
# $Id: UCS_to_SJIS.pl,v 1.1 2000/10/30 10:40:29 ishii Exp $
#
# Generate UTF-8 <--> SJIS code conversion tables from
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
# you have to obtain SHIFTJIS.TXT from
# the organization's ftp site.
#
# SHIFTJIS.TXT format:
# SHIFTJIS code in hex
# UCS-2 code in hex
# # and Unicode name (not used in this script)
# Warning: SHIFTJIS.TXT contains only JIS0201 and JIS0208. no JIS0212.
require "ucs2utf.pl";

$in_file = "SHIFTJIS.TXT";

#
# first, generate UTF8 --> SJIS table
#
open( FILE, $in_file ) || die( "cannot open $in_file" );

# Start from an empty map and a zero entry count.
%array = ();
$count = 0;

while( <FILE> ){
	chomp;
	next if /^#/;

	( $c, $u, $rest ) = split;
	$ucs = hex($u);
	$code = hex($c);

	# Only rows with code >= 0x80 and UCS >= 0x100 go into the table.
	next unless $code >= 0x80 && $ucs >= 0x100;

	$utf = &ucs2utf($ucs);
	if( exists $array{ $utf } ){
		printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
		next;
	}
	$count++;
	$array{ $utf } = $code;
}
close( FILE );

$file = "utf8_to_sjis.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "static pg_utf_to_local ULmapSJIS[ $count ] = {\n";

# Entries are written in ascending numeric key order; $count is counted
# back down so that the last entry is written without a trailing comma.
for $index ( sort {$a <=> $b} keys( %array ) ){
	$code = $array{ $index };
	$count--;
	if( $count == 0 ){
		printf FILE " {0x%04x, 0x%04x}\n", $index, $code;
	} else {
		printf FILE " {0x%04x, 0x%04x},\n", $index, $code;
	}
}

print FILE "};\n";
close(FILE);

#
# then generate SJIS --> UTF8 table
#
open( FILE, $in_file ) || die( "cannot open $in_file" );

%array = ();
$count = 0;

while( <FILE> ){
	chomp;
	next if /^#/;

	( $c, $u, $rest ) = split;
	$ucs = hex($u);
	$code = hex($c);

	next unless $code >= 0x80 && $ucs >= 0x100;

	$utf = &ucs2utf($ucs);

	# Keyed by SJIS code here, so report the offending code itself.
	if( exists $array{ $code } ){
		printf STDERR "Warning: duplicate code: %04x\n",$code;
		next;
	}
	$count++;
	$array{ $code } = $utf;
}
close( FILE );

$file = "sjis_to_utf8.map";
open( FILE, "> $file" ) || die( "cannot open $file" );
print FILE "static pg_local_to_utf LUmapSJIS[ $count ] = {\n";

for $index ( sort {$a <=> $b} keys( %array ) ){
	$utf = $array{ $index };
	$count--;
	if( $count == 0 ){
		printf FILE " {0x%04x, 0x%04x}\n", $index, $utf;
	} else {
		printf FILE " {0x%04x, 0x%04x},\n", $index, $utf;
	}
}

print FILE "};\n";
close(FILE);
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
static pg_local_to_utf LUmapISO8859_2[ 57 ] = {
{0x00a1, 0xc484},
{0x00a2, 0xcb98},
{0x00a3, 0xc581},
{0x00a5, 0xc4bd},
{0x00a6, 0xc59a},
{0x00a9, 0xc5a0},
{0x00aa, 0xc59e},
{0x00ab, 0xc5a4},
{0x00ac, 0xc5b9},
{0x00ae, 0xc5bd},
{0x00af, 0xc5bb},
{0x00b1, 0xc485},
{0x00b2, 0xcb9b},
{0x00b3, 0xc582},
{0x00b5, 0xc4be},
{0x00b6, 0xc59b},
{0x00b7, 0xcb87},
{0x00b9, 0xc5a1},
{0x00ba, 0xc59f},
{0x00bb, 0xc5a5},
{0x00bc, 0xc5ba},
{0x00bd, 0xcb9d},
{0x00be, 0xc5be},
{0x00bf, 0xc5bc},
{0x00c0, 0xc594},
{0x00c3, 0xc482},
{0x00c5, 0xc4b9},
{0x00c6, 0xc486},
{0x00c8, 0xc48c},
{0x00ca, 0xc498},
{0x00cc, 0xc49a},
{0x00cf, 0xc48e},
{0x00d0, 0xc490},
{0x00d1, 0xc583},
{0x00d2, 0xc587},
{0x00d5, 0xc590},
{0x00d8, 0xc598},
{0x00d9, 0xc5ae},
{0x00db, 0xc5b0},
{0x00de, 0xc5a2},
{0x00e0, 0xc595},
{0x00e3, 0xc483},
{0x00e5, 0xc4ba},
{0x00e6, 0xc487},
{0x00e8, 0xc48d},
{0x00ea, 0xc499},
{0x00ec, 0xc49b},
{0x00ef, 0xc48f},
{0x00f0, 0xc491},
{0x00f1, 0xc584},
{0x00f2, 0xc588},
{0x00f5, 0xc591},
{0x00f8, 0xc599},
{0x00f9, 0xc5af},
{0x00fb, 0xc5b1},
{0x00fe, 0xc5a3},
{0x00ff, 0xcb99}
};
static pg_local_to_utf LUmapISO8859_3[ 28 ] = {
{0x00a1, 0xc4a6},
{0x00a2, 0xcb98},
{0x00a6, 0xc4a4},
{0x00a9, 0xc4b0},
{0x00aa, 0xc59e},
{0x00ab, 0xc49e},
{0x00ac, 0xc4b4},
{0x00af, 0xc5bb},
{0x00b1, 0xc4a7},
{0x00b6, 0xc4a5},
{0x00b9, 0xc4b1},
{0x00ba, 0xc59f},
{0x00bb, 0xc49f},
{0x00bc, 0xc4b5},
{0x00bf, 0xc5bc},
{0x00c5, 0xc48a},
{0x00c6, 0xc488},
{0x00d5, 0xc4a0},
{0x00d8, 0xc49c},
{0x00dd, 0xc5ac},
{0x00de, 0xc59c},
{0x00e5, 0xc48b},
{0x00e6, 0xc489},
{0x00f5, 0xc4a1},
{0x00f8, 0xc49d},
{0x00fd, 0xc5ad},
{0x00fe, 0xc59d},
{0x00ff, 0xcb99}
};
static pg_local_to_utf LUmapISO8859_4[ 50 ] = {
{0x00a1, 0xc484},
{0x00a2, 0xc4b8},
{0x00a3, 0xc596},
{0x00a5, 0xc4a8},
{0x00a6, 0xc4bb},
{0x00a9, 0xc5a0},
{0x00aa, 0xc492},
{0x00ab, 0xc4a2},
{0x00ac, 0xc5a6},
{0x00ae, 0xc5bd},
{0x00b1, 0xc485},
{0x00b2, 0xcb9b},
{0x00b3, 0xc597},
{0x00b5, 0xc4a9},
{0x00b6, 0xc4bc},
{0x00b7, 0xcb87},
{0x00b9, 0xc5a1},
{0x00ba, 0xc493},
{0x00bb, 0xc4a3},
{0x00bc, 0xc5a7},
{0x00bd, 0xc58a},
{0x00be, 0xc5be},
{0x00bf, 0xc58b},
{0x00c0, 0xc480},
{0x00c7, 0xc4ae},
{0x00c8, 0xc48c},
{0x00ca, 0xc498},
{0x00cc, 0xc496},
{0x00cf, 0xc4aa},
{0x00d0, 0xc490},
{0x00d1, 0xc585},
{0x00d2, 0xc58c},
{0x00d3, 0xc4b6},
{0x00d9, 0xc5b2},
{0x00dd, 0xc5a8},
{0x00de, 0xc5aa},
{0x00e0, 0xc481},
{0x00e7, 0xc4af},
{0x00e8, 0xc48d},
{0x00ea, 0xc499},
{0x00ec, 0xc497},
{0x00ef, 0xc4ab},
{0x00f0, 0xc491},
{0x00f1, 0xc586},
{0x00f2, 0xc58d},
{0x00f3, 0xc4b7},
{0x00f9, 0xc5b3},
{0x00fd, 0xc5a9},
{0x00fe, 0xc5ab},
{0x00ff, 0xcb99}
};
static pg_local_to_utf LUmapISO8859_5[ 93 ] = {
{0x00a1, 0xd081},
{0x00a2, 0xd082},
{0x00a3, 0xd083},
{0x00a4, 0xd084},
{0x00a5, 0xd085},
{0x00a6, 0xd086},
{0x00a7, 0xd087},
{0x00a8, 0xd088},
{0x00a9, 0xd089},
{0x00aa, 0xd08a},
{0x00ab, 0xd08b},
{0x00ac, 0xd08c},
{0x00ae, 0xd08e},
{0x00af, 0xd08f},
{0x00b0, 0xd090},
{0x00b1, 0xd091},
{0x00b2, 0xd092},
{0x00b3, 0xd093},
{0x00b4, 0xd094},
{0x00b5, 0xd095},
{0x00b6, 0xd096},
{0x00b7, 0xd097},
{0x00b8, 0xd098},
{0x00b9, 0xd099},
{0x00ba, 0xd09a},
{0x00bb, 0xd09b},
{0x00bc, 0xd09c},
{0x00bd, 0xd09d},
{0x00be, 0xd09e},
{0x00bf, 0xd09f},
{0x00c0, 0xd0a0},
{0x00c1, 0xd0a1},
{0x00c2, 0xd0a2},
{0x00c3, 0xd0a3},
{0x00c4, 0xd0a4},
{0x00c5, 0xd0a5},
{0x00c6, 0xd0a6},
{0x00c7, 0xd0a7},
{0x00c8, 0xd0a8},
{0x00c9, 0xd0a9},
{0x00ca, 0xd0aa},
{0x00cb, 0xd0ab},
{0x00cc, 0xd0ac},
{0x00cd, 0xd0ad},
{0x00ce, 0xd0ae},
{0x00cf, 0xd0af},
{0x00d0, 0xd0b0},
{0x00d1, 0xd0b1},
{0x00d2, 0xd0b2},
{0x00d3, 0xd0b3},
{0x00d4, 0xd0b4},
{0x00d5, 0xd0b5},
{0x00d6, 0xd0b6},
{0x00d7, 0xd0b7},
{0x00d8, 0xd0b8},
{0x00d9, 0xd0b9},
{0x00da, 0xd0ba},
{0x00db, 0xd0bb},
{0x00dc, 0xd0bc},
{0x00dd, 0xd0bd},
{0x00de, 0xd0be},
{0x00df, 0xd0bf},
{0x00e0, 0xd180},
{0x00e1, 0xd181},
{0x00e2, 0xd182},
{0x00e3, 0xd183},
{0x00e4, 0xd184},
{0x00e5, 0xd185},
{0x00e6, 0xd186},
{0x00e7, 0xd187},
{0x00e8, 0xd188},
{0x00e9, 0xd189},
{0x00ea, 0xd18a},
{0x00eb, 0xd18b},
{0x00ec, 0xd18c},
{0x00ed, 0xd18d},
{0x00ee, 0xd18e},
{0x00ef, 0xd18f},
{0x00f0, 0xe28496},
{0x00f1, 0xd191},
{0x00f2, 0xd192},
{0x00f3, 0xd193},
{0x00f4, 0xd194},
{0x00f5, 0xd195},
{0x00f6, 0xd196},
{0x00f7, 0xd197},
{0x00f8, 0xd198},
{0x00f9, 0xd199},
{0x00fa, 0xd19a},
{0x00fb, 0xd19b},
{0x00fc, 0xd19c},
{0x00fe, 0xd19e},
{0x00ff, 0xd19f}
};
This source diff could not be displayed because it is too large. You can view the blob instead.
#
# $Id: ucs2utf.pl,v 1.1 2000/10/30 10:40:30 ishii Exp $
# convert UCS-2 to UTF-8
#
sub ucs2utf {
	# Takes one UCS-2 code point and returns its UTF-8 encoding packed
	# into a single integer, first byte in the most significant position.
	my ($ucs) = @_;

	# up to 0x007f: returned unchanged
	return($ucs) if $ucs <= 0x007f;

	# six low bits, tagged with 0x80, form the last byte in both
	# multi-byte layouts
	my $tail = ($ucs & 0x003f) | 0x80;

	# 0x0080 .. 0x07ff: two bytes, lead byte tagged with 0xc0
	if ($ucs <= 0x07ff) {
		my $lead = ($ucs >> 6) | 0xc0;
		return(($lead << 8) | $tail);
	}

	# everything above: three bytes, lead byte tagged with 0xe0
	my $lead = ($ucs >> 12) | 0xe0;
	my $middle = (($ucs & 0x0fc0) >> 6) | 0x80;
	return(($lead << 16) | ($middle << 8) | $tail);
}
1;
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
static pg_utf_to_local ULmapISO8859_2[ 57 ] = {
{0xc482, 0x00c3},
{0xc483, 0x00e3},
{0xc484, 0x00a1},
{0xc485, 0x00b1},
{0xc486, 0x00c6},
{0xc487, 0x00e6},
{0xc48c, 0x00c8},
{0xc48d, 0x00e8},
{0xc48e, 0x00cf},
{0xc48f, 0x00ef},
{0xc490, 0x00d0},
{0xc491, 0x00f0},
{0xc498, 0x00ca},
{0xc499, 0x00ea},
{0xc49a, 0x00cc},
{0xc49b, 0x00ec},
{0xc4b9, 0x00c5},
{0xc4ba, 0x00e5},
{0xc4bd, 0x00a5},
{0xc4be, 0x00b5},
{0xc581, 0x00a3},
{0xc582, 0x00b3},
{0xc583, 0x00d1},
{0xc584, 0x00f1},
{0xc587, 0x00d2},
{0xc588, 0x00f2},
{0xc590, 0x00d5},
{0xc591, 0x00f5},
{0xc594, 0x00c0},
{0xc595, 0x00e0},
{0xc598, 0x00d8},
{0xc599, 0x00f8},
{0xc59a, 0x00a6},
{0xc59b, 0x00b6},
{0xc59e, 0x00aa},
{0xc59f, 0x00ba},
{0xc5a0, 0x00a9},
{0xc5a1, 0x00b9},
{0xc5a2, 0x00de},
{0xc5a3, 0x00fe},
{0xc5a4, 0x00ab},
{0xc5a5, 0x00bb},
{0xc5ae, 0x00d9},
{0xc5af, 0x00f9},
{0xc5b0, 0x00db},
{0xc5b1, 0x00fb},
{0xc5b9, 0x00ac},
{0xc5ba, 0x00bc},
{0xc5bb, 0x00af},
{0xc5bc, 0x00bf},
{0xc5bd, 0x00ae},
{0xc5be, 0x00be},
{0xcb87, 0x00b7},
{0xcb98, 0x00a2},
{0xcb99, 0x00ff},
{0xcb9b, 0x00b2},
{0xcb9d, 0x00bd}
};
static pg_utf_to_local ULmapISO8859_3[ 28 ] = {
{0xc488, 0x00c6},
{0xc489, 0x00e6},
{0xc48a, 0x00c5},
{0xc48b, 0x00e5},
{0xc49c, 0x00d8},
{0xc49d, 0x00f8},
{0xc49e, 0x00ab},
{0xc49f, 0x00bb},
{0xc4a0, 0x00d5},
{0xc4a1, 0x00f5},
{0xc4a4, 0x00a6},
{0xc4a5, 0x00b6},
{0xc4a6, 0x00a1},
{0xc4a7, 0x00b1},
{0xc4b0, 0x00a9},
{0xc4b1, 0x00b9},
{0xc4b4, 0x00ac},
{0xc4b5, 0x00bc},
{0xc59c, 0x00de},
{0xc59d, 0x00fe},
{0xc59e, 0x00aa},
{0xc59f, 0x00ba},
{0xc5ac, 0x00dd},
{0xc5ad, 0x00fd},
{0xc5bb, 0x00af},
{0xc5bc, 0x00bf},
{0xcb98, 0x00a2},
{0xcb99, 0x00ff}
};
static pg_utf_to_local ULmapISO8859_4[ 50 ] = {
{0xc480, 0x00c0},
{0xc481, 0x00e0},
{0xc484, 0x00a1},
{0xc485, 0x00b1},
{0xc48c, 0x00c8},
{0xc48d, 0x00e8},
{0xc490, 0x00d0},
{0xc491, 0x00f0},
{0xc492, 0x00aa},
{0xc493, 0x00ba},
{0xc496, 0x00cc},
{0xc497, 0x00ec},
{0xc498, 0x00ca},
{0xc499, 0x00ea},
{0xc4a2, 0x00ab},
{0xc4a3, 0x00bb},
{0xc4a8, 0x00a5},
{0xc4a9, 0x00b5},
{0xc4aa, 0x00cf},
{0xc4ab, 0x00ef},
{0xc4ae, 0x00c7},
{0xc4af, 0x00e7},
{0xc4b6, 0x00d3},
{0xc4b7, 0x00f3},
{0xc4b8, 0x00a2},
{0xc4bb, 0x00a6},
{0xc4bc, 0x00b6},
{0xc585, 0x00d1},
{0xc586, 0x00f1},
{0xc58a, 0x00bd},
{0xc58b, 0x00bf},
{0xc58c, 0x00d2},
{0xc58d, 0x00f2},
{0xc596, 0x00a3},
{0xc597, 0x00b3},
{0xc5a0, 0x00a9},
{0xc5a1, 0x00b9},
{0xc5a6, 0x00ac},
{0xc5a7, 0x00bc},
{0xc5a8, 0x00dd},
{0xc5a9, 0x00fd},
{0xc5aa, 0x00de},
{0xc5ab, 0x00fe},
{0xc5b2, 0x00d9},
{0xc5b3, 0x00f9},
{0xc5bd, 0x00ae},
{0xc5be, 0x00be},
{0xcb87, 0x00b7},
{0xcb99, 0x00ff},
{0xcb9b, 0x00b2}
};
static pg_utf_to_local ULmapISO8859_5[ 93 ] = {
{0xd081, 0x00a1},
{0xd082, 0x00a2},
{0xd083, 0x00a3},
{0xd084, 0x00a4},
{0xd085, 0x00a5},
{0xd086, 0x00a6},
{0xd087, 0x00a7},
{0xd088, 0x00a8},
{0xd089, 0x00a9},
{0xd08a, 0x00aa},
{0xd08b, 0x00ab},
{0xd08c, 0x00ac},
{0xd08e, 0x00ae},
{0xd08f, 0x00af},
{0xd090, 0x00b0},
{0xd091, 0x00b1},
{0xd092, 0x00b2},
{0xd093, 0x00b3},
{0xd094, 0x00b4},
{0xd095, 0x00b5},
{0xd096, 0x00b6},
{0xd097, 0x00b7},
{0xd098, 0x00b8},
{0xd099, 0x00b9},
{0xd09a, 0x00ba},
{0xd09b, 0x00bb},
{0xd09c, 0x00bc},
{0xd09d, 0x00bd},
{0xd09e, 0x00be},
{0xd09f, 0x00bf},
{0xd0a0, 0x00c0},
{0xd0a1, 0x00c1},
{0xd0a2, 0x00c2},
{0xd0a3, 0x00c3},
{0xd0a4, 0x00c4},
{0xd0a5, 0x00c5},
{0xd0a6, 0x00c6},
{0xd0a7, 0x00c7},
{0xd0a8, 0x00c8},
{0xd0a9, 0x00c9},
{0xd0aa, 0x00ca},
{0xd0ab, 0x00cb},
{0xd0ac, 0x00cc},
{0xd0ad, 0x00cd},
{0xd0ae, 0x00ce},
{0xd0af, 0x00cf},
{0xd0b0, 0x00d0},
{0xd0b1, 0x00d1},
{0xd0b2, 0x00d2},
{0xd0b3, 0x00d3},
{0xd0b4, 0x00d4},
{0xd0b5, 0x00d5},
{0xd0b6, 0x00d6},
{0xd0b7, 0x00d7},
{0xd0b8, 0x00d8},
{0xd0b9, 0x00d9},
{0xd0ba, 0x00da},
{0xd0bb, 0x00db},
{0xd0bc, 0x00dc},
{0xd0bd, 0x00dd},
{0xd0be, 0x00de},
{0xd0bf, 0x00df},
{0xd180, 0x00e0},
{0xd181, 0x00e1},
{0xd182, 0x00e2},
{0xd183, 0x00e3},
{0xd184, 0x00e4},
{0xd185, 0x00e5},
{0xd186, 0x00e6},
{0xd187, 0x00e7},
{0xd188, 0x00e8},
{0xd189, 0x00e9},
{0xd18a, 0x00ea},
{0xd18b, 0x00eb},
{0xd18c, 0x00ec},
{0xd18d, 0x00ed},
{0xd18e, 0x00ee},
{0xd18f, 0x00ef},
{0xd191, 0x00f1},
{0xd192, 0x00f2},
{0xd193, 0x00f3},
{0xd194, 0x00f4},
{0xd195, 0x00f5},
{0xd196, 0x00f6},
{0xd197, 0x00f7},
{0xd198, 0x00f8},
{0xd199, 0x00f9},
{0xd19a, 0x00fa},
{0xd19b, 0x00fb},
{0xd19c, 0x00fc},
{0xd19e, 0x00fe},
{0xd19f, 0x00ff},
{0xe28496, 0x00f0}
};
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -2,7 +2,7 @@
* This file contains some public functions
* usable for both the backend and the frontend.
* Tatsuo Ishii
* $Id: common.c,v 1.9 2000/06/13 07:35:15 tgl Exp $ */
* $Id: common.c,v 1.10 2000/10/30 10:40:28 ishii Exp $ */
#include <stdlib.h>
#include <string.h>
......@@ -19,6 +19,7 @@
#include "mb/pg_wchar.h"
#include "utils/builtins.h"
#ifndef FRONTEND
/*
* convert encoding char to encoding symbol value.
* case is ignored.
......@@ -97,6 +98,8 @@ PG_encoding_to_char(PG_FUNCTION_ARGS)
PG_RETURN_NAME(pg_encoding_to_char(encoding));
}
#endif
/* returns the byte length of a multi-byte word for an encoding */
int
pg_encoding_mblen(int encoding, const unsigned char *mbstr)
......
......@@ -6,7 +6,7 @@
* WIN1250 client encoding support contributed by Pavel Behal
* SJIS UDC (NEC selection IBM kanji) support contributed by Eiji Tokuya
*
* $Id: conv.c,v 1.19 2000/10/27 02:23:51 ishii Exp $
* $Id: conv.c,v 1.20 2000/10/30 10:40:28 ishii Exp $
*
*
*/
......@@ -16,24 +16,33 @@
#include "mb/pg_wchar.h"
/*
* XXX dummy elog() function for frontend only. Note that elog would
* never be called from frontend, but to avoid the linking errors we
* have to do it anyway. In the future, we should consider reorganizing
* sources in this directory to avoid this kind of ugliness...
*/
#ifdef FRONTEND
static void
elog(int lev, const char *fmt, ...)
{
	/*
	 * Intentionally empty: this stub exists only so that frontend builds
	 * link; both arguments are ignored.
	 */
	(void) lev;
	(void) fmt;
}
#endif
#ifdef UNICODE_CONVERSION
/*
* for Unicode (UTF-8) support
*/
#include "iso8859.map" /* UTF-8 <--> ISO8859 map */
#include "UTF_to_EUC_JP.map" /* UTF-8 --> EUC_JP map */
#include "EUC_JP_to_UTF.map" /* UTF-8 <-- EUC_JP map */
#include "Unicode/utf8_to_iso8859_2.map"
#include "Unicode/utf8_to_iso8859_3.map"
#include "Unicode/utf8_to_iso8859_4.map"
#include "Unicode/utf8_to_iso8859_5.map"
#include "Unicode/iso8859_2_to_utf8.map"
#include "Unicode/iso8859_3_to_utf8.map"
#include "Unicode/iso8859_4_to_utf8.map"
#include "Unicode/iso8859_5_to_utf8.map"
#include "Unicode/utf8_to_euc_jp.map"
#include "Unicode/euc_jp_to_utf8.map"
#include "Unicode/utf8_to_euc_cn.map"
#include "Unicode/euc_cn_to_utf8.map"
#include "Unicode/utf8_to_euc_kr.map"
#include "Unicode/euc_kr_to_utf8.map"
#include "Unicode/utf8_to_euc_tw.map"
#include "Unicode/euc_tw_to_utf8.map"
#include "Unicode/utf8_to_sjis.map"
#include "Unicode/sjis_to_utf8.map"
#include "Unicode/utf8_to_big5.map"
#include "Unicode/big5_to_utf8.map"
#endif /* UNICODE_CONVERSION */
/*
* SJIS alternative code.
......@@ -1119,6 +1128,8 @@ mic2win1250(unsigned char *mic, unsigned char *p, int len)
mic2latin_with_table(mic, p, len, LC_ISO8859_2, iso88592_2_win1250);
}
#ifdef UNICODE_CONVERSION
/*
* UNICODE(UTF-8) support
*/
......@@ -1223,21 +1234,20 @@ static int compare2(const void *p1, const void *p2)
/*
* UTF-8 ---> local code
*
* utf: input UTF-8 string. Its length is limited by the "len" parameter
* or a null terminator.
* iso: pointer to the output.
* map: the conversion map.
* size: the size of the conversion map.
*/
static void
utf_to_local(unsigned char *utf, unsigned char *iso,
pg_utf_to_local *map, int size, int encoding, int len)
pg_utf_to_local *map, int size, int len)
{
unsigned int iutf;
int l;
pg_utf_to_local *p;
pg_encoding_conv_tbl *e;
e = pg_get_enc_ent(encoding);
if (e == 0)
{
elog(ERROR, "Invalid encoding number %d", encoding);
}
for (;len > 0 && *utf; len -= l)
{
......@@ -1260,14 +1270,9 @@ utf_to_local(unsigned char *utf, unsigned char *iso,
}
p = bsearch(&iutf, map, size,
sizeof(pg_utf_to_local), compare1);
if (p == NULL || p->encoding != encoding)
if (p == NULL)
{
elog(NOTICE, "utf_to_latin: could not convert UTF-8 (0x%04x) to %s. Ignored",
iutf, e->name);
/*
printf("utf_to_latin: could not convert UTF-8 (0x%04x) to %s. Ignored",
iutf, e->name);
*/
elog(NOTICE, "utf_to_latin: could not convert UTF-8 (0x%04x) Ignored", iutf);
continue;
}
if (p->code & 0xff000000)
......@@ -1287,9 +1292,8 @@ utf_to_local(unsigned char *utf, unsigned char *iso,
*/
static void
utf_to_latin2(unsigned char *utf, unsigned char *iso, int len)
{
utf_to_local(utf, iso, mapISO8859, sizeof(mapISO8859)/sizeof(pg_utf_to_local), LATIN2, len);
utf_to_local(utf, iso, ULmapISO8859_2, sizeof(ULmapISO8859_2)/sizeof(pg_utf_to_local), len);
}
/*
......@@ -1297,9 +1301,8 @@ utf_to_latin2(unsigned char *utf, unsigned char *iso, int len)
*/
static void
utf_to_latin3(unsigned char *utf, unsigned char *iso, int len)
{
utf_to_local(utf, iso, mapISO8859, sizeof(mapISO8859)/sizeof(pg_utf_to_local), LATIN3, len);
utf_to_local(utf, iso, ULmapISO8859_3, sizeof(ULmapISO8859_3)/sizeof(pg_utf_to_local), len);
}
/*
......@@ -1308,7 +1311,7 @@ utf_to_latin3(unsigned char *utf, unsigned char *iso, int len)
static void
utf_to_latin4(unsigned char *utf, unsigned char *iso, int len)
{
utf_to_local(utf, iso, mapISO8859, sizeof(mapISO8859)/sizeof(pg_utf_to_local), LATIN4, len);
utf_to_local(utf, iso, ULmapISO8859_4, sizeof(ULmapISO8859_4)/sizeof(pg_utf_to_local), len);
}
/*
......@@ -1318,7 +1321,7 @@ static void
utf_to_latin5(unsigned char *utf, unsigned char *iso, int len)
{
utf_to_local(utf, iso, mapISO8859, sizeof(mapISO8859)/sizeof(pg_utf_to_local), LATIN5, len);
utf_to_local(utf, iso, ULmapISO8859_5, sizeof(ULmapISO8859_5)/sizeof(pg_utf_to_local), len);
}
/*
......@@ -1348,7 +1351,7 @@ local_to_utf(unsigned char *iso, unsigned char *utf,
continue;
}
l = pg_mblen_with_encoding(iso, encoding);
l = pg_encoding_mblen(encoding, iso);
if (l == 1)
{
......@@ -1378,10 +1381,6 @@ local_to_utf(unsigned char *iso, unsigned char *utf,
{
elog(NOTICE, "local_to_utf: could not convert (0x%04x) %s to UTF-8. Ignored",
iiso, e->name);
/*
printf("local_to_utf: could not convert (0x%04x) %s to UTF-8. Ignored",
iiso, e->name);
*/
continue;
}
if (p->utf & 0xff000000)
......@@ -1402,7 +1401,7 @@ local_to_utf(unsigned char *iso, unsigned char *utf,
static void
latin2_to_utf(unsigned char *iso, unsigned char *utf, int len)
{
local_to_utf(iso, utf, ISO8859_2, sizeof(ISO8859_2)/sizeof(pg_local_to_utf), LATIN2, len);
local_to_utf(iso, utf, LUmapISO8859_2, sizeof(LUmapISO8859_2)/sizeof(pg_local_to_utf), LATIN2, len);
}
/*
......@@ -1411,7 +1410,7 @@ latin2_to_utf(unsigned char *iso, unsigned char *utf, int len)
static void
latin3_to_utf(unsigned char *iso, unsigned char *utf, int len)
{
local_to_utf(iso, utf, ISO8859_3, sizeof(ISO8859_3)/sizeof(pg_local_to_utf), LATIN2, len);
local_to_utf(iso, utf, LUmapISO8859_3, sizeof(LUmapISO8859_3)/sizeof(pg_local_to_utf), LATIN3, len);
}
/*
......@@ -1420,7 +1419,7 @@ latin3_to_utf(unsigned char *iso, unsigned char *utf, int len)
static void
latin4_to_utf(unsigned char *iso, unsigned char *utf, int len)
{
local_to_utf(iso, utf, ISO8859_4, sizeof(ISO8859_4)/sizeof(pg_local_to_utf), LATIN2, len);
local_to_utf(iso, utf, LUmapISO8859_4, sizeof(LUmapISO8859_4)/sizeof(pg_local_to_utf), LATIN4, len);
}
/*
......@@ -1429,7 +1428,7 @@ latin4_to_utf(unsigned char *iso, unsigned char *utf, int len)
static void
latin5_to_utf(unsigned char *iso, unsigned char *utf, int len)
{
local_to_utf(iso, utf, ISO8859_5, sizeof(ISO8859_5)/sizeof(pg_local_to_utf), LATIN2, len);
local_to_utf(iso, utf, LUmapISO8859_5, sizeof(LUmapISO8859_5)/sizeof(pg_local_to_utf), LATIN5, len);
}
/*
......@@ -1439,8 +1438,8 @@ static void
utf_to_euc_jp(unsigned char *utf, unsigned char *euc, int len)
{
utf_to_local(utf, euc, mapUTF_to_EUC_JP,
sizeof(mapUTF_to_EUC_JP)/sizeof(pg_utf_to_local), EUC_JP, len);
utf_to_local(utf, euc, ULmapEUC_JP,
sizeof(ULmapEUC_JP)/sizeof(pg_utf_to_local), len);
}
/*
......@@ -1449,8 +1448,113 @@ utf_to_euc_jp(unsigned char *utf, unsigned char *euc, int len)
static void
euc_jp_to_utf(unsigned char *euc, unsigned char *utf, int len)
{
local_to_utf(euc, utf, mapEUC_JP_to_UTF,
sizeof(mapEUC_JP_to_UTF)/sizeof(pg_local_to_utf), EUC_JP, len);
local_to_utf(euc, utf, LUmapEUC_JP,
sizeof(LUmapEUC_JP)/sizeof(pg_local_to_utf), EUC_JP, len);
}
/*
 * Convert a UTF-8 string to EUC_CN.
 *
 * Thin wrapper: hands the UTF-8 --> EUC_CN map (ULmapEUC_CN) and its
 * entry count to utf_to_local(); "len" is passed through unchanged.
 */
static void
utf_to_euc_cn(unsigned char *utf, unsigned char *euc, int len)
{
	const int	map_entries = sizeof(ULmapEUC_CN) / sizeof(pg_utf_to_local);

	utf_to_local(utf, euc, ULmapEUC_CN, map_entries, len);
}
/*
 * Convert an EUC_CN string to UTF-8.
 *
 * Thin wrapper: hands the EUC_CN --> UTF-8 map (LUmapEUC_CN), its entry
 * count and the EUC_CN encoding id to local_to_utf().
 */
static void
euc_cn_to_utf(unsigned char *euc, unsigned char *utf, int len)
{
	const int	map_entries = sizeof(LUmapEUC_CN) / sizeof(pg_local_to_utf);

	local_to_utf(euc, utf, LUmapEUC_CN, map_entries, EUC_CN, len);
}
/*
 * Convert a UTF-8 string to EUC_KR.
 *
 * Thin wrapper: hands the UTF-8 --> EUC_KR map (ULmapEUC_KR) and its
 * entry count to utf_to_local(); "len" is passed through unchanged.
 */
static void
utf_to_euc_kr(unsigned char *utf, unsigned char *euc, int len)
{
	const int	map_entries = sizeof(ULmapEUC_KR) / sizeof(pg_utf_to_local);

	utf_to_local(utf, euc, ULmapEUC_KR, map_entries, len);
}
/*
 * Convert an EUC_KR string to UTF-8.
 *
 * Thin wrapper: hands the EUC_KR --> UTF-8 map (LUmapEUC_KR), its entry
 * count and the EUC_KR encoding id to local_to_utf().
 */
static void
euc_kr_to_utf(unsigned char *euc, unsigned char *utf, int len)
{
	const int	map_entries = sizeof(LUmapEUC_KR) / sizeof(pg_local_to_utf);

	local_to_utf(euc, utf, LUmapEUC_KR, map_entries, EUC_KR, len);
}
/*
 * Convert a UTF-8 string to EUC_TW.
 *
 * Thin wrapper: hands the UTF-8 --> EUC_TW map (ULmapEUC_TW) and its
 * entry count to utf_to_local(); "len" is passed through unchanged.
 */
static void
utf_to_euc_tw(unsigned char *utf, unsigned char *euc, int len)
{
	const int	map_entries = sizeof(ULmapEUC_TW) / sizeof(pg_utf_to_local);

	utf_to_local(utf, euc, ULmapEUC_TW, map_entries, len);
}
/*
 * Convert an EUC_TW string to UTF-8.
 *
 * Thin wrapper: hands the EUC_TW --> UTF-8 map (LUmapEUC_TW), its entry
 * count and the EUC_TW encoding id to local_to_utf().
 */
static void
euc_tw_to_utf(unsigned char *euc, unsigned char *utf, int len)
{
	const int	map_entries = sizeof(LUmapEUC_TW) / sizeof(pg_local_to_utf);

	local_to_utf(euc, utf, LUmapEUC_TW, map_entries, EUC_TW, len);
}
/*
 * Convert a UTF-8 string to SJIS.
 *
 * Thin wrapper: hands the UTF-8 --> SJIS map (ULmapSJIS) and its entry
 * count to utf_to_local().  The output parameter is named "euc" but
 * receives the SJIS result.
 */
static void
utf_to_sjis(unsigned char *utf, unsigned char *euc, int len)
{
	const int	map_entries = sizeof(ULmapSJIS) / sizeof(pg_utf_to_local);

	utf_to_local(utf, euc, ULmapSJIS, map_entries, len);
}
/*
 * Convert an SJIS string to UTF-8.
 *
 * Thin wrapper: hands the SJIS --> UTF-8 map (LUmapSJIS), its entry
 * count and the SJIS encoding id to local_to_utf().  The input parameter
 * is named "euc" but carries the SJIS source string.
 */
static void
sjis_to_utf(unsigned char *euc, unsigned char *utf, int len)
{
	const int	map_entries = sizeof(LUmapSJIS) / sizeof(pg_local_to_utf);

	local_to_utf(euc, utf, LUmapSJIS, map_entries, SJIS, len);
}
/*
 * Convert a UTF-8 string to BIG5.
 *
 * Thin wrapper: hands the UTF-8 --> BIG5 map (ULmapBIG5) and its entry
 * count to utf_to_local().  The output parameter is named "euc" but
 * receives the BIG5 result.
 */
static void
utf_to_big5(unsigned char *utf, unsigned char *euc, int len)
{
	const int	map_entries = sizeof(ULmapBIG5) / sizeof(pg_utf_to_local);

	utf_to_local(utf, euc, ULmapBIG5, map_entries, len);
}
/*
 * Convert a BIG5 string to UTF-8.
 *
 * Thin wrapper: hands the BIG5 --> UTF-8 map (LUmapBIG5), its entry
 * count and the BIG5 encoding id to local_to_utf().  The input parameter
 * is named "euc" but carries the BIG5 source string.
 */
static void
big5_to_utf(unsigned char *euc, unsigned char *utf, int len)
{
	const int	map_entries = sizeof(LUmapBIG5) / sizeof(pg_local_to_utf);

	local_to_utf(euc, utf, LUmapBIG5, map_entries, BIG5, len);
}
/*-----------------------------------------------------------------*/
......@@ -1460,9 +1564,12 @@ pg_encoding_conv_tbl pg_conv_tbl[] = {
ascii2utf, utf2ascii}, /* SQL/ASCII */
{EUC_JP, "EUC_JP", 0, euc_jp2mic, mic2euc_jp,
euc_jp_to_utf, utf_to_euc_jp}, /* EUC_JP */
{EUC_CN, "EUC_CN", 0, euc_cn2mic, mic2euc_cn}, /* EUC_CN */
{EUC_KR, "EUC_KR", 0, euc_kr2mic, mic2euc_kr}, /* EUC_KR */
{EUC_TW, "EUC_TW", 0, euc_tw2mic, mic2euc_tw}, /* EUC_TW */
{EUC_CN, "EUC_CN", 0, euc_cn2mic, mic2euc_cn,
euc_cn_to_utf, utf_to_euc_cn}, /* EUC_CN */
{EUC_KR, "EUC_KR", 0, euc_kr2mic, mic2euc_kr,
euc_kr_to_utf, utf_to_euc_kr}, /* EUC_KR */
{EUC_TW, "EUC_TW", 0, euc_tw2mic, mic2euc_tw,
euc_tw_to_utf, utf_to_euc_tw}, /* EUC_TW */
{UNICODE, "UNICODE", 0, 0, 0}, /* UNICODE */
{MULE_INTERNAL, "MULE_INTERNAL", 0, 0, 0}, /* MULE_INTERNAL */
{LATIN1, "LATIN1", 0, latin12mic, mic2latin1,
......@@ -1482,10 +1589,50 @@ pg_encoding_conv_tbl pg_conv_tbl[] = {
{ALT, "ALT", 0, alt2mic, mic2alt,
0,0}, /* CP866 */
{SJIS, "SJIS", 1, sjis2mic, mic2sjis,
0,0}, /* SJIS */
sjis_to_utf, utf_to_sjis}, /* SJIS */
{BIG5, "BIG5", 1, big52mic, mic2big5,
big5_to_utf, utf_to_big5}, /* Big5 */
{WIN1250, "WIN1250", 1, win12502mic, mic2win1250,
0,0}, /* WIN 1250 */
{-1, "", 0, 0, 0, 0} /* end mark */
};
#else
/*
 * Encoding conversion table for builds where UNICODE_CONVERSION is not
 * defined: every entry has its last two members (the UTF-8 conversion
 * routines in the UNICODE_CONVERSION variant of this table) set to 0.
 *
 * Columns: encoding id, name, flag (1 for SJIS, BIG5 and WIN1250 --
 * NOTE(review): meaning of the flag is not visible here, confirm against
 * pg_encoding_conv_tbl), to-MIC routine, from-MIC routine, and the two
 * zeroed UTF-8 routine slots.
 */
pg_encoding_conv_tbl pg_conv_tbl[] = {
	{SQL_ASCII, "SQL_ASCII", 0, ascii2mic, mic2ascii, 0, 0},	/* SQL/ASCII */
	{EUC_JP, "EUC_JP", 0, euc_jp2mic, mic2euc_jp, 0, 0},		/* EUC_JP */
	{EUC_CN, "EUC_CN", 0, euc_cn2mic, mic2euc_cn, 0, 0},		/* EUC_CN */
	{EUC_KR, "EUC_KR", 0, euc_kr2mic, mic2euc_kr, 0, 0},		/* EUC_KR */
	{EUC_TW, "EUC_TW", 0, euc_tw2mic, mic2euc_tw, 0, 0},		/* EUC_TW */
	{UNICODE, "UNICODE", 0, 0, 0, 0, 0},						/* UNICODE */
	{MULE_INTERNAL, "MULE_INTERNAL", 0, 0, 0, 0, 0},			/* MULE_INTERNAL */
	{LATIN1, "LATIN1", 0, latin12mic, mic2latin1, 0, 0},		/* ISO 8859 Latin 1 */
	{LATIN2, "LATIN2", 0, latin22mic, mic2latin2, 0, 0},		/* ISO 8859 Latin 2 */
	{LATIN3, "LATIN3", 0, latin32mic, mic2latin3, 0, 0},		/* ISO 8859 Latin 3 */
	{LATIN4, "LATIN4", 0, latin42mic, mic2latin4, 0, 0},		/* ISO 8859 Latin 4 */
	{LATIN5, "LATIN5", 0, iso2mic, mic2iso, 0, 0},				/* ISO 8859 Latin 5 */
	{KOI8, "KOI8", 0, koi2mic, mic2koi, 0, 0},					/* KOI8-R */
	{WIN, "WIN", 0, win2mic, mic2win, 0, 0},					/* CP1251 */
	{ALT, "ALT", 0, alt2mic, mic2alt, 0, 0},					/* CP866 */
	{SJIS, "SJIS", 1, sjis2mic, mic2sjis, 0, 0},				/* SJIS */
	{BIG5, "BIG5", 1, big52mic, mic2big5, 0, 0},				/* Big5 */
	{WIN1250, "WIN1250", 1, win12502mic, mic2win1250, 0, 0},	/* WIN 1250 */
	{-1, "", 0, 0, 0, 0, 0}										/* end mark */
};
#endif /* UNICODE_CONVERSION */
/*
 * UTF-8 --> ISO 8859 lookup table covering LATIN2 through LATIN5.
 *
 * Each entry is {utf, code, encoding}:
 *   utf      - the UTF-8 byte sequence packed into one integer, first
 *              byte most significant (two bytes for all entries except
 *              the final three-byte 0xe28496)
 *   code     - the single-byte local code
 *   encoding - the ISO 8859 variant the entry belongs to
 *
 * Entries are listed in ascending order of the utf value, and each utf
 * value appears only once.
 */
static pg_utf_to_local mapISO8859[] = {
{0xc480, 0x00c0, LATIN4},
{0xc481, 0x00e0, LATIN4},
{0xc482, 0x00c3, LATIN2},
{0xc483, 0x00e3, LATIN2},
{0xc484, 0x00a1, LATIN2},
{0xc485, 0x00b1, LATIN2},
{0xc486, 0x00c6, LATIN2},
{0xc487, 0x00e6, LATIN2},
{0xc488, 0x00c6, LATIN3},
{0xc489, 0x00e6, LATIN3},
{0xc48a, 0x00c5, LATIN3},
{0xc48b, 0x00e5, LATIN3},
{0xc48c, 0x00c8, LATIN2},
{0xc48d, 0x00e8, LATIN2},
{0xc48e, 0x00cf, LATIN2},
{0xc48f, 0x00ef, LATIN2},
{0xc490, 0x00d0, LATIN2},
{0xc491, 0x00f0, LATIN2},
{0xc492, 0x00aa, LATIN4},
{0xc493, 0x00ba, LATIN4},
{0xc496, 0x00cc, LATIN4},
{0xc497, 0x00ec, LATIN4},
{0xc498, 0x00ca, LATIN2},
{0xc499, 0x00ea, LATIN2},
{0xc49a, 0x00cc, LATIN2},
{0xc49b, 0x00ec, LATIN2},
{0xc49c, 0x00d8, LATIN3},
{0xc49d, 0x00f8, LATIN3},
{0xc49e, 0x00ab, LATIN3},
{0xc49f, 0x00bb, LATIN3},
{0xc4a0, 0x00d5, LATIN3},
{0xc4a1, 0x00f5, LATIN3},
{0xc4a2, 0x00ab, LATIN4},
{0xc4a3, 0x00bb, LATIN4},
{0xc4a4, 0x00a6, LATIN3},
{0xc4a5, 0x00b6, LATIN3},
{0xc4a6, 0x00a1, LATIN3},
{0xc4a7, 0x00b1, LATIN3},
{0xc4a8, 0x00a5, LATIN4},
{0xc4a9, 0x00b5, LATIN4},
{0xc4aa, 0x00cf, LATIN4},
{0xc4ab, 0x00ef, LATIN4},
{0xc4ae, 0x00c7, LATIN4},
{0xc4af, 0x00e7, LATIN4},
{0xc4b0, 0x00a9, LATIN3},
{0xc4b1, 0x00b9, LATIN3},
{0xc4b4, 0x00ac, LATIN3},
{0xc4b5, 0x00bc, LATIN3},
{0xc4b6, 0x00d3, LATIN4},
{0xc4b7, 0x00f3, LATIN4},
{0xc4b8, 0x00a2, LATIN4},
{0xc4b9, 0x00c5, LATIN2},
{0xc4ba, 0x00e5, LATIN2},
{0xc4bb, 0x00a6, LATIN4},
{0xc4bc, 0x00b6, LATIN4},
{0xc4bd, 0x00a5, LATIN2},
{0xc4be, 0x00b5, LATIN2},
{0xc581, 0x00a3, LATIN2},
{0xc582, 0x00b3, LATIN2},
{0xc583, 0x00d1, LATIN2},
{0xc584, 0x00f1, LATIN2},
{0xc585, 0x00d1, LATIN4},
{0xc586, 0x00f1, LATIN4},
{0xc587, 0x00d2, LATIN2},
{0xc588, 0x00f2, LATIN2},
{0xc58a, 0x00bd, LATIN4},
{0xc58b, 0x00bf, LATIN4},
{0xc58c, 0x00d2, LATIN4},
{0xc58d, 0x00f2, LATIN4},
{0xc590, 0x00d5, LATIN2},
{0xc591, 0x00f5, LATIN2},
{0xc594, 0x00c0, LATIN2},
{0xc595, 0x00e0, LATIN2},
{0xc596, 0x00a3, LATIN4},
{0xc597, 0x00b3, LATIN4},
{0xc598, 0x00d8, LATIN2},
{0xc599, 0x00f8, LATIN2},
{0xc59a, 0x00a6, LATIN2},
{0xc59b, 0x00b6, LATIN2},
{0xc59c, 0x00de, LATIN3},
{0xc59d, 0x00fe, LATIN3},
{0xc59e, 0x00aa, LATIN2},
{0xc59f, 0x00ba, LATIN2},
{0xc5a0, 0x00a9, LATIN2},
{0xc5a1, 0x00b9, LATIN2},
{0xc5a2, 0x00de, LATIN2},
{0xc5a3, 0x00fe, LATIN2},
{0xc5a4, 0x00ab, LATIN2},
{0xc5a5, 0x00bb, LATIN2},
{0xc5a6, 0x00ac, LATIN4},
{0xc5a7, 0x00bc, LATIN4},
{0xc5a8, 0x00dd, LATIN4},
{0xc5a9, 0x00fd, LATIN4},
{0xc5aa, 0x00de, LATIN4},
{0xc5ab, 0x00fe, LATIN4},
{0xc5ac, 0x00dd, LATIN3},
{0xc5ad, 0x00fd, LATIN3},
{0xc5ae, 0x00d9, LATIN2},
{0xc5af, 0x00f9, LATIN2},
{0xc5b0, 0x00db, LATIN2},
{0xc5b1, 0x00fb, LATIN2},
{0xc5b2, 0x00d9, LATIN4},
{0xc5b3, 0x00f9, LATIN4},
{0xc5b9, 0x00ac, LATIN2},
{0xc5ba, 0x00bc, LATIN2},
{0xc5bb, 0x00af, LATIN2},
{0xc5bc, 0x00bf, LATIN2},
{0xc5bd, 0x00ae, LATIN2},
{0xc5be, 0x00be, LATIN2},
{0xcb87, 0x00b7, LATIN2},
{0xcb98, 0x00a2, LATIN2},
{0xcb99, 0x00ff, LATIN2},
{0xcb9b, 0x00b2, LATIN2},
{0xcb9d, 0x00bd, LATIN2},
{0xd081, 0x00a1, LATIN5},
{0xd082, 0x00a2, LATIN5},
{0xd083, 0x00a3, LATIN5},
{0xd084, 0x00a4, LATIN5},
{0xd085, 0x00a5, LATIN5},
{0xd086, 0x00a6, LATIN5},
{0xd087, 0x00a7, LATIN5},
{0xd088, 0x00a8, LATIN5},
{0xd089, 0x00a9, LATIN5},
{0xd08a, 0x00aa, LATIN5},
{0xd08b, 0x00ab, LATIN5},
{0xd08c, 0x00ac, LATIN5},
{0xd08e, 0x00ae, LATIN5},
{0xd08f, 0x00af, LATIN5},
{0xd090, 0x00b0, LATIN5},
{0xd091, 0x00b1, LATIN5},
{0xd092, 0x00b2, LATIN5},
{0xd093, 0x00b3, LATIN5},
{0xd094, 0x00b4, LATIN5},
{0xd095, 0x00b5, LATIN5},
{0xd096, 0x00b6, LATIN5},
{0xd097, 0x00b7, LATIN5},
{0xd098, 0x00b8, LATIN5},
{0xd099, 0x00b9, LATIN5},
{0xd09a, 0x00ba, LATIN5},
{0xd09b, 0x00bb, LATIN5},
{0xd09c, 0x00bc, LATIN5},
{0xd09d, 0x00bd, LATIN5},
{0xd09e, 0x00be, LATIN5},
{0xd09f, 0x00bf, LATIN5},
{0xd0a0, 0x00c0, LATIN5},
{0xd0a1, 0x00c1, LATIN5},
{0xd0a2, 0x00c2, LATIN5},
{0xd0a3, 0x00c3, LATIN5},
{0xd0a4, 0x00c4, LATIN5},
{0xd0a5, 0x00c5, LATIN5},
{0xd0a6, 0x00c6, LATIN5},
{0xd0a7, 0x00c7, LATIN5},
{0xd0a8, 0x00c8, LATIN5},
{0xd0a9, 0x00c9, LATIN5},
{0xd0aa, 0x00ca, LATIN5},
{0xd0ab, 0x00cb, LATIN5},
{0xd0ac, 0x00cc, LATIN5},
{0xd0ad, 0x00cd, LATIN5},
{0xd0ae, 0x00ce, LATIN5},
{0xd0af, 0x00cf, LATIN5},
{0xd0b0, 0x00d0, LATIN5},
{0xd0b1, 0x00d1, LATIN5},
{0xd0b2, 0x00d2, LATIN5},
{0xd0b3, 0x00d3, LATIN5},
{0xd0b4, 0x00d4, LATIN5},
{0xd0b5, 0x00d5, LATIN5},
{0xd0b6, 0x00d6, LATIN5},
{0xd0b7, 0x00d7, LATIN5},
{0xd0b8, 0x00d8, LATIN5},
{0xd0b9, 0x00d9, LATIN5},
{0xd0ba, 0x00da, LATIN5},
{0xd0bb, 0x00db, LATIN5},
{0xd0bc, 0x00dc, LATIN5},
{0xd0bd, 0x00dd, LATIN5},
{0xd0be, 0x00de, LATIN5},
{0xd0bf, 0x00df, LATIN5},
{0xd180, 0x00e0, LATIN5},
{0xd181, 0x00e1, LATIN5},
{0xd182, 0x00e2, LATIN5},
{0xd183, 0x00e3, LATIN5},
{0xd184, 0x00e4, LATIN5},
{0xd185, 0x00e5, LATIN5},
{0xd186, 0x00e6, LATIN5},
{0xd187, 0x00e7, LATIN5},
{0xd188, 0x00e8, LATIN5},
{0xd189, 0x00e9, LATIN5},
{0xd18a, 0x00ea, LATIN5},
{0xd18b, 0x00eb, LATIN5},
{0xd18c, 0x00ec, LATIN5},
{0xd18d, 0x00ed, LATIN5},
{0xd18e, 0x00ee, LATIN5},
{0xd18f, 0x00ef, LATIN5},
{0xd191, 0x00f1, LATIN5},
{0xd192, 0x00f2, LATIN5},
{0xd193, 0x00f3, LATIN5},
{0xd194, 0x00f4, LATIN5},
{0xd195, 0x00f5, LATIN5},
{0xd196, 0x00f6, LATIN5},
{0xd197, 0x00f7, LATIN5},
{0xd198, 0x00f8, LATIN5},
{0xd199, 0x00f9, LATIN5},
{0xd19a, 0x00fa, LATIN5},
{0xd19b, 0x00fb, LATIN5},
{0xd19c, 0x00fc, LATIN5},
{0xd19e, 0x00fe, LATIN5},
{0xd19f, 0x00ff, LATIN5},
{0xe28496, 0x00f0, LATIN5},
};
/*
 * ISO 8859-2 --> UTF-8 lookup table.
 *
 * Each entry is {code, utf}: the single-byte local code and the UTF-8
 * byte sequence packed into one integer (first byte most significant).
 * The table covers codes 0xa0 through 0xff in ascending order of code.
 */
static pg_local_to_utf ISO8859_2[] = {
{0x00a0, 0xc2a0},
{0x00a1, 0xc484},
{0x00a2, 0xcb98},
{0x00a3, 0xc581},
{0x00a4, 0xc2a4},
{0x00a5, 0xc4bd},
{0x00a6, 0xc59a},
{0x00a7, 0xc2a7},
{0x00a8, 0xc2a8},
{0x00a9, 0xc5a0},
{0x00aa, 0xc59e},
{0x00ab, 0xc5a4},
{0x00ac, 0xc5b9},
{0x00ad, 0xc2ad},
{0x00ae, 0xc5bd},
{0x00af, 0xc5bb},
{0x00b0, 0xc2b0},
{0x00b1, 0xc485},
{0x00b2, 0xcb9b},
{0x00b3, 0xc582},
{0x00b4, 0xc2b4},
{0x00b5, 0xc4be},
{0x00b6, 0xc59b},
{0x00b7, 0xcb87},
{0x00b8, 0xc2b8},
{0x00b9, 0xc5a1},
{0x00ba, 0xc59f},
{0x00bb, 0xc5a5},
{0x00bc, 0xc5ba},
{0x00bd, 0xcb9d},
{0x00be, 0xc5be},
{0x00bf, 0xc5bc},
{0x00c0, 0xc594},
{0x00c1, 0xc381},
{0x00c2, 0xc382},
{0x00c3, 0xc482},
{0x00c4, 0xc384},
{0x00c5, 0xc4b9},
{0x00c6, 0xc486},
{0x00c7, 0xc387},
{0x00c8, 0xc48c},
{0x00c9, 0xc389},
{0x00ca, 0xc498},
{0x00cb, 0xc38b},
{0x00cc, 0xc49a},
{0x00cd, 0xc38d},
{0x00ce, 0xc38e},
{0x00cf, 0xc48e},
{0x00d0, 0xc490},
{0x00d1, 0xc583},
{0x00d2, 0xc587},
{0x00d3, 0xc393},
{0x00d4, 0xc394},
{0x00d5, 0xc590},
{0x00d6, 0xc396},
{0x00d7, 0xc397},
{0x00d8, 0xc598},
{0x00d9, 0xc5ae},
{0x00da, 0xc39a},
{0x00db, 0xc5b0},
{0x00dc, 0xc39c},
{0x00dd, 0xc39d},
{0x00de, 0xc5a2},
{0x00df, 0xc39f},
{0x00e0, 0xc595},
{0x00e1, 0xc3a1},
{0x00e2, 0xc3a2},
{0x00e3, 0xc483},
{0x00e4, 0xc3a4},
{0x00e5, 0xc4ba},
{0x00e6, 0xc487},
{0x00e7, 0xc3a7},
{0x00e8, 0xc48d},
{0x00e9, 0xc3a9},
{0x00ea, 0xc499},
{0x00eb, 0xc3ab},
{0x00ec, 0xc49b},
{0x00ed, 0xc3ad},
{0x00ee, 0xc3ae},
{0x00ef, 0xc48f},
{0x00f0, 0xc491},
{0x00f1, 0xc584},
{0x00f2, 0xc588},
{0x00f3, 0xc3b3},
{0x00f4, 0xc3b4},
{0x00f5, 0xc591},
{0x00f6, 0xc3b6},
{0x00f7, 0xc3b7},
{0x00f8, 0xc599},
{0x00f9, 0xc5af},
{0x00fa, 0xc3ba},
{0x00fb, 0xc5b1},
{0x00fc, 0xc3bc},
{0x00fd, 0xc3bd},
{0x00fe, 0xc5a3},
{0x00ff, 0xcb99},
};
/*
 * ISO 8859-3 --> UTF-8 lookup table.
 *
 * Each entry is {code, utf}: the single-byte local code and the UTF-8
 * byte sequence packed into one integer (first byte most significant).
 * Entries are in ascending order of code, starting at 0xa0.  Some codes
 * have no entry at all (for example 0xa5, 0xae and 0xbe are skipped).
 */
static pg_local_to_utf ISO8859_3[] = {
{0x00a0, 0xc2a0},
{0x00a1, 0xc4a6},
{0x00a2, 0xcb98},
{0x00a3, 0xc2a3},
{0x00a4, 0xc2a4},
{0x00a6, 0xc4a4},
{0x00a7, 0xc2a7},
{0x00a8, 0xc2a8},
{0x00a9, 0xc4b0},
{0x00aa, 0xc59e},
{0x00ab, 0xc49e},
{0x00ac, 0xc4b4},
{0x00ad, 0xc2ad},
{0x00af, 0xc5bb},
{0x00b0, 0xc2b0},
{0x00b1, 0xc4a7},
{0x00b2, 0xc2b2},
{0x00b3, 0xc2b3},
{0x00b4, 0xc2b4},
{0x00b5, 0xc2b5},
{0x00b6, 0xc4a5},
{0x00b7, 0xc2b7},
{0x00b8, 0xc2b8},
{0x00b9, 0xc4b1},
{0x00ba, 0xc59f},
{0x00bb, 0xc49f},
{0x00bc, 0xc4b5},
{0x00bd, 0xc2bd},
{0x00bf, 0xc5bc},
{0x00c0, 0xc380},
{0x00c1, 0xc381},
{0x00c2, 0xc382},
{0x00c4, 0xc384},
{0x00c5, 0xc48a},
{0x00c6, 0xc488},
{0x00c7, 0xc387},
{0x00c8, 0xc388},
{0x00c9, 0xc389},
{0x00ca, 0xc38a},
{0x00cb, 0xc38b},
{0x00cc, 0xc38c},
{0x00cd, 0xc38d},
{0x00ce, 0xc38e},
{0x00cf, 0xc38f},
{0x00d1, 0xc391},
{0x00d2, 0xc392},
{0x00d3, 0xc393},
{0x00d4, 0xc394},
{0x00d5, 0xc4a0},
{0x00d6, 0xc396},
{0x00d7, 0xc397},
{0x00d8, 0xc49c},
{0x00d9, 0xc399},
{0x00da, 0xc39a},
{0x00db, 0xc39b},
{0x00dc, 0xc39c},
{0x00dd, 0xc5ac},
{0x00de, 0xc59c},
{0x00df, 0xc39f},
{0x00e0, 0xc3a0},
{0x00e1, 0xc3a1},
{0x00e2, 0xc3a2},
{0x00e4, 0xc3a4},
{0x00e5, 0xc48b},
{0x00e6, 0xc489},
{0x00e7, 0xc3a7},
{0x00e8, 0xc3a8},
{0x00e9, 0xc3a9},
{0x00ea, 0xc3aa},
{0x00eb, 0xc3ab},
{0x00ec, 0xc3ac},
{0x00ed, 0xc3ad},
{0x00ee, 0xc3ae},
{0x00ef, 0xc3af},
{0x00f1, 0xc3b1},
{0x00f2, 0xc3b2},
{0x00f3, 0xc3b3},
{0x00f4, 0xc3b4},
{0x00f5, 0xc4a1},
{0x00f6, 0xc3b6},
{0x00f7, 0xc3b7},
{0x00f8, 0xc49d},
{0x00f9, 0xc3b9},
{0x00fa, 0xc3ba},
{0x00fb, 0xc3bb},
{0x00fc, 0xc3bc},
{0x00fd, 0xc5ad},
{0x00fe, 0xc59d},
{0x00ff, 0xcb99},
};
/*
 * ISO 8859-4 --> UTF-8 lookup table.
 *
 * Each entry is {code, utf}: the single-byte local code and the UTF-8
 * byte sequence packed into one integer (first byte most significant).
 * The table covers codes 0xa0 through 0xff in ascending order of code.
 */
static pg_local_to_utf ISO8859_4[] = {
{0x00a0, 0xc2a0},
{0x00a1, 0xc484},
{0x00a2, 0xc4b8},
{0x00a3, 0xc596},
{0x00a4, 0xc2a4},
{0x00a5, 0xc4a8},
{0x00a6, 0xc4bb},
{0x00a7, 0xc2a7},
{0x00a8, 0xc2a8},
{0x00a9, 0xc5a0},
{0x00aa, 0xc492},
{0x00ab, 0xc4a2},
{0x00ac, 0xc5a6},
{0x00ad, 0xc2ad},
{0x00ae, 0xc5bd},
{0x00af, 0xc2af},
{0x00b0, 0xc2b0},
{0x00b1, 0xc485},
{0x00b2, 0xcb9b},
{0x00b3, 0xc597},
{0x00b4, 0xc2b4},
{0x00b5, 0xc4a9},
{0x00b6, 0xc4bc},
{0x00b7, 0xcb87},
{0x00b8, 0xc2b8},
{0x00b9, 0xc5a1},
{0x00ba, 0xc493},
{0x00bb, 0xc4a3},
{0x00bc, 0xc5a7},
{0x00bd, 0xc58a},
{0x00be, 0xc5be},
{0x00bf, 0xc58b},
{0x00c0, 0xc480},
{0x00c1, 0xc381},
{0x00c2, 0xc382},
{0x00c3, 0xc383},
{0x00c4, 0xc384},
{0x00c5, 0xc385},
{0x00c6, 0xc386},
{0x00c7, 0xc4ae},
{0x00c8, 0xc48c},
{0x00c9, 0xc389},
{0x00ca, 0xc498},
{0x00cb, 0xc38b},
{0x00cc, 0xc496},
{0x00cd, 0xc38d},
{0x00ce, 0xc38e},
{0x00cf, 0xc4aa},
{0x00d0, 0xc490},
{0x00d1, 0xc585},
{0x00d2, 0xc58c},
{0x00d3, 0xc4b6},
{0x00d4, 0xc394},
{0x00d5, 0xc395},
{0x00d6, 0xc396},
{0x00d7, 0xc397},
{0x00d8, 0xc398},
{0x00d9, 0xc5b2},
{0x00da, 0xc39a},
{0x00db, 0xc39b},
{0x00dc, 0xc39c},
{0x00dd, 0xc5a8},
{0x00de, 0xc5aa},
{0x00df, 0xc39f},
{0x00e0, 0xc481},
{0x00e1, 0xc3a1},
{0x00e2, 0xc3a2},
{0x00e3, 0xc3a3},
{0x00e4, 0xc3a4},
{0x00e5, 0xc3a5},
{0x00e6, 0xc3a6},
{0x00e7, 0xc4af},
{0x00e8, 0xc48d},
{0x00e9, 0xc3a9},
{0x00ea, 0xc499},
{0x00eb, 0xc3ab},
{0x00ec, 0xc497},
{0x00ed, 0xc3ad},
{0x00ee, 0xc3ae},
{0x00ef, 0xc4ab},
{0x00f0, 0xc491},
{0x00f1, 0xc586},
{0x00f2, 0xc58d},
{0x00f3, 0xc4b7},
{0x00f4, 0xc3b4},
{0x00f5, 0xc3b5},
{0x00f6, 0xc3b6},
{0x00f7, 0xc3b7},
{0x00f8, 0xc3b8},
{0x00f9, 0xc5b3},
{0x00fa, 0xc3ba},
{0x00fb, 0xc3bb},
{0x00fc, 0xc3bc},
{0x00fd, 0xc5a9},
{0x00fe, 0xc5ab},
{0x00ff, 0xcb99},
};
/*
 * ISO 8859-5 --> UTF-8 lookup table.
 *
 * Each entry is {code, utf}: the single-byte local code and the UTF-8
 * byte sequence packed into one integer (first byte most significant).
 * The table covers codes 0xa0 through 0xff in ascending order of code.
 * All values are two-byte sequences except the entry for 0xf0, which is
 * the three-byte sequence 0xe28496.
 */
static pg_local_to_utf ISO8859_5[] = {
{0x00a0, 0xc2a0},
{0x00a1, 0xd081},
{0x00a2, 0xd082},
{0x00a3, 0xd083},
{0x00a4, 0xd084},
{0x00a5, 0xd085},
{0x00a6, 0xd086},
{0x00a7, 0xd087},
{0x00a8, 0xd088},
{0x00a9, 0xd089},
{0x00aa, 0xd08a},
{0x00ab, 0xd08b},
{0x00ac, 0xd08c},
{0x00ad, 0xc2ad},
{0x00ae, 0xd08e},
{0x00af, 0xd08f},
{0x00b0, 0xd090},
{0x00b1, 0xd091},
{0x00b2, 0xd092},
{0x00b3, 0xd093},
{0x00b4, 0xd094},
{0x00b5, 0xd095},
{0x00b6, 0xd096},
{0x00b7, 0xd097},
{0x00b8, 0xd098},
{0x00b9, 0xd099},
{0x00ba, 0xd09a},
{0x00bb, 0xd09b},
{0x00bc, 0xd09c},
{0x00bd, 0xd09d},
{0x00be, 0xd09e},
{0x00bf, 0xd09f},
{0x00c0, 0xd0a0},
{0x00c1, 0xd0a1},
{0x00c2, 0xd0a2},
{0x00c3, 0xd0a3},
{0x00c4, 0xd0a4},
{0x00c5, 0xd0a5},
{0x00c6, 0xd0a6},
{0x00c7, 0xd0a7},
{0x00c8, 0xd0a8},
{0x00c9, 0xd0a9},
{0x00ca, 0xd0aa},
{0x00cb, 0xd0ab},
{0x00cc, 0xd0ac},
{0x00cd, 0xd0ad},
{0x00ce, 0xd0ae},
{0x00cf, 0xd0af},
{0x00d0, 0xd0b0},
{0x00d1, 0xd0b1},
{0x00d2, 0xd0b2},
{0x00d3, 0xd0b3},
{0x00d4, 0xd0b4},
{0x00d5, 0xd0b5},
{0x00d6, 0xd0b6},
{0x00d7, 0xd0b7},
{0x00d8, 0xd0b8},
{0x00d9, 0xd0b9},
{0x00da, 0xd0ba},
{0x00db, 0xd0bb},
{0x00dc, 0xd0bc},
{0x00dd, 0xd0bd},
{0x00de, 0xd0be},
{0x00df, 0xd0bf},
{0x00e0, 0xd180},
{0x00e1, 0xd181},
{0x00e2, 0xd182},
{0x00e3, 0xd183},
{0x00e4, 0xd184},
{0x00e5, 0xd185},
{0x00e6, 0xd186},
{0x00e7, 0xd187},
{0x00e8, 0xd188},
{0x00e9, 0xd189},
{0x00ea, 0xd18a},
{0x00eb, 0xd18b},
{0x00ec, 0xd18c},
{0x00ed, 0xd18d},
{0x00ee, 0xd18e},
{0x00ef, 0xd18f},
{0x00f0, 0xe28496},
{0x00f1, 0xd191},
{0x00f2, 0xd192},
{0x00f3, 0xd193},
{0x00f4, 0xd194},
{0x00f5, 0xd195},
{0x00f6, 0xd196},
{0x00f7, 0xd197},
{0x00f8, 0xd198},
{0x00f9, 0xd199},
{0x00fa, 0xd19a},
{0x00fb, 0xd19b},
{0x00fc, 0xd19c},
{0x00fd, 0xc2a7},
{0x00fe, 0xd19e},
{0x00ff, 0xd19f},
};
......@@ -3,7 +3,7 @@
* client encoding and server internal encoding.
* (currently mule internal code (mic) is used)
* Tatsuo Ishii
* $Id: mbutils.c,v 1.12 2000/10/12 06:06:50 ishii Exp $ */
* $Id: mbutils.c,v 1.13 2000/10/30 10:40:28 ishii Exp $ */
#include "postgres.h"
......@@ -209,13 +209,6 @@ pg_mblen(const unsigned char *mbstr)
return ((*pg_wchar_table[GetDatabaseEncoding()].mblen) (mbstr));
}
/*
 * Returns the byte length of the multi-byte word at "mbstr", measured
 * with the mblen routine registered in pg_wchar_table for "encoding".
 * "encoding" is used as a direct index into pg_wchar_table and is not
 * range-checked here.
 */
int
pg_mblen_with_encoding(const unsigned char *mbstr, int encoding)
{
	int			nbytes;

	nbytes = (*pg_wchar_table[encoding].mblen) (mbstr);
	return nbytes;
}
/* returns the length (counted as a wchar) of a multi-byte string */
int
pg_mbstrlen(const unsigned char *mbstr)
......
/*
* $Id: uconv.c,v 1.1 2000/10/12 06:06:50 ishii Exp $
*/
#include "pg_wchar.h"
/*
 * Encode one UCS-2 value as UTF-8.
 *
 * ucs: the UCS-2 value to encode.
 * utf: output buffer; receives one to three bytes.  No terminator is
 *      written.
 *
 * Returns the number of bytes stored in utf (at most 3).
 */
static int
pg_ucs2utf(const unsigned short ucs, unsigned char *utf)
{
	/* up to 0x007f: stored unchanged as a single byte */
	if (ucs <= 0x007f)
	{
		utf[0] = ucs;
		return 1;
	}

	/* up to 0x07ff: 110xxxxx 10xxxxxx */
	if (ucs <= 0x07ff)
	{
		utf[0] = (ucs >> 6) | 0xc0;
		utf[1] = (ucs & 0x003f) | 0x80;
		return 2;
	}

	/* everything above: 1110xxxx 10xxxxxx 10xxxxxx */
	utf[0] = (ucs >> 12) | 0xe0;
	utf[1] = ((ucs & 0x0fc0) >> 6) | 0x80;
	utf[2] = (ucs & 0x003f) | 0x80;
	return 3;
}
/*
 * One entry of a UCS-2 --> local code source table (the element type of
 * the mapISO8859 and mapJIS tables that main() iterates over).
 */
typedef struct
{
unsigned short ucs; /* UCS-2 value; main() converts it to UTF-8 with pg_ucs2utf() */
unsigned short code; /* local code; main() ORs in 0x80 (ISO 8859) or 0x8080 (JIS) before printing */
unsigned char encoding; /* character set tag: compared against LATIN5 for mapISO8859 and against X0208 for mapJIS */
} ucs_to_local;
/*
 * One entry of a local code --> UCS-2 reverse table (the element type of
 * the revISO8859_N tables that main() iterates over).
 */
typedef struct
{
unsigned short code; /* local code; main() ORs in 0x80 before printing */
unsigned short ucs; /* UCS-2 value; main() converts it to UTF-8 with pg_ucs2utf() */
} local_to_ucs;
#include "ucs_to_iso8859.map"
#include "iso88592.rev"
#include "iso88593.rev"
#include "iso88594.rev"
#include "iso88595.rev"
#define X0208 0
#define X0212 1
#include "ucs_to_jis.map"
/*
 * Write the UTF-8 form of "ucs" to "out" as consecutive two-digit
 * lowercase hex bytes, with no prefix and no separator.
 */
static void
put_utf8_hex(FILE *out, unsigned short ucs)
{
	unsigned char u[4];
	int			nbytes = pg_ucs2utf(ucs, u);
	int			j;

	for (j = 0; j < nbytes; j++)
		fprintf(out, "%02x", u[j]);
}

/*
 * Generate the UTF-8 conversion maps as C source.
 *
 * The ISO 8859 tables (mapISO8859 and ISO8859_2 .. ISO8859_5) are written
 * to stdout; the UTF-8 --> EUC_JP table is written to the file
 * "UTF_to_EUC_JP.map" in the current directory.
 *
 * Returns 0 on success, 1 if the output file cannot be opened or closed
 * cleanly.
 */
int
main(void)
{
	static const char jis_map_file[] = "UTF_to_EUC_JP.map";
	size_t		i;
	unsigned int euc;
	FILE	   *fd;

	/* UTF-8 --> ISO 8859 */
	printf("static pg_utf_to_local mapISO8859[] = {\n");
	for (i = 0; i < sizeof(mapISO8859) / sizeof(ucs_to_local); i++)
	{
		/* entries tagged with an encoding beyond LATIN5 are not emitted */
		if (mapISO8859[i].encoding > LATIN5)
			continue;
		printf(" {0x");
		put_utf8_hex(stdout, mapISO8859[i].ucs);
		printf(", 0x%04x, %s},\n",
			   mapISO8859[i].code | 0x80,
			   pg_get_enc_ent(mapISO8859[i].encoding)->name);
	}
	printf("};\n");

	/* ISO 8859-2 --> UTF-8 */
	printf("\nstatic pg_local_to_utf ISO8859_2[] = {\n");
	for (i = 0; i < sizeof(revISO8859_2) / sizeof(local_to_ucs); i++)
	{
		printf(" {0x%04x, ", revISO8859_2[i].code | 0x80);
		printf("0x");
		put_utf8_hex(stdout, revISO8859_2[i].ucs);
		printf("},\n");
	}
	printf("};\n");

	/* ISO 8859-3 --> UTF-8 */
	printf("\nstatic pg_local_to_utf ISO8859_3[] = {\n");
	for (i = 0; i < sizeof(revISO8859_3) / sizeof(local_to_ucs); i++)
	{
		printf(" {0x%04x, ", revISO8859_3[i].code | 0x80);
		printf("0x");
		put_utf8_hex(stdout, revISO8859_3[i].ucs);
		printf("},\n");
	}
	printf("};\n");

	/* ISO 8859-4 --> UTF-8 */
	printf("\nstatic pg_local_to_utf ISO8859_4[] = {\n");
	for (i = 0; i < sizeof(revISO8859_4) / sizeof(local_to_ucs); i++)
	{
		printf(" {0x%04x, ", revISO8859_4[i].code | 0x80);
		printf("0x");
		put_utf8_hex(stdout, revISO8859_4[i].ucs);
		printf("},\n");
	}
	printf("};\n");

	/* ISO 8859-5 --> UTF-8 */
	printf("\nstatic pg_local_to_utf ISO8859_5[] = {\n");
	for (i = 0; i < sizeof(revISO8859_5) / sizeof(local_to_ucs); i++)
	{
		printf(" {0x%04x, ", revISO8859_5[i].code | 0x80);
		printf("0x");
		put_utf8_hex(stdout, revISO8859_5[i].ucs);
		printf("},\n");
	}
	printf("};\n");

	/* UTF-8 --> EUC_JP goes to its own file; fail cleanly if it can't be opened */
	fd = fopen(jis_map_file, "w");
	if (fd == NULL)
	{
		perror(jis_map_file);
		return (1);
	}

	fprintf(fd, "static pg_utf_to_local mapUTF_to_EUC_JP[] = {\n");
	for (i = 0; i < sizeof(mapJIS) / sizeof(ucs_to_local); i++)
	{
		fprintf(fd, " {0x");
		put_utf8_hex(fd, mapJIS[i].ucs);

		/*
		 * X0208 codes only get the high bit set on both bytes; all other
		 * entries are additionally prefixed with the SS3 byte.
		 */
		if (mapJIS[i].encoding == X0208)
			euc = mapJIS[i].code | 0x8080;
		else
			euc = SS3 << 16 | mapJIS[i].code | 0x8080;

		fprintf(fd, ", 0x%04x, %s},\n",
				euc,
				"EUC_JP");
	}
	fprintf(fd, "};\n");

	/* fclose() flushes buffered output, so a failure here means a bad file */
	if (fclose(fd) != 0)
	{
		perror(jis_map_file);
		return (1);
	}

	return (0);
}
/*
* $Id: uconv2.c,v 1.1 2000/10/12 06:06:50 ishii Exp $
*/
#include "pg_wchar.h"
#include "UTF_to_EUC_JP.map"
static int compare1(const void *p1, const void *p2)
{
unsigned int v1, v2;
v1 = ((pg_utf_to_local *)p1)->code;
v2 = ((pg_utf_to_local *)p2)->code;
return(v1 - v2);
}
/*
 * Generate the EUC_JP --> UTF-8 map.
 *
 * Sorts mapUTF_to_EUC_JP in place by local code (see compare1) and writes
 * it, with the two columns swapped, to "EUC_JP_to_UTF.map" in the current
 * directory as a pg_local_to_utf table.
 *
 * Returns 0 on success, 1 if the output file cannot be opened or closed
 * cleanly.
 */
int
main(void)
{
	static const char out_file[] = "EUC_JP_to_UTF.map";
	const size_t nentries = sizeof(mapUTF_to_EUC_JP) / sizeof(pg_utf_to_local);
	size_t		i;
	FILE	   *fd;

	qsort(mapUTF_to_EUC_JP, nentries, sizeof(pg_utf_to_local), compare1);

	/* fail cleanly instead of handing a NULL stream to fprintf() */
	fd = fopen(out_file, "w");
	if (fd == NULL)
	{
		perror(out_file);
		return (1);
	}

	fprintf(fd, "static pg_local_to_utf mapEUC_JP_to_UTF[] = {\n");
	for (i = 0; i < nentries; i++)
	{
		fprintf(fd, " {0x%08x, 0x%08x},\n",
				mapUTF_to_EUC_JP[i].code,
				mapUTF_to_EUC_JP[i].utf);
	}
	fprintf(fd, "};\n");

	/* fclose() flushes buffered output, so a failure here means a bad file */
	if (fclose(fd) != 0)
	{
		perror(out_file);
		return (1);
	}

	return (0);
}
/* $Id: pg_wchar.h,v 1.21 2000/10/27 02:21:15 ishii Exp $ */
/* $Id: pg_wchar.h,v 1.22 2000/10/30 10:41:05 ishii Exp $ */
#ifndef PG_WCHAR_H
#define PG_WCHAR_H
......@@ -46,8 +46,8 @@ typedef unsigned int pg_wchar;
/*
* various definitions for EUC
*/
#define SS2 0x8e /* single shift 2 */
#define SS3 0x8f /* single shift 3 */
#define SS2 0x8e /* single shift 2 (JIS0201) */
#define SS3 0x8f /* single shift 3 (JIS0212) */
/*
* various definitions for mule internal code
......@@ -104,13 +104,18 @@ typedef struct
extern pg_wchar_tbl pg_wchar_table[];
/*
 * UTF-8 to local code conversion map entry.
 *
 * Conversion tables are arrays of these entries; the UTF-8 sequence is
 * stored packed into a single integer, first byte most significant
 * (e.g. 0xc484 for the two bytes 0xc4 0x84).
 */
typedef struct
{
unsigned int utf; /* UTF-8 byte sequence, packed into one integer */
unsigned int code; /* local code */
unsigned char encoding; /* encoding the entry belongs to (e.g. LATIN2) */
} pg_utf_to_local;
/*
* local code to UTF-8 conversion map
*/
typedef struct
{
unsigned int code; /* local code */
......@@ -124,7 +129,6 @@ extern int pg_wchar_strncmp(const pg_wchar *, const pg_wchar *, size_t);
extern int pg_char_and_wchar_strncmp(const char *, const pg_wchar *, size_t);
extern size_t pg_wchar_strlen(const pg_wchar *);
extern int pg_mblen(const unsigned char *);
extern int pg_mblen_with_encoding(const unsigned char *, int);
extern int pg_encoding_mblen(int, const unsigned char *);
extern int pg_mule_mblen(const unsigned char *);
extern int pg_mic_mblen(const unsigned char *);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment