Commit 90a06dba authored by Tatsuo Ishii

Fix broken GB18030 <--> UTF-8 conversion map

parent 5eb6de59
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
# #
# Copyright 2002 by Bill Huang # Copyright 2002 by Bill Huang
# #
# $Id: UCS_to_GB18030.pl,v 1.1 2002/06/13 08:28:55 ishii Exp $ # $Id: UCS_to_GB18030.pl,v 1.2 2002/11/12 11:33:40 ishii Exp $
# #
# Generate UTF-8 <--> GB18030 code conversion tables from # Generate UTF-8 <--> GB18030 code conversion tables from
# map files provided by Unicode organization. # map files provided by Unicode organization.
...@@ -30,10 +30,18 @@ while( <FILE> ){ ...@@ -30,10 +30,18 @@ while( <FILE> ){
next; next;
} }
( $u, $c, $rest ) = split; ( $u, $c, $rest ) = split;
$utf = hex($u); $ucs = hex($u);
$code = hex($c); $code = hex($c);
$count++; if( $code >= 0x80 && $ucs >= 0x0080 ){
$array{ $utf } = ($code); $utf = &ucs2utf($ucs);
if( $array{ $utf } ne "" ){
printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
next;
}
$count++;
$array{ $utf } = $code;
}
} }
close( FILE ); close( FILE );
...@@ -70,11 +78,19 @@ while( <FILE> ){ ...@@ -70,11 +78,19 @@ while( <FILE> ){
if( /^#/ ){ if( /^#/ ){
next; next;
} }
( $u, $c, $rest ) = split; ( $c, $u, $rest ) = split;
$utf = hex($u); $ucs = hex($u);
$code = hex($c); $code = hex($c);
$count++; if( $code >= 0x80 && $ucs >= 0x0080 ){
$array{ $code } = $utf; $utf = &ucs2utf($ucs);
if( $array{ $code } ne "" ){
printf STDERR "Warning: duplicate code: %04x\n",$ucs;
next;
}
$count++;
$array{ $code } = $utf;
}
} }
close( FILE ); close( FILE );
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment