Commit 021d254d authored by Heikki Linnakangas

Make all Unicode Perl scripts use strict, and rearrange logic for clarity.

The loops were a bit difficult to understand, due to breaking out of them
early. Also fix things that perlcritic complained about.

Daniel Gustafsson
parent 81c52728
...@@ -24,8 +24,8 @@ ...@@ -24,8 +24,8 @@
# UCS-2 code in hex # UCS-2 code in hex
# # and Unicode name (not used in this script) # # and Unicode name (not used in this script)
use strict;
require "convutils.pm"; require convutils;
# Load BIG5.TXT # Load BIG5.TXT
my $all = &read_source("BIG5.TXT"); my $all = &read_source("BIG5.TXT");
......
...@@ -13,24 +13,24 @@ ...@@ -13,24 +13,24 @@
# where the "u" field is the Unicode code point in hex, # where the "u" field is the Unicode code point in hex,
# and the "b" field is the hex byte sequence for GB18030 # and the "b" field is the hex byte sequence for GB18030
require "convutils.pm"; use strict;
require convutils;
# Read the input # Read the input
$in_file = "gb-18030-2000.xml"; my $in_file = "gb-18030-2000.xml";
open(FILE, $in_file) || die("cannot open $in_file"); open(my $in, '<', $in_file) || die("cannot open $in_file");
my @mapping; my @mapping;
while (<FILE>) while (<$in>)
{ {
next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/); next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
$u = $1; my ($u, $c) = ($1, $2);
$c = $2;
$c =~ s/ //g; $c =~ s/ //g;
$ucs = hex($u); my $ucs = hex($u);
$code = hex($c); my $code = hex($c);
# The GB-18030 character set, which we use as the source, contains # The GB-18030 character set, which we use as the source, contains
# a lot of extra characters on top of the GB2312 character set that # a lot of extra characters on top of the GB2312 character set that
...@@ -71,6 +71,6 @@ while (<FILE>) ...@@ -71,6 +71,6 @@ while (<FILE>)
direction => 'both' direction => 'both'
} }
} }
close(FILE); close($in);
print_tables("EUC_CN", \@mapping); print_tables("EUC_CN", \@mapping);
...@@ -7,27 +7,27 @@ ...@@ -7,27 +7,27 @@
# Generate UTF-8 <--> EUC_JIS_2004 code conversion tables from # Generate UTF-8 <--> EUC_JIS_2004 code conversion tables from
# "euc-jis-2004-std.txt" (http://x0213.org) # "euc-jis-2004-std.txt" (http://x0213.org)
require "convutils.pm"; use strict;
require convutils;
# first generate UTF-8 --> EUC_JIS_2004 table # first generate UTF-8 --> EUC_JIS_2004 table
$in_file = "euc-jis-2004-std.txt"; my $in_file = "euc-jis-2004-std.txt";
open(FILE, $in_file) || die("cannot open $in_file"); open(my $in, '<', $in_file) || die("cannot open $in_file");
my @all; my @all;
while ($line = <FILE>) while (my $line = <$in>)
{ {
if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
{ {
$c = $1; # combined characters
$u1 = $2; my ($c, $u1, $u2) = ($1, $2, $3);
$u2 = $3; my $rest = "U+" . $u1 . "+" . $u2 . $4;
$rest = "U+" . $u1 . "+" . $u2 . $4; my $code = hex($c);
$code = hex($c); my $ucs1 = hex($u1);
$ucs1 = hex($u1); my $ucs2 = hex($u2);
$ucs2 = hex($u2);
push @all, { direction => 'both', push @all, { direction => 'both',
ucs => $ucs1, ucs => $ucs1,
...@@ -38,22 +38,16 @@ while ($line = <FILE>) ...@@ -38,22 +38,16 @@ while ($line = <FILE>)
} }
elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
{ {
$c = $1; # non-combined characters
$u = $2; my ($c, $u, $rest) = ($1, $2, "U+" . $2 . $3);
$rest = "U+" . $u . $3; my $ucs = hex($u);
} my $code = hex($c);
else
{
next;
}
$ucs = hex($u);
$code = hex($c);
next if ($code < 0x80 && $ucs < 0x80); next if ($code < 0x80 && $ucs < 0x80);
push @all, { direction => 'both', ucs => $ucs, code => $code, comment => $rest }; push @all, { direction => 'both', ucs => $ucs, code => $code, comment => $rest };
}
} }
close(FILE); close($in);
print_tables("EUC_JIS_2004", \@all, 1); print_tables("EUC_JIS_2004", \@all, 1);
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# organization's ftp site. # organization's ftp site.
use strict; use strict;
require "convutils.pm"; require convutils;
# Load JIS0212.TXT # Load JIS0212.TXT
my $jis0212 = &read_source("JIS0212.TXT"); my $jis0212 = &read_source("JIS0212.TXT");
......
...@@ -16,7 +16,8 @@ ...@@ -16,7 +16,8 @@
# UCS-2 code in hex # UCS-2 code in hex
# # and Unicode name (not used in this script) # # and Unicode name (not used in this script)
require "convutils.pm"; use strict;
require convutils;
# Load the source file. # Load the source file.
......
...@@ -17,7 +17,8 @@ ...@@ -17,7 +17,8 @@
# UCS-2 code in hex # UCS-2 code in hex
# # and Unicode name (not used in this script) # # and Unicode name (not used in this script)
require "convutils.pm"; use strict;
require convutils;
my $mapping = &read_source("CNS11643.TXT"); my $mapping = &read_source("CNS11643.TXT");
......
...@@ -13,24 +13,24 @@ ...@@ -13,24 +13,24 @@
# where the "u" field is the Unicode code point in hex, # where the "u" field is the Unicode code point in hex,
# and the "b" field is the hex byte sequence for GB18030 # and the "b" field is the hex byte sequence for GB18030
require "convutils.pm"; use strict;
require convutils;
# Read the input # Read the input
$in_file = "gb-18030-2000.xml"; my $in_file = "gb-18030-2000.xml";
open(FILE, $in_file) || die("cannot open $in_file"); open(my $in, '<', $in_file) || die("cannot open $in_file");
my @mapping; my @mapping;
while (<FILE>) while (<$in>)
{ {
next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/); next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
$u = $1; my ($u, $c) = ($1, $2);
$c = $2;
$c =~ s/ //g; $c =~ s/ //g;
$ucs = hex($u); my $ucs = hex($u);
$code = hex($c); my $code = hex($c);
if ($code >= 0x80 && $ucs >= 0x0080) if ($code >= 0x80 && $ucs >= 0x0080)
{ {
push @mapping, { push @mapping, {
...@@ -40,6 +40,6 @@ while (<FILE>) ...@@ -40,6 +40,6 @@ while (<FILE>)
} }
} }
} }
close(FILE); close($in);
print_tables("GB18030", \@mapping); print_tables("GB18030", \@mapping);
...@@ -15,7 +15,8 @@ ...@@ -15,7 +15,8 @@
# UCS-2 code in hex # UCS-2 code in hex
# # and Unicode name (not used in this script) # # and Unicode name (not used in this script)
require "convutils.pm"; use strict;
require convutils;
# Load the source file. # Load the source file.
......
...@@ -7,27 +7,27 @@ ...@@ -7,27 +7,27 @@
# Generate UTF-8 <--> SHIFT_JIS_2004 code conversion tables from # Generate UTF-8 <--> SHIFT_JIS_2004 code conversion tables from
# "sjis-0213-2004-std.txt" (http://x0213.org) # "sjis-0213-2004-std.txt" (http://x0213.org)
require "convutils.pm"; use strict;
require convutils;
# first generate UTF-8 --> SHIFT_JIS_2004 table # first generate UTF-8 --> SHIFT_JIS_2004 table
$in_file = "sjis-0213-2004-std.txt"; my $in_file = "sjis-0213-2004-std.txt";
open(FILE, $in_file) || die("cannot open $in_file"); open(my $in, '<', $in_file) || die("cannot open $in_file");
my @mapping; my @mapping;
while ($line = <FILE>) while (my $line = <$in>)
{ {
if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
{ {
$c = $1; # combined characters
$u1 = $2; my ($c, $u1, $u2) = ($1, $2, $3);
$u2 = $3; my $rest = "U+" . $u1 . "+" . $u2 . $4;
$rest = "U+" . $u1 . "+" . $u2 . $4; my $code = hex($c);
$code = hex($c); my $ucs1 = hex($u1);
$ucs1 = hex($u1); my $ucs2 = hex($u2);
$ucs2 = hex($u2);
push @mapping, { push @mapping, {
code => $code, code => $code,
...@@ -40,17 +40,11 @@ while ($line = <FILE>) ...@@ -40,17 +40,11 @@ while ($line = <FILE>)
} }
elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
{ {
$c = $1; # non-combined characters
$u = $2; my ($c, $u, $rest) = ($1, $2, "U+" . $2 . $3);
$rest = "U+" . $u . $3; my $ucs = hex($u);
} my $code = hex($c);
else my $direction;
{
next;
}
$ucs = hex($u);
$code = hex($c);
if ($code < 0x80 && $ucs < 0x80) if ($code < 0x80 && $ucs < 0x80)
{ {
...@@ -75,7 +69,8 @@ while ($line = <FILE>) ...@@ -75,7 +69,8 @@ while ($line = <FILE>)
comment => $rest, comment => $rest,
direction => $direction direction => $direction
}; };
}
} }
close(FILE); close($in);
print_tables("SHIFT_JIS_2004", \@mapping, 1); print_tables("SHIFT_JIS_2004", \@mapping, 1);
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# ftp site. # ftp site.
use strict; use strict;
require "convutils.pm"; require convutils;
my $charset = read_source("CP932.TXT"); my $charset = read_source("CP932.TXT");
......
...@@ -13,24 +13,24 @@ ...@@ -13,24 +13,24 @@
# where the "u" field is the Unicode code point in hex, # where the "u" field is the Unicode code point in hex,
# and the "b" field is the hex byte sequence for UHC # and the "b" field is the hex byte sequence for UHC
require "convutils.pm"; use strict;
require convutils;
# Read the input # Read the input
$in_file = "windows-949-2000.xml"; my $in_file = "windows-949-2000.xml";
open(FILE, $in_file) || die("cannot open $in_file"); open(my $in, '<', $in_file) || die("cannot open $in_file");
my @mapping; my @mapping;
while (<FILE>) while (<$in>)
{ {
next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/); next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
$u = $1; my ($u, $c) = ($1, $2);
$c = $2;
$c =~ s/ //g; $c =~ s/ //g;
$ucs = hex($u); my $ucs = hex($u);
$code = hex($c); my $code = hex($c);
next if ($code == 0x0080 || $code == 0x00FF); next if ($code == 0x0080 || $code == 0x00FF);
...@@ -43,7 +43,7 @@ while (<FILE>) ...@@ -43,7 +43,7 @@ while (<FILE>)
} }
} }
} }
close(FILE); close($in);
# One extra character that's not in the source file. # One extra character that's not in the source file.
push @mapping, { direction => 'both', code => 0xa2e8, ucs => 0x327e, comment => 'CIRCLED HANGUL IEUNG U' }; push @mapping, { direction => 'both', code => 0xa2e8, ucs => 0x327e, comment => 'CIRCLED HANGUL IEUNG U' };
......
...@@ -15,9 +15,10 @@ ...@@ -15,9 +15,10 @@
# UCS-2 code in hex # UCS-2 code in hex
# # and Unicode name (not used in this script) # # and Unicode name (not used in this script)
require "convutils.pm"; use strict;
require convutils;
%filename = ( my %filename = (
'WIN866' => 'CP866.TXT', 'WIN866' => 'CP866.TXT',
'WIN874' => 'CP874.TXT', 'WIN874' => 'CP874.TXT',
'WIN1250' => 'CP1250.TXT', 'WIN1250' => 'CP1250.TXT',
...@@ -46,9 +47,10 @@ require "convutils.pm"; ...@@ -46,9 +47,10 @@ require "convutils.pm";
'KOI8U' => 'KOI8-U.TXT', 'KOI8U' => 'KOI8-U.TXT',
'GBK' => 'CP936.TXT'); 'GBK' => 'CP936.TXT');
@charsets = keys(%filename); # make maps for all encodings if not specified
@charsets = @ARGV if scalar(@ARGV); my @charsets = (scalar(@ARGV) > 0) ? @ARGV : keys(%filename);
foreach $charset (@charsets)
foreach my $charset (@charsets)
{ {
my $mapping = &read_source($filename{$charset}); my $mapping = &read_source($filename{$charset});
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment