Make all Unicode Perl scripts use strict, and rearrange logic for clarity.

The loops were a bit difficult to understand because they were exited early. Also fix things that perlcritic complained about. — Daniel Gustafsson

Make all Unicode Perl scripts use strict, and rearrange logic for clarity.
The loops were a bit difficult to understand because they were exited early. Also fix things that perlcritic complained about. — Daniel Gustafsson
021d254d · Heikki Linnakangas · 81c52728 · 021d254d · 021d254d · 021d254d
Commit 021d254d authored Nov 30, 2016 by Heikki Linnakangas
12 changed files
--- a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl
@@ -24,8 +24,8 @@
 #		 UCS-2 code in hex
 #		 # and Unicode name (not used in this script)
+use strict;
-require "convutils.pm";
+require convutils;
 # Load BIG5.TXT
 my $all = &read_source("BIG5.TXT");

--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
@@ -13,24 +13,24 @@
 # where the "u" field is the Unicode code point in hex,
 # and the "b" field is the hex byte sequence for GB18030
-require "convutils.pm";
+use strict;
+require convutils;
 # Read the input
-$in_file = "gb-18030-2000.xml";
+my $in_file = "gb-18030-2000.xml";
-open(FILE, $in_file) || die("cannot open $in_file");
+open(my $in, '<', $in_file) || die("cannot open $in_file");
 my @mapping;
-while (<FILE>)
+while (<$in>)
 {
 	next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
-	$u = $1;
+	my ($u, $c) = ($1, $2);
-	$c = $2;
 	$c =~ s/ //g;
-	$ucs  = hex($u);
+	my $ucs  = hex($u);
-	$code = hex($c);
+	my $code = hex($c);
 	# The GB-18030 character set, which we use as the source, contains
 	# a lot of extra characters on top of the GB2312 character set that
@@ -71,6 +71,6 @@ while (<FILE>)
 		direction => 'both'
 	}
 }
-close(FILE);
+close($in);
 print_tables("EUC_CN", \@mapping);
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
@@ -7,27 +7,27 @@
 # Generate UTF-8 <--> EUC_JIS_2004 code conversion tables from
 # "euc-jis-2004-std.txt" (http://x0213.org)
-require "convutils.pm";
+use strict;
+require convutils;
 # first generate UTF-8 --> EUC_JIS_2004 table
-$in_file = "euc-jis-2004-std.txt";
+my $in_file = "euc-jis-2004-std.txt";
-open(FILE, $in_file) || die("cannot open $in_file");
+open(my $in, '<', $in_file) || die("cannot open $in_file");
 my @all;
-while ($line = <FILE>)
+while (my $line = <$in>)
 {
 	if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
 	{
-		$c              = $1;
+		# combined characters
-		$u1             = $2;
+		my ($c, $u1, $u2) = ($1, $2, $3);
-		$u2             = $3;
+		my $rest = "U+" . $u1 . "+" . $u2 . $4;
-		$rest           = "U+" . $u1 . "+" . $u2 . $4;
+		my $code = hex($c);
-		$code           = hex($c);
+		my $ucs1 = hex($u1);
-		$ucs1           = hex($u1);
+		my $ucs2 = hex($u2);
-		$ucs2           = hex($u2);
 		push @all, { direction => 'both',
 					 ucs => $ucs1,
@@ -38,22 +38,16 @@ while ($line = <FILE>)
 	}
 	elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
 	{
-		$c    = $1;
+		# non-combined characters
-		$u    = $2;
+		my ($c, $u, $rest) = ($1, $2, "U+" . $2 . $3);
-		$rest = "U+" . $u . $3;
+		my $ucs  = hex($u);
-	}
+		my $code = hex($c);
-	else
-	{
-		next;
-	}
-	$ucs  = hex($u);
-	$code = hex($c);
 		next if ($code < 0x80 && $ucs < 0x80);
 		push @all, { direction => 'both', ucs => $ucs, code => $code, comment => $rest };
+	}
 }
-close(FILE);
+close($in);
 print_tables("EUC_JIS_2004", \@all, 1);
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
@@ -12,7 +12,7 @@
 # organization's ftp site.
 use strict;
-require "convutils.pm";
+require convutils;
 # Load JIS0212.TXT
 my $jis0212 = &read_source("JIS0212.TXT");

--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl
@@ -16,7 +16,8 @@
 #		 UCS-2 code in hex
 #		 # and Unicode name (not used in this script)
-require "convutils.pm";
+use strict;
+require convutils;
 # Load the source file.

--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
@@ -17,7 +17,8 @@
 #		 UCS-2 code in hex
 #		 # and Unicode name (not used in this script)
-require "convutils.pm";
+use strict;
+require convutils;
 my $mapping = &read_source("CNS11643.TXT");

--- a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
@@ -13,24 +13,24 @@
 # where the "u" field is the Unicode code point in hex,
 # and the "b" field is the hex byte sequence for GB18030
-require "convutils.pm";
+use strict;
+require convutils;
 # Read the input
-$in_file = "gb-18030-2000.xml";
+my $in_file = "gb-18030-2000.xml";
-open(FILE, $in_file) || die("cannot open $in_file");
+open(my $in, '<', $in_file) || die("cannot open $in_file");
 my @mapping;
-while (<FILE>)
+while (<$in>)
 {
 	next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
-	$u = $1;
+	my ($u, $c) = ($1, $2);
-	$c = $2;
 	$c =~ s/ //g;
-	$ucs  = hex($u);
+	my $ucs  = hex($u);
-	$code = hex($c);
+	my $code = hex($c);
 	if ($code >= 0x80 && $ucs >= 0x0080)
 	{
 		push @mapping, {
@@ -40,6 +40,6 @@ while (<FILE>)
 		}
 	}
 }
-close(FILE);
+close($in);
 print_tables("GB18030", \@mapping);
--- a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl
@@ -15,7 +15,8 @@
 #		 UCS-2 code in hex
 #		 # and Unicode name (not used in this script)
-require "convutils.pm";
+use strict;
+require convutils;
 # Load the source file.

--- a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
@@ -7,27 +7,27 @@
 # Generate UTF-8 <--> SHIFT_JIS_2004 code conversion tables from
 # "sjis-0213-2004-std.txt" (http://x0213.org)
-require "convutils.pm";
+use strict;
+require convutils;
 # first generate UTF-8 --> SHIFT_JIS_2004 table
-$in_file = "sjis-0213-2004-std.txt";
+my $in_file = "sjis-0213-2004-std.txt";
-open(FILE, $in_file) || die("cannot open $in_file");
+open(my $in, '<', $in_file) || die("cannot open $in_file");
 my @mapping;
-while ($line = <FILE>)
+while (my $line = <$in>)
 {
 	if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
 	{
-		$c              = $1;
+		# combined characters
-		$u1             = $2;
+		my ($c, $u1, $u2) = ($1, $2, $3);
-		$u2             = $3;
+		my $rest = "U+" . $u1 . "+" . $u2 . $4;
-		$rest           = "U+" . $u1 . "+" . $u2 . $4;
+		my $code = hex($c);
-		$code           = hex($c);
+		my $ucs1 = hex($u1);
-		$ucs1           = hex($u1);
+		my $ucs2 = hex($u2);
-		$ucs2           = hex($u2);
 		push @mapping, {
 			code => $code,
@@ -40,17 +40,11 @@ while ($line = <FILE>)
 	}
 	elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
 	{
-		$c    = $1;
+		# non-combined characters
-		$u    = $2;
+		my ($c, $u, $rest) = ($1, $2, "U+" . $2 . $3);
-		$rest = "U+" . $u . $3;
+		my $ucs  = hex($u);
-	}
+		my $code = hex($c);
-	else
+		my $direction;
-	{
-		next;
-	}
-	$ucs  = hex($u);
-	$code = hex($c);
 		if ($code < 0x80 && $ucs < 0x80)
 		{
@@ -75,7 +69,8 @@ while ($line = <FILE>)
 			comment => $rest,
 			direction => $direction
 		};
+	}
 }
-close(FILE);
+close($in);
 print_tables("SHIFT_JIS_2004", \@mapping, 1);
--- a/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl
@@ -11,7 +11,7 @@
 # ftp site.
 use strict;
-require "convutils.pm";
+require convutils;
 my $charset = read_source("CP932.TXT");

--- a/src/backend/utils/mb/Unicode/UCS_to_UHC.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_UHC.pl
@@ -13,24 +13,24 @@
 # where the "u" field is the Unicode code point in hex,
 # and the "b" field is the hex byte sequence for UHC
-require "convutils.pm";
+use strict;
+require convutils;
 # Read the input
-$in_file = "windows-949-2000.xml";
+my $in_file = "windows-949-2000.xml";
-open(FILE, $in_file) || die("cannot open $in_file");
+open(my $in, '<', $in_file) || die("cannot open $in_file");
 my @mapping;
-while (<FILE>)
+while (<$in>)
 {
 	next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
-	$u = $1;
+	my ($u, $c) = ($1, $2);
-	$c = $2;
 	$c =~ s/ //g;
-	$ucs  = hex($u);
+	my $ucs  = hex($u);
-	$code = hex($c);
+	my $code = hex($c);
 	next if ($code == 0x0080 || $code == 0x00FF);
@@ -43,7 +43,7 @@ while (<FILE>)
 		}
 	}
 }
-close(FILE);
+close($in);
 # One extra character that's not in the source file.
 push @mapping, { direction => 'both', code => 0xa2e8, ucs => 0x327e, comment => 'CIRCLED HANGUL IEUNG U' };

--- a/src/backend/utils/mb/Unicode/UCS_to_most.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_most.pl
@@ -15,9 +15,10 @@
 #		 UCS-2 code in hex
 #		 # and Unicode name (not used in this script)
-require "convutils.pm";
+use strict;
+require convutils;
-%filename = (
+my %filename = (
 	'WIN866'     => 'CP866.TXT',
 	'WIN874'     => 'CP874.TXT',
 	'WIN1250'    => 'CP1250.TXT',
@@ -46,9 +47,10 @@ require "convutils.pm";
 	'KOI8U'      => 'KOI8-U.TXT',
 	'GBK'        => 'CP936.TXT');
-@charsets = keys(%filename);
+# make maps for all encodings if not specified
-@charsets = @ARGV if scalar(@ARGV);
+my @charsets = (scalar(@ARGV) > 0) ? @ARGV : keys(%filename);
-foreach $charset (@charsets)
+foreach my $charset (@charsets)
 {
 	my $mapping = &read_source($filename{$charset});