Commit 4c35ec53 authored by Tatsuo Ishii

Allow 4-byte UTF-8 sequences (UCS-4 range 00010000-001FFFFF)

This is necessary to support JIS X 0213 <--> UTF-8 conversion.
parent 6b77e3a8
# #
# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/ucs2utf.pl,v 1.2 2003/11/29 22:40:01 pgsql Exp $ # Copyright (c) 2001-2007, PostgreSQL Global Development Group
# convert UCS-2 to UTF-8 #
# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/ucs2utf.pl,v 1.3 2007/03/23 13:51:30 ishii Exp $
# convert UCS-4 to UTF-8
# #
sub ucs2utf { sub ucs2utf {
local($ucs) = @_; local($ucs) = @_;
...@@ -10,10 +12,15 @@ sub ucs2utf { ...@@ -10,10 +12,15 @@ sub ucs2utf {
$utf = $ucs; $utf = $ucs;
} elsif ($ucs > 0x007f && $ucs <= 0x07ff) { } elsif ($ucs > 0x007f && $ucs <= 0x07ff) {
$utf = (($ucs & 0x003f) | 0x80) | ((($ucs >> 6) | 0xc0) << 8); $utf = (($ucs & 0x003f) | 0x80) | ((($ucs >> 6) | 0xc0) << 8);
} else { } elsif ($ucs > 0x07ff && $ucs <= 0xffff) {
$utf = ((($ucs >> 12) | 0xe0) << 16) | $utf = ((($ucs >> 12) | 0xe0) << 16) |
(((($ucs & 0x0fc0) >> 6) | 0x80) << 8) | (((($ucs & 0x0fc0) >> 6) | 0x80) << 8) |
(($ucs & 0x003f) | 0x80); (($ucs & 0x003f) | 0x80);
} else {
$utf = ((($ucs >> 18) | 0xf0) << 24) |
(((($ucs & 0x3ffff) >> 12) | 0x80) << 16) |
(((($ucs & 0x0fc0) >> 6) | 0x80) << 8) |
(($ucs & 0x003f) | 0x80);
} }
return($utf); return($utf);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment