Commit a7ad43cd authored by Bruce Momjian's avatar Bruce Momjian

Included patches make some enhancements to the multi-byte support.

o allow to use Big5 (a Chinese encoding used in Taiwan) as a client
  encoding. In this case the server side encoding should be EUC_TW

o add EUC_TW and Big5 test cases to the regression and the mb test
  (contributed by Jonah Kuo)

o fix mistake in include/mb/pg_wchar.h. An encoding id for EUC_TW was
  not correct (was 3 and now is 4)

o update documents (doc/README.mb and README.mb.jp)

o update psql helpfile (bin/psql/psqlHelp.h)

--
Tatsuo Ishii
t-ishii@sra.co.jp
parent 8358a8f6
postgresql 6.4 multi-byte (MB) support README Dec 16 1998
postgresql 6.5 multi-byte (MB) support README Jan 26 1999
Tatsuo Ishii
t-ishii@sra.co.jp
......@@ -113,6 +113,7 @@ Supported encodings for PGCLIENTENCODING are:
EUC_CN Chinese EUC
EUC_KR Korean EUC
EUC_TW Taiwan EUC
BIG5 Traditional Chinese
MULE_INTERNAL Mule internal
LATIN1 ISO 8859-1 English and some European languages
LATIN2 ISO 8859-2 English and some European languages
......@@ -169,6 +170,12 @@ Unicode: http://www.unicode.org/
5. History
Jan 26, 1999
* Add support for Big5 as a frontend encoding
(you need to create a database with EUC_TW to use Big5)
* Add regression test case for EUC_TW
(contributed by Jonah Kuo <jonahkuo@mail.ttn.com.tw>)
Dec 15, 1998
* Bugs related to SQL_ASCII support fixed
......
postgresql 6.4 multi-byte (MB) support README 1998/12/16 $B:n@.(B
postgresql 6.5 multi-byte (MB) support README 1999/1/26 $B:n@.(B
$B@P0fC#IW(B
t-ishii@sra.co.jp
......@@ -130,7 +130,7 @@ initdb/createdb/create database $B$K$*$1$k%(%s%3!<%G%#%s%0$N;XDj$K$D$$$F(B
$B4D6-JQ?t(B PGCLIENTENCODING $B$,@_Dj$5$l$F$$$k>l9g$O$=$NCM$,M%@h$5$l!"%P%C(B
$B%/%(%s%IB&$H0[$J$k%(%s%3!<%G%#%s%0$,;HMQ$G$-$^$9!#@_Dj2DG=$J%(%s%3!<(B
$B%G%#%s%0$O!">e5-$K2C$(!"(BSJIS ($B%7%U%H(BJIS)$B$,;XDj$G$-$^$9!#(B
$B%G%#%s%0$O!">e5-$K2C$(!"(BSJIS ($B%7%U%H(BJIS)$B$H(B BIG5 $B$,;XDj$G$-$^$9!#(B
$B$A$J$_$K!"(BSJIS $B$O(B JISX0201 $B$N(B 1$B%P%$%H%+%J!"$$$o$f$k!VH>3Q%+%?(B
$B%+%J!W$b%5%]!<%H$7$F$$$^$9(B($B7h$7$F!VH>3Q%+%?%+%J!W$N;HMQ$r$*4+(B
......@@ -206,6 +206,13 @@ initdb/createdb/create database $B$K$*$1$k%(%s%3!<%G%#%s%0$N;XDj$K$D$$$F(B
$B2~DjMzNr!'(B
1999/1/26 Big5 $B%5%]!<%HDI2C(B
* Big5 $B$,%U%m%s%H%(%s%IB&$N%(%s%3!<%G%#%s%0$H$7$FMxMQ$G$-$k$h(B
$B$&$K$J$j$^$7$?!#$3$N>l9g!"%P%C%/%(%s%IB&$N%(%s%3!<%G%#%s%0$O(B
EUC_TW $B$^$?$O(B MULE_INTERNAL $B$H$7$^$9!#(B
* EUC_TW $B$N(B regression test $B%1!<%9$rDI2C(B
(contributed by Jonah Kuo <jonahkuo@mail.ttn.com.tw>)
1998/12/16 $BK\%I%-%e%a%s%H=$@5!#(B
* Makefile.custom $B$G(B MB=EUC_JP $B$J$I$H@_Dj$9$kJ}K!$O(B 6.4 $B0J9_(B
$B%5%]!<%H$5$l$F$$$J$$$N$G:o=|$7$?!#(B
......
......@@ -4,7 +4,7 @@
# Makefile for utils/mb
#
# IDENTIFICATION
# $Header: /cvsroot/pgsql/src/backend/utils/mb/Makefile,v 1.2 1998/07/26 04:31:03 scrappy Exp $
# $Header: /cvsroot/pgsql/src/backend/utils/mb/Makefile,v 1.3 1999/02/02 18:51:21 momjian Exp $
#
#-------------------------------------------------------------------------
......@@ -16,7 +16,8 @@ ifdef MULTIBYTE
CFLAGS+= $(MBFLAGS)
endif
OBJS = common.o conv.o mbutils.o wchar.o wstrcmp.o wstrncmp.o variable.o
OBJS = common.o conv.o mbutils.o wchar.o wstrcmp.o wstrncmp.o variable.o \
big5.o
all: SUBSYS.o
......
/*
* conversion between BIG5 and Mule Internal Code (CNS 11643-1992
* plane 1 and plane 2).
* This program is partially copied from lv(Multilingual file viewer)
* and slightly modified. lv is written and copyrighted by NARITA Tomio
* (nrt@web.ad.jp).
*
* 1999/1/15 Tatsuo Ishii
*
* $Id: big5.c,v 1.1 1999/02/02 18:51:22 momjian Exp $
*/
#include "mb/pg_wchar.h"
/*
 * One entry of a range-mapping table searched by BinarySearchRange().
 *
 * code - first source code point of a run of consecutive code points
 * peer - target code point that "code" maps to; the remaining members of
 *        the run are derived arithmetically from this pair.  A peer of 0
 *        marks a run that has no mapping.
 *
 * A lookup matches entry i when table[i].code <= c < table[i + 1].code, so
 * the last entry of every table only serves as the upper bound of the
 * preceding run.
 */
typedef struct {
unsigned short code, peer;
} codes_t;
/* map Big5 Level 1 to CNS 11643-1992 Plane 1 */
static codes_t big5Level1ToCnsPlane1[ 25 ] = { /* range */
{ 0xA140, 0x2121 },
{ 0xA1F6, 0x2258 },
{ 0xA1F7, 0x2257 },
{ 0xA1F8, 0x2259 },
{ 0xA2AF, 0x2421 },
{ 0xA3C0, 0x4221 },
{ 0xa3e1, 0x0000 },
{ 0xA440, 0x4421 },
{ 0xACFE, 0x5753 },
{ 0xacff, 0x0000 },
{ 0xAD40, 0x5323 },
{ 0xAFD0, 0x5754 },
{ 0xBBC8, 0x6B51 },
{ 0xBE52, 0x6B50 },
{ 0xBE53, 0x6F5C },
{ 0xC1AB, 0x7536 },
{ 0xC2CB, 0x7535 },
{ 0xC2CC, 0x7737 },
{ 0xC361, 0x782E },
{ 0xC3B9, 0x7865 },
{ 0xC3BA, 0x7864 },
{ 0xC3BB, 0x7866 },
{ 0xC456, 0x782D },
{ 0xC457, 0x7962 },
{ 0xc67f, 0x0000 }
};
/* map CNS 11643-1992 Plane 1 to Big5 Level 1 */
static codes_t cnsPlane1ToBig5Level1[ 26 ] = { /* range */
{ 0x2121, 0xA140 },
{ 0x2257, 0xA1F7 },
{ 0x2258, 0xA1F6 },
{ 0x2259, 0xA1F8 },
{ 0x234f, 0x0000 },
{ 0x2421, 0xA2AF },
{ 0x2571, 0x0000 },
{ 0x4221, 0xA3C0 },
{ 0x4242, 0x0000 },
{ 0x4421, 0xA440 },
{ 0x5323, 0xAD40 },
{ 0x5753, 0xACFE },
{ 0x5754, 0xAFD0 },
{ 0x6B50, 0xBE52 },
{ 0x6B51, 0xBBC8 },
{ 0x6F5C, 0xBE53 },
{ 0x7535, 0xC2CB },
{ 0x7536, 0xC1AB },
{ 0x7737, 0xC2CC },
{ 0x782D, 0xC456 },
{ 0x782E, 0xC361 },
{ 0x7864, 0xC3BA },
{ 0x7865, 0xC3B9 },
{ 0x7866, 0xC3BB },
{ 0x7962, 0xC457 },
{ 0x7d4c, 0x0000 }
};
/* map Big5 Level 2 to CNS 11643-1992 Plane 2 */
static codes_t big5Level2ToCnsPlane2[ 48 ] = { /* range */
{ 0xC940, 0x2121 },
{ 0xc94a, 0x0000 },
{ 0xC94B, 0x212B },
{ 0xC96C, 0x214D },
{ 0xC9BE, 0x214C },
{ 0xC9BF, 0x217D },
{ 0xC9ED, 0x224E },
{ 0xCAF7, 0x224D },
{ 0xCAF8, 0x2439 },
{ 0xD77A, 0x3F6A },
{ 0xD77B, 0x387E },
{ 0xDBA7, 0x3F6B },
{ 0xDDFC, 0x4176 },
{ 0xDDFD, 0x4424 },
{ 0xE8A3, 0x554C },
{ 0xE976, 0x5723 },
{ 0xEB5B, 0x5A29 },
{ 0xEBF1, 0x554B },
{ 0xEBF2, 0x5B3F },
{ 0xECDE, 0x5722 },
{ 0xECDF, 0x5C6A },
{ 0xEDAA, 0x5D75 },
{ 0xEEEB, 0x642F },
{ 0xEEEC, 0x6039 },
{ 0xF056, 0x5D74 },
{ 0xF057, 0x6243 },
{ 0xF0CB, 0x5A28 },
{ 0xF0CC, 0x6337 },
{ 0xF163, 0x6430 },
{ 0xF16B, 0x6761 },
{ 0xF16C, 0x6438 },
{ 0xF268, 0x6934 },
{ 0xF269, 0x6573 },
{ 0xF2C3, 0x664E },
{ 0xF375, 0x6762 },
{ 0xF466, 0x6935 },
{ 0xF4B5, 0x664D },
{ 0xF4B6, 0x6962 },
{ 0xF4FD, 0x6A4C },
{ 0xF663, 0x6A4B },
{ 0xF664, 0x6C52 },
{ 0xF977, 0x7167 },
{ 0xF9C4, 0x7166 },
{ 0xF9C5, 0x7234 },
{ 0xF9C6, 0x7240 },
{ 0xF9C7, 0x7235 },
{ 0xF9D2, 0x7241 },
{ 0xf9d6, 0x0000 }
};
/*
 * map CNS 11643-1992 Plane 2 to Big5 Level 2
 *
 * Range table passed to BinarySearchRange() by CNStoBIG5() for
 * LC_CNS11643_2.  Keys are CNS codes with the high bits cleared
 * (CNStoBIG5() masks its input with 0x7f7f before the lookup); a peer of
 * 0x0000 means the run starting at that key has no Big5 equivalent.
 * The final entry is only the upper bound of the last run.
 */
static codes_t cnsPlane2ToBig5Level2[ 49 ] = { /* range */
{ 0x2121, 0xC940 },
{ 0x212B, 0xC94B },
{ 0x214C, 0xC9BE },
{ 0x214D, 0xC96C },
{ 0x217D, 0xC9BF },
{ 0x224D, 0xCAF7 },
{ 0x224E, 0xC9ED },
{ 0x2439, 0xCAF8 },
{ 0x387E, 0xD77B },
{ 0x3F6A, 0xD77A },
{ 0x3F6B, 0xDBA7 },
/*
 * NOTE(review): the next key (0x4424) is larger than the key that follows
 * it (0x4176) and is repeated three entries further down, so the table is
 * not in ascending key order here even though it is binary searched.
 * This looks like a transcription slip -- confirm against the CNS 11643 /
 * Big5 mapping data before changing it.
 */
{ 0x4424, 0x0000 },
{ 0x4176, 0xDDFC },
{ 0x4177, 0x0000 },
{ 0x4424, 0xDDFD },
{ 0x554B, 0xEBF1 },
{ 0x554C, 0xE8A3 },
{ 0x5722, 0xECDE },
{ 0x5723, 0xE976 },
{ 0x5A28, 0xF0CB },
{ 0x5A29, 0xEB5B },
{ 0x5B3F, 0xEBF2 },
{ 0x5C6A, 0xECDF },
{ 0x5D74, 0xF056 },
{ 0x5D75, 0xEDAA },
{ 0x6039, 0xEEEC },
{ 0x6243, 0xF057 },
{ 0x6337, 0xF0CC },
{ 0x642F, 0xEEEB },
{ 0x6430, 0xF163 },
{ 0x6438, 0xF16C },
{ 0x6573, 0xF269 },
{ 0x664D, 0xF4B5 },
{ 0x664E, 0xF2C3 },
{ 0x6761, 0xF16B },
{ 0x6762, 0xF375 },
{ 0x6934, 0xF268 },
{ 0x6935, 0xF466 },
{ 0x6962, 0xF4B6 },
{ 0x6A4B, 0xF663 },
{ 0x6A4C, 0xF4FD },
{ 0x6C52, 0xF664 },
{ 0x7166, 0xF9C4 },
{ 0x7167, 0xF977 },
{ 0x7234, 0xF9C5 },
{ 0x7235, 0xF9C7 },
{ 0x7240, 0xF9C6 },
{ 0x7241, 0xF9D2 },
{ 0x7245, 0x0000 }
};
/*
 * Big Five Level 1 Correspondence to CNS 11643-1992 Plane 4
 *
 * Exact-match exceptions consulted before the range tables.  Each row is
 * { Big5 code, CNS code }; the CNS code is stored with the high bits clear
 * (BIG5toCNS() ORs in 0x8080 on the way out and CNStoBIG5() masks its
 * input with 0x7f7f before comparing).
 */
static unsigned short b1c4[][2] = {
{0xC879, 0x2123},
{0xC87B, 0x2124},
{0xC87D, 0x212A},
{0xC8A2, 0x2152}
};
/*
 * Big Five Level 2 Correspondence to CNS 11643-1992 Plane 3
 *
 * Exact-match exceptions consulted before the range tables.  Each row is
 * { Big5 code, CNS code }; CNStoBIG5() compares the CNS column against its
 * input after masking that input with 0x7f7f.
 */
static unsigned short b2c3[][2] = {
{0xF9D6, 0x4337},
{0xF9D7, 0x4F50},
{0xF9D8, 0x444E},
{0xF9D9, 0x504A},
{0xF9DA, 0x2C5D},
{0xF9DB, 0x3D7E},
{0xF9DC, 0x4B5C}
};
/*
 * BinarySearchRange
 *
 * Look up "code" in a range table and return the code point it maps to,
 * or 0 when no run contains it or the containing run has peer == 0.
 *
 * array - range table (see codes_t)
 * high  - largest index that may be probed as the start of a run.  The
 *         test below also reads array[ mid + 1 ], so array[ high + 1 ]
 *         must exist; the callers in this file pass (number of entries - 2).
 * code  - code point to convert.  Values >= 0xa140 are treated as Big5
 *         (result is CNS), anything smaller as CNS (result is Big5).
 */
static unsigned short BinarySearchRange
(codes_t *array, int high, unsigned short code )
{
int low, mid, distance, tmp;
low = 0;
mid = high >> 1;
for( ; low <= high ; mid = ( low + high ) >> 1 ){
/* does the run that starts at array[ mid ] contain code? */
if( ( array[ mid ].code <= code ) && ( array[ mid + 1 ].code > code ) ){
/* a run with peer 0 has no mapping */
if( 0 == array[ mid ].peer )
return 0;
if( code >= 0xa140U ){
/* big5 to cns */
/* number of whole rows (high bytes) between run start and code */
tmp = ( ( code & 0xff00 ) - ( array[ mid ].code & 0xff00 ) ) >> 8;
/* high/low are reused as scratch for the two low bytes; every path below returns */
high = code & 0x00ff;
low = array[ mid ].code & 0x00ff;
/*
 * NOTE: big5 high_byte: 0xa1-0xfe, low_byte: 0x40-0x7e, 0xa1-0xfe
 * (as positions within a row: 0x00-0x3e, 0x3f-0x9c)
 * big5 radix is 0x9d. [region_low, region_high]
 * The low byte has two separate regions (above), so when the two
 * low bytes lie in different regions the unused gap between them
 * has to be removed from the distance.
 * 0xa1 - 0x7e + bias = 1 (Distance between 0xa1 and 0x7e is 1.)
 * bias = - 0x22.
 */
distance = tmp * 0x9d + high - low +
( high >= 0xa1 ? ( low >= 0xa1 ? 0 : - 0x22 )
: ( low >= 0xa1 ? + 0x22 : 0 ) );
/*
 * NOTE: we have to convert the distance into a code point.
 * The code point's low_byte is 0x21 plus mod_0x5e.
 * First take the low byte of the run's peer, subtract 0x21 and
 * add the distance to it.  Then split the sum by 0x5e: the
 * quotient advances the high byte and the remainder, plus 0x21,
 * becomes the new low byte.
 */
tmp = ( array[ mid ].peer & 0x00ff ) + distance - 0x21;
tmp = ( array[ mid ].peer & 0xff00 ) + ( ( tmp / 0x5e ) << 8 )
+ 0x21 + tmp % 0x5e;
return tmp;
} else {
/* cns to big5 */
/* number of whole rows (high bytes) between run start and code */
tmp = ( ( code & 0xff00 ) - ( array[ mid ].code & 0xff00 ) ) >> 8;
/*
 * NOTE: a 94-character set uses bytes 0x21-0x7e, so its radix is
 * 0x5e.  There is no distance bias like big5.
 */
distance = tmp * 0x5e
+ ( (int)( code & 0x00ff ) - (int)( array[ mid ].code & 0x00ff ) );
/*
 * NOTE: Similar to big5 to cns conversion, we extract mod_0x9d and
 * restore mod_0x9d into a code point.  Subtracting 0x40 (lower
 * region) or 0x62 (upper region) turns a big5 low byte into its
 * 0x00-0x9c position; adding the same constant back afterwards
 * restores a real low byte.
 */
low = array[ mid ].peer & 0x00ff;
tmp = low + distance - ( low >= 0xa1 ? 0x62 : 0x40 );
low = tmp % 0x9d;
tmp = ( array[ mid ].peer & 0xff00 ) + ( ( tmp / 0x9d ) << 8 )
+ ( low > 0x3e ? 0x62 : 0x40 ) + low;
return tmp;
}
} else if( array[ mid ].code > code ){
high = mid - 1;
} else {
low = mid + 1;
}
}
/* code lies outside every run of the table */
return 0;
}
/*
 * BIG5toCNS
 *
 * Convert one two-byte Big5 code to CNS 11643-1992.
 *
 * big5 - the Big5 code (lead byte in the high 8 bits)
 * lc   - output: set to the leading-character constant of the CNS plane
 *        the result belongs to (LC_CNS11643_1 .. LC_CNS11643_4), or to 0
 *        when the code has no mapping
 *
 * Returns the CNS code with 0x8080 ORed in, or '?' (with *lc == 0) when
 * there is no mapping.
 */
unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc )
{
	unsigned short cns = 0;
	int i;

	if (big5 < 0xc940U)
	{
		/*
		 * level 1: try the plane 4 exceptions first.  The bound is the
		 * number of rows; dividing by sizeof(unsigned short) would count
		 * individual shorts and walk past the end of the table.
		 */
		for (i = 0; i < (int) (sizeof(b1c4) / sizeof(b1c4[0])); i++)
		{
			if (b1c4[i][0] == big5)
			{
				*lc = LC_CNS11643_4;
				return (b1c4[i][1] | 0x8080U);
			}
		}

		if (0 < (cns = BinarySearchRange(big5Level1ToCnsPlane1, 23, big5)))
			*lc = LC_CNS11643_1;
	}
	else if (big5 == 0xc94aU)
	{
		/* level 2 code that is mapped into plane 1 */
		*lc = LC_CNS11643_1;
		cns = 0x4442;
	}
	else
	{
		/* level 2: try the plane 3 exceptions first */
		for (i = 0; i < (int) (sizeof(b2c3) / sizeof(b2c3[0])); i++)
		{
			if (b2c3[i][0] == big5)
			{
				*lc = LC_CNS11643_3;
				/* set the high bits like every other successful return */
				return (b2c3[i][1] | 0x8080U);
			}
		}

		if (0 < (cns = BinarySearchRange(big5Level2ToCnsPlane2, 46, big5)))
			*lc = LC_CNS11643_2;
	}

	if (0 == cns)
	{
		/* no mapping Big5 to CNS 11643-1992 */
		*lc = 0;
		return (unsigned short) '?';
	}

	return cns | 0x8080;
}
/*
 * CNStoBIG5
 *
 * Convert one CNS 11643-1992 code to Big5.
 *
 * cns - the CNS code; the high bit of each byte is ignored (masked off)
 * lc  - leading-character constant naming the CNS plane of "cns"
 *
 * Returns the Big5 code, or 0 when the plane is not handled or the code
 * has no Big5 equivalent.
 */
unsigned short CNStoBIG5( unsigned short cns, unsigned char lc )
{
	int i;
	unsigned short big5 = 0;

	cns &= 0x7f7f;

	switch (lc)
	{
		case LC_CNS11643_1:
			big5 = BinarySearchRange(cnsPlane1ToBig5Level1, 24, cns);
			break;

		case LC_CNS11643_2:
			big5 = BinarySearchRange(cnsPlane2ToBig5Level2, 47, cns);
			break;

		case LC_CNS11643_3:
			/*
			 * Iterate over rows, not over individual shorts: dividing by
			 * sizeof(unsigned short) would run past the end of the table.
			 */
			for (i = 0; i < (int) (sizeof(b2c3) / sizeof(b2c3[0])); i++)
			{
				if (b2c3[i][1] == cns)
					return (b2c3[i][0]);
			}
			break;

		case LC_CNS11643_4:
			for (i = 0; i < (int) (sizeof(b1c4) / sizeof(b1c4[0])); i++)
			{
				if (b1c4[i][1] == cns)
					return (b1c4[i][0]);
			}
			break;

		default:
			break;
	}

	return big5;
}
......@@ -2,7 +2,7 @@
* conversion between client encoding and server internal encoding
* (currently mule internal code (mic) is used)
* Tatsuo Ishii
* $Id: conv.c,v 1.4 1998/12/14 04:59:58 momjian Exp $
* $Id: conv.c,v 1.5 1999/02/02 18:51:23 momjian Exp $
*/
#include <stdio.h>
#include <string.h>
......@@ -369,6 +369,94 @@ mic2euc_tw(unsigned char *mic, unsigned char *p, int len)
*p = '\0';
}
/*
 * Big5 ---> MIC
 *
 * Convert the NUL-terminated Big5 string "big5" to mule internal code,
 * writing a NUL-terminated result to "p".
 *
 * big5 - input string
 * p    - output buffer (sizing is the caller's responsibility)
 * len  - number of input bytes to convert at most
 *
 * ASCII bytes are copied unchanged.  Every other byte is taken as the lead
 * byte of a two-byte Big5 character, which is emitted as
 * [0x9d] <plane> <cns byte 1> <cns byte 2>, where 0x9d is written only for
 * CNS planes 3 and 4.  A character that BIG5toCNS() cannot map is emitted
 * as "(xxxx)", the four lower-case hex digits of its two bytes.  A lead
 * byte whose second byte is missing (end of string or of "len") ends the
 * conversion instead of reading beyond the input.
 */
static void
big52mic(unsigned char *big5, unsigned char *p, int len)
{
	static const char hexdigit[] = "0123456789abcdef";
	unsigned short c1;
	unsigned short big5buf, cnsBuf;
	unsigned char lc;

	while (len > 0 && (c1 = *big5++))
	{
		if (c1 <= 0x007fU)
		{
			/* ASCII */
			len--;
			*p++ = c1;
			continue;
		}

		/* truncated two-byte character: do not read past the input */
		if (len < 2 || *big5 == '\0')
			break;

		len -= 2;
		big5buf = c1 << 8;
		big5buf |= *big5++;

		cnsBuf = BIG5toCNS(big5buf, &lc);
		if (lc != 0)
		{
			if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4)
				*p++ = 0x9d;	/* LCPRV2 */
			*p++ = lc;			/* Plane No. */
			*p++ = (cnsBuf >> 8) & 0x00ff;
			*p++ = cnsBuf & 0x00ff;
		}
		else
		{
			/*
			 * cannot convert: print both bytes in hex.  The digits are
			 * written directly; formatting "%02x" into a two-byte buffer
			 * would overflow it with the terminating NUL.
			 */
			*p++ = '(';
			*p++ = hexdigit[(big5buf >> 12) & 0x0f];
			*p++ = hexdigit[(big5buf >> 8) & 0x0f];
			*p++ = hexdigit[(big5buf >> 4) & 0x0f];
			*p++ = hexdigit[big5buf & 0x0f];
			*p++ = ')';
		}
	}
	*p = '\0';
}
/*
 * MIC ---> Big5
 *
 * Convert the NUL-terminated mule internal code string "mic" to Big5,
 * writing a NUL-terminated result to "p".  "len" is reduced by the
 * pg_mic_mblen() length of every character consumed; conversion stops
 * once it is no longer positive.
 *
 * CNS 11643 plane 1 and 2 characters, and characters introduced by 0x9d
 * (LCPRV2, followed by the plane number), go through CNStoBIG5().  ASCII
 * is copied as is.  Anything else, or a CNS code without a Big5
 * equivalent, is handed to printBogusChar().
 */
static void
mic2big5(unsigned char *mic, unsigned char *p, int len)
{
	int mblen;
	unsigned short lead;
	unsigned short cnscode;
	unsigned short big5code;

	for (;;)
	{
		if (len <= 0)
			break;
		lead = *mic;
		if (lead == 0)
			break;

		mblen = pg_mic_mblen(mic);
		mic++;
		len -= mblen;

		if (lead != LC_CNS11643_1 && lead != LC_CNS11643_2 && lead != 0x9d)
		{
			if (lead <= 0x7f)
			{
				/* ASCII */
				*p++ = lead;
			}
			else
			{
				/* not a character set Big5 can represent */
				mic--;
				printBogusChar(&mic, &p);
			}
			continue;
		}

		/* 0x9d means LCPRV2: the plane number follows */
		if (lead == 0x9d)
			lead = *mic++;

		cnscode = (*mic++) << 8;
		cnscode |= (*mic++) & 0x00ff;

		big5code = CNStoBIG5(cnscode, lead);
		if (big5code != 0)
		{
			*p++ = (big5code >> 8) & 0x00ff;
			*p++ = big5code & 0x00ff;
		}
		else
		{
			/* cannot convert to Big5: rewind to the start of the character */
			mic -= mblen;
			printBogusChar(&mic, &p);
		}
	}
	*p = '\0';
}
/*
* LATINn ---> MIC
*/
......@@ -514,5 +602,6 @@ pg_encoding_conv_tbl pg_conv_tbl[] = {
{LATIN4, "LATIN4", 0, latin42mic, mic2latin4}, /* ISO 8859 Latin 4 */
{LATIN5, "LATIN5", 0, latin52mic, mic2latin5}, /* ISO 8859 Latin 5 */
{SJIS, "SJIS", 1, sjis2mic, mic2sjis}, /* SJIS */
{BIG5, "BIG5", 1, big52mic, mic2big5}, /* Big5 */
{-1, "", 0, 0, 0} /* end mark */
};
......@@ -3,14 +3,14 @@
* client encoding and server internal encoding.
* (currently mule internal code (mic) is used)
* Tatsuo Ishii
* $Id: mbutils.c,v 1.4 1998/09/25 01:46:23 momjian Exp $ */
* $Id: mbutils.c,v 1.5 1999/02/02 18:51:23 momjian Exp $ */
#include <stdio.h>
#include <string.h>
#include "mb/pg_wchar.h"
static client_encoding = -1;
static int client_encoding = -1;
static void (*client_to_mic) ();/* something to MIC */
static void (*client_from_mic) (); /* MIC to something */
static void (*server_to_mic) ();/* something to MIC */
......
/*
* conversion functions between pg_wchar and multi-byte streams.
* Tatsuo Ishii
* $Id: wchar.c,v 1.4 1998/09/01 04:33:26 momjian Exp $
* $Id: wchar.c,v 1.5 1999/02/02 18:51:23 momjian Exp $
*/
#include "mb/pg_wchar.h"
......@@ -396,6 +396,25 @@ pg_sjis_mblen(const unsigned char *s)
return (len);
}
/*
 * Big5
 *
 * Return the length in bytes of the character that starts at s:
 * 2 when the first byte is above 0x7f (lead byte of a two-byte
 * character), otherwise 1 (ASCII).
 */
static int
pg_big5_mblen(const unsigned char *s)
{
	return (*s > 0x7f) ? 2 : 1;
}
pg_wchar_tbl pg_wchar_table[] = {
{pg_ascii2wchar_with_len, pg_ascii_mblen},
{pg_eucjp2wchar_with_len, pg_eucjp_mblen},
......@@ -429,7 +448,8 @@ pg_wchar_tbl pg_wchar_table[] = {
{0, 0},
{0, 0},
{0, 0},
{0, pg_sjis_mblen}
{0, pg_sjis_mblen},
{0, pg_big5_mblen}
};
/* returns the byte length of a word for mule internal code */
......
......@@ -5,7 +5,7 @@
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: psqlHelp.h,v 1.57 1999/02/02 18:41:17 momjian Exp $
* $Id: psqlHelp.h,v 1.58 1999/02/02 18:51:24 momjian Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -330,7 +330,7 @@ static struct _helpStruct QL_HELP[] = {
set GEQO TO 'ON[=#]'|'OFF'\n\
set R_PLANS TO 'ON'|'OFF'\n\
set QUERY_LIMIT TO #\n\
set CLIENT_ENCODING TO 'EUC_JP'|'SJIS'|'EUC_CN'|'EUC_KR'|'EUC_TW'|'MULE_INTERNAL'|'LATIN1'|'LATIN2'|'LATIN3'|'LATIN4'|'LATIN5'"},
set CLIENT_ENCODING TO 'EUC_JP'|'SJIS'|'EUC_CN'|'EUC_KR'|'EUC_TW'|'BIG5'|'MULE_INTERNAL'|'LATIN1'|'LATIN2'|'LATIN3'|'LATIN4'|'LATIN5'"},
#else
"\
\tSET DateStyle TO 'ISO'|'SQL'|'Postgres'|'European'|'US'|'NonEuropean'\n\
......
/* $Id: pg_wchar.h,v 1.5 1998/09/25 01:46:25 momjian Exp $ */
/* $Id: pg_wchar.h,v 1.6 1999/02/02 18:51:25 momjian Exp $ */
#ifndef PG_WCHAR_H
#define PG_WCHAR_H
......@@ -11,7 +11,7 @@
#define EUC_JP 1 /* EUC for Japanese */
#define EUC_CN 2 /* EUC for Chinese */
#define EUC_KR 3 /* EUC for Korean */
#define EUC_TW 3 /* EUC for Taiwan */
#define EUC_TW 4 /* EUC for Taiwan */
#define UNICODE 5 /* Unicode UTF-8 */
#define MULE_INTERNAL 6 /* Mule internal code */
#define LATIN1 7 /* ISO-8859 Latin 1 */
......@@ -25,6 +25,7 @@
#define LATIN9 15 /* ISO-8859 Latin 9 */
/* followings are for client encoding only */
#define SJIS 32 /* Shift JIS */
#define BIG5 33 /* Big5 */
#ifdef MULTIBYTE
typedef unsigned int pg_wchar;
......@@ -122,6 +123,8 @@ extern int GetDatabaseEncoding(void);
extern void SetDatabaseEncoding(int);
extern void SetTemplateEncoding(int);
extern int GetTemplateEncoding(void);
extern unsigned short BIG5toCNS(unsigned short, unsigned char *);
extern unsigned short CNStoBIG5(unsigned short, unsigned char);
#endif /* MULTIBYTE */
......
......@@ -6,7 +6,7 @@
# Copyright (c) 1994, Regents of the University of California
#
# IDENTIFICATION
# $Header: /cvsroot/pgsql/src/interfaces/libpq/Attic/Makefile.in,v 1.42 1999/01/17 06:19:34 momjian Exp $
# $Header: /cvsroot/pgsql/src/interfaces/libpq/Attic/Makefile.in,v 1.43 1999/02/02 18:51:29 momjian Exp $
#
#-------------------------------------------------------------------------
......@@ -31,7 +31,7 @@ OBJS= fe-auth.o fe-connect.o fe-exec.o fe-misc.o fe-print.o fe-lobj.o \
dllist.o pqsignal.o
ifdef MULTIBYTE
OBJS+= common.o wchar.o conv.o
OBJS+= common.o wchar.o conv.o big5.o
endif
# If crypt is a separate library, rather than part of libc,
......@@ -62,6 +62,9 @@ wchar.c: $(SRCDIR)/backend/utils/mb/wchar.c
conv.c: $(SRCDIR)/backend/utils/mb/conv.c
-$(LN_S) $(SRCDIR)/backend/utils/mb/conv.c .
big5.c: $(SRCDIR)/backend/utils/mb/big5.c
-$(LN_S) $(SRCDIR)/backend/utils/mb/big5.c .
endif
# The following rules cause dependencies in the backend directory to
......@@ -142,7 +145,7 @@ beforeinstall-headers:
.PHONY: clean
clean:
rm -f libpq.a $(shlib) $(OBJS)
rm -f dllist.c common.c wchar.c conv.c
rm -f dllist.c common.c wchar.c conv.c big5.c
ifeq ($(PORTNAME), win)
rm -f pq.def
endif
......
QUERY: drop table 廠商資料;
QUERY: create table 廠商資料 (行業別 text, 公司抬頭 varchar, 地址 varchar(16));
QUERY: create index 廠商資料index1 on 廠商資料 using btree (行業別);
QUERY: create index 廠商資料index2 on 廠商資料 using hash (公司抬頭);
QUERY: insert into 廠商資料 values ('電腦業', '達達科技', '北A01仁');
QUERY: insert into 廠商資料 values ('製造業', '財源有限公司', '中B10中');
QUERY: insert into 廠商資料 values ('餐飲業', '美味股份有限公司', '高Z01九');
QUERY: vacuum 廠商資料;
QUERY: select * from 廠商資料;
行業別|公司抬頭 |地址
------+----------------+-------
電腦業|達達科技 |北A01仁
製造業|財源有限公司 |中B10中
餐飲業|美味股份有限公司|高Z01九
(3 rows)
QUERY: select * from 廠商資料 where 地址 = '高Z01九';
行業別|公司抬頭 |地址
------+----------------+-------
餐飲業|美味股份有限公司|高Z01九
(1 row)
QUERY: select * from 廠商資料 where 地址 ~* '高z01九';
行業別|公司抬頭 |地址
------+----------------+-------
餐飲業|美味股份有限公司|高Z01九
(1 row)
QUERY: select * from 廠商資料 where 地址 like '_Z01_';
行業別|公司抬頭 |地址
------+----------------+-------
餐飲業|美味股份有限公司|高Z01九
(1 row)
QUERY: select * from 廠商資料 where 地址 like '_Z%';
行業別|公司抬頭 |地址
------+----------------+-------
餐飲業|美味股份有限公司|高Z01九
(1 row)
QUERY: select * from 廠商資料 where 公司抬頭 ~ '達達科[寄記技]';
行業別|公司抬頭|地址
------+--------+-------
電腦業|達達科技|北A01仁
(1 row)
QUERY: select * from 廠商資料 where 公司抬頭 ~* '達達科[寄記技]';
行業別|公司抬頭|地址
------+--------+-------
電腦業|達達科技|北A01仁
(1 row)
QUERY: select *, character_length(行業別) from 廠商資料;
行業別|公司抬頭 |地址 |length
------+----------------+-------+------
電腦業|達達科技 |北A01仁| 3
製造業|財源有限公司 |中B10中| 3
餐飲業|美味股份有限公司|高Z01九| 3
(3 rows)
QUERY: select *, octet_length(行業別) from 廠商資料;
行業別|公司抬頭 |地址 |octet_length
------+----------------+-------+------------
電腦業|達達科技 |北A01仁| 6
製造業|財源有限公司 |中B10中| 6
餐飲業|美味股份有限公司|高Z01九| 6
(3 rows)
QUERY: select *, position('有限' in 公司抬頭) from 廠商資料;
行業別|公司抬頭 |地址 |strpos
------+----------------+-------+------
電腦業|達達科技 |北A01仁| 0
製造業|財源有限公司 |中B10中| 3
餐飲業|美味股份有限公司|高Z01九| 5
(3 rows)
QUERY: select *, substring(公司抬頭 from 3 for 6 ) from 廠商資料;
行業別|公司抬頭 |地址 |substr
------+----------------+-------+------------
電腦業|達達科技 |北A01仁|科技
製造業|財源有限公司 |中B10中|有限公司
餐飲業|美味股份有限公司|高Z01九|股份有限公司
(3 rows)
#! /bin/sh
# $Header: /cvsroot/pgsql/src/test/mb/mbregress.sh,v 1.2 1998/07/26 04:31:38 scrappy Exp $
# $Header: /cvsroot/pgsql/src/test/mb/mbregress.sh,v 1.3 1999/02/02 18:51:32 momjian Exp $
if echo '\c' | grep -s c >/dev/null 2>&1
then
......@@ -15,7 +15,7 @@ if [ ! -d results ];then
fi
PSQL="psql -n -e -q"
tests="euc_jp sjis euc_kr euc_cn unicode mule_internal"
tests="euc_jp sjis euc_kr euc_cn euc_tw big5 unicode mule_internal"
unset PGCLIENTENCODING
for i in $tests
do
......@@ -26,6 +26,11 @@ do
export PGCLIENTENCODING
$PSQL euc_jp < sql/sjis.sql > results/sjis.out 2>&1
unset PGCLIENTENCODING
elif [ $i = big5 ];then
PGCLIENTENCODING=BIG5
export PGCLIENTENCODING
$PSQL euc_tw < sql/big5.sql > results/big5.out 2>&1
unset PGCLIENTENCODING
else
destroydb $i >/dev/null 2>&1
createdb -E `echo $i|tr "[a-z]" "[A-Z]"` $i
......
-- Big5 client-encoding test script.
-- mbregress.sh feeds this file to psql with PGCLIENTENCODING=BIG5 against
-- the euc_tw database and captures the output in results/big5.out.
-- It creates a table whose table and column names are Chinese, loads three
-- rows, and then exercises equality, regular-expression and LIKE matching,
-- character_length, octet_length, position and substring on that data.
-- NOTE(review): the statements are echoed into the expected-output file, so
-- keep them byte-for-byte in sync with that file when editing.
drop table 廠商資料;
create table 廠商資料 (行業別 text, 公司抬頭 varchar, 地址 varchar(16));
create index 廠商資料index1 on 廠商資料 using btree (行業別);
create index 廠商資料index2 on 廠商資料 using hash (公司抬頭);
insert into 廠商資料 values ('電腦業', '達達科技', '北A01仁');
insert into 廠商資料 values ('製造業', '財源有限公司', '中B10中');
insert into 廠商資料 values ('餐飲業', '美味股份有限公司', '高Z01九');
vacuum 廠商資料;
select * from 廠商資料;
select * from 廠商資料 where 地址 = '高Z01九';
select * from 廠商資料 where 地址 ~* '高z01九';
select * from 廠商資料 where 地址 like '_Z01_';
select * from 廠商資料 where 地址 like '_Z%';
select * from 廠商資料 where 公司抬頭 ~ '達達科[寄記技]';
select * from 廠商資料 where 公司抬頭 ~* '達達科[寄記技]';
select *, character_length(行業別) from 廠商資料;
select *, octet_length(行業別) from 廠商資料;
select *, position('有限' in 公司抬頭) from 廠商資料;
select *, substring(公司抬頭 from 3 for 6 ) from 廠商資料;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment