Support for conversion between UNICODE and other encodings

Currently ISO8859-[1-5] and EUC_JP are supported. Support for other encodings will be coming soon.

Support for conversion between UNICODE and other encodings
Currently ISO8859-[1-5] and EUC_JP are supported. Support for other encodings will be coming soon.
de53ce81 · Tatsuo Ishii · 6619ad11 · de53ce81 · de53ce81 · de53ce81
Commit de53ce81 authored Oct 12, 2000 by Tatsuo Ishii
14 changed files
--- a/src/backend/utils/mb/EUC_JP_to_UTF.map
+++ b/src/backend/utils/mb/EUC_JP_to_UTF.map
--- a/src/backend/utils/mb/Makefile
+++ b/src/backend/utils/mb/Makefile
@@ -4,7 +4,7 @@
 #    Makefile for utils/mb
 #
 # IDENTIFICATION
-#    $Header: /cvsroot/pgsql/src/backend/utils/mb/Makefile,v 1.10 2000/08/31 16:10:56 petere Exp $
+#    $Header: /cvsroot/pgsql/src/backend/utils/mb/Makefile,v 1.11 2000/10/12 06:06:49 ishii Exp $
 #
 #-------------------------------------------------------------------------
@@ -29,6 +29,15 @@ sjistest.o: sjistest.c
 liketest.o: liketest.c
 	$(CC) -c $(CFLAGS) liketest.c
+uconv.o: uconv.c
+	$(CC) -c $(CFLAGS) uconv.c
+uconv2.o: uconv2.c
+	$(CC) -c $(CFLAGS) uconv2.c
+utftest.o: utftest.c conv.c wchar.c mbutils.c
+	$(CC) -c $(CFLAGS) utftest.c
 sjistest: $(OBJS) sjistest.o palloc.o
 	$(CC) -o sjistest sjistest.o palloc.o \
 	common.o mbutils.o wchar.o wstrcmp.o wstrncmp.o variable.o \
@@ -39,6 +48,21 @@ liketest: $(OBJS) liketest.o palloc.o
 	common.o mbutils.o wchar.o wstrcmp.o wstrncmp.o variable.o \
       big5.o $(LDFLAGS)
+utftest: $(OBJS) utftest.o palloc.o
+	$(CC) -o utftest utftest.o palloc.o \
+	common.o wstrcmp.o wstrncmp.o variable.o \
+       big5.o $(LDFLAGS)
+uconv: uconv.o palloc.o
+	$(CC) -o uconv uconv.o palloc.o \
+		common.o conv.o wchar.o \
+       big5.o mbutils.o $(LDFLAGS)
+uconv2: uconv2.o palloc.o
+	$(CC) -o uconv2 uconv2.o palloc.o \
+		common.o conv.o wchar.o \
+       big5.o mbutils.o $(LDFLAGS)
 depend dep:
 	$(CC) -MM $(CFLAGS) *.c >depend

--- a/src/backend/utils/mb/UTF_to_EUC_JP.map
+++ b/src/backend/utils/mb/UTF_to_EUC_JP.map
--- a/src/backend/utils/mb/conv.c
+++ b/src/backend/utils/mb/conv.c
--- a/src/backend/utils/mb/iso8859.map
+++ b/src/backend/utils/mb/iso8859.map
--- a/src/backend/utils/mb/liketest.c
+++ b/src/backend/utils/mb/liketest.c
+#include <stdio.h>
+#include <string.h>
+#include "mb/pg_wchar.h"
+#define LIKE_FALSE 0
+#define LIKE_TRUE 1
+#define LIKE_ABORT 2
+#define PG_CHAR unsigned char
+#define UCHARMAX 0xff
+/*----------------------------------------------------------------*/
+/*
+ * Byte-wise equality of the (possibly multibyte) characters at p1 and p2.
+ * Returns 1 when both characters have the same pg_mblen() length and the
+ * same bytes, 0 otherwise.
+ */
+static int wchareq(unsigned char *p1, unsigned char *p2)
+{
+	int idx;
+	int nbytes = pg_mblen(p1);
+	/* characters of different byte length can never be equal */
+	if (nbytes != pg_mblen(p2))
+		return 0;
+	for (idx = 0; idx < nbytes; idx++)
+	{
+		if (p1[idx] != p2[idx])
+			return 0;
+	}
+	return 1;
+}
+/*
+ * Case-insensitive equality of the characters at p1 and p2.
+ * Returns 1 when the two characters are equal after tolower(), 0 otherwise.
+ *
+ * A lead byte below UCHARMAX is lower-cased directly; any other character
+ * is first converted to a pg_wchar with pg_mb2wchar_with_len().
+ *
+ * NOTE(review): UCHARMAX is defined as 0xff in this file, so every byte
+ * except 0xff takes the direct path; an "is ASCII" test would normally
+ * compare against 0x80 -- confirm the intended threshold.
+ */
+static int iwchareq(unsigned char *p1, unsigned char *p2)
+{
+	/*
+	 * Two-element buffers: pg_mb2wchar_with_len() is expected to store a
+	 * terminating zero after the converted character (confirm against
+	 * wchar.c), which would overrun a single scalar.
+	 */
+	pg_wchar c1[2];
+	pg_wchar c2[2];
+	int l;
+	/* short cut. if *p1 and *p2 is lower than UCHARMAX, then
+	   we assume they are ASCII */
+	if (*p1 < UCHARMAX && *p2 < UCHARMAX)
+		return(tolower(*p1) == tolower(*p2));
+	if (*p1 < UCHARMAX)
+		c1[0] = tolower(*p1);
+	else
+	{
+		l = pg_mblen(p1);
+		(void)pg_mb2wchar_with_len(p1, c1, l);
+		c1[0] = tolower(c1[0]);
+	}
+	if (*p2 < UCHARMAX)
+		c2[0] = tolower(*p2);
+	else
+	{
+		l = pg_mblen(p2);
+		(void)pg_mb2wchar_with_len(p2, c2, l);
+		c2[0] = tolower(c2[0]);
+	}
+	return(c1[0] == c2[0]);
+}
+/*
+ * Character primitives used by MatchText() and MatchTextLower():
+ *   CHAREQ(p1, p2)     - exact comparison of the characters at p1 and p2
+ *   ICHAREQ(p1, p2)    - case-insensitive comparison
+ *   NextChar(p, plen)  - advance p by one character and shrink plen by the
+ *                        number of bytes consumed
+ * With MULTIBYTE defined a character may span several bytes (pg_mblen());
+ * otherwise a character is a single byte.
+ * NextChar is wrapped so that it behaves as exactly one statement.
+ */
+#ifdef MULTIBYTE
+#define CHAREQ(p1, p2) wchareq(p1, p2)
+#define ICHAREQ(p1, p2) iwchareq(p1, p2)
+#define NextChar(p, plen) \
+	do { int mblen_ = pg_mblen(p); (p) += mblen_; (plen) -= mblen_; } while (0)
+#else
+#define CHAREQ(p1, p2) (*(p1) == *(p2))
+#define ICHAREQ(p1, p2) (tolower(*(p1)) == tolower(*(p2)))
+#define NextChar(p, plen) ((p)++, (plen)--)
+#endif
+/*
+ * Case-sensitive LIKE matcher.
+ *
+ *	t, tlen: text to test and its length in bytes
+ *	p, plen: pattern and its length in bytes; '%' matches any run of
+ *			 characters, '_' matches exactly one character
+ *	e:		 escape character, or NULL when no escape is in effect
+ *
+ * Returns LIKE_TRUE on a match, LIKE_FALSE on a mismatch, and LIKE_ABORT
+ * when the text ran out before the pattern could be satisfied, so callers
+ * scanning for a later start position can stop early.
+ */
+static int
+MatchText(PG_CHAR * t, int tlen, PG_CHAR * p, int plen, char *e)
+{
+	/* Fast path for match-everything pattern
+	 * Include weird case of escape character as a percent sign or underscore,
+	 * when presumably that wildcard character becomes a literal.
+	 */
+	if ((plen == 1) && (*p == '%')
+		&& ! ((e != NULL) && (*e == '%')))
+		return LIKE_TRUE;
+	while ((tlen > 0) && (plen > 0))
+	{
+		/* If an escape character was specified and we find it here in the pattern,
+		 * then we'd better have an exact match for the next character.
+		 */
+		if ((e != NULL) && CHAREQ(p,e))
+		{
+			NextChar(p, plen);
+			if ((plen <= 0) || !CHAREQ(t,p))
+				return LIKE_FALSE;
+		}
+		else if (*p == '%')
+		{
+			/* %% is the same as % according to the SQL standard */
+			/* Advance past all %'s */
+			while ((plen > 0) && (*p == '%'))
+				NextChar(p, plen);
+			/* Trailing percent matches everything. */
+			if (plen <= 0)
+				return LIKE_TRUE;
+			/*
+			 * Otherwise, scan for a text position at which we can
+			 * match the rest of the pattern.
+			 */
+			while (tlen > 0)
+			{
+				/*
+				 * Optimization to prevent most recursion: don't
+				 * recurse unless first pattern char might match this
+				 * text char.
+				 */
+				if (CHAREQ(t,p) || (*p == '_')
+					|| ((e != NULL) && CHAREQ(p,e)))
+				{
+					int matched = MatchText(t, tlen, p, plen, e);
+					if (matched != LIKE_FALSE)
+						return matched;		/* TRUE or ABORT */
+				}
+				NextChar(t, tlen);
+			}
+			/*
+			 * End of text with no match, so no point in trying later
+			 * places to start matching this pattern.
+			 */
+			return LIKE_ABORT;
+		}
+		else if ((*p != '_') && !CHAREQ(t,p))
+		{
+			/* Not the single-character wildcard and no explicit match?
+			 * Then time to quit...
+			 */
+			return LIKE_FALSE;
+		}
+		/* current text and pattern characters agree: step over both */
+		NextChar(t, tlen);
+		NextChar(p, plen);
+	}
+	if (tlen > 0)
+		return LIKE_FALSE;		/* end of pattern, but not of text */
+	/* End of input string.  Do we have matching pattern remaining? */
+	while ((plen > 0) && (*p == '%'))	/* allow multiple %'s at end of pattern */
+		NextChar(p, plen);
+	if (plen <= 0)
+		return LIKE_TRUE;
+	/*
+	 * End of text with no match, so no point in trying later places to
+	 * start matching this pattern.
+	 */
+	return LIKE_ABORT;
+} /* MatchText() */
+/*
+ * Case-insensitive LIKE matcher (same algorithm as MatchText(), but every
+ * character comparison goes through ICHAREQ).
+ *
+ *	t, tlen: text to test and its length in bytes
+ *	p, plen: pattern and its length in bytes; '%' matches any run of
+ *			 characters, '_' matches exactly one character
+ *	e:		 escape character, or NULL when no escape is in effect
+ *
+ * Returns LIKE_TRUE on a match, LIKE_FALSE on a mismatch, and LIKE_ABORT
+ * when the text ran out before the pattern could be satisfied.
+ */
+static int
+MatchTextLower(PG_CHAR * t, int tlen, PG_CHAR * p, int plen, char *e)
+{
+	/* Fast path for match-everything pattern
+	 * Include weird case of escape character as a percent sign or underscore,
+	 * when presumably that wildcard character becomes a literal.
+	 */
+	if ((plen == 1) && (*p == '%')
+		&& ! ((e != NULL) && (*e == '%')))
+		return LIKE_TRUE;
+	while ((tlen > 0) && (plen > 0))
+	{
+		/* If an escape character was specified and we find it here in the pattern,
+		 * then we'd better have an exact match for the next character.
+		 */
+		if ((e != NULL) && ICHAREQ(p,e))
+		{
+			NextChar(p, plen);
+			if ((plen <= 0) || !ICHAREQ(t,p))
+				return LIKE_FALSE;
+		}
+		else if (*p == '%')
+		{
+			/* %% is the same as % according to the SQL standard */
+			/* Advance past all %'s */
+			while ((plen > 0) && (*p == '%'))
+				NextChar(p, plen);
+			/* Trailing percent matches everything. */
+			if (plen <= 0)
+				return LIKE_TRUE;
+			/*
+			 * Otherwise, scan for a text position at which we can
+			 * match the rest of the pattern.
+			 */
+			while (tlen > 0)
+			{
+				/*
+				 * Optimization to prevent most recursion: don't
+				 * recurse unless first pattern char might match this
+				 * text char.
+				 */
+				if (ICHAREQ(t,p) || (*p == '_')
+					|| ((e != NULL) && ICHAREQ(p,e)))
+				{
+					/*
+					 * Recurse into the case-insensitive matcher; calling
+					 * MatchText() here would compare the remainder of the
+					 * pattern case-sensitively.
+					 */
+					int matched = MatchTextLower(t, tlen, p, plen, e);
+					if (matched != LIKE_FALSE)
+						return matched;		/* TRUE or ABORT */
+				}
+				NextChar(t, tlen);
+			}
+			/*
+			 * End of text with no match, so no point in trying later
+			 * places to start matching this pattern.
+			 */
+			return LIKE_ABORT;
+		}
+		else if ((*p != '_') && !ICHAREQ(t,p))
+		{
+			/* Not the single-character wildcard and no explicit match?
+			 * Then time to quit...
+			 */
+			return LIKE_FALSE;
+		}
+		NextChar(t, tlen);
+		NextChar(p, plen);
+	}
+	if (tlen > 0)
+		return LIKE_FALSE;		/* end of pattern, but not of text */
+	/* End of input string.  Do we have matching pattern remaining? */
+	while ((plen > 0) && (*p == '%'))	/* allow multiple %'s at end of pattern */
+		NextChar(p, plen);
+	if (plen <= 0)
+		return LIKE_TRUE;
+	/*
+	 * End of text with no match, so no point in trying later places to
+	 * start matching this pattern.
+	 */
+	return LIKE_ABORT;
+} /* MatchTextLower() */
+/*
+ * Stand-alone driver: matches the text "Z01" against the pattern "_Z%"
+ * with backslash as the escape character and prints the numeric result of
+ * MatchTextLower() (one of the LIKE_* codes) on stdout.
+ */
+int
+main(void)
+{
+	/* the matcher works on unsigned bytes, string literals are plain char */
+	unsigned char *t = (unsigned char *) "Z01";
+	unsigned char *p = (unsigned char *) "_Z%";
+	int tlen, plen;
+	tlen = strlen((char *) t);
+	plen = strlen((char *) p);
+	printf("%d\n",MatchTextLower(t,tlen,p,plen,"\\"));
+	return 0;
+}
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -3,7 +3,7 @@
 * client encoding and server internal encoding.
 * (currently mule internal code (mic) is used)
 * Tatsuo Ishii
- * $Id: mbutils.c,v 1.11 2000/08/27 10:40:48 ishii Exp $ */
+ * $Id: mbutils.c,v 1.12 2000/10/12 06:06:50 ishii Exp $ */
 #include "postgres.h"
@@ -21,8 +21,8 @@ static void (*server_from_mic) ();		/* MIC to something */
 /*
 * find encoding table entry by encoding
 */
-static pg_encoding_conv_tbl *
+pg_encoding_conv_tbl *
-get_enc_ent(int encoding)
+pg_get_enc_ent(int encoding)
 {
 	pg_encoding_conv_tbl *p = pg_conv_tbl;
@@ -35,8 +35,8 @@ get_enc_ent(int encoding)
 }
 /*
- * set the client encoding. if client/server encoding is
+ * set the client encoding. if encoding conversion between
- * not supported, returns -1
+ * client/server encoding is not supported, returns -1
 */
 int
 pg_set_client_encoding(int encoding)
@@ -52,8 +52,8 @@ pg_set_client_encoding(int encoding)
 	}
 	else if (current_server_encoding == MULE_INTERNAL)
 	{							/* server == MULE_INETRNAL? */
-		client_to_mic = get_enc_ent(encoding)->to_mic;
+		client_to_mic = pg_get_enc_ent(encoding)->to_mic;
-		client_from_mic = get_enc_ent(encoding)->from_mic;
+		client_from_mic = pg_get_enc_ent(encoding)->from_mic;
 		server_to_mic = server_from_mic = 0;
 		if (client_to_mic == 0 || client_from_mic == 0)
 			return (-1);
@@ -61,17 +61,33 @@ pg_set_client_encoding(int encoding)
 	else if (encoding == MULE_INTERNAL)
 	{							/* client == MULE_INETRNAL? */
 		client_to_mic = client_from_mic = 0;
-		server_to_mic = get_enc_ent(current_server_encoding)->to_mic;
+		server_to_mic = pg_get_enc_ent(current_server_encoding)->to_mic;
-		server_from_mic = get_enc_ent(current_server_encoding)->from_mic;
+		server_from_mic = pg_get_enc_ent(current_server_encoding)->from_mic;
+		if (server_to_mic == 0 || server_from_mic == 0)
+			return (-1);
+	}
+	else if (current_server_encoding == UNICODE)
+	{							/* server == UNICODE? */
+		client_to_mic = pg_get_enc_ent(encoding)->to_unicode;
+		client_from_mic = pg_get_enc_ent(encoding)->from_unicode;
+		server_to_mic = server_from_mic = 0;
+		if (client_to_mic == 0 || client_from_mic == 0)
+			return (-1);
+	}
+	else if (encoding == UNICODE)
+	{							/* client == UNICODE? */
+		client_to_mic = client_from_mic = 0;
+		server_to_mic = pg_get_enc_ent(current_server_encoding)->to_unicode;
+		server_from_mic = pg_get_enc_ent(current_server_encoding)->from_unicode;
 		if (server_to_mic == 0 || server_from_mic == 0)
 			return (-1);
 	}
 	else
 	{
-		client_to_mic = get_enc_ent(encoding)->to_mic;
+		client_to_mic = pg_get_enc_ent(encoding)->to_mic;
-		client_from_mic = get_enc_ent(encoding)->from_mic;
+		client_from_mic = pg_get_enc_ent(encoding)->from_mic;
-		server_to_mic = get_enc_ent(current_server_encoding)->to_mic;
+		server_to_mic = pg_get_enc_ent(current_server_encoding)->to_mic;
-		server_from_mic = get_enc_ent(current_server_encoding)->from_mic;
+		server_from_mic = pg_get_enc_ent(current_server_encoding)->from_mic;
 		if (client_to_mic == 0 || client_from_mic == 0)
 			return (-1);
 		if (server_to_mic == 0 || server_from_mic == 0)
@@ -193,6 +209,13 @@ pg_mblen(const unsigned char *mbstr)
 	return ((*pg_wchar_table[GetDatabaseEncoding()].mblen) (mbstr));
 }
+/*
+ * returns the byte length of the multi-byte character at mbstr, using the
+ * mblen routine that pg_wchar_table holds for the specified encoding
+ */
+int
+pg_mblen_with_encoding(const unsigned char *mbstr, int encoding)
+{
+	return pg_wchar_table[encoding].mblen(mbstr);
+}
 /* returns the length (counted as a wchar) of a multi-byte string */
 int
 pg_mbstrlen(const unsigned char *mbstr)

--- a/src/backend/utils/mb/palloc.c
+++ b/src/backend/utils/mb/palloc.c
+#include "postgres.h"
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "utils/memutils.h"
+/*
+ * Stand-in for the backend's elog() so the test programs can link.
+ * Formats fmt with the supplied arguments and prints the result on stdout;
+ * the severity level lev is ignored.
+ */
+void
+elog(int lev, const char *fmt,...)
+{
+	va_list		ap;
+	(void) lev;
+	/* forward the caller's arguments; printf(fmt) alone would drop them */
+	va_start(ap, fmt);
+	vprintf(fmt, ap);
+	va_end(ap);
+}
+MemoryContext CurrentMemoryContext;
+/*
+ * Stand-in for the backend allocator so the test programs can link.
+ * The memory context is ignored; the request is served by malloc().
+ * Returns the new block, or NULL when malloc() fails.
+ */
+void *
+MemoryContextAlloc(MemoryContext context, Size size)
+{
+	(void) context;
+	return malloc(size);
+}
+/*
+ * Stand-in for the backend's pfree(): deliberately releases nothing, the
+ * short-lived test programs simply let the process exit reclaim memory.
+ */
+void
+pfree(void *pointer)
+{
+	(void) pointer;
+}
+/*
+ * Stand-in for the backend's repalloc() so the test programs can link.
+ * Resizes the block with realloc() and returns the (possibly moved) block,
+ * or NULL when realloc() fails, in which case the old block stays valid.
+ */
+void *
+repalloc(void *pointer, Size size)
+{
+	return realloc(pointer, size);
+}
--- a/src/backend/utils/mb/sjis.map
+++ b/src/backend/utils/mb/sjis.map
--- a/src/backend/utils/mb/sjistest.c
+++ b/src/backend/utils/mb/sjistest.c
+/*
+ *	testing for sjis2mic() and mic2sjis()
+ */
+#include "conv.c"
+/*
+ * Print the bytes of buf as two-digit hex values separated by spaces.
+ * Stops at the first zero byte, where a newline is printed instead; at most
+ * len bytes are examined.
+ */
+static void
+dump_until_nul(const unsigned char *buf, int len)
+{
+	int			k;
+	for (k = 0; k < len; k++)
+	{
+		if (buf[k] == 0)
+		{
+			printf("\n");
+			return;
+		}
+		printf("%02x ", buf[k]);
+	}
+}
+/*
+ * Round-trip driver: converts a fixed SJIS sample with sjis2mic(), dumps
+ * the result, converts it back with mic2sjis() and dumps that as well.
+ */
+int
+main()
+{
+	unsigned char eucbuf[1024];
+	unsigned char sjisbuf[1024];
+	unsigned char sjis[] = {0x81, 0x40, 0xa1, 0xf0, 0x40, 0xf0, 0x9e, 0xf5, 0x40, 0xfa, 0x40, 0xfa, 0x54, 0xfa, 0x7b, 0x00};
+	sjis2mic(sjis, eucbuf, 1024);
+	dump_until_nul(eucbuf, 1024);
+	mic2sjis(eucbuf, sjisbuf, 1024);
+	dump_until_nul(sjisbuf, 1024);
+	return (0);
+}
--- a/src/backend/utils/mb/uconv.c
+++ b/src/backend/utils/mb/uconv.c
+/*
+ * $Id: uconv.c,v 1.1 2000/10/12 06:06:50 ishii Exp $
+ */
+#include "pg_wchar.h"
+/*
+ * Encode one UCS-2 value as UTF-8.
+ *
+ *	ucs: the UCS-2 value to encode
+ *	utf: output buffer, must have room for up to 3 bytes
+ *
+ * Returns the number of bytes stored in utf (1, 2 or 3).
+ */
+static int
+pg_ucs2utf(const unsigned short ucs, unsigned char *utf)
+{
+	/* 0x0000-0x007f: single byte, stored unchanged */
+	if (ucs <= 0x007f)
+	{
+		utf[0] = ucs;
+		return 1;
+	}
+	/* 0x0080-0x07ff: 110xxxxx 10xxxxxx */
+	if (ucs <= 0x07ff)
+	{
+		utf[0] = 0xc0 | (ucs >> 6);
+		utf[1] = 0x80 | (ucs & 0x003f);
+		return 2;
+	}
+	/* 0x0800-0xffff: 1110xxxx 10xxxxxx 10xxxxxx */
+	utf[0] = 0xe0 | (ucs >> 12);
+	utf[1] = 0x80 | ((ucs >> 6) & 0x003f);
+	utf[2] = 0x80 | (ucs & 0x003f);
+	return 3;
+}
+/*
+ * One entry of a UCS-2 -> local encoding table (the element type used for
+ * the mapISO8859 and mapJIS tables that main() walks).
+ */
+typedef struct
+{
+	unsigned short ucs;	/* UCS-2 */
+	unsigned short code;		/* local code */
+	unsigned char encoding;		/* encoding */
+} ucs_to_local;
+/*
+ * One entry of a local encoding -> UCS-2 table (the element type used for
+ * the revISO8859_* tables that main() walks).
+ */
+typedef struct
+{
+	unsigned short code;		/* local code */
+	unsigned short ucs;	/* UCS-2 */
+} local_to_ucs;
+#include "ucs_to_iso8859.map"
+#include "iso88592.rev"
+#include "iso88593.rev"
+#include "iso88594.rev"
+#include "iso88595.rev"
+#define X0208 0
+#define X0212 1
+#include "ucs_to_jis.map"
+/*
+ * Print one local code -> UTF-8 table on stdout as a C initializer for a
+ * pg_local_to_utf array called "name".  tbl holds ntbl entries.
+ */
+static void
+print_local_to_utf(const char *name, const local_to_ucs *tbl, int ntbl)
+{
+	int i,j;
+	int l;
+	unsigned char u[4];
+	printf("\nstatic pg_local_to_utf %s[] = {\n", name);
+	for (i=0;i<ntbl;i++) {
+		l = pg_ucs2utf(tbl[i].ucs, u);
+		printf(" {0x%04x, ", tbl[i].code|0x80);
+		printf("0x");
+		for(j=0;j<l;j++) {
+			printf("%02x", u[j]);
+		}
+		printf("},\n");
+	}
+	printf("};\n");
+}
+/*
+ * Table generator.  Prints the UTF-8 <-> ISO8859 tables on stdout and
+ * writes the UTF-8 -> EUC_JP table to the file UTF_to_EUC_JP.map.
+ * Returns 0 on success, 1 when the map file cannot be created or written.
+ */
+int
+main()
+{
+	int i,j;
+	int l;
+	unsigned int euc;
+	unsigned char u[4];
+	FILE *fd;
+	printf("static pg_utf_to_local mapISO8859[] = {\n");
+	for (i=0;i<(int)(sizeof(mapISO8859)/sizeof(mapISO8859[0]));i++) {
+		/* only encodings up to LATIN5 are emitted */
+		if (mapISO8859[i].encoding > LATIN5)
+			continue;
+		l = pg_ucs2utf(mapISO8859[i].ucs, u);
+		printf("  {0x");
+		for(j=0;j<l;j++) {
+			printf("%02x", u[j]);
+		}
+		printf(", 0x%04x, %s},\n",
+			   mapISO8859[i].code|0x80,
+			   pg_get_enc_ent(mapISO8859[i].encoding)->name);
+	}
+	printf("};\n");
+	print_local_to_utf("ISO8859_2", revISO8859_2,
+					   (int)(sizeof(revISO8859_2)/sizeof(revISO8859_2[0])));
+	print_local_to_utf("ISO8859_3", revISO8859_3,
+					   (int)(sizeof(revISO8859_3)/sizeof(revISO8859_3[0])));
+	print_local_to_utf("ISO8859_4", revISO8859_4,
+					   (int)(sizeof(revISO8859_4)/sizeof(revISO8859_4[0])));
+	print_local_to_utf("ISO8859_5", revISO8859_5,
+					   (int)(sizeof(revISO8859_5)/sizeof(revISO8859_5[0])));
+	fd = fopen("UTF_to_EUC_JP.map", "w");
+	if (fd == NULL)
+	{
+		/* without this check the fprintf() calls below would use NULL */
+		perror("UTF_to_EUC_JP.map");
+		return(1);
+	}
+	fprintf(fd, "static pg_utf_to_local mapUTF_to_EUC_JP[] = {\n");
+	for (i=0;i<(int)(sizeof(mapJIS)/sizeof(mapJIS[0]));i++) {
+		l = pg_ucs2utf(mapJIS[i].ucs, u);
+		fprintf(fd, "  {0x");
+		for(j=0;j<l;j++) {
+			fprintf(fd, "%02x", u[j]);
+		}
+		if (mapJIS[i].encoding == X0208)
+		{
+			euc = mapJIS[i].code|0x8080;
+		}
+		else
+		{
+			/* non-X0208 entries get the SS3 byte placed in front */
+			euc = SS3 << 16 | mapJIS[i].code | 0x8080;
+		}
+		fprintf(fd, ", 0x%04x, %s},\n",
+				euc,
+				"EUC_JP");
+	}
+	fprintf(fd, "};\n");
+	/* fclose() flushes buffered output, so a failure here means data loss */
+	if (fclose(fd) != 0)
+	{
+		perror("UTF_to_EUC_JP.map");
+		return(1);
+	}
+	return(0);
+}
--- a/src/backend/utils/mb/uconv2.c
+++ b/src/backend/utils/mb/uconv2.c
+/*
+ * $Id: uconv2.c,v 1.1 2000/10/12 06:06:50 ishii Exp $
+ */
+#include "pg_wchar.h"
+#include "UTF_to_EUC_JP.map"
+/*
+ * qsort() comparator: orders pg_utf_to_local entries by ascending "code".
+ * Returns a negative, zero or positive value as required by qsort().
+ */
+static int compare1(const void *p1, const void *p2)
+{
+	unsigned int v1, v2;
+	v1 = ((pg_utf_to_local *)p1)->code;
+	v2 = ((pg_utf_to_local *)p2)->code;
+	/*
+	 * Compare instead of subtracting: an unsigned difference converted to
+	 * int has the wrong sign once the values are 2^31 or more apart.
+	 */
+	return((v1 > v2) - (v1 < v2));
+}
+/*
+ * Table generator.  Sorts mapUTF_to_EUC_JP by local code and writes it, as
+ * the reverse table mapEUC_JP_to_UTF, to the file EUC_JP_to_UTF.map.
+ * Returns 0 on success, 1 when the map file cannot be created or written.
+ */
+int
+main()
+{
+	int i;
+	int nmap = (int)(sizeof(mapUTF_to_EUC_JP)/sizeof(mapUTF_to_EUC_JP[0]));
+	FILE *fd;
+	qsort(mapUTF_to_EUC_JP, nmap,
+		  sizeof(pg_utf_to_local),compare1);
+	fd = fopen("EUC_JP_to_UTF.map", "w");
+	if (fd == NULL)
+	{
+		/* without this check the fprintf() calls below would use NULL */
+		perror("EUC_JP_to_UTF.map");
+		return(1);
+	}
+	fprintf(fd, "static pg_local_to_utf mapEUC_JP_to_UTF[] = {\n");
+	for (i=0;i<nmap;i++) {
+		fprintf(fd, "  {0x%08x, 0x%08x},\n",
+				mapUTF_to_EUC_JP[i].code,
+				mapUTF_to_EUC_JP[i].utf);
+	}
+	fprintf(fd, "};\n");
+	/* fclose() flushes buffered output, so a failure here means data loss */
+	if (fclose(fd) != 0)
+	{
+		perror("EUC_JP_to_UTF.map");
+		return(1);
+	}
+	return(0);
+}
--- a/src/backend/utils/mb/utftest.c
+++ b/src/backend/utils/mb/utftest.c
 /*
- * testing of utf2wchar()
+ * $Id: utftest.c,v 1.4 2000/10/12 06:06:50 ishii Exp $
- * $Id: utftest.c,v 1.3 1999/07/15 23:03:31 momjian Exp $
 */
-#include "regex/regex.h"
+#include "conv.c"
-#include "regex/utils.h"
+#include "wchar.c"
-#include "regex/regex2.h"
+#include "mbutils.c"
-#include "regex/pg_wchar.h"
+int
 main()
 {
 	/* Example 1 from RFC2044 */
@@ -21,11 +19,17 @@ main()
 	char	   *utf[] = {utf1, utf2, utf3};
 	pg_wchar	ucs[128];
 	pg_wchar   *p;
+	unsigned char iso[1024];
 	int			i;
+	/* UTF8-->ISO8859-2 test */
+	unsigned char utf_iso8859_2[] = {0x01, 0x00, 0x01, 0x02, 0x01, 0x55, 0x02, 0xdd, 0x00};
+	printf("===== testing of pg_utf2wchar_with_len =====\n");
 	for (i = 0; i < sizeof(utf) / sizeof(char *); i++)
 	{
-		pg_utf2wchar(utf[i], ucs);
+		pg_utf2wchar_with_len(utf[i], ucs, 128);
 		p = ucs;
 		while (*p)
 		{
@@ -34,4 +38,16 @@ main()
 		}
 		printf("\n");
 	}
+	printf("===== testing of utf_to_latin2 =====\n");
+	utf_to_latin(utf_iso8859_2, iso, LATIN2, 128);
+	for (i = 0; i < sizeof(iso) / sizeof(char *); i++)
+	{
+		printf("%04x ", iso[i]);
+		if (iso[i] == 0x00)
+			break;
+	}
+	printf("\n");
+	return(0);
 }
--- a/src/backend/utils/mb/wchar.c
+++ b/src/backend/utils/mb/wchar.c
 /*
 * conversion functions between pg_wchar and multi-byte streams.
 * Tatsuo Ishii
- * $Id: wchar.c,v 1.12 2000/08/27 10:40:48 ishii Exp $
+ * $Id: wchar.c,v 1.13 2000/10/12 06:06:50 ishii Exp $
 *
 * WIN1250 client encoding updated by Pavel Behal
 *
@@ -246,7 +246,7 @@ pg_euctw_mblen(const unsigned char *s)
 }
 /*
- * convert UTF-8 to pg_wchar (UCS-2)
+ * convert UTF-8 string to pg_wchar (UCS-2)
 * caller should allocate enough space for "to"
 * len: length of from.
 * "from" not necessarily null terminated.
@@ -296,7 +296,10 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar * to, int len)
 	return(cnt);
 }
-static int
+/*
+ * returns the byte length of a UTF-8 word pointed to by s
+ */
+int
 pg_utf_mblen(const unsigned char *s)
 {
 	int			len = 1;