Commit 3c7798f0 authored by Tatsuo Ishii

Add conversion procs for CREATE CONVERSION

parent 743b7472
#-------------------------------------------------------------------------
#
# Makefile--
# Makefile for utils/mb/conversion_procs
#
# IDENTIFICATION
# $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/Makefile,v 1.1 2002/07/16 09:25:04 ishii Exp $
#
#-------------------------------------------------------------------------
# Where this directory sits in the tree, and how to reach the top of it.
subdir = src/backend/utils/mb/conversion_procs
top_builddir = ../../../../..
include $(top_builddir)/src/Makefile.global

# SQL script (generated by the rule below) that registers the conversions.
SQLSCRIPT = conversion_create.sql

# One subdirectory per conversion module; the all/install/clean targets
# recurse into each of them.
DIRS = \
    utf8_and_ascii \
    utf8_and_iso8859_1 \
    utf8_and_euc_jp \
    utf8_and_euc_kr \
    utf8_and_euc_cn \
    utf8_and_euc_tw \
    utf8_and_sjis \
    utf8_and_big5 \
    utf8_and_gbk \
    utf8_and_gb18030 \
    utf8_and_uhc \
    utf8_and_johab \
    utf8_and_tcvn \
    utf8_and_iso8859 \
    euc_jp_and_sjis \
    euc_tw_and_big5
# Generate the SQL script that registers every conversion.
#
# The table below is passed to the shell as positional parameters, five
# words per conversion:
#
#   conversion_name source_encoding destination_encoding function object
#
# For each group the loop emits a CREATE OR REPLACE FUNCTION for the C
# function in $libdir/<object>, a DROP CONVERSION, and a CREATE DEFAULT
# CONVERSION.  ('$$"libdir"' ends one double-quoted string right after a
# literal '$', so the script receives the text "$libdir" unexpanded.)
#
# The output is written to a temporary file and renamed into place only on
# success, so a failed or interrupted run never leaves a truncated script
# that looks up to date.
$(SQLSCRIPT): Makefile
	@set \
	  utf8_to_ascii UNICODE SQL_ASCII utf8_to_ascii utf8_and_ascii \
	  ascii_to_utf8 SQL_ASCII UNICODE ascii_to_utf8 utf8_and_ascii \
	  utf8_to_iso8859_1 UNICODE LATIN1 utf8_to_iso8859_1 utf8_and_iso8859_1 \
	  iso8859_1_to_utf8 LATIN1 UNICODE iso8859_1_to_utf8 utf8_and_iso8859_1 \
	  euc_jp_to_utf8 EUC_JP UNICODE euc_jp_to_utf8 utf8_and_euc_jp \
	  utf8_to_euc_jp UNICODE EUC_JP utf8_to_euc_jp utf8_and_euc_jp \
	  euc_kr_to_utf8 EUC_KR UNICODE euc_kr_to_utf8 utf8_and_euc_kr \
	  utf8_to_euc_kr UNICODE EUC_KR utf8_to_euc_kr utf8_and_euc_kr \
	  euc_cn_to_utf8 EUC_CN UNICODE euc_cn_to_utf8 utf8_and_euc_cn \
	  utf8_to_euc_cn UNICODE EUC_CN utf8_to_euc_cn utf8_and_euc_cn \
	  euc_tw_to_utf8 EUC_TW UNICODE euc_tw_to_utf8 utf8_and_euc_tw \
	  utf8_to_euc_tw UNICODE EUC_TW utf8_to_euc_tw utf8_and_euc_tw \
	  sjis_to_utf8 SJIS UNICODE sjis_to_utf8 utf8_and_sjis \
	  utf8_to_sjis UNICODE SJIS utf8_to_sjis utf8_and_sjis \
	  big5_to_utf8 BIG5 UNICODE big5_to_utf8 utf8_and_big5 \
	  utf8_to_big5 UNICODE BIG5 utf8_to_big5 utf8_and_big5 \
	  gbk_to_utf8 GBK UNICODE gbk_to_utf8 utf8_and_gbk \
	  utf8_to_gbk UNICODE GBK utf8_to_gbk utf8_and_gbk \
	  gb18030_to_utf8 GB18030 UNICODE gb18030_to_utf8 utf8_and_gb18030 \
	  utf8_to_gb18030 UNICODE GB18030 utf8_to_gb18030 utf8_and_gb18030 \
	  uhc_to_utf8 UHC UNICODE uhc_to_utf8 utf8_and_uhc \
	  utf8_to_uhc UNICODE UHC utf8_to_uhc utf8_and_uhc \
	  johab_to_utf8 JOHAB UNICODE johab_to_utf8 utf8_and_johab \
	  utf8_to_johab UNICODE JOHAB utf8_to_johab utf8_and_johab \
	  tcvn_to_utf8 TCVN UNICODE tcvn_to_utf8 utf8_and_tcvn \
	  utf8_to_tcvn UNICODE TCVN utf8_to_tcvn utf8_and_tcvn \
	  utf8_to_iso8859_2 UNICODE LATIN2 utf8_to_iso8859 utf8_and_iso8859 \
	  iso8859_2_to_utf8 LATIN2 UNICODE iso8859_to_utf8 utf8_and_iso8859 \
	  utf8_to_iso8859_3 UNICODE LATIN3 utf8_to_iso8859 utf8_and_iso8859 \
	  iso8859_3_to_utf8 LATIN3 UNICODE iso8859_to_utf8 utf8_and_iso8859 \
	  utf8_to_iso8859_4 UNICODE LATIN4 utf8_to_iso8859 utf8_and_iso8859 \
	  iso8859_4_to_utf8 LATIN4 UNICODE iso8859_to_utf8 utf8_and_iso8859 \
	  utf8_to_iso8859_9 UNICODE LATIN5 utf8_to_iso8859 utf8_and_iso8859 \
	  iso8859_9_to_utf8 LATIN5 UNICODE iso8859_to_utf8 utf8_and_iso8859 \
	  utf8_to_iso8859_10 UNICODE LATIN6 utf8_to_iso8859 utf8_and_iso8859 \
	  iso8859_10_to_utf8 LATIN6 UNICODE iso8859_to_utf8 utf8_and_iso8859 \
	  utf8_to_iso8859_13 UNICODE LATIN7 utf8_to_iso8859 utf8_and_iso8859 \
	  iso8859_13_to_utf8 LATIN7 UNICODE iso8859_to_utf8 utf8_and_iso8859 \
	  utf8_to_iso8859_14 UNICODE LATIN8 utf8_to_iso8859 utf8_and_iso8859 \
	  iso8859_14_to_utf8 LATIN8 UNICODE iso8859_to_utf8 utf8_and_iso8859 \
	  utf8_to_iso8859_15 UNICODE LATIN9 utf8_to_iso8859 utf8_and_iso8859 \
	  iso8859_15_to_utf8 LATIN9 UNICODE iso8859_to_utf8 utf8_and_iso8859 \
	  utf8_to_iso8859_16 UNICODE LATIN10 utf8_to_iso8859 utf8_and_iso8859 \
	  iso8859_16_to_utf8 LATIN10 UNICODE iso8859_to_utf8 utf8_and_iso8859 \
	  utf8_to_iso8859_5 UNICODE ISO-8859-5 utf8_to_iso8859 utf8_and_iso8859 \
	  iso8859_5_to_utf8 ISO-8859-5 UNICODE iso8859_to_utf8 utf8_and_iso8859 \
	  utf8_to_iso8859_6 UNICODE ISO-8859-6 utf8_to_iso8859 utf8_and_iso8859 \
	  iso8859_6_to_utf8 ISO-8859-6 UNICODE iso8859_to_utf8 utf8_and_iso8859 \
	  utf8_to_iso8859_7 UNICODE ISO-8859-7 utf8_to_iso8859 utf8_and_iso8859 \
	  iso8859_7_to_utf8 ISO-8859-7 UNICODE iso8859_to_utf8 utf8_and_iso8859 \
	  utf8_to_iso8859_8 UNICODE ISO-8859-8 utf8_to_iso8859 utf8_and_iso8859 \
	  iso8859_8_to_utf8 ISO-8859-8 UNICODE iso8859_to_utf8 utf8_and_iso8859 \
	  euc_jp_to_sjis EUC_JP SJIS euc_jp_to_sjis euc_jp_and_sjis \
	  sjis_to_euc_jp SJIS EUC_JP sjis_to_euc_jp euc_jp_and_sjis \
	  euc_jp_to_mic EUC_JP MULE_INTERNAL euc_jp_to_mic euc_jp_and_sjis \
	  sjis_to_mic SJIS MULE_INTERNAL sjis_to_mic euc_jp_and_sjis \
	  mic_to_euc_jp MULE_INTERNAL EUC_JP mic_to_euc_jp euc_jp_and_sjis \
	  mic_to_sjis MULE_INTERNAL SJIS mic_to_sjis euc_jp_and_sjis \
	  euc_tw_to_big5 EUC_TW BIG5 euc_tw_to_big5 euc_tw_and_big5 \
	  big5_to_euc_tw BIG5 EUC_TW big5_to_euc_tw euc_tw_and_big5 \
	  euc_tw_to_mic EUC_TW MULE_INTERNAL euc_tw_to_mic euc_tw_and_big5 \
	  big5_to_mic BIG5 MULE_INTERNAL big5_to_mic euc_tw_and_big5 \
	  mic_to_euc_tw MULE_INTERNAL EUC_TW mic_to_euc_tw euc_tw_and_big5 \
	  mic_to_big5 MULE_INTERNAL BIG5 mic_to_big5 euc_tw_and_big5 \
	; \
	while [ "$$#" -gt 0 ] ; \
	do \
		name=$$1;shift; \
		se=$$1;shift; \
		de=$$1; shift; \
		func=$$1; shift; \
		obj=$$1; shift; \
		echo "-- $$se --> $$de"; \
		echo "CREATE OR REPLACE FUNCTION $$func (INTEGER, INTEGER, OPAQUE, OPAQUE, INTEGER) RETURNS INTEGER AS '$$"libdir"/$$obj', '$$func' LANGUAGE 'c';"; \
		echo "DROP CONVERSION pg_catalog.$$name;"; \
		echo "CREATE DEFAULT CONVERSION pg_catalog.$$name FOR '$$se' TO '$$de' FROM $$func;"; \
	done > $@.tmp && mv -f $@.tmp $@ || { $(RM) $@.tmp; exit 1; }
# None of these targets names a file.
.PHONY: all install clean distclean maintainer-clean

# Install the generated SQL script, then recurse so every module installs
# its shared library.  The script is a prerequisite so that "make install"
# works on a fresh tree; DESTDIR is honoured for staged installs, matching
# the module install rule in proc.mk.
install: $(SQLSCRIPT)
	$(INSTALL_DATA) $(SQLSCRIPT) $(DESTDIR)$(datadir)
	@for dir in $(DIRS); do $(MAKE) -C $$dir $@ || exit; done

# Build the SQL script and every conversion module; stop at the first
# subdirectory that fails.
all: $(SQLSCRIPT)
	@for dir in $(DIRS); do $(MAKE) -C $$dir $@ || exit; done

# Remove the generated script and clean every module.  The loop
# deliberately keeps going after a failure so one broken subdirectory does
# not block cleaning the rest.
clean distclean maintainer-clean:
	$(RM) $(SQLSCRIPT)
	@for dir in $(DIRS); do $(MAKE) -C $$dir $@; done
#-------------------------------------------------------------------------
#
# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
#
#-------------------------------------------------------------------------
# Locate the top of the build tree and pull in the global build settings.
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global
# Module name; ../proc.mk derives the source, object and installed file
# names from it.
NAME := euc_jp_and_sjis
include ../proc.mk
/*-------------------------------------------------------------------------
*
* EUC_JP, SJIS and MULE_INTERNAL
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c,v 1.1 2002/07/16 09:25:05 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
/*
 * Alternative ("geta") codes emitted when a character has no mapping.
 * PGSJISALTCODE is written by mic2sjis() when a MIC character cannot be
 * expressed in SJIS; PGEUCALTCODE is written by sjis2mic() for the NEC
 * selection IBM kanji range that has no assigned mapping.
 */
#define PGSJISALTCODE 0x81ac
#define PGEUCALTCODE 0xa2ae
/*
 * conversion table between SJIS UDC (IBM kanji) and EUC_JP
 * (provides the ibmkanji[] array searched by sjis2mic() and mic2sjis())
 */
#include "sjis.map"
/*
 * Size multiplier for the intermediate MULE internal code buffer that
 * euc_jp_to_sjis() and sjis_to_euc_jp() allocate: len * ENCODING_GROWTH_RATE
 * bytes for a source string of len bytes.
 */
#define ENCODING_GROWTH_RATE 4
/* fmgr version-1 registration for each exported conversion function */
PG_FUNCTION_INFO_V1(euc_jp_to_sjis)
PG_FUNCTION_INFO_V1(sjis_to_euc_jp)
PG_FUNCTION_INFO_V1(euc_jp_to_mic)
PG_FUNCTION_INFO_V1(mic_to_euc_jp)
PG_FUNCTION_INFO_V1(sjis_to_mic)
PG_FUNCTION_INFO_V1(mic_to_sjis)
/* prototypes of the exported conversion functions */
extern Datum euc_jp_to_sjis(PG_FUNCTION_ARGS);
extern Datum sjis_to_euc_jp(PG_FUNCTION_ARGS);
extern Datum euc_jp_to_mic(PG_FUNCTION_ARGS);
extern Datum mic_to_euc_jp(PG_FUNCTION_ARGS);
extern Datum sjis_to_mic(PG_FUNCTION_ARGS);
extern Datum mic_to_sjis(PG_FUNCTION_ARGS);
/* ----------
 * Calling convention shared by every conversion function in this file:
 *
 * conv_proc(
 *		INTEGER,	-- source encoding id
 *		INTEGER,	-- destination encoding id
 *		OPAQUE,		-- source string (null terminated C string)
 *		OPAQUE,		-- destination string (null terminated C string)
 *		INTEGER		-- source string length
 * ) returns INTEGER;	-- dummy. returns nothing, actually.
 * ----------
 */
/* file-local converters that do the actual work; MIC is the pivot encoding */
static void sjis2mic(unsigned char *sjis, unsigned char *p, int len);
static void mic2sjis(unsigned char *mic, unsigned char *p, int len);
static void euc_jp2mic(unsigned char *euc, unsigned char *p, int len);
static void mic2euc_jp(unsigned char *mic, unsigned char *p, int len);
/*
 * euc_jp_to_sjis: EUC_JP ---> SJIS, going through MULE internal code.
 *
 * Takes the conv_proc arguments (source encoding id, destination encoding
 * id, source string, destination buffer, source length).  The converted
 * text is stored in the destination buffer; the return value is always 0.
 */
Datum
euc_jp_to_sjis(PG_FUNCTION_ARGS)
{
	unsigned char *euc = PG_GETARG_CSTRING(2);
	unsigned char *sjis = PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);
	unsigned char *mic;

	Assert(PG_GETARG_INT32(0) == PG_EUC_JP);
	Assert(PG_GETARG_INT32(1) == PG_SJIS);
	Assert(len > 0);

	/* scratch buffer holding the intermediate MIC string */
	mic = palloc(len * ENCODING_GROWTH_RATE);

	euc_jp2mic(euc, mic, len);
	mic2sjis(mic, sjis, strlen(mic));

	pfree(mic);

	PG_RETURN_INT32(0);
}
/*
 * sjis_to_euc_jp: SJIS ---> EUC_JP, going through MULE internal code.
 *
 * Takes the conv_proc arguments (source encoding id, destination encoding
 * id, source string, destination buffer, source length).  The converted
 * text is stored in the destination buffer; the return value is always 0.
 */
Datum
sjis_to_euc_jp(PG_FUNCTION_ARGS)
{
	unsigned char *sjis = PG_GETARG_CSTRING(2);
	unsigned char *euc = PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);
	unsigned char *mic;

	Assert(PG_GETARG_INT32(0) == PG_SJIS);
	Assert(PG_GETARG_INT32(1) == PG_EUC_JP);
	Assert(len > 0);

	/* scratch buffer holding the intermediate MIC string */
	mic = palloc(len * ENCODING_GROWTH_RATE);

	sjis2mic(sjis, mic, len);
	mic2euc_jp(mic, euc, strlen(mic));

	pfree(mic);

	PG_RETURN_INT32(0);
}
/*
 * euc_jp_to_mic: EUC_JP ---> MULE internal code.
 *
 * Takes the conv_proc arguments; the converted text is stored in the
 * destination buffer and the return value is always 0.
 */
Datum
euc_jp_to_mic(PG_FUNCTION_ARGS)
{
	unsigned char *euc = PG_GETARG_CSTRING(2);
	unsigned char *mic = PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);

	Assert(PG_GETARG_INT32(0) == PG_EUC_JP);
	Assert(PG_GETARG_INT32(1) == PG_MULE_INTERNAL);
	Assert(len > 0);

	euc_jp2mic(euc, mic, len);

	PG_RETURN_INT32(0);
}
/*
 * mic_to_euc_jp: MULE internal code ---> EUC_JP.
 *
 * Takes the conv_proc arguments (source encoding id, destination encoding
 * id, source string, destination buffer, source length).  The converted
 * text is stored in the destination buffer; the return value is always 0.
 */
Datum
mic_to_euc_jp(PG_FUNCTION_ARGS)
{
	unsigned char *src = PG_GETARG_CSTRING(2);
	unsigned char *dest = PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);

	Assert(PG_GETARG_INT32(0) == PG_MULE_INTERNAL);
	Assert(PG_GETARG_INT32(1) == PG_EUC_JP);
	Assert(len > 0);

	/*
	 * The destination is EUC_JP, so the EUC_JP writer must be used here;
	 * calling mic2sjis() would hand SJIS bytes to a caller expecting EUC_JP.
	 */
	mic2euc_jp(src, dest, len);

	PG_RETURN_INT32(0);
}
/*
 * sjis_to_mic: SJIS ---> MULE internal code.
 *
 * Takes the conv_proc arguments; the converted text is stored in the
 * destination buffer and the return value is always 0.
 */
Datum
sjis_to_mic(PG_FUNCTION_ARGS)
{
	unsigned char *sjis = PG_GETARG_CSTRING(2);
	unsigned char *mic = PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);

	Assert(PG_GETARG_INT32(0) == PG_SJIS);
	Assert(PG_GETARG_INT32(1) == PG_MULE_INTERNAL);
	Assert(len > 0);

	sjis2mic(sjis, mic, len);

	PG_RETURN_INT32(0);
}
/*
 * mic_to_sjis: MULE internal code ---> SJIS.
 *
 * Takes the conv_proc arguments; the converted text is stored in the
 * destination buffer and the return value is always 0.
 */
Datum
mic_to_sjis(PG_FUNCTION_ARGS)
{
	unsigned char *mic = PG_GETARG_CSTRING(2);
	unsigned char *sjis = PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);

	Assert(PG_GETARG_INT32(0) == PG_MULE_INTERNAL);
	Assert(PG_GETARG_INT32(1) == PG_SJIS);
	Assert(len > 0);

	mic2sjis(mic, sjis, len);

	PG_RETURN_INT32(0);
}
/*
 * SJIS ---> MIC
 *
 * Converts the SJIS string at "sjis" into MULE internal code at "p".
 * Conversion stops after "len" source bytes have been accounted for or at
 * the first NUL byte, whichever comes first; the output is always
 * NUL-terminated.  The caller must supply a large enough output buffer.
 *
 * Two-byte characters are classified by their 16-bit value k:
 *	 NEC selection IBM kanji (0xed40-0xf03f) are first remapped to the
 *	 corresponding IBM kanji code through ibmkanji[];
 *	 below 0xeb3f		   -> JIS X0208
 *	 unmapped NEC/IBM range  -> PGEUCALTCODE under JIS X0208
 *	 0xf040-0xf53f (UDC1)   -> JIS X0208 rows 85-94
 *	 0xf540-0xfa3f (UDC2)   -> JIS X0212 rows 85-94
 *	 0xfa40 and above		-> IBM kanji looked up in ibmkanji[]
 */
static void
sjis2mic(unsigned char *sjis, unsigned char *p, int len)
{
	int			c1,
				c2,
/* Eiji Tokuya patched begin */
				i,
				k,
				k2;

/* Eiji Tokuya patched end */

	while (len > 0 && (c1 = *sjis++))
	{
		if (c1 >= 0xa1 && c1 <= 0xdf)
		{
			/* JIS X0201 (1 byte kana) */
			len--;
			*p++ = LC_JISX0201K;
			*p++ = c1;
		}
		else if (c1 > 0x7f)
		{
			/*
			 * JIS X0208, X0212, user defined extended characters
			 */
			c2 = *sjis++;
			k = (c1 << 8) + c2;
/* Eiji Tokuya patched begin */
			if (k >= 0xed40 && k < 0xf040)
			{
				/* NEC selection IBM kanji: remap to the IBM kanji code */
				for (i = 0;; i++)
				{
					k2 = ibmkanji[i].nec;
					if (k2 == 0xffff)	/* end-of-table marker */
						break;
					if (k2 == k)
					{
						k = ibmkanji[i].sjis;
						c1 = (k >> 8) & 0xff;
						c2 = k & 0xff;
					}
				}
			}

			if (k < 0xeb3f)
/* Eiji Tokuya patched end */
			{
				/* JIS X0208 */
				len -= 2;
				*p++ = LC_JISX0208;
				*p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
				*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
			}
/* Eiji Tokuya patched begin */
			else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
			{
				/* NEC selection IBM kanji - Other undecided justice */
/* Eiji Tokuya patched end */

				/*
				 * Two source bytes were consumed here as well, so account
				 * for them like every other two-byte branch; otherwise the
				 * loop would keep running past the requested length.
				 */
				len -= 2;
				*p++ = LC_JISX0208;
				*p++ = PGEUCALTCODE >> 8;
				*p++ = PGEUCALTCODE & 0xff;
			}
			else if (k >= 0xf040 && k < 0xf540)
			{
				/*
				 * UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 -
				 * 0x7e7e EUC 0xf5a1 - 0xfefe
				 */
				len -= 2;
				*p++ = LC_JISX0208;
				c1 -= 0x6f;
				*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
				*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
			}
			else if (k >= 0xf540 && k < 0xfa40)
			{
				/*
				 * UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 -
				 * 0x7e7e EUC 0x8ff5a1 - 0x8ffefe
				 */
				len -= 2;
				*p++ = LC_JISX0212;
				c1 -= 0x74;
				*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
				*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
			}
			else if (k >= 0xfa40)
			{
				/*
				 * mapping IBM kanji to X0208 and X0212
				 *
				 * Nothing is emitted when the code is not in ibmkanji[].
				 */
				len -= 2;
				for (i = 0;; i++)
				{
					k2 = ibmkanji[i].sjis;
					if (k2 == 0xffff)	/* end-of-table marker */
						break;
					if (k2 == k)
					{
						k = ibmkanji[i].euc;
						if (k >= 0x8f0000)
						{
							/* three-byte EUC (SS3 prefix): JIS X0212 */
							*p++ = LC_JISX0212;
							*p++ = 0x80 | ((k & 0xff00) >> 8);
							*p++ = 0x80 | (k & 0xff);
						}
						else
						{
							*p++ = LC_JISX0208;
							*p++ = 0x80 | (k >> 8);
							*p++ = 0x80 | (k & 0xff);
						}
					}
				}
			}
		}
		else
		{						/* should be ASCII */
			len--;
			*p++ = c1;
		}
	}
	*p = '\0';
}
/*
 * MIC ---> SJIS
 *
 * Converts the MULE internal code string at "mic" into SJIS at "p".
 * Conversion stops when "len" source bytes have been accounted for or at
 * the first NUL byte; the output is always NUL-terminated.  Characters
 * that cannot be expressed in SJIS are replaced by PGSJISALTCODE.
 */
static void
mic2sjis(unsigned char *mic, unsigned char *p, int len)
{
int c1,
c2,
k;
while (len > 0 && (c1 = *mic))
{
/* account for the whole MIC character, then step past its leading byte */
len -= pg_mic_mblen(mic++);
if (c1 == LC_JISX0201K)
/* 1 byte kana: the byte after the leading byte is copied unchanged */
*p++ = *mic++;
else if (c1 == LC_JISX0208)
{
c1 = *mic++;
c2 = *mic++;
k = (c1 << 8) | (c2 & 0xff);
if (k >= 0xf5a1)
{
/* UDC1 */
c1 -= 0x54;
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f;
}
else
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1);
/* second SJIS byte; uses c1 after the UDC1 adjustment, if any */
*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
}
else if (c1 == LC_JISX0212)
{
int i,
k2;
c1 = *mic++;
c2 = *mic++;
k = c1 << 8 | c2;
if (k >= 0xf5a1)
{
/* UDC2 */
c1 -= 0x54;
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74;
*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
}
else
{
/* IBM kanji: search ibmkanji[] by the low 16 bits of its EUC code */
for (i = 0;; i++)
{
k2 = ibmkanji[i].euc & 0xffff;
if (k2 == 0xffff)
{
/* reached the end-of-table marker: no mapping, emit the alternative code */
*p++ = PGSJISALTCODE >> 8;
*p++ = PGSJISALTCODE & 0xff;
break;
}
if (k2 == k)
{
k = ibmkanji[i].sjis;
*p++ = k >> 8;
*p++ = k & 0xff;
break;
}
}
}
}
else if (c1 > 0x7f)
{
/* cannot convert to SJIS! */
/* NOTE(review): only the leading byte has been skipped here; the
 * remaining bytes of the MIC character are read by the next iterations. */
*p++ = PGSJISALTCODE >> 8;
*p++ = PGSJISALTCODE & 0xff;
}
else
{ /* should be ASCII */
*p++ = c1;
}
}
*p = '\0';
}
/*
 * EUC_JP ---> MIC
 *
 * Reads the EUC_JP string at "euc" and writes its MULE internal code form
 * to "p", NUL-terminated.  Stops once "len" source bytes are accounted for
 * or a NUL byte is read.
 */
static void
euc_jp2mic(unsigned char *euc, unsigned char *p, int len)
{
	int			lead;

	for (;;)
	{
		if (len <= 0)
			break;

		lead = *euc++;
		if (lead == 0)
			break;

		if (lead == SS2)
		{
			/* SS2 + one byte: 1 byte kana */
			*p++ = LC_JISX0201K;
			*p++ = *euc++;
			len -= 2;
		}
		else if (lead == SS3)
		{
			/* SS3 + two bytes: JIS X0212 kanji */
			*p++ = LC_JISX0212;
			*p++ = *euc++;
			*p++ = *euc++;
			len -= 3;
		}
		else if (lead & 0x80)
		{
			/* two bytes with the high bit set: JIS X0208 kanji */
			*p++ = LC_JISX0208;
			*p++ = lead;
			*p++ = *euc++;
			len -= 2;
		}
		else
		{
			/* anything else is taken to be ASCII */
			*p++ = lead;
			len--;
		}
	}

	*p = '\0';
}
/*
 * MIC ---> EUC_JP
 *
 * Reads the MULE internal code string at "mic" and writes its EUC_JP form
 * to "p", NUL-terminated.  Stops once "len" source bytes are accounted for
 * or a NUL leading byte is seen.  A character whose leading byte is not
 * one of the JIS charsets is handed to pg_print_bogus_char().
 */
static void
mic2euc_jp(unsigned char *mic, unsigned char *p, int len)
{
	int			lead;

	while (len > 0)
	{
		lead = *mic;
		if (lead == 0)
			break;

		/* account for the whole character, then step over its leading byte */
		len -= pg_mic_mblen(mic);
		mic++;

		if (lead == LC_JISX0201K)
		{
			*p++ = SS2;
			*p++ = *mic++;
		}
		else if (lead == LC_JISX0212)
		{
			*p++ = SS3;
			*p++ = *mic++;
			*p++ = *mic++;
		}
		else if (lead == LC_JISX0208)
		{
			*p++ = *mic++;
			*p++ = *mic++;
		}
		else if (lead > 0x7f)
		{
			/* cannot convert to EUC_JP: back up to the leading byte */
			mic--;
			pg_print_bogus_char(&mic, &p);
		}
		else
		{
			/* anything else is taken to be ASCII */
			*p++ = lead;
		}
	}

	*p = '\0';
}
This diff is collapsed.
#-------------------------------------------------------------------------
#
# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
#
#-------------------------------------------------------------------------
# Locate the top of the build tree and pull in the global build settings.
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global
# Module name; ../proc.mk derives the source, object and installed file
# names from it.
NAME := euc_tw_and_big5
# Extra source/object for this module; ../proc.mk appends $(NAME).c and
# $(NAME).o to these lists.
SRCS += big5.c
OBJS += big5.o
include ../proc.mk
/*
 * conversion between BIG5 and Mule Internal Code (CNS 11643-1992
 * plane 1 and plane 2).
* This program is partially copied from lv(Multilingual file viewer)
* and slightly modified. lv is written and copyrighted by NARITA Tomio
* (nrt@web.ad.jp).
*
* 1999/1/15 Tatsuo Ishii
*
* $Id: big5.c,v 1.1 2002/07/16 09:25:05 ishii Exp $
*/
/* can be used in either frontend or backend */
#include "postgres_fe.h"
#include "mb/pg_wchar.h"
/*
 * One entry of a range-mapping table searched by BinarySearchRange().
 */
typedef struct
{
unsigned short code, /* first code of a range, in the source charset */
peer; /* code it maps to in the other charset; 0 = range has no mapping */
} codes_t;
/*
 * map Big5 Level 1 to CNS 11643-1992 Plane 1
 *
 * Each entry starts a range: codes from entry[i].code up to (but not
 * including) entry[i+1].code are mapped by offset from entry[i].peer in
 * BinarySearchRange().  A peer of 0x0000 marks a range with no mapping.
 * The final entry only bounds the last range.
 */
static codes_t big5Level1ToCnsPlane1[25] = { /* range */
{0xA140, 0x2121},
{0xA1F6, 0x2258},
{0xA1F7, 0x2257},
{0xA1F8, 0x2259},
{0xA2AF, 0x2421},
{0xA3C0, 0x4221},
{0xa3e1, 0x0000},
{0xA440, 0x4421},
{0xACFE, 0x5753},
{0xacff, 0x0000},
{0xAD40, 0x5323},
{0xAFD0, 0x5754},
{0xBBC8, 0x6B51},
{0xBE52, 0x6B50},
{0xBE53, 0x6F5C},
{0xC1AB, 0x7536},
{0xC2CB, 0x7535},
{0xC2CC, 0x7737},
{0xC361, 0x782E},
{0xC3B9, 0x7865},
{0xC3BA, 0x7864},
{0xC3BB, 0x7866},
{0xC456, 0x782D},
{0xC457, 0x7962},
{0xc67f, 0x0000}
};
/*
 * map CNS 11643-1992 Plane 1 to Big5 Level 1
 *
 * Same layout as the Big5-to-CNS tables: each entry starts a range that
 * runs up to the next entry's code, a peer of 0x0000 marks a range with no
 * mapping, and the final entry only bounds the last range.  Codes are
 * looked up with the high bits cleared (see CNStoBIG5()).
 */
static codes_t cnsPlane1ToBig5Level1[26] = { /* range */
{0x2121, 0xA140},
{0x2257, 0xA1F7},
{0x2258, 0xA1F6},
{0x2259, 0xA1F8},
{0x234f, 0x0000},
{0x2421, 0xA2AF},
{0x2571, 0x0000},
{0x4221, 0xA3C0},
{0x4242, 0x0000},
{0x4421, 0xA440},
{0x5323, 0xAD40},
{0x5753, 0xACFE},
{0x5754, 0xAFD0},
{0x6B50, 0xBE52},
{0x6B51, 0xBBC8},
{0x6F5C, 0xBE53},
{0x7535, 0xC2CB},
{0x7536, 0xC1AB},
{0x7737, 0xC2CC},
{0x782D, 0xC456},
{0x782E, 0xC361},
{0x7864, 0xC3BA},
{0x7865, 0xC3B9},
{0x7866, 0xC3BB},
{0x7962, 0xC457},
{0x7d4c, 0x0000}
};
/*
 * map Big5 Level 2 to CNS 11643-1992 Plane 2
 *
 * Each entry starts a range that runs up to the next entry's code; a peer
 * of 0x0000 marks a range with no mapping, and the final entry only bounds
 * the last range.  0xC94A is unmapped here because BIG5toCNS() handles it
 * as a special case.
 */
static codes_t big5Level2ToCnsPlane2[48] = { /* range */
{0xC940, 0x2121},
{0xc94a, 0x0000},
{0xC94B, 0x212B},
{0xC96C, 0x214D},
{0xC9BE, 0x214C},
{0xC9BF, 0x217D},
{0xC9ED, 0x224E},
{0xCAF7, 0x224D},
{0xCAF8, 0x2439},
{0xD77A, 0x3F6A},
{0xD77B, 0x387E},
{0xDBA7, 0x3F6B},
{0xDDFC, 0x4176},
{0xDDFD, 0x4424},
{0xE8A3, 0x554C},
{0xE976, 0x5723},
{0xEB5B, 0x5A29},
{0xEBF1, 0x554B},
{0xEBF2, 0x5B3F},
{0xECDE, 0x5722},
{0xECDF, 0x5C6A},
{0xEDAA, 0x5D75},
{0xEEEB, 0x642F},
{0xEEEC, 0x6039},
{0xF056, 0x5D74},
{0xF057, 0x6243},
{0xF0CB, 0x5A28},
{0xF0CC, 0x6337},
{0xF163, 0x6430},
{0xF16B, 0x6761},
{0xF16C, 0x6438},
{0xF268, 0x6934},
{0xF269, 0x6573},
{0xF2C3, 0x664E},
{0xF375, 0x6762},
{0xF466, 0x6935},
{0xF4B5, 0x664D},
{0xF4B6, 0x6962},
{0xF4FD, 0x6A4C},
{0xF663, 0x6A4B},
{0xF664, 0x6C52},
{0xF977, 0x7167},
{0xF9C4, 0x7166},
{0xF9C5, 0x7234},
{0xF9C6, 0x7240},
{0xF9C7, 0x7235},
{0xF9D2, 0x7241},
{0xf9d6, 0x0000}
};
/*
 * map CNS 11643-1992 Plane 2 to Big5 Level 2
 *
 * Each entry starts a range that runs up to the next entry's code; a peer
 * of 0x0000 marks a range with no mapping, and the final entry only bounds
 * the last range.
 *
 * NOTE(review): the {0x4424, 0x0000} entry sits between 0x3F6B and 0x4176,
 * so the code column is not ascending at that point, which a binary search
 * normally requires.  It may be a typo for a smaller code -- confirm
 * against the original mapping data before changing it.
 */
static codes_t cnsPlane2ToBig5Level2[49] = { /* range */
{0x2121, 0xC940},
{0x212B, 0xC94B},
{0x214C, 0xC9BE},
{0x214D, 0xC96C},
{0x217D, 0xC9BF},
{0x224D, 0xCAF7},
{0x224E, 0xC9ED},
{0x2439, 0xCAF8},
{0x387E, 0xD77B},
{0x3F6A, 0xD77A},
{0x3F6B, 0xDBA7},
{0x4424, 0x0000},
{0x4176, 0xDDFC},
{0x4177, 0x0000},
{0x4424, 0xDDFD},
{0x554B, 0xEBF1},
{0x554C, 0xE8A3},
{0x5722, 0xECDE},
{0x5723, 0xE976},
{0x5A28, 0xF0CB},
{0x5A29, 0xEB5B},
{0x5B3F, 0xEBF2},
{0x5C6A, 0xECDF},
{0x5D74, 0xF056},
{0x5D75, 0xEDAA},
{0x6039, 0xEEEC},
{0x6243, 0xF057},
{0x6337, 0xF0CC},
{0x642F, 0xEEEB},
{0x6430, 0xF163},
{0x6438, 0xF16C},
{0x6573, 0xF269},
{0x664D, 0xF4B5},
{0x664E, 0xF2C3},
{0x6761, 0xF16B},
{0x6762, 0xF375},
{0x6934, 0xF268},
{0x6935, 0xF466},
{0x6962, 0xF4B6},
{0x6A4B, 0xF663},
{0x6A4C, 0xF4FD},
{0x6C52, 0xF664},
{0x7166, 0xF9C4},
{0x7167, 0xF977},
{0x7234, 0xF9C5},
{0x7235, 0xF9C7},
{0x7240, 0xF9C6},
{0x7241, 0xF9D2},
{0x7245, 0x0000}
};
/*
 * Big Five Level 1 Correspondence to CNS 11643-1992 Plane 4
 *
 * Exact-match pairs {Big5 code, CNS code}; searched linearly by
 * BIG5toCNS() (first column) and CNStoBIG5() (second column).
 */
static unsigned short b1c4[][2] = {
{0xC879, 0x2123},
{0xC87B, 0x2124},
{0xC87D, 0x212A},
{0xC8A2, 0x2152}
};
/*
 * Big Five Level 2 Correspondence to CNS 11643-1992 Plane 3
 *
 * Exact-match pairs {Big5 code, CNS code}; searched linearly by
 * BIG5toCNS() (first column) and CNStoBIG5() (second column).
 */
static unsigned short b2c3[][2] = {
{0xF9D6, 0x4337},
{0xF9D7, 0x4F50},
{0xF9D8, 0x444E},
{0xF9D9, 0x504A},
{0xF9DA, 0x2C5D},
{0xF9DB, 0x3D7E},
{0xF9DC, 0x4B5C}
};
/*
 * Look up "code" in a range table and compute the corresponding code in
 * the other charset.
 *
 * array: range table; entry i covers codes from array[i].code up to, but
 *		  not including, array[i + 1].code
 * high:  highest index that may start a range; array[high + 1] is read, so
 *		  callers pass (number of entries - 2)
 * code:  code to convert; values >= 0xa140 are treated as Big5 (converted
 *		  to CNS), smaller values as CNS (converted to Big5)
 *
 * Returns the converted code, or 0 if no range contains the code or the
 * containing range has no mapping (peer == 0).
 */
static unsigned short BinarySearchRange
(codes_t *array, int high, unsigned short code)
{
int low,
mid,
distance,
tmp;
low = 0;
mid = high >> 1;
for (; low <= high; mid = (low + high) >> 1)
{
/* found the range when code lies in [array[mid].code, array[mid + 1].code) */
if ((array[mid].code <= code) && (array[mid + 1].code > code))
{
if (0 == array[mid].peer)
return 0;
if (code >= 0xa140U)
{
/* big5 to cns */
tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8;
/* high and low are reused here as the low bytes of the two codes */
high = code & 0x00ff;
low = array[mid].code & 0x00ff;
/*
 * NOTE: big5 high_byte: 0xa1-0xfe, low_byte: 0x40-0x7e,
 * 0xa1-0xfe (radicals: 0x00-0x3e, 0x3f-0x9c) big5 radix
 * is 0x9d. [region_low, region_high]
 * We should remember big5 has two different regions
 * (above). There is a bias for the distance between these
 * regions. 0xa1 - 0x7e + bias = 1 (Distance between 0xa1
 * and 0x7e is 1.) bias = - 0x22.
 */
distance = tmp * 0x9d + high - low +
(high >= 0xa1 ? (low >= 0xa1 ? 0 : -0x22)
: (low >= 0xa1 ? +0x22 : 0));
/*
 * NOTE: we have to convert the distance into a code
 * point. The code point's low_byte is 0x21 plus mod_0x5e.
 * In the first, we extract the mod_0x5e of the starting
 * code point, subtracting 0x21, and add distance to it.
 * Then we calculate again mod_0x5e of them, and restore
 * the final codepoint, adding 0x21.
 */
tmp = (array[mid].peer & 0x00ff) + distance - 0x21;
tmp = (array[mid].peer & 0xff00) + ((tmp / 0x5e) << 8)
+ 0x21 + tmp % 0x5e;
return tmp;
}
else
{
/* cns to big5 */
tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8;
/*
 * NOTE: ISO charsets ranges between 0x21-0xfe
 * (94charset). Its radix is 0x5e. But there is no
 * distance bias like big5.
 */
distance = tmp * 0x5e
+ ((int) (code & 0x00ff) - (int) (array[mid].code & 0x00ff));
/*
 * NOTE: Similar to big5 to cns conversion, we extract
 * mod_0x9d and restore mod_0x9d into a code point.
 */
low = array[mid].peer & 0x00ff;
tmp = low + distance - (low >= 0xa1 ? 0x62 : 0x40);
low = tmp % 0x9d;
tmp = (array[mid].peer & 0xff00) + ((tmp / 0x9d) << 8)
+ (low > 0x3e ? 0x62 : 0x40) + low;
return tmp;
}
}
else if (array[mid].code > code)
high = mid - 1;
else
low = mid + 1;
}
/* no range contains the code */
return 0;
}
/*
 * Convert one Big5 code to CNS 11643-1992.
 *
 * big5: two-byte Big5 code
 * lc:	 output; receives the leading-character constant of the CNS plane
 *		 the result belongs to (LC_CNS11643_1 .. LC_CNS11643_4), or 0 when
 *		 the code has no mapping
 *
 * Returns the CNS code with the high bit of both bytes set, or '?' when
 * there is no mapping.
 */
unsigned short
BIG5toCNS(unsigned short big5, unsigned char *lc)
{
	unsigned short cns = 0;
	int			i;

	if (big5 < 0xc940U)
	{
		/* level 1 */

		/*
		 * The exception tables are arrays of {big5, cns} pairs, so the
		 * number of rows is sizeof(table) / sizeof(table[0]).  Dividing by
		 * sizeof(unsigned short) would count every element and walk past
		 * the end of the array.
		 */
		for (i = 0; i < (int) (sizeof(b1c4) / sizeof(b1c4[0])); i++)
		{
			if (b1c4[i][0] == big5)
			{
				*lc = LC_CNS11643_4;
				return (b1c4[i][1] | 0x8080U);
			}
		}

		if (0 < (cns = BinarySearchRange(big5Level1ToCnsPlane1, 23, big5)))
			*lc = LC_CNS11643_1;
	}
	else if (big5 == 0xc94aU)
	{
		/* level 2 code that maps into plane 1 */
		*lc = LC_CNS11643_1;
		cns = 0x4442;
	}
	else
	{
		/* level 2 */
		for (i = 0; i < (int) (sizeof(b2c3) / sizeof(b2c3[0])); i++)
		{
			if (b2c3[i][0] == big5)
			{
				*lc = LC_CNS11643_3;
				return (b2c3[i][1] | 0x8080U);
			}
		}

		if (0 < (cns = BinarySearchRange(big5Level2ToCnsPlane2, 46, big5)))
			*lc = LC_CNS11643_2;
	}

	if (0 == cns)
	{							/* no mapping Big5 to CNS 11643-1992 */
		*lc = 0;
		return (unsigned short) '?';
	}

	return cns | 0x8080;
}
/*
 * Convert one CNS 11643-1992 code to Big5.
 *
 * cns: two-byte CNS code; the high bit of each byte is ignored
 * lc:	leading-character constant identifying the CNS plane
 *		(LC_CNS11643_1 .. LC_CNS11643_4)
 *
 * Returns the Big5 code, or 0 when there is no mapping.
 */
unsigned short
CNStoBIG5(unsigned short cns, unsigned char lc)
{
	int			i;
	unsigned int big5 = 0;

	cns &= 0x7f7f;

	switch (lc)
	{
		case LC_CNS11643_1:
			big5 = BinarySearchRange(cnsPlane1ToBig5Level1, 24, cns);
			break;
		case LC_CNS11643_2:
			big5 = BinarySearchRange(cnsPlane2ToBig5Level2, 47, cns);
			break;
		case LC_CNS11643_3:

			/*
			 * b2c3 and b1c4 hold {big5, cns} pairs; iterate over rows, not
			 * over individual elements, to stay inside the arrays.
			 */
			for (i = 0; i < (int) (sizeof(b2c3) / sizeof(b2c3[0])); i++)
			{
				if (b2c3[i][1] == cns)
					return (b2c3[i][0]);
			}
			break;
		case LC_CNS11643_4:
			for (i = 0; i < (int) (sizeof(b1c4) / sizeof(b1c4[0])); i++)
			{
				if (b1c4[i][1] == cns)
					return (b1c4[i][0]);
			}
			break;
		default:
			break;
	}

	return big5;
}
/*-------------------------------------------------------------------------
*
* EUC_TW, BIG5 and MULE_INTERNAL
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c,v 1.1 2002/07/16 09:25:05 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
/*
 * Size multiplier for the intermediate MULE internal code buffer that
 * euc_tw_to_big5() and big5_to_euc_tw() allocate: len * ENCODING_GROWTH_RATE
 * bytes for a source string of len bytes.
 */
#define ENCODING_GROWTH_RATE 4
/* fmgr version-1 registration for each exported conversion function */
PG_FUNCTION_INFO_V1(euc_tw_to_big5)
PG_FUNCTION_INFO_V1(big5_to_euc_tw)
PG_FUNCTION_INFO_V1(euc_tw_to_mic)
PG_FUNCTION_INFO_V1(mic_to_euc_tw)
PG_FUNCTION_INFO_V1(big5_to_mic)
PG_FUNCTION_INFO_V1(mic_to_big5)
/* prototypes of the exported conversion functions */
extern Datum euc_tw_to_big5(PG_FUNCTION_ARGS);
extern Datum big5_to_euc_tw(PG_FUNCTION_ARGS);
extern Datum euc_tw_to_mic(PG_FUNCTION_ARGS);
extern Datum mic_to_euc_tw(PG_FUNCTION_ARGS);
extern Datum big5_to_mic(PG_FUNCTION_ARGS);
extern Datum mic_to_big5(PG_FUNCTION_ARGS);
/* ----------
 * Calling convention shared by every conversion function in this file:
 *
 * conv_proc(
 *		INTEGER,	-- source encoding id
 *		INTEGER,	-- destination encoding id
 *		OPAQUE,		-- source string (null terminated C string)
 *		OPAQUE,		-- destination string (null terminated C string)
 *		INTEGER		-- source string length
 * ) returns INTEGER;	-- dummy. returns nothing, actually.
 * ----------
 */
/* file-local converters that do the actual work; MIC is the pivot encoding */
static void big52mic(unsigned char *big5, unsigned char *p, int len);
static void mic2big5(unsigned char *mic, unsigned char *p, int len);
static void euc_tw2mic(unsigned char *euc, unsigned char *p, int len);
static void mic2euc_tw(unsigned char *mic, unsigned char *p, int len);
/*
 * euc_tw_to_big5: EUC_TW ---> BIG5, going through MULE internal code.
 *
 * Takes the conv_proc arguments (source encoding id, destination encoding
 * id, source string, destination buffer, source length).  The converted
 * text is stored in the destination buffer; the return value is always 0.
 */
Datum
euc_tw_to_big5(PG_FUNCTION_ARGS)
{
	unsigned char *euc = PG_GETARG_CSTRING(2);
	unsigned char *big5 = PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);
	unsigned char *mic;

	Assert(PG_GETARG_INT32(0) == PG_EUC_TW);
	Assert(PG_GETARG_INT32(1) == PG_BIG5);
	Assert(len > 0);

	/* scratch buffer holding the intermediate MIC string */
	mic = palloc(len * ENCODING_GROWTH_RATE);

	euc_tw2mic(euc, mic, len);
	mic2big5(mic, big5, strlen(mic));

	pfree(mic);

	PG_RETURN_INT32(0);
}
/*
 * big5_to_euc_tw: BIG5 ---> EUC_TW, going through MULE internal code.
 *
 * Takes the conv_proc arguments (source encoding id, destination encoding
 * id, source string, destination buffer, source length).  The converted
 * text is stored in the destination buffer; the return value is always 0.
 */
Datum
big5_to_euc_tw(PG_FUNCTION_ARGS)
{
	unsigned char *big5 = PG_GETARG_CSTRING(2);
	unsigned char *euc = PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);
	unsigned char *mic;

	Assert(PG_GETARG_INT32(0) == PG_BIG5);
	Assert(PG_GETARG_INT32(1) == PG_EUC_TW);
	Assert(len > 0);

	/* scratch buffer holding the intermediate MIC string */
	mic = palloc(len * ENCODING_GROWTH_RATE);

	big52mic(big5, mic, len);
	mic2euc_tw(mic, euc, strlen(mic));

	pfree(mic);

	PG_RETURN_INT32(0);
}
/*
 * euc_tw_to_mic: EUC_TW ---> MULE internal code.
 *
 * Takes the conv_proc arguments; the converted text is stored in the
 * destination buffer and the return value is always 0.
 */
Datum
euc_tw_to_mic(PG_FUNCTION_ARGS)
{
	unsigned char *euc = PG_GETARG_CSTRING(2);
	unsigned char *mic = PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);

	Assert(PG_GETARG_INT32(0) == PG_EUC_TW);
	Assert(PG_GETARG_INT32(1) == PG_MULE_INTERNAL);
	Assert(len > 0);

	euc_tw2mic(euc, mic, len);

	PG_RETURN_INT32(0);
}
/*
 * mic_to_euc_tw: MULE internal code ---> EUC_TW.
 *
 * Takes the conv_proc arguments (source encoding id, destination encoding
 * id, source string, destination buffer, source length).  The converted
 * text is stored in the destination buffer; the return value is always 0.
 */
Datum
mic_to_euc_tw(PG_FUNCTION_ARGS)
{
	unsigned char *src = PG_GETARG_CSTRING(2);
	unsigned char *dest = PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);

	Assert(PG_GETARG_INT32(0) == PG_MULE_INTERNAL);
	Assert(PG_GETARG_INT32(1) == PG_EUC_TW);
	Assert(len > 0);

	/*
	 * The destination is EUC_TW, so the EUC_TW writer must be used here;
	 * calling mic2big5() would hand Big5 bytes to a caller expecting EUC_TW.
	 */
	mic2euc_tw(src, dest, len);

	PG_RETURN_INT32(0);
}
/*
 * big5_to_mic: BIG5 ---> MULE internal code.
 *
 * Takes the conv_proc arguments; the converted text is stored in the
 * destination buffer and the return value is always 0.
 */
Datum
big5_to_mic(PG_FUNCTION_ARGS)
{
	unsigned char *big5 = PG_GETARG_CSTRING(2);
	unsigned char *mic = PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);

	Assert(PG_GETARG_INT32(0) == PG_BIG5);
	Assert(PG_GETARG_INT32(1) == PG_MULE_INTERNAL);
	Assert(len > 0);

	big52mic(big5, mic, len);

	PG_RETURN_INT32(0);
}
/*
 * mic_to_big5: MULE internal code ---> BIG5.
 *
 * Takes the conv_proc arguments; the converted text is stored in the
 * destination buffer and the return value is always 0.
 */
Datum
mic_to_big5(PG_FUNCTION_ARGS)
{
	unsigned char *mic = PG_GETARG_CSTRING(2);
	unsigned char *big5 = PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);

	Assert(PG_GETARG_INT32(0) == PG_MULE_INTERNAL);
	Assert(PG_GETARG_INT32(1) == PG_BIG5);
	Assert(len > 0);

	mic2big5(mic, big5, len);

	PG_RETURN_INT32(0);
}
/*
 * EUC_TW ---> MIC
 *
 * Converts the EUC_TW string at "euc" into MULE internal code at "p".
 * Conversion stops after "len" source bytes have been accounted for or at
 * the first NUL byte; the output is always NUL-terminated.
 *
 * A four-byte sequence SS2 + plane byte + two code bytes selects a CNS
 * plane: 0xa1 is plane 1, 0xa2 is plane 2, and higher plane bytes are
 * written as the private-charset prefix 0x9d followed by a plane
 * identifier.  A two-byte sequence with the high bit set is plane 1.
 */
static void
euc_tw2mic(unsigned char *euc, unsigned char *p, int len)
{
	int			c1;

	while (len > 0 && (c1 = *euc++))
	{
		if (c1 == SS2)
		{
			len -= 4;
			c1 = *euc++;		/* plane No. */
			if (c1 == 0xa1)
				*p++ = LC_CNS11643_1;
			else if (c1 == 0xa2)
				*p++ = LC_CNS11643_2;
			else
			{
				*p++ = 0x9d;	/* LCPRV2 */

				/*
				 * Plane byte 0xa3 corresponds to LC_CNS11643_3, and each
				 * higher plane to the next identifier.  This must be the
				 * exact inverse of mic2euc_tw(), which computes
				 * "id - LC_CNS11643_3 + 0xa3"; subtracting c1 from 0xa3
				 * instead would map every plane above 3 to the wrong id.
				 */
				*p++ = c1 - 0xa3 + LC_CNS11643_3;
			}
			*p++ = *euc++;
			*p++ = *euc++;
		}
		else if (c1 & 0x80)
		{						/* CNS11643-1 */
			len -= 2;
			*p++ = LC_CNS11643_1;
			*p++ = c1;
			*p++ = *euc++;
		}
		else
		{						/* should be ASCII */
			len--;
			*p++ = c1;
		}
	}
	*p = '\0';
}
/*
 * MIC ---> EUC_TW
 *
 * Reads the MULE internal code string at "mic" and writes its EUC_TW form
 * to "p", NUL-terminated.  Stops once "len" source bytes are accounted for
 * or a NUL leading byte is seen.  A character with any other non-ASCII
 * leading byte is handed to pg_print_bogus_char().
 */
static void
mic2euc_tw(unsigned char *mic, unsigned char *p, int len)
{
	int			lead;

	while (len > 0)
	{
		lead = *mic;
		if (lead == 0)
			break;

		/* account for the whole character, then step over its leading byte */
		len -= pg_mic_mblen(mic);
		mic++;

		if (lead == LC_CNS11643_1)
		{
			/* plane 1 is written as a bare two-byte code */
			*p++ = *mic++;
			*p++ = *mic++;
		}
		else if (lead == LC_CNS11643_2)
		{
			/* plane 2: SS2, plane byte 0xa2, then the two code bytes */
			*p++ = SS2;
			*p++ = 0xa2;
			*p++ = *mic++;
			*p++ = *mic++;
		}
		else if (lead == 0x9d)
		{
			/* LCPRV2?  the next byte identifies the plane */
			*p++ = SS2;
			*p++ = *mic++ - LC_CNS11643_3 + 0xa3;
			*p++ = *mic++;
			*p++ = *mic++;
		}
		else if (lead > 0x7f)
		{
			/* cannot convert to EUC_TW: back up to the leading byte */
			mic--;
			pg_print_bogus_char(&mic, &p);
		}
		else
		{
			/* anything else is taken to be ASCII */
			*p++ = lead;
		}
	}

	*p = '\0';
}
/*
 * Big5 ---> MIC
 *
 * Reads the Big5 string at "big5" and writes its MULE internal code form
 * to "p", NUL-terminated.  Stops once "len" source bytes are accounted for
 * or a NUL byte is read where a character should start.  A two-byte code
 * with no CNS mapping is written as "(xxxx)", the two bytes in lowercase
 * hexadecimal.
 */
static void
big52mic(unsigned char *big5, unsigned char *p, int len)
{
	unsigned short first,
				second;
	unsigned short big5code,
				cnscode;
	unsigned char plane;
	char		hex[3];

	while (len > 0)
	{
		first = *big5++;
		if (first == 0)
			break;

		if (first <= 0x7fU)
		{
			/* ASCII */
			*p++ = first;
			len--;
			continue;
		}

		/* two-byte character */
		second = *big5++;
		len -= 2;
		big5code = (first << 8) | second;
		cnscode = BIG5toCNS(big5code, &plane);

		if (plane == 0)
		{
			/* cannot convert */
			*p++ = '(';
			sprintf(hex, "%02x", first);
			*p++ = hex[0];
			*p++ = hex[1];
			sprintf(hex, "%02x", second);
			*p++ = hex[0];
			*p++ = hex[1];
			*p++ = ')';
			continue;
		}

		/* planes 3 and 4 are prefixed with the private-charset byte */
		if (plane == LC_CNS11643_3 || plane == LC_CNS11643_4)
			*p++ = 0x9d;		/* LCPRV2 */

		*p++ = plane;			/* Plane No. */
		*p++ = (cnscode >> 8) & 0x00ff;
		*p++ = cnscode & 0x00ff;
	}

	*p = '\0';
}
/*
 * MIC ---> Big5
 *
 * Reads the MULE internal code string at "mic" and writes its Big5 form to
 * "p", NUL-terminated.  Stops once "len" source bytes are accounted for or
 * a NUL leading byte is seen.  Characters that have no Big5 equivalent are
 * handed to pg_print_bogus_char().
 */
static void
mic2big5(unsigned char *mic, unsigned char *p, int len)
{
	int			charlen;
	unsigned short lead;
	unsigned short big5code,
				cnscode;

	while (len > 0)
	{
		lead = *mic;
		if (lead == 0)
			break;

		/* account for the whole character, then step over its leading byte */
		charlen = pg_mic_mblen(mic);
		mic++;
		len -= charlen;

		/* 0x9d means LCPRV2 */
		if (lead == LC_CNS11643_1 || lead == LC_CNS11643_2 || lead == 0x9d)
		{
			if (lead == 0x9d)
				lead = *mic++;	/* get plane no. */

			cnscode = (*mic++) << 8;
			cnscode |= (*mic++) & 0x00ff;
			big5code = CNStoBIG5(cnscode, lead);

			if (big5code != 0)
			{
				*p++ = (big5code >> 8) & 0x00ff;
				*p++ = big5code & 0x00ff;
			}
			else
			{
				/* cannot convert to Big5: rewind over the whole character */
				mic -= charlen;
				pg_print_bogus_char(&mic, &p);
			}
		}
		else if (lead <= 0x7f)
		{
			/* ASCII */
			*p++ = lead;
		}
		else
		{
			/* cannot convert to Big5: back up to the leading byte */
			mic--;
			pg_print_bogus_char(&mic, &p);
		}
	}

	*p = '\0';
}
# Shared rules for building one conversion module as a shared library.
# The including Makefile must set NAME (and top_builddir) first; it may
# also add further files to SRCS and OBJS.
SRCS += $(NAME).c
OBJS += $(NAME).o

# Settings consumed by Makefile.shlib, included below.
PG_CPPFLAGS :=
SHLIB_LINK :=
SO_MAJOR_VERSION := 0
SO_MINOR_VERSION := 0
rpath =

# None of these targets names a file.
.PHONY: all install uninstall clean distclean maintainer-clean

# Install the library under the plain module name, which is the name the
# generated SQL script refers to ('$libdir/<object>').
install: all
	$(INSTALL_SHLIB) $(shlib) $(DESTDIR)$(pkglibdir)/$(NAME)$(DLSUFFIX)

# Remove exactly the file that "install" above creates, in addition to
# whatever uninstall-lib removes.
uninstall: uninstall-lib
	$(RM) $(DESTDIR)$(pkglibdir)/$(NAME)$(DLSUFFIX)

clean distclean maintainer-clean: clean-lib
	$(RM) $(OBJS)

include $(top_builddir)/src/Makefile.shlib

all: $(shlib)
#-------------------------------------------------------------------------
#
# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
#
#-------------------------------------------------------------------------
# Locate the top of the build tree and pull in the global build settings.
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global
# Module name; ../proc.mk derives the source, object and installed file
# names from it.
NAME := utf8_and_ascii
include ../proc.mk
/*-------------------------------------------------------------------------
*
* ASCII <--> UTF-8
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_ascii/utf8_and_ascii.c,v 1.1 2002/07/16 09:25:05 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
/* fmgr version-1 registration for each exported conversion function */
PG_FUNCTION_INFO_V1(ascii_to_utf8)
PG_FUNCTION_INFO_V1(utf8_to_ascii)
/* prototypes of the exported conversion functions */
extern Datum ascii_to_utf8(PG_FUNCTION_ARGS);
extern Datum utf8_to_ascii(PG_FUNCTION_ARGS);
/* ----------
 * Calling convention shared by every conversion function in this file:
 *
 * conv_proc(
 *		INTEGER,	-- source encoding id
 *		INTEGER,	-- destination encoding id
 *		OPAQUE,		-- source string (null terminated C string)
 *		OPAQUE,		-- destination string (null terminated C string)
 *		INTEGER		-- source string length
 * ) returns INTEGER;	-- dummy. returns nothing, actually.
 * ----------
 */
/*
 * ascii_to_utf8: SQL_ASCII ---> UTF-8.
 *
 * Takes the conv_proc arguments; the work is delegated to pg_ascii2mic(),
 * which fills the destination buffer.  The return value is always 0.
 */
Datum
ascii_to_utf8(PG_FUNCTION_ARGS)
{
	unsigned char *ascii = PG_GETARG_CSTRING(2);
	unsigned char *utf = PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);

	Assert(PG_GETARG_INT32(0) == PG_SQL_ASCII);
	Assert(PG_GETARG_INT32(1) == PG_UTF8);
	Assert(len > 0);

	pg_ascii2mic(ascii, utf, len);

	PG_RETURN_INT32(0);
}
/*
 * utf8_to_ascii: UTF-8 ---> SQL_ASCII.
 *
 * Takes the conv_proc arguments; the work is delegated to pg_mic2ascii(),
 * which fills the destination buffer.  The return value is always 0.
 */
Datum
utf8_to_ascii(PG_FUNCTION_ARGS)
{
	unsigned char *utf = PG_GETARG_CSTRING(2);
	unsigned char *ascii = PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);

	Assert(PG_GETARG_INT32(0) == PG_UTF8);
	Assert(PG_GETARG_INT32(1) == PG_SQL_ASCII);
	Assert(len > 0);

	pg_mic2ascii(utf, ascii, len);

	PG_RETURN_INT32(0);
}
#-------------------------------------------------------------------------
#
# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
#
#-------------------------------------------------------------------------
# Locate the top of the build tree and pull in the global build settings.
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global
# Module name; ../proc.mk derives the source, object and installed file
# names from it.
NAME := utf8_and_big5
include ../proc.mk
/*-------------------------------------------------------------------------
*
* BIG5 <--> UTF-8
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c,v 1.1 2002/07/16 09:25:05 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include "../../Unicode/big5_to_utf8.map"
#include "../../Unicode/utf8_to_big5.map"
/*
 * Declarations for the two conversion entry points defined below.
 * PG_FUNCTION_INFO_V1 (fmgr.h) marks each one as a version-1 fmgr function.
 */
PG_FUNCTION_INFO_V1(big5_to_utf8)
PG_FUNCTION_INFO_V1(utf8_to_big5)
extern Datum big5_to_utf8(PG_FUNCTION_ARGS);
extern Datum utf8_to_big5(PG_FUNCTION_ARGS);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* OPAQUE, -- source string (null terminated C string)
* OPAQUE, -- destination string (null terminated C string)
* INTEGER -- source string length
* ) returns INTEGER; -- dummy. returns nothing, actually.
* ----------
*/
/*
 * big5_to_utf8 --- BIG5 to UTF-8 conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to LocalToUtf() together with the LUmapBIG5 table.
 * The integer result is a dummy and is always 0.
 */
Datum
big5_to_utf8(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the local-to-UTF map */
	int			map_entries = sizeof(LUmapBIG5) / sizeof(pg_local_to_utf);

	Assert(PG_GETARG_INT32(0) == PG_BIG5);
	Assert(PG_GETARG_INT32(1) == PG_UTF8);
	Assert(len > 0);

	LocalToUtf(from, to, LUmapBIG5, map_entries, PG_BIG5, len);

	PG_RETURN_INT32(0);
}
/*
 * utf8_to_big5 --- UTF-8 to BIG5 conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to UtfToLocal() together with the ULmapBIG5 table.
 * The integer result is a dummy and is always 0.
 */
Datum
utf8_to_big5(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the UTF-to-local map */
	int			map_entries = sizeof(ULmapBIG5) / sizeof(pg_utf_to_local);

	Assert(PG_GETARG_INT32(0) == PG_UTF8);
	Assert(PG_GETARG_INT32(1) == PG_BIG5);
	Assert(len > 0);

	UtfToLocal(from, to, ULmapBIG5, map_entries, len);

	PG_RETURN_INT32(0);
}
#-------------------------------------------------------------------------
#
# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
#
#-------------------------------------------------------------------------
# Like the parent conversion_procs Makefile, define subdir (path of this
# directory from the top of the source tree) before pulling in
# Makefile.global, which uses it to locate the sources.
subdir = src/backend/utils/mb/conversion_procs/utf8_and_euc_cn
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global

# Name of the conversion module; consumed by the shared rules in proc.mk.
NAME := utf8_and_euc_cn

include ../proc.mk
/*-------------------------------------------------------------------------
*
* EUC_CN <--> UTF-8
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c,v 1.1 2002/07/16 09:25:05 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include "../../Unicode/euc_cn_to_utf8.map"
#include "../../Unicode/utf8_to_euc_cn.map"
/*
 * Declarations for the two conversion entry points defined below.
 * PG_FUNCTION_INFO_V1 (fmgr.h) marks each one as a version-1 fmgr function.
 */
PG_FUNCTION_INFO_V1(euc_cn_to_utf8)
PG_FUNCTION_INFO_V1(utf8_to_euc_cn)
extern Datum euc_cn_to_utf8(PG_FUNCTION_ARGS);
extern Datum utf8_to_euc_cn(PG_FUNCTION_ARGS);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* OPAQUE, -- source string (null terminated C string)
* OPAQUE, -- destination string (null terminated C string)
* INTEGER -- source string length
* ) returns INTEGER; -- dummy. returns nothing, actually.
* ----------
*/
/*
 * euc_cn_to_utf8 --- EUC_CN to UTF-8 conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to LocalToUtf() together with the LUmapEUC_CN table.
 * The integer result is a dummy and is always 0.
 */
Datum
euc_cn_to_utf8(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the local-to-UTF map */
	int			map_entries = sizeof(LUmapEUC_CN) / sizeof(pg_local_to_utf);

	Assert(PG_GETARG_INT32(0) == PG_EUC_CN);
	Assert(PG_GETARG_INT32(1) == PG_UTF8);
	Assert(len > 0);

	LocalToUtf(from, to, LUmapEUC_CN, map_entries, PG_EUC_CN, len);

	PG_RETURN_INT32(0);
}
/*
 * utf8_to_euc_cn --- UTF-8 to EUC_CN conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to UtfToLocal() together with the ULmapEUC_CN table.
 * The integer result is a dummy and is always 0.
 */
Datum
utf8_to_euc_cn(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the UTF-to-local map */
	int			map_entries = sizeof(ULmapEUC_CN) / sizeof(pg_utf_to_local);

	Assert(PG_GETARG_INT32(0) == PG_UTF8);
	Assert(PG_GETARG_INT32(1) == PG_EUC_CN);
	Assert(len > 0);

	UtfToLocal(from, to, ULmapEUC_CN, map_entries, len);

	PG_RETURN_INT32(0);
}
#-------------------------------------------------------------------------
#
# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
#
#-------------------------------------------------------------------------
# Like the parent conversion_procs Makefile, define subdir (path of this
# directory from the top of the source tree) before pulling in
# Makefile.global, which uses it to locate the sources.
subdir = src/backend/utils/mb/conversion_procs/utf8_and_euc_jp
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global

# Name of the conversion module; consumed by the shared rules in proc.mk.
NAME := utf8_and_euc_jp

include ../proc.mk
/*-------------------------------------------------------------------------
*
* EUC_JP <--> UTF-8
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c,v 1.1 2002/07/16 09:25:05 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include "../../Unicode/euc_jp_to_utf8.map"
#include "../../Unicode/utf8_to_euc_jp.map"
/*
 * Declarations for the two conversion entry points defined below.
 * PG_FUNCTION_INFO_V1 (fmgr.h) marks each one as a version-1 fmgr function.
 */
PG_FUNCTION_INFO_V1(euc_jp_to_utf8)
PG_FUNCTION_INFO_V1(utf8_to_euc_jp)
extern Datum euc_jp_to_utf8(PG_FUNCTION_ARGS);
extern Datum utf8_to_euc_jp(PG_FUNCTION_ARGS);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* OPAQUE, -- source string (null terminated C string)
* OPAQUE, -- destination string (null terminated C string)
* INTEGER -- source string length
* ) returns INTEGER; -- dummy. returns nothing, actually.
* ----------
*/
/*
 * euc_jp_to_utf8 --- EUC_JP to UTF-8 conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to LocalToUtf() together with the LUmapEUC_JP table.
 * The integer result is a dummy and is always 0.
 */
Datum
euc_jp_to_utf8(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the local-to-UTF map */
	int			map_entries = sizeof(LUmapEUC_JP) / sizeof(pg_local_to_utf);

	Assert(PG_GETARG_INT32(0) == PG_EUC_JP);
	Assert(PG_GETARG_INT32(1) == PG_UTF8);
	Assert(len > 0);

	LocalToUtf(from, to, LUmapEUC_JP, map_entries, PG_EUC_JP, len);

	PG_RETURN_INT32(0);
}
/*
 * utf8_to_euc_jp --- UTF-8 to EUC_JP conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to UtfToLocal() together with the ULmapEUC_JP table.
 * The integer result is a dummy and is always 0.
 */
Datum
utf8_to_euc_jp(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the UTF-to-local map */
	int			map_entries = sizeof(ULmapEUC_JP) / sizeof(pg_utf_to_local);

	Assert(PG_GETARG_INT32(0) == PG_UTF8);
	Assert(PG_GETARG_INT32(1) == PG_EUC_JP);
	Assert(len > 0);

	UtfToLocal(from, to, ULmapEUC_JP, map_entries, len);

	PG_RETURN_INT32(0);
}
#-------------------------------------------------------------------------
#
# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
#
#-------------------------------------------------------------------------
# Like the parent conversion_procs Makefile, define subdir (path of this
# directory from the top of the source tree) before pulling in
# Makefile.global, which uses it to locate the sources.
subdir = src/backend/utils/mb/conversion_procs/utf8_and_euc_kr
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global

# Name of the conversion module; consumed by the shared rules in proc.mk.
NAME := utf8_and_euc_kr

include ../proc.mk
/*-------------------------------------------------------------------------
*
* EUC_KR <--> UTF-8
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c,v 1.1 2002/07/16 09:25:05 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include "../../Unicode/euc_kr_to_utf8.map"
#include "../../Unicode/utf8_to_euc_kr.map"
/*
 * Declarations for the two conversion entry points defined below.
 * PG_FUNCTION_INFO_V1 (fmgr.h) marks each one as a version-1 fmgr function.
 */
PG_FUNCTION_INFO_V1(euc_kr_to_utf8)
PG_FUNCTION_INFO_V1(utf8_to_euc_kr)
extern Datum euc_kr_to_utf8(PG_FUNCTION_ARGS);
extern Datum utf8_to_euc_kr(PG_FUNCTION_ARGS);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* OPAQUE, -- source string (null terminated C string)
* OPAQUE, -- destination string (null terminated C string)
* INTEGER -- source string length
* ) returns INTEGER; -- dummy. returns nothing, actually.
* ----------
*/
/*
 * euc_kr_to_utf8 --- EUC_KR to UTF-8 conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to LocalToUtf() together with the LUmapEUC_KR table.
 * The integer result is a dummy and is always 0.
 */
Datum
euc_kr_to_utf8(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the local-to-UTF map */
	int			map_entries = sizeof(LUmapEUC_KR) / sizeof(pg_local_to_utf);

	Assert(PG_GETARG_INT32(0) == PG_EUC_KR);
	Assert(PG_GETARG_INT32(1) == PG_UTF8);
	Assert(len > 0);

	LocalToUtf(from, to, LUmapEUC_KR, map_entries, PG_EUC_KR, len);

	PG_RETURN_INT32(0);
}
/*
 * utf8_to_euc_kr --- UTF-8 to EUC_KR conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to UtfToLocal() together with the ULmapEUC_KR table.
 * The integer result is a dummy and is always 0.
 */
Datum
utf8_to_euc_kr(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the UTF-to-local map */
	int			map_entries = sizeof(ULmapEUC_KR) / sizeof(pg_utf_to_local);

	Assert(PG_GETARG_INT32(0) == PG_UTF8);
	Assert(PG_GETARG_INT32(1) == PG_EUC_KR);
	Assert(len > 0);

	UtfToLocal(from, to, ULmapEUC_KR, map_entries, len);

	PG_RETURN_INT32(0);
}
#-------------------------------------------------------------------------
#
# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
#
#-------------------------------------------------------------------------
# Like the parent conversion_procs Makefile, define subdir (path of this
# directory from the top of the source tree) before pulling in
# Makefile.global, which uses it to locate the sources.
subdir = src/backend/utils/mb/conversion_procs/utf8_and_euc_tw
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global

# Name of the conversion module; consumed by the shared rules in proc.mk.
NAME := utf8_and_euc_tw

include ../proc.mk
/*-------------------------------------------------------------------------
*
* EUC_TW <--> UTF-8
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c,v 1.1 2002/07/16 09:25:05 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include "../../Unicode/euc_tw_to_utf8.map"
#include "../../Unicode/utf8_to_euc_tw.map"
/*
 * Declarations for the two conversion entry points defined below.
 * PG_FUNCTION_INFO_V1 (fmgr.h) marks each one as a version-1 fmgr function.
 */
PG_FUNCTION_INFO_V1(euc_tw_to_utf8)
PG_FUNCTION_INFO_V1(utf8_to_euc_tw)
extern Datum euc_tw_to_utf8(PG_FUNCTION_ARGS);
extern Datum utf8_to_euc_tw(PG_FUNCTION_ARGS);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* OPAQUE, -- source string (null terminated C string)
* OPAQUE, -- destination string (null terminated C string)
* INTEGER -- source string length
* ) returns INTEGER; -- dummy. returns nothing, actually.
* ----------
*/
/*
 * euc_tw_to_utf8 --- EUC_TW to UTF-8 conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to LocalToUtf() together with the LUmapEUC_TW table.
 * The integer result is a dummy and is always 0.
 */
Datum
euc_tw_to_utf8(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the local-to-UTF map */
	int			map_entries = sizeof(LUmapEUC_TW) / sizeof(pg_local_to_utf);

	Assert(PG_GETARG_INT32(0) == PG_EUC_TW);
	Assert(PG_GETARG_INT32(1) == PG_UTF8);
	Assert(len > 0);

	LocalToUtf(from, to, LUmapEUC_TW, map_entries, PG_EUC_TW, len);

	PG_RETURN_INT32(0);
}
/*
 * utf8_to_euc_tw --- UTF-8 to EUC_TW conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to UtfToLocal() together with the ULmapEUC_TW table.
 * The integer result is a dummy and is always 0.
 */
Datum
utf8_to_euc_tw(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the UTF-to-local map */
	int			map_entries = sizeof(ULmapEUC_TW) / sizeof(pg_utf_to_local);

	Assert(PG_GETARG_INT32(0) == PG_UTF8);
	Assert(PG_GETARG_INT32(1) == PG_EUC_TW);
	Assert(len > 0);

	UtfToLocal(from, to, ULmapEUC_TW, map_entries, len);

	PG_RETURN_INT32(0);
}
#-------------------------------------------------------------------------
#
# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
#
#-------------------------------------------------------------------------
# Like the parent conversion_procs Makefile, define subdir (path of this
# directory from the top of the source tree) before pulling in
# Makefile.global, which uses it to locate the sources.
subdir = src/backend/utils/mb/conversion_procs/utf8_and_gb18030
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global

# Name of the conversion module; consumed by the shared rules in proc.mk.
NAME := utf8_and_gb18030

include ../proc.mk
/*-------------------------------------------------------------------------
*
* GB18030 <--> UTF-8
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c,v 1.1 2002/07/16 09:25:05 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include "../../Unicode/gb18030_to_utf8.map"
#include "../../Unicode/utf8_to_gb18030.map"
/*
 * Declarations for the two conversion entry points defined below.
 * PG_FUNCTION_INFO_V1 (fmgr.h) marks each one as a version-1 fmgr function.
 */
PG_FUNCTION_INFO_V1(gb18030_to_utf8)
PG_FUNCTION_INFO_V1(utf8_to_gb18030)
extern Datum gb18030_to_utf8(PG_FUNCTION_ARGS);
extern Datum utf8_to_gb18030(PG_FUNCTION_ARGS);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* OPAQUE, -- source string (null terminated C string)
* OPAQUE, -- destination string (null terminated C string)
* INTEGER -- source string length
* ) returns INTEGER; -- dummy. returns nothing, actually.
* ----------
*/
/*
 * gb18030_to_utf8 --- GB18030 to UTF-8 conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to LocalToUtf() together with the LUmapGB18030 table.
 * The integer result is a dummy and is always 0.
 */
Datum
gb18030_to_utf8(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the local-to-UTF map */
	int			map_entries = sizeof(LUmapGB18030) / sizeof(pg_local_to_utf);

	Assert(PG_GETARG_INT32(0) == PG_GB18030);
	Assert(PG_GETARG_INT32(1) == PG_UTF8);
	Assert(len > 0);

	LocalToUtf(from, to, LUmapGB18030, map_entries, PG_GB18030, len);

	PG_RETURN_INT32(0);
}
/*
 * utf8_to_gb18030 --- UTF-8 to GB18030 conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to UtfToLocal() together with the ULmapGB18030 table.
 * The integer result is a dummy and is always 0.
 */
Datum
utf8_to_gb18030(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the UTF-to-local map */
	int			map_entries = sizeof(ULmapGB18030) / sizeof(pg_utf_to_local);

	Assert(PG_GETARG_INT32(0) == PG_UTF8);
	Assert(PG_GETARG_INT32(1) == PG_GB18030);
	Assert(len > 0);

	UtfToLocal(from, to, ULmapGB18030, map_entries, len);

	PG_RETURN_INT32(0);
}
#-------------------------------------------------------------------------
#
# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
#
#-------------------------------------------------------------------------
# Like the parent conversion_procs Makefile, define subdir (path of this
# directory from the top of the source tree) before pulling in
# Makefile.global, which uses it to locate the sources.
subdir = src/backend/utils/mb/conversion_procs/utf8_and_gbk
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global

# Name of the conversion module; consumed by the shared rules in proc.mk.
NAME := utf8_and_gbk

include ../proc.mk
/*-------------------------------------------------------------------------
*
* GBK <--> UTF-8
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c,v 1.1 2002/07/16 09:25:05 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include "../../Unicode/gbk_to_utf8.map"
#include "../../Unicode/utf8_to_gbk.map"
/*
 * Declarations for the two conversion entry points defined below.
 * PG_FUNCTION_INFO_V1 (fmgr.h) marks each one as a version-1 fmgr function.
 */
PG_FUNCTION_INFO_V1(gbk_to_utf8)
PG_FUNCTION_INFO_V1(utf8_to_gbk)
extern Datum gbk_to_utf8(PG_FUNCTION_ARGS);
extern Datum utf8_to_gbk(PG_FUNCTION_ARGS);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* OPAQUE, -- source string (null terminated C string)
* OPAQUE, -- destination string (null terminated C string)
* INTEGER -- source string length
* ) returns INTEGER; -- dummy. returns nothing, actually.
* ----------
*/
/*
 * gbk_to_utf8 --- GBK to UTF-8 conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to LocalToUtf() together with the LUmapGBK table.
 * The integer result is a dummy and is always 0.
 */
Datum
gbk_to_utf8(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the local-to-UTF map */
	int			map_entries = sizeof(LUmapGBK) / sizeof(pg_local_to_utf);

	Assert(PG_GETARG_INT32(0) == PG_GBK);
	Assert(PG_GETARG_INT32(1) == PG_UTF8);
	Assert(len > 0);

	LocalToUtf(from, to, LUmapGBK, map_entries, PG_GBK, len);

	PG_RETURN_INT32(0);
}
/*
 * utf8_to_gbk --- UTF-8 to GBK conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to UtfToLocal() together with the ULmapGBK table.
 * The integer result is a dummy and is always 0.
 */
Datum
utf8_to_gbk(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the UTF-to-local map */
	int			map_entries = sizeof(ULmapGBK) / sizeof(pg_utf_to_local);

	Assert(PG_GETARG_INT32(0) == PG_UTF8);
	Assert(PG_GETARG_INT32(1) == PG_GBK);
	Assert(len > 0);

	UtfToLocal(from, to, ULmapGBK, map_entries, len);

	PG_RETURN_INT32(0);
}
#-------------------------------------------------------------------------
#
# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
#
#-------------------------------------------------------------------------
# Like the parent conversion_procs Makefile, define subdir (path of this
# directory from the top of the source tree) before pulling in
# Makefile.global, which uses it to locate the sources.
subdir = src/backend/utils/mb/conversion_procs/utf8_and_iso8859
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global

# Name of the conversion module; consumed by the shared rules in proc.mk.
NAME := utf8_and_iso8859

include ../proc.mk
/*-------------------------------------------------------------------------
*
* ISO 8859 2-16 <--> UTF-8
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c,v 1.1 2002/07/16 09:25:05 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include "../../Unicode/utf8_to_iso8859_2.map"
#include "../../Unicode/utf8_to_iso8859_3.map"
#include "../../Unicode/utf8_to_iso8859_4.map"
#include "../../Unicode/utf8_to_iso8859_5.map"
#include "../../Unicode/utf8_to_iso8859_6.map"
#include "../../Unicode/utf8_to_iso8859_7.map"
#include "../../Unicode/utf8_to_iso8859_8.map"
#include "../../Unicode/utf8_to_iso8859_9.map"
#include "../../Unicode/utf8_to_iso8859_10.map"
#include "../../Unicode/utf8_to_iso8859_13.map"
#include "../../Unicode/utf8_to_iso8859_14.map"
#include "../../Unicode/utf8_to_iso8859_15.map"
#include "../../Unicode/utf8_to_iso8859_16.map"
#include "../../Unicode/iso8859_2_to_utf8.map"
#include "../../Unicode/iso8859_3_to_utf8.map"
#include "../../Unicode/iso8859_4_to_utf8.map"
#include "../../Unicode/iso8859_5_to_utf8.map"
#include "../../Unicode/iso8859_6_to_utf8.map"
#include "../../Unicode/iso8859_7_to_utf8.map"
#include "../../Unicode/iso8859_8_to_utf8.map"
#include "../../Unicode/iso8859_9_to_utf8.map"
#include "../../Unicode/iso8859_10_to_utf8.map"
#include "../../Unicode/iso8859_13_to_utf8.map"
#include "../../Unicode/iso8859_14_to_utf8.map"
#include "../../Unicode/iso8859_15_to_utf8.map"
#include "../../Unicode/iso8859_16_to_utf8.map"
/*
 * Declarations for the two conversion entry points defined below.
 * PG_FUNCTION_INFO_V1 (fmgr.h) marks each one as a version-1 fmgr function.
 */
PG_FUNCTION_INFO_V1(iso8859_to_utf8)
PG_FUNCTION_INFO_V1(utf8_to_iso8859)
extern Datum iso8859_to_utf8(PG_FUNCTION_ARGS);
extern Datum utf8_to_iso8859(PG_FUNCTION_ARGS);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* OPAQUE, -- source string (null terminated C string)
* OPAQUE, -- destination string (null terminated C string)
* INTEGER -- source string length
* ) returns INTEGER; -- dummy. returns nothing, actually.
* ----------
*/
/*
 * One row of the maps[] table below: the pair of conversion tables that
 * belongs to a single encoding.  The initializers in maps[] are positional,
 * so the field order here must not change.
 */
typedef struct {
pg_enc encoding; /* encoding id this row describes */
pg_local_to_utf *map1; /* local-encoding to UTF-8 table (passed to LocalToUtf) */
pg_utf_to_local *map2; /* UTF-8 to local-encoding table (passed to UtfToLocal) */
int size1; /* number of entries in map1 */
int size2; /* number of entries in map2 */
} pg_conv_map;
/*
 * Conversion tables for the ISO 8859 family, looked up as maps[encoding]
 * by iso8859_to_utf8() and utf8_to_iso8859().  Because the array is indexed
 * directly by encoding id, every encoding up to the last ISO 8859 one needs
 * a row, in encoding-id order; rows that list only the encoding id leave
 * their map pointers NULL and sizes 0 (no conversion handled here).
 *
 * Each populated row must name the tables AND the sizes of the same
 * ISO 8859 part.
 */
static pg_conv_map maps[] = {
	{PG_SQL_ASCII},				/* SQL/ASCII */
	{PG_EUC_JP},				/* EUC for Japanese */
	{PG_EUC_CN},				/* EUC for Chinese */
	{PG_EUC_KR},				/* EUC for Korean */
	{PG_EUC_TW},				/* EUC for Taiwan */
	{PG_JOHAB},					/* EUC for Korean JOHAB */
	{PG_UTF8},					/* Unicode UTF-8 */
	{PG_MULE_INTERNAL},			/* Mule internal code */
	{PG_LATIN1},				/* ISO-8859-1 Latin 1 */
	{PG_LATIN2, LUmapISO8859_2, ULmapISO8859_2,
		sizeof(LUmapISO8859_2) / sizeof(pg_local_to_utf),
	sizeof(ULmapISO8859_2) / sizeof(pg_utf_to_local)},	/* ISO-8859-2 Latin 2 */
	{PG_LATIN3, LUmapISO8859_3, ULmapISO8859_3,
		sizeof(LUmapISO8859_3) / sizeof(pg_local_to_utf),
	sizeof(ULmapISO8859_3) / sizeof(pg_utf_to_local)},	/* ISO-8859-3 Latin 3 */
	{PG_LATIN4, LUmapISO8859_4, ULmapISO8859_4,
		sizeof(LUmapISO8859_4) / sizeof(pg_local_to_utf),
	sizeof(ULmapISO8859_4) / sizeof(pg_utf_to_local)},	/* ISO-8859-4 Latin 4 */
	{PG_LATIN5, LUmapISO8859_9, ULmapISO8859_9,
		sizeof(LUmapISO8859_9) / sizeof(pg_local_to_utf),
	sizeof(ULmapISO8859_9) / sizeof(pg_utf_to_local)},	/* ISO-8859-9 Latin 5 */
	{PG_LATIN6, LUmapISO8859_10, ULmapISO8859_10,
		sizeof(LUmapISO8859_10) / sizeof(pg_local_to_utf),
	sizeof(ULmapISO8859_10) / sizeof(pg_utf_to_local)},	/* ISO-8859-10 Latin 6 */
	{PG_LATIN7, LUmapISO8859_13, ULmapISO8859_13,
		sizeof(LUmapISO8859_13) / sizeof(pg_local_to_utf),
	sizeof(ULmapISO8859_13) / sizeof(pg_utf_to_local)},	/* ISO-8859-13 Latin 7 */
	{PG_LATIN8, LUmapISO8859_14, ULmapISO8859_14,
		sizeof(LUmapISO8859_14) / sizeof(pg_local_to_utf),
	sizeof(ULmapISO8859_14) / sizeof(pg_utf_to_local)},	/* ISO-8859-14 Latin 8 */
	/* Latin 9 is ISO-8859-15: use the _15 tables, matching the _15 sizes */
	{PG_LATIN9, LUmapISO8859_15, ULmapISO8859_15,
		sizeof(LUmapISO8859_15) / sizeof(pg_local_to_utf),
	sizeof(ULmapISO8859_15) / sizeof(pg_utf_to_local)},	/* ISO-8859-15 Latin 9 */
	{PG_LATIN10, LUmapISO8859_16, ULmapISO8859_16,
		sizeof(LUmapISO8859_16) / sizeof(pg_local_to_utf),
	sizeof(ULmapISO8859_16) / sizeof(pg_utf_to_local)},	/* ISO-8859-16 Latin 10 */
	{PG_WIN1256},				/* windows-1256 */
	{PG_TCVN},					/* TCVN (Windows-1258) */
	{PG_WIN874},				/* windows-874 */
	{PG_KOI8R},					/* KOI8-R */
	{PG_WIN1251},				/* windows-1251 (was: WIN) */
	{PG_ALT},					/* (MS-DOS CP866) */
	{PG_ISO_8859_5, LUmapISO8859_5, ULmapISO8859_5,
		sizeof(LUmapISO8859_5) / sizeof(pg_local_to_utf),
	sizeof(ULmapISO8859_5) / sizeof(pg_utf_to_local)},	/* ISO-8859-5 */
	{PG_ISO_8859_6, LUmapISO8859_6, ULmapISO8859_6,
		sizeof(LUmapISO8859_6) / sizeof(pg_local_to_utf),
	sizeof(ULmapISO8859_6) / sizeof(pg_utf_to_local)},	/* ISO-8859-6 */
	{PG_ISO_8859_7, LUmapISO8859_7, ULmapISO8859_7,
		sizeof(LUmapISO8859_7) / sizeof(pg_local_to_utf),
	sizeof(ULmapISO8859_7) / sizeof(pg_utf_to_local)},	/* ISO-8859-7 */
	{PG_ISO_8859_8, LUmapISO8859_8, ULmapISO8859_8,
		sizeof(LUmapISO8859_8) / sizeof(pg_local_to_utf),
	sizeof(ULmapISO8859_8) / sizeof(pg_utf_to_local)},	/* ISO-8859-8 */
};
Datum
iso8859_to_utf8(PG_FUNCTION_ARGS)
{
int encoding = PG_GETARG_INT32(0);
unsigned char *src = PG_GETARG_CSTRING(2);
unsigned char *dest = PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
Assert(PG_GETARG_INT32(1) == PG_UTF8);
Assert(len > 0);
LocalToUtf(src, dest, maps[encoding].map1, maps[encoding].size1, encoding, len);
PG_RETURN_INT32(0);
}
Datum
utf8_to_iso8859(PG_FUNCTION_ARGS)
{
int encoding = PG_GETARG_INT32(1);
unsigned char *src = PG_GETARG_CSTRING(2);
unsigned char *dest = PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
Assert(PG_GETARG_INT32(0) == PG_UTF8);
Assert(len > 0);
UtfToLocal(src, dest, maps[encoding].map2, maps[encoding].size2, len);
PG_RETURN_INT32(0);
}
#-------------------------------------------------------------------------
#
# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
#
#-------------------------------------------------------------------------
# Like the parent conversion_procs Makefile, define subdir (path of this
# directory from the top of the source tree) before pulling in
# Makefile.global, which uses it to locate the sources.
subdir = src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global

# Name of the conversion module; consumed by the shared rules in proc.mk.
NAME := utf8_and_iso8859_1

include ../proc.mk
/*-------------------------------------------------------------------------
*
* ISO8859_1 <--> UTF-8
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c,v 1.1 2002/07/16 09:25:05 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
/*
 * Declarations for the two conversion entry points defined below.
 * PG_FUNCTION_INFO_V1 (fmgr.h) marks each one as a version-1 fmgr function.
 */
PG_FUNCTION_INFO_V1(iso8859_1_to_utf8)
PG_FUNCTION_INFO_V1(utf8_to_iso8859_1)
extern Datum iso8859_1_to_utf8(PG_FUNCTION_ARGS);
extern Datum utf8_to_iso8859_1(PG_FUNCTION_ARGS);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* OPAQUE, -- source string (null terminated C string)
* OPAQUE, -- destination string (null terminated C string)
* INTEGER -- source string length
* ) returns INTEGER; -- dummy. returns nothing, actually.
* ----------
*/
/*
 * iso8859_1_to_utf8 --- ISO 8859-1 (LATIN1) to UTF-8 conversion procedure.
 *
 * Walks at most len (arg 4) bytes of the source string (arg 2), stopping
 * early at a NUL byte.  Bytes below 0x80 are copied unchanged; every other
 * byte is expanded into a two-byte UTF-8 sequence.  The destination
 * buffer (arg 3) is NUL-terminated.  The integer result is a dummy and is
 * always 0.
 */
Datum
iso8859_1_to_utf8(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *out = PG_GETARG_CSTRING(3);
	unsigned char *in = PG_GETARG_CSTRING(2);
	unsigned short ch;

	Assert(PG_GETARG_INT32(0) == PG_LATIN1);
	Assert(PG_GETARG_INT32(1) == PG_UTF8);
	Assert(len > 0);

	for (; len > 0; len--)
	{
		ch = *in++;
		if (ch == 0)
			break;				/* end of source string */

		if (ch >= 0x80)
		{
			/* lead byte carries the top two bits, trail byte the low six */
			*out++ = (ch >> 6) | 0xc0;
			*out++ = (ch & 0x003f) | 0x80;
			continue;
		}

		/* plain ASCII passes through untouched */
		*out++ = ch;
	}
	*out = '\0';

	PG_RETURN_INT32(0);
}
/*
 * utf8_to_iso8859_1 --- UTF-8 to ISO 8859-1 (LATIN1) conversion procedure.
 *
 * Walks the source string (arg 2) for up to len (arg 4) bytes, stopping
 * early at a NUL byte, and writes one LATIN1 byte per character into the
 * destination buffer (arg 3), which is NUL-terminated at the end.  The
 * integer result is a dummy and is always 0.
 *
 * Raises an error ("Could not convert UTF-8 to ISO8859-1") when
 *	- a sequence of three or more bytes is seen,
 *	- a two-byte sequence encodes a code point above 0xFF, which cannot be
 *	  stored in one LATIN1 byte (previously the high bits were silently
 *	  dropped), or
 *	- a two-byte lead byte is not followed by a continuation byte within
 *	  the remaining input (previously the next byte, possibly the
 *	  terminating NUL, was consumed unchecked).
 */
Datum
utf8_to_iso8859_1(PG_FUNCTION_ARGS)
{
	unsigned char *src = PG_GETARG_CSTRING(2);
	unsigned char *dest = PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);
	unsigned short c,
				c1,
				c2;

	Assert(PG_GETARG_INT32(0) == PG_UTF8);
	Assert(PG_GETARG_INT32(1) == PG_LATIN1);
	Assert(len > 0);

	while (len > 0 && (c = *src++))
	{
		if ((c & 0xe0) == 0xc0)
		{
			/* two-byte sequence: 110xxxxx 10yyyyyy */
			c1 = c & 0x1f;

			/*
			 * Only lead bytes whose payload is <= 3 yield a value that fits
			 * in 8 bits, and the trail byte must really be there.
			 */
			if (len < 2 || c1 > 0x03 || (*src & 0xc0) != 0x80)
				elog(ERROR, "Could not convert UTF-8 to ISO8859-1");

			c2 = *src++ & 0x3f;
			*dest++ = (c1 << 6) | c2;
			len -= 2;
		}
		else if ((c & 0xe0) == 0xe0)
			/* three bytes or more can never map to LATIN1 */
			elog(ERROR, "Could not convert UTF-8 to ISO8859-1");
		else
		{
			/*
			 * Single byte: copied as is.  NOTE(review): this also passes
			 * through stray continuation bytes (0x80-0xbf), as before.
			 */
			*dest++ = c;
			len--;
		}
	}
	*dest = '\0';

	PG_RETURN_INT32(0);
}
#-------------------------------------------------------------------------
#
# $Id: Makefile,v 1.1 2002/07/16 09:25:05 ishii Exp $
#
#-------------------------------------------------------------------------
# Like the parent conversion_procs Makefile, define subdir (path of this
# directory from the top of the source tree) before pulling in
# Makefile.global, which uses it to locate the sources.
subdir = src/backend/utils/mb/conversion_procs/utf8_and_johab
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global

# Name of the conversion module; consumed by the shared rules in proc.mk.
NAME := utf8_and_johab

include ../proc.mk
/*-------------------------------------------------------------------------
*
* JOHAB <--> UTF-8
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c,v 1.1 2002/07/16 09:25:06 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include "../../Unicode/johab_to_utf8.map"
#include "../../Unicode/utf8_to_johab.map"
/*
 * Declarations for the two conversion entry points defined below.
 * PG_FUNCTION_INFO_V1 (fmgr.h) marks each one as a version-1 fmgr function.
 */
PG_FUNCTION_INFO_V1(johab_to_utf8)
PG_FUNCTION_INFO_V1(utf8_to_johab)
extern Datum johab_to_utf8(PG_FUNCTION_ARGS);
extern Datum utf8_to_johab(PG_FUNCTION_ARGS);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* OPAQUE, -- source string (null terminated C string)
* OPAQUE, -- destination string (null terminated C string)
* INTEGER -- source string length
* ) returns INTEGER; -- dummy. returns nothing, actually.
* ----------
*/
/*
 * johab_to_utf8 --- JOHAB to UTF-8 conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to LocalToUtf() together with the LUmapJOHAB table.
 * The integer result is a dummy and is always 0.
 */
Datum
johab_to_utf8(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the local-to-UTF map */
	int			map_entries = sizeof(LUmapJOHAB) / sizeof(pg_local_to_utf);

	Assert(PG_GETARG_INT32(0) == PG_JOHAB);
	Assert(PG_GETARG_INT32(1) == PG_UTF8);
	Assert(len > 0);

	LocalToUtf(from, to, LUmapJOHAB, map_entries, PG_JOHAB, len);

	PG_RETURN_INT32(0);
}
/*
 * utf8_to_johab --- UTF-8 to JOHAB conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to UtfToLocal() together with the ULmapJOHAB table.
 * The integer result is a dummy and is always 0.
 */
Datum
utf8_to_johab(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the UTF-to-local map */
	int			map_entries = sizeof(ULmapJOHAB) / sizeof(pg_utf_to_local);

	Assert(PG_GETARG_INT32(0) == PG_UTF8);
	Assert(PG_GETARG_INT32(1) == PG_JOHAB);
	Assert(len > 0);

	UtfToLocal(from, to, ULmapJOHAB, map_entries, len);

	PG_RETURN_INT32(0);
}
#-------------------------------------------------------------------------
#
# $Id: Makefile,v 1.1 2002/07/16 09:25:06 ishii Exp $
#
#-------------------------------------------------------------------------
# Like the parent conversion_procs Makefile, define subdir (path of this
# directory from the top of the source tree) before pulling in
# Makefile.global, which uses it to locate the sources.
subdir = src/backend/utils/mb/conversion_procs/utf8_and_sjis
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global

# Name of the conversion module; consumed by the shared rules in proc.mk.
NAME := utf8_and_sjis

include ../proc.mk
/*-------------------------------------------------------------------------
*
* SJIS <--> UTF-8
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c,v 1.1 2002/07/16 09:25:06 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include "../../Unicode/sjis_to_utf8.map"
#include "../../Unicode/utf8_to_sjis.map"
/*
 * Declarations for the two conversion entry points defined below.
 * PG_FUNCTION_INFO_V1 (fmgr.h) marks each one as a version-1 fmgr function.
 */
PG_FUNCTION_INFO_V1(sjis_to_utf8)
PG_FUNCTION_INFO_V1(utf8_to_sjis)
extern Datum sjis_to_utf8(PG_FUNCTION_ARGS);
extern Datum utf8_to_sjis(PG_FUNCTION_ARGS);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* OPAQUE, -- source string (null terminated C string)
* OPAQUE, -- destination string (null terminated C string)
* INTEGER -- source string length
* ) returns INTEGER; -- dummy. returns nothing, actually.
* ----------
*/
/*
 * sjis_to_utf8 --- SJIS to UTF-8 conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to LocalToUtf() together with the LUmapSJIS table.
 * The integer result is a dummy and is always 0.
 */
Datum
sjis_to_utf8(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the local-to-UTF map */
	int			map_entries = sizeof(LUmapSJIS) / sizeof(pg_local_to_utf);

	Assert(PG_GETARG_INT32(0) == PG_SJIS);
	Assert(PG_GETARG_INT32(1) == PG_UTF8);
	Assert(len > 0);

	LocalToUtf(from, to, LUmapSJIS, map_entries, PG_SJIS, len);

	PG_RETURN_INT32(0);
}
/*
 * utf8_to_sjis --- UTF-8 to SJIS conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to UtfToLocal() together with the ULmapSJIS table.
 * The integer result is a dummy and is always 0.
 */
Datum
utf8_to_sjis(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the UTF-to-local map */
	int			map_entries = sizeof(ULmapSJIS) / sizeof(pg_utf_to_local);

	Assert(PG_GETARG_INT32(0) == PG_UTF8);
	Assert(PG_GETARG_INT32(1) == PG_SJIS);
	Assert(len > 0);

	UtfToLocal(from, to, ULmapSJIS, map_entries, len);

	PG_RETURN_INT32(0);
}
#-------------------------------------------------------------------------
#
# $Id: Makefile,v 1.1 2002/07/16 09:25:06 ishii Exp $
#
#-------------------------------------------------------------------------
# Like the parent conversion_procs Makefile, define subdir (path of this
# directory from the top of the source tree) before pulling in
# Makefile.global, which uses it to locate the sources.
subdir = src/backend/utils/mb/conversion_procs/utf8_and_tcvn
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global

# Name of the conversion module; consumed by the shared rules in proc.mk.
NAME := utf8_and_tcvn

include ../proc.mk
/*-------------------------------------------------------------------------
*
* TCVN <--> UTF-8
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_tcvn/Attic/utf8_and_tcvn.c,v 1.1 2002/07/16 09:25:06 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include "../../Unicode/tcvn_to_utf8.map"
#include "../../Unicode/utf8_to_tcvn.map"
/*
 * Declarations for the two conversion entry points defined below.
 * PG_FUNCTION_INFO_V1 (fmgr.h) marks each one as a version-1 fmgr function.
 */
PG_FUNCTION_INFO_V1(tcvn_to_utf8)
PG_FUNCTION_INFO_V1(utf8_to_tcvn)
extern Datum tcvn_to_utf8(PG_FUNCTION_ARGS);
extern Datum utf8_to_tcvn(PG_FUNCTION_ARGS);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* OPAQUE, -- source string (null terminated C string)
* OPAQUE, -- destination string (null terminated C string)
* INTEGER -- source string length
* ) returns INTEGER; -- dummy. returns nothing, actually.
* ----------
*/
/*
 * tcvn_to_utf8 --- TCVN to UTF-8 conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to LocalToUtf() together with the LUmapTCVN table.
 * The integer result is a dummy and is always 0.
 */
Datum
tcvn_to_utf8(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the local-to-UTF map */
	int			map_entries = sizeof(LUmapTCVN) / sizeof(pg_local_to_utf);

	Assert(PG_GETARG_INT32(0) == PG_TCVN);
	Assert(PG_GETARG_INT32(1) == PG_UTF8);
	Assert(len > 0);

	LocalToUtf(from, to, LUmapTCVN, map_entries, PG_TCVN, len);

	PG_RETURN_INT32(0);
}
/*
 * utf8_to_tcvn --- UTF-8 to TCVN conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to UtfToLocal() together with the ULmapTCVN table.
 * The integer result is a dummy and is always 0.
 */
Datum
utf8_to_tcvn(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the UTF-to-local map */
	int			map_entries = sizeof(ULmapTCVN) / sizeof(pg_utf_to_local);

	Assert(PG_GETARG_INT32(0) == PG_UTF8);
	Assert(PG_GETARG_INT32(1) == PG_TCVN);
	Assert(len > 0);

	UtfToLocal(from, to, ULmapTCVN, map_entries, len);

	PG_RETURN_INT32(0);
}
#-------------------------------------------------------------------------
#
# $Id: Makefile,v 1.1 2002/07/16 09:25:06 ishii Exp $
#
#-------------------------------------------------------------------------
# Like the parent conversion_procs Makefile, define subdir (path of this
# directory from the top of the source tree) before pulling in
# Makefile.global, which uses it to locate the sources.
subdir = src/backend/utils/mb/conversion_procs/utf8_and_uhc
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global

# Name of the conversion module; consumed by the shared rules in proc.mk.
NAME := utf8_and_uhc

include ../proc.mk
/*-------------------------------------------------------------------------
*
* UHC <--> UTF-8
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c,v 1.1 2002/07/16 09:25:06 ishii Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include "../../Unicode/uhc_to_utf8.map"
#include "../../Unicode/utf8_to_uhc.map"
/*
 * Declarations for the two conversion entry points defined below.
 * PG_FUNCTION_INFO_V1 (fmgr.h) marks each one as a version-1 fmgr function.
 */
PG_FUNCTION_INFO_V1(uhc_to_utf8)
PG_FUNCTION_INFO_V1(utf8_to_uhc)
extern Datum uhc_to_utf8(PG_FUNCTION_ARGS);
extern Datum utf8_to_uhc(PG_FUNCTION_ARGS);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* OPAQUE, -- source string (null terminated C string)
* OPAQUE, -- destination string (null terminated C string)
* INTEGER -- source string length
* ) returns INTEGER; -- dummy. returns nothing, actually.
* ----------
*/
/*
 * uhc_to_utf8 --- UHC to UTF-8 conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to LocalToUtf() together with the LUmapUHC table.
 * The integer result is a dummy and is always 0.
 */
Datum
uhc_to_utf8(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the local-to-UTF map */
	int			map_entries = sizeof(LUmapUHC) / sizeof(pg_local_to_utf);

	Assert(PG_GETARG_INT32(0) == PG_UHC);
	Assert(PG_GETARG_INT32(1) == PG_UTF8);
	Assert(len > 0);

	LocalToUtf(from, to, LUmapUHC, map_entries, PG_UHC, len);

	PG_RETURN_INT32(0);
}
/*
 * utf8_to_uhc --- UTF-8 to UHC conversion procedure.
 *
 * Hands the source string (arg 2), destination buffer (arg 3) and source
 * length (arg 4) to UtfToLocal() together with the ULmapUHC table.
 * The integer result is a dummy and is always 0.
 */
Datum
utf8_to_uhc(PG_FUNCTION_ARGS)
{
	int			len = PG_GETARG_INT32(4);
	unsigned char *to = PG_GETARG_CSTRING(3);
	unsigned char *from = PG_GETARG_CSTRING(2);
	/* number of entries in the UTF-to-local map */
	int			map_entries = sizeof(ULmapUHC) / sizeof(pg_utf_to_local);

	Assert(PG_GETARG_INT32(0) == PG_UTF8);
	Assert(PG_GETARG_INT32(1) == PG_UHC);
	Assert(len > 0);

	UtfToLocal(from, to, ULmapUHC, map_entries, len);

	PG_RETURN_INT32(0);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment