/* $Id: pg_wchar.h,v 1.38 2002/03/05 05:52:50 momjian Exp $ */

#ifndef PG_WCHAR_H
#define PG_WCHAR_H

#include <sys/types.h>

/*
 * When FRONTEND is defined, map palloc()/pfree() onto malloc()/free().
 * Any earlier macro definition of either name is removed first, so the
 * redefinition cannot clash with it.  This header does not itself declare
 * malloc()/free().
 */
#ifdef FRONTEND
#undef palloc
#define palloc malloc
#undef pfree
#define pfree free
#endif

/*
 * The pg_wchar type.
 *
 * With MULTIBYTE defined, pg_wchar is a typedef for unsigned int.
 * Otherwise it is a macro that expands to plain char.
 */
#ifdef MULTIBYTE
typedef unsigned int pg_wchar;

#else
#define pg_wchar char
#endif

/*
 * various definitions for EUC
 */
#define SS2 0x8e				/* single shift 2 (JIS0201) */
#define SS3 0x8f				/* single shift 3 (JIS0212) */
/*
 * Leading byte types or leading prefix byte for MULE internal code.
 * See http://www.xemacs.org for more details.	(there is a doc titled
 * "XEmacs Internals Manual", "MULE Character Sets and Encodings"
 * section.)
 */
/*
 * Is a leading byte for "official" single byte encodings?
 *
 * True for byte values 0x81..0x8d.  The range check is done with a single
 * unsigned subtraction (values below 0x81 wrap around to a large number),
 * so the argument is evaluated exactly once and expressions with side
 * effects such as *p++ are safe.
 */
#define IS_LC1(c)	(((unsigned int) (unsigned char) (c)) - 0x81U <= 0x0cU)
/*
 * Is a prefix byte for "private" single byte encodings?
 *
 * True for byte values 0x9a and 0x9b.  Written as a single unsigned
 * subtraction so the argument is evaluated exactly once.
 */
#define IS_LCPRV1(c)	(((unsigned int) (unsigned char) (c)) - 0x9aU <= 0x01U)
/*
 * Is a leading byte for "official" multi byte encodings?
 *
 * True for byte values 0x90..0x99.  Written as a single unsigned
 * subtraction so the argument is evaluated exactly once.
 */
#define IS_LC2(c)	(((unsigned int) (unsigned char) (c)) - 0x90U <= 0x09U)
/*
 * Is a prefix byte for "private" multi byte encodings?
 *
 * True for byte values 0x9c and 0x9d.  Written as a single unsigned
 * subtraction so the argument is evaluated exactly once.
 */
#define IS_LCPRV2(c)	(((unsigned int) (unsigned char) (c)) - 0x9cU <= 0x01U)

/*----------------------------------------------------
 * leading characters
 *----------------------------------------------------
 */

/*
 * Official single byte encodings (0x81-0x8e)
 */
#define LC_ISO8859_1	0x81	/* ISO8859 Latin 1 */
#define LC_ISO8859_2	0x82	/* ISO8859 Latin 2 */
#define LC_ISO8859_3	0x83	/* ISO8859 Latin 3 */
#define LC_ISO8859_4	0x84	/* ISO8859 Latin 4 */
#define LC_TIS620	0x85		/* Thai (not supported yet) */
#define LC_ISO8859_7	0x86	/* Greek (not supported yet) */
#define LC_ISO8859_6	0x87	/* Arabic (not supported yet) */
#define LC_ISO8859_8	0x88	/* Hebrew (not supported yet) */
#define LC_JISX0201K	0x89	/* Japanese 1 byte kana */
#define LC_JISX0201R	0x8a	/* Japanese 1 byte Roman */
/*
 * Note that 0x8b seems to be unused as of Emacs 20.7.
 * However, there is a chance that 0x8b could be used
 * in a later version of Emacs.
 *
 * LC_KOI8_R and LC_KOI8_U share the value 0x8b.
 */
#define LC_KOI8_R	0x8b		/* Cyrillic KOI8-R */
#define LC_KOI8_U	0x8b		/* Cyrillic KOI8-U */
#define LC_ISO8859_5	0x8c	/* ISO8859 Cyrillic */
#define LC_ISO8859_9	0x8d	/* ISO8859 Latin 5 (not supported yet) */
/* #define FREE		0x8e	free (unused) */

/*
 * Unused
 */
#define CONTROL_1	0x8f		/* control characters (unused) */
/*
 * Official multi byte encodings (0x90-0x99)
 * 0x9a-0x9d are free. 0x9e and 0x9f are reserved.
 *
 * NOTE(review): the commented-out "FREE 0x90" line below disagrees with
 * LC_JISX0208_1978, which is defined as 0x90; one of the two is stale.
 * NOTE(review): LC_BIG5_1 and LC_BIG5_2 carry the same "Plane 1"
 * description; the second is presumably a copy-paste slip -- confirm.
 */
#define LC_JISX0208_1978	0x90	/* Japanese Kanji, old JIS (not supported) */
/* #define FREE		0x90	free (unused) */
#define LC_GB2312_80	0x91	/* Chinese */
#define LC_JISX0208 0x92		/* Japanese Kanji (JIS X 0208) */
#define LC_KS5601	0x93		/* Korean */
#define LC_JISX0212 0x94		/* Japanese Kanji (JIS X 0212) */
#define LC_CNS11643_1	0x95	/* CNS 11643-1992 Plane 1 */
#define LC_CNS11643_2	0x96	/* CNS 11643-1992 Plane 2 */
/* #define FREE		0x97	free (unused) */
#define LC_BIG5_1	0x98		/* Plane 1 Chinese traditional (not
								 * supported) */
#define LC_BIG5_2	0x99		/* Plane 1 Chinese traditional (not
								 * supported) */
/*
 * Private single byte encodings (0xa0-0xef)
 */
#define LC_SISHENG	0xa0		/* Chinese SiSheng characters for
								 * PinYin/ZhuYin (not supported) */
#define LC_IPA		0xa1		/* IPA (International Phonetic
								 * Association) (not supported) */
#define LC_VISCII_LOWER 0xa2	/* Vietnamese VISCII1.1 lower-case (not
								 * supported) */
#define LC_VISCII_UPPER 0xa3	/* Vietnamese VISCII1.1 upper-case (not
								 * supported) */
#define LC_ARABIC_DIGIT 0xa4	/* Arabic digit (not supported) */
#define LC_ARABIC_1_COLUMN	0xa5	/* Arabic 1-column (not supported) */
#define LC_ASCII_RIGHT_TO_LEFT	0xa6	/* ASCII (left half of ISO8859-1)
										 * with right-to-left direction
										 * (not supported) */
#define LC_LAO		0xa7		/* Lao characters (ISO10646 0E80..0EDF)
								 * (not supported) */
#define LC_ARABIC_2_COLUMN	0xa8	/* Arabic 2-column (not supported) */
/*
 * Private multi byte encodings (0xf0-0xff)
 */
#define LC_INDIAN_1_COLUMN	0xf0	/* Indian charset for 1-column width
									 * glyphs (not supported) */
#define LC_TIBETAN_1_COLUMN 0xf1	/* Tibetan 1 column glyph (not supported) */
#define LC_ETHIOPIC 0xf5		/* Ethiopic characters (not supported) */
#define LC_CNS11643_3	0xf6	/* CNS 11643-1992 Plane 3 */
#define LC_CNS11643_4	0xf7	/* CNS 11643-1992 Plane 4 */
#define LC_CNS11643_5	0xf8	/* CNS 11643-1992 Plane 5 */
#define LC_CNS11643_6	0xf9	/* CNS 11643-1992 Plane 6 */
#define LC_CNS11643_7	0xfa	/* CNS 11643-1992 Plane 7 */
#define LC_INDIAN_2_COLUMN	0xfb	/* Indian charset for 2-column width
									 * glyphs (not supported) */
#define LC_TIBETAN	0xfc		/* Tibetan (not supported) */
/* #define FREE		0xfd	free (unused) */
/* #define FREE		0xfe	free (unused) */
/* #define FREE		0xff	free (unused) */
/*
 * Encoding numeric identifiers
 *
 * WARNING: the order of this table must be the same as the order
 *			in the pg_enconv_tbl[] (mb/conv.c) and pg_enc2name_tbl[]
 *			(mb/encnames.c) arrays!
 *
 *			If you add an encoding, don't forget to check the
 *			PG_ENCODING_[BE|FE]_LAST macros.
 *
 *		PG_SQL_ASCII is the default encoding and must be = 0.
 */
typedef enum pg_enc
{
	PG_SQL_ASCII = 0,			/* SQL/ASCII */
	PG_EUC_JP,					/* EUC for Japanese */
	PG_EUC_CN,					/* EUC for Chinese */
	PG_EUC_KR,					/* EUC for Korean */
	PG_EUC_TW,					/* EUC for Taiwan */
	PG_JOHAB,					/* EUC for Korean JOHAB */
	PG_UTF8,					/* Unicode UTF-8 */
	PG_MULE_INTERNAL,			/* Mule internal code */
	PG_LATIN1,					/* ISO-8859-1 Latin 1 */
	PG_LATIN2,					/* ISO-8859-2 Latin 2 */
	PG_LATIN3,					/* ISO-8859-3 Latin 3 */
	PG_LATIN4,					/* ISO-8859-4 Latin 4 */
	PG_LATIN5,					/* ISO-8859-9 Latin 5 */
	PG_LATIN6,					/* ISO-8859-10 Latin6 */
	PG_LATIN7,					/* ISO-8859-13 Latin7 */
	PG_LATIN8,					/* ISO-8859-14 Latin8 */
	PG_LATIN9,					/* ISO-8859-15 Latin9 */
	PG_LATIN10,					/* ISO-8859-16 Latin10 */
	PG_WIN1256,					/* windows-1256 */
	PG_TCVN,					/* TCVN (Windows-1258) */
	PG_WIN874,					/* windows-874 */
	PG_KOI8R,					/* KOI8-R */
	PG_WIN1251,					/* windows-1251 (was: WIN) */
	PG_ALT,						/* (MS-DOS CP866) */
	PG_ISO_8859_5,				/* ISO-8859-5 */
	PG_ISO_8859_6,				/* ISO-8859-6 */
	PG_ISO_8859_7,				/* ISO-8859-7 */
	PG_ISO_8859_8,				/* ISO-8859-8 */

	/* the following are for client encoding only */
	PG_SJIS,					/* Shift JIS (Windows-932) */
	PG_BIG5,					/* Big5 (Windows-950) */
	PG_GBK,						/* GBK (Windows-936) */
	PG_UHC,						/* UHC (Windows-949) */
	PG_WIN1250,					/* windows-1250 */

	_PG_LAST_ENCODING_			/* mark only */

} pg_enc;

/*
 * PG_ENCODING_BE_LAST is the last pg_enc member usable as a backend
 * encoding; PG_ENCODING_FE_LAST is the last member usable by a frontend.
 * The members after PG_ENCODING_BE_LAST, up to and including
 * PG_ENCODING_FE_LAST, are the client-only encodings, so
 * PG_ENCODING_FE_LAST must name the final real member of the enum (the one
 * just before _PG_LAST_ENCODING_) and must never sort before
 * PG_ENCODING_BE_LAST.
 */
#define PG_ENCODING_BE_LAST PG_ISO_8859_8
#define PG_ENCODING_FE_LAST PG_WIN1250

#ifdef MULTIBYTE
/*
 * Please use these tests before access to pg_enconv_tbl[]
 * or to other places...
 *
 * PG_VALID_BE_ENCODING is true when _enc lies in the range
 * 0 .. PG_ENCODING_BE_LAST inclusive.  _enc is evaluated twice.
 */
#define PG_VALID_BE_ENCODING(_enc) \
		((_enc) >= 0 && (_enc) <= PG_ENCODING_BE_LAST)

/*
 * True when _enc is usable only as a client encoding: it lies after the
 * last backend encoding (PG_ENCODING_BE_LAST) and no later than the last
 * frontend encoding (PG_ENCODING_FE_LAST).  _enc is evaluated twice.
 *
 * The historical, misspelled name PG_ENCODING_IS_CLIEN_ONLY is kept so that
 * existing users keep compiling; PG_ENCODING_IS_CLIENT_ONLY is an alias
 * with the intended spelling.
 */
#define PG_ENCODING_IS_CLIEN_ONLY(_enc) \
		((_enc) > PG_ENCODING_BE_LAST && (_enc) <= PG_ENCODING_FE_LAST)

#define PG_ENCODING_IS_CLIENT_ONLY(_enc)	PG_ENCODING_IS_CLIEN_ONLY(_enc)

/*
 * True when _enc lies in the range 0 .. _PG_LAST_ENCODING_ - 1, i.e. it is
 * any real member of pg_enc.  _enc is evaluated twice.
 */
#define PG_VALID_ENCODING(_enc) \
		((_enc) >= 0 && (_enc) < _PG_LAST_ENCODING_)

/*
 * On the FE side all encodings are possible
 */
#define PG_VALID_FE_ENCODING(_enc)	PG_VALID_ENCODING(_enc)
Tatsuo Ishii's avatar
Tatsuo Ishii committed
219

220
/*
Tatsuo Ishii's avatar
Tatsuo Ishii committed
221 222 223 224
 * Encoding names with all aliases
 */
typedef struct pg_encname
{
225 226
	char	   *name;
	pg_enc		encoding;
Tatsuo Ishii's avatar
Tatsuo Ishii committed
227 228
} pg_encname;

229 230
extern pg_encname pg_encname_tbl[];
extern unsigned int pg_encname_tbl_sz;
Tatsuo Ishii's avatar
Tatsuo Ishii committed
231 232 233 234 235 236 237 238

/*
 * Careful:
 *
 * if (PG_VALID_ENCODING(encoding))
 *		pg_enc2name_tbl[ encoding ];
 */
typedef struct pg_enc2name
239
{
240 241
	char	   *name;
	pg_enc		encoding;
Tatsuo Ishii's avatar
Tatsuo Ishii committed
242 243
} pg_enc2name;

244
extern pg_enc2name pg_enc2name_tbl[];
Tatsuo Ishii's avatar
Tatsuo Ishii committed
245

246
extern pg_encname *pg_char_to_encname_struct(const char *name);
247

248 249
extern int	pg_char_to_encoding(const char *s);
extern const char *pg_encoding_to_char(int encoding);
/*
 * Signatures of the conversion callbacks.  Both take two byte buffers and
 * a length and return nothing; they differ only in the name of the first
 * parameter (l for to_mic_converter, mic for from_mic_converter).
 */
typedef void (*to_mic_converter) (unsigned char *l, unsigned char *p, int len);
typedef void (*from_mic_converter) (unsigned char *mic, unsigned char *p, int len);

Tatsuo Ishii's avatar
Tatsuo Ishii committed
254 255 256 257 258 259 260 261 262 263
/*
 * The backend encoding conversion routines
 * Careful:
 *
 *	if (PG_VALID_ENCODING(enc))
 *		pg_encconv_tbl[ enc ]->foo
 */
#ifndef FRONTEND
typedef struct pg_enconv
{
264 265
	pg_enc		encoding;		/* encoding identifier */
	to_mic_converter to_mic;	/* client encoding to MIC */
266 267
	from_mic_converter from_mic;	/* MIC to client encoding */
	to_mic_converter to_unicode;	/* client encoding to UTF-8 */
268
	from_mic_converter from_unicode;	/* UTF-8 to client encoding */
Tatsuo Ishii's avatar
Tatsuo Ishii committed
269 270 271 272
} pg_enconv;

extern pg_enconv pg_enconv_tbl[];
extern pg_enconv *pg_get_enconv_by_encoding(int encoding);
273
#endif   /* FRONTEND */
Tatsuo Ishii's avatar
Tatsuo Ishii committed
274 275 276 277

/*
 * pg_wchar stuff
 */
278
typedef int (*mb2wchar_with_len_converter) (const unsigned char *from,
279 280
														pg_wchar *to,
														int len);
281 282
typedef int (*mblen_converter) (const unsigned char *mbstr);

283 284
typedef struct
{
285 286
	mb2wchar_with_len_converter mb2wchar_with_len;		/* convert a multi-byte
														 * string to a wchar */
287
	mblen_converter mblen;		/* returns the length of a multi-byte char */
288
	int			maxmblen;		/* max bytes for a char in this charset */
Tatsuo Ishii's avatar
Tatsuo Ishii committed
289
} pg_wchar_tbl;
290 291 292

extern pg_wchar_tbl pg_wchar_table[];

/*
 * UTF-8 to local code conversion map
 *
 * One map entry; the UTF-8 value comes first.
 */
typedef struct
{
	unsigned int utf;			/* UTF-8 */
	unsigned int code;			/* local code */
} pg_utf_to_local;

/*
 * local code to UTF-8 conversion map
 *
 * One map entry; the local code comes first, i.e. the field order is the
 * reverse of pg_utf_to_local.
 */
typedef struct
{
	unsigned int code;			/* local code */
	unsigned int utf;			/* UTF-8 */
} pg_local_to_utf;
/*
 * Routines working on pg_wchar strings (prototypes only; the definitions
 * live elsewhere).
 */
extern int	pg_mb2wchar(const unsigned char *, pg_wchar *);
extern int	pg_mb2wchar_with_len(const unsigned char *, pg_wchar *, int);
extern int	pg_char_and_wchar_strcmp(const char *, const pg_wchar *);
extern int	pg_wchar_strncmp(const pg_wchar *, const pg_wchar *, size_t);
extern int	pg_char_and_wchar_strncmp(const char *, const pg_wchar *, size_t);
extern size_t pg_wchar_strlen(const pg_wchar *);
/*
 * Routines working on multibyte strings and on per-encoding maximum
 * character length (prototypes only; the definitions live elsewhere).
 */
extern int	pg_mblen(const unsigned char *);
extern int	pg_encoding_mblen(int, const unsigned char *);
extern int	pg_mule_mblen(const unsigned char *);
extern int	pg_mic_mblen(const unsigned char *);
extern int	pg_mbstrlen(const unsigned char *);
extern int	pg_mbstrlen_with_len(const unsigned char *, int);
extern int	pg_mbcliplen(const unsigned char *, int, int);
extern int	pg_mbcharcliplen(const unsigned char *, int, int);
extern int	pg_encoding_max_length(int);
extern int	pg_database_encoding_max_length(void);
/*
 * Client and database encoding accessors, encoding-name validity checks
 * and the UTF-8 character length routine (prototypes only; the definitions
 * live elsewhere).
 */
extern int	pg_set_client_encoding(int);
extern int	pg_get_client_encoding(void);
extern const char *pg_get_client_encoding_name(void);

extern void SetDatabaseEncoding(int);
extern int	GetDatabaseEncoding(void);
extern const char *GetDatabaseEncodingName(void);

extern int	pg_valid_client_encoding(const char *name);
extern int	pg_valid_server_encoding(const char *name);

extern int	pg_utf_mblen(const unsigned char *);
340 341 342
extern int pg_find_encoding_converters(int src, int dest,
							to_mic_converter *src_to_mic,
							from_mic_converter *dest_from_mic);
343
extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len,
344 345
						  to_mic_converter src_to_mic,
						  from_mic_converter dest_from_mic);
/*
 * Client/server string conversion and BIG5 <-> CNS code conversion
 * (prototypes only; the definitions live elsewhere).
 */
extern unsigned char *pg_client_to_server(unsigned char *, int);
extern unsigned char *pg_server_to_client(unsigned char *, int);

extern unsigned short BIG5toCNS(unsigned short, unsigned char *);
extern unsigned short CNStoBIG5(unsigned short, unsigned char);
/*
 * Multibyte string verification (prototype only; the definition lives
 * elsewhere).  Declared extern like every other prototype in this header.
 */
extern char *pg_verifymbstr(const unsigned char *, int);
#endif   /* MULTIBYTE */

#endif   /* PG_WCHAR_H */