Commit 1032445e authored by Tatsuo Ishii

TODO item:

* Make n of CHAR(n)/VARCHAR(n) the number of letters, not bytes
parent b08e86d5
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.80 2001/06/09 23:21:55 petere Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.81 2001/07/15 11:07:37 ishii Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -73,26 +73,48 @@ bpcharin(PG_FUNCTION_ARGS) ...@@ -73,26 +73,48 @@ bpcharin(PG_FUNCTION_ARGS)
char *r; char *r;
size_t len, maxlen; size_t len, maxlen;
int i; int i;
#ifdef MULTIBYTE
int charlen; /* number of characters in the input string */
#endif
len = strlen(s); len = strlen(s);
#ifdef MULTIBYTE
charlen = pg_mbstrlen(s);
#endif
/* If typmod is -1 (or invalid), use the actual string length */ /* If typmod is -1 (or invalid), use the actual string length */
if (atttypmod < (int32) VARHDRSZ) if (atttypmod < (int32) VARHDRSZ)
#ifdef MULTIBYTE
maxlen = charlen;
#else
maxlen = len; maxlen = len;
#endif
else else
maxlen = atttypmod - VARHDRSZ; maxlen = atttypmod - VARHDRSZ;
#ifdef MULTIBYTE
if (charlen > maxlen)
#else
if (len > maxlen) if (len > maxlen)
#endif
{ {
/* Verify that extra characters are spaces, and clip them off */ /* Verify that extra characters are spaces, and clip them off */
#ifdef MULTIBYTE #ifdef MULTIBYTE
size_t mbmaxlen = pg_mbcliplen(s, len, maxlen); size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
/*
* at this point, len is the actual BYTE length of the
* input string, maxlen is the max number of
* CHARACTERS allowed for this bpchar type.
*/
if (strspn(s + mbmaxlen, " ") == len - mbmaxlen) if (strspn(s + mbmaxlen, " ") == len - mbmaxlen)
len = mbmaxlen; len = mbmaxlen;
else else
elog(ERROR, "value too long for type character(%d)", maxlen); elog(ERROR, "value too long for type character(%d)", maxlen);
Assert(len <= maxlen); /*
* XXX: at this point, maxlen is the necessary byte
* length, not the number of CHARACTERS!
*/
maxlen = len;
#else #else
if (strspn(s + maxlen, " ") == len - maxlen) if (strspn(s + maxlen, " ") == len - maxlen)
len = maxlen; len = maxlen;
...@@ -100,6 +122,16 @@ bpcharin(PG_FUNCTION_ARGS) ...@@ -100,6 +122,16 @@ bpcharin(PG_FUNCTION_ARGS)
elog(ERROR, "value too long for type character(%d)", maxlen); elog(ERROR, "value too long for type character(%d)", maxlen);
#endif #endif
} }
#ifdef MULTIBYTE
else
{
/*
* XXX: at this point, maxlen is the necessary byte
* length, not the number of CHARACTERS!
*/
maxlen = len + (maxlen - charlen);
}
#endif
result = palloc(maxlen + VARHDRSZ); result = palloc(maxlen + VARHDRSZ);
VARATT_SIZEP(result) = maxlen + VARHDRSZ; VARATT_SIZEP(result) = maxlen + VARHDRSZ;
...@@ -158,19 +190,29 @@ bpchar(PG_FUNCTION_ARGS) ...@@ -158,19 +190,29 @@ bpchar(PG_FUNCTION_ARGS)
char *r; char *r;
char *s; char *s;
int i; int i;
#ifdef MULTIBYTE
int charlen; /* number of characters in the input string
+ VARHDRSZ */
#endif
len = VARSIZE(source); len = VARSIZE(source);
#ifdef MULTIBYTE
charlen = pg_mbstrlen_with_len(VARDATA(source), len - VARHDRSZ) + VARHDRSZ;
#endif
/* No work if typmod is invalid or supplied data matches it already */ /* No work if typmod is invalid or supplied data matches it already */
if (maxlen < (int32) VARHDRSZ || len == maxlen) if (maxlen < (int32) VARHDRSZ || len == maxlen)
PG_RETURN_BPCHAR_P(source); PG_RETURN_BPCHAR_P(source);
#ifdef MULTIBYTE
if (charlen > maxlen)
#else
if (len > maxlen) if (len > maxlen)
#endif
{ {
/* Verify that extra characters are spaces, and clip them off */ /* Verify that extra characters are spaces, and clip them off */
#ifdef MULTIBYTE #ifdef MULTIBYTE
size_t maxmblen; size_t maxmblen;
maxmblen = pg_mbcliplen(VARDATA(source), len - VARHDRSZ, maxmblen = pg_mbcharcliplen(VARDATA(source), len - VARHDRSZ,
maxlen - VARHDRSZ) + VARHDRSZ; maxlen - VARHDRSZ) + VARHDRSZ;
for (i = maxmblen - VARHDRSZ; i < len - VARHDRSZ; i++) for (i = maxmblen - VARHDRSZ; i < len - VARHDRSZ; i++)
...@@ -179,7 +221,11 @@ bpchar(PG_FUNCTION_ARGS) ...@@ -179,7 +221,11 @@ bpchar(PG_FUNCTION_ARGS)
maxlen - VARHDRSZ); maxlen - VARHDRSZ);
len = maxmblen; len = maxmblen;
Assert(len <= maxlen); /*
* XXX: at this point, maxlen is the necessary byte
* length+VARHDRSZ, not the number of CHARACTERS!
*/
maxlen = len;
#else #else
for (i = maxlen - VARHDRSZ; i < len - VARHDRSZ; i++) for (i = maxlen - VARHDRSZ; i < len - VARHDRSZ; i++)
if (*(VARDATA(source) + i) != ' ') if (*(VARDATA(source) + i) != ' ')
...@@ -189,6 +235,16 @@ bpchar(PG_FUNCTION_ARGS) ...@@ -189,6 +235,16 @@ bpchar(PG_FUNCTION_ARGS)
len = maxlen; len = maxlen;
#endif #endif
} }
#ifdef MULTIBYTE
else
{
/*
* XXX: at this point, maxlen is the necessary byte
* length+VARHDRSZ, not the number of CHARACTERS!
*/
maxlen = len + (maxlen - charlen);
}
#endif
s = VARDATA(source); s = VARDATA(source);
...@@ -333,9 +389,12 @@ name_bpchar(PG_FUNCTION_ARGS) ...@@ -333,9 +389,12 @@ name_bpchar(PG_FUNCTION_ARGS)
* Convert a C string to VARCHAR internal representation. atttypmod * Convert a C string to VARCHAR internal representation. atttypmod
* is the declared length of the type plus VARHDRSZ. * is the declared length of the type plus VARHDRSZ.
* *
* If the C string is too long, raise an error, unless the extra * Note that if MULTIBYTE is enabled, atttypmod is regarded as the
* characters are spaces, in which case they're truncated. (per SQL) * number of characters, rather than number of bytes.
*/ *
* If the C string is too long,
* raise an error, unless the extra characters are spaces, in which
* case they're truncated. (per SQL) */
Datum Datum
varcharin(PG_FUNCTION_ARGS) varcharin(PG_FUNCTION_ARGS)
{ {
...@@ -354,7 +413,7 @@ varcharin(PG_FUNCTION_ARGS) ...@@ -354,7 +413,7 @@ varcharin(PG_FUNCTION_ARGS)
{ {
/* Verify that extra characters are spaces, and clip them off */ /* Verify that extra characters are spaces, and clip them off */
#ifdef MULTIBYTE #ifdef MULTIBYTE
size_t mbmaxlen = pg_mbcliplen(s, len, maxlen); size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
if (strspn(s + mbmaxlen, " ") == len - mbmaxlen) if (strspn(s + mbmaxlen, " ") == len - mbmaxlen)
len = mbmaxlen; len = mbmaxlen;
...@@ -428,7 +487,7 @@ varchar(PG_FUNCTION_ARGS) ...@@ -428,7 +487,7 @@ varchar(PG_FUNCTION_ARGS)
size_t maxmblen; size_t maxmblen;
/* truncate multi-byte string preserving multi-byte boundary */ /* truncate multi-byte string preserving multi-byte boundary */
maxmblen = pg_mbcliplen(VARDATA(source), len - VARHDRSZ, maxmblen = pg_mbcharcliplen(VARDATA(source), len - VARHDRSZ,
maxlen - VARHDRSZ) + VARHDRSZ; maxlen - VARHDRSZ) + VARHDRSZ;
for (i = maxmblen - VARHDRSZ; i < len - VARHDRSZ; i++) for (i = maxmblen - VARHDRSZ; i < len - VARHDRSZ; i++)
...@@ -515,22 +574,9 @@ bpcharlen(PG_FUNCTION_ARGS) ...@@ -515,22 +574,9 @@ bpcharlen(PG_FUNCTION_ARGS)
BpChar *arg = PG_GETARG_BPCHAR_P(0); BpChar *arg = PG_GETARG_BPCHAR_P(0);
#ifdef MULTIBYTE #ifdef MULTIBYTE
unsigned char *s; PG_RETURN_INT32(
int len, pg_mbstrlen_with_len(VARDATA(arg), VARSIZE(arg) - VARHDRSZ)
l, );
wl;
l = VARSIZE(arg) - VARHDRSZ;
len = 0;
s = VARDATA(arg);
while (l > 0)
{
wl = pg_mblen(s);
l -= wl;
s += wl;
len++;
}
PG_RETURN_INT32(len);
#else #else
PG_RETURN_INT32(VARSIZE(arg) - VARHDRSZ); PG_RETURN_INT32(VARSIZE(arg) - VARHDRSZ);
#endif #endif
...@@ -736,22 +782,9 @@ varcharlen(PG_FUNCTION_ARGS) ...@@ -736,22 +782,9 @@ varcharlen(PG_FUNCTION_ARGS)
VarChar *arg = PG_GETARG_VARCHAR_P(0); VarChar *arg = PG_GETARG_VARCHAR_P(0);
#ifdef MULTIBYTE #ifdef MULTIBYTE
unsigned char *s; PG_RETURN_INT32(
int len, pg_mbstrlen_with_len(VARDATA(arg), VARSIZE(arg) - VARHDRSZ)
l, );
wl;
len = 0;
s = VARDATA(arg);
l = VARSIZE(arg) - VARHDRSZ;
while (l > 0)
{
wl = pg_mblen(s);
l -= wl;
s += wl;
len++;
}
PG_RETURN_INT32(len);
#else #else
PG_RETURN_INT32(VARSIZE(arg) - VARHDRSZ); PG_RETURN_INT32(VARSIZE(arg) - VARHDRSZ);
#endif #endif
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* client encoding and server internal encoding. * client encoding and server internal encoding.
* (currently mule internal code (mic) is used) * (currently mule internal code (mic) is used)
* Tatsuo Ishii * Tatsuo Ishii
* $Id: mbutils.c,v 1.17 2001/04/16 02:42:01 tgl Exp $ * $Id: mbutils.c,v 1.18 2001/07/15 11:07:36 ishii Exp $
*/ */
#include "postgres.h" #include "postgres.h"
...@@ -241,9 +241,9 @@ pg_mbstrlen_with_len(const unsigned char *mbstr, int limit) ...@@ -241,9 +241,9 @@ pg_mbstrlen_with_len(const unsigned char *mbstr, int limit)
} }
/* /*
* returns the length of a multi-byte string * returns the byte length of a multi-byte string
* (not necessarily NULL terminated) * (not necessarily NULL terminated)
* that is not longer than limit. * that is no longer than limit.
* this function does not break multi-byte word boundary. * this function does not break multi-byte word boundary.
*/ */
int int
...@@ -267,8 +267,30 @@ pg_mbcliplen(const unsigned char *mbstr, int len, int limit) ...@@ -267,8 +267,30 @@ pg_mbcliplen(const unsigned char *mbstr, int len, int limit)
} }
/* /*
* functions for utils/init * Similar to pg_mbcliplen but the limit parameter specifies the
*/ * character length, not the byte length. */
/*
 * pg_mbcharcliplen
 *
 * Return the number of bytes occupied by at most "limit" leading
 * characters of mbstr, never splitting a multi-byte character.
 *
 *	mbstr	multi-byte string (not necessarily NUL terminated)
 *	len		number of bytes available in mbstr
 *	limit	maximum number of characters to keep
 *
 * Scanning stops at the first NUL byte, once len bytes are consumed, or
 * once limit characters have been accepted, whichever comes first.  The
 * returned byte count never exceeds len.
 */
int
pg_mbcharcliplen(const unsigned char *mbstr, int len, int limit)
{
	int			clen = 0;		/* bytes accepted so far */
	int			nch = 0;		/* characters examined so far */
	int			l;				/* byte width of the current character */

	while (len > 0 && *mbstr)
	{
		l = pg_mblen(mbstr);

		/*
		 * If the character claims more bytes than remain, the input ends
		 * in a truncated sequence.  Accepting it would return a length
		 * larger than the data actually present, and callers subtract the
		 * result from their byte length.  Stop before the partial
		 * character instead.
		 */
		if (l > len)
			break;

		nch++;
		if (nch > limit)
			break;				/* this character is one too many */

		clen += l;
		len -= l;
		mbstr += l;
	}

	return clen;
}
/*
* functions for utils/init */
static int DatabaseEncoding = MULTIBYTE; static int DatabaseEncoding = MULTIBYTE;
void void
......
/* $Id: pg_wchar.h,v 1.26 2001/05/03 21:38:44 momjian Exp $ */ /* $Id: pg_wchar.h,v 1.27 2001/07/15 11:07:37 ishii Exp $ */
#ifndef PG_WCHAR_H #ifndef PG_WCHAR_H
#define PG_WCHAR_H #define PG_WCHAR_H
...@@ -136,6 +136,7 @@ extern int pg_mic_mblen(const unsigned char *); ...@@ -136,6 +136,7 @@ extern int pg_mic_mblen(const unsigned char *);
extern int pg_mbstrlen(const unsigned char *); extern int pg_mbstrlen(const unsigned char *);
extern int pg_mbstrlen_with_len(const unsigned char *, int); extern int pg_mbstrlen_with_len(const unsigned char *, int);
extern int pg_mbcliplen(const unsigned char *, int, int); extern int pg_mbcliplen(const unsigned char *, int, int);
extern int pg_mbcharcliplen(const unsigned char *, int, int);
extern pg_encoding_conv_tbl *pg_get_encent_by_encoding(int); extern pg_encoding_conv_tbl *pg_get_encent_by_encoding(int);
extern int pg_set_client_encoding(int); extern int pg_set_client_encoding(int);
extern int pg_get_client_encoding(void); extern int pg_get_client_encoding(void);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment