Commit 2d8314bd authored by Tom Lane

Rename utf2ucs() to utf8_to_unicode(), and export it so it can be used
elsewhere.

Similarly rename the version in mbprint.c, not because this affects anything
but just to keep the two copies in exact sync.  There was some discussion of
having only one copy in src/port/ instead, but this function is so small
and unlikely to change that doing so seems like overkill.

Slightly editorialized version of a patch by Joseph Adams.  (The bug-fix
aspect of his patch was applied separately, and back-patched.)
parent b5565bca
/* /*
* conversion functions between pg_wchar and multibyte streams. * conversion functions between pg_wchar and multibyte streams.
* Tatsuo Ishii * Tatsuo Ishii
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.74 2010/01/04 20:38:31 adunstan Exp $ * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.75 2010/08/18 19:54:01 tgl Exp $
* *
*/ */
/* can be used in either frontend or backend */ /* can be used in either frontend or backend */
...@@ -462,7 +462,7 @@ unicode_to_utf8(pg_wchar c, unsigned char *utf8string) ...@@ -462,7 +462,7 @@ unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
* We return "1" for any leading byte that is either flat-out illegal or * We return "1" for any leading byte that is either flat-out illegal or
* indicates a length larger than we support. * indicates a length larger than we support.
* *
* pg_utf2wchar_with_len(), utf2ucs(), pg_utf8_islegal(), and perhaps * pg_utf2wchar_with_len(), utf8_to_unicode(), pg_utf8_islegal(), and perhaps
* other places would need to be fixed to change this. * other places would need to be fixed to change this.
*/ */
int int
...@@ -632,13 +632,15 @@ ucs_wcwidth(pg_wchar ucs) ...@@ -632,13 +632,15 @@ ucs_wcwidth(pg_wchar ucs)
(ucs >= 0x20000 && ucs <= 0x2ffff))); (ucs >= 0x20000 && ucs <= 0x2ffff)));
} }
static pg_wchar /*
utf2ucs(const unsigned char *c) * Convert a UTF-8 character to a Unicode code point.
* This is a one-character version of pg_utf2wchar_with_len.
*
* No error checks here, c must point to a long-enough string.
*/
pg_wchar
utf8_to_unicode(const unsigned char *c)
{ {
/*
* one char version of pg_utf2wchar_with_len. no control here, c must
* point to a large enough string
*/
if ((*c & 0x80) == 0) if ((*c & 0x80) == 0)
return (pg_wchar) c[0]; return (pg_wchar) c[0];
else if ((*c & 0xe0) == 0xc0) else if ((*c & 0xe0) == 0xc0)
...@@ -661,7 +663,7 @@ utf2ucs(const unsigned char *c) ...@@ -661,7 +663,7 @@ utf2ucs(const unsigned char *c)
static int static int
pg_utf_dsplen(const unsigned char *s) pg_utf_dsplen(const unsigned char *s)
{ {
return ucs_wcwidth(utf2ucs(s)); return ucs_wcwidth(utf8_to_unicode(s));
} }
/* /*
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* *
* Copyright (c) 2000-2010, PostgreSQL Global Development Group * Copyright (c) 2000-2010, PostgreSQL Global Development Group
* *
* $PostgreSQL: pgsql/src/bin/psql/mbprint.c,v 1.39 2010/08/16 00:06:18 tgl Exp $ * $PostgreSQL: pgsql/src/bin/psql/mbprint.c,v 1.40 2010/08/18 19:54:01 tgl Exp $
* *
* XXX this file does not really belong in psql/. Perhaps move to libpq? * XXX this file does not really belong in psql/. Perhaps move to libpq?
* It also seems that the mbvalidate function is redundant with existing * It also seems that the mbvalidate function is redundant with existing
...@@ -43,13 +43,15 @@ pg_get_utf8_id(void) ...@@ -43,13 +43,15 @@ pg_get_utf8_id(void)
#define PG_UTF8 pg_get_utf8_id() #define PG_UTF8 pg_get_utf8_id()
/*
* Convert a UTF-8 character to a Unicode code point.
* This is a one-character version of pg_utf2wchar_with_len.
*
* No error checks here, c must point to a long-enough string.
*/
static pg_wchar static pg_wchar
utf2ucs(const unsigned char *c) utf8_to_unicode(const unsigned char *c)
{ {
/*
* one char version of pg_utf2wchar_with_len. no control here, c must
* point to a large enough string
*/
if ((*c & 0x80) == 0) if ((*c & 0x80) == 0)
return (pg_wchar) c[0]; return (pg_wchar) c[0];
else if ((*c & 0xe0) == 0xc0) else if ((*c & 0xe0) == 0xc0)
...@@ -346,7 +348,7 @@ pg_wcsformat(unsigned char *pwcs, size_t len, int encoding, ...@@ -346,7 +348,7 @@ pg_wcsformat(unsigned char *pwcs, size_t len, int encoding,
else if (w < 0) /* Non-ascii control char */ else if (w < 0) /* Non-ascii control char */
{ {
if (encoding == PG_UTF8) if (encoding == PG_UTF8)
sprintf((char *) ptr, "\\u%04X", utf2ucs(pwcs)); sprintf((char *) ptr, "\\u%04X", utf8_to_unicode(pwcs));
else else
{ {
/* /*
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.94 2010/02/26 02:01:25 momjian Exp $ * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.95 2010/08/18 19:54:01 tgl Exp $
* *
* NOTES * NOTES
* This is used both by the backend and by libpq, but should not be * This is used both by the backend and by libpq, but should not be
...@@ -412,6 +412,7 @@ extern int pg_valid_client_encoding(const char *name); ...@@ -412,6 +412,7 @@ extern int pg_valid_client_encoding(const char *name);
extern int pg_valid_server_encoding(const char *name); extern int pg_valid_server_encoding(const char *name);
extern unsigned char *unicode_to_utf8(pg_wchar c, unsigned char *utf8string); extern unsigned char *unicode_to_utf8(pg_wchar c, unsigned char *utf8string);
extern pg_wchar utf8_to_unicode(const unsigned char *c);
extern int pg_utf_mblen(const unsigned char *); extern int pg_utf_mblen(const unsigned char *);
extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len, extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len,
int src_encoding, int src_encoding,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment