Allow psql multi-line column values to align in the proper columns

If the second output column value is 'a\nb', the 'b' should appear in the second display column, rather than the first column as it does now. Change libpq's PQdsplen() to return more useful values. > Note: this changes the PQdsplen function, it can now return zero or > minus one which was not possible before. It doesn't appear anyone is > actually using the functions other than psql but it is a change. The > functions are not actually documented anywhere so it's not like we're > breaking a defined interface. The new semantics follow the Unicode > standard. BACKWARD COMPATIBLE CHANGE. The only user-visible change I saw in the regression tests is that a SELECT * on a table where all the columns have been dropped doesn't return a blank line like before. This seems like a step forward. Martijn van Oosterhout

Allow psql multi-line column values to align in the proper columns
If the second output column value is 'a\nb', the 'b' should appear in the second display column, rather than the first column as it does now. Change libpq's PQdsplen() to return more useful values. > Note: this changes the PQdsplen function, it can now return zero or > minus one which was not possible before. It doesn't appear anyone is > actually using the functions other than psql but it is a change. The > functions are not actually documented anywhere so it's not like we're > breaking a defined interface. The new semantics follow the Unicode > standard. BACKWARD COMPATIBLE CHANGE. The only user-visible change I saw in the regression tests is that a SELECT * on a table where all the columns have been dropped doesn't return a blank line like before. This seems like a step forward. Martijn van Oosterhout
c01999a5 · Bruce Momjian · 593763c0 · c01999a5 · c01999a5 · c01999a5
Commit c01999a5 authored Feb 10, 2006 by Bruce Momjian
6 changed files
--- a/src/backend/utils/mb/wchar.c
+++ b/src/backend/utils/mb/wchar.c
 /*
 * conversion functions between pg_wchar and multibyte streams.
 * Tatsuo Ishii
- * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.52 2005/12/26 19:30:44 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.53 2006/02/10 00:39:04 momjian Exp $
 *
 * WIN1250 client encoding updated by Pavel Behal
 *
@@ -23,6 +23,13 @@
 * for the particular encoding. Note that if the encoding is only
 * supported in the client, you don't need to define
 * mb2wchar_with_len() function (SJIS is the case).
+ *
+ * Note: for the display output of psql to work properly, the return values
+ * of these functions must conform to the Unicode standard. In particular
+ * the NUL character is zero width and control characters are generally
+ * width -1. It is recommended that non-ASCII encodings refer their ASCII
+ * subset to the ASCII routines to ensure consistency.
+ *
 */

 /*
@@ -53,6 +60,11 @@ pg_ascii_mblen(const unsigned char *s)
 static int
 pg_ascii_dsplen(const unsigned char *s)
 {
+	if (*s == '\0')
+		return 0;				/* NUL occupies no display column */
+	if (*s < 0x20 || *s == 0x7f)
+		return -1;				/* C0 control characters and DEL have no defined width */
+		
 	return 1;
 }

@@ -125,7 +137,7 @@ pg_euc_dsplen(const unsigned char *s)
 	else if (IS_HIGHBIT_SET(*s))
 		len = 2;
 	else
-		len = 1;
+		len = pg_ascii_dsplen(s);
 	return len;
 }

@@ -156,7 +168,7 @@ pg_eucjp_dsplen(const unsigned char *s)
 	else if (IS_HIGHBIT_SET(*s))
 		len = 2;
 	else
-		len = 1;
+		len = pg_ascii_dsplen(s);
 	return len;
 }

@@ -244,7 +256,7 @@ pg_euccn_dsplen(const unsigned char *s)
 	if (IS_HIGHBIT_SET(*s))
 		len = 2;
 	else
-		len = 1;
+		len = pg_ascii_dsplen(s);
 	return len;
 }

@@ -304,7 +316,7 @@ pg_euctw_mblen(const unsigned char *s)
 	else if (IS_HIGHBIT_SET(*s))
 		len = 2;
 	else
-		len = 1;
+		len = pg_ascii_dsplen(s);
 	return len;
 }

@@ -320,7 +332,7 @@ pg_euctw_dsplen(const unsigned char *s)
 	else if (IS_HIGHBIT_SET(*s))
 		len = 2;
 	else
-		len = 1;
+		len = pg_ascii_dsplen(s);
 	return len;
 }

@@ -419,10 +431,179 @@ pg_utf_mblen(const unsigned char *s)
 	return len;
 }

+/*
+ * This is an implementation of wcwidth() and wcswidth() as defined in
+ * "The Single UNIX Specification, Version 2, The Open Group, 1997"
+ * <http://www.UNIX-systems.org/online.html>
+ *
+ * Markus Kuhn -- 2001-09-08 -- public domain
+ *
+ * customised for PostgreSQL
+ *
+ * original available at : http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
+ */
+
+struct mbinterval
+{
+	unsigned short first;		/* lowest code point of the range (inclusive) */
+	unsigned short last;		/* highest code point of the range (inclusive) */
+};
+
+/* binary search of a sorted interval table: returns 1 if ucs lies in some interval, else 0; max is the last valid index */
+static int
+mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
+{
+	int			min = 0;
+	int			mid;
+
+	if (ucs < table[0].first || ucs > table[max].last)
+		return 0;				/* quick reject: outside the table's overall span */
+	while (max >= min)
+	{
+		mid = (min + max) / 2;
+		if (ucs > table[mid].last)
+			min = mid + 1;		/* continue in the upper half */
+		else if (ucs < table[mid].first)
+			max = mid - 1;		/* continue in the lower half */
+		else
+			return 1;			/* first <= ucs <= last */
+	}
+
+	return 0;
+}
+
+
+/* The following functions define the column width of an ISO 10646
+ * character as follows:
+ *
+ *	  - The null character (U+0000) has a column width of 0.
+ *
+ *	  - Other C0/C1 control characters and DEL will lead to a return
+ *		value of -1.
+ *
+ *	  - Non-spacing and enclosing combining characters (general
+ *		category code Mn or Me in the Unicode database) have a
+ *		column width of 0.
+ *
+ *	  - Other format characters (general category code Cf in the Unicode
+ *		database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
+ *
+ *	  - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
+ *		have a column width of 0.
+ *
+ *	  - Spacing characters in the East Asian Wide (W) or East Asian
+ *		FullWidth (F) category as defined in Unicode Technical
+ *		Report #11 have a column width of 2.
+ *
+ *	  - All remaining characters (including all printable
+ *		ISO 8859-1 and WGL4 characters, Unicode control characters,
+ *		etc.) have a column width of 1.
+ *
+ * This implementation assumes that wchar_t characters are encoded
+ * in ISO 10646.
+ */
+
+static int
+ucs_wcwidth(pg_wchar ucs)
+{
+	/* sorted, non-overlapping [first, last] ranges of zero-width characters; searched with mbbisearch() */
+	static const struct mbinterval combining[] = {
+		{0x0300, 0x034E}, {0x0360, 0x0362}, {0x0483, 0x0486},
+		{0x0488, 0x0489}, {0x0591, 0x05A1}, {0x05A3, 0x05B9},
+		{0x05BB, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2},
+		{0x05C4, 0x05C4}, {0x064B, 0x0655}, {0x0670, 0x0670},
+		{0x06D6, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED},
+		{0x070F, 0x070F}, {0x0711, 0x0711}, {0x0730, 0x074A},
+		{0x07A6, 0x07B0}, {0x0901, 0x0902}, {0x093C, 0x093C},
+		{0x0941, 0x0948}, {0x094D, 0x094D}, {0x0951, 0x0954},
+		{0x0962, 0x0963}, {0x0981, 0x0981}, {0x09BC, 0x09BC},
+		{0x09C1, 0x09C4}, {0x09CD, 0x09CD}, {0x09E2, 0x09E3},
+		{0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A41, 0x0A42},
+		{0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71},
+		{0x0A81, 0x0A82}, {0x0ABC, 0x0ABC}, {0x0AC1, 0x0AC5},
+		{0x0AC7, 0x0AC8}, {0x0ACD, 0x0ACD}, {0x0B01, 0x0B01},
+		{0x0B3C, 0x0B3C}, {0x0B3F, 0x0B3F}, {0x0B41, 0x0B43},
+		{0x0B4D, 0x0B4D}, {0x0B56, 0x0B56}, {0x0B82, 0x0B82},
+		{0x0BC0, 0x0BC0}, {0x0BCD, 0x0BCD}, {0x0C3E, 0x0C40},
+		{0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},
+		{0x0CBF, 0x0CBF}, {0x0CC6, 0x0CC6}, {0x0CCC, 0x0CCD},
+		{0x0D41, 0x0D43}, {0x0D4D, 0x0D4D}, {0x0DCA, 0x0DCA},
+		{0x0DD2, 0x0DD4}, {0x0DD6, 0x0DD6}, {0x0E31, 0x0E31},
+		{0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1},
+		{0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD},
+		{0x0F18, 0x0F19}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37},
+		{0x0F39, 0x0F39}, {0x0F71, 0x0F7E}, {0x0F80, 0x0F84},
+		{0x0F86, 0x0F87}, {0x0F90, 0x0F97}, {0x0F99, 0x0FBC},
+		{0x0FC6, 0x0FC6}, {0x102D, 0x1030}, {0x1032, 0x1032},
+		{0x1036, 0x1037}, {0x1039, 0x1039}, {0x1058, 0x1059},
+		{0x1160, 0x11FF}, {0x17B7, 0x17BD}, {0x17C6, 0x17C6},
+		{0x17C9, 0x17D3}, {0x180B, 0x180E}, {0x18A9, 0x18A9},
+		{0x200B, 0x200F}, {0x202A, 0x202E}, {0x206A, 0x206F},
+		{0x20D0, 0x20E3}, {0x302A, 0x302F}, {0x3099, 0x309A},
+		{0xFB1E, 0xFB1E}, {0xFE20, 0xFE23}, {0xFEFF, 0xFEFF},
+		{0xFFF9, 0xFFFB}
+	};
+
+	/* the null character (U+0000) has a column width of 0 */
+	if (ucs == 0)
+		return 0;
+	/* C0 controls, DEL, C1 controls, and values beyond U+10FFFF get -1 */
+	if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
+		return -1;
+
+	/* zero width if listed in the combining table; last argument is the table's final index */
+	if (mbbisearch(ucs, combining,
+				   sizeof(combining) / sizeof(struct mbinterval) - 1))
+		return 0;
+
+	/*
+	 * if we arrive here, ucs is not a combining or C0/C1 control character
+	 */
+	/* the range test below evaluates to 0 or 1, so the result is 1 or 2 columns */
+	return 1 +
+		(ucs >= 0x1100 &&
+		 (ucs <= 0x115f ||		/* Hangul Jamo init. consonants */
+		  (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
+		   ucs != 0x303f) ||	/* CJK ... Yi */
+		  (ucs >= 0xac00 && ucs <= 0xd7a3) ||	/* Hangul Syllables */
+		  (ucs >= 0xf900 && ucs <= 0xfaff) ||	/* CJK Compatibility
+												 * Ideographs */
+		  (ucs >= 0xfe30 && ucs <= 0xfe6f) ||	/* CJK Compatibility Forms */
+		  (ucs >= 0xff00 && ucs <= 0xff5f) ||	/* Fullwidth Forms */
+		  (ucs >= 0xffe0 && ucs <= 0xffe6) ||
+		  (ucs >= 0x20000 && ucs <= 0x2ffff)));
+}
+
+static pg_wchar
+utf2ucs(const unsigned char *c)
+{
+	/*
+	 * one char version of pg_utf2wchar_with_len. no bounds checking here:
+	 * c must point to enough bytes for the length implied by the lead byte
+	 */
+	if ((*c & 0x80) == 0)
+		return (pg_wchar) c[0];
+	else if ((*c & 0xe0) == 0xc0)
+		return (pg_wchar) (((c[0] & 0x1f) << 6) |
+						   (c[1] & 0x3f));
+	else if ((*c & 0xf0) == 0xe0)
+		return (pg_wchar) (((c[0] & 0x0f) << 12) |
+						   ((c[1] & 0x3f) << 6) |
+						   (c[2] & 0x3f));
+	else if ((*c & 0xf8) == 0xf0)	/* 11110xxx only; 0xf8-0xff are not valid lead bytes */
+		return (pg_wchar) (((c[0] & 0x07) << 18) |
+						   ((c[1] & 0x3f) << 12) |
+						   ((c[2] & 0x3f) << 6) |
+						   (c[3] & 0x3f));
+	else
+		/* deliberately invalid: above 0x10ffff, so ucs_wcwidth() returns -1 */
+		return 0xffffffff;
+}
+
 static int
 pg_utf_dsplen(const unsigned char *s)
 {
-	return 1;					/* XXX fix me! */
+	return ucs_wcwidth(utf2ucs(s));	/* decode one UTF-8 character, then look up its column width */
 }

 /*
@@ -499,7 +680,7 @@ pg_mule_mblen(const unsigned char *s)
 static int
 pg_mule_dsplen(const unsigned char *s)
 {
-	return 1;					/* XXX fix me! */
+	return pg_ascii_dsplen(s);					/* XXX fix me! only the first byte is classified, by the ASCII rules */
 }

 /*
@@ -529,7 +710,7 @@ pg_latin1_mblen(const unsigned char *s)
 static int
 pg_latin1_dsplen(const unsigned char *s)
 {
-	return 1;
+	return pg_ascii_dsplen(s);	/* NUL -> 0, C0 controls and DEL -> -1, any other byte -> 1 */
 }

 /*
@@ -559,7 +740,7 @@ pg_sjis_dsplen(const unsigned char *s)
 	else if (IS_HIGHBIT_SET(*s))
 		len = 2;	/* kanji? */
 	else
-		len = 1;	/* should be ASCII */
+		len = pg_ascii_dsplen(s);	/* should be ASCII */
 	return len;
 }

@@ -586,7 +767,7 @@ pg_big5_dsplen(const unsigned char *s)
 	if (IS_HIGHBIT_SET(*s))
 		len = 2;	/* kanji? */
 	else
-		len = 1;	/* should be ASCII */
+		len = pg_ascii_dsplen(s);	/* should be ASCII */
 	return len;
 }

@@ -613,7 +794,7 @@ pg_gbk_dsplen(const unsigned char *s)
 	if (IS_HIGHBIT_SET(*s))
 		len = 2;	/* kanji? */
 	else
-		len = 1;	/* should be ASCII */
+		len = pg_ascii_dsplen(s);	/* should be ASCII */
 	return len;
 }

@@ -640,7 +821,7 @@ pg_uhc_dsplen(const unsigned char *s)
 	if (IS_HIGHBIT_SET(*s))
 		len = 2;	/* 2byte? */
 	else
-		len = 1;	/* should be ASCII */
+		len = pg_ascii_dsplen(s);	/* should be ASCII */
 	return len;
 }

@@ -672,10 +853,10 @@ pg_gb18030_dsplen(const unsigned char *s)
 {
 	int			len;

-	if (!IS_HIGHBIT_SET(*s))
-		len = 1;	/* ASCII */
-	else
+	if (IS_HIGHBIT_SET(*s))
 		len = 2;
+	else
+		len = pg_ascii_dsplen(s);	/* ASCII */
 	return len;
 }


--- a/src/bin/psql/mbprint.c
+++ b/src/bin/psql/mbprint.c
--- a/src/bin/psql/mbprint.h
+++ b/src/bin/psql/mbprint.h
-/* $PostgreSQL: pgsql/src/bin/psql/mbprint.h,v 1.8 2005/09/24 17:53:27 tgl Exp $ */
+/* $PostgreSQL: pgsql/src/bin/psql/mbprint.h,v 1.9 2006/02/10 00:39:04 momjian Exp $ */
 #ifndef MBPRINT_H
 #define MBPRINT_H

 #include "mb/pg_wchar.h"

-extern char *mbvalidate(char *pwcs, int encoding);
+struct lineptr {
+	unsigned char *ptr;	/* NOTE(review): presumably the start of one output line - confirm in mbprint.c */
+	int width;			/* NOTE(review): presumably that line's display width - confirm in mbprint.c */
+};

-extern int	pg_wcswidth(const char *pwcs, size_t len, int encoding);
+extern unsigned char *mbvalidate(unsigned char *pwcs, int encoding);
+
+extern int	pg_wcswidth(const unsigned char *pwcs, size_t len, int encoding);
+extern void	pg_wcsformat(unsigned char *pwcs, size_t len, int encoding, struct lineptr *lines, int count);
+extern int	pg_wcssize(unsigned char *pwcs, size_t len, int encoding, int *width, int *height, int *format_size);

 #endif   /* MBPRINT_H */
--- a/src/bin/psql/print.c
+++ b/src/bin/psql/print.c
--- a/src/test/regress/expected/alter_table.out
+++ b/src/test/regress/expected/alter_table.out
@@ -797,7 +797,6 @@ alter table atacc1 drop c;
 alter table atacc1 drop d;
 alter table atacc1 drop b;
 select * from atacc1;
-  
 --
 (1 row)


--- a/src/test/regress/expected/prepare.out
+++ b/src/test/regress/expected/prepare.out
@@ -149,18 +149,18 @@ PREPARE q7(unknown) AS
 SELECT name, statement, parameter_types FROM pg_prepared_statements
    ORDER BY name;
 name |                                                                                    statement                                                                                    |                    parameter_types                     
------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------
- q2   | PREPARE q2(text) AS
-	SELECT datname, datistemplate, datallowconn
-	FROM pg_database WHERE datname = $1;                                                                          | {text}
- q3   | PREPARE q3(text, int, float, boolean, oid, smallint) AS
-	SELECT * FROM tenk1 WHERE string4 = $1 AND (four = $2 OR
-	ten = $3::bigint OR true = $4 OR oid = $5 OR odd = $6::int); | {text,integer,"double precision",boolean,oid,smallint}
- q5   | PREPARE q5(int, text) AS
-	SELECT * FROM tenk1 WHERE unique1 = $1 OR stringu1 = $2;                                                                                              | {integer,text}
- q6   | PREPARE q6 AS
-    SELECT * FROM tenk1 WHERE unique1 = $1 AND stringu1 = $2;                                                                                                     | {integer,name}
- q7   | PREPARE q7(unknown) AS
-    SELECT * FROM road WHERE thepath = $1;                                                                                                               | {path}
+------+------------------------------------------------------------------+--------------------------------------------------------
+ q2   | PREPARE q2(text) AS                                              | {text}                                                
+      : \x09SELECT datname, datistemplate, datallowconn                                                                          
+      : \x09FROM pg_database WHERE datname = $1;                                                                                 
+ q3   | PREPARE q3(text, int, float, boolean, oid, smallint) AS          | {text,integer,"double precision",boolean,oid,smallint}
+      : \x09SELECT * FROM tenk1 WHERE string4 = $1 AND (four = $2 OR                                                             
+      : \x09ten = $3::bigint OR true = $4 OR oid = $5 OR odd = $6::int);                                                         
+ q5   | PREPARE q5(int, text) AS                                         | {integer,text}                                        
+      : \x09SELECT * FROM tenk1 WHERE unique1 = $1 OR stringu1 = $2;                                                             
+ q6   | PREPARE q6 AS                                                    | {integer,name}                                        
+      :     SELECT * FROM tenk1 WHERE unique1 = $1 AND stringu1 = $2;                                                            
+ q7   | PREPARE q7(unknown) AS                                           | {path}                                                
+      :     SELECT * FROM road WHERE thepath = $1;                                                                               
 (5 rows)