Revert COPY OUT to follow the pre-8.3 handling of ASCII control characters,

namely that \r, \n, \t, \b, \f, \v are dumped as those two-character representations rather than as a backslash followed by the literal control character. I had made it emit the latter form to save some code, but this was ill-advised, because dump files in which these characters appear literally are prone to newline mangling. Fortunately, doing it the old way should only cost a few more lines of code, and should not slow down the copy loop materially. Per bug #3795 from Lou Duchez.

Revert COPY OUT to follow the pre-8.3 handling of ASCII control characters,
namely that \r, \n, \t, \b, \f, \v are dumped as those two-character representations rather than as a backslash followed by the literal control character. I had made it emit the latter form to save some code, but this was ill-advised, because dump files in which these characters appear literally are prone to newline mangling. Fortunately, doing it the old way should only cost a few more lines of code, and should not slow down the copy loop materially. Per bug #3795 from Lou Duchez.
a63b63ff · Tom Lane · 3b3251cb · a63b63ff
Commit a63b63ff authored Dec 03, 2007 by Tom Lane
Hide whitespace changes
Inline Side-by-side

Showing with 49 additions and 17 deletions

src/backend/commands/copy.c src/backend/commands/copy.c +49 -17

No files found.
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.289 2007/11/30 21:22:53 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.290 2007/12/03 00:03:05 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -3102,27 +3102,43 @@ CopyAttributeOutText(CopyState cstate, char *string)
 			}
 			else if ((unsigned char) c < (unsigned char) 0x20)
 			{
+				/*
+				 * \r and \n must be escaped, the others are traditional.
+				 * We prefer to dump these using the C-like notation, rather
+				 * than a backslash and the literal character, because it
+				 * makes the dump file a bit more proof against Microsoftish
+				 * data mangling.
+				 */
 				switch (c)
 				{
-						/*
-						 * \r and \n must be escaped, the others are
-						 * traditional
-						 */
 					case '\b':
+						c = 'b';
+						break;
 					case '\f':
+						c = 'f';
+						break;
 					case '\n':
+						c = 'n';
+						break;
 					case '\r':
+						c = 'r';
+						break;
 					case '\t':
+						c = 't';
+						break;
 					case '\v':
-						DUMPSOFAR();
+						c = 'v';
-						CopySendChar(cstate, '\\');
-						start = ptr++;	/* we include char in next run */
 						break;
 					default:
 						/* All ASCII control chars are length 1 */
 						ptr++;
-						break;
+						continue;		/* fall to end of loop */
 				}
+				/* if we get here, we need to convert the control char */
+				DUMPSOFAR();
+				CopySendChar(cstate, '\\');
+				CopySendChar(cstate, c);
+				start = ++ptr;			/* do not include char in next run */
 			}
 			else if (IS_HIGHBIT_SET(c))
 				ptr += pg_encoding_mblen(cstate->client_encoding, ptr);
@@ -3143,27 +3159,43 @@ CopyAttributeOutText(CopyState cstate, char *string)
 			}
 			else if ((unsigned char) c < (unsigned char) 0x20)
 			{
+				/*
+				 * \r and \n must be escaped, the others are traditional.
+				 * We prefer to dump these using the C-like notation, rather
+				 * than a backslash and the literal character, because it
+				 * makes the dump file a bit more proof against Microsoftish
+				 * data mangling.
+				 */
 				switch (c)
 				{
-						/*
-						 * \r and \n must be escaped, the others are
-						 * traditional
-						 */
 					case '\b':
+						c = 'b';
+						break;
 					case '\f':
+						c = 'f';
+						break;
 					case '\n':
+						c = 'n';
+						break;
 					case '\r':
+						c = 'r';
+						break;
 					case '\t':
+						c = 't';
+						break;
 					case '\v':
-						DUMPSOFAR();
+						c = 'v';
-						CopySendChar(cstate, '\\');
-						start = ptr++;	/* we include char in next run */
 						break;
 					default:
 						/* All ASCII control chars are length 1 */
 						ptr++;
-						break;
+						continue;		/* fall to end of loop */
 				}
+				/* if we get here, we need to convert the control char */
+				DUMPSOFAR();
+				CopySendChar(cstate, '\\');
+				CopySendChar(cstate, c);
+				start = ++ptr;			/* do not include char in next run */
 			}
 			else
 				ptr++;