granicus.if.org Git - postgresql/commitdiff
Revert COPY OUT to follow the pre-8.3 handling of ASCII control characters,
author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 3 Dec 2007 00:03:05 +0000 (00:03 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 3 Dec 2007 00:03:05 +0000 (00:03 +0000)
namely that \r, \n, \t, \b, \f, \v are dumped as those two-character
representations rather than a backslash and the literal control character.
I had made it do the other to save some code, but this was ill-advised,
because dump files in which these characters appear literally are prone to
newline mangling.  Fortunately, doing it the old way should only cost a few
more lines of code, and not slow down the copy loop materially.
Per bug #3795 from Lou Duchez.

src/backend/commands/copy.c

index c68d828fea0337b2da732a88981ad8073508eb77..55ecf0098d4ce504d3e3c9a9903eff14b680a56c 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.289 2007/11/30 21:22:53 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.290 2007/12/03 00:03:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -3102,27 +3102,43 @@ CopyAttributeOutText(CopyState cstate, char *string)
                        }
                        else if ((unsigned char) c < (unsigned char) 0x20)
                        {
+                               /*
+                                * \r and \n must be escaped, the others are traditional.
+                                * We prefer to dump these using the C-like notation, rather
+                                * than a backslash and the literal character, because it
+                                * makes the dump file a bit more proof against Microsoftish
+                                * data mangling.
+                                */
                                switch (c)
                                {
-                                               /*
-                                                * \r and \n must be escaped, the others are
-                                                * traditional
-                                                */
                                        case '\b':
+                                               c = 'b';
+                                               break;
                                        case '\f':
+                                               c = 'f';
+                                               break;
                                        case '\n':
+                                               c = 'n';
+                                               break;
                                        case '\r':
+                                               c = 'r';
+                                               break;
                                        case '\t':
+                                               c = 't';
+                                               break;
                                        case '\v':
-                                               DUMPSOFAR();
-                                               CopySendChar(cstate, '\\');
-                                               start = ptr++;  /* we include char in next run */
+                                               c = 'v';
                                                break;
                                        default:
                                                /* All ASCII control chars are length 1 */
                                                ptr++;
-                                               break;
+                                               continue;               /* fall to end of loop */
                                }
+                               /* if we get here, we need to convert the control char */
+                               DUMPSOFAR();
+                               CopySendChar(cstate, '\\');
+                               CopySendChar(cstate, c);
+                               start = ++ptr;                  /* do not include char in next run */
                        }
                        else if (IS_HIGHBIT_SET(c))
                                ptr += pg_encoding_mblen(cstate->client_encoding, ptr);
@@ -3143,27 +3159,43 @@ CopyAttributeOutText(CopyState cstate, char *string)
                        }
                        else if ((unsigned char) c < (unsigned char) 0x20)
                        {
+                               /*
+                                * \r and \n must be escaped, the others are traditional.
+                                * We prefer to dump these using the C-like notation, rather
+                                * than a backslash and the literal character, because it
+                                * makes the dump file a bit more proof against Microsoftish
+                                * data mangling.
+                                */
                                switch (c)
                                {
-                                               /*
-                                                * \r and \n must be escaped, the others are
-                                                * traditional
-                                                */
                                        case '\b':
+                                               c = 'b';
+                                               break;
                                        case '\f':
+                                               c = 'f';
+                                               break;
                                        case '\n':
+                                               c = 'n';
+                                               break;
                                        case '\r':
+                                               c = 'r';
+                                               break;
                                        case '\t':
+                                               c = 't';
+                                               break;
                                        case '\v':
-                                               DUMPSOFAR();
-                                               CopySendChar(cstate, '\\');
-                                               start = ptr++;  /* we include char in next run */
+                                               c = 'v';
                                                break;
                                        default:
                                                /* All ASCII control chars are length 1 */
                                                ptr++;
-                                               break;
+                                               continue;               /* fall to end of loop */
                                }
+                               /* if we get here, we need to convert the control char */
+                               DUMPSOFAR();
+                               CopySendChar(cstate, '\\');
+                               CopySendChar(cstate, c);
+                               start = ++ptr;                  /* do not include char in next run */
                        }
                        else
                                ptr++;