From: Tom Lane Date: Mon, 3 Dec 2007 00:03:05 +0000 (+0000) Subject: Revert COPY OUT to follow the pre-8.3 handling of ASCII control characters, X-Git-Tag: REL8_3_BETA4~1 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a63b63ff96293e153d25e8e054a830d70f69938a;p=postgresql Revert COPY OUT to follow the pre-8.3 handling of ASCII control characters, namely that \r, \n, \t, \b, \f, \v are dumped as those two-character representations rather than a backslash and the literal control character. I had made it do the other to save some code, but this was ill-advised, because dump files in which these characters appear literally are prone to newline mangling. Fortunately, doing it the old way should only cost a few more lines of code, and not slow down the copy loop materially. Per bug #3795 from Lou Duchez. --- diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index c68d828fea..55ecf0098d 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.289 2007/11/30 21:22:53 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.290 2007/12/03 00:03:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -3102,27 +3102,43 @@ CopyAttributeOutText(CopyState cstate, char *string) } else if ((unsigned char) c < (unsigned char) 0x20) { + /* + * \r and \n must be escaped, the others are traditional. + * We prefer to dump these using the C-like notation, rather + * than a backslash and the literal character, because it + * makes the dump file a bit more proof against Microsoftish + * data mangling. + */ switch (c) { - /* - * \r and \n must be escaped, the others are - * traditional - */ case '\b': + c = 'b'; + break; case '\f': + c = 'f'; + break; case '\n': + c = 'n'; + break; case '\r': + c = 'r'; + break; case '\t': + c = 't'; + break; case '\v': - DUMPSOFAR(); - CopySendChar(cstate, '\\'); - start = ptr++; /* we include char in next run */ + c = 'v'; break; default: /* All ASCII control chars are length 1 */ ptr++; - break; + continue; /* fall to end of loop */ } + /* if we get here, we need to convert the control char */ + DUMPSOFAR(); + CopySendChar(cstate, '\\'); + CopySendChar(cstate, c); + start = ++ptr; /* do not include char in next run */ } else if (IS_HIGHBIT_SET(c)) ptr += pg_encoding_mblen(cstate->client_encoding, ptr); @@ -3143,27 +3159,43 @@ CopyAttributeOutText(CopyState cstate, char *string) } else if ((unsigned char) c < (unsigned char) 0x20) { + /* + * \r and \n must be escaped, the others are traditional. + * We prefer to dump these using the C-like notation, rather + * than a backslash and the literal character, because it + * makes the dump file a bit more proof against Microsoftish + * data mangling. + */ switch (c) { - /* - * \r and \n must be escaped, the others are - * traditional - */ case '\b': + c = 'b'; + break; case '\f': + c = 'f'; + break; case '\n': + c = 'n'; + break; case '\r': + c = 'r'; + break; case '\t': + c = 't'; + break; case '\v': - DUMPSOFAR(); - CopySendChar(cstate, '\\'); - start = ptr++; /* we include char in next run */ + c = 'v'; break; default: /* All ASCII control chars are length 1 */ ptr++; - break; + continue; /* fall to end of loop */ } + /* if we get here, we need to convert the control char */ + DUMPSOFAR(); + CopySendChar(cstate, '\\'); + CopySendChar(cstate, c); + start = ++ptr; /* do not include char in next run */ } else ptr++;