granicus.if.org Git - postgresql/commitdiff
Disallow digits and lower-case ASCII letters as the delimiter in non-CSV
author Tom Lane <tgl@sss.pgh.pa.us>
Thu, 27 Dec 2007 18:28:58 +0000 (18:28 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Thu, 27 Dec 2007 18:28:58 +0000 (18:28 +0000)
COPY.  We need a restriction here because when the delimiter occurs as a
data character, it is emitted with a backslash, and that will only work
as desired if CopyReadAttributesText() will interpret the backslash sequence
as representing the second character literally.  This is currently untrue
for 'b', 'f', 'n', 'r', 't', 'v', 'x', and octal digits.  For future-proofing
and simplicity of explanation, it seems best to disallow a-z and 0-9.
We must also disallow dot, since "\." by itself would look like copy EOF.
Note: "\N" is by default the null print string, so N would also cause a
problem, but that is already tested for.

src/backend/commands/copy.c

index ea90608c4352b9344cb836a02c188422a9f897a9..52e7e6952732b522b8381f08b817623692596562 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.292 2007/12/27 17:00:56 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.293 2007/12/27 18:28:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -872,11 +872,22 @@ DoCopy(const CopyStmt *stmt, const char *queryString)
                                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                                 errmsg("COPY null representation cannot use newline or carriage return")));
 
-       /* Disallow backslash in non-CSV mode */
-       if (!cstate->csv_mode && strchr(cstate->delim, '\\') != NULL)
+       /*
+        * Disallow unsafe delimiter characters in non-CSV mode.  We can't allow
+        * backslash because it would be ambiguous.  We can't allow the other
+        * cases because data characters matching the delimiter must be
+        * backslashed, and certain backslash combinations are interpreted
+        * non-literally by COPY IN.  Disallowing all lower case ASCII letters
+        * is more than strictly necessary, but seems best for consistency and
+        * future-proofing.  Likewise we disallow all digits though only octal
+        * digits are actually dangerous.
+        */
+       if (!cstate->csv_mode &&
+               strchr("\\.abcdefghijklmnopqrstuvwxyz0123456789",
+                          cstate->delim[0]) != NULL)
                ereport(ERROR,
                                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                errmsg("COPY delimiter cannot be backslash")));
+                                errmsg("COPY delimiter cannot be \"%s\"", cstate->delim)));
 
        /* Check header */
        if (!cstate->csv_mode && cstate->header_line)