granicus.if.org Git - postgresql/commitdiff
Fix some corner-case bugs in _sendSQLLine's parsing of SQL commands
author Tom Lane <tgl@sss.pgh.pa.us>
Sun, 11 Sep 2005 04:10:25 +0000 (04:10 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sun, 11 Sep 2005 04:10:25 +0000 (04:10 +0000)
found in a pg_dump archive.  It had problems with dollar-quote tags
broken across bufferload boundaries (this may explain bug report from
Rod Taylor), also with dollar-quote literals of the form $a$a$...,
and was also confused about the rules for backslash in double quoted
identifiers (ie, they're not special).  Also put in placeholder support
for E'...' literals --- this will need more work later.

src/bin/pg_dump/pg_backup_archiver.h
src/bin/pg_dump/pg_backup_db.c

index d9a09e328586749b15da8b8048b1802eec311eff..e7a05283d744b27241efd55bede0e12026626b6b 100644 (file)
@@ -17,7 +17,7 @@
  *
  *
  * IDENTIFICATION
- *             $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_archiver.h,v 1.66 2005/07/27 12:44:10 neilc Exp $
+ *             $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_archiver.h,v 1.67 2005/09/11 04:10:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -136,22 +136,24 @@ typedef struct _outputContext
 
 typedef enum
 {
-       SQL_SCAN = 0,
-       SQL_IN_SQL_COMMENT,
-       SQL_IN_EXT_COMMENT,
-       SQL_IN_QUOTE,
-       SQL_IN_DOLLARTAG,
-       SQL_IN_DOLLARQUOTE
+       SQL_SCAN = 0,                           /* normal */
+       SQL_IN_SQL_COMMENT,                     /* -- comment */
+       SQL_IN_EXT_COMMENT,                     /* slash-star comment */
+       SQL_IN_SINGLE_QUOTE,            /* '...' literal */
+       SQL_IN_E_QUOTE,                         /* E'...' literal */
+       SQL_IN_DOUBLE_QUOTE,            /* "..." identifier */
+       SQL_IN_DOLLAR_TAG,                      /* possible dollar-quote starting tag */
+       SQL_IN_DOLLAR_QUOTE                     /* body of dollar quote */
 } sqlparseState;
 
 typedef struct
 {
-       int                     backSlash;
-       sqlparseState state;
-       char            lastChar;
-       char            quoteChar;
-       int                     braceDepth;
-       PQExpBuffer tagBuf;
+       sqlparseState state;            /* see above */
+       char            lastChar;               /* preceding char, or '\0' initially */
+       bool            backSlash;              /* next char is backslash quoted? */
+       int                     braceDepth;             /* parenthesis nesting depth */
+       PQExpBuffer tagBuf;                     /* dollar quote tag (NULL if not created) */
+       int                     minTagEndPos;   /* first possible end position of $-quote */
 } sqlparseInfo;
 
 typedef enum
index 66833ebdb1e15682cc551e2d52e4e146ac97e9cf..2065f59a4dcaafd52cea9dfda3b08062944cfa5f 100644 (file)
@@ -5,7 +5,7 @@
  *     Implements the basic DB functions used by the archiver.
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_db.c,v 1.64 2005/07/27 05:14:12 neilc Exp $
+ *       $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_db.c,v 1.65 2005/09/11 04:10:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -36,8 +36,8 @@ static void notice_processor(void *arg, const char *message);
 static char *_sendSQLLine(ArchiveHandle *AH, char *qry, char *eos);
 static char *_sendCopyLine(ArchiveHandle *AH, char *qry, char *eos);
 
-static int     _isIdentChar(unsigned char c);
-static int     _isDQChar(unsigned char c, int atStart);
+static bool _isIdentChar(unsigned char c);
+static bool _isDQChar(unsigned char c, bool atStart);
 
 #define DB_MAX_ERR_STMT 128
 
@@ -410,215 +410,187 @@ _sendCopyLine(ArchiveHandle *AH, char *qry, char *eos)
 }
 
 /*
- * Used by ExecuteSqlCommandBuf to send one buffered line of SQL (not data for the copy command).
+ * Used by ExecuteSqlCommandBuf to send one buffered line of SQL
+ * (not data for the copy command).
  */
 static char *
 _sendSQLLine(ArchiveHandle *AH, char *qry, char *eos)
 {
-       int                     pos = 0;                /* Current position */
-       char       *sqlPtr;
-       int                     consumed;
-       int                     startDT = 0;
-
        /*
         * The following is a mini state machine to assess the end of an SQL
         * statement. It really only needs to parse good SQL, or at least
         * that's the theory... End-of-statement is assumed to be an unquoted,
-        * un commented semi-colon.
-        */
-
-       /*
-        * fprintf(stderr, "Buffer at start is: '%s'\n\n", AH->sqlBuf->data);
+        * un-commented semi-colon that's not within any parentheses.
+        *
+        * Note: the input can be split into bufferloads at arbitrary boundaries.
+        * Therefore all state must be kept in AH->sqlparse, not in local
+        * variables of this routine.  We assume that AH->sqlparse was
+        * filled with zeroes when created.
         */
-
-       for (pos = 0; pos < (eos - qry); pos++)
+       for (; qry < eos; qry++)
        {
-               appendPQExpBufferChar(AH->sqlBuf, qry[pos]);
-               /* fprintf(stderr, " %c",qry[pos]); */
-
-               /* Loop until character consumed */
-               do
+               switch (AH->sqlparse.state)
                {
-                       /*
-                        * If a character needs to be scanned in a different state,
-                        * consumed can be set to 0 to avoid advancing. Care must be
-                        * taken to ensure internal state is not damaged.
-                        */
-                       consumed = 1;
-
-                       switch (AH->sqlparse.state)
-                       {
-
-                               case SQL_SCAN:  /* Default state == 0, set in _allocAH */
-                                       if (qry[pos] == ';' && AH->sqlparse.braceDepth == 0)
-                                       {
-                                               /*
-                                                * We've got the end of a statement. Send It &
-                                                * reset the buffer.
-                                                */
-
-                                               /*
-                                                * fprintf(stderr, "    sending: '%s'\n\n",
-                                                * AH->sqlBuf->data);
-                                                */
-                                               ExecuteSqlCommand(AH, AH->sqlBuf, "could not execute query");
-                                               resetPQExpBuffer(AH->sqlBuf);
-                                               AH->sqlparse.lastChar = '\0';
-
-                                               /*
-                                                * Remove any following newlines - so that
-                                                * embedded COPY commands don't get a starting
-                                                * newline.
-                                                */
-                                               pos++;
-                                               for (; pos < (eos - qry) && qry[pos] == '\n'; pos++);
-
-                                               /* We've got our line, so exit */
-                                               return qry + pos;
-                                       }
-                                       else
-                                       {
-                                               /*
-                                                * Look for normal boring quote chars, or
-                                                * dollar-quotes. We make the assumption that
-                                                * $-quotes will not have an ident character
-                                                * before them in all pg_dump output.
-                                                */
-                                               if (qry[pos] == '"'
-                                                       || qry[pos] == '\''
-                                                       || (qry[pos] == '$' && _isIdentChar(AH->sqlparse.lastChar) == 0)
-                                                       )
-                                               {
-                                                       /* fprintf(stderr,"[startquote]\n"); */
-                                                       AH->sqlparse.state = SQL_IN_QUOTE;
-                                                       AH->sqlparse.quoteChar = qry[pos];
-                                                       AH->sqlparse.backSlash = 0;
-                                                       if (qry[pos] == '$')
-                                                       {
-                                                               /* override the state */
-                                                               AH->sqlparse.state = SQL_IN_DOLLARTAG;
-                                                               /* Used for checking first char of tag */
-                                                               startDT = 1;
-                                                               /* We store the tag for later comparison. */
-                                                               AH->sqlparse.tagBuf = createPQExpBuffer();
-                                                               /* Get leading $ */
-                                                               appendPQExpBufferChar(AH->sqlparse.tagBuf, qry[pos]);
-                                                       }
-                                               }
-                                               else if (qry[pos] == '-' && AH->sqlparse.lastChar == '-')
-                                                       AH->sqlparse.state = SQL_IN_SQL_COMMENT;
-                                               else if (qry[pos] == '*' && AH->sqlparse.lastChar == '/')
-                                                       AH->sqlparse.state = SQL_IN_EXT_COMMENT;
-                                               else if (qry[pos] == '(')
-                                                       AH->sqlparse.braceDepth++;
-                                               else if (qry[pos] == ')')
-                                                       AH->sqlparse.braceDepth--;
-
-                                               AH->sqlparse.lastChar = qry[pos];
-                                       }
-                                       break;
-
-                               case SQL_IN_DOLLARTAG:
-
+                       case SQL_SCAN:          /* Default state == 0, set in _allocAH */
+                               if (*qry == ';' && AH->sqlparse.braceDepth == 0)
+                               {
                                        /*
-                                        * Like a quote, we look for a closing char *but* we
-                                        * only allow a very limited set of contained chars,
-                                        * and no escape chars. If invalid chars are found, we
-                                        * abort tag processing.
+                                        * We've found the end of a statement. Send it and
+                                        * reset the buffer.
                                         */
-
-                                       if (qry[pos] == '$')
-                                       {
-                                               /* fprintf(stderr,"[endquote]\n"); */
-                                               /* Get trailing $ */
-                                               appendPQExpBufferChar(AH->sqlparse.tagBuf, qry[pos]);
-                                               AH->sqlparse.state = SQL_IN_DOLLARQUOTE;
-                                       }
-                                       else
-                                       {
-                                               if (_isDQChar(qry[pos], startDT))
-                                               {
-                                                       /* Valid, so add */
-                                                       appendPQExpBufferChar(AH->sqlparse.tagBuf, qry[pos]);
-                                               }
-                                               else
-                                               {
-                                                       /*
-                                                        * Jump back to 'scan' state, we're not really
-                                                        * in a tag, and valid tag chars do not
-                                                        * include the various chars we look for in
-                                                        * this state machine, so it's safe to just
-                                                        * jump from this state back to SCAN. We set
-                                                        * consumed = 0 so that this char gets
-                                                        * rescanned in new state.
-                                                        */
-                                                       destroyPQExpBuffer(AH->sqlparse.tagBuf);
-                                                       AH->sqlparse.state = SQL_SCAN;
-                                                       consumed = 0;
-                                               }
-                                       }
-                                       startDT = 0;
-                                       break;
-
-
-                               case SQL_IN_DOLLARQUOTE:
+                                       appendPQExpBufferChar(AH->sqlBuf, ';'); /* inessential */
+                                       ExecuteSqlCommand(AH, AH->sqlBuf,
+                                                                         "could not execute query");
+                                       resetPQExpBuffer(AH->sqlBuf);
+                                       AH->sqlparse.lastChar = '\0';
 
                                        /*
-                                        * Comparing the entire string backwards each time is
-                                        * NOT efficient, but dollar quotes in pg_dump are
-                                        * small and the code is a lot simpler.
+                                        * Remove any following newlines - so that
+                                        * embedded COPY commands don't get a starting newline.
                                         */
-                                       sqlPtr = AH->sqlBuf->data + AH->sqlBuf->len - AH->sqlparse.tagBuf->len;
-
-                                       if (strncmp(AH->sqlparse.tagBuf->data, sqlPtr, AH->sqlparse.tagBuf->len) == 0)
-                                       {
-                                               /* End of $-quote */
-                                               AH->sqlparse.state = SQL_SCAN;
-                                               destroyPQExpBuffer(AH->sqlparse.tagBuf);
-                                       }
-                                       break;
-
-                               case SQL_IN_SQL_COMMENT:
-                                       if (qry[pos] == '\n')
-                                               AH->sqlparse.state = SQL_SCAN;
-                                       break;
-
-                               case SQL_IN_EXT_COMMENT:
-                                       if (AH->sqlparse.lastChar == '*' && qry[pos] == '/')
-                                               AH->sqlparse.state = SQL_SCAN;
-                                       break;
-
-                               case SQL_IN_QUOTE:
-
-                                       if (!AH->sqlparse.backSlash && AH->sqlparse.quoteChar == qry[pos])
-                                       {
-                                               /* fprintf(stderr,"[endquote]\n"); */
-                                               AH->sqlparse.state = SQL_SCAN;
-                                       }
+                                       qry++;
+                                       while (qry < eos && *qry == '\n')
+                                               qry++;
+
+                                       /* We've finished one line, so exit */
+                                       return qry;
+                               }
+                               else if (*qry == '\'')
+                               {
+                                       if (AH->sqlparse.lastChar == 'E')
+                                               AH->sqlparse.state = SQL_IN_E_QUOTE;
                                        else
-                                       {
-                                               if (qry[pos] == '\\')
-                                               {
-                                                       if (AH->sqlparse.lastChar == '\\')
-                                                               AH->sqlparse.backSlash = !AH->sqlparse.backSlash;
-                                                       else
-                                                               AH->sqlparse.backSlash = 1;
-                                               }
-                                               else
-                                                       AH->sqlparse.backSlash = 0;
-                                       }
-                                       break;
-
-                       }
-
-               } while (consumed == 0);
+                                               AH->sqlparse.state = SQL_IN_SINGLE_QUOTE;
+                                       AH->sqlparse.backSlash = false;
+                               }
+                               else if (*qry == '"')
+                               {
+                                       AH->sqlparse.state = SQL_IN_DOUBLE_QUOTE;
+                               }
+                               /*
+                                * Look for dollar-quotes. We make the assumption that
+                                * $-quotes will not have an ident character just
+                                * before them in pg_dump output.  XXX is this
+                                * good enough?
+                                */
+                               else if (*qry == '$' && !_isIdentChar(AH->sqlparse.lastChar))
+                               {
+                                       AH->sqlparse.state = SQL_IN_DOLLAR_TAG;
+                                       /* initialize separate buffer with possible tag */
+                                       if (AH->sqlparse.tagBuf == NULL)
+                                               AH->sqlparse.tagBuf = createPQExpBuffer();
+                                       else
+                                               resetPQExpBuffer(AH->sqlparse.tagBuf);
+                                       appendPQExpBufferChar(AH->sqlparse.tagBuf, *qry);
+                               }
+                               else if (*qry == '-' && AH->sqlparse.lastChar == '-')
+                                       AH->sqlparse.state = SQL_IN_SQL_COMMENT;
+                               else if (*qry == '*' && AH->sqlparse.lastChar == '/')
+                                       AH->sqlparse.state = SQL_IN_EXT_COMMENT;
+                               else if (*qry == '(')
+                                       AH->sqlparse.braceDepth++;
+                               else if (*qry == ')')
+                                       AH->sqlparse.braceDepth--;
+                               break;
+
+                       case SQL_IN_SQL_COMMENT:
+                               if (*qry == '\n')
+                                       AH->sqlparse.state = SQL_SCAN;
+                               break;
+
+                       case SQL_IN_EXT_COMMENT:
+                               /*
+                                * This isn't fully correct, because we don't account for
+                                * nested slash-stars, but pg_dump never emits such.
+                                */
+                               if (AH->sqlparse.lastChar == '*' && *qry == '/')
+                                       AH->sqlparse.state = SQL_SCAN;
+                               break;
+
+                       case SQL_IN_SINGLE_QUOTE:
+                               /* We needn't handle '' specially */
+                               if (*qry == '\'' && !AH->sqlparse.backSlash)
+                                       AH->sqlparse.state = SQL_SCAN;
+                               else if (*qry == '\\')
+                                       AH->sqlparse.backSlash = !AH->sqlparse.backSlash;
+                               else
+                                       AH->sqlparse.backSlash = false;
+                               break;
+
+                       case SQL_IN_E_QUOTE:
+                               /*
+                                * Eventually we will need to handle '' specially, because
+                                * after E'...''... we should still be in E_QUOTE state.
+                                *
+                                * XXX problem: how do we tell whether the dump was made
+                                * by a version that thinks backslashes aren't special
+                                * in non-E literals??
+                                */
+                               if (*qry == '\'' && !AH->sqlparse.backSlash)
+                                       AH->sqlparse.state = SQL_SCAN;
+                               else if (*qry == '\\')
+                                       AH->sqlparse.backSlash = !AH->sqlparse.backSlash;
+                               else
+                                       AH->sqlparse.backSlash = false;
+                               break;
+
+                       case SQL_IN_DOUBLE_QUOTE:
+                               /* We needn't handle "" specially */
+                               if (*qry == '"')
+                                       AH->sqlparse.state = SQL_SCAN;
+                               break;
+
+                       case SQL_IN_DOLLAR_TAG:
+                               if (*qry == '$')
+                               {
+                                       /* Do not add the closing $ to tagBuf */
+                                       AH->sqlparse.state = SQL_IN_DOLLAR_QUOTE;
+                                       AH->sqlparse.minTagEndPos = AH->sqlBuf->len + AH->sqlparse.tagBuf->len + 1;
+                               }
+                               else if (_isDQChar(*qry, (AH->sqlparse.tagBuf->len == 1)))
+                               {
+                                       /* Valid, so add to tag */
+                                       appendPQExpBufferChar(AH->sqlparse.tagBuf, *qry);
+                               }
+                               else
+                               {
+                                       /*
+                                        * Ooops, we're not really in a dollar-tag.  Valid tag
+                                        * chars do not include the various chars we look for
+                                        * in this state machine, so it's safe to just jump
+                                        * from this state back to SCAN.  We have to back up
+                                        * the qry pointer so that the current character gets
+                                        * rescanned in SCAN state; and then "continue" so that
+                                        * the bottom-of-loop actions aren't done yet.
+                                        */
+                                       AH->sqlparse.state = SQL_SCAN;
+                                       qry--;
+                                       continue;
+                               }
+                               break;
+
+                       case SQL_IN_DOLLAR_QUOTE:
+                               /*
+                                * If we are at a $, see whether what precedes it matches
+                                * tagBuf.  (Remember that the trailing $ of the tag was
+                                * not added to tagBuf.)  However, don't compare until we
+                                * have enough data to be a possible match --- this is
+                                * needed to avoid false match on '$a$a$...'
+                                */
+                               if (*qry == '$' &&
+                                       AH->sqlBuf->len >= AH->sqlparse.minTagEndPos &&
+                                       strcmp(AH->sqlparse.tagBuf->data,
+                                                  AH->sqlBuf->data + AH->sqlBuf->len - AH->sqlparse.tagBuf->len) == 0)
+                                       AH->sqlparse.state = SQL_SCAN;
+                               break;
+               }
 
-               AH->sqlparse.lastChar = qry[pos];
-               /* fprintf(stderr, "\n"); */
+               appendPQExpBufferChar(AH->sqlBuf, *qry);
+               AH->sqlparse.lastChar = *qry;
        }
 
        /*
-        * If we get here, we've processed entire string with no complete SQL
+        * If we get here, we've processed entire bufferload with no complete SQL
         * stmt
         */
        return eos;
@@ -673,7 +645,7 @@ CommitTransaction(ArchiveHandle *AH)
        destroyPQExpBuffer(qry);
 }
 
-static int
+static bool
 _isIdentChar(unsigned char c)
 {
        if ((c >= 'a' && c <= 'z')
@@ -684,22 +656,22 @@ _isIdentChar(unsigned char c)
                || (c >= (unsigned char) '\200')                /* no need to check <=
                                                                                                 * \377 */
                )
-               return 1;
+               return true;
        else
-               return 0;
+               return false;
 }
 
-static int
-_isDQChar(unsigned char c, int atStart)
+static bool
+_isDQChar(unsigned char c, bool atStart)
 {
        if ((c >= 'a' && c <= 'z')
                || (c >= 'A' && c <= 'Z')
                || (c == '_')
-               || (atStart == 0 && c >= '0' && c <= '9')
+               || (!atStart && c >= '0' && c <= '9')
                || (c >= (unsigned char) '\200')                /* no need to check <=
                                                                                                 * \377 */
                )
-               return 1;
+               return true;
        else
-               return 0;
+               return false;
 }