From 6545a901aaf84cb05212bb6a7674059908f527c3 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 28 Jul 2011 14:06:57 -0400 Subject: [PATCH] Fix pg_restore's direct-to-database mode for standard_conforming_strings. pg_backup_db.c contained a mini SQL lexer with which it tried to identify boundaries between SQL commands, but that code was not designed to cope with standard_conforming_strings, and would get the wrong answer if a backslash immediately precedes a closing single quote in such a string, as per report from Julian Mehnle. The bug only affects direct-to-database restores from archive files made with standard_conforming_strings = on. Rather than complicating the code some more to try to fix that, let's just rip it all out. The only reason it was needed was to cope with COPY data embedded into ordinary archive entries, which was a layout that was used only for about the first three weeks of the archive format's existence, and never in any production release of pg_dump. Instead, just rely on the archive file layout to tell us whether we're printing COPY data or not. This bug represents a data corruption hazard in all releases in which standard_conforming_strings can be turned on, ie 8.2 and later, so back-patch to all supported branches. --- src/bin/pg_dump/pg_backup_archiver.c | 47 ++-- src/bin/pg_dump/pg_backup_archiver.h | 27 -- src/bin/pg_dump/pg_backup_db.c | 375 ++++----------------------- src/bin/pg_dump/pg_backup_db.h | 4 +- 4 files changed, 67 insertions(+), 386 deletions(-) diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index 0e1037cf00..c57799e84f 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -117,6 +117,7 @@ static TocEntry *getTocEntryByDumpId(ArchiveHandle *AH, DumpId id); static void _moveBefore(ArchiveHandle *AH, TocEntry *pos, TocEntry *te); static int _discoverArchiveFormat(ArchiveHandle *AH); +static int RestoringToDB(ArchiveHandle *AH); static void dump_lo_buf(ArchiveHandle *AH); static void _write_msg(const char *modulename, const char *fmt, va_list ap); static void _die_horribly(ArchiveHandle *AH, const char *modulename, const char *fmt, va_list ap); @@ -589,13 +590,7 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, } /* - * If we have a copy statement, use it. As of V1.3, these - * are separate to allow easy import from withing a - * database connection. Pre 1.3 archives can not use DB - * connections and are sent to output only. - * - * For V1.3+, the table data MUST have a copy statement so - * that we can go into appropriate mode with libpq. + * If we have a copy statement, use it. */ if (te->copyStmt && strlen(te->copyStmt) > 0) { @@ -605,7 +600,15 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, (*AH->PrintTocDataPtr) (AH, te, ropt); - AH->writingCopyData = false; + /* + * Terminate COPY if needed. + */ + if (AH->writingCopyData) + { + if (RestoringToDB(AH)) + EndDBCopyMode(AH, te); + AH->writingCopyData = false; + } /* close out the transaction started above */ if (is_parallel && te->created) @@ -1248,17 +1251,13 @@ ahprintf(ArchiveHandle *AH, const char *fmt,...) { char *p = NULL; va_list ap; - int bSize = strlen(fmt) + 256; /* Should be enough */ + int bSize = strlen(fmt) + 256; /* Usually enough */ int cnt = -1; /* * This is paranoid: deal with the possibility that vsnprintf is willing - * to ignore trailing null - */ - - /* - * or returns > 0 even if string does not fit. It may be the case that it - * returns cnt = bufsize + * to ignore trailing null or returns > 0 even if string does not fit. + * It may be the case that it returns cnt = bufsize. */ while (cnt < 0 || cnt >= (bSize - 1)) { @@ -1340,7 +1339,7 @@ dump_lo_buf(ArchiveHandle *AH) /* - * Write buffer to the output file (usually stdout). This is user for + * Write buffer to the output file (usually stdout). This is used for * outputting 'restore' scripts etc. It is even possible for an archive * format to create a custom output routine to 'fake' a restore if it * wants to generate a script (see TAR output). @@ -1392,7 +1391,7 @@ ahwrite(const void *ptr, size_t size, size_t nmemb, ArchiveHandle *AH) * connected then send it to the DB. */ if (RestoringToDB(AH)) - return ExecuteSqlCommandBuf(AH, (void *) ptr, size * nmemb); /* Always 1, currently */ + return ExecuteSqlCommandBuf(AH, (const char *) ptr, size * nmemb); else { res = fwrite((void *) ptr, size, nmemb, AH->OF); @@ -1985,9 +1984,6 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt, AH->mode = mode; AH->compression = compression; - AH->pgCopyBuf = createPQExpBuffer(); - AH->sqlBuf = createPQExpBuffer(); - /* Open stdout with no compression for AH output handle */ AH->gzOut = 0; AH->OF = stdout; @@ -4199,10 +4195,7 @@ CloneArchive(ArchiveHandle *AH) die_horribly(AH, modulename, "out of memory\n"); memcpy(clone, AH, sizeof(ArchiveHandle)); - /* Handle format-independent fields */ - clone->pgCopyBuf = createPQExpBuffer(); - clone->sqlBuf = createPQExpBuffer(); - clone->sqlparse.tagBuf = NULL; + /* Handle format-independent fields ... none at the moment */ /* The clone will have its own connection, so disregard connection state */ clone->connection = NULL; @@ -4235,11 +4228,7 @@ DeCloneArchive(ArchiveHandle *AH) /* Clear format-specific state */ (AH->DeClonePtr) (AH); - /* Clear state allocated by CloneArchive */ - destroyPQExpBuffer(AH->pgCopyBuf); - destroyPQExpBuffer(AH->sqlBuf); - if (AH->sqlparse.tagBuf) - destroyPQExpBuffer(AH->sqlparse.tagBuf); + /* Clear state allocated by CloneArchive ... none at the moment */ /* Clear any connection-local state */ if (AH->currUser) diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h index f9af8742cc..8a3a6f9e22 100644 --- a/src/bin/pg_dump/pg_backup_archiver.h +++ b/src/bin/pg_dump/pg_backup_archiver.h @@ -132,28 +132,6 @@ typedef void (*DeClonePtr) (struct _archiveHandle * AH); typedef size_t (*CustomOutPtr) (struct _archiveHandle * AH, const void *buf, size_t len); -typedef enum -{ - SQL_SCAN = 0, /* normal */ - SQL_IN_SQL_COMMENT, /* -- comment */ - SQL_IN_EXT_COMMENT, /* slash-star comment */ - SQL_IN_SINGLE_QUOTE, /* '...' literal */ - SQL_IN_E_QUOTE, /* E'...' literal */ - SQL_IN_DOUBLE_QUOTE, /* "..." identifier */ - SQL_IN_DOLLAR_TAG, /* possible dollar-quote starting tag */ - SQL_IN_DOLLAR_QUOTE /* body of dollar quote */ -} sqlparseState; - -typedef struct -{ - sqlparseState state; /* see above */ - char lastChar; /* preceding char, or '\0' initially */ - bool backSlash; /* next char is backslash quoted? */ - int braceDepth; /* parenthesis nesting depth */ - PQExpBuffer tagBuf; /* dollar quote tag (NULL if not created) */ - int minTagEndPos; /* first possible end position of $-quote */ -} sqlparseInfo; - typedef enum { STAGE_NONE = 0, @@ -189,9 +167,6 @@ typedef struct _archiveHandle * Added V1.7 */ ArchiveFormat format; /* Archive format */ - sqlparseInfo sqlparse; - PQExpBuffer sqlBuf; - time_t createDate; /* Date archive created */ /* @@ -244,8 +219,6 @@ typedef struct _archiveHandle * required */ bool writingCopyData; /* True when we are sending COPY data */ bool pgCopyIn; /* Currently in libpq 'COPY IN' mode. */ - PQExpBuffer pgCopyBuf; /* Left-over data from incomplete lines in - * COPY IN */ int loFd; /* BLOB fd */ int writingBlob; /* Flag */ diff --git a/src/bin/pg_dump/pg_backup_db.c b/src/bin/pg_dump/pg_backup_db.c index cc3882c46c..600728d198 100644 --- a/src/bin/pg_dump/pg_backup_db.c +++ b/src/bin/pg_dump/pg_backup_db.c @@ -14,26 +14,19 @@ #include "dumputils.h" #include - #include - #ifdef HAVE_TERMIOS_H #include #endif +#define DB_MAX_ERR_STMT 128 + static const char *modulename = gettext_noop("archiver (db)"); static void _check_database_version(ArchiveHandle *AH); static PGconn *_connectDB(ArchiveHandle *AH, const char *newdbname, const char *newUser); static void notice_processor(void *arg, const char *message); -static char *_sendSQLLine(ArchiveHandle *AH, char *qry, char *eos); -static char *_sendCopyLine(ArchiveHandle *AH, char *qry, char *eos); - -static bool _isIdentChar(unsigned char c); -static bool _isDQChar(unsigned char c, bool atStart); - -#define DB_MAX_ERR_STMT 128 static int _parse_version(ArchiveHandle *AH, const char *versionString) @@ -330,8 +323,10 @@ notice_processor(void *arg, const char *message) } -/* Public interface */ -/* Convenience function to send a query. Monitors result to handle COPY statements */ +/* + * Convenience function to send a query. + * Monitors result to detect COPY statements + */ static void ExecuteSqlCommand(ArchiveHandle *AH, const char *qry, const char *desc) { @@ -348,6 +343,7 @@ ExecuteSqlCommand(ArchiveHandle *AH, const char *qry, const char *desc) { case PGRES_COMMAND_OK: case PGRES_TUPLES_OK: + case PGRES_EMPTY_QUERY: /* A-OK */ break; case PGRES_COPY_IN: @@ -372,78 +368,56 @@ ExecuteSqlCommand(ArchiveHandle *AH, const char *qry, const char *desc) PQclear(res); } + /* - * Used by ExecuteSqlCommandBuf to send one buffered line when running a COPY command. + * Implement ahwrite() for direct-to-DB restore */ -static char * -_sendCopyLine(ArchiveHandle *AH, char *qry, char *eos) +int +ExecuteSqlCommandBuf(ArchiveHandle *AH, const char *buf, size_t bufLen) { - size_t loc; /* Location of next newline */ - int pos = 0; /* Current position */ - int sPos = 0; /* Last pos of a slash char */ - int isEnd = 0; - - /* loop to find unquoted newline ending the line of COPY data */ - for (;;) + if (AH->writingCopyData) { - loc = strcspn(&qry[pos], "\n") + pos; - - /* If no match, then wait */ - if (loc >= (eos - qry)) /* None found */ - { - appendBinaryPQExpBuffer(AH->pgCopyBuf, qry, (eos - qry)); - return eos; - } - /* - * fprintf(stderr, "Found cr at %d, prev char was %c, next was %c\n", - * loc, qry[loc-1], qry[loc+1]); + * We drop the data on the floor if libpq has failed to enter COPY + * mode; this allows us to behave reasonably when trying to continue + * after an error in a COPY command. */ - - /* Count the number of preceding slashes */ - sPos = loc; - while (sPos > 0 && qry[sPos - 1] == '\\') - sPos--; - - sPos = loc - sPos; - + if (AH->pgCopyIn && + PQputCopyData(AH->connection, buf, bufLen) <= 0) + die_horribly(AH, modulename, "error returned by PQputCopyData: %s", + PQerrorMessage(AH->connection)); + } + else + { /* - * If an odd number of preceding slashes, then \n was escaped so set - * the next search pos, and loop (if any left). + * In most cases the data passed to us will be a null-terminated + * string, but if it's not, we have to add a trailing null. */ - if ((sPos & 1) == 1) + if (buf[bufLen] == '\0') + ExecuteSqlCommand(AH, buf, "could not execute query"); + else { - /* fprintf(stderr, "cr was escaped\n"); */ - pos = loc + 1; - if (pos >= (eos - qry)) - { - appendBinaryPQExpBuffer(AH->pgCopyBuf, qry, (eos - qry)); - return eos; - } + char *str = (char *) malloc(bufLen + 1); + + if (!str) + die_horribly(AH, modulename, "out of memory\n"); + memcpy(str, buf, bufLen); + str[bufLen] = '\0'; + ExecuteSqlCommand(AH, str, "could not execute query"); + free(str); } - else - break; } - /* We found an unquoted newline */ - qry[loc] = '\0'; - appendPQExpBuffer(AH->pgCopyBuf, "%s\n", qry); - isEnd = (strcmp(AH->pgCopyBuf->data, "\\.\n") == 0); - - /* - * Note that we drop the data on the floor if libpq has failed to enter - * COPY mode; this allows us to behave reasonably when trying to continue - * after an error in a COPY command. - */ - if (AH->pgCopyIn && - PQputCopyData(AH->connection, AH->pgCopyBuf->data, - AH->pgCopyBuf->len) <= 0) - die_horribly(AH, modulename, "error returned by PQputCopyData: %s", - PQerrorMessage(AH->connection)); - - resetPQExpBuffer(AH->pgCopyBuf); + return 1; +} - if (isEnd && AH->pgCopyIn) +/* + * Terminate a COPY operation during direct-to-DB restore + */ +void +EndDBCopyMode(ArchiveHandle *AH, TocEntry *te) +{ + if (AH->pgCopyIn) { PGresult *res; @@ -454,240 +428,12 @@ _sendCopyLine(ArchiveHandle *AH, char *qry, char *eos) /* Check command status and return to normal libpq state */ res = PQgetResult(AH->connection); if (PQresultStatus(res) != PGRES_COMMAND_OK) - warn_or_die_horribly(AH, modulename, "COPY failed: %s", - PQerrorMessage(AH->connection)); + warn_or_die_horribly(AH, modulename, "COPY failed for table \"%s\": %s", + te->tag, PQerrorMessage(AH->connection)); PQclear(res); AH->pgCopyIn = false; } - - return qry + loc + 1; -} - -/* - * Used by ExecuteSqlCommandBuf to send one buffered line of SQL - * (not data for the copy command). - */ -static char * -_sendSQLLine(ArchiveHandle *AH, char *qry, char *eos) -{ - /* - * The following is a mini state machine to assess the end of an SQL - * statement. It really only needs to parse good SQL, or at least that's - * the theory... End-of-statement is assumed to be an unquoted, - * un-commented semi-colon that's not within any parentheses. - * - * Note: the input can be split into bufferloads at arbitrary boundaries. - * Therefore all state must be kept in AH->sqlparse, not in local - * variables of this routine. We assume that AH->sqlparse was filled with - * zeroes when created. - */ - for (; qry < eos; qry++) - { - switch (AH->sqlparse.state) - { - case SQL_SCAN: /* Default state == 0, set in _allocAH */ - if (*qry == ';' && AH->sqlparse.braceDepth == 0) - { - /* - * We've found the end of a statement. Send it and reset - * the buffer. - */ - appendPQExpBufferChar(AH->sqlBuf, ';'); /* inessential */ - ExecuteSqlCommand(AH, AH->sqlBuf->data, - "could not execute query"); - resetPQExpBuffer(AH->sqlBuf); - AH->sqlparse.lastChar = '\0'; - - /* - * Remove any following newlines - so that embedded COPY - * commands don't get a starting newline. - */ - qry++; - while (qry < eos && *qry == '\n') - qry++; - - /* We've finished one line, so exit */ - return qry; - } - else if (*qry == '\'') - { - if (AH->sqlparse.lastChar == 'E') - AH->sqlparse.state = SQL_IN_E_QUOTE; - else - AH->sqlparse.state = SQL_IN_SINGLE_QUOTE; - AH->sqlparse.backSlash = false; - } - else if (*qry == '"') - { - AH->sqlparse.state = SQL_IN_DOUBLE_QUOTE; - } - - /* - * Look for dollar-quotes. We make the assumption that - * $-quotes will not have an ident character just before them - * in pg_dump output. XXX is this good enough? - */ - else if (*qry == '$' && !_isIdentChar(AH->sqlparse.lastChar)) - { - AH->sqlparse.state = SQL_IN_DOLLAR_TAG; - /* initialize separate buffer with possible tag */ - if (AH->sqlparse.tagBuf == NULL) - AH->sqlparse.tagBuf = createPQExpBuffer(); - else - resetPQExpBuffer(AH->sqlparse.tagBuf); - appendPQExpBufferChar(AH->sqlparse.tagBuf, *qry); - } - else if (*qry == '-' && AH->sqlparse.lastChar == '-') - AH->sqlparse.state = SQL_IN_SQL_COMMENT; - else if (*qry == '*' && AH->sqlparse.lastChar == '/') - AH->sqlparse.state = SQL_IN_EXT_COMMENT; - else if (*qry == '(') - AH->sqlparse.braceDepth++; - else if (*qry == ')') - AH->sqlparse.braceDepth--; - break; - - case SQL_IN_SQL_COMMENT: - if (*qry == '\n') - AH->sqlparse.state = SQL_SCAN; - break; - - case SQL_IN_EXT_COMMENT: - - /* - * This isn't fully correct, because we don't account for - * nested slash-stars, but pg_dump never emits such. - */ - if (AH->sqlparse.lastChar == '*' && *qry == '/') - AH->sqlparse.state = SQL_SCAN; - break; - - case SQL_IN_SINGLE_QUOTE: - /* We needn't handle '' specially */ - if (*qry == '\'' && !AH->sqlparse.backSlash) - AH->sqlparse.state = SQL_SCAN; - else if (*qry == '\\') - AH->sqlparse.backSlash = !AH->sqlparse.backSlash; - else - AH->sqlparse.backSlash = false; - break; - - case SQL_IN_E_QUOTE: - - /* - * Eventually we will need to handle '' specially, because - * after E'...''... we should still be in E_QUOTE state. - * - * XXX problem: how do we tell whether the dump was made by a - * version that thinks backslashes aren't special in non-E - * literals?? - */ - if (*qry == '\'' && !AH->sqlparse.backSlash) - AH->sqlparse.state = SQL_SCAN; - else if (*qry == '\\') - AH->sqlparse.backSlash = !AH->sqlparse.backSlash; - else - AH->sqlparse.backSlash = false; - break; - - case SQL_IN_DOUBLE_QUOTE: - /* We needn't handle "" specially */ - if (*qry == '"') - AH->sqlparse.state = SQL_SCAN; - break; - - case SQL_IN_DOLLAR_TAG: - if (*qry == '$') - { - /* Do not add the closing $ to tagBuf */ - AH->sqlparse.state = SQL_IN_DOLLAR_QUOTE; - AH->sqlparse.minTagEndPos = AH->sqlBuf->len + AH->sqlparse.tagBuf->len + 1; - } - else if (_isDQChar(*qry, (AH->sqlparse.tagBuf->len == 1))) - { - /* Valid, so add to tag */ - appendPQExpBufferChar(AH->sqlparse.tagBuf, *qry); - } - else - { - /* - * Ooops, we're not really in a dollar-tag. Valid tag - * chars do not include the various chars we look for in - * this state machine, so it's safe to just jump from this - * state back to SCAN. We have to back up the qry pointer - * so that the current character gets rescanned in SCAN - * state; and then "continue" so that the bottom-of-loop - * actions aren't done yet. - */ - AH->sqlparse.state = SQL_SCAN; - qry--; - continue; - } - break; - - case SQL_IN_DOLLAR_QUOTE: - - /* - * If we are at a $, see whether what precedes it matches - * tagBuf. (Remember that the trailing $ of the tag was not - * added to tagBuf.) However, don't compare until we have - * enough data to be a possible match --- this is needed to - * avoid false match on '$a$a$...' - */ - if (*qry == '$' && - AH->sqlBuf->len >= AH->sqlparse.minTagEndPos && - strcmp(AH->sqlparse.tagBuf->data, - AH->sqlBuf->data + AH->sqlBuf->len - AH->sqlparse.tagBuf->len) == 0) - AH->sqlparse.state = SQL_SCAN; - break; - } - - appendPQExpBufferChar(AH->sqlBuf, *qry); - AH->sqlparse.lastChar = *qry; - } - - /* - * If we get here, we've processed entire bufferload with no complete SQL - * stmt - */ - return eos; -} - - -/* Convenience function to send one or more queries. Monitors result to handle COPY statements */ -int -ExecuteSqlCommandBuf(ArchiveHandle *AH, void *qryv, size_t bufLen) -{ - char *qry = (char *) qryv; - char *eos = qry + bufLen; - - /* - * fprintf(stderr, "\n\n*****\n Buffer:\n\n%s\n*******************\n\n", - * qry); - */ - - /* Could switch between command and COPY IN mode at each line */ - while (qry < eos) - { - /* - * If libpq is in CopyIn mode *or* if the archive structure shows we - * are sending COPY data, treat the data as COPY data. The pgCopyIn - * check is only needed for backwards compatibility with ancient - * archive files that might just issue a COPY command without marking - * it properly. Note that in an archive entry that has a copyStmt, - * all data up to the end of the entry will go to _sendCopyLine, and - * therefore will be dropped if libpq has failed to enter COPY mode. - * Also, if a "\." data terminator is found, anything remaining in the - * archive entry will be dropped. - */ - if (AH->pgCopyIn || AH->writingCopyData) - qry = _sendCopyLine(AH, qry, eos); - else - qry = _sendSQLLine(AH, qry, eos); - } - - return 1; } void @@ -728,32 +474,3 @@ DropBlobIfExists(ArchiveHandle *AH, Oid oid) oid, oid); } } - -static bool -_isIdentChar(unsigned char c) -{ - if ((c >= 'a' && c <= 'z') - || (c >= 'A' && c <= 'Z') - || (c >= '0' && c <= '9') - || (c == '_') - || (c == '$') - || (c >= (unsigned char) '\200') /* no need to check <= \377 */ - ) - return true; - else - return false; -} - -static bool -_isDQChar(unsigned char c, bool atStart) -{ - if ((c >= 'a' && c <= 'z') - || (c >= 'A' && c <= 'Z') - || (c == '_') - || (!atStart && c >= '0' && c <= '9') - || (c >= (unsigned char) '\200') /* no need to check <= \377 */ - ) - return true; - else - return false; -} diff --git a/src/bin/pg_dump/pg_backup_db.h b/src/bin/pg_dump/pg_backup_db.h index 93970b5a91..a1a84825e4 100644 --- a/src/bin/pg_dump/pg_backup_db.h +++ b/src/bin/pg_dump/pg_backup_db.h @@ -10,7 +10,9 @@ #include "pg_backup_archiver.h" -extern int ExecuteSqlCommandBuf(ArchiveHandle *AH, void *qry, size_t bufLen); +extern int ExecuteSqlCommandBuf(ArchiveHandle *AH, const char *buf, size_t bufLen); + +extern void EndDBCopyMode(ArchiveHandle *AH, struct _tocEntry * te); extern void StartTransaction(ArchiveHandle *AH); extern void CommitTransaction(ArchiveHandle *AH); -- 2.40.0