X-Git-Url: https://granicus.if.org/sourcecode?a=blobdiff_plain;f=src%2Fbin%2Fpg_dump%2Fpg_backup_archiver.c;h=64d8d93dda81805540ac70dbd7afcf11390c19fc;hb=0d692a0dc9f0e532c67c577187fe5d7d323cb95b;hp=7c36aef5484c13fba267ca85d195a2b7540b9083;hpb=68528d37bbfbb3ae8dc83418f3e1d343c4050f0a;p=postgresql diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index 7c36aef548..64d8d93dda 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_archiver.c,v 1.153 2008/03/20 17:36:57 tgl Exp $ + * src/bin/pg_dump/pg_backup_archiver.c * *------------------------------------------------------------------------- */ @@ -24,8 +24,9 @@ #include "dumputils.h" #include - #include +#include +#include #ifdef WIN32 #include @@ -33,11 +34,55 @@ #include "libpq/libpq-fs.h" +/* + * Special exit values from worker children. We reserve 0 for normal + * success; 1 and other small values should be interpreted as crashes. + */ +#define WORKER_CREATE_DONE 10 +#define WORKER_INHIBIT_DATA 11 +#define WORKER_IGNORED_ERRORS 12 + +/* + * Unix uses exit to return result from worker child, so function is void. + * Windows thread result comes via function return. + */ +#ifndef WIN32 +#define parallel_restore_result void +#else +#define parallel_restore_result DWORD +#endif + +/* IDs for worker children are either PIDs or thread handles */ +#ifndef WIN32 +#define thandle pid_t +#else +#define thandle HANDLE +#endif + +/* Arguments needed for a worker child */ +typedef struct _restore_args +{ + ArchiveHandle *AH; + TocEntry *te; +} RestoreArgs; + +/* State for each parallel activity slot */ +typedef struct _parallel_slot +{ + thandle child_id; + RestoreArgs *args; +} ParallelSlot; + +#define NO_SLOT (-1) const char *progname; static const char *modulename = gettext_noop("archiver"); +/* index array created by fix_dependencies -- only used in parallel restore */ +static TocEntry **tocsByDumpId; /* index by dumpId - 1 */ +static DumpId maxDumpId; /* length of above array */ + static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt, const int compression, ArchiveMode mode); @@ -57,10 +102,11 @@ static void _selectTablespace(ArchiveHandle *AH, const char *tablespace); static void processEncodingEntry(ArchiveHandle *AH, TocEntry *te); static void processStdStringsEntry(ArchiveHandle *AH, TocEntry *te); static teReqs _tocEntryRequired(TocEntry *te, RestoreOptions *ropt, bool include_acls); +static bool _tocEntryIsACL(TocEntry *te); static void _disableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt); static void _enableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt); static TocEntry *getTocEntryByDumpId(ArchiveHandle *AH, DumpId id); -static void _moveAfter(ArchiveHandle *AH, TocEntry *pos, TocEntry *te); +static void _moveBefore(ArchiveHandle *AH, TocEntry *pos, TocEntry *te); static int _discoverArchiveFormat(ArchiveHandle *AH); static void dump_lo_buf(ArchiveHandle *AH); @@ -71,6 +117,35 @@ static void dumpTimestamp(ArchiveHandle *AH, const char *msg, time_t tim); static OutputContext SetOutput(ArchiveHandle *AH, char *filename, int compression); static void ResetOutput(ArchiveHandle *AH, OutputContext savedContext); +static int restore_toc_entry(ArchiveHandle *AH, TocEntry *te, + RestoreOptions *ropt, bool is_parallel); +static void restore_toc_entries_parallel(ArchiveHandle *AH); +static thandle spawn_restore(RestoreArgs *args); +static thandle reap_child(ParallelSlot *slots, int n_slots, int *work_status); +static bool work_in_progress(ParallelSlot *slots, int n_slots); +static int get_next_slot(ParallelSlot *slots, int n_slots); +static void par_list_header_init(TocEntry *l); +static void par_list_append(TocEntry *l, TocEntry *te); +static void par_list_remove(TocEntry *te); +static TocEntry *get_next_work_item(ArchiveHandle *AH, + TocEntry *ready_list, + ParallelSlot *slots, int n_slots); +static parallel_restore_result parallel_restore(RestoreArgs *args); +static void mark_work_done(ArchiveHandle *AH, TocEntry *ready_list, + thandle worker, int status, + ParallelSlot *slots, int n_slots); +static void fix_dependencies(ArchiveHandle *AH); +static bool has_lock_conflicts(TocEntry *te1, TocEntry *te2); +static void repoint_table_dependencies(ArchiveHandle *AH, + DumpId tableId, DumpId tableDataId); +static void identify_locking_dependencies(TocEntry *te); +static void reduce_dependencies(ArchiveHandle *AH, TocEntry *te, + TocEntry *ready_list); +static void mark_create_done(ArchiveHandle *AH, TocEntry *te); +static void inhibit_data_for_failed_table(ArchiveHandle *AH, TocEntry *te); +static ArchiveHandle *CloneArchive(ArchiveHandle *AH); +static void DeCloneArchive(ArchiveHandle *AH); + /* * Wrapper functions. @@ -131,7 +206,6 @@ RestoreArchive(Archive *AHX, RestoreOptions *ropt) TocEntry *te; teReqs reqs; OutputContext sav; - bool defnDumped; AH->ropt = ropt; AH->stage = STAGE_INITIALIZING; @@ -139,14 +213,36 @@ RestoreArchive(Archive *AHX, RestoreOptions *ropt) /* * Check for nonsensical option combinations. * - * NB: create+dropSchema is useless because if you're creating the DB, + * NB: createDB+dropSchema is useless because if you're creating the DB, * there's no need to drop individual items in it. Moreover, if we tried * to do that then we'd issue the drops in the database initially * connected to, not the one we will create, which is very bad... */ - if (ropt->create && ropt->dropSchema) + if (ropt->createDB && ropt->dropSchema) die_horribly(AH, modulename, "-C and -c are incompatible options\n"); + /* + * -C is not compatible with -1, because we can't create a database inside + * a transaction block. + */ + if (ropt->createDB && ropt->single_txn) + die_horribly(AH, modulename, "-C and -1 are incompatible options\n"); + + /* + * Make sure we won't need (de)compression we haven't got + */ +#ifndef HAVE_LIBZ + if (AH->compression != 0 && AH->PrintTocDataPtr !=NULL) + { + for (te = AH->toc->next; te != AH->toc; te = te->next) + { + reqs = _tocEntryRequired(te, ropt, false); + if (te->hadDumper && (reqs & REQ_DATA) != 0) + die_horribly(AH, modulename, "cannot restore from compressed archive (compression not supported in this installation)\n"); + } + } +#endif + /* * If we're using a DB connection, then connect it. */ @@ -162,7 +258,7 @@ RestoreArchive(Archive *AHX, RestoreOptions *ropt) ConnectDatabase(AHX, ropt->dbname, ropt->pghost, ropt->pgport, ropt->username, - ropt->requirePassword, ropt->ignoreVersion); + ropt->promptPassword); /* * If we're talking to the DB directly, don't send comments since they @@ -209,7 +305,15 @@ RestoreArchive(Archive *AHX, RestoreOptions *ropt) ahprintf(AH, "--\n-- PostgreSQL database dump\n--\n\n"); if (AH->public.verbose) + { + if (AH->archiveRemoteVersion) + ahprintf(AH, "-- Dumped from database version %s\n", + AH->archiveRemoteVersion); + if (AH->archiveDumpVersion) + ahprintf(AH, "-- Dumped by pg_dump version %s\n", + AH->archiveDumpVersion); dumpTimestamp(AH, "Started on", AH->createDate); + } if (ropt->single_txn) { @@ -236,9 +340,9 @@ RestoreArchive(Archive *AHX, RestoreOptions *ropt) AH->currentTE = te; reqs = _tocEntryRequired(te, ropt, false /* needn't drop ACLs */ ); - if (((reqs & REQ_SCHEMA) != 0) && te->dropStmt) + /* We want anything that's selected and has a dropStmt */ + if (((reqs & (REQ_SCHEMA | REQ_DATA)) != 0) && te->dropStmt) { - /* We want the schema */ ahlog(AH, 1, "dropping %s %s\n", te->desc, te->tag); /* Select owner and schema as necessary */ _becomeOwner(AH, te); @@ -262,148 +366,21 @@ RestoreArchive(Archive *AHX, RestoreOptions *ropt) */ if (AH->currSchema) free(AH->currSchema); - AH->currSchema = strdup(""); + AH->currSchema = NULL; } /* - * Now process each non-ACL TOC entry + * In serial mode, we now process each non-ACL TOC entry. + * + * In parallel mode, turn control over to the parallel-restore logic. */ - for (te = AH->toc->next; te != AH->toc; te = te->next) + if (ropt->number_of_jobs > 1 && ropt->useDB) + restore_toc_entries_parallel(AH); + else { - AH->currentTE = te; - - /* Work out what, if anything, we want from this entry */ - reqs = _tocEntryRequired(te, ropt, false); - - /* Dump any relevant dump warnings to stderr */ - if (!ropt->suppressDumpWarnings && strcmp(te->desc, "WARNING") == 0) - { - if (!ropt->dataOnly && te->defn != NULL && strlen(te->defn) != 0) - write_msg(modulename, "warning from original dump file: %s\n", te->defn); - else if (te->copyStmt != NULL && strlen(te->copyStmt) != 0) - write_msg(modulename, "warning from original dump file: %s\n", te->copyStmt); - } - - defnDumped = false; - - if ((reqs & REQ_SCHEMA) != 0) /* We want the schema */ - { - ahlog(AH, 1, "creating %s %s\n", te->desc, te->tag); - - _printTocEntry(AH, te, ropt, false, false); - defnDumped = true; - - /* - * If we could not create a table and --no-data-for-failed-tables - * was given, ignore the corresponding TABLE DATA - */ - if (ropt->noDataForFailedTables && - AH->lastErrorTE == te && - strcmp(te->desc, "TABLE") == 0) - { - TocEntry *tes; - - ahlog(AH, 1, "table \"%s\" could not be created, will not restore its data\n", - te->tag); - - for (tes = te->next; tes != AH->toc; tes = tes->next) - { - if (strcmp(tes->desc, "TABLE DATA") == 0 && - strcmp(tes->tag, te->tag) == 0 && - strcmp(tes->namespace ? tes->namespace : "", - te->namespace ? te->namespace : "") == 0) - { - /* mark it unwanted */ - ropt->idWanted[tes->dumpId - 1] = false; - break; - } - } - } - - /* If we created a DB, connect to it... */ - if (strcmp(te->desc, "DATABASE") == 0) - { - ahlog(AH, 1, "connecting to new database \"%s\"\n", te->tag); - _reconnectToDB(AH, te->tag); - } - } - - /* - * If we have a data component, then process it - */ - if ((reqs & REQ_DATA) != 0) - { - /* - * hadDumper will be set if there is genuine data component for - * this node. Otherwise, we need to check the defn field for - * statements that need to be executed in data-only restores. - */ - if (te->hadDumper) - { - /* - * If we can output the data, then restore it. - */ - if (AH->PrintTocDataPtr !=NULL && (reqs & REQ_DATA) != 0) - { -#ifndef HAVE_LIBZ - if (AH->compression != 0) - die_horribly(AH, modulename, "cannot restore from compressed archive (compression not supported in this installation)\n"); -#endif - - _printTocEntry(AH, te, ropt, true, false); - - if (strcmp(te->desc, "BLOBS") == 0 || - strcmp(te->desc, "BLOB COMMENTS") == 0) - { - ahlog(AH, 1, "restoring %s\n", te->desc); - - _selectOutputSchema(AH, "pg_catalog"); - - (*AH->PrintTocDataPtr) (AH, te, ropt); - } - else - { - _disableTriggersIfNecessary(AH, te, ropt); - - /* Select owner and schema as necessary */ - _becomeOwner(AH, te); - _selectOutputSchema(AH, te->namespace); - - ahlog(AH, 1, "restoring data for table \"%s\"\n", - te->tag); - - /* - * If we have a copy statement, use it. As of V1.3, - * these are separate to allow easy import from - * withing a database connection. Pre 1.3 archives can - * not use DB connections and are sent to output only. - * - * For V1.3+, the table data MUST have a copy - * statement so that we can go into appropriate mode - * with libpq. - */ - if (te->copyStmt && strlen(te->copyStmt) > 0) - { - ahprintf(AH, "%s", te->copyStmt); - AH->writingCopyData = true; - } - - (*AH->PrintTocDataPtr) (AH, te, ropt); - - AH->writingCopyData = false; - - _enableTriggersIfNecessary(AH, te, ropt); - } - } - } - else if (!defnDumped) - { - /* If we haven't already dumped the defn part, do so now */ - ahlog(AH, 1, "executing %s %s\n", te->desc, te->tag); - _printTocEntry(AH, te, ropt, false, false); - } - } - } /* end loop over TOC entries */ + for (te = AH->toc->next; te != AH->toc; te = te->next) + (void) restore_toc_entry(AH, te, ropt, false); + } /* * Scan TOC again to output ownership commands and ACLs @@ -415,7 +392,8 @@ RestoreArchive(Archive *AHX, RestoreOptions *ropt) /* Work out what, if anything, we want from this entry */ reqs = _tocEntryRequired(te, ropt, true); - if ((reqs & REQ_SCHEMA) != 0) /* We want the schema */ + /* Both schema and data objects might now have ownership/ACLs */ + if ((reqs & (REQ_SCHEMA | REQ_DATA)) != 0) { ahlog(AH, 1, "setting owner and privileges for %s %s\n", te->desc, te->tag); @@ -451,6 +429,193 @@ RestoreArchive(Archive *AHX, RestoreOptions *ropt) } } +/* + * Restore a single TOC item. Used in both parallel and non-parallel restore; + * is_parallel is true if we are in a worker child process. + * + * Returns 0 normally, but WORKER_CREATE_DONE or WORKER_INHIBIT_DATA if + * the parallel parent has to make the corresponding status update. + */ +static int +restore_toc_entry(ArchiveHandle *AH, TocEntry *te, + RestoreOptions *ropt, bool is_parallel) +{ + int retval = 0; + teReqs reqs; + bool defnDumped; + + AH->currentTE = te; + + /* Work out what, if anything, we want from this entry */ + reqs = _tocEntryRequired(te, ropt, false); + + /* Dump any relevant dump warnings to stderr */ + if (!ropt->suppressDumpWarnings && strcmp(te->desc, "WARNING") == 0) + { + if (!ropt->dataOnly && te->defn != NULL && strlen(te->defn) != 0) + write_msg(modulename, "warning from original dump file: %s\n", te->defn); + else if (te->copyStmt != NULL && strlen(te->copyStmt) != 0) + write_msg(modulename, "warning from original dump file: %s\n", te->copyStmt); + } + + defnDumped = false; + + if ((reqs & REQ_SCHEMA) != 0) /* We want the schema */ + { + ahlog(AH, 1, "creating %s %s\n", te->desc, te->tag); + + _printTocEntry(AH, te, ropt, false, false); + defnDumped = true; + + if (strcmp(te->desc, "TABLE") == 0) + { + if (AH->lastErrorTE == te) + { + /* + * We failed to create the table. If + * --no-data-for-failed-tables was given, mark the + * corresponding TABLE DATA to be ignored. + * + * In the parallel case this must be done in the parent, so we + * just set the return value. + */ + if (ropt->noDataForFailedTables) + { + if (is_parallel) + retval = WORKER_INHIBIT_DATA; + else + inhibit_data_for_failed_table(AH, te); + } + } + else + { + /* + * We created the table successfully. Mark the corresponding + * TABLE DATA for possible truncation. + * + * In the parallel case this must be done in the parent, so we + * just set the return value. + */ + if (is_parallel) + retval = WORKER_CREATE_DONE; + else + mark_create_done(AH, te); + } + } + + /* If we created a DB, connect to it... */ + if (strcmp(te->desc, "DATABASE") == 0) + { + ahlog(AH, 1, "connecting to new database \"%s\"\n", te->tag); + _reconnectToDB(AH, te->tag); + ropt->dbname = strdup(te->tag); + } + } + + /* + * If we have a data component, then process it + */ + if ((reqs & REQ_DATA) != 0) + { + /* + * hadDumper will be set if there is genuine data component for this + * node. Otherwise, we need to check the defn field for statements + * that need to be executed in data-only restores. + */ + if (te->hadDumper) + { + /* + * If we can output the data, then restore it. + */ + if (AH->PrintTocDataPtr !=NULL && (reqs & REQ_DATA) != 0) + { + _printTocEntry(AH, te, ropt, true, false); + + if (strcmp(te->desc, "BLOBS") == 0 || + strcmp(te->desc, "BLOB COMMENTS") == 0) + { + ahlog(AH, 1, "restoring %s\n", te->desc); + + _selectOutputSchema(AH, "pg_catalog"); + + (*AH->PrintTocDataPtr) (AH, te, ropt); + } + else + { + _disableTriggersIfNecessary(AH, te, ropt); + + /* Select owner and schema as necessary */ + _becomeOwner(AH, te); + _selectOutputSchema(AH, te->namespace); + + ahlog(AH, 1, "restoring data for table \"%s\"\n", + te->tag); + + /* + * In parallel restore, if we created the table earlier in + * the run then we wrap the COPY in a transaction and + * precede it with a TRUNCATE. If archiving is not on + * this prevents WAL-logging the COPY. This obtains a + * speedup similar to that from using single_txn mode in + * non-parallel restores. + */ + if (is_parallel && te->created) + { + /* + * Parallel restore is always talking directly to a + * server, so no need to see if we should issue BEGIN. + */ + StartTransaction(AH); + + /* + * If the server version is >= 8.4, make sure we issue + * TRUNCATE with ONLY so that child tables are not + * wiped. + */ + ahprintf(AH, "TRUNCATE TABLE %s%s;\n\n", + (PQserverVersion(AH->connection) >= 80400 ? + "ONLY " : ""), + fmtId(te->tag)); + } + + /* + * If we have a copy statement, use it. As of V1.3, these + * are separate to allow easy import from withing a + * database connection. Pre 1.3 archives can not use DB + * connections and are sent to output only. + * + * For V1.3+, the table data MUST have a copy statement so + * that we can go into appropriate mode with libpq. + */ + if (te->copyStmt && strlen(te->copyStmt) > 0) + { + ahprintf(AH, "%s", te->copyStmt); + AH->writingCopyData = true; + } + + (*AH->PrintTocDataPtr) (AH, te, ropt); + + AH->writingCopyData = false; + + /* close out the transaction started above */ + if (is_parallel && te->created) + CommitTransaction(AH); + + _enableTriggersIfNecessary(AH, te, ropt); + } + } + } + else if (!defnDumped) + { + /* If we haven't already dumped the defn part, do so now */ + ahlog(AH, 1, "executing %s %s\n", te->desc, te->tag); + _printTocEntry(AH, te, ropt, false, false); + } + } + + return retval; +} + /* * Allocate a new RestoreOptions block. * This is mainly so we can initialize it, but also for future expansion, @@ -462,9 +627,9 @@ NewRestoreOptions(void) opts = (RestoreOptions *) calloc(1, sizeof(RestoreOptions)); + /* set any fields that shouldn't default to zeroes */ opts->format = archUnknown; - opts->suppressDumpWarnings = false; - opts->exit_on_error = false; + opts->promptPassword = TRI_DEFAULT; return opts; } @@ -550,7 +715,8 @@ ArchiveEntry(Archive *AHX, const char *namespace, const char *tablespace, const char *owner, bool withOids, - const char *desc, const char *defn, + const char *desc, teSection section, + const char *defn, const char *dropStmt, const char *copyStmt, const DumpId *deps, int nDeps, DataDumperPtr dumpFn, void *dumpArg) @@ -573,6 +739,7 @@ ArchiveEntry(Archive *AHX, newToc->catalogId = catalogId; newToc->dumpId = dumpId; + newToc->section = section; newToc->tag = strdup(tag); newToc->namespace = namespace ? strdup(namespace) : NULL; @@ -611,7 +778,7 @@ void PrintTOCSummary(Archive *AHX, RestoreOptions *ropt) { ArchiveHandle *AH = (ArchiveHandle *) AHX; - TocEntry *te = AH->toc->next; + TocEntry *te; OutputContext sav; char *fmtName; @@ -650,14 +817,25 @@ PrintTOCSummary(Archive *AHX, RestoreOptions *ropt) ahprintf(AH, ";\n;\n; Selected TOC Entries:\n;\n"); - while (te != AH->toc) + /* We should print DATABASE entries whether or not -C was specified */ + ropt->createDB = 1; + + for (te = AH->toc->next; te != AH->toc; te = te->next) { - if (_tocEntryRequired(te, ropt, true) != 0) + if (ropt->verbose || _tocEntryRequired(te, ropt, true) != 0) ahprintf(AH, "%d; %u %u %s %s %s %s\n", te->dumpId, te->catalogId.tableoid, te->catalogId.oid, te->desc, te->namespace ? te->namespace : "-", te->tag, te->owner); - te = te->next; + if (ropt->verbose && te->nDeps > 0) + { + int i; + + ahprintf(AH, ";\tdepends on:"); + for (i = 0; i < te->nDeps; i++) + ahprintf(AH, " %d", te->dependencies[i]); + ahprintf(AH, "\n"); + } } if (ropt->filename) @@ -729,7 +907,10 @@ EndRestoreBlobs(ArchiveHandle *AH) ahprintf(AH, "COMMIT;\n\n"); } - ahlog(AH, 1, "restored %d large objects\n", AH->blobCount); + ahlog(AH, 1, ngettext("restored %d large object\n", + "restored %d large objects\n", + AH->blobCount), + AH->blobCount); } @@ -737,8 +918,9 @@ EndRestoreBlobs(ArchiveHandle *AH) * Called by a format handler to initiate restoration of a blob */ void -StartRestoreBlob(ArchiveHandle *AH, Oid oid) +StartRestoreBlob(ArchiveHandle *AH, Oid oid, bool drop) { + bool old_blob_style = (AH->version < K_VERS_1_12); Oid loOid; AH->blobCount++; @@ -748,20 +930,32 @@ StartRestoreBlob(ArchiveHandle *AH, Oid oid) ahlog(AH, 2, "restoring large object with OID %u\n", oid); + /* With an old archive we must do drop and create logic here */ + if (old_blob_style && drop) + DropBlobIfExists(AH, oid); + if (AH->connection) { - loOid = lo_create(AH->connection, oid); - if (loOid == 0 || loOid != oid) - die_horribly(AH, modulename, "could not create large object %u\n", - oid); - + if (old_blob_style) + { + loOid = lo_create(AH->connection, oid); + if (loOid == 0 || loOid != oid) + die_horribly(AH, modulename, "could not create large object %u: %s", + oid, PQerrorMessage(AH->connection)); + } AH->loFd = lo_open(AH->connection, oid, INV_WRITE); if (AH->loFd == -1) - die_horribly(AH, modulename, "could not open large object\n"); + die_horribly(AH, modulename, "could not open large object %u: %s", + oid, PQerrorMessage(AH->connection)); } else { - ahprintf(AH, "SELECT lo_open(lo_create(%u), %d);\n", oid, INV_WRITE); + if (old_blob_style) + ahprintf(AH, "SELECT pg_catalog.lo_open(pg_catalog.lo_create('%u'), %d);\n", + oid, INV_WRITE); + else + ahprintf(AH, "SELECT pg_catalog.lo_open('%u', %d);\n", + oid, INV_WRITE); } AH->writingBlob = 1; @@ -785,7 +979,7 @@ EndRestoreBlob(ArchiveHandle *AH, Oid oid) } else { - ahprintf(AH, "SELECT lo_close(0);\n\n"); + ahprintf(AH, "SELECT pg_catalog.lo_close(0);\n\n"); } } @@ -803,15 +997,11 @@ SortTocFromFile(Archive *AHX, RestoreOptions *ropt) char *endptr; DumpId id; TocEntry *te; - TocEntry *tePrev; /* Allocate space for the 'wanted' array, and init it */ ropt->idWanted = (bool *) malloc(sizeof(bool) * AH->maxDumpId); memset(ropt->idWanted, 0, sizeof(bool) * AH->maxDumpId); - /* Set prev entry as head of list */ - tePrev = AH->toc; - /* Setup the file */ fh = fopen(ropt->tocFile, PG_BINARY_R); if (!fh) @@ -826,7 +1016,7 @@ SortTocFromFile(Archive *AHX, RestoreOptions *ropt) cmnt[0] = '\0'; /* Ignore if all blank */ - if (strspn(buf, " \t\r") == strlen(buf)) + if (strspn(buf, " \t\r\n") == strlen(buf)) continue; /* Get an ID, check it's valid and not already seen */ @@ -844,10 +1034,21 @@ SortTocFromFile(Archive *AHX, RestoreOptions *ropt) die_horribly(AH, modulename, "could not find entry for ID %d\n", id); + /* Mark it wanted */ ropt->idWanted[id - 1] = true; - _moveAfter(AH, tePrev, te); - tePrev = te; + /* + * Move each item to the end of the list as it is selected, so that + * they are placed in the desired order. Any unwanted items will end + * up at the front of the list, which may seem unintuitive but it's + * what we need. In an ordinary serial restore that makes no + * difference, but in a parallel restore we need to mark unrestored + * items' dependencies as satisfied before we start examining + * restorable items. Otherwise they could have surprising + * side-effects on the order in which restorable items actually get + * restored. + */ + _moveBefore(AH, AH->toc, te); } if (fclose(fh) != 0) @@ -1075,7 +1276,9 @@ dump_lo_buf(ArchiveHandle *AH) size_t res; res = lo_write(AH->connection, AH->loFd, AH->lo_buf, AH->lo_buf_used); - ahlog(AH, 5, "wrote %lu bytes of large object data (result = %lu)\n", + ahlog(AH, 5, ngettext("wrote %lu byte of large object data (result = %lu)\n", + "wrote %lu bytes of large object data (result = %lu)\n", + AH->lo_buf_used), (unsigned long) AH->lo_buf_used, (unsigned long) res); if (res != AH->lo_buf_used) die_horribly(AH, modulename, @@ -1084,20 +1287,19 @@ dump_lo_buf(ArchiveHandle *AH) } else { - unsigned char *str; - size_t len; + PQExpBuffer buf = createPQExpBuffer(); - str = PQescapeBytea((const unsigned char *) AH->lo_buf, - AH->lo_buf_used, &len); - if (!str) - die_horribly(AH, modulename, "out of memory\n"); + appendByteaLiteralAHX(buf, + (const unsigned char *) AH->lo_buf, + AH->lo_buf_used, + AH); /* Hack: turn off writingBlob so ahwrite doesn't recurse to here */ AH->writingBlob = 0; - ahprintf(AH, "SELECT lowrite(0, '%s');\n", str); + ahprintf(AH, "SELECT pg_catalog.lowrite(0, %s);\n", buf->data); AH->writingBlob = 1; - free(str); + destroyPQExpBuffer(buf); } AH->lo_buf_used = 0; } @@ -1278,45 +1480,47 @@ warn_or_die_horribly(ArchiveHandle *AH, va_end(ap); } +#ifdef NOT_USED + static void _moveAfter(ArchiveHandle *AH, TocEntry *pos, TocEntry *te) { + /* Unlink te from list */ te->prev->next = te->next; te->next->prev = te->prev; + /* and insert it after "pos" */ te->prev = pos; te->next = pos->next; - pos->next->prev = te; pos->next = te; } -#ifdef NOT_USED +#endif static void _moveBefore(ArchiveHandle *AH, TocEntry *pos, TocEntry *te) { + /* Unlink te from list */ te->prev->next = te->next; te->next->prev = te->prev; + /* and insert it before "pos" */ te->prev = pos->prev; te->next = pos; pos->prev->next = te; pos->prev = te; } -#endif static TocEntry * getTocEntryByDumpId(ArchiveHandle *AH, DumpId id) { TocEntry *te; - te = AH->toc->next; - while (te != AH->toc) + for (te = AH->toc->next; te != AH->toc; te = te->next) { if (te->dumpId == id) return te; - te = te->next; } return NULL; } @@ -1564,6 +1768,11 @@ _discoverArchiveFormat(ArchiveHandle *AH) if (strncmp(sig, "PGDMP", 5) == 0) { + /* + * Finish reading (most of) a custom-format header. + * + * NB: this code must agree with ReadHead(). + */ AH->vmaj = fgetc(fh); AH->vmin = fgetc(fh); @@ -1627,11 +1836,6 @@ _discoverArchiveFormat(ArchiveHandle *AH) else AH->lookaheadLen = 0; /* Don't bother since we've reset the file */ -#if 0 - write_msg(modulename, "read %lu bytes into lookahead buffer\n", - (unsigned long) AH->lookaheadLen); -#endif - /* Close the file */ if (wantClose) if (fclose(fh) != 0) @@ -1665,6 +1869,9 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt, AH->vmin = K_VERS_MINOR; AH->vrev = K_VERS_REV; + /* Make a convenient integer 00 */ + AH->version = ((AH->vmaj * 256 + AH->vmin) * 256 + AH->vrev) * 256 + 0; + /* initialize for backwards compatible string processing */ AH->public.encoding = 0; /* PG_SQL_ASCII */ AH->public.std_strings = false; @@ -1673,6 +1880,8 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt, AH->public.exit_on_error = true; AH->public.n_errors = 0; + AH->archiveDumpVersion = PG_VERSION; + AH->createDate = time(NULL); AH->intSize = sizeof(int); @@ -1691,9 +1900,9 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt, else AH->fSpec = NULL; - AH->currUser = strdup(""); /* So it's valid, but we can free() it later - * if necessary */ - AH->currSchema = strdup(""); /* ditto */ + AH->currUser = NULL; /* unknown */ + AH->currSchema = NULL; /* ditto */ + AH->currTablespace = NULL; /* ditto */ AH->currWithOids = -1; /* force SET */ AH->toc = (TocEntry *) calloc(1, sizeof(TocEntry)); @@ -1729,15 +1938,13 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt, } #endif -#if 0 - write_msg(modulename, "archive format is %d\n", fmt); -#endif - if (fmt == archUnknown) AH->format = _discoverArchiveFormat(AH); else AH->format = fmt; + AH->promptPassword = TRI_DEFAULT; + switch (AH->format) { case archCustom: @@ -1767,11 +1974,11 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt, void WriteDataChunks(ArchiveHandle *AH) { - TocEntry *te = AH->toc->next; + TocEntry *te; StartDataPtr startPtr; EndDataPtr endPtr; - while (te != AH->toc) + for (te = AH->toc->next; te != AH->toc; te = te->next) { if (te->dataDumper != NULL) { @@ -1806,7 +2013,6 @@ WriteDataChunks(ArchiveHandle *AH) (*endPtr) (AH, te); AH->currToc = NULL; } - te = te->next; } } @@ -1834,6 +2040,7 @@ WriteToc(ArchiveHandle *AH) WriteStr(AH, te->tag); WriteStr(AH, te->desc); + WriteInt(AH, te->section); WriteStr(AH, te->defn); WriteStr(AH, te->dropStmt); WriteStr(AH, te->copyStmt); @@ -1863,8 +2070,7 @@ ReadToc(ArchiveHandle *AH) DumpId *deps; int depIdx; int depSize; - - TocEntry *te = AH->toc->next; + TocEntry *te; AH->tocCount = ReadInt(AH); AH->maxDumpId = 0; @@ -1899,6 +2105,37 @@ ReadToc(ArchiveHandle *AH) te->tag = ReadStr(AH); te->desc = ReadStr(AH); + + if (AH->version >= K_VERS_1_11) + { + te->section = ReadInt(AH); + } + else + { + /* + * Rules for pre-8.4 archives wherein pg_dump hasn't classified + * the entries into sections. This list need not cover entry + * types added later than 8.4. + */ + if (strcmp(te->desc, "COMMENT") == 0 || + strcmp(te->desc, "ACL") == 0 || + strcmp(te->desc, "ACL LANGUAGE") == 0) + te->section = SECTION_NONE; + else if (strcmp(te->desc, "TABLE DATA") == 0 || + strcmp(te->desc, "BLOBS") == 0 || + strcmp(te->desc, "BLOB COMMENTS") == 0) + te->section = SECTION_DATA; + else if (strcmp(te->desc, "CONSTRAINT") == 0 || + strcmp(te->desc, "CHECK CONSTRAINT") == 0 || + strcmp(te->desc, "FK CONSTRAINT") == 0 || + strcmp(te->desc, "INDEX") == 0 || + strcmp(te->desc, "RULE") == 0 || + strcmp(te->desc, "TRIGGER") == 0) + te->section = SECTION_POST_DATA; + else + te->section = SECTION_PRE_DATA; + } + te->defn = ReadStr(AH); te->dropStmt = ReadStr(AH); @@ -2037,10 +2274,15 @@ _tocEntryRequired(TocEntry *te, RestoreOptions *ropt, bool include_acls) return 0; /* If it's an ACL, maybe ignore it */ - if ((!include_acls || ropt->aclsSkip) && strcmp(te->desc, "ACL") == 0) + if ((!include_acls || ropt->aclsSkip) && _tocEntryIsACL(te)) + return 0; + + /* If it's security labels, maybe ignore it */ + if (ropt->skip_seclabel && strcmp(te->desc, "SECURITY LABEL") == 0) return 0; - if (!ropt->create && strcmp(te->desc, "DATABASE") == 0) + /* Ignore DATABASE entry unless we should create it */ + if (!ropt->createDB && strcmp(te->desc, "DATABASE") == 0) return 0; /* Check options for selective dump/restore */ @@ -2094,9 +2336,20 @@ _tocEntryRequired(TocEntry *te, RestoreOptions *ropt, bool include_acls) if (!te->hadDumper) { /* - * Special Case: If 'SEQUENCE SET' then it is considered a data entry + * Special Case: If 'SEQUENCE SET' or anything to do with BLOBs, then + * it is considered a data entry. We don't need to check for the + * BLOBS entry or old-style BLOB COMMENTS, because they will have + * hadDumper = true ... but we do need to check new-style BLOB + * comments. */ - if (strcmp(te->desc, "SEQUENCE SET") == 0) + if (strcmp(te->desc, "SEQUENCE SET") == 0 || + strcmp(te->desc, "BLOB") == 0 || + (strcmp(te->desc, "ACL") == 0 && + strncmp(te->tag, "LARGE OBJECT ", 13) == 0) || + (strcmp(te->desc, "COMMENT") == 0 && + strncmp(te->tag, "LARGE OBJECT ", 13) == 0) || + (strcmp(te->desc, "SECURITY LABEL") == 0 && + strncmp(te->tag, "LARGE OBJECT ", 13) == 0)) res = res & REQ_DATA; else res = res & ~REQ_DATA; @@ -2128,6 +2381,20 @@ _tocEntryRequired(TocEntry *te, RestoreOptions *ropt, bool include_acls) return res; } +/* + * Identify TOC entries that are ACLs. + */ +static bool +_tocEntryIsACL(TocEntry *te) +{ + /* "ACL LANGUAGE" was a crock emitted only in PG 7.4 */ + if (strcmp(te->desc, "ACL") == 0 || + strcmp(te->desc, "ACL LANGUAGE") == 0 || + strcmp(te->desc, "DEFAULT ACL") == 0) + return true; + return false; +} + /* * Issue SET commands for parameters that we want to have set the same way * at all times during execution of a restore script. @@ -2135,6 +2402,9 @@ _tocEntryRequired(TocEntry *te, RestoreOptions *ropt, bool include_acls) static void _doSetFixedOutputState(ArchiveHandle *AH) { + /* Disable statement_timeout in archive for pg_restore/psql */ + ahprintf(AH, "SET statement_timeout = 0;\n"); + /* Select the correct character set encoding */ ahprintf(AH, "SET client_encoding = '%s';\n", pg_encoding_to_char(AH->public.encoding)); @@ -2143,6 +2413,10 @@ _doSetFixedOutputState(ArchiveHandle *AH) ahprintf(AH, "SET standard_conforming_strings = %s;\n", AH->public.std_strings ? "on" : "off"); + /* Select the role to be used during restore */ + if (AH->ropt && AH->ropt->use_role) + ahprintf(AH, "SET ROLE %s;\n", fmtId(AH->ropt->use_role)); + /* Make sure function checking is disabled */ ahprintf(AH, "SET check_function_bodies = false;\n"); @@ -2257,13 +2531,15 @@ _reconnectToDB(ArchiveHandle *AH, const char *dbname) */ if (AH->currUser) free(AH->currUser); + AH->currUser = NULL; - AH->currUser = strdup(""); - - /* don't assume we still know the output schema */ + /* don't assume we still know the output schema, tablespace, etc either */ if (AH->currSchema) free(AH->currSchema); - AH->currSchema = strdup(""); + AH->currSchema = NULL; + if (AH->currTablespace) + free(AH->currTablespace); + AH->currTablespace = NULL; AH->currWithOids = -1; /* re-establish fixed state */ @@ -2292,12 +2568,11 @@ _becomeUser(ArchiveHandle *AH, const char *user) */ if (AH->currUser) free(AH->currUser); - AH->currUser = strdup(user); } /* - * Become the owner of the the given TOC entry object. If + * Become the owner of the given TOC entry object. If * changes in ownership are not allowed, this doesn't do anything. */ static void @@ -2451,6 +2726,7 @@ _getObjectDescription(PQExpBuffer buf, TocEntry *te, ArchiveHandle *AH) strcmp(type, "DOMAIN") == 0 || strcmp(type, "TABLE") == 0 || strcmp(type, "TYPE") == 0 || + strcmp(type, "FOREIGN TABLE") == 0 || strcmp(type, "TEXT SEARCH DICTIONARY") == 0 || strcmp(type, "TEXT SEARCH CONFIGURATION") == 0) { @@ -2476,12 +2752,22 @@ _getObjectDescription(PQExpBuffer buf, TocEntry *te, ArchiveHandle *AH) /* objects named by just a name */ if (strcmp(type, "DATABASE") == 0 || strcmp(type, "PROCEDURAL LANGUAGE") == 0 || - strcmp(type, "SCHEMA") == 0) + strcmp(type, "SCHEMA") == 0 || + strcmp(type, "FOREIGN DATA WRAPPER") == 0 || + strcmp(type, "SERVER") == 0 || + strcmp(type, "USER MAPPING") == 0) { appendPQExpBuffer(buf, "%s %s", type, fmtId(te->tag)); return; } + /* BLOBs just have a name, but it's numeric so must not use fmtId */ + if (strcmp(type, "BLOB") == 0) + { + appendPQExpBuffer(buf, "LARGE OBJECT %s", te->tag); + return; + } + /* * These object types require additional decoration. Fortunately, the * information needed is exactly what's in the DROP command. @@ -2520,12 +2806,12 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt, bool isDat /* ACLs are dumped only during acl pass */ if (acl_pass) { - if (strcmp(te->desc, "ACL") != 0) + if (!_tocEntryIsACL(te)) return; } else { - if (strcmp(te->desc, "ACL") == 0) + if (_tocEntryIsACL(te)) return; } @@ -2539,6 +2825,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt, bool isDat if (strcmp(te->desc, "SCHEMA") == 0 && strcmp(te->tag, "public") == 0) return; + /* The comment restore would require super-user privs, so avoid it. */ if (strcmp(te->desc, "COMMENT") == 0 && strcmp(te->tag, "SCHEMA public") == 0) return; @@ -2618,6 +2905,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt, bool isDat strlen(te->owner) > 0 && strlen(te->dropStmt) > 0) { if (strcmp(te->desc, "AGGREGATE") == 0 || + strcmp(te->desc, "BLOB") == 0 || strcmp(te->desc, "CONVERSION") == 0 || strcmp(te->desc, "DATABASE") == 0 || strcmp(te->desc, "DOMAIN") == 0 || @@ -2631,8 +2919,11 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt, bool isDat strcmp(te->desc, "TYPE") == 0 || strcmp(te->desc, "VIEW") == 0 || strcmp(te->desc, "SEQUENCE") == 0 || + strcmp(te->desc, "FOREIGN TABLE") == 0 || strcmp(te->desc, "TEXT SEARCH DICTIONARY") == 0 || - strcmp(te->desc, "TEXT SEARCH CONFIGURATION") == 0) + strcmp(te->desc, "TEXT SEARCH CONFIGURATION") == 0 || + strcmp(te->desc, "FOREIGN DATA WRAPPER") == 0 || + strcmp(te->desc, "SERVER") == 0) { PQExpBuffer temp = createPQExpBuffer(); @@ -2649,7 +2940,8 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt, bool isDat strcmp(te->desc, "FK CONSTRAINT") == 0 || strcmp(te->desc, "INDEX") == 0 || strcmp(te->desc, "RULE") == 0 || - strcmp(te->desc, "TRIGGER") == 0) + strcmp(te->desc, "TRIGGER") == 0 || + strcmp(te->desc, "USER MAPPING") == 0) { /* these object types don't have separate owners */ } @@ -2664,7 +2956,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt, bool isDat * If it's an ACL entry, it might contain SET SESSION AUTHORIZATION * commands, so we can no longer assume we know the current auth setting. */ - if (strncmp(te->desc, "ACL", 3) == 0) + if (acl_pass) { if (AH->currUser) free(AH->currUser); @@ -2715,7 +3007,12 @@ ReadHead(ArchiveHandle *AH) int fmt; struct tm crtm; - /* If we haven't already read the header... */ + /* + * If we haven't already read the header, do so. + * + * NB: this code must agree with _discoverArchiveFormat(). Maybe find a + * way to unify the cases? + */ if (!AH->readHeader) { if ((*AH->ReadBufPtr) (AH, tmpMag, 5) != 5) @@ -2734,7 +3031,6 @@ ReadHead(ArchiveHandle *AH) AH->version = ((AH->vmaj * 256 + AH->vmin) * 256 + AH->vrev) * 256 + 0; - if (AH->version < K_VERS_1_0 || AH->version > K_VERS_MAX) die_horribly(AH, modulename, "unsupported version (%d.%d) in file header\n", AH->vmaj, AH->vmin); @@ -2797,34 +3093,42 @@ ReadHead(ArchiveHandle *AH) AH->archiveRemoteVersion = ReadStr(AH); AH->archiveDumpVersion = ReadStr(AH); } - } /* * checkSeek - * check to see if fseek can be performed. + * check to see if ftell/fseek can be performed. */ - bool checkSeek(FILE *fp) { + pgoff_t tpos; - if (fseeko(fp, 0, SEEK_CUR) != 0) + /* + * If pgoff_t is wider than long, we must have "real" fseeko and not an + * emulation using fseek. Otherwise report no seek capability. + */ +#ifndef HAVE_FSEEKO + if (sizeof(pgoff_t) > sizeof(long)) return false; - else if (sizeof(pgoff_t) > sizeof(long)) +#endif - /* - * At this point, pgoff_t is too large for long, so we return based on - * whether an pgoff_t version of fseek is available. - */ -#ifdef HAVE_FSEEKO - return true; -#else + /* Check that ftello works on this file */ + errno = 0; + tpos = ftello(fp); + if (errno) return false; -#endif - else - return true; + + /* + * Check that fseeko(SEEK_SET) works, too. NB: we used to try to test + * this with fseeko(fp, 0, SEEK_CUR). But some platforms treat that as a + * successful no-op even on files that are otherwise unseekable. + */ + if (fseeko(fp, tpos, SEEK_SET) != 0) + return false; + + return true; } @@ -2851,3 +3155,990 @@ dumpTimestamp(ArchiveHandle *AH, const char *msg, time_t tim) localtime(&tim)) != 0) ahprintf(AH, "-- %s %s\n\n", msg, buf); } + + +/* + * Main engine for parallel restore. + * + * Work is done in three phases. + * First we process tocEntries until we come to one that is marked + * SECTION_DATA or SECTION_POST_DATA, in a single connection, just as for a + * standard restore. Second we process the remaining non-ACL steps in + * parallel worker children (threads on Windows, processes on Unix), each of + * which connects separately to the database. Finally we process all the ACL + * entries in a single connection (that happens back in RestoreArchive). + */ +static void +restore_toc_entries_parallel(ArchiveHandle *AH) +{ + RestoreOptions *ropt = AH->ropt; + int n_slots = ropt->number_of_jobs; + ParallelSlot *slots; + int work_status; + int next_slot; + TocEntry pending_list; + TocEntry ready_list; + TocEntry *next_work_item; + thandle ret_child; + TocEntry *te; + + ahlog(AH, 2, "entering restore_toc_entries_parallel\n"); + + /* we haven't got round to making this work for all archive formats */ + if (AH->ClonePtr == NULL || AH->ReopenPtr == NULL) + die_horribly(AH, modulename, "parallel restore is not supported with this archive file format\n"); + + /* doesn't work if the archive represents dependencies as OIDs, either */ + if (AH->version < K_VERS_1_8) + die_horribly(AH, modulename, "parallel restore is not supported with archives made by pre-8.0 pg_dump\n"); + + slots = (ParallelSlot *) calloc(sizeof(ParallelSlot), n_slots); + + /* Adjust dependency information */ + fix_dependencies(AH); + + /* + * Do all the early stuff in a single connection in the parent. There's no + * great point in running it in parallel, in fact it will actually run + * faster in a single connection because we avoid all the connection and + * setup overhead. Also, pg_dump is not currently very good about + * showing all the dependencies of SECTION_PRE_DATA items, so we do not + * risk trying to process them out-of-order. + */ + for (next_work_item = AH->toc->next; next_work_item != AH->toc; next_work_item = next_work_item->next) + { + /* Non-PRE_DATA items are just ignored for now */ + if (next_work_item->section == SECTION_DATA || + next_work_item->section == SECTION_POST_DATA) + continue; + + ahlog(AH, 1, "processing item %d %s %s\n", + next_work_item->dumpId, + next_work_item->desc, next_work_item->tag); + + (void) restore_toc_entry(AH, next_work_item, ropt, false); + + /* there should be no touch of ready_list here, so pass NULL */ + reduce_dependencies(AH, next_work_item, NULL); + } + + /* + * Now close parent connection in prep for parallel steps. We do this + * mainly to ensure that we don't exceed the specified number of parallel + * connections. + */ + PQfinish(AH->connection); + AH->connection = NULL; + + /* blow away any transient state from the old connection */ + if (AH->currUser) + free(AH->currUser); + AH->currUser = NULL; + if (AH->currSchema) + free(AH->currSchema); + AH->currSchema = NULL; + if (AH->currTablespace) + free(AH->currTablespace); + AH->currTablespace = NULL; + AH->currWithOids = -1; + + /* + * Initialize the lists of pending and ready items. After this setup, the + * pending list is everything that needs to be done but is blocked by one + * or more dependencies, while the ready list contains items that have no + * remaining dependencies. Note: we don't yet filter out entries that + * aren't going to be restored. They might participate in dependency + * chains connecting entries that should be restored, so we treat them as + * live until we actually process them. + */ + par_list_header_init(&pending_list); + par_list_header_init(&ready_list); + for (next_work_item = AH->toc->next; next_work_item != AH->toc; next_work_item = next_work_item->next) + { + /* All PRE_DATA items were dealt with above */ + if (next_work_item->section == SECTION_DATA || + next_work_item->section == SECTION_POST_DATA) + { + if (next_work_item->depCount > 0) + par_list_append(&pending_list, next_work_item); + else + par_list_append(&ready_list, next_work_item); + } + } + + /* + * main parent loop + * + * Keep going until there is no worker still running AND there is no work + * left to be done. + */ + + ahlog(AH, 1, "entering main parallel loop\n"); + + while ((next_work_item = get_next_work_item(AH, &ready_list, + slots, n_slots)) != NULL || + work_in_progress(slots, n_slots)) + { + if (next_work_item != NULL) + { + teReqs reqs; + + /* If not to be dumped, don't waste time launching a worker */ + reqs = _tocEntryRequired(next_work_item, AH->ropt, false); + if ((reqs & (REQ_SCHEMA | REQ_DATA)) == 0) + { + ahlog(AH, 1, "skipping item %d %s %s\n", + next_work_item->dumpId, + next_work_item->desc, next_work_item->tag); + + par_list_remove(next_work_item); + reduce_dependencies(AH, next_work_item, &ready_list); + + continue; + } + + if ((next_slot = get_next_slot(slots, n_slots)) != NO_SLOT) + { + /* There is work still to do and a worker slot available */ + thandle child; + RestoreArgs *args; + + ahlog(AH, 1, "launching item %d %s %s\n", + next_work_item->dumpId, + next_work_item->desc, next_work_item->tag); + + par_list_remove(next_work_item); + + /* this memory is dealloced in mark_work_done() */ + args = malloc(sizeof(RestoreArgs)); + args->AH = CloneArchive(AH); + args->te = next_work_item; + + /* run the step in a worker child */ + child = spawn_restore(args); + + slots[next_slot].child_id = child; + slots[next_slot].args = args; + + continue; + } + } + + /* + * If we get here there must be work being done. Either there is no + * work available to schedule (and work_in_progress returned true) or + * there are no slots available. So we wait for a worker to finish, + * and process the result. + */ + ret_child = reap_child(slots, n_slots, &work_status); + + if (WIFEXITED(work_status)) + { + mark_work_done(AH, &ready_list, + ret_child, WEXITSTATUS(work_status), + slots, n_slots); + } + else + { + die_horribly(AH, modulename, "worker process crashed: status %d\n", + work_status); + } + } + + ahlog(AH, 1, "finished main parallel loop\n"); + + /* + * Now reconnect the single parent connection. + */ + ConnectDatabase((Archive *) AH, ropt->dbname, + ropt->pghost, ropt->pgport, ropt->username, + ropt->promptPassword); + + _doSetFixedOutputState(AH); + + /* + * Make sure there is no non-ACL work left due to, say, circular + * dependencies, or some other pathological condition. If so, do it in the + * single parent connection. + */ + for (te = pending_list.par_next; te != &pending_list; te = te->par_next) + { + ahlog(AH, 1, "processing missed item %d %s %s\n", + te->dumpId, te->desc, te->tag); + (void) restore_toc_entry(AH, te, ropt, false); + } + + /* The ACLs will be handled back in RestoreArchive. */ +} + +/* + * create a worker child to perform a restore step in parallel + */ +static thandle +spawn_restore(RestoreArgs *args) +{ + thandle child; + + /* Ensure stdio state is quiesced before forking */ + fflush(NULL); + +#ifndef WIN32 + child = fork(); + if (child == 0) + { + /* in child process */ + parallel_restore(args); + die_horribly(args->AH, modulename, + "parallel_restore should not return\n"); + } + else if (child < 0) + { + /* fork failed */ + die_horribly(args->AH, modulename, + "could not create worker process: %s\n", + strerror(errno)); + } +#else + child = (HANDLE) _beginthreadex(NULL, 0, (void *) parallel_restore, + args, 0, NULL); + if (child == 0) + die_horribly(args->AH, modulename, + "could not create worker thread: %s\n", + strerror(errno)); +#endif + + return child; +} + +/* + * collect status from a completed worker child + */ +static thandle +reap_child(ParallelSlot *slots, int n_slots, int *work_status) +{ +#ifndef WIN32 + /* Unix is so much easier ... */ + return wait(work_status); +#else + static HANDLE *handles = NULL; + int hindex, + snum, + tnum; + thandle ret_child; + DWORD res; + + /* first time around only, make space for handles to listen on */ + if (handles == NULL) + handles = (HANDLE *) calloc(sizeof(HANDLE), n_slots); + + /* set up list of handles to listen to */ + for (snum = 0, tnum = 0; snum < n_slots; snum++) + if (slots[snum].child_id != 0) + handles[tnum++] = slots[snum].child_id; + + /* wait for one to finish */ + hindex = WaitForMultipleObjects(tnum, handles, false, INFINITE); + + /* get handle of finished thread */ + ret_child = handles[hindex - WAIT_OBJECT_0]; + + /* get the result */ + GetExitCodeThread(ret_child, &res); + *work_status = res; + + /* dispose of handle to stop leaks */ + CloseHandle(ret_child); + + return ret_child; +#endif +} + +/* + * are we doing anything now? + */ +static bool +work_in_progress(ParallelSlot *slots, int n_slots) +{ + int i; + + for (i = 0; i < n_slots; i++) + { + if (slots[i].child_id != 0) + return true; + } + return false; +} + +/* + * find the first free parallel slot (if any). + */ +static int +get_next_slot(ParallelSlot *slots, int n_slots) +{ + int i; + + for (i = 0; i < n_slots; i++) + { + if (slots[i].child_id == 0) + return i; + } + return NO_SLOT; +} + + +/* + * Check if te1 has an exclusive lock requirement for an item that te2 also + * requires, whether or not te2's requirement is for an exclusive lock. + */ +static bool +has_lock_conflicts(TocEntry *te1, TocEntry *te2) +{ + int j, + k; + + for (j = 0; j < te1->nLockDeps; j++) + { + for (k = 0; k < te2->nDeps; k++) + { + if (te1->lockDeps[j] == te2->dependencies[k]) + return true; + } + } + return false; +} + + +/* + * Initialize the header of a parallel-processing list. + * + * These are circular lists with a dummy TocEntry as header, just like the + * main TOC list; but we use separate list links so that an entry can be in + * the main TOC list as well as in a parallel-processing list. + */ +static void +par_list_header_init(TocEntry *l) +{ + l->par_prev = l->par_next = l; +} + +/* Append te to the end of the parallel-processing list headed by l */ +static void +par_list_append(TocEntry *l, TocEntry *te) +{ + te->par_prev = l->par_prev; + l->par_prev->par_next = te; + l->par_prev = te; + te->par_next = l; +} + +/* Remove te from whatever parallel-processing list it's in */ +static void +par_list_remove(TocEntry *te) +{ + te->par_prev->par_next = te->par_next; + te->par_next->par_prev = te->par_prev; + te->par_prev = NULL; + te->par_next = NULL; +} + + +/* + * Find the next work item (if any) that is capable of being run now. + * + * To qualify, the item must have no remaining dependencies + * and no requirements for locks that are incompatible with + * items currently running. Items in the ready_list are known to have + * no remaining dependencies, but we have to check for lock conflicts. + * + * Note that the returned item has *not* been removed from ready_list. + * The caller must do that after successfully dispatching the item. + * + * pref_non_data is for an alternative selection algorithm that gives + * preference to non-data items if there is already a data load running. + * It is currently disabled. + */ +static TocEntry * +get_next_work_item(ArchiveHandle *AH, TocEntry *ready_list, + ParallelSlot *slots, int n_slots) +{ + bool pref_non_data = false; /* or get from AH->ropt */ + TocEntry *data_te = NULL; + TocEntry *te; + int i, + k; + + /* + * Bogus heuristics for pref_non_data + */ + if (pref_non_data) + { + int count = 0; + + for (k = 0; k < n_slots; k++) + if (slots[k].args->te != NULL && + slots[k].args->te->section == SECTION_DATA) + count++; + if (n_slots == 0 || count * 4 < n_slots) + pref_non_data = false; + } + + /* + * Search the ready_list until we find a suitable item. + */ + for (te = ready_list->par_next; te != ready_list; te = te->par_next) + { + bool conflicts = false; + + /* + * Check to see if the item would need exclusive lock on something + * that a currently running item also needs lock on, or vice versa. If + * so, we don't want to schedule them together. + */ + for (i = 0; i < n_slots && !conflicts; i++) + { + TocEntry *running_te; + + if (slots[i].args == NULL) + continue; + running_te = slots[i].args->te; + + if (has_lock_conflicts(te, running_te) || + has_lock_conflicts(running_te, te)) + { + conflicts = true; + break; + } + } + + if (conflicts) + continue; + + if (pref_non_data && te->section == SECTION_DATA) + { + if (data_te == NULL) + data_te = te; + continue; + } + + /* passed all tests, so this item can run */ + return te; + } + + if (data_te != NULL) + return data_te; + + ahlog(AH, 2, "no item ready\n"); + return NULL; +} + + +/* + * Restore a single TOC item in parallel with others + * + * this is the procedure run as a thread (Windows) or a + * separate process (everything else). + */ +static parallel_restore_result +parallel_restore(RestoreArgs *args) +{ + ArchiveHandle *AH = args->AH; + TocEntry *te = args->te; + RestoreOptions *ropt = AH->ropt; + int retval; + + /* + * Close and reopen the input file so we have a private file pointer that + * doesn't stomp on anyone else's file pointer, if we're actually going to + * need to read from the file. Otherwise, just close it except on Windows, + * where it will possibly be needed by other threads. + * + * Note: on Windows, since we are using threads not processes, the reopen + * call *doesn't* close the original file pointer but just open a new one. + */ + if (te->section == SECTION_DATA) + (AH->ReopenPtr) (AH); +#ifndef WIN32 + else + (AH->ClosePtr) (AH); +#endif + + /* + * We need our own database connection, too + */ + ConnectDatabase((Archive *) AH, ropt->dbname, + ropt->pghost, ropt->pgport, ropt->username, + ropt->promptPassword); + + _doSetFixedOutputState(AH); + + /* Restore the TOC item */ + retval = restore_toc_entry(AH, te, ropt, true); + + /* And clean up */ + PQfinish(AH->connection); + AH->connection = NULL; + + /* If we reopened the file, we are done with it, so close it now */ + if (te->section == SECTION_DATA) + (AH->ClosePtr) (AH); + + if (retval == 0 && AH->public.n_errors) + retval = WORKER_IGNORED_ERRORS; + +#ifndef WIN32 + exit(retval); +#else + return retval; +#endif +} + + +/* + * Housekeeping to be done after a step has been parallel restored. + * + * Clear the appropriate slot, free all the extra memory we allocated, + * update status, and reduce the dependency count of any dependent items. + */ +static void +mark_work_done(ArchiveHandle *AH, TocEntry *ready_list, + thandle worker, int status, + ParallelSlot *slots, int n_slots) +{ + TocEntry *te = NULL; + int i; + + for (i = 0; i < n_slots; i++) + { + if (slots[i].child_id == worker) + { + slots[i].child_id = 0; + te = slots[i].args->te; + DeCloneArchive(slots[i].args->AH); + free(slots[i].args); + slots[i].args = NULL; + + break; + } + } + + if (te == NULL) + die_horribly(AH, modulename, "could not find slot of finished worker\n"); + + ahlog(AH, 1, "finished item %d %s %s\n", + te->dumpId, te->desc, te->tag); + + if (status == WORKER_CREATE_DONE) + mark_create_done(AH, te); + else if (status == WORKER_INHIBIT_DATA) + { + inhibit_data_for_failed_table(AH, te); + AH->public.n_errors++; + } + else if (status == WORKER_IGNORED_ERRORS) + AH->public.n_errors++; + else if (status != 0) + die_horribly(AH, modulename, "worker process failed: exit code %d\n", + status); + + reduce_dependencies(AH, te, ready_list); +} + + +/* + * Process the dependency information into a form useful for parallel restore. + * + * This function takes care of fixing up some missing or badly designed + * dependencies, and then prepares subsidiary data structures that will be + * used in the main parallel-restore logic, including: + * 1. We build the tocsByDumpId[] index array. + * 2. We build the revDeps[] arrays of incoming dependency dumpIds. + * 3. We set up depCount fields that are the number of as-yet-unprocessed + * dependencies for each TOC entry. + * + * We also identify locking dependencies so that we can avoid trying to + * schedule conflicting items at the same time. + */ +static void +fix_dependencies(ArchiveHandle *AH) +{ + TocEntry *te; + int i; + + /* + * It is convenient to have an array that indexes the TOC entries by dump + * ID, rather than searching the TOC list repeatedly. Entries for dump + * IDs not present in the TOC will be NULL. + * + * NOTE: because maxDumpId is just the highest dump ID defined in the + * archive, there might be dependencies for IDs > maxDumpId. All uses of + * this array must guard against out-of-range dependency numbers. + * + * Also, initialize the depCount/revDeps/nRevDeps fields, and make sure + * the TOC items are marked as not being in any parallel-processing list. + */ + maxDumpId = AH->maxDumpId; + tocsByDumpId = (TocEntry **) calloc(maxDumpId, sizeof(TocEntry *)); + for (te = AH->toc->next; te != AH->toc; te = te->next) + { + tocsByDumpId[te->dumpId - 1] = te; + te->depCount = te->nDeps; + te->revDeps = NULL; + te->nRevDeps = 0; + te->par_prev = NULL; + te->par_next = NULL; + } + + /* + * POST_DATA items that are shown as depending on a table need to be + * re-pointed to depend on that table's data, instead. This ensures they + * won't get scheduled until the data has been loaded. We handle this by + * first finding TABLE/TABLE DATA pairs and then scanning all the + * dependencies. + * + * Note: currently, a TABLE DATA should always have exactly one + * dependency, on its TABLE item. So we don't bother to search, but look + * just at the first dependency. We do trouble to make sure that it's a + * TABLE, if possible. However, if the dependency isn't in the archive + * then just assume it was a TABLE; this is to cover cases where the table + * was suppressed but we have the data and some dependent post-data items. + * + * XXX this is O(N^2) if there are a lot of tables. We ought to fix + * pg_dump to produce correctly-linked dependencies in the first place. + */ + for (te = AH->toc->next; te != AH->toc; te = te->next) + { + if (strcmp(te->desc, "TABLE DATA") == 0 && te->nDeps > 0) + { + DumpId tableId = te->dependencies[0]; + + if (tableId > maxDumpId || + tocsByDumpId[tableId - 1] == NULL || + strcmp(tocsByDumpId[tableId - 1]->desc, "TABLE") == 0) + { + repoint_table_dependencies(AH, tableId, te->dumpId); + } + } + } + + /* + * Pre-8.4 versions of pg_dump neglected to set up a dependency from BLOB + * COMMENTS to BLOBS. Cope. (We assume there's only one BLOBS and only + * one BLOB COMMENTS in such files.) + */ + if (AH->version < K_VERS_1_11) + { + for (te = AH->toc->next; te != AH->toc; te = te->next) + { + if (strcmp(te->desc, "BLOB COMMENTS") == 0 && te->nDeps == 0) + { + TocEntry *te2; + + for (te2 = AH->toc->next; te2 != AH->toc; te2 = te2->next) + { + if (strcmp(te2->desc, "BLOBS") == 0) + { + te->dependencies = (DumpId *) malloc(sizeof(DumpId)); + te->dependencies[0] = te2->dumpId; + te->nDeps++; + te->depCount++; + break; + } + } + break; + } + } + } + + /* + * At this point we start to build the revDeps reverse-dependency arrays, + * so all changes of dependencies must be complete. + */ + + /* + * Count the incoming dependencies for each item. Also, it is possible + * that the dependencies list items that are not in the archive at + * all. Subtract such items from the depCounts. + */ + for (te = AH->toc->next; te != AH->toc; te = te->next) + { + for (i = 0; i < te->nDeps; i++) + { + DumpId depid = te->dependencies[i]; + + if (depid <= maxDumpId && tocsByDumpId[depid - 1] != NULL) + tocsByDumpId[depid - 1]->nRevDeps++; + else + te->depCount--; + } + } + + /* + * Allocate space for revDeps[] arrays, and reset nRevDeps so we can + * use it as a counter below. + */ + for (te = AH->toc->next; te != AH->toc; te = te->next) + { + if (te->nRevDeps > 0) + te->revDeps = (DumpId *) malloc(te->nRevDeps * sizeof(DumpId)); + te->nRevDeps = 0; + } + + /* + * Build the revDeps[] arrays of incoming-dependency dumpIds. This + * had better agree with the loops above. + */ + for (te = AH->toc->next; te != AH->toc; te = te->next) + { + for (i = 0; i < te->nDeps; i++) + { + DumpId depid = te->dependencies[i]; + + if (depid <= maxDumpId && tocsByDumpId[depid - 1] != NULL) + { + TocEntry *otherte = tocsByDumpId[depid - 1]; + + otherte->revDeps[otherte->nRevDeps++] = te->dumpId; + } + } + } + + /* + * Lastly, work out the locking dependencies. + */ + for (te = AH->toc->next; te != AH->toc; te = te->next) + { + te->lockDeps = NULL; + te->nLockDeps = 0; + identify_locking_dependencies(te); + } +} + +/* + * Change dependencies on tableId to depend on tableDataId instead, + * but only in POST_DATA items. + */ +static void +repoint_table_dependencies(ArchiveHandle *AH, + DumpId tableId, DumpId tableDataId) +{ + TocEntry *te; + int i; + + for (te = AH->toc->next; te != AH->toc; te = te->next) + { + if (te->section != SECTION_POST_DATA) + continue; + for (i = 0; i < te->nDeps; i++) + { + if (te->dependencies[i] == tableId) + { + te->dependencies[i] = tableDataId; + ahlog(AH, 2, "transferring dependency %d -> %d to %d\n", + te->dumpId, tableId, tableDataId); + } + } + } +} + +/* + * Identify which objects we'll need exclusive lock on in order to restore + * the given TOC entry (*other* than the one identified by the TOC entry + * itself). Record their dump IDs in the entry's lockDeps[] array. + */ +static void +identify_locking_dependencies(TocEntry *te) +{ + DumpId *lockids; + int nlockids; + int i; + + /* Quick exit if no dependencies at all */ + if (te->nDeps == 0) + return; + + /* Exit if this entry doesn't need exclusive lock on other objects */ + if (!(strcmp(te->desc, "CONSTRAINT") == 0 || + strcmp(te->desc, "CHECK CONSTRAINT") == 0 || + strcmp(te->desc, "FK CONSTRAINT") == 0 || + strcmp(te->desc, "RULE") == 0 || + strcmp(te->desc, "TRIGGER") == 0)) + return; + + /* + * We assume the item requires exclusive lock on each TABLE DATA item + * listed among its dependencies. (This was originally a dependency on + * the TABLE, but fix_dependencies repointed it to the data item. Note + * that all the entry types we are interested in here are POST_DATA, so + * they will all have been changed this way.) + */ + lockids = (DumpId *) malloc(te->nDeps * sizeof(DumpId)); + nlockids = 0; + for (i = 0; i < te->nDeps; i++) + { + DumpId depid = te->dependencies[i]; + + if (depid <= maxDumpId && tocsByDumpId[depid - 1] && + strcmp(tocsByDumpId[depid - 1]->desc, "TABLE DATA") == 0) + lockids[nlockids++] = depid; + } + + if (nlockids == 0) + { + free(lockids); + return; + } + + te->lockDeps = realloc(lockids, nlockids * sizeof(DumpId)); + te->nLockDeps = nlockids; +} + +/* + * Remove the specified TOC entry from the depCounts of items that depend on + * it, thereby possibly making them ready-to-run. Any pending item that + * becomes ready should be moved to the ready list. + */ +static void +reduce_dependencies(ArchiveHandle *AH, TocEntry *te, TocEntry *ready_list) +{ + int i; + + ahlog(AH, 2, "reducing dependencies for %d\n", te->dumpId); + + for (i = 0; i < te->nRevDeps; i++) + { + TocEntry *otherte = tocsByDumpId[te->revDeps[i] - 1]; + + otherte->depCount--; + if (otherte->depCount == 0 && otherte->par_prev != NULL) + { + /* It must be in the pending list, so remove it ... */ + par_list_remove(otherte); + /* ... and add to ready_list */ + par_list_append(ready_list, otherte); + } + } +} + +/* + * Set the created flag on the DATA member corresponding to the given + * TABLE member + */ +static void +mark_create_done(ArchiveHandle *AH, TocEntry *te) +{ + TocEntry *tes; + + for (tes = AH->toc->next; tes != AH->toc; tes = tes->next) + { + if (strcmp(tes->desc, "TABLE DATA") == 0 && + strcmp(tes->tag, te->tag) == 0 && + strcmp(tes->namespace ? tes->namespace : "", + te->namespace ? te->namespace : "") == 0) + { + tes->created = true; + break; + } + } +} + +/* + * Mark the DATA member corresponding to the given TABLE member + * as not wanted + */ +static void +inhibit_data_for_failed_table(ArchiveHandle *AH, TocEntry *te) +{ + RestoreOptions *ropt = AH->ropt; + TocEntry *tes; + + ahlog(AH, 1, "table \"%s\" could not be created, will not restore its data\n", + te->tag); + + for (tes = AH->toc->next; tes != AH->toc; tes = tes->next) + { + if (strcmp(tes->desc, "TABLE DATA") == 0 && + strcmp(tes->tag, te->tag) == 0 && + strcmp(tes->namespace ? tes->namespace : "", + te->namespace ? te->namespace : "") == 0) + { + /* mark it unwanted; we assume idWanted array already exists */ + ropt->idWanted[tes->dumpId - 1] = false; + break; + } + } +} + + +/* + * Clone and de-clone routines used in parallel restoration. + * + * Enough of the structure is cloned to ensure that there is no + * conflict between different threads each with their own clone. + * + * These could be public, but no need at present. + */ +static ArchiveHandle * +CloneArchive(ArchiveHandle *AH) +{ + ArchiveHandle *clone; + + /* Make a "flat" copy */ + clone = (ArchiveHandle *) malloc(sizeof(ArchiveHandle)); + if (clone == NULL) + die_horribly(AH, modulename, "out of memory\n"); + memcpy(clone, AH, sizeof(ArchiveHandle)); + + /* Handle format-independent fields */ + clone->pgCopyBuf = createPQExpBuffer(); + clone->sqlBuf = createPQExpBuffer(); + clone->sqlparse.tagBuf = NULL; + + /* The clone will have its own connection, so disregard connection state */ + clone->connection = NULL; + clone->currUser = NULL; + clone->currSchema = NULL; + clone->currTablespace = NULL; + clone->currWithOids = -1; + + /* savedPassword must be local in case we change it while connecting */ + if (clone->savedPassword) + clone->savedPassword = strdup(clone->savedPassword); + + /* clone has its own error count, too */ + clone->public.n_errors = 0; + + /* Let the format-specific code have a chance too */ + (clone->ClonePtr) (clone); + + return clone; +} + +/* + * Release clone-local storage. + * + * Note: we assume any clone-local connection was already closed. + */ +static void +DeCloneArchive(ArchiveHandle *AH) +{ + /* Clear format-specific state */ + (AH->DeClonePtr) (AH); + + /* Clear state allocated by CloneArchive */ + destroyPQExpBuffer(AH->pgCopyBuf); + destroyPQExpBuffer(AH->sqlBuf); + if (AH->sqlparse.tagBuf) + destroyPQExpBuffer(AH->sqlparse.tagBuf); + + /* Clear any connection-local state */ + if (AH->currUser) + free(AH->currUser); + if (AH->currSchema) + free(AH->currSchema); + if (AH->currTablespace) + free(AH->currTablespace); + if (AH->savedPassword) + free(AH->savedPassword); + + free(AH); +}