Basic foreign table support.
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index cd84c608827636914088e0fc3b935352b3d8974b..64d8d93dda81805540ac70dbd7afcf11390c19fc 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *             $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_archiver.c,v 1.140 2007/01/25 03:30:43 momjian Exp $
+ *             src/bin/pg_dump/pg_backup_archiver.c
  *
  *-------------------------------------------------------------------------
  */
 #include "dumputils.h"
 
 #include <ctype.h>
-
 #include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
 
 #ifdef WIN32
 #include <io.h>
 #endif
 
 #include "libpq/libpq-fs.h"
-#include "mb/pg_wchar.h"
 
+/*
+ * Special exit values from worker children.  We reserve 0 for normal
+ * success; 1 and other small values should be interpreted as crashes.
+ */
+#define WORKER_CREATE_DONE             10
+#define WORKER_INHIBIT_DATA            11
+#define WORKER_IGNORED_ERRORS  12
+
+/*
+ * Unix uses exit to return result from worker child, so function is void.
+ * Windows thread result comes via function return.
+ */
+#ifndef WIN32
+#define parallel_restore_result void
+#else
+#define parallel_restore_result DWORD
+#endif
+
+/* IDs for worker children are either PIDs or thread handles */
+#ifndef WIN32
+#define thandle pid_t
+#else
+#define thandle HANDLE
+#endif
+
+/* Arguments needed for a worker child */
+typedef struct _restore_args
+{
+       ArchiveHandle *AH;
+       TocEntry   *te;
+} RestoreArgs;
+
+/* State for each parallel activity slot */
+typedef struct _parallel_slot
+{
+       thandle         child_id;
+       RestoreArgs *args;
+} ParallelSlot;
+
+#define NO_SLOT (-1)
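
On Unix, a worker child's result reaches the parent through waitpid() and the
WIFEXITED/WEXITSTATUS macros, which is why small integer exit codes are enough to
report status back. A minimal standalone sketch of that round trip (not part of
the patch; the constant is repeated purely for illustration):

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/types.h>
    #include <sys/wait.h>
    #include <unistd.h>

    #define WORKER_CREATE_DONE 10       /* same value as defined above */

    int
    main(void)
    {
        pid_t       pid = fork();
        int         status;

        if (pid == 0)
            exit(WORKER_CREATE_DONE);   /* worker reports its result via exit() */

        waitpid(pid, &status, 0);       /* parent reaps the worker */
        if (WIFEXITED(status))
            printf("worker result: %d\n", WEXITSTATUS(status));
        return 0;
    }
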
 
 const char *progname;
 
-static char *modulename = gettext_noop("archiver");
+static const char *modulename = gettext_noop("archiver");
+
+/* index array created by fix_dependencies -- only used in parallel restore */
+static TocEntry          **tocsByDumpId;                       /* index by dumpId - 1 */
+static DumpId          maxDumpId;                              /* length of above array */
 
 
 static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt,
@@ -58,10 +102,11 @@ static void _selectTablespace(ArchiveHandle *AH, const char *tablespace);
 static void processEncodingEntry(ArchiveHandle *AH, TocEntry *te);
 static void processStdStringsEntry(ArchiveHandle *AH, TocEntry *te);
 static teReqs _tocEntryRequired(TocEntry *te, RestoreOptions *ropt, bool include_acls);
+static bool _tocEntryIsACL(TocEntry *te);
 static void _disableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt);
 static void _enableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt);
 static TocEntry *getTocEntryByDumpId(ArchiveHandle *AH, DumpId id);
-static void _moveAfter(ArchiveHandle *AH, TocEntry *pos, TocEntry *te);
+static void _moveBefore(ArchiveHandle *AH, TocEntry *pos, TocEntry *te);
 static int     _discoverArchiveFormat(ArchiveHandle *AH);
 
 static void dump_lo_buf(ArchiveHandle *AH);
@@ -72,6 +117,35 @@ static void dumpTimestamp(ArchiveHandle *AH, const char *msg, time_t tim);
 static OutputContext SetOutput(ArchiveHandle *AH, char *filename, int compression);
 static void ResetOutput(ArchiveHandle *AH, OutputContext savedContext);
 
+static int restore_toc_entry(ArchiveHandle *AH, TocEntry *te,
+                                 RestoreOptions *ropt, bool is_parallel);
+static void restore_toc_entries_parallel(ArchiveHandle *AH);
+static thandle spawn_restore(RestoreArgs *args);
+static thandle reap_child(ParallelSlot *slots, int n_slots, int *work_status);
+static bool work_in_progress(ParallelSlot *slots, int n_slots);
+static int     get_next_slot(ParallelSlot *slots, int n_slots);
+static void par_list_header_init(TocEntry *l);
+static void par_list_append(TocEntry *l, TocEntry *te);
+static void par_list_remove(TocEntry *te);
+static TocEntry *get_next_work_item(ArchiveHandle *AH,
+                                  TocEntry *ready_list,
+                                  ParallelSlot *slots, int n_slots);
+static parallel_restore_result parallel_restore(RestoreArgs *args);
+static void mark_work_done(ArchiveHandle *AH, TocEntry *ready_list,
+                          thandle worker, int status,
+                          ParallelSlot *slots, int n_slots);
+static void fix_dependencies(ArchiveHandle *AH);
+static bool has_lock_conflicts(TocEntry *te1, TocEntry *te2);
+static void repoint_table_dependencies(ArchiveHandle *AH,
+                                                  DumpId tableId, DumpId tableDataId);
+static void identify_locking_dependencies(TocEntry *te);
+static void reduce_dependencies(ArchiveHandle *AH, TocEntry *te,
+                                       TocEntry *ready_list);
+static void mark_create_done(ArchiveHandle *AH, TocEntry *te);
+static void inhibit_data_for_failed_table(ArchiveHandle *AH, TocEntry *te);
+static ArchiveHandle *CloneArchive(ArchiveHandle *AH);
+static void DeCloneArchive(ArchiveHandle *AH);
+
 
 /*
  *     Wrapper functions.
@@ -132,7 +206,6 @@ RestoreArchive(Archive *AHX, RestoreOptions *ropt)
        TocEntry   *te;
        teReqs          reqs;
        OutputContext sav;
-       bool            defnDumped;
 
        AH->ropt = ropt;
        AH->stage = STAGE_INITIALIZING;
@@ -140,14 +213,36 @@ RestoreArchive(Archive *AHX, RestoreOptions *ropt)
        /*
         * Check for nonsensical option combinations.
         *
-        * NB: create+dropSchema is useless because if you're creating the DB,
+        * NB: createDB+dropSchema is useless because if you're creating the DB,
         * there's no need to drop individual items in it.  Moreover, if we tried
         * to do that then we'd issue the drops in the database initially
         * connected to, not the one we will create, which is very bad...
         */
-       if (ropt->create && ropt->dropSchema)
+       if (ropt->createDB && ropt->dropSchema)
                die_horribly(AH, modulename, "-C and -c are incompatible options\n");
 
+       /*
+        * -C is not compatible with -1, because we can't create a database inside
+        * a transaction block.
+        */
+       if (ropt->createDB && ropt->single_txn)
+               die_horribly(AH, modulename, "-C and -1 are incompatible options\n");
+
+       /*
+        * Make sure we won't need (de)compression we haven't got
+        */
+#ifndef HAVE_LIBZ
+       if (AH->compression != 0 && AH->PrintTocDataPtr !=NULL)
+       {
+               for (te = AH->toc->next; te != AH->toc; te = te->next)
+               {
+                       reqs = _tocEntryRequired(te, ropt, false);
+                       if (te->hadDumper && (reqs & REQ_DATA) != 0)
+                               die_horribly(AH, modulename, "cannot restore from compressed archive (compression not supported in this installation)\n");
+               }
+       }
+#endif
+
        /*
         * If we're using a DB connection, then connect it.
         */
@@ -163,7 +258,7 @@ RestoreArchive(Archive *AHX, RestoreOptions *ropt)
 
                ConnectDatabase(AHX, ropt->dbname,
                                                ropt->pghost, ropt->pgport, ropt->username,
-                                               ropt->requirePassword, ropt->ignoreVersion);
+                                               ropt->promptPassword);
 
                /*
                 * If we're talking to the DB directly, don't send comments since they
@@ -203,14 +298,22 @@ RestoreArchive(Archive *AHX, RestoreOptions *ropt)
 
        /*
         * Setup the output file if necessary.
-        */     
+        */
        if (ropt->filename || ropt->compression)
                sav = SetOutput(AH, ropt->filename, ropt->compression);
 
        ahprintf(AH, "--\n-- PostgreSQL database dump\n--\n\n");
 
        if (AH->public.verbose)
+       {
+               if (AH->archiveRemoteVersion)
+                       ahprintf(AH, "-- Dumped from database version %s\n",
+                                        AH->archiveRemoteVersion);
+               if (AH->archiveDumpVersion)
+                       ahprintf(AH, "-- Dumped by pg_dump version %s\n",
+                                        AH->archiveDumpVersion);
                dumpTimestamp(AH, "Started on", AH->createDate);
+       }
 
        if (ropt->single_txn)
        {
@@ -237,9 +340,9 @@ RestoreArchive(Archive *AHX, RestoreOptions *ropt)
                        AH->currentTE = te;
 
                        reqs = _tocEntryRequired(te, ropt, false /* needn't drop ACLs */ );
-                       if (((reqs & REQ_SCHEMA) != 0) && te->dropStmt)
+                       /* We want anything that's selected and has a dropStmt */
+                       if (((reqs & (REQ_SCHEMA | REQ_DATA)) != 0) && te->dropStmt)
                        {
-                               /* We want the schema */
                                ahlog(AH, 1, "dropping %s %s\n", te->desc, te->tag);
                                /* Select owner and schema as necessary */
                                _becomeOwner(AH, te);
@@ -248,147 +351,36 @@ RestoreArchive(Archive *AHX, RestoreOptions *ropt)
                                ahprintf(AH, "%s", te->dropStmt);
                        }
                }
+
+               /*
+                * _selectOutputSchema may have set currSchema to reflect the effect
+                * of a "SET search_path" command it emitted.  However, by now we may
+                * have dropped that schema; or it might not have existed in the first
+                * place.  In either case the effective value of search_path will not
+                * be what we think.  Forcibly reset currSchema so that we will
+                * re-establish the search_path setting when needed (after creating
+                * the schema).
+                *
+                * If we treated users as pg_dump'able objects then we'd need to reset
+                * currUser here too.
+                */
+               if (AH->currSchema)
+                       free(AH->currSchema);
+               AH->currSchema = NULL;
        }
 
        /*
-        * Now process each non-ACL TOC entry
+        * In serial mode, we now process each non-ACL TOC entry.
+        *
+        * In parallel mode, turn control over to the parallel-restore logic.
         */
-       for (te = AH->toc->next; te != AH->toc; te = te->next)
+       if (ropt->number_of_jobs > 1 && ropt->useDB)
+               restore_toc_entries_parallel(AH);
+       else
        {
-               AH->currentTE = te;
-
-               /* Work out what, if anything, we want from this entry */
-               reqs = _tocEntryRequired(te, ropt, false);
-
-               /* Dump any relevant dump warnings to stderr */
-               if (!ropt->suppressDumpWarnings && strcmp(te->desc, "WARNING") == 0)
-               {
-                       if (!ropt->dataOnly && te->defn != NULL && strlen(te->defn) != 0)
-                               write_msg(modulename, "warning from original dump file: %s\n", te->defn);
-                       else if (te->copyStmt != NULL && strlen(te->copyStmt) != 0)
-                               write_msg(modulename, "warning from original dump file: %s\n", te->copyStmt);
-               }
-
-               defnDumped = false;
-
-               if ((reqs & REQ_SCHEMA) != 0)   /* We want the schema */
-               {
-                       ahlog(AH, 1, "creating %s %s\n", te->desc, te->tag);
-
-                       _printTocEntry(AH, te, ropt, false, false);
-                       defnDumped = true;
-
-                       /*
-                        * If we could not create a table and --no-data-for-failed-tables
-                        * was given, ignore the corresponding TABLE DATA
-                        */
-                       if (ropt->noDataForFailedTables &&
-                               AH->lastErrorTE == te &&
-                               strcmp(te->desc, "TABLE") == 0)
-                       {
-                               TocEntry   *tes;
-
-                               ahlog(AH, 1, "table \"%s\" could not be created, will not restore its data\n",
-                                         te->tag);
-
-                               for (tes = te->next; tes != AH->toc; tes = tes->next)
-                               {
-                                       if (strcmp(tes->desc, "TABLE DATA") == 0 &&
-                                               strcmp(tes->tag, te->tag) == 0 &&
-                                               strcmp(tes->namespace ? tes->namespace : "",
-                                                          te->namespace ? te->namespace : "") == 0)
-                                       {
-                                               /* mark it unwanted */
-                                               ropt->idWanted[tes->dumpId - 1] = false;
-                                               break;
-                                       }
-                               }
-                       }
-
-                       /* If we created a DB, connect to it... */
-                       if (strcmp(te->desc, "DATABASE") == 0)
-                       {
-                               ahlog(AH, 1, "connecting to new database \"%s\"\n", te->tag);
-                               _reconnectToDB(AH, te->tag);
-                       }
-               }
-
-               /*
-                * If we have a data component, then process it
-                */
-               if ((reqs & REQ_DATA) != 0)
-               {
-                       /*
-                        * hadDumper will be set if there is genuine data component for
-                        * this node. Otherwise, we need to check the defn field for
-                        * statements that need to be executed in data-only restores.
-                        */
-                       if (te->hadDumper)
-                       {
-                               /*
-                                * If we can output the data, then restore it.
-                                */
-                               if (AH->PrintTocDataPtr !=NULL && (reqs & REQ_DATA) != 0)
-                               {
-#ifndef HAVE_LIBZ
-                                       if (AH->compression != 0)
-                                               die_horribly(AH, modulename, "cannot restore from compressed archive (compression not supported in this installation)\n");
-#endif
-
-                                       _printTocEntry(AH, te, ropt, true, false);
-
-                                       if (strcmp(te->desc, "BLOBS") == 0 ||
-                                               strcmp(te->desc, "BLOB COMMENTS") == 0)
-                                       {
-                                               ahlog(AH, 1, "restoring %s\n", te->desc);
-
-                                               _selectOutputSchema(AH, "pg_catalog");
-
-                                               (*AH->PrintTocDataPtr) (AH, te, ropt);
-                                       }
-                                       else
-                                       {
-                                               _disableTriggersIfNecessary(AH, te, ropt);
-
-                                               /* Select owner and schema as necessary */
-                                               _becomeOwner(AH, te);
-                                               _selectOutputSchema(AH, te->namespace);
-
-                                               ahlog(AH, 1, "restoring data for table \"%s\"\n",
-                                                         te->tag);
-
-                                               /*
-                                                * If we have a copy statement, use it. As of V1.3,
-                                                * these are separate to allow easy import from
-                                                * withing a database connection. Pre 1.3 archives can
-                                                * not use DB connections and are sent to output only.
-                                                *
-                                                * For V1.3+, the table data MUST have a copy
-                                                * statement so that we can go into appropriate mode
-                                                * with libpq.
-                                                */
-                                               if (te->copyStmt && strlen(te->copyStmt) > 0)
-                                               {
-                                                       ahprintf(AH, "%s", te->copyStmt);
-                                                       AH->writingCopyData = true;
-                                               }
-
-                                               (*AH->PrintTocDataPtr) (AH, te, ropt);
-
-                                               AH->writingCopyData = false;
-
-                                               _enableTriggersIfNecessary(AH, te, ropt);
-                                       }
-                               }
-                       }
-                       else if (!defnDumped)
-                       {
-                               /* If we haven't already dumped the defn part, do so now */
-                               ahlog(AH, 1, "executing %s %s\n", te->desc, te->tag);
-                               _printTocEntry(AH, te, ropt, false, false);
-                       }
-               }
-       }                                                       /* end loop over TOC entries */
+               for (te = AH->toc->next; te != AH->toc; te = te->next)
+                       (void) restore_toc_entry(AH, te, ropt, false);
+       }
 
        /*
         * Scan TOC again to output ownership commands and ACLs
@@ -400,7 +392,8 @@ RestoreArchive(Archive *AHX, RestoreOptions *ropt)
                /* Work out what, if anything, we want from this entry */
                reqs = _tocEntryRequired(te, ropt, true);
 
-               if ((reqs & REQ_SCHEMA) != 0)   /* We want the schema */
+               /* Both schema and data objects might now have ownership/ACLs */
+               if ((reqs & (REQ_SCHEMA | REQ_DATA)) != 0)
                {
                        ahlog(AH, 1, "setting owner and privileges for %s %s\n",
                                  te->desc, te->tag);
@@ -436,6 +429,193 @@ RestoreArchive(Archive *AHX, RestoreOptions *ropt)
        }
 }
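
The parallel path introduced in this patch is taken only when pg_restore is
connected directly to a database (ropt->useDB) and more than one job was
requested; plain script output cannot be parallelized. A hypothetical invocation
that would exercise it (database and archive names are placeholders):

    pg_restore --dbname=mydb --jobs=4 mydump.custom
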
 
+/*
+ * Restore a single TOC item.  Used in both parallel and non-parallel restore;
+ * is_parallel is true if we are in a worker child process.
+ *
+ * Returns 0 normally, but WORKER_CREATE_DONE or WORKER_INHIBIT_DATA if
+ * the parallel parent has to make the corresponding status update.
+ */
+static int
+restore_toc_entry(ArchiveHandle *AH, TocEntry *te,
+                                 RestoreOptions *ropt, bool is_parallel)
+{
+       int                     retval = 0;
+       teReqs          reqs;
+       bool            defnDumped;
+
+       AH->currentTE = te;
+
+       /* Work out what, if anything, we want from this entry */
+       reqs = _tocEntryRequired(te, ropt, false);
+
+       /* Dump any relevant dump warnings to stderr */
+       if (!ropt->suppressDumpWarnings && strcmp(te->desc, "WARNING") == 0)
+       {
+               if (!ropt->dataOnly && te->defn != NULL && strlen(te->defn) != 0)
+                       write_msg(modulename, "warning from original dump file: %s\n", te->defn);
+               else if (te->copyStmt != NULL && strlen(te->copyStmt) != 0)
+                       write_msg(modulename, "warning from original dump file: %s\n", te->copyStmt);
+       }
+
+       defnDumped = false;
+
+       if ((reqs & REQ_SCHEMA) != 0)           /* We want the schema */
+       {
+               ahlog(AH, 1, "creating %s %s\n", te->desc, te->tag);
+
+               _printTocEntry(AH, te, ropt, false, false);
+               defnDumped = true;
+
+               if (strcmp(te->desc, "TABLE") == 0)
+               {
+                       if (AH->lastErrorTE == te)
+                       {
+                               /*
+                                * We failed to create the table. If
+                                * --no-data-for-failed-tables was given, mark the
+                                * corresponding TABLE DATA to be ignored.
+                                *
+                                * In the parallel case this must be done in the parent, so we
+                                * just set the return value.
+                                */
+                               if (ropt->noDataForFailedTables)
+                               {
+                                       if (is_parallel)
+                                               retval = WORKER_INHIBIT_DATA;
+                                       else
+                                               inhibit_data_for_failed_table(AH, te);
+                               }
+                       }
+                       else
+                       {
+                               /*
+                                * We created the table successfully.  Mark the corresponding
+                                * TABLE DATA for possible truncation.
+                                *
+                                * In the parallel case this must be done in the parent, so we
+                                * just set the return value.
+                                */
+                               if (is_parallel)
+                                       retval = WORKER_CREATE_DONE;
+                               else
+                                       mark_create_done(AH, te);
+                       }
+               }
+
+               /* If we created a DB, connect to it... */
+               if (strcmp(te->desc, "DATABASE") == 0)
+               {
+                       ahlog(AH, 1, "connecting to new database \"%s\"\n", te->tag);
+                       _reconnectToDB(AH, te->tag);
+                       ropt->dbname = strdup(te->tag);
+               }
+       }
+
+       /*
+        * If we have a data component, then process it
+        */
+       if ((reqs & REQ_DATA) != 0)
+       {
+               /*
+                * hadDumper will be set if there is genuine data component for this
+                * node. Otherwise, we need to check the defn field for statements
+                * that need to be executed in data-only restores.
+                */
+               if (te->hadDumper)
+               {
+                       /*
+                        * If we can output the data, then restore it.
+                        */
+                       if (AH->PrintTocDataPtr !=NULL && (reqs & REQ_DATA) != 0)
+                       {
+                               _printTocEntry(AH, te, ropt, true, false);
+
+                               if (strcmp(te->desc, "BLOBS") == 0 ||
+                                       strcmp(te->desc, "BLOB COMMENTS") == 0)
+                               {
+                                       ahlog(AH, 1, "restoring %s\n", te->desc);
+
+                                       _selectOutputSchema(AH, "pg_catalog");
+
+                                       (*AH->PrintTocDataPtr) (AH, te, ropt);
+                               }
+                               else
+                               {
+                                       _disableTriggersIfNecessary(AH, te, ropt);
+
+                                       /* Select owner and schema as necessary */
+                                       _becomeOwner(AH, te);
+                                       _selectOutputSchema(AH, te->namespace);
+
+                                       ahlog(AH, 1, "restoring data for table \"%s\"\n",
+                                                 te->tag);
+
+                                       /*
+                                        * In parallel restore, if we created the table earlier in
+                                        * the run then we wrap the COPY in a transaction and
+                                        * precede it with a TRUNCATE.  If archiving is not on
+                                        * this prevents WAL-logging the COPY.  This obtains a
+                                        * speedup similar to that from using single_txn mode in
+                                        * non-parallel restores.
+                                        */
+                                       if (is_parallel && te->created)
+                                       {
+                                               /*
+                                                * Parallel restore is always talking directly to a
+                                                * server, so no need to see if we should issue BEGIN.
+                                                */
+                                               StartTransaction(AH);
+
+                                               /*
+                                                * If the server version is >= 8.4, make sure we issue
+                                                * TRUNCATE with ONLY so that child tables are not
+                                                * wiped.
+                                                */
+                                               ahprintf(AH, "TRUNCATE TABLE %s%s;\n\n",
+                                                                (PQserverVersion(AH->connection) >= 80400 ?
+                                                                 "ONLY " : ""),
+                                                                fmtId(te->tag));
+                                       }
+
+                                       /*
+                                        * If we have a copy statement, use it. As of V1.3, these
+                                        * are separate to allow easy import from within a
+                                        * database connection. Pre 1.3 archives can not use DB
+                                        * connections and are sent to output only.
+                                        *
+                                        * For V1.3+, the table data MUST have a copy statement so
+                                        * that we can go into appropriate mode with libpq.
+                                        */
+                                       if (te->copyStmt && strlen(te->copyStmt) > 0)
+                                       {
+                                               ahprintf(AH, "%s", te->copyStmt);
+                                               AH->writingCopyData = true;
+                                       }
+
+                                       (*AH->PrintTocDataPtr) (AH, te, ropt);
+
+                                       AH->writingCopyData = false;
+
+                                       /* close out the transaction started above */
+                                       if (is_parallel && te->created)
+                                               CommitTransaction(AH);
+
+                                       _enableTriggersIfNecessary(AH, te, ropt);
+                               }
+                       }
+               }
+               else if (!defnDumped)
+               {
+                       /* If we haven't already dumped the defn part, do so now */
+                       ahlog(AH, 1, "executing %s %s\n", te->desc, te->tag);
+                       _printTocEntry(AH, te, ropt, false, false);
+               }
+       }
+
+       return retval;
+}
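
For a table whose CREATE succeeded earlier in the same parallel run (te->created),
the data step above therefore sends approximately the following to the server;
wrapping the COPY in a transaction that begins with TRUNCATE lets it skip
WAL-logging when archiving is off (table and column names are placeholders, and
the COPY line really comes from the archive's copyStmt):

    BEGIN;
    TRUNCATE TABLE ONLY mytab;
    COPY mytab (col1, col2) FROM stdin;
    ...rows...
    \.
    COMMIT;
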
+
 /*
  * Allocate a new RestoreOptions block.
  * This is mainly so we can initialize it, but also for future expansion,
@@ -447,9 +627,9 @@ NewRestoreOptions(void)
 
        opts = (RestoreOptions *) calloc(1, sizeof(RestoreOptions));
 
+       /* set any fields that shouldn't default to zeroes */
        opts->format = archUnknown;
-       opts->suppressDumpWarnings = false;
-       opts->exit_on_error = false;
+       opts->promptPassword = TRI_DEFAULT;
 
        return opts;
 }
@@ -535,7 +715,8 @@ ArchiveEntry(Archive *AHX,
                         const char *namespace,
                         const char *tablespace,
                         const char *owner, bool withOids,
-                        const char *desc, const char *defn,
+                        const char *desc, teSection section,
+                        const char *defn,
                         const char *dropStmt, const char *copyStmt,
                         const DumpId *deps, int nDeps,
                         DataDumperPtr dumpFn, void *dumpArg)
@@ -558,6 +739,7 @@ ArchiveEntry(Archive *AHX,
 
        newToc->catalogId = catalogId;
        newToc->dumpId = dumpId;
+       newToc->section = section;
 
        newToc->tag = strdup(tag);
        newToc->namespace = namespace ? strdup(namespace) : NULL;
@@ -596,7 +778,7 @@ void
 PrintTOCSummary(Archive *AHX, RestoreOptions *ropt)
 {
        ArchiveHandle *AH = (ArchiveHandle *) AHX;
-       TocEntry   *te = AH->toc->next;
+       TocEntry   *te;
        OutputContext sav;
        char       *fmtName;
 
@@ -635,14 +817,25 @@ PrintTOCSummary(Archive *AHX, RestoreOptions *ropt)
 
        ahprintf(AH, ";\n;\n; Selected TOC Entries:\n;\n");
 
-       while (te != AH->toc)
+       /* We should print DATABASE entries whether or not -C was specified */
+       ropt->createDB = 1;
+
+       for (te = AH->toc->next; te != AH->toc; te = te->next)
        {
-               if (_tocEntryRequired(te, ropt, true) != 0)
+               if (ropt->verbose || _tocEntryRequired(te, ropt, true) != 0)
                        ahprintf(AH, "%d; %u %u %s %s %s %s\n", te->dumpId,
                                         te->catalogId.tableoid, te->catalogId.oid,
                                         te->desc, te->namespace ? te->namespace : "-",
                                         te->tag, te->owner);
-               te = te->next;
+               if (ropt->verbose && te->nDeps > 0)
+               {
+                       int                     i;
+
+                       ahprintf(AH, ";\tdepends on:");
+                       for (i = 0; i < te->nDeps; i++)
+                               ahprintf(AH, " %d", te->dependencies[i]);
+                       ahprintf(AH, "\n");
+               }
        }
 
        if (ropt->filename)
@@ -714,7 +907,10 @@ EndRestoreBlobs(ArchiveHandle *AH)
                        ahprintf(AH, "COMMIT;\n\n");
        }
 
-       ahlog(AH, 1, "restored %d large objects\n", AH->blobCount);
+       ahlog(AH, 1, ngettext("restored %d large object\n",
+                                                 "restored %d large objects\n",
+                                                 AH->blobCount),
+                 AH->blobCount);
 }
 
 
@@ -722,8 +918,9 @@ EndRestoreBlobs(ArchiveHandle *AH)
  * Called by a format handler to initiate restoration of a blob
  */
 void
-StartRestoreBlob(ArchiveHandle *AH, Oid oid)
+StartRestoreBlob(ArchiveHandle *AH, Oid oid, bool drop)
 {
+       bool            old_blob_style = (AH->version < K_VERS_1_12);
        Oid                     loOid;
 
        AH->blobCount++;
@@ -733,20 +930,32 @@ StartRestoreBlob(ArchiveHandle *AH, Oid oid)
 
        ahlog(AH, 2, "restoring large object with OID %u\n", oid);
 
+       /* With an old archive we must do drop and create logic here */
+       if (old_blob_style && drop)
+               DropBlobIfExists(AH, oid);
+
        if (AH->connection)
        {
-               loOid = lo_create(AH->connection, oid);
-               if (loOid == 0 || loOid != oid)
-                       die_horribly(AH, modulename, "could not create large object %u\n",
-                                                oid);
-
+               if (old_blob_style)
+               {
+                       loOid = lo_create(AH->connection, oid);
+                       if (loOid == 0 || loOid != oid)
+                               die_horribly(AH, modulename, "could not create large object %u: %s",
+                                                        oid, PQerrorMessage(AH->connection));
+               }
                AH->loFd = lo_open(AH->connection, oid, INV_WRITE);
                if (AH->loFd == -1)
-                       die_horribly(AH, modulename, "could not open large object\n");
+                       die_horribly(AH, modulename, "could not open large object %u: %s",
+                                                oid, PQerrorMessage(AH->connection));
        }
        else
        {
-               ahprintf(AH, "SELECT lo_open(lo_create(%u), %d);\n", oid, INV_WRITE);
+               if (old_blob_style)
+                       ahprintf(AH, "SELECT pg_catalog.lo_open(pg_catalog.lo_create('%u'), %d);\n",
+                                        oid, INV_WRITE);
+               else
+                       ahprintf(AH, "SELECT pg_catalog.lo_open('%u', %d);\n",
+                                        oid, INV_WRITE);
        }
 
        AH->writingBlob = 1;
@@ -770,7 +979,7 @@ EndRestoreBlob(ArchiveHandle *AH, Oid oid)
        }
        else
        {
-               ahprintf(AH, "SELECT lo_close(0);\n\n");
+               ahprintf(AH, "SELECT pg_catalog.lo_close(0);\n\n");
        }
 }
 
@@ -788,20 +997,16 @@ SortTocFromFile(Archive *AHX, RestoreOptions *ropt)
        char       *endptr;
        DumpId          id;
        TocEntry   *te;
-       TocEntry   *tePrev;
 
        /* Allocate space for the 'wanted' array, and init it */
        ropt->idWanted = (bool *) malloc(sizeof(bool) * AH->maxDumpId);
        memset(ropt->idWanted, 0, sizeof(bool) * AH->maxDumpId);
 
-       /* Set prev entry as head of list */
-       tePrev = AH->toc;
-
        /* Setup the file */
        fh = fopen(ropt->tocFile, PG_BINARY_R);
        if (!fh)
-               die_horribly(AH, modulename, "could not open TOC file: %s\n",
-                                        strerror(errno));
+               die_horribly(AH, modulename, "could not open TOC file \"%s\": %s\n",
+                                        ropt->tocFile, strerror(errno));
 
        while (fgets(buf, sizeof(buf), fh) != NULL)
        {
@@ -811,7 +1016,7 @@ SortTocFromFile(Archive *AHX, RestoreOptions *ropt)
                        cmnt[0] = '\0';
 
                /* Ignore if all blank */
-               if (strspn(buf, " \t\r") == strlen(buf))
+               if (strspn(buf, " \t\r\n") == strlen(buf))
                        continue;
 
                /* Get an ID, check it's valid and not already seen */
@@ -829,10 +1034,21 @@ SortTocFromFile(Archive *AHX, RestoreOptions *ropt)
                        die_horribly(AH, modulename, "could not find entry for ID %d\n",
                                                 id);
 
+               /* Mark it wanted */
                ropt->idWanted[id - 1] = true;
 
-               _moveAfter(AH, tePrev, te);
-               tePrev = te;
+               /*
+                * Move each item to the end of the list as it is selected, so that
+                * they are placed in the desired order.  Any unwanted items will end
+                * up at the front of the list, which may seem unintuitive but it's
+                * what we need.  In an ordinary serial restore that makes no
+                * difference, but in a parallel restore we need to mark unrestored
+                * items' dependencies as satisfied before we start examining
+                * restorable items.  Otherwise they could have surprising
+                * side-effects on the order in which restorable items actually get
+                * restored.
+                */
+               _moveBefore(AH, AH->toc, te);
        }
 
        if (fclose(fh) != 0)
@@ -958,7 +1174,14 @@ SetOutput(ArchiveHandle *AH, char *filename, int compression)
        }
 
        if (!AH->OF)
-               die_horribly(AH, modulename, "could not open output file: %s\n", strerror(errno));
+       {
+               if (filename)
+                       die_horribly(AH, modulename, "could not open output file \"%s\": %s\n",
+                                                filename, strerror(errno));
+               else
+                       die_horribly(AH, modulename, "could not open output file: %s\n",
+                                                strerror(errno));
+       }
 
        return sav;
 }
@@ -1053,7 +1276,9 @@ dump_lo_buf(ArchiveHandle *AH)
                size_t          res;
 
                res = lo_write(AH->connection, AH->loFd, AH->lo_buf, AH->lo_buf_used);
-               ahlog(AH, 5, "wrote %lu bytes of large object data (result = %lu)\n",
+               ahlog(AH, 5, ngettext("wrote %lu byte of large object data (result = %lu)\n",
+                                        "wrote %lu bytes of large object data (result = %lu)\n",
+                                                         AH->lo_buf_used),
                          (unsigned long) AH->lo_buf_used, (unsigned long) res);
                if (res != AH->lo_buf_used)
                        die_horribly(AH, modulename,
@@ -1062,20 +1287,19 @@ dump_lo_buf(ArchiveHandle *AH)
        }
        else
        {
-               unsigned char *str;
-               size_t          len;
+               PQExpBuffer buf = createPQExpBuffer();
 
-               str = PQescapeBytea((const unsigned char *) AH->lo_buf,
-                                                       AH->lo_buf_used, &len);
-               if (!str)
-                       die_horribly(AH, modulename, "out of memory\n");
+               appendByteaLiteralAHX(buf,
+                                                         (const unsigned char *) AH->lo_buf,
+                                                         AH->lo_buf_used,
+                                                         AH);
 
                /* Hack: turn off writingBlob so ahwrite doesn't recurse to here */
                AH->writingBlob = 0;
-               ahprintf(AH, "SELECT lowrite(0, '%s');\n", str);
+               ahprintf(AH, "SELECT pg_catalog.lowrite(0, %s);\n", buf->data);
                AH->writingBlob = 1;
 
-               free(str);
+               destroyPQExpBuffer(buf);
        }
        AH->lo_buf_used = 0;
 }
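
In script (non-connected) mode the buffered large-object data is now emitted as a
proper bytea literal rather than a PQescapeBytea'd string, so the generated
command looks something like the following (payload abbreviated and purely
illustrative):

    SELECT pg_catalog.lowrite(0, '\x48656c6c6f');
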
@@ -1256,45 +1480,47 @@ warn_or_die_horribly(ArchiveHandle *AH,
        va_end(ap);
 }
 
+#ifdef NOT_USED
+
 static void
 _moveAfter(ArchiveHandle *AH, TocEntry *pos, TocEntry *te)
 {
+       /* Unlink te from list */
        te->prev->next = te->next;
        te->next->prev = te->prev;
 
+       /* and insert it after "pos" */
        te->prev = pos;
        te->next = pos->next;
-
        pos->next->prev = te;
        pos->next = te;
 }
 
-#ifdef NOT_USED
+#endif
 
 static void
 _moveBefore(ArchiveHandle *AH, TocEntry *pos, TocEntry *te)
 {
+       /* Unlink te from list */
        te->prev->next = te->next;
        te->next->prev = te->prev;
 
+       /* and insert it before "pos" */
        te->prev = pos->prev;
        te->next = pos;
        pos->prev->next = te;
        pos->prev = te;
 }
-#endif
 
 static TocEntry *
 getTocEntryByDumpId(ArchiveHandle *AH, DumpId id)
 {
        TocEntry   *te;
 
-       te = AH->toc->next;
-       while (te != AH->toc)
+       for (te = AH->toc->next; te != AH->toc; te = te->next)
        {
                if (te->dumpId == id)
                        return te;
-               te = te->next;
        }
        return NULL;
 }
@@ -1311,24 +1537,24 @@ TocIDRequired(ArchiveHandle *AH, DumpId id, RestoreOptions *ropt)
 }
 
 size_t
-WriteOffset(ArchiveHandle *AH, off_t o, int wasSet)
+WriteOffset(ArchiveHandle *AH, pgoff_t o, int wasSet)
 {
        int                     off;
 
        /* Save the flag */
        (*AH->WriteBytePtr) (AH, wasSet);
 
-       /* Write out off_t smallest byte first, prevents endian mismatch */
-       for (off = 0; off < sizeof(off_t); off++)
+       /* Write out pgoff_t smallest byte first, prevents endian mismatch */
+       for (off = 0; off < sizeof(pgoff_t); off++)
        {
                (*AH->WriteBytePtr) (AH, o & 0xFF);
                o >>= 8;
        }
-       return sizeof(off_t) + 1;
+       return sizeof(pgoff_t) + 1;
 }
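
The byte-at-a-time loop above is simply a portable little-endian encoding of the
offset, so the archive is readable regardless of the host's byte order. A
standalone sketch of the same idea (ordinary C, not archiver code):

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        int64_t         o = 0x0102030405060708LL;
        int64_t         back = 0;
        unsigned char   buf[8];
        int             i;

        for (i = 0; i < 8; i++)         /* write smallest byte first */
        {
            buf[i] = o & 0xFF;
            o >>= 8;
        }
        for (i = 0; i < 8; i++)         /* reassemble on any platform */
            back |= ((int64_t) buf[i]) << (i * 8);

        printf("0x%llx\n", (unsigned long long) back);
        return 0;
    }
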
 
 int
-ReadOffset(ArchiveHandle *AH, off_t *o)
+ReadOffset(ArchiveHandle *AH, pgoff_t * o)
 {
        int                     i;
        int                     off;
@@ -1348,8 +1574,8 @@ ReadOffset(ArchiveHandle *AH, off_t *o)
                else if (i == 0)
                        return K_OFFSET_NO_DATA;
 
-               /* Cast to off_t because it was written as an int. */
-               *o = (off_t) i;
+               /* Cast to pgoff_t because it was written as an int. */
+               *o = (pgoff_t) i;
                return K_OFFSET_POS_SET;
        }
 
@@ -1379,8 +1605,8 @@ ReadOffset(ArchiveHandle *AH, off_t *o)
         */
        for (off = 0; off < AH->offSize; off++)
        {
-               if (off < sizeof(off_t))
-                       *o |= ((off_t) ((*AH->ReadBytePtr) (AH))) << (off * 8);
+               if (off < sizeof(pgoff_t))
+                       *o |= ((pgoff_t) ((*AH->ReadBytePtr) (AH))) << (off * 8);
                else
                {
                        if ((*AH->ReadBytePtr) (AH) != 0)
@@ -1472,7 +1698,7 @@ ReadStr(ArchiveHandle *AH)
        int                     l;
 
        l = ReadInt(AH);
-       if (l == -1)
+       if (l < 0)
                buf = NULL;
        else
        {
@@ -1480,7 +1706,9 @@ ReadStr(ArchiveHandle *AH)
                if (!buf)
                        die_horribly(AH, modulename, "out of memory\n");
 
-               (*AH->ReadBufPtr) (AH, (void *) buf, l);
+               if ((*AH->ReadBufPtr) (AH, (void *) buf, l) != l)
+                       die_horribly(AH, modulename, "unexpected end of file\n");
+
                buf[l] = '\0';
        }
 
@@ -1511,12 +1739,17 @@ _discoverArchiveFormat(ArchiveHandle *AH)
        {
                wantClose = 1;
                fh = fopen(AH->fSpec, PG_BINARY_R);
+               if (!fh)
+                       die_horribly(AH, modulename, "could not open input file \"%s\": %s\n",
+                                                AH->fSpec, strerror(errno));
        }
        else
+       {
                fh = stdin;
-
-       if (!fh)
-               die_horribly(AH, modulename, "could not open input file: %s\n", strerror(errno));
+               if (!fh)
+                       die_horribly(AH, modulename, "could not open input file: %s\n",
+                                                strerror(errno));
+       }
 
        cnt = fread(sig, 1, 5, fh);
 
@@ -1535,6 +1768,11 @@ _discoverArchiveFormat(ArchiveHandle *AH)
 
        if (strncmp(sig, "PGDMP", 5) == 0)
        {
+               /*
+                * Finish reading (most of) a custom-format header.
+                *
+                * NB: this code must agree with ReadHead().
+                */
                AH->vmaj = fgetc(fh);
                AH->vmin = fgetc(fh);
 
@@ -1598,11 +1836,6 @@ _discoverArchiveFormat(ArchiveHandle *AH)
        else
                AH->lookaheadLen = 0;   /* Don't bother since we've reset the file */
 
-#if 0
-       write_msg(modulename, "read %lu bytes into lookahead buffer\n",
-                         (unsigned long) AH->lookaheadLen);
-#endif
-
        /* Close the file */
        if (wantClose)
                if (fclose(fh) != 0)
@@ -1636,18 +1869,23 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt,
        AH->vmin = K_VERS_MINOR;
        AH->vrev = K_VERS_REV;
 
+       /* Make a convenient integer <maj><min><rev>00 */
+       AH->version = ((AH->vmaj * 256 + AH->vmin) * 256 + AH->vrev) * 256 + 0;
+
        /* initialize for backwards compatible string processing */
-       AH->public.encoding = PG_SQL_ASCII;
+       AH->public.encoding = 0;        /* PG_SQL_ASCII */
        AH->public.std_strings = false;
 
        /* sql error handling */
        AH->public.exit_on_error = true;
        AH->public.n_errors = 0;
 
+       AH->archiveDumpVersion = PG_VERSION;
+
        AH->createDate = time(NULL);
 
        AH->intSize = sizeof(int);
-       AH->offSize = sizeof(off_t);
+       AH->offSize = sizeof(pgoff_t);
        if (FileSpec)
        {
                AH->fSpec = strdup(FileSpec);
@@ -1662,9 +1900,9 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt,
        else
                AH->fSpec = NULL;
 
-       AH->currUser = strdup("");      /* So it's valid, but we can free() it later
-                                                                * if necessary */
-       AH->currSchema = strdup("");    /* ditto */
+       AH->currUser = NULL;            /* unknown */
+       AH->currSchema = NULL;          /* ditto */
+       AH->currTablespace = NULL;      /* ditto */
        AH->currWithOids = -1;          /* force SET */
 
        AH->toc = (TocEntry *) calloc(1, sizeof(TocEntry));
@@ -1700,15 +1938,13 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt,
        }
 #endif
 
-#if 0
-       write_msg(modulename, "archive format is %d\n", fmt);
-#endif
-
        if (fmt == archUnknown)
                AH->format = _discoverArchiveFormat(AH);
        else
                AH->format = fmt;
 
+       AH->promptPassword = TRI_DEFAULT;
+
        switch (AH->format)
        {
                case archCustom:
@@ -1738,11 +1974,11 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt,
 void
 WriteDataChunks(ArchiveHandle *AH)
 {
-       TocEntry   *te = AH->toc->next;
+       TocEntry   *te;
        StartDataPtr startPtr;
        EndDataPtr      endPtr;
 
-       while (te != AH->toc)
+       for (te = AH->toc->next; te != AH->toc; te = te->next)
        {
                if (te->dataDumper != NULL)
                {
@@ -1777,7 +2013,6 @@ WriteDataChunks(ArchiveHandle *AH)
                                (*endPtr) (AH, te);
                        AH->currToc = NULL;
                }
-               te = te->next;
        }
 }
 
@@ -1805,6 +2040,7 @@ WriteToc(ArchiveHandle *AH)
 
                WriteStr(AH, te->tag);
                WriteStr(AH, te->desc);
+               WriteInt(AH, te->section);
                WriteStr(AH, te->defn);
                WriteStr(AH, te->dropStmt);
                WriteStr(AH, te->copyStmt);
@@ -1834,8 +2070,7 @@ ReadToc(ArchiveHandle *AH)
        DumpId     *deps;
        int                     depIdx;
        int                     depSize;
-
-       TocEntry   *te = AH->toc->next;
+       TocEntry   *te;
 
        AH->tocCount = ReadInt(AH);
        AH->maxDumpId = 0;
@@ -1870,6 +2105,37 @@ ReadToc(ArchiveHandle *AH)
 
                te->tag = ReadStr(AH);
                te->desc = ReadStr(AH);
+
+               if (AH->version >= K_VERS_1_11)
+               {
+                       te->section = ReadInt(AH);
+               }
+               else
+               {
+                       /*
+                        * Rules for pre-8.4 archives wherein pg_dump hasn't classified
+                        * the entries into sections.  This list need not cover entry
+                        * types added later than 8.4.
+                        */
+                       if (strcmp(te->desc, "COMMENT") == 0 ||
+                               strcmp(te->desc, "ACL") == 0 ||
+                               strcmp(te->desc, "ACL LANGUAGE") == 0)
+                               te->section = SECTION_NONE;
+                       else if (strcmp(te->desc, "TABLE DATA") == 0 ||
+                                        strcmp(te->desc, "BLOBS") == 0 ||
+                                        strcmp(te->desc, "BLOB COMMENTS") == 0)
+                               te->section = SECTION_DATA;
+                       else if (strcmp(te->desc, "CONSTRAINT") == 0 ||
+                                        strcmp(te->desc, "CHECK CONSTRAINT") == 0 ||
+                                        strcmp(te->desc, "FK CONSTRAINT") == 0 ||
+                                        strcmp(te->desc, "INDEX") == 0 ||
+                                        strcmp(te->desc, "RULE") == 0 ||
+                                        strcmp(te->desc, "TRIGGER") == 0)
+                               te->section = SECTION_POST_DATA;
+                       else
+                               te->section = SECTION_PRE_DATA;
+               }
+
                te->defn = ReadStr(AH);
                te->dropStmt = ReadStr(AH);
 
@@ -2008,10 +2274,15 @@ _tocEntryRequired(TocEntry *te, RestoreOptions *ropt, bool include_acls)
                return 0;
 
        /* If it's an ACL, maybe ignore it */
-       if ((!include_acls || ropt->aclsSkip) && strcmp(te->desc, "ACL") == 0)
+       if ((!include_acls || ropt->aclsSkip) && _tocEntryIsACL(te))
+               return 0;
+
+       /* If it's security labels, maybe ignore it */
+       if (ropt->skip_seclabel && strcmp(te->desc, "SECURITY LABEL") == 0)
                return 0;
 
-       if (!ropt->create && strcmp(te->desc, "DATABASE") == 0)
+       /* Ignore DATABASE entry unless we should create it */
+       if (!ropt->createDB && strcmp(te->desc, "DATABASE") == 0)
                return 0;
 
        /* Check options for selective dump/restore */
@@ -2065,9 +2336,20 @@ _tocEntryRequired(TocEntry *te, RestoreOptions *ropt, bool include_acls)
        if (!te->hadDumper)
        {
                /*
-                * Special Case: If 'SEQUENCE SET' then it is considered a data entry
+                * Special Case: If 'SEQUENCE SET' or anything to do with BLOBs, then
+                * it is considered a data entry.  We don't need to check for the
+                * BLOBS entry or old-style BLOB COMMENTS, because they will have
+                * hadDumper = true ... but we do need to check new-style BLOB
+                * comments.
                 */
-               if (strcmp(te->desc, "SEQUENCE SET") == 0)
+               if (strcmp(te->desc, "SEQUENCE SET") == 0 ||
+                       strcmp(te->desc, "BLOB") == 0 ||
+                       (strcmp(te->desc, "ACL") == 0 &&
+                        strncmp(te->tag, "LARGE OBJECT ", 13) == 0) ||
+                       (strcmp(te->desc, "COMMENT") == 0 &&
+                        strncmp(te->tag, "LARGE OBJECT ", 13) == 0) ||
+                       (strcmp(te->desc, "SECURITY LABEL") == 0 &&
+                        strncmp(te->tag, "LARGE OBJECT ", 13) == 0))
                        res = res & REQ_DATA;
                else
                        res = res & ~REQ_DATA;
@@ -2099,6 +2381,20 @@ _tocEntryRequired(TocEntry *te, RestoreOptions *ropt, bool include_acls)
        return res;
 }
 
+/*
+ * Identify TOC entries that are ACLs.
+ */
+static bool
+_tocEntryIsACL(TocEntry *te)
+{
+       /* "ACL LANGUAGE" was a crock emitted only in PG 7.4 */
+       if (strcmp(te->desc, "ACL") == 0 ||
+               strcmp(te->desc, "ACL LANGUAGE") == 0 ||
+               strcmp(te->desc, "DEFAULT ACL") == 0)
+               return true;
+       return false;
+}
+
 /*
  * Issue SET commands for parameters that we want to have set the same way
  * at all times during execution of a restore script.
@@ -2106,6 +2402,9 @@ _tocEntryRequired(TocEntry *te, RestoreOptions *ropt, bool include_acls)
 static void
 _doSetFixedOutputState(ArchiveHandle *AH)
 {
+       /* Disable statement_timeout in archive for pg_restore/psql  */
+       ahprintf(AH, "SET statement_timeout = 0;\n");
+
        /* Select the correct character set encoding */
        ahprintf(AH, "SET client_encoding = '%s';\n",
                         pg_encoding_to_char(AH->public.encoding));
@@ -2114,6 +2413,10 @@ _doSetFixedOutputState(ArchiveHandle *AH)
        ahprintf(AH, "SET standard_conforming_strings = %s;\n",
                         AH->public.std_strings ? "on" : "off");
 
+       /* Select the role to be used during restore */
+       if (AH->ropt && AH->ropt->use_role)
+               ahprintf(AH, "SET ROLE %s;\n", fmtId(AH->ropt->use_role));
+
        /* Make sure function checking is disabled */
        ahprintf(AH, "SET check_function_bodies = false;\n");
 
@@ -2228,13 +2531,15 @@ _reconnectToDB(ArchiveHandle *AH, const char *dbname)
         */
        if (AH->currUser)
                free(AH->currUser);
+       AH->currUser = NULL;
 
-       AH->currUser = strdup("");
-
-       /* don't assume we still know the output schema */
+       /* don't assume we still know the output schema, tablespace, etc either */
        if (AH->currSchema)
                free(AH->currSchema);
-       AH->currSchema = strdup("");
+       AH->currSchema = NULL;
+       if (AH->currTablespace)
+               free(AH->currTablespace);
+       AH->currTablespace = NULL;
        AH->currWithOids = -1;
 
        /* re-establish fixed state */
@@ -2263,12 +2568,11 @@ _becomeUser(ArchiveHandle *AH, const char *user)
         */
        if (AH->currUser)
                free(AH->currUser);
-
        AH->currUser = strdup(user);
 }
 
 /*
- * Become the owner of the the given TOC entry object. If
+ * Become the owner of the given TOC entry object.     If
  * changes in ownership are not allowed, this doesn't do anything.
  */
 static void
@@ -2349,6 +2653,10 @@ _selectTablespace(ArchiveHandle *AH, const char *tablespace)
        const char *want,
                           *have;
 
+       /* do nothing in --no-tablespaces mode */
+       if (AH->ropt->noTablespace)
+               return;
+
        have = AH->currTablespace;
        want = tablespace;
 
@@ -2417,7 +2725,10 @@ _getObjectDescription(PQExpBuffer buf, TocEntry *te, ArchiveHandle *AH)
        if (strcmp(type, "CONVERSION") == 0 ||
                strcmp(type, "DOMAIN") == 0 ||
                strcmp(type, "TABLE") == 0 ||
-               strcmp(type, "TYPE") == 0)
+               strcmp(type, "TYPE") == 0 ||
+               strcmp(type, "FOREIGN TABLE") == 0 ||
+               strcmp(type, "TEXT SEARCH DICTIONARY") == 0 ||
+               strcmp(type, "TEXT SEARCH CONFIGURATION") == 0)
        {
                appendPQExpBuffer(buf, "%s ", type);
                if (te->namespace && te->namespace[0])  /* is null pre-7.3 */
@@ -2440,12 +2751,23 @@ _getObjectDescription(PQExpBuffer buf, TocEntry *te, ArchiveHandle *AH)
 
        /* objects named by just a name */
        if (strcmp(type, "DATABASE") == 0 ||
-               strcmp(type, "SCHEMA") == 0)
+               strcmp(type, "PROCEDURAL LANGUAGE") == 0 ||
+               strcmp(type, "SCHEMA") == 0 ||
+               strcmp(type, "FOREIGN DATA WRAPPER") == 0 ||
+               strcmp(type, "SERVER") == 0 ||
+               strcmp(type, "USER MAPPING") == 0)
        {
                appendPQExpBuffer(buf, "%s %s", type, fmtId(te->tag));
                return;
        }
 
+       /* BLOBs just have a name, but it's numeric so must not use fmtId */
+       if (strcmp(type, "BLOB") == 0)
+       {
+               appendPQExpBuffer(buf, "LARGE OBJECT %s", te->tag);
+               return;
+       }
+
        /*
         * These object types require additional decoration.  Fortunately, the
         * information needed is exactly what's in the DROP command.
@@ -2484,23 +2806,30 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt, bool isDat
        /* ACLs are dumped only during acl pass */
        if (acl_pass)
        {
-               if (strcmp(te->desc, "ACL") != 0)
+               if (!_tocEntryIsACL(te))
                        return;
        }
        else
        {
-               if (strcmp(te->desc, "ACL") == 0)
+               if (_tocEntryIsACL(te))
                        return;
        }
 
        /*
         * Avoid dumping the public schema, as it will already be created ...
         * unless we are using --clean mode, in which case it's been deleted and
-        * we'd better recreate it.
+        * we'd better recreate it.  Likewise for its comment, if any.
         */
-       if (!ropt->dropSchema &&
-               strcmp(te->desc, "SCHEMA") == 0 && strcmp(te->tag, "public") == 0)
-               return;
+       if (!ropt->dropSchema)
+       {
+               if (strcmp(te->desc, "SCHEMA") == 0 &&
+                       strcmp(te->tag, "public") == 0)
+                       return;
+               /* The comment restore would require super-user privs, so avoid it. */
+               if (strcmp(te->desc, "COMMENT") == 0 &&
+                       strcmp(te->tag, "SCHEMA public") == 0)
+                       return;
+       }
 
        /* Select owner, schema, and tablespace as necessary */
        _becomeOwner(AH, te);
@@ -2540,7 +2869,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt, bool isDat
                                 pfx, te->tag, te->desc,
                                 te->namespace ? te->namespace : "-",
                                 ropt->noOwner ? "-" : te->owner);
-               if (te->tablespace)
+               if (te->tablespace && !ropt->noTablespace)
                        ahprintf(AH, "; Tablespace: %s", te->tablespace);
                ahprintf(AH, "\n");
 
@@ -2576,6 +2905,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt, bool isDat
                strlen(te->owner) > 0 && strlen(te->dropStmt) > 0)
        {
                if (strcmp(te->desc, "AGGREGATE") == 0 ||
+                       strcmp(te->desc, "BLOB") == 0 ||
                        strcmp(te->desc, "CONVERSION") == 0 ||
                        strcmp(te->desc, "DATABASE") == 0 ||
                        strcmp(te->desc, "DOMAIN") == 0 ||
@@ -2583,11 +2913,17 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt, bool isDat
                        strcmp(te->desc, "OPERATOR") == 0 ||
                        strcmp(te->desc, "OPERATOR CLASS") == 0 ||
                        strcmp(te->desc, "OPERATOR FAMILY") == 0 ||
+                       strcmp(te->desc, "PROCEDURAL LANGUAGE") == 0 ||
                        strcmp(te->desc, "SCHEMA") == 0 ||
                        strcmp(te->desc, "TABLE") == 0 ||
                        strcmp(te->desc, "TYPE") == 0 ||
                        strcmp(te->desc, "VIEW") == 0 ||
-                       strcmp(te->desc, "SEQUENCE") == 0)
+                       strcmp(te->desc, "SEQUENCE") == 0 ||
+                       strcmp(te->desc, "FOREIGN TABLE") == 0 ||
+                       strcmp(te->desc, "TEXT SEARCH DICTIONARY") == 0 ||
+                       strcmp(te->desc, "TEXT SEARCH CONFIGURATION") == 0 ||
+                       strcmp(te->desc, "FOREIGN DATA WRAPPER") == 0 ||
+                       strcmp(te->desc, "SERVER") == 0)
                {
                        PQExpBuffer temp = createPQExpBuffer();
 
@@ -2603,9 +2939,9 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt, bool isDat
                                 strcmp(te->desc, "DEFAULT") == 0 ||
                                 strcmp(te->desc, "FK CONSTRAINT") == 0 ||
                                 strcmp(te->desc, "INDEX") == 0 ||
-                                strcmp(te->desc, "PROCEDURAL LANGUAGE") == 0 ||
                                 strcmp(te->desc, "RULE") == 0 ||
-                                strcmp(te->desc, "TRIGGER") == 0)
+                                strcmp(te->desc, "TRIGGER") == 0 ||
+                                strcmp(te->desc, "USER MAPPING") == 0)
                {
                        /* these object types don't have separate owners */
                }
@@ -2620,7 +2956,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt, bool isDat
         * If it's an ACL entry, it might contain SET SESSION AUTHORIZATION
         * commands, so we can no longer assume we know the current auth setting.
         */
-       if (strncmp(te->desc, "ACL", 3) == 0)
+       if (acl_pass)
        {
                if (AH->currUser)
                        free(AH->currUser);
@@ -2671,11 +3007,16 @@ ReadHead(ArchiveHandle *AH)
        int                     fmt;
        struct tm       crtm;
 
-       /* If we haven't already read the header... */
+       /*
+        * If we haven't already read the header, do so.
+        *
+        * NB: this code must agree with _discoverArchiveFormat().      Maybe find a
+        * way to unify the cases?
+        */
        if (!AH->readHeader)
        {
-
-               (*AH->ReadBufPtr) (AH, tmpMag, 5);
+               if ((*AH->ReadBufPtr) (AH, tmpMag, 5) != 5)
+                       die_horribly(AH, modulename, "unexpected end of file\n");
 
                if (strncmp(tmpMag, "PGDMP", 5) != 0)
                        die_horribly(AH, modulename, "did not find magic string in file header\n");
@@ -2690,7 +3031,6 @@ ReadHead(ArchiveHandle *AH)
 
                AH->version = ((AH->vmaj * 256 + AH->vmin) * 256 + AH->vrev) * 256 + 0;
 
-
                if (AH->version < K_VERS_1_0 || AH->version > K_VERS_MAX)
                        die_horribly(AH, modulename, "unsupported version (%d.%d) in file header\n",
                                                 AH->vmaj, AH->vmin);
@@ -2701,7 +3041,7 @@ ReadHead(ArchiveHandle *AH)
                                                 (unsigned long) AH->intSize);
 
                if (AH->intSize > sizeof(int))
-                       write_msg(modulename, "WARNING: archive was made on a machine with larger integers, some operations may fail\n");
+                       write_msg(modulename, "WARNING: archive was made on a machine with larger integers, some operations might fail\n");
 
                if (AH->version >= K_VERS_1_7)
                        AH->offSize = (*AH->ReadBytePtr) (AH);
@@ -2753,34 +3093,42 @@ ReadHead(ArchiveHandle *AH)
                AH->archiveRemoteVersion = ReadStr(AH);
                AH->archiveDumpVersion = ReadStr(AH);
        }
-
 }
 
 
 /*
  * checkSeek
- *       check to see if fseek can be performed.
+ *       check to see if ftell/fseek can be performed.
  */
-
 bool
 checkSeek(FILE *fp)
 {
+       pgoff_t         tpos;
 
-       if (fseeko(fp, 0, SEEK_CUR) != 0)
+       /*
+        * If pgoff_t is wider than long, we must have "real" fseeko and not an
+        * emulation using fseek.  Otherwise report no seek capability.
+        */
+#ifndef HAVE_FSEEKO
+       if (sizeof(pgoff_t) > sizeof(long))
                return false;
-       else if (sizeof(off_t) > sizeof(long))
+#endif
 
-               /*
-                * At this point, off_t is too large for long, so we return based on
-                * whether an off_t version of fseek is available.
-                */
-#ifdef HAVE_FSEEKO
-               return true;
-#else
+       /* Check that ftello works on this file */
+       errno = 0;
+       tpos = ftello(fp);
+       if (errno)
                return false;
-#endif
-       else
-               return true;
+
+       /*
+        * Check that fseeko(SEEK_SET) works, too.      NB: we used to try to test
+        * this with fseeko(fp, 0, SEEK_CUR).  But some platforms treat that as a
+        * successful no-op even on files that are otherwise unseekable.
+        */
+       if (fseeko(fp, tpos, SEEK_SET) != 0)
+               return false;
+
+       return true;
 }
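
A minimal, self-contained sketch of the same probe technique: take the current position with ftello(), then verify that fseeko(..., SEEK_SET) can return to it, since fseeko(fp, 0, SEEK_CUR) can succeed even on unseekable streams. The stream_is_seekable name and the example.dump filename are invented for illustration.

#include <stdio.h>
#include <errno.h>
#include <sys/types.h>

/* Return 1 if ftello/fseeko appear to work on fp, 0 otherwise. */
static int
stream_is_seekable(FILE *fp)
{
	off_t		pos;

	errno = 0;
	pos = ftello(fp);			/* where are we now? */
	if (errno != 0)
		return 0;				/* e.g. ESPIPE when fp is a pipe */

	/* Seeking back to that exact position is a real test of seekability. */
	if (fseeko(fp, pos, SEEK_SET) != 0)
		return 0;

	return 1;
}

int
main(void)
{
	FILE	   *fp = fopen("example.dump", "rb");	/* illustrative file */

	if (fp != NULL)
	{
		printf("seekable: %d\n", stream_is_seekable(fp));
		fclose(fp);
	}
	return 0;
}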
 
 
@@ -2795,8 +3143,8 @@ dumpTimestamp(ArchiveHandle *AH, const char *msg, time_t tim)
        /*
         * We don't print the timezone on Win32, because the names are long and
         * localized, which means they may contain characters in various random
-        * encodings; this has been seen to cause encoding errors when reading
-        * the dump script.
+        * encodings; this has been seen to cause encoding errors when reading the
+        * dump script.
         */
        if (strftime(buf, sizeof(buf),
 #ifndef WIN32
@@ -2807,3 +3155,990 @@ dumpTimestamp(ArchiveHandle *AH, const char *msg, time_t tim)
                                 localtime(&tim)) != 0)
                ahprintf(AH, "-- %s %s\n\n", msg, buf);
 }
+
+
+/*
+ * Main engine for parallel restore.
+ *
+ * Work is done in three phases.
+ * First we process tocEntries until we come to one that is marked
+ * SECTION_DATA or SECTION_POST_DATA, in a single connection, just as for a
+ * standard restore.  Second we process the remaining non-ACL steps in
+ * parallel worker children (threads on Windows, processes on Unix), each of
+ * which connects separately to the database.  Finally we process all the ACL
+ * entries in a single connection (that happens back in RestoreArchive).
+ */
+static void
+restore_toc_entries_parallel(ArchiveHandle *AH)
+{
+       RestoreOptions *ropt = AH->ropt;
+       int                     n_slots = ropt->number_of_jobs;
+       ParallelSlot *slots;
+       int                     work_status;
+       int                     next_slot;
+       TocEntry        pending_list;
+       TocEntry        ready_list;
+       TocEntry   *next_work_item;
+       thandle         ret_child;
+       TocEntry   *te;
+
+       ahlog(AH, 2, "entering restore_toc_entries_parallel\n");
+
+       /* we haven't got round to making this work for all archive formats */
+       if (AH->ClonePtr == NULL || AH->ReopenPtr == NULL)
+               die_horribly(AH, modulename, "parallel restore is not supported with this archive file format\n");
+
+       /* doesn't work if the archive represents dependencies as OIDs, either */
+       if (AH->version < K_VERS_1_8)
+               die_horribly(AH, modulename, "parallel restore is not supported with archives made by pre-8.0 pg_dump\n");
+
+       slots = (ParallelSlot *) calloc(sizeof(ParallelSlot), n_slots);
+
+       /* Adjust dependency information */
+       fix_dependencies(AH);
+
+       /*
+        * Do all the early stuff in a single connection in the parent. There's no
+        * great point in running it in parallel; in fact it will actually run
+        * faster in a single connection because we avoid all the connection and
+        * setup overhead.  Also, pg_dump is not currently very good about
+        * showing all the dependencies of SECTION_PRE_DATA items, so we do not
+        * risk trying to process them out-of-order.
+        */
+       for (next_work_item = AH->toc->next; next_work_item != AH->toc; next_work_item = next_work_item->next)
+       {
+               /* Non-PRE_DATA items are just ignored for now */
+               if (next_work_item->section == SECTION_DATA ||
+                       next_work_item->section == SECTION_POST_DATA)
+                       continue;
+
+               ahlog(AH, 1, "processing item %d %s %s\n",
+                         next_work_item->dumpId,
+                         next_work_item->desc, next_work_item->tag);
+
+               (void) restore_toc_entry(AH, next_work_item, ropt, false);
+
+               /* ready_list should not be touched here, so pass NULL */
+               reduce_dependencies(AH, next_work_item, NULL);
+       }
+
+       /*
+        * Now close parent connection in prep for parallel steps.      We do this
+        * mainly to ensure that we don't exceed the specified number of parallel
+        * connections.
+        */
+       PQfinish(AH->connection);
+       AH->connection = NULL;
+
+       /* blow away any transient state from the old connection */
+       if (AH->currUser)
+               free(AH->currUser);
+       AH->currUser = NULL;
+       if (AH->currSchema)
+               free(AH->currSchema);
+       AH->currSchema = NULL;
+       if (AH->currTablespace)
+               free(AH->currTablespace);
+       AH->currTablespace = NULL;
+       AH->currWithOids = -1;
+
+       /*
+        * Initialize the lists of pending and ready items.  After this setup, the
+        * pending list is everything that needs to be done but is blocked by one
+        * or more dependencies, while the ready list contains items that have no
+        * remaining dependencies.      Note: we don't yet filter out entries that
+        * aren't going to be restored.  They might participate in dependency
+        * chains connecting entries that should be restored, so we treat them as
+        * live until we actually process them.
+        */
+       par_list_header_init(&pending_list);
+       par_list_header_init(&ready_list);
+       for (next_work_item = AH->toc->next; next_work_item != AH->toc; next_work_item = next_work_item->next)
+       {
+               /* All PRE_DATA items were dealt with above */
+               if (next_work_item->section == SECTION_DATA ||
+                       next_work_item->section == SECTION_POST_DATA)
+               {
+                       if (next_work_item->depCount > 0)
+                               par_list_append(&pending_list, next_work_item);
+                       else
+                               par_list_append(&ready_list, next_work_item);
+               }
+       }
+
+       /*
+        * main parent loop
+        *
+        * Keep going until there is no worker still running AND there is no work
+        * left to be done.
+        */
+
+       ahlog(AH, 1, "entering main parallel loop\n");
+
+       while ((next_work_item = get_next_work_item(AH, &ready_list,
+                                                                                               slots, n_slots)) != NULL ||
+                  work_in_progress(slots, n_slots))
+       {
+               if (next_work_item != NULL)
+               {
+                       teReqs          reqs;
+
+                       /* If not to be dumped, don't waste time launching a worker */
+                       reqs = _tocEntryRequired(next_work_item, AH->ropt, false);
+                       if ((reqs & (REQ_SCHEMA | REQ_DATA)) == 0)
+                       {
+                               ahlog(AH, 1, "skipping item %d %s %s\n",
+                                         next_work_item->dumpId,
+                                         next_work_item->desc, next_work_item->tag);
+
+                               par_list_remove(next_work_item);
+                               reduce_dependencies(AH, next_work_item, &ready_list);
+
+                               continue;
+                       }
+
+                       if ((next_slot = get_next_slot(slots, n_slots)) != NO_SLOT)
+                       {
+                               /* There is work still to do and a worker slot available */
+                               thandle         child;
+                               RestoreArgs *args;
+
+                               ahlog(AH, 1, "launching item %d %s %s\n",
+                                         next_work_item->dumpId,
+                                         next_work_item->desc, next_work_item->tag);
+
+                               par_list_remove(next_work_item);
+
+                               /* this memory is dealloced in mark_work_done() */
+                               args = malloc(sizeof(RestoreArgs));
+                               args->AH = CloneArchive(AH);
+                               args->te = next_work_item;
+
+                               /* run the step in a worker child */
+                               child = spawn_restore(args);
+
+                               slots[next_slot].child_id = child;
+                               slots[next_slot].args = args;
+
+                               continue;
+                       }
+               }
+
+               /*
+                * If we get here, there must be work being done.  Either there is no
+                * work available to schedule (and work_in_progress returned true) or
+                * there are no slots available.  So we wait for a worker to finish,
+                * and process the result.
+                */
+               ret_child = reap_child(slots, n_slots, &work_status);
+
+               if (WIFEXITED(work_status))
+               {
+                       mark_work_done(AH, &ready_list,
+                                                  ret_child, WEXITSTATUS(work_status),
+                                                  slots, n_slots);
+               }
+               else
+               {
+                       die_horribly(AH, modulename, "worker process crashed: status %d\n",
+                                                work_status);
+               }
+       }
+
+       ahlog(AH, 1, "finished main parallel loop\n");
+
+       /*
+        * Now reconnect the single parent connection.
+        */
+       ConnectDatabase((Archive *) AH, ropt->dbname,
+                                       ropt->pghost, ropt->pgport, ropt->username,
+                                       ropt->promptPassword);
+
+       _doSetFixedOutputState(AH);
+
+       /*
+        * If there is any non-ACL work left, due to, say, circular
+        * dependencies or some other pathological condition, do it in the
+        * single parent connection.
+        */
+       for (te = pending_list.par_next; te != &pending_list; te = te->par_next)
+       {
+               ahlog(AH, 1, "processing missed item %d %s %s\n",
+                         te->dumpId, te->desc, te->tag);
+               (void) restore_toc_entry(AH, te, ropt, false);
+       }
+
+       /* The ACLs will be handled back in RestoreArchive. */
+}
+
+/*
+ * create a worker child to perform a restore step in parallel
+ */
+static thandle
+spawn_restore(RestoreArgs *args)
+{
+       thandle         child;
+
+       /* Ensure stdio state is quiesced before forking */
+       fflush(NULL);
+
+#ifndef WIN32
+       child = fork();
+       if (child == 0)
+       {
+               /* in child process */
+               parallel_restore(args);
+               die_horribly(args->AH, modulename,
+                                        "parallel_restore should not return\n");
+       }
+       else if (child < 0)
+       {
+               /* fork failed */
+               die_horribly(args->AH, modulename,
+                                        "could not create worker process: %s\n",
+                                        strerror(errno));
+       }
+#else
+       child = (HANDLE) _beginthreadex(NULL, 0, (void *) parallel_restore,
+                                                                       args, 0, NULL);
+       if (child == 0)
+               die_horribly(args->AH, modulename,
+                                        "could not create worker thread: %s\n",
+                                        strerror(errno));
+#endif
+
+       return child;
+}
+
+/*
+ *     collect status from a completed worker child
+ */
+static thandle
+reap_child(ParallelSlot *slots, int n_slots, int *work_status)
+{
+#ifndef WIN32
+       /* Unix is so much easier ... */
+       return wait(work_status);
+#else
+       static HANDLE *handles = NULL;
+       int                     hindex,
+                               snum,
+                               tnum;
+       thandle         ret_child;
+       DWORD           res;
+
+       /* first time around only, make space for handles to listen on */
+       if (handles == NULL)
+               handles = (HANDLE *) calloc(sizeof(HANDLE), n_slots);
+
+       /* set up list of handles to listen to */
+       for (snum = 0, tnum = 0; snum < n_slots; snum++)
+               if (slots[snum].child_id != 0)
+                       handles[tnum++] = slots[snum].child_id;
+
+       /* wait for one to finish */
+       hindex = WaitForMultipleObjects(tnum, handles, false, INFINITE);
+
+       /* get handle of finished thread */
+       ret_child = handles[hindex - WAIT_OBJECT_0];
+
+       /* get the result */
+       GetExitCodeThread(ret_child, &res);
+       *work_status = res;
+
+       /* dispose of handle to stop leaks */
+       CloseHandle(ret_child);
+
+       return ret_child;
+#endif
+}
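
On the Unix side, spawn_restore and reap_child amount to the classic fork / exit-status / wait pattern: flush stdio, fork, let the child report its result through its exit code, and let the parent decode it with WIFEXITED/WEXITSTATUS. Below is a stripped-down, self-contained sketch of that pattern; the worker logic and exit codes are invented for the example.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

#define DEMO_OK				0
#define DEMO_SOFT_ERROR		12	/* a nonzero code the parent treats specially */

/* Stand-in for the real per-item work done in a worker child. */
static void
demo_worker(int item)
{
	/* ... do the work for this item ... */
	exit((item % 2) ? DEMO_SOFT_ERROR : DEMO_OK);
}

int
main(void)
{
	int			running = 0;

	for (int i = 0; i < 4; i++)
	{
		pid_t		pid;

		fflush(NULL);			/* quiesce stdio before forking */
		pid = fork();
		if (pid == 0)
			demo_worker(i);		/* child: never returns */
		if (pid < 0)
		{
			fprintf(stderr, "fork failed: %s\n", strerror(errno));
			exit(1);
		}
		running++;
	}

	/* Reap children one at a time, as reap_child does with wait(). */
	while (running > 0)
	{
		int			status;
		pid_t		pid = wait(&status);

		if (WIFEXITED(status))
			printf("child %d exited with code %d\n",
				   (int) pid, WEXITSTATUS(status));
		else
			printf("child %d crashed\n", (int) pid);
		running--;
	}
	return 0;
}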
+
+/*
+ * are we doing anything now?
+ */
+static bool
+work_in_progress(ParallelSlot *slots, int n_slots)
+{
+       int                     i;
+
+       for (i = 0; i < n_slots; i++)
+       {
+               if (slots[i].child_id != 0)
+                       return true;
+       }
+       return false;
+}
+
+/*
+ * find the first free parallel slot (if any).
+ */
+static int
+get_next_slot(ParallelSlot *slots, int n_slots)
+{
+       int                     i;
+
+       for (i = 0; i < n_slots; i++)
+       {
+               if (slots[i].child_id == 0)
+                       return i;
+       }
+       return NO_SLOT;
+}
+
+
+/*
+ * Check if te1 has an exclusive lock requirement for an item that te2 also
+ * requires, whether or not te2's requirement is for an exclusive lock.
+ */
+static bool
+has_lock_conflicts(TocEntry *te1, TocEntry *te2)
+{
+       int                     j,
+                               k;
+
+       for (j = 0; j < te1->nLockDeps; j++)
+       {
+               for (k = 0; k < te2->nDeps; k++)
+               {
+                       if (te1->lockDeps[j] == te2->dependencies[k])
+                               return true;
+               }
+       }
+       return false;
+}
+
+
+/*
+ * Initialize the header of a parallel-processing list.
+ *
+ * These are circular lists with a dummy TocEntry as header, just like the
+ * main TOC list; but we use separate list links so that an entry can be in
+ * the main TOC list as well as in a parallel-processing list.
+ */
+static void
+par_list_header_init(TocEntry *l)
+{
+       l->par_prev = l->par_next = l;
+}
+
+/* Append te to the end of the parallel-processing list headed by l */
+static void
+par_list_append(TocEntry *l, TocEntry *te)
+{
+       te->par_prev = l->par_prev;
+       l->par_prev->par_next = te;
+       l->par_prev = te;
+       te->par_next = l;
+}
+
+/* Remove te from whatever parallel-processing list it's in */
+static void
+par_list_remove(TocEntry *te)
+{
+       te->par_prev->par_next = te->par_next;
+       te->par_next->par_prev = te->par_prev;
+       te->par_prev = NULL;
+       te->par_next = NULL;
+}
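
These three helpers are the standard dummy-header circular doubly-linked list, kept on separate link fields so an entry can sit on the main TOC list and a scheduling list at the same time. A self-contained sketch of the same idiom with a throwaway node type (names invented for the example):

#include <stdio.h>

typedef struct node
{
	int			id;
	struct node *prev;			/* analogous to par_prev */
	struct node *next;			/* analogous to par_next */
} node;

/* An empty list is a header that points at itself. */
static void
list_init(node *l)
{
	l->prev = l->next = l;
}

static void
list_append(node *l, node *n)
{
	n->prev = l->prev;
	l->prev->next = n;
	l->prev = n;
	n->next = l;
}

static void
list_remove(node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	n->prev = n->next = NULL;	/* marks "not in any list" */
}

int
main(void)
{
	node		head;
	node		items[3] = {{1}, {2}, {3}};

	list_init(&head);
	for (int i = 0; i < 3; i++)
		list_append(&head, &items[i]);

	list_remove(&items[1]);

	/* Walk the list the same way the pending/ready loops do. */
	for (node *n = head.next; n != &head; n = n->next)
		printf("item %d\n", n->id);
	return 0;
}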
+
+
+/*
+ * Find the next work item (if any) that is capable of being run now.
+ *
+ * To qualify, the item must have no remaining dependencies
+ * and no requirements for locks that are incompatible with
+ * items currently running.  Items in the ready_list are known to have
+ * no remaining dependencies, but we have to check for lock conflicts.
+ *
+ * Note that the returned item has *not* been removed from ready_list.
+ * The caller must do that after successfully dispatching the item.
+ *
+ * pref_non_data is for an alternative selection algorithm that gives
+ * preference to non-data items if there is already a data load running.
+ * It is currently disabled.
+ */
+static TocEntry *
+get_next_work_item(ArchiveHandle *AH, TocEntry *ready_list,
+                                  ParallelSlot *slots, int n_slots)
+{
+       bool            pref_non_data = false;  /* or get from AH->ropt */
+       TocEntry   *data_te = NULL;
+       TocEntry   *te;
+       int                     i,
+                               k;
+
+       /*
+        * Bogus heuristics for pref_non_data
+        */
+       if (pref_non_data)
+       {
+               int                     count = 0;
+
+               for (k = 0; k < n_slots; k++)
+                       if (slots[k].args->te != NULL &&
+                               slots[k].args->te->section == SECTION_DATA)
+                               count++;
+               if (n_slots == 0 || count * 4 < n_slots)
+                       pref_non_data = false;
+       }
+
+       /*
+        * Search the ready_list until we find a suitable item.
+        */
+       for (te = ready_list->par_next; te != ready_list; te = te->par_next)
+       {
+               bool            conflicts = false;
+
+               /*
+                * Check to see if the item would need exclusive lock on something
+                * that a currently running item also needs lock on, or vice versa. If
+                * so, we don't want to schedule them together.
+                */
+               for (i = 0; i < n_slots && !conflicts; i++)
+               {
+                       TocEntry   *running_te;
+
+                       if (slots[i].args == NULL)
+                               continue;
+                       running_te = slots[i].args->te;
+
+                       if (has_lock_conflicts(te, running_te) ||
+                               has_lock_conflicts(running_te, te))
+                       {
+                               conflicts = true;
+                               break;
+                       }
+               }
+
+               if (conflicts)
+                       continue;
+
+               if (pref_non_data && te->section == SECTION_DATA)
+               {
+                       if (data_te == NULL)
+                               data_te = te;
+                       continue;
+               }
+
+               /* passed all tests, so this item can run */
+               return te;
+       }
+
+       if (data_te != NULL)
+               return data_te;
+
+       ahlog(AH, 2, "no item ready\n");
+       return NULL;
+}
+
+
+/*
+ * Restore a single TOC item in parallel with others
+ *
+ * This is the procedure run as a thread (Windows) or a
+ * separate process (everything else).
+ */
+static parallel_restore_result
+parallel_restore(RestoreArgs *args)
+{
+       ArchiveHandle *AH = args->AH;
+       TocEntry   *te = args->te;
+       RestoreOptions *ropt = AH->ropt;
+       int                     retval;
+
+       /*
+        * Close and reopen the input file so we have a private file pointer that
+        * doesn't stomp on anyone else's file pointer, if we're actually going to
+        * need to read from the file. Otherwise, just close it except on Windows,
+        * where it will possibly be needed by other threads.
+        *
+        * Note: on Windows, since we are using threads not processes, the reopen
+        * call *doesn't* close the original file pointer but just opens a new one.
+        */
+       if (te->section == SECTION_DATA)
+               (AH->ReopenPtr) (AH);
+#ifndef WIN32
+       else
+               (AH->ClosePtr) (AH);
+#endif
+
+       /*
+        * We need our own database connection, too
+        */
+       ConnectDatabase((Archive *) AH, ropt->dbname,
+                                       ropt->pghost, ropt->pgport, ropt->username,
+                                       ropt->promptPassword);
+
+       _doSetFixedOutputState(AH);
+
+       /* Restore the TOC item */
+       retval = restore_toc_entry(AH, te, ropt, true);
+
+       /* And clean up */
+       PQfinish(AH->connection);
+       AH->connection = NULL;
+
+       /* If we reopened the file, we are done with it, so close it now */
+       if (te->section == SECTION_DATA)
+               (AH->ClosePtr) (AH);
+
+       if (retval == 0 && AH->public.n_errors)
+               retval = WORKER_IGNORED_ERRORS;
+
+#ifndef WIN32
+       exit(retval);
+#else
+       return retval;
+#endif
+}
+
+
+/*
+ * Housekeeping to be done after a step has been restored in parallel.
+ *
+ * Clear the appropriate slot, free all the extra memory we allocated,
+ * update status, and reduce the dependency count of any dependent items.
+ */
+static void
+mark_work_done(ArchiveHandle *AH, TocEntry *ready_list,
+                          thandle worker, int status,
+                          ParallelSlot *slots, int n_slots)
+{
+       TocEntry   *te = NULL;
+       int                     i;
+
+       for (i = 0; i < n_slots; i++)
+       {
+               if (slots[i].child_id == worker)
+               {
+                       slots[i].child_id = 0;
+                       te = slots[i].args->te;
+                       DeCloneArchive(slots[i].args->AH);
+                       free(slots[i].args);
+                       slots[i].args = NULL;
+
+                       break;
+               }
+       }
+
+       if (te == NULL)
+               die_horribly(AH, modulename, "could not find slot of finished worker\n");
+
+       ahlog(AH, 1, "finished item %d %s %s\n",
+                 te->dumpId, te->desc, te->tag);
+
+       if (status == WORKER_CREATE_DONE)
+               mark_create_done(AH, te);
+       else if (status == WORKER_INHIBIT_DATA)
+       {
+               inhibit_data_for_failed_table(AH, te);
+               AH->public.n_errors++;
+       }
+       else if (status == WORKER_IGNORED_ERRORS)
+               AH->public.n_errors++;
+       else if (status != 0)
+               die_horribly(AH, modulename, "worker process failed: exit code %d\n",
+                                        status);
+
+       reduce_dependencies(AH, te, ready_list);
+}
+
+
+/*
+ * Process the dependency information into a form useful for parallel restore.
+ *
+ * This function takes care of fixing up some missing or badly designed
+ * dependencies, and then prepares subsidiary data structures that will be
+ * used in the main parallel-restore logic, including:
+ * 1. We build the tocsByDumpId[] index array.
+ * 2. We build the revDeps[] arrays of incoming dependency dumpIds.
+ * 3. We set up depCount fields that are the number of as-yet-unprocessed
+ * dependencies for each TOC entry.
+ *
+ * We also identify locking dependencies so that we can avoid trying to
+ * schedule conflicting items at the same time.
+ */
+static void
+fix_dependencies(ArchiveHandle *AH)
+{
+       TocEntry   *te;
+       int                     i;
+
+       /*
+        * It is convenient to have an array that indexes the TOC entries by dump
+        * ID, rather than searching the TOC list repeatedly.  Entries for dump
+        * IDs not present in the TOC will be NULL.
+        *
+        * NOTE: because maxDumpId is just the highest dump ID defined in the
+        * archive, there might be dependencies for IDs > maxDumpId.  All uses of
+        * this array must guard against out-of-range dependency numbers.
+        *
+        * Also, initialize the depCount/revDeps/nRevDeps fields, and make sure
+        * the TOC items are marked as not being in any parallel-processing list.
+        */
+       maxDumpId = AH->maxDumpId;
+       tocsByDumpId = (TocEntry **) calloc(maxDumpId, sizeof(TocEntry *));
+       for (te = AH->toc->next; te != AH->toc; te = te->next)
+       {
+               tocsByDumpId[te->dumpId - 1] = te;
+               te->depCount = te->nDeps;
+               te->revDeps = NULL;
+               te->nRevDeps = 0;
+               te->par_prev = NULL;
+               te->par_next = NULL;
+       }
+
+       /*
+        * POST_DATA items that are shown as depending on a table need to be
+        * re-pointed to depend on that table's data, instead.  This ensures they
+        * won't get scheduled until the data has been loaded.  We handle this by
+        * first finding TABLE/TABLE DATA pairs and then scanning all the
+        * dependencies.
+        *
+        * Note: currently, a TABLE DATA should always have exactly one
+        * dependency, on its TABLE item.  So we don't bother to search, but look
+        * just at the first dependency.  We take the trouble to make sure that it's a
+        * TABLE, if possible.  However, if the dependency isn't in the archive
+        * then just assume it was a TABLE; this is to cover cases where the table
+        * was suppressed but we have the data and some dependent post-data items.
+        *
+        * XXX this is O(N^2) if there are a lot of tables.  We ought to fix
+        * pg_dump to produce correctly-linked dependencies in the first place.
+        */
+       for (te = AH->toc->next; te != AH->toc; te = te->next)
+       {
+               if (strcmp(te->desc, "TABLE DATA") == 0 && te->nDeps > 0)
+               {
+                       DumpId          tableId = te->dependencies[0];
+
+                       if (tableId > maxDumpId ||
+                               tocsByDumpId[tableId - 1] == NULL ||
+                               strcmp(tocsByDumpId[tableId - 1]->desc, "TABLE") == 0)
+                       {
+                               repoint_table_dependencies(AH, tableId, te->dumpId);
+                       }
+               }
+       }
+
+       /*
+        * Pre-8.4 versions of pg_dump neglected to set up a dependency from BLOB
+        * COMMENTS to BLOBS.  Cope.  (We assume there's only one BLOBS and only
+        * one BLOB COMMENTS in such files.)
+        */
+       if (AH->version < K_VERS_1_11)
+       {
+               for (te = AH->toc->next; te != AH->toc; te = te->next)
+               {
+                       if (strcmp(te->desc, "BLOB COMMENTS") == 0 && te->nDeps == 0)
+                       {
+                               TocEntry   *te2;
+
+                               for (te2 = AH->toc->next; te2 != AH->toc; te2 = te2->next)
+                               {
+                                       if (strcmp(te2->desc, "BLOBS") == 0)
+                                       {
+                                               te->dependencies = (DumpId *) malloc(sizeof(DumpId));
+                                               te->dependencies[0] = te2->dumpId;
+                                               te->nDeps++;
+                                               te->depCount++;
+                                               break;
+                                       }
+                               }
+                               break;
+                       }
+               }
+       }
+
+       /*
+        * At this point we start to build the revDeps reverse-dependency arrays,
+        * so all changes of dependencies must be complete.
+        */
+
+       /*
+        * Count the incoming dependencies for each item.  Also, it is possible
+        * that the dependency lists include items that are not in the archive
+        * at all.  Subtract such items from the depCounts.
+        */
+       for (te = AH->toc->next; te != AH->toc; te = te->next)
+       {
+               for (i = 0; i < te->nDeps; i++)
+               {
+                       DumpId          depid = te->dependencies[i];
+
+                       if (depid <= maxDumpId && tocsByDumpId[depid - 1] != NULL)
+                               tocsByDumpId[depid - 1]->nRevDeps++;
+                       else
+                               te->depCount--;
+               }
+       }
+
+       /*
+        * Allocate space for revDeps[] arrays, and reset nRevDeps so we can
+        * use it as a counter below.
+        */
+       for (te = AH->toc->next; te != AH->toc; te = te->next)
+       {
+               if (te->nRevDeps > 0)
+                       te->revDeps = (DumpId *) malloc(te->nRevDeps * sizeof(DumpId));
+               te->nRevDeps = 0;
+       }
+
+       /*
+        * Build the revDeps[] arrays of incoming-dependency dumpIds.  This
+        * had better agree with the loops above.
+        */
+       for (te = AH->toc->next; te != AH->toc; te = te->next)
+       {
+               for (i = 0; i < te->nDeps; i++)
+               {
+                       DumpId          depid = te->dependencies[i];
+
+                       if (depid <= maxDumpId && tocsByDumpId[depid - 1] != NULL)
+                       {
+                               TocEntry   *otherte = tocsByDumpId[depid - 1];
+
+                               otherte->revDeps[otherte->nRevDeps++] = te->dumpId;
+                       }
+               }
+       }
+
+       /*
+        * Lastly, work out the locking dependencies.
+        */
+       for (te = AH->toc->next; te != AH->toc; te = te->next)
+       {
+               te->lockDeps = NULL;
+               te->nLockDeps = 0;
+               identify_locking_dependencies(te);
+       }
+}
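
The revDeps construction above is a three-pass pattern: count incoming edges, allocate each array and zero the counter, then fill while re-counting. Here is a self-contained sketch of the same passes on a toy dependency table; the data and variable names are made up.

#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	enum
	{
		N = 4
	};

	/* item 1 depends on 0; item 2 depends on 1; item 3 depends on 1 */
	int			ndeps[N] = {0, 1, 1, 1};
	int			deps[N][1] = {{0}, {0}, {1}, {1}};	/* row 0 is unused */
	int			nrev[N] = {0};
	int		  **rev = calloc(N, sizeof(int *));

	/* Pass 1: count incoming edges. */
	for (int i = 0; i < N; i++)
		for (int j = 0; j < ndeps[i]; j++)
			nrev[deps[i][j]]++;

	/* Pass 2: allocate each array, then reset the counter for reuse. */
	for (int i = 0; i < N; i++)
	{
		if (nrev[i] > 0)
			rev[i] = malloc(nrev[i] * sizeof(int));
		nrev[i] = 0;
	}

	/* Pass 3: fill the reverse-dependency arrays. */
	for (int i = 0; i < N; i++)
		for (int j = 0; j < ndeps[i]; j++)
		{
			int			d = deps[i][j];

			rev[d][nrev[d]++] = i;
		}

	for (int i = 0; i < N; i++)
	{
		printf("item %d is needed by:", i);
		for (int j = 0; j < nrev[i]; j++)
			printf(" %d", rev[i][j]);
		printf("\n");
		free(rev[i]);
	}
	free(rev);
	return 0;
}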
+
+/*
+ * Change dependencies on tableId to depend on tableDataId instead,
+ * but only in POST_DATA items.
+ */
+static void
+repoint_table_dependencies(ArchiveHandle *AH,
+                                                  DumpId tableId, DumpId tableDataId)
+{
+       TocEntry   *te;
+       int                     i;
+
+       for (te = AH->toc->next; te != AH->toc; te = te->next)
+       {
+               if (te->section != SECTION_POST_DATA)
+                       continue;
+               for (i = 0; i < te->nDeps; i++)
+               {
+                       if (te->dependencies[i] == tableId)
+                       {
+                               te->dependencies[i] = tableDataId;
+                               ahlog(AH, 2, "transferring dependency %d -> %d to %d\n",
+                                         te->dumpId, tableId, tableDataId);
+                       }
+               }
+       }
+}
+
+/*
+ * Identify which objects we'll need exclusive lock on in order to restore
+ * the given TOC entry (*other* than the one identified by the TOC entry
+ * itself).  Record their dump IDs in the entry's lockDeps[] array.
+ */
+static void
+identify_locking_dependencies(TocEntry *te)
+{
+       DumpId     *lockids;
+       int                     nlockids;
+       int                     i;
+
+       /* Quick exit if no dependencies at all */
+       if (te->nDeps == 0)
+               return;
+
+       /* Exit if this entry doesn't need exclusive lock on other objects */
+       if (!(strcmp(te->desc, "CONSTRAINT") == 0 ||
+                 strcmp(te->desc, "CHECK CONSTRAINT") == 0 ||
+                 strcmp(te->desc, "FK CONSTRAINT") == 0 ||
+                 strcmp(te->desc, "RULE") == 0 ||
+                 strcmp(te->desc, "TRIGGER") == 0))
+               return;
+
+       /*
+        * We assume the item requires exclusive lock on each TABLE DATA item
+        * listed among its dependencies.  (This was originally a dependency on
+        * the TABLE, but fix_dependencies repointed it to the data item. Note
+        * that all the entry types we are interested in here are POST_DATA, so
+        * they will all have been changed this way.)
+        */
+       lockids = (DumpId *) malloc(te->nDeps * sizeof(DumpId));
+       nlockids = 0;
+       for (i = 0; i < te->nDeps; i++)
+       {
+               DumpId          depid = te->dependencies[i];
+
+               if (depid <= maxDumpId && tocsByDumpId[depid - 1] &&
+                       strcmp(tocsByDumpId[depid - 1]->desc, "TABLE DATA") == 0)
+                       lockids[nlockids++] = depid;
+       }
+
+       if (nlockids == 0)
+       {
+               free(lockids);
+               return;
+       }
+
+       te->lockDeps = realloc(lockids, nlockids * sizeof(DumpId));
+       te->nLockDeps = nlockids;
+}
+
+/*
+ * Remove the specified TOC entry from the depCounts of items that depend on
+ * it, thereby possibly making them ready-to-run.  Any pending item that
+ * becomes ready should be moved to the ready list.
+ */
+static void
+reduce_dependencies(ArchiveHandle *AH, TocEntry *te, TocEntry *ready_list)
+{
+       int                     i;
+
+       ahlog(AH, 2, "reducing dependencies for %d\n", te->dumpId);
+
+       for (i = 0; i < te->nRevDeps; i++)
+       {
+               TocEntry   *otherte = tocsByDumpId[te->revDeps[i] - 1];
+
+               otherte->depCount--;
+               if (otherte->depCount == 0 && otherte->par_prev != NULL)
+               {
+                       /* It must be in the pending list, so remove it ... */
+                       par_list_remove(otherte);
+                       /* ... and add to ready_list */
+                       par_list_append(ready_list, otherte);
+               }
+       }
+}
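
Together with the depCount setup in fix_dependencies, this is a small topological scheduler: finishing an item decrements the counters of everything that depends on it, and anything that reaches zero becomes ready. A self-contained sketch of that counting scheme with made-up sample data follows; a real scheduler would dispatch ready items to workers rather than run them inline.

#include <stdio.h>

enum
{
	N = 4
};

/* item 1 depends on 0; item 2 depends on 0 and 1; item 3 depends on 1 */
static int	depCount[N] = {0, 1, 2, 1};
static int	nrev[N] = {2, 2, 0, 0};
static int	rev[N][2] = {{1, 2}, {2, 3}, {0}, {0}};

static int	ready[N];
static int	nready = 0;

/* Decrement dependent items' counters; promote newly unblocked ones. */
static void
mark_done(int item)
{
	for (int i = 0; i < nrev[item]; i++)
	{
		int			other = rev[item][i];

		if (--depCount[other] == 0)
			ready[nready++] = other;
	}
}

int
main(void)
{
	/* seed the ready list with items that have no prerequisites */
	for (int i = 0; i < N; i++)
		if (depCount[i] == 0)
			ready[nready++] = i;

	/* run items as they become ready */
	for (int done = 0; done < nready; done++)
	{
		int			item = ready[done];

		printf("running item %d\n", item);
		mark_done(item);
	}
	return 0;
}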
+
+/*
+ * Set the created flag on the DATA member corresponding to the given
+ * TABLE member
+ */
+static void
+mark_create_done(ArchiveHandle *AH, TocEntry *te)
+{
+       TocEntry   *tes;
+
+       for (tes = AH->toc->next; tes != AH->toc; tes = tes->next)
+       {
+               if (strcmp(tes->desc, "TABLE DATA") == 0 &&
+                       strcmp(tes->tag, te->tag) == 0 &&
+                       strcmp(tes->namespace ? tes->namespace : "",
+                                  te->namespace ? te->namespace : "") == 0)
+               {
+                       tes->created = true;
+                       break;
+               }
+       }
+}
+
+/*
+ * Mark the DATA member corresponding to the given TABLE member
+ * as not wanted
+ */
+static void
+inhibit_data_for_failed_table(ArchiveHandle *AH, TocEntry *te)
+{
+       RestoreOptions *ropt = AH->ropt;
+       TocEntry   *tes;
+
+       ahlog(AH, 1, "table \"%s\" could not be created, will not restore its data\n",
+                 te->tag);
+
+       for (tes = AH->toc->next; tes != AH->toc; tes = tes->next)
+       {
+               if (strcmp(tes->desc, "TABLE DATA") == 0 &&
+                       strcmp(tes->tag, te->tag) == 0 &&
+                       strcmp(tes->namespace ? tes->namespace : "",
+                                  te->namespace ? te->namespace : "") == 0)
+               {
+                       /* mark it unwanted; we assume idWanted array already exists */
+                       ropt->idWanted[tes->dumpId - 1] = false;
+                       break;
+               }
+       }
+}
+
+
+/*
+ * Clone and de-clone routines used in parallel restoration.
+ *
+ * Enough of the structure is cloned to ensure that there is no
+ * conflict between different threads each with their own clone.
+ *
+ * These could be public, but no need at present.
+ */
+static ArchiveHandle *
+CloneArchive(ArchiveHandle *AH)
+{
+       ArchiveHandle *clone;
+
+       /* Make a "flat" copy */
+       clone = (ArchiveHandle *) malloc(sizeof(ArchiveHandle));
+       if (clone == NULL)
+               die_horribly(AH, modulename, "out of memory\n");
+       memcpy(clone, AH, sizeof(ArchiveHandle));
+
+       /* Handle format-independent fields */
+       clone->pgCopyBuf = createPQExpBuffer();
+       clone->sqlBuf = createPQExpBuffer();
+       clone->sqlparse.tagBuf = NULL;
+
+       /* The clone will have its own connection, so disregard connection state */
+       clone->connection = NULL;
+       clone->currUser = NULL;
+       clone->currSchema = NULL;
+       clone->currTablespace = NULL;
+       clone->currWithOids = -1;
+
+       /* savedPassword must be local in case we change it while connecting */
+       if (clone->savedPassword)
+               clone->savedPassword = strdup(clone->savedPassword);
+
+       /* clone has its own error count, too */
+       clone->public.n_errors = 0;
+
+       /* Let the format-specific code have a chance too */
+       (clone->ClonePtr) (clone);
+
+       return clone;
+}
+
+/*
+ * Release clone-local storage.
+ *
+ * Note: we assume any clone-local connection was already closed.
+ */
+static void
+DeCloneArchive(ArchiveHandle *AH)
+{
+       /* Clear format-specific state */
+       (AH->DeClonePtr) (AH);
+
+       /* Clear state allocated by CloneArchive */
+       destroyPQExpBuffer(AH->pgCopyBuf);
+       destroyPQExpBuffer(AH->sqlBuf);
+       if (AH->sqlparse.tagBuf)
+               destroyPQExpBuffer(AH->sqlparse.tagBuf);
+
+       /* Clear any connection-local state */
+       if (AH->currUser)
+               free(AH->currUser);
+       if (AH->currSchema)
+               free(AH->currSchema);
+       if (AH->currTablespace)
+               free(AH->currTablespace);
+       if (AH->savedPassword)
+               free(AH->savedPassword);
+
+       free(AH);
+}
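
CloneArchive and DeCloneArchive use the flat-copy-then-fix-up idiom: memcpy the whole struct, then duplicate or reset every field that must be private to the clone, and free exactly those fields when de-cloning. A self-contained sketch of that idiom with a made-up handle type:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct handle
{
	char	   *scratch;		/* per-instance buffer, must not be shared */
	int			n_errors;		/* per-instance error count */
	int			max_jobs;		/* plain value, safe to copy as-is */
} handle;

/* Flat-copy the struct, then replace anything that must stay private. */
static handle *
clone_handle(const handle *src)
{
	handle	   *clone = malloc(sizeof(handle));

	if (clone == NULL)
		return NULL;
	memcpy(clone, src, sizeof(handle));
	clone->scratch = strdup(src->scratch);	/* private copy, not shared */
	clone->n_errors = 0;					/* clone keeps its own count */
	return clone;
}

/* Release only what clone_handle allocated, then the struct itself. */
static void
declone_handle(handle *h)
{
	free(h->scratch);
	free(h);
}

int
main(void)
{
	handle		orig = {strdup("per-instance state"), 0, 4};
	handle	   *copy = clone_handle(&orig);

	if (copy != NULL)
	{
		printf("original: %s, clone: %s\n", orig.scratch, copy->scratch);
		declone_handle(copy);
	}
	free(orig.scratch);
	return 0;
}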