]> granicus.if.org Git - postgresql/commitdiff
Improve pg_dump's dependency-sorting logic to enforce section dump order.
authorTom Lane <tgl@sss.pgh.pa.us>
Tue, 26 Jun 2012 01:19:28 +0000 (21:19 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Tue, 26 Jun 2012 01:19:28 +0000 (21:19 -0400)
As of 9.2, with the --section option, it is very important that the concept
of "pre data", "data", and "post data" sections of the output be honored
strictly; else a dump divided into separate sectional files might be
unrestorable.  However, the dependency-sorting logic knew nothing of
sections and would happily select output orderings that didn't fit that
structure.  Doing so was mostly harmless before 9.2, but now we need to be
sure it doesn't do that.  To fix, create dummy objects representing the
section boundaries and add dependencies between them and all the normal
objects.  (This might sound expensive but it seems to only add a percent or
two to pg_dump's runtime.)

This also fixes a problem introduced in 9.1 by the feature that allows
incomplete GROUP BY lists when a primary key is given in GROUP BY.
That means that views can depend on primary key constraints.  Previously,
pg_dump would deal with that by simply emitting the primary key constraint
before the view definition (and hence before the data section of the
output).  That's bad enough for simple serial restores, where creating an
index before the data is loaded works, but is undesirable for speed
reasons.  But it could lead to outright failure of parallel restores, as
seen in bug #6699 from Joe Van Dyk.  That happened because pg_restore would
switch into parallel mode as soon as it reached the constraint, and then
very possibly would try to emit the view definition before the primary key
was committed (as a consequence of another bug that causes the view not to
be correctly marked as depending on the constraint).  Adding the section
boundary constraints forces the dependency-sorting code to break the view
into separate table and rule declarations, allowing the rule, and hence the
primary key constraint it depends on, to revert to their intended location
in the post-data section.  This also somewhat accidentally works around the
bogus-dependency-marking problem, because the rule will be correctly shown
as depending on the constraint, so parallel pg_restore will now do the
right thing.  (We will fix the bogus-dependency problem for real in a
separate patch, but that patch is not easily back-portable to 9.1, so the
fact that this patch is enough to dodge the only known symptom is
fortunate.)

Back-patch to 9.1, except for the hunk that adds verification that the
finished archive TOC list is in correct section order; the place where
it was convenient to add that doesn't exist in 9.1.

src/bin/pg_dump/pg_dump.c
src/bin/pg_dump/pg_dump.h
src/bin/pg_dump/pg_dump_sort.c

index 665f758db04b4add7763cf79f1c8f2ecd9dd7cca..d9eeaa64602e2503909b78d364db14e21d01a2d6 100644 (file)
@@ -202,6 +202,11 @@ static void dumpACL(Archive *fout, CatalogId objCatId, DumpId objDumpId,
                const char *acls);
 
 static void getDependencies(void);
+
+static DumpableObject *createBoundaryObjects(void);
+static void addBoundaryDependencies(DumpableObject **dobjs, int numObjs,
+                                               DumpableObject *boundaryObjs);
+
 static void getDomainConstraints(TypeInfo *tyinfo);
 static void getTableData(TableInfo *tblinfo, int numTables, bool oids);
 static void makeTableDataInfo(TableInfo *tbinfo, bool oids);
@@ -260,6 +265,7 @@ main(int argc, char **argv)
        int                     numTables;
        DumpableObject **dobjs;
        int                     numObjs;
+       DumpableObject *boundaryObjs;
        int                     i;
        enum trivalue prompt_password = TRI_DEFAULT;
        int                     compressLevel = -1;
@@ -742,6 +748,17 @@ main(int argc, char **argv)
         */
        getDependencies();
 
+       /* Lastly, create dummy objects to represent the section boundaries */
+       boundaryObjs = createBoundaryObjects();
+
+       /* Get pointers to all the known DumpableObjects */
+       getDumpableObjects(&dobjs, &numObjs);
+
+       /*
+        * Add dummy dependencies to enforce the dump section ordering.
+        */
+       addBoundaryDependencies(dobjs, numObjs, boundaryObjs);
+
        /*
         * Sort the objects into a safe dump order (no forward references).
         *
@@ -751,14 +768,13 @@ main(int argc, char **argv)
         * will dump identically.  Before 7.3 we don't have dependencies and we
         * use OID ordering as an (unreliable) guide to creation order.
         */
-       getDumpableObjects(&dobjs, &numObjs);
-
        if (g_fout->remoteVersion >= 70300)
                sortDumpableObjectsByTypeName(dobjs, numObjs);
        else
                sortDumpableObjectsByTypeOid(dobjs, numObjs);
 
-       sortDumpableObjects(dobjs, numObjs);
+       sortDumpableObjects(dobjs, numObjs,
+                                               boundaryObjs[0].dumpId, boundaryObjs[1].dumpId);
 
        /*
         * Create archive TOC entries for all the objects to be dumped, in a safe
@@ -7113,6 +7129,10 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj)
                                                 dobj->dependencies, dobj->nDeps,
                                                 dumpBlobs, NULL);
                        break;
+               case DO_PRE_DATA_BOUNDARY:
+               case DO_POST_DATA_BOUNDARY:
+                       /* never dumped, nothing to do */
+                       break;
        }
 }
 
@@ -11578,7 +11598,7 @@ dumpDefaultACL(Archive *fout, DefaultACLInfo *daclinfo)
           daclinfo->dobj.namespace ? daclinfo->dobj.namespace->dobj.name : NULL,
                                 NULL,
                                 daclinfo->defaclrole,
-                                false, "DEFAULT ACL", SECTION_NONE,
+                                false, "DEFAULT ACL", SECTION_POST_DATA,
                                 q->data, "", NULL,
                                 daclinfo->dobj.dependencies, daclinfo->dobj.nDeps,
                                 NULL, NULL);
@@ -13960,6 +13980,113 @@ getDependencies(void)
 }
 
 
+/*
+ * createBoundaryObjects - create dummy DumpableObjects to represent
+ * dump section boundaries.
+ */
+static DumpableObject *
+createBoundaryObjects(void)
+{
+       DumpableObject *dobjs;
+
+       dobjs = (DumpableObject *) pg_malloc(2 * sizeof(DumpableObject));
+
+       dobjs[0].objType = DO_PRE_DATA_BOUNDARY;
+       dobjs[0].catId = nilCatalogId;
+       AssignDumpId(dobjs + 0);
+       dobjs[0].name = pg_strdup("PRE-DATA BOUNDARY");
+
+       dobjs[1].objType = DO_POST_DATA_BOUNDARY;
+       dobjs[1].catId = nilCatalogId;
+       AssignDumpId(dobjs + 1);
+       dobjs[1].name = pg_strdup("POST-DATA BOUNDARY");
+
+       return dobjs;
+}
+
+/*
+ * addBoundaryDependencies - add dependencies as needed to enforce the dump
+ * section boundaries.
+ */
+static void
+addBoundaryDependencies(DumpableObject **dobjs, int numObjs,
+                                               DumpableObject *boundaryObjs)
+{
+       DumpableObject *preDataBound = boundaryObjs + 0;
+       DumpableObject *postDataBound = boundaryObjs + 1;
+       int                     i;
+
+       for (i = 0; i < numObjs; i++)
+       {
+               DumpableObject *dobj = dobjs[i];
+
+               /*
+                * The classification of object types here must match the SECTION_xxx
+                * values assigned during subsequent ArchiveEntry calls!
+                */
+               switch (dobj->objType)
+               {
+                       case DO_NAMESPACE:
+                       case DO_EXTENSION:
+                       case DO_TYPE:
+                       case DO_SHELL_TYPE:
+                       case DO_FUNC:
+                       case DO_AGG:
+                       case DO_OPERATOR:
+                       case DO_OPCLASS:
+                       case DO_OPFAMILY:
+                       case DO_COLLATION:
+                       case DO_CONVERSION:
+                       case DO_TABLE:
+                       case DO_ATTRDEF:
+                       case DO_PROCLANG:
+                       case DO_CAST:
+                       case DO_DUMMY_TYPE:
+                       case DO_TSPARSER:
+                       case DO_TSDICT:
+                       case DO_TSTEMPLATE:
+                       case DO_TSCONFIG:
+                       case DO_FDW:
+                       case DO_FOREIGN_SERVER:
+                       case DO_BLOB:
+                               /* Pre-data objects: must come before the pre-data boundary */
+                               addObjectDependency(preDataBound, dobj->dumpId);
+                               break;
+                       case DO_TABLE_DATA:
+                       case DO_BLOB_DATA:
+                               /* Data objects: must come between the boundaries */
+                               addObjectDependency(dobj, preDataBound->dumpId);
+                               addObjectDependency(postDataBound, dobj->dumpId);
+                               break;
+                       case DO_INDEX:
+                       case DO_TRIGGER:
+                       case DO_DEFAULT_ACL:
+                               /* Post-data objects: must come after the post-data boundary */
+                               addObjectDependency(dobj, postDataBound->dumpId);
+                               break;
+                       case DO_RULE:
+                               /* Rules are post-data, but only if dumped separately */
+                               if (((RuleInfo *) dobj)->separate)
+                                       addObjectDependency(dobj, postDataBound->dumpId);
+                               break;
+                       case DO_CONSTRAINT:
+                       case DO_FK_CONSTRAINT:
+                               /* Constraints are post-data, but only if dumped separately */
+                               if (((ConstraintInfo *) dobj)->separate)
+                                       addObjectDependency(dobj, postDataBound->dumpId);
+                               break;
+                       case DO_PRE_DATA_BOUNDARY:
+                               /* nothing to do */
+                               break;
+                       case DO_POST_DATA_BOUNDARY:
+                               /* must come after the pre-data boundary */
+                               addObjectDependency(dobj, preDataBound->dumpId);
+                               break;
+               }
+       }
+}
+
+
 /*
  * selectSourceSchema - make the specified schema the active search path
  * in the source database.
index 21d42f8ea5365754e87f64a57cab00f36c9cb2d9..a7ec992d3c3009dc98d653aa3f5fb7390729f0d3 100644 (file)
@@ -97,6 +97,7 @@ typedef enum
        DO_OPERATOR,
        DO_OPCLASS,
        DO_OPFAMILY,
+       DO_COLLATION,
        DO_CONVERSION,
        DO_TABLE,
        DO_ATTRDEF,
@@ -118,7 +119,8 @@ typedef enum
        DO_DEFAULT_ACL,
        DO_BLOB,
        DO_BLOB_DATA,
-       DO_COLLATION
+       DO_PRE_DATA_BOUNDARY,
+       DO_POST_DATA_BOUNDARY
 } DumpableObjectType;
 
 typedef struct _dumpableObject
@@ -523,7 +525,8 @@ extern void exit_nicely(void);
 
 extern void parseOidArray(const char *str, Oid *array, int arraysize);
 
-extern void sortDumpableObjects(DumpableObject **objs, int numObjs);
+extern void sortDumpableObjects(DumpableObject **objs, int numObjs,
+                                       DumpId preBoundaryId, DumpId postBoundaryId);
 extern void sortDumpableObjectsByTypeName(DumpableObject **objs, int numObjs);
 extern void sortDumpableObjectsByTypeOid(DumpableObject **objs, int numObjs);
 
index 71e459b30adb1542a95f922bd2182df9f5ab2254..a517523571f9e33f7cb3de5f513ac1f6eaac22c8 100644 (file)
@@ -25,6 +25,11 @@ static const char *modulename = gettext_noop("sorter");
  * behavior for old databases without full dependency info.)  Note: collations,
  * extensions, text search, foreign-data, and default ACL objects can't really
  * happen here, so the rather bogus priorities for them don't matter.
+ *
+ * NOTE: object-type priorities must match the section assignments made in
+ * pg_dump.c; that is, PRE_DATA objects must sort before DO_PRE_DATA_BOUNDARY,
+ * POST_DATA objects must sort after DO_POST_DATA_BOUNDARY, and DATA objects
+ * must sort between them.
  */
 static const int oldObjectTypePriority[] =
 {
@@ -37,33 +42,40 @@ static const int oldObjectTypePriority[] =
        3,                                                      /* DO_OPERATOR */
        4,                                                      /* DO_OPCLASS */
        4,                                                      /* DO_OPFAMILY */
+       4,                                                      /* DO_COLLATION */
        5,                                                      /* DO_CONVERSION */
        6,                                                      /* DO_TABLE */
        8,                                                      /* DO_ATTRDEF */
-       13,                                                     /* DO_INDEX */
-       14,                                                     /* DO_RULE */
-       15,                                                     /* DO_TRIGGER */
-       12,                                                     /* DO_CONSTRAINT */
-       16,                                                     /* DO_FK_CONSTRAINT */
+       15,                                                     /* DO_INDEX */
+       16,                                                     /* DO_RULE */
+       17,                                                     /* DO_TRIGGER */
+       14,                                                     /* DO_CONSTRAINT */
+       18,                                                     /* DO_FK_CONSTRAINT */
        2,                                                      /* DO_PROCLANG */
        2,                                                      /* DO_CAST */
-       10,                                                     /* DO_TABLE_DATA */
+       11,                                                     /* DO_TABLE_DATA */
        7,                                                      /* DO_DUMMY_TYPE */
-       3,                                                      /* DO_TSPARSER */
+       4,                                                      /* DO_TSPARSER */
        4,                                                      /* DO_TSDICT */
-       3,                                                      /* DO_TSTEMPLATE */
-       5,                                                      /* DO_TSCONFIG */
-       3,                                                      /* DO_FDW */
+       4,                                                      /* DO_TSTEMPLATE */
+       4,                                                      /* DO_TSCONFIG */
+       4,                                                      /* DO_FDW */
        4,                                                      /* DO_FOREIGN_SERVER */
-       17,                                                     /* DO_DEFAULT_ACL */
+       19,                                                     /* DO_DEFAULT_ACL */
        9,                                                      /* DO_BLOB */
-       11,                                                     /* DO_BLOB_DATA */
-       2                                                       /* DO_COLLATION */
+       12,                                                     /* DO_BLOB_DATA */
+       10,                                                     /* DO_PRE_DATA_BOUNDARY */
+       13                                                      /* DO_POST_DATA_BOUNDARY */
 };
 
 /*
  * Sort priority for object types when dumping newer databases.
  * Objects are sorted by type, and within a type by name.
+ *
+ * NOTE: object-type priorities must match the section assignments made in
+ * pg_dump.c; that is, PRE_DATA objects must sort before DO_PRE_DATA_BOUNDARY,
+ * POST_DATA objects must sort after DO_POST_DATA_BOUNDARY, and DATA objects
+ * must sort between them.
  */
 static const int newObjectTypePriority[] =
 {
@@ -76,17 +88,18 @@ static const int newObjectTypePriority[] =
        8,                                                      /* DO_OPERATOR */
        9,                                                      /* DO_OPCLASS */
        9,                                                      /* DO_OPFAMILY */
+       3,                                                      /* DO_COLLATION */
        11,                                                     /* DO_CONVERSION */
        18,                                                     /* DO_TABLE */
        20,                                                     /* DO_ATTRDEF */
-       25,                                                     /* DO_INDEX */
-       26,                                                     /* DO_RULE */
-       27,                                                     /* DO_TRIGGER */
-       24,                                                     /* DO_CONSTRAINT */
-       28,                                                     /* DO_FK_CONSTRAINT */
+       27,                                                     /* DO_INDEX */
+       28,                                                     /* DO_RULE */
+       29,                                                     /* DO_TRIGGER */
+       26,                                                     /* DO_CONSTRAINT */
+       30,                                                     /* DO_FK_CONSTRAINT */
        2,                                                      /* DO_PROCLANG */
        10,                                                     /* DO_CAST */
-       22,                                                     /* DO_TABLE_DATA */
+       23,                                                     /* DO_TABLE_DATA */
        19,                                                     /* DO_DUMMY_TYPE */
        12,                                                     /* DO_TSPARSER */
        14,                                                     /* DO_TSDICT */
@@ -94,12 +107,16 @@ static const int newObjectTypePriority[] =
        15,                                                     /* DO_TSCONFIG */
        16,                                                     /* DO_FDW */
        17,                                                     /* DO_FOREIGN_SERVER */
-       29,                                                     /* DO_DEFAULT_ACL */
+       31,                                                     /* DO_DEFAULT_ACL */
        21,                                                     /* DO_BLOB */
-       23,                                                     /* DO_BLOB_DATA */
-       3                                                       /* DO_COLLATION */
+       24,                                                     /* DO_BLOB_DATA */
+       22,                                                     /* DO_PRE_DATA_BOUNDARY */
+       25                                                      /* DO_POST_DATA_BOUNDARY */
 };
 
+static DumpId preDataBoundId;
+static DumpId postDataBoundId;
+
 
 static int     DOTypeNameCompare(const void *p1, const void *p2);
 static int     DOTypeOidCompare(const void *p1, const void *p2);
@@ -226,16 +243,27 @@ DOTypeOidCompare(const void *p1, const void *p2)
 /*
  * Sort the given objects into a safe dump order using dependency
  * information (to the extent we have it available).
+ *
+ * The DumpIds of the PRE_DATA_BOUNDARY and POST_DATA_BOUNDARY objects are
+ * passed in separately, in case we need them during dependency loop repair.
  */
 void
-sortDumpableObjects(DumpableObject **objs, int numObjs)
+sortDumpableObjects(DumpableObject **objs, int numObjs,
+                                       DumpId preBoundaryId, DumpId postBoundaryId)
 {
        DumpableObject **ordering;
        int                     nOrdering;
 
-       if (numObjs <= 0)
+       if (numObjs <= 0)                       /* can't happen anymore ... */
                return;
 
+       /*
+        * Saving the boundary IDs in static variables is a bit grotty, but seems
+        * better than adding them to parameter lists of subsidiary functions.
+        */
+       preDataBoundId = preBoundaryId;
+       postDataBoundId = postBoundaryId;
+
        ordering = (DumpableObject **) malloc(numObjs * sizeof(DumpableObject *));
        if (ordering == NULL)
                exit_horribly(NULL, modulename, "out of memory\n");
@@ -700,6 +728,8 @@ repairViewRuleMultiLoop(DumpableObject *viewobj,
        ((RuleInfo *) ruleobj)->separate = true;
        /* put back rule's dependency on view */
        addObjectDependency(ruleobj, viewobj->dumpId);
+       /* now that rule is separate, it must be post-data */
+       addObjectDependency(ruleobj, postDataBoundId);
 }
 
 /*
@@ -735,6 +765,8 @@ repairTableConstraintMultiLoop(DumpableObject *tableobj,
        ((ConstraintInfo *) constraintobj)->separate = true;
        /* put back constraint's dependency on table */
        addObjectDependency(constraintobj, tableobj->dumpId);
+       /* now that constraint is separate, it must be post-data */
+       addObjectDependency(constraintobj, postDataBoundId);
 }
 
 /*
@@ -781,6 +813,8 @@ repairDomainConstraintMultiLoop(DumpableObject *domainobj,
        ((ConstraintInfo *) constraintobj)->separate = true;
        /* put back constraint's dependency on domain */
        addObjectDependency(constraintobj, domainobj->dumpId);
+       /* now that constraint is separate, it must be post-data */
+       addObjectDependency(constraintobj, postDataBoundId);
 }
 
 /*
@@ -1189,6 +1223,16 @@ describeDumpableObject(DumpableObject *obj, char *buf, int bufsize)
                                         "BLOB DATA  (ID %d)",
                                         obj->dumpId);
                        return;
+               case DO_PRE_DATA_BOUNDARY:
+                       snprintf(buf, bufsize,
+                                        "PRE-DATA BOUNDARY  (ID %d)",
+                                        obj->dumpId);
+                       return;
+               case DO_POST_DATA_BOUNDARY:
+                       snprintf(buf, bufsize,
+                                        "POST-DATA BOUNDARY  (ID %d)",
+                                        obj->dumpId);
+                       return;
        }
        /* shouldn't get here */
        snprintf(buf, bufsize,