From: Tom Lane Date: Tue, 26 Jun 2012 01:20:24 +0000 (-0400) Subject: Make pg_dump emit more accurate dependency information. X-Git-Tag: REL9_3_BETA1~1296 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8a504a363925fc5c7af48cd723da3f7e4d7ba9e2;p=postgresql Make pg_dump emit more accurate dependency information. While pg_dump has included dependency information in archive-format output ever since 7.3, it never made any large effort to ensure that that information was actually useful. In particular, in common situations where dependency chains include objects that aren't separately emitted in the dump, the dependencies shown for objects that were emitted would reference the dump IDs of these un-dumped objects, leaving no clue about which other objects the visible objects indirectly depend on. So far, parallel pg_restore has managed to avoid tripping over this misfeature, but only by dint of some crude hacks like not trusting dependency information in the pre-data section of the archive. It seems prudent to do something about this before it rises up to bite us, so instead of emitting the "raw" dependencies of each dumped object, recursively search for its actual dependencies among the subset of objects that are being dumped. Back-patch to 9.2, since that code hasn't yet diverged materially from HEAD. At some point we might need to back-patch further, but right now there are no known cases where this is actively necessary. (The one known case, bug #6699, is fixed in a different way by my previous patch.) Since this patch depends on 9.2 changes that made TOC entries be marked before output commences as to whether they'll be dumped, back-patching further would require additional surgery; and as of now there's no evidence that it's worth the risk. --- diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index a20c4f3815..7f2bc070ad 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -3498,9 +3498,14 @@ restore_toc_entries_parallel(ArchiveHandle *AH) * Do all the early stuff in a single connection in the parent. There's no * great point in running it in parallel, in fact it will actually run * faster in a single connection because we avoid all the connection and - * setup overhead. Also, pg_dump is not currently very good about showing - * all the dependencies of SECTION_PRE_DATA items, so we do not risk - * trying to process them out-of-order. + * setup overhead. Also, pre-9.2 pg_dump versions were not very good + * about showing all the dependencies of SECTION_PRE_DATA items, so we do + * not risk trying to process them out-of-order. + * + * Note: as of 9.2, it should be guaranteed that all PRE_DATA items appear + * before DATA items, and all DATA items before POST_DATA items. That is + * not certain to be true in older archives, though, so this loop is coded + * to not assume it. */ skipped_some = false; for (next_work_item = AH->toc->next; next_work_item != AH->toc; next_work_item = next_work_item->next) @@ -4162,8 +4167,9 @@ fix_dependencies(ArchiveHandle *AH) /* * Count the incoming dependencies for each item. Also, it is possible - * that the dependencies list items that are not in the archive at all. - * Subtract such items from the depCounts. + * that the dependencies list items that are not in the archive at all + * (that should not happen in 9.2 and later, but is highly likely in + * older archives). Subtract such items from the depCounts. */ for (te = AH->toc->next; te != AH->toc; te = te->next) { diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 71cc3416bb..afb28a870e 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -210,6 +210,9 @@ static void dumpACL(Archive *fout, CatalogId objCatId, DumpId objDumpId, const char *acls); static void getDependencies(Archive *fout); +static void BuildArchiveDependencies(Archive *fout); +static void findDumpableDependencies(ArchiveHandle *AH, DumpableObject *dobj, + DumpId **dependencies, int *nDeps, int *allocDeps); static DumpableObject *createBoundaryObjects(void); static void addBoundaryDependencies(DumpableObject **dobjs, int numObjs, @@ -768,6 +771,14 @@ main(int argc, char **argv) SetArchiveRestoreOptions(fout, ropt); + /* + * The archive's TOC entries are now marked as to which ones will + * actually be output, so we can set up their dependency lists properly. + * This isn't necessary for plain-text output, though. + */ + if (!plainText) + BuildArchiveDependencies(fout); + /* * And finally we can do the actual output. * @@ -1574,12 +1585,17 @@ dumpTableData(Archive *fout, TableDataInfo *tdinfo) copyStmt = NULL; } + /* + * Note: although the TableDataInfo is a full DumpableObject, we treat its + * dependency on its table as "special" and pass it to ArchiveEntry now. + * See comments for BuildArchiveDependencies. + */ ArchiveEntry(fout, tdinfo->dobj.catId, tdinfo->dobj.dumpId, tbinfo->dobj.name, tbinfo->dobj.namespace->dobj.name, NULL, tbinfo->rolname, false, "TABLE DATA", SECTION_DATA, "", "", copyStmt, - tdinfo->dobj.dependencies, tdinfo->dobj.nDeps, + &(tbinfo->dobj.dumpId), 1, dumpFn, tdinfo); destroyPQExpBuffer(copyBuf); @@ -2263,7 +2279,7 @@ dumpBlob(Archive *fout, BlobInfo *binfo) binfo->rolname, false, "BLOB", SECTION_PRE_DATA, cquery->data, dquery->data, NULL, - binfo->dobj.dependencies, binfo->dobj.nDeps, + NULL, 0, NULL, NULL); /* set up tag for comment and/or ACL */ @@ -7197,7 +7213,7 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj) dobj->name, NULL, NULL, "", false, "BLOBS", SECTION_DATA, "", "", NULL, - dobj->dependencies, dobj->nDeps, + NULL, 0, dumpBlobs, NULL); break; case DO_PRE_DATA_BOUNDARY: @@ -7248,7 +7264,7 @@ dumpNamespace(Archive *fout, NamespaceInfo *nspinfo) nspinfo->rolname, false, "SCHEMA", SECTION_PRE_DATA, q->data, delq->data, NULL, - nspinfo->dobj.dependencies, nspinfo->dobj.nDeps, + NULL, 0, NULL, NULL); /* Dump Schema Comments and Security Labels */ @@ -7366,7 +7382,7 @@ dumpExtension(Archive *fout, ExtensionInfo *extinfo) "", false, "EXTENSION", SECTION_PRE_DATA, q->data, delq->data, NULL, - extinfo->dobj.dependencies, extinfo->dobj.nDeps, + NULL, 0, NULL, NULL); /* Dump Extension Comments and Security Labels */ @@ -7514,7 +7530,7 @@ dumpEnumType(Archive *fout, TypeInfo *tyinfo) tyinfo->rolname, false, "TYPE", SECTION_PRE_DATA, q->data, delq->data, NULL, - tyinfo->dobj.dependencies, tyinfo->dobj.nDeps, + NULL, 0, NULL, NULL); /* Dump Type Comments and Security Labels */ @@ -7639,7 +7655,7 @@ dumpRangeType(Archive *fout, TypeInfo *tyinfo) tyinfo->rolname, false, "TYPE", SECTION_PRE_DATA, q->data, delq->data, NULL, - tyinfo->dobj.dependencies, tyinfo->dobj.nDeps, + NULL, 0, NULL, NULL); /* Dump Type Comments and Security Labels */ @@ -8021,7 +8037,7 @@ dumpBaseType(Archive *fout, TypeInfo *tyinfo) tyinfo->rolname, false, "TYPE", SECTION_PRE_DATA, q->data, delq->data, NULL, - tyinfo->dobj.dependencies, tyinfo->dobj.nDeps, + NULL, 0, NULL, NULL); /* Dump Type Comments and Security Labels */ @@ -8176,7 +8192,7 @@ dumpDomain(Archive *fout, TypeInfo *tyinfo) tyinfo->rolname, false, "DOMAIN", SECTION_PRE_DATA, q->data, delq->data, NULL, - tyinfo->dobj.dependencies, tyinfo->dobj.nDeps, + NULL, 0, NULL, NULL); /* Dump Domain Comments and Security Labels */ @@ -8383,7 +8399,7 @@ dumpCompositeType(Archive *fout, TypeInfo *tyinfo) tyinfo->rolname, false, "TYPE", SECTION_PRE_DATA, q->data, delq->data, NULL, - tyinfo->dobj.dependencies, tyinfo->dobj.nDeps, + NULL, 0, NULL, NULL); @@ -8555,7 +8571,7 @@ dumpShellType(Archive *fout, ShellTypeInfo *stinfo) stinfo->baseType->rolname, false, "SHELL TYPE", SECTION_PRE_DATA, q->data, "", NULL, - stinfo->dobj.dependencies, stinfo->dobj.nDeps, + NULL, 0, NULL, NULL); destroyPQExpBuffer(q); @@ -8729,7 +8745,7 @@ dumpProcLang(Archive *fout, ProcLangInfo *plang) lanschema, NULL, plang->lanowner, false, "PROCEDURAL LANGUAGE", SECTION_PRE_DATA, defqry->data, delqry->data, NULL, - plang->dobj.dependencies, plang->dobj.nDeps, + NULL, 0, NULL, NULL); /* Dump Proc Lang Comments and Security Labels */ @@ -9316,7 +9332,7 @@ dumpFunc(Archive *fout, FuncInfo *finfo) finfo->rolname, false, "FUNCTION", SECTION_PRE_DATA, q->data, delqry->data, NULL, - finfo->dobj.dependencies, finfo->dobj.nDeps, + NULL, 0, NULL, NULL); /* Dump Function Comments and Security Labels */ @@ -9486,7 +9502,7 @@ dumpCast(Archive *fout, CastInfo *cast) "pg_catalog", NULL, "", false, "CAST", SECTION_PRE_DATA, defqry->data, delqry->data, NULL, - cast->dobj.dependencies, cast->dobj.nDeps, + NULL, 0, NULL, NULL); /* Dump Cast Comments */ @@ -9720,7 +9736,7 @@ dumpOpr(Archive *fout, OprInfo *oprinfo) oprinfo->rolname, false, "OPERATOR", SECTION_PRE_DATA, q->data, delq->data, NULL, - oprinfo->dobj.dependencies, oprinfo->dobj.nDeps, + NULL, 0, NULL, NULL); /* Dump Operator Comments */ @@ -10227,7 +10243,7 @@ dumpOpclass(Archive *fout, OpclassInfo *opcinfo) opcinfo->rolname, false, "OPERATOR CLASS", SECTION_PRE_DATA, q->data, delq->data, NULL, - opcinfo->dobj.dependencies, opcinfo->dobj.nDeps, + NULL, 0, NULL, NULL); /* Dump Operator Class Comments */ @@ -10540,7 +10556,7 @@ dumpOpfamily(Archive *fout, OpfamilyInfo *opfinfo) opfinfo->rolname, false, "OPERATOR FAMILY", SECTION_PRE_DATA, q->data, delq->data, NULL, - opfinfo->dobj.dependencies, opfinfo->dobj.nDeps, + NULL, 0, NULL, NULL); /* Dump Operator Family Comments */ @@ -10629,7 +10645,7 @@ dumpCollation(Archive *fout, CollInfo *collinfo) collinfo->rolname, false, "COLLATION", SECTION_PRE_DATA, q->data, delq->data, NULL, - collinfo->dobj.dependencies, collinfo->dobj.nDeps, + NULL, 0, NULL, NULL); /* Dump Collation Comments */ @@ -10728,7 +10744,7 @@ dumpConversion(Archive *fout, ConvInfo *convinfo) convinfo->rolname, false, "CONVERSION", SECTION_PRE_DATA, q->data, delq->data, NULL, - convinfo->dobj.dependencies, convinfo->dobj.nDeps, + NULL, 0, NULL, NULL); /* Dump Conversion Comments */ @@ -10965,7 +10981,7 @@ dumpAgg(Archive *fout, AggInfo *agginfo) agginfo->aggfn.rolname, false, "AGGREGATE", SECTION_PRE_DATA, q->data, delq->data, NULL, - agginfo->aggfn.dobj.dependencies, agginfo->aggfn.dobj.nDeps, + NULL, 0, NULL, NULL); /* Dump Aggregate Comments */ @@ -11063,7 +11079,7 @@ dumpTSParser(Archive *fout, TSParserInfo *prsinfo) "", false, "TEXT SEARCH PARSER", SECTION_PRE_DATA, q->data, delq->data, NULL, - prsinfo->dobj.dependencies, prsinfo->dobj.nDeps, + NULL, 0, NULL, NULL); /* Dump Parser Comments */ @@ -11150,7 +11166,7 @@ dumpTSDictionary(Archive *fout, TSDictInfo *dictinfo) dictinfo->rolname, false, "TEXT SEARCH DICTIONARY", SECTION_PRE_DATA, q->data, delq->data, NULL, - dictinfo->dobj.dependencies, dictinfo->dobj.nDeps, + NULL, 0, NULL, NULL); /* Dump Dictionary Comments */ @@ -11216,7 +11232,7 @@ dumpTSTemplate(Archive *fout, TSTemplateInfo *tmplinfo) "", false, "TEXT SEARCH TEMPLATE", SECTION_PRE_DATA, q->data, delq->data, NULL, - tmplinfo->dobj.dependencies, tmplinfo->dobj.nDeps, + NULL, 0, NULL, NULL); /* Dump Template Comments */ @@ -11344,7 +11360,7 @@ dumpTSConfig(Archive *fout, TSConfigInfo *cfginfo) cfginfo->rolname, false, "TEXT SEARCH CONFIGURATION", SECTION_PRE_DATA, q->data, delq->data, NULL, - cfginfo->dobj.dependencies, cfginfo->dobj.nDeps, + NULL, 0, NULL, NULL); /* Dump Configuration Comments */ @@ -11418,7 +11434,7 @@ dumpForeignDataWrapper(Archive *fout, FdwInfo *fdwinfo) fdwinfo->rolname, false, "FOREIGN DATA WRAPPER", SECTION_PRE_DATA, q->data, delq->data, NULL, - fdwinfo->dobj.dependencies, fdwinfo->dobj.nDeps, + NULL, 0, NULL, NULL); /* Handle the ACL */ @@ -11510,7 +11526,7 @@ dumpForeignServer(Archive *fout, ForeignServerInfo *srvinfo) srvinfo->rolname, false, "SERVER", SECTION_PRE_DATA, q->data, delq->data, NULL, - srvinfo->dobj.dependencies, srvinfo->dobj.nDeps, + NULL, 0, NULL, NULL); /* Handle the ACL */ @@ -11694,7 +11710,7 @@ dumpDefaultACL(Archive *fout, DefaultACLInfo *daclinfo) daclinfo->defaclrole, false, "DEFAULT ACL", SECTION_POST_DATA, q->data, "", NULL, - daclinfo->dobj.dependencies, daclinfo->dobj.nDeps, + NULL, 0, NULL, NULL); destroyPQExpBuffer(tag); @@ -12693,7 +12709,7 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo) (strcmp(reltypename, "TABLE") == 0) ? tbinfo->hasoids : false, reltypename, SECTION_PRE_DATA, q->data, delq->data, NULL, - tbinfo->dobj.dependencies, tbinfo->dobj.nDeps, + NULL, 0, NULL, NULL); @@ -12765,7 +12781,7 @@ dumpAttrDef(Archive *fout, AttrDefInfo *adinfo) tbinfo->rolname, false, "DEFAULT", SECTION_PRE_DATA, q->data, delq->data, NULL, - adinfo->dobj.dependencies, adinfo->dobj.nDeps, + NULL, 0, NULL, NULL); destroyPQExpBuffer(q); @@ -12867,7 +12883,7 @@ dumpIndex(Archive *fout, IndxInfo *indxinfo) tbinfo->rolname, false, "INDEX", SECTION_POST_DATA, q->data, delq->data, NULL, - indxinfo->dobj.dependencies, indxinfo->dobj.nDeps, + NULL, 0, NULL, NULL); } @@ -12988,7 +13004,7 @@ dumpConstraint(Archive *fout, ConstraintInfo *coninfo) tbinfo->rolname, false, "CONSTRAINT", SECTION_POST_DATA, q->data, delq->data, NULL, - coninfo->dobj.dependencies, coninfo->dobj.nDeps, + NULL, 0, NULL, NULL); } else if (coninfo->contype == 'f') @@ -13021,7 +13037,7 @@ dumpConstraint(Archive *fout, ConstraintInfo *coninfo) tbinfo->rolname, false, "FK CONSTRAINT", SECTION_POST_DATA, q->data, delq->data, NULL, - coninfo->dobj.dependencies, coninfo->dobj.nDeps, + NULL, 0, NULL, NULL); } else if (coninfo->contype == 'c' && tbinfo) @@ -13056,7 +13072,7 @@ dumpConstraint(Archive *fout, ConstraintInfo *coninfo) tbinfo->rolname, false, "CHECK CONSTRAINT", SECTION_POST_DATA, q->data, delq->data, NULL, - coninfo->dobj.dependencies, coninfo->dobj.nDeps, + NULL, 0, NULL, NULL); } } @@ -13092,7 +13108,7 @@ dumpConstraint(Archive *fout, ConstraintInfo *coninfo) tyinfo->rolname, false, "CHECK CONSTRAINT", SECTION_POST_DATA, q->data, delq->data, NULL, - coninfo->dobj.dependencies, coninfo->dobj.nDeps, + NULL, 0, NULL, NULL); } } @@ -13354,7 +13370,7 @@ dumpSequence(Archive *fout, TableInfo *tbinfo) tbinfo->rolname, false, "SEQUENCE", SECTION_PRE_DATA, query->data, delqry->data, NULL, - tbinfo->dobj.dependencies, tbinfo->dobj.nDeps, + NULL, 0, NULL, NULL); /* @@ -13620,7 +13636,7 @@ dumpTrigger(Archive *fout, TriggerInfo *tginfo) tbinfo->rolname, false, "TRIGGER", SECTION_POST_DATA, query->data, delqry->data, NULL, - tginfo->dobj.dependencies, tginfo->dobj.nDeps, + NULL, 0, NULL, NULL); dumpComment(fout, labelq->data, @@ -13741,7 +13757,7 @@ dumpRule(Archive *fout, RuleInfo *rinfo) tbinfo->rolname, false, "RULE", SECTION_POST_DATA, cmd->data, delcmd->data, NULL, - rinfo->dobj.dependencies, rinfo->dobj.nDeps, + NULL, 0, NULL, NULL); /* Dump rule comments */ @@ -14154,6 +14170,124 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs, } +/* + * BuildArchiveDependencies - create dependency data for archive TOC entries + * + * The raw dependency data obtained by getDependencies() is not terribly + * useful in an archive dump, because in many cases there are dependency + * chains linking through objects that don't appear explicitly in the dump. + * For example, a view will depend on its _RETURN rule while the _RETURN rule + * will depend on other objects --- but the rule will not appear as a separate + * object in the dump. We need to adjust the view's dependencies to include + * whatever the rule depends on that is included in the dump. + * + * Just to make things more complicated, there are also "special" dependencies + * such as the dependency of a TABLE DATA item on its TABLE, which we must + * not rearrange because pg_restore knows that TABLE DATA only depends on + * its table. In these cases we must leave the dependencies strictly as-is + * even if they refer to not-to-be-dumped objects. + * + * To handle this, the convention is that "special" dependencies are created + * during ArchiveEntry calls, and an archive TOC item that has any such + * entries will not be touched here. Otherwise, we recursively search the + * DumpableObject data structures to build the correct dependencies for each + * archive TOC item. + */ +static void +BuildArchiveDependencies(Archive *fout) +{ + ArchiveHandle *AH = (ArchiveHandle *) fout; + TocEntry *te; + + /* Scan all TOC entries in the archive */ + for (te = AH->toc->next; te != AH->toc; te = te->next) + { + DumpableObject *dobj; + DumpId *dependencies; + int nDeps; + int allocDeps; + + /* No need to process entries that will not be dumped */ + if (te->reqs == 0) + continue; + /* Ignore entries that already have "special" dependencies */ + if (te->nDeps > 0) + continue; + /* Otherwise, look up the item's original DumpableObject, if any */ + dobj = findObjectByDumpId(te->dumpId); + if (dobj == NULL) + continue; + /* No work if it has no dependencies */ + if (dobj->nDeps <= 0) + continue; + /* Set up work array */ + allocDeps = 64; + dependencies = (DumpId *) pg_malloc(allocDeps * sizeof(DumpId)); + nDeps = 0; + /* Recursively find all dumpable dependencies */ + findDumpableDependencies(AH, dobj, + &dependencies, &nDeps, &allocDeps); + /* And save 'em ... */ + if (nDeps > 0) + { + dependencies = (DumpId *) pg_realloc(dependencies, + nDeps * sizeof(DumpId)); + te->dependencies = dependencies; + te->nDeps = nDeps; + } + else + free(dependencies); + } +} + +/* Recursive search subroutine for BuildArchiveDependencies */ +static void +findDumpableDependencies(ArchiveHandle *AH, DumpableObject *dobj, + DumpId **dependencies, int *nDeps, int *allocDeps) +{ + int i; + + /* + * Ignore section boundary objects: if we search through them, we'll + * report lots of bogus dependencies. + */ + if (dobj->objType == DO_PRE_DATA_BOUNDARY || + dobj->objType == DO_POST_DATA_BOUNDARY) + return; + + for (i = 0; i < dobj->nDeps; i++) + { + DumpId depid = dobj->dependencies[i]; + + if (TocIDRequired(AH, depid) != 0) + { + /* Object will be dumped, so just reference it as a dependency */ + if (*nDeps >= *allocDeps) + { + *allocDeps *= 2; + *dependencies = (DumpId *) pg_realloc(*dependencies, + *allocDeps * sizeof(DumpId)); + } + (*dependencies)[*nDeps] = depid; + (*nDeps)++; + } + else + { + /* + * Object will not be dumped, so recursively consider its deps. + * We rely on the assumption that sortDumpableObjects already + * broke any dependency loops, else we might recurse infinitely. + */ + DumpableObject *otherdobj = findObjectByDumpId(depid); + + if (otherdobj) + findDumpableDependencies(AH, otherdobj, + dependencies, nDeps, allocDeps); + } + } +} + + /* * selectSourceSchema - make the specified schema the active search path * in the source database.