Fix a performance problem in pg_dump's dump order selection logic.

author Tom Lane <tgl@sss.pgh.pa.us>

Fri, 25 Jul 2014 23:48:51 +0000 (19:48 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Fri, 25 Jul 2014 23:48:51 +0000 (19:48 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 25 Jul 2014 23:48:51 +0000 (19:48 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 25 Jul 2014 23:48:51 +0000 (19:48 -0400)
diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c

index 75fd4944cd74063139e9ebefaa5593a56a2a4d48..dbdc04889c86babb3231c25567c3122030381dc2 100644 (file)
--- a/src/bin/pg_dump/pg_dump_sort.c
+++ b/src/bin/pg_dump/pg_dump_sort.c
@@ -132,6 +132,7 @@ static void findDependencyLoops(DumpableObject **objs, int nObjs, int totObjs);
  static int findLoop(DumpableObject *obj,
                  DumpId startPoint,
                  bool *processed,
+                DumpId *searchFailed,
                  DumpableObject **workspace,
                  int depth);
  static void repairDependencyLoop(DumpableObject **loop,
@@ -535,21 +536,35 @@ static void
  findDependencyLoops(DumpableObject **objs, int nObjs, int totObjs)
  {
         /*
-        * We use two data structures here.  One is a bool array processed[],
-        * which is indexed by dump ID and marks the objects already processed
-        * during this invocation of findDependencyLoops().  The other is a
-        * workspace[] array of DumpableObject pointers, in which we try to build
-        * lists of objects constituting loops.  We make workspace[] large enough
-        * to hold all the objects, which is huge overkill in most cases but could
-        * theoretically be necessary if there is a single dependency chain
-        * linking all the objects.
+        * We use three data structures here:
+        *
+        * processed[] is a bool array indexed by dump ID, marking the objects
+        * already processed during this invocation of findDependencyLoops().
+        *
+        * searchFailed[] is another array indexed by dump ID.  searchFailed[j] is
+        * set to dump ID k if we have proven that there is no dependency path
+        * leading from object j back to start point k.  This allows us to skip
+        * useless searching when there are multiple dependency paths from k to j,
+        * which is a common situation.  We could use a simple bool array for
+        * this, but then we'd need to re-zero it for each start point, resulting
+        * in O(N^2) zeroing work.  Using the start point's dump ID as the "true"
+        * value lets us skip clearing the array before we consider the next start
+        * point.
+        *
+        * workspace[] is an array of DumpableObject pointers, in which we try to
+        * build lists of objects constituting loops.  We make workspace[] large
+        * enough to hold all the objects in TopoSort's output, which is huge
+        * overkill in most cases but could theoretically be necessary if there is
+        * a single dependency chain linking all the objects.
          */
         bool       *processed;
+       DumpId     *searchFailed;
         DumpableObject **workspace;
         bool            fixedloop;
         int                     i;
  
         processed = (bool *) pg_calloc(getMaxDumpId() + 1, sizeof(bool));
+       searchFailed = (DumpId *) pg_calloc(getMaxDumpId() + 1, sizeof(DumpId));
         workspace = (DumpableObject **) pg_malloc(totObjs * sizeof(DumpableObject *));
         fixedloop = false;
  
@@ -559,7 +574,12 @@ findDependencyLoops(DumpableObject **objs, int nObjs, int totObjs)
                 int                     looplen;
                 int                     j;
  
-               looplen = findLoop(obj, obj->dumpId, processed, workspace, 0);
+               looplen = findLoop(obj,
+                                                  obj->dumpId,
+                                                  processed,
+                                                  searchFailed,
+                                                  workspace,
+                                                  0);
  
                 if (looplen > 0)
                 {
@@ -587,6 +607,7 @@ findDependencyLoops(DumpableObject **objs, int nObjs, int totObjs)
                 exit_horribly(modulename, "could not identify dependency loop\n");
  
         free(workspace);
+       free(searchFailed);
         free(processed);
  }
  
@@ -597,6 +618,7 @@ findDependencyLoops(DumpableObject **objs, int nObjs, int totObjs)
   *     obj: object we are examining now
   *     startPoint: dumpId of starting object for the hoped-for circular loop
   *     processed[]: flag array marking already-processed objects
+ *     searchFailed[]: flag array marking already-unsuccessfully-visited objects
   *     workspace[]: work array in which we are building list of loop members
   *     depth: number of valid entries in workspace[] at call
   *
@@ -610,6 +632,7 @@ static int
  findLoop(DumpableObject *obj,
                  DumpId startPoint,
                  bool *processed,
+                DumpId *searchFailed,
                  DumpableObject **workspace,
                  int depth)
  {
@@ -622,6 +645,13 @@ findLoop(DumpableObject *obj,
         if (processed[obj->dumpId])
                 return 0;
  
+       /*
+        * If we've already proven there is no path from this object back to the
+        * startPoint, forget it.
+        */
+       if (searchFailed[obj->dumpId] == startPoint)
+               return 0;
+
         /*
          * Reject if obj is already present in workspace.  This test prevents us
          * from going into infinite recursion if we are given a startPoint object
@@ -661,12 +691,18 @@ findLoop(DumpableObject *obj,
                 newDepth = findLoop(nextobj,
                                                         startPoint,
                                                         processed,
+                                                       searchFailed,
                                                         workspace,
                                                         depth);
                 if (newDepth > 0)
                         return newDepth;
         }
  
+       /*
+        * Remember there is no path from here back to startPoint
+        */
+       searchFailed[obj->dumpId] = startPoint;
+
         return 0;
  }
author	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 25 Jul 2014 23:48:51 +0000 (19:48 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 25 Jul 2014 23:48:51 +0000 (19:48 -0400)