Un-break parallel pg_upgrade.
author Tom Lane <tgl@sss.pgh.pa.us>
Sun, 25 Feb 2018 22:27:20 +0000 (17:27 -0500)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sun, 25 Feb 2018 22:27:20 +0000 (17:27 -0500)
Commit b3f840120 changed pg_upgrade so that it'd actually drop and
re-create the template1 and postgres databases in the new cluster.
That works fine, serially.  With the -j option it's not so fine, because
other per-database jobs might be launched while the template1 database is
dropped.  Since those jobs attempt to connect to template1 at startup, kaboom.
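
For illustration only (not part of the patch): each per-database job ultimately
boils down to pg_restore opening a libpq connection to the chosen starting
database, so if template1 has just been transiently dropped by another job, the
connection attempt fails outright.  A minimal sketch of such a connection
attempt, with a placeholder connection string:

	#include <stdio.h>
	#include <libpq-fe.h>

	int
	main(void)
	{
		/* placeholder connection string; pg_upgrade supplies its own port/options */
		PGconn	   *conn = PQconnectdb("dbname=template1");

		if (PQstatus(conn) != CONNECTION_OK)
		{
			/* if template1 has been dropped, the server rejects the connection */
			fprintf(stderr, "connection failed: %s", PQerrorMessage(conn));
			PQfinish(conn);
			return 1;
		}

		PQfinish(conn);
		return 0;
	}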

This is the cause of the intermittent failures buildfarm member jacana
has been showing for the last month; evidently it is the only BF member
configured to run the pg_upgrade test with parallelism enabled.

Fix by processing template1 separately before we get into the parallel
sub-job launch loop.  (We could alternatively have made the postgres DB
be the special case, but it seems likely that template1 will contain
less stuff and so we lose less parallelism with this choice.)
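
In outline, the reworked logic is a serial pass that restores only template1,
followed by the pass that restores everything else and may run in parallel.  A
condensed, self-contained sketch of that control flow (restore_db() and the
database list here are stand-ins; the real change is in the diff below):

	#include <stdio.h>
	#include <string.h>

	static void
	restore_db(const char *db_name, const char *create_opts)
	{
		/* stand-in for running pg_restore via exec_prog()/parallel_exec_prog() */
		printf("restore %s with %s\n", db_name, create_opts);
	}

	int
	main(void)
	{
		const char *dbs[] = {"postgres", "template1", "regression"};
		int			ndbs = 3;
		int			i;

		/* Pass 1: template1 only, serially, since it is dropped and re-created */
		for (i = 0; i < ndbs; i++)
		{
			if (strcmp(dbs[i], "template1") != 0)
				continue;
			restore_db(dbs[i], "--clean --create");
			break;
		}

		/* Pass 2: everything else; these jobs may be launched concurrently */
		for (i = 0; i < ndbs; i++)
		{
			if (strcmp(dbs[i], "template1") == 0)
				continue;
			restore_db(dbs[i],
					   strcmp(dbs[i], "postgres") == 0 ?
					   "--clean --create" : "--create");
		}

		return 0;
	}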

src/bin/pg_upgrade/pg_upgrade.c

index bbfa4c1ef38882a8cda21a95b61d0441d9c7e05f..d12412799fa19d40483f81584a31d51f7d165fe4 100644
@@ -302,13 +302,21 @@ create_new_objects(void)
 
        prep_status("Restoring database schemas in the new cluster\n");
 
+       /*
+        * We cannot process the template1 database concurrently with others,
+        * because when it's transiently dropped, connection attempts would fail.
+        * So handle it in a separate non-parallelized pass.
+        */
        for (dbnum = 0; dbnum < old_cluster.dbarr.ndbs; dbnum++)
        {
                char            sql_file_name[MAXPGPATH],
                                        log_file_name[MAXPGPATH];
                DbInfo     *old_db = &old_cluster.dbarr.dbs[dbnum];
                const char *create_opts;
-               const char *starting_db;
+
+               /* Process only template1 in this pass */
+               if (strcmp(old_db->db_name, "template1") != 0)
+                       continue;
 
                pg_log(PG_STATUS, "%s", old_db->db_name);
                snprintf(sql_file_name, sizeof(sql_file_name), DB_DUMP_FILE_MASK, old_db->db_oid);
@@ -320,26 +328,55 @@ create_new_objects(void)
                 * otherwise we would fail to propagate their database-level
                 * properties.
                 */
-               if (strcmp(old_db->db_name, "template1") == 0 ||
-                       strcmp(old_db->db_name, "postgres") == 0)
-                       create_opts = "--clean --create";
-               else
-                       create_opts = "--create";
+               create_opts = "--clean --create";
+
+               exec_prog(log_file_name,
+                                 NULL,
+                                 true,
+                                 true,
+                                 "\"%s/pg_restore\" %s %s --exit-on-error --verbose "
+                                 "--dbname postgres \"%s\"",
+                                 new_cluster.bindir,
+                                 cluster_conn_opts(&new_cluster),
+                                 create_opts,
+                                 sql_file_name);
 
-               /* When processing template1, we can't connect there to start with */
+               break;                                  /* done once we've processed template1 */
+       }
+
+       for (dbnum = 0; dbnum < old_cluster.dbarr.ndbs; dbnum++)
+       {
+               char            sql_file_name[MAXPGPATH],
+                                       log_file_name[MAXPGPATH];
+               DbInfo     *old_db = &old_cluster.dbarr.dbs[dbnum];
+               const char *create_opts;
+
+               /* Skip template1 in this pass */
                if (strcmp(old_db->db_name, "template1") == 0)
-                       starting_db = "postgres";
+                       continue;
+
+               pg_log(PG_STATUS, "%s", old_db->db_name);
+               snprintf(sql_file_name, sizeof(sql_file_name), DB_DUMP_FILE_MASK, old_db->db_oid);
+               snprintf(log_file_name, sizeof(log_file_name), DB_DUMP_LOG_FILE_MASK, old_db->db_oid);
+
+               /*
+                * template1 and postgres databases will already exist in the target
+                * installation, so tell pg_restore to drop and recreate them;
+                * otherwise we would fail to propagate their database-level
+                * properties.
+                */
+               if (strcmp(old_db->db_name, "postgres") == 0)
+                       create_opts = "--clean --create";
                else
-                       starting_db = "template1";
+                       create_opts = "--create";
 
                parallel_exec_prog(log_file_name,
                                                   NULL,
                                                   "\"%s/pg_restore\" %s %s --exit-on-error --verbose "
-                                                  "--dbname %s \"%s\"",
+                                                  "--dbname template1 \"%s\"",
                                                   new_cluster.bindir,
                                                   cluster_conn_opts(&new_cluster),
                                                   create_opts,
-                                                  starting_db,
                                                   sql_file_name);
        }