From: Tom Lane
Date: Sun, 25 Feb 2018 22:27:20 +0000 (-0500)
Subject: Un-break parallel pg_upgrade.
X-Git-Tag: REL_11_BETA1~718
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5b570d771b80aadc98755208f8f1b81e9a5eb366;p=postgresql

Un-break parallel pg_upgrade.

Commit b3f840120 changed pg_upgrade so that it'd actually drop and
re-create the template1 and postgres databases in the new cluster.
That works fine, serially.  With the -j option it's not so fine,
because other per-database jobs might be launched while the template1
database is dropped.  Since they attempt to connect there to start up,
kaboom.

This is the cause of the intermittent failures buildfarm member jacana
has been showing for the last month; evidently it is the only BF member
configured to run the pg_upgrade test with parallelism enabled.

Fix by processing template1 separately before we get into the parallel
sub-job launch loop.  (We could alternatively have made the postgres DB
be the special case, but it seems likely that template1 will contain
less stuff and so we lose less parallelism with this choice.)
---

diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index bbfa4c1ef3..d12412799f 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -302,13 +302,21 @@ create_new_objects(void)
 	prep_status("Restoring database schemas in the new cluster\n");
 
+	/*
+	 * We cannot process the template1 database concurrently with others,
+	 * because when it's transiently dropped, connection attempts would fail.
+	 * So handle it in a separate non-parallelized pass.
+	 */
 	for (dbnum = 0; dbnum < old_cluster.dbarr.ndbs; dbnum++)
 	{
 		char		sql_file_name[MAXPGPATH],
 					log_file_name[MAXPGPATH];
 		DbInfo	   *old_db = &old_cluster.dbarr.dbs[dbnum];
 		const char *create_opts;
-		const char *starting_db;
+
+		/* Process only template1 in this pass */
+		if (strcmp(old_db->db_name, "template1") != 0)
+			continue;
 
 		pg_log(PG_STATUS, "%s", old_db->db_name);
 
 		snprintf(sql_file_name, sizeof(sql_file_name), DB_DUMP_FILE_MASK, old_db->db_oid);
@@ -320,26 +328,55 @@ create_new_objects(void)
 		 * otherwise we would fail to propagate their database-level
 		 * properties.
 		 */
-		if (strcmp(old_db->db_name, "template1") == 0 ||
-			strcmp(old_db->db_name, "postgres") == 0)
-			create_opts = "--clean --create";
-		else
-			create_opts = "--create";
+		create_opts = "--clean --create";
+
+		exec_prog(log_file_name,
+				  NULL,
+				  true,
+				  true,
+				  "\"%s/pg_restore\" %s %s --exit-on-error --verbose "
+				  "--dbname postgres \"%s\"",
+				  new_cluster.bindir,
+				  cluster_conn_opts(&new_cluster),
+				  create_opts,
+				  sql_file_name);
 
-		/* When processing template1, we can't connect there to start with */
+		break;					/* done once we've processed template1 */
+	}
+
+	for (dbnum = 0; dbnum < old_cluster.dbarr.ndbs; dbnum++)
+	{
+		char		sql_file_name[MAXPGPATH],
+					log_file_name[MAXPGPATH];
+		DbInfo	   *old_db = &old_cluster.dbarr.dbs[dbnum];
+		const char *create_opts;
+
+		/* Skip template1 in this pass */
 		if (strcmp(old_db->db_name, "template1") == 0)
-			starting_db = "postgres";
+			continue;
+
+		pg_log(PG_STATUS, "%s", old_db->db_name);
+		snprintf(sql_file_name, sizeof(sql_file_name), DB_DUMP_FILE_MASK, old_db->db_oid);
+		snprintf(log_file_name, sizeof(log_file_name), DB_DUMP_LOG_FILE_MASK, old_db->db_oid);
+
+		/*
+		 * template1 and postgres databases will already exist in the target
+		 * installation, so tell pg_restore to drop and recreate them;
+		 * otherwise we would fail to propagate their database-level
+		 * properties.
+		 */
+		if (strcmp(old_db->db_name, "postgres") == 0)
+			create_opts = "--clean --create";
 		else
-			starting_db = "template1";
+			create_opts = "--create";
 
 		parallel_exec_prog(log_file_name,
 						   NULL,
 						   "\"%s/pg_restore\" %s %s --exit-on-error --verbose "
-						   "--dbname %s \"%s\"",
+						   "--dbname template1 \"%s\"",
 						   new_cluster.bindir,
 						   cluster_conn_opts(&new_cluster),
 						   create_opts,
-						   starting_db,
 						   sql_file_name);
 	}
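To make the resulting control flow easy to see at a glance, here is a
minimal, self-contained sketch of the two-pass structure.  The database
list and the helpers restore_serially()/restore_in_parallel() are
hypothetical stand-ins for pg_upgrade's dbarr loop, exec_prog(), and
parallel_exec_prog(); this is an illustration of the idea, not the
committed code.

    #include <stdio.h>
    #include <string.h>

    /* hypothetical contents of the old cluster */
    static const char *dbs[] = {"template1", "postgres", "mydb"};
    #define NDBS (sizeof(dbs) / sizeof(dbs[0]))

    static void
    restore_serially(const char *db)
    {
        /* stands in for exec_prog() running pg_restore --clean --create,
         * connected to the postgres database */
        printf("pass 1 (serial):   restore %s via postgres\n", db);
    }

    static void
    restore_in_parallel(const char *db)
    {
        /* stands in for parallel_exec_prog() running pg_restore,
         * connected to template1, which exists again by this point */
        printf("pass 2 (parallel): restore %s via template1\n", db);
    }

    int
    main(void)
    {
        size_t      i;

        /* Pass 1: template1 alone, so no parallel job can try to connect
         * to it while it is transiently dropped and re-created. */
        for (i = 0; i < NDBS; i++)
        {
            if (strcmp(dbs[i], "template1") != 0)
                continue;
            restore_serially(dbs[i]);
            break;              /* done once template1 is processed */
        }

        /* Pass 2: everything else, safe to launch in parallel, with
         * template1 as the stable starting connection database. */
        for (i = 0; i < NDBS; i++)
        {
            if (strcmp(dbs[i], "template1") == 0)
                continue;
            restore_in_parallel(dbs[i]);
        }
        return 0;
    }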