From: Tom Lane
Date: Sun, 25 Feb 2018 22:27:20 +0000 (-0500)
Subject: Un-break parallel pg_upgrade.
X-Git-Tag: REL_11_BETA1~718
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5b570d771b80aadc98755208f8f1b81e9a5eb366;p=postgresql

Un-break parallel pg_upgrade.

Commit b3f840120 changed pg_upgrade so that it'd actually drop and
re-create the template1 and postgres databases in the new cluster.
That works fine, serially.  With the -j option it's not so fine,
because other per-database jobs might be launched while the template1
database is dropped.  Since they attempt to connect there to start up,
kaboom.

This is the cause of the intermittent failures buildfarm member jacana
has been showing for the last month; evidently it is the only BF member
configured to run the pg_upgrade test with parallelism enabled.

Fix by processing template1 separately before we get into the parallel
sub-job launch loop.  (We could alternatively have made the postgres DB
be the special case, but it seems likely that template1 will contain
less stuff and so we lose less parallelism with this choice.)
---

diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index bbfa4c1ef3..d12412799f 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -302,13 +302,21 @@ create_new_objects(void)
 	prep_status("Restoring database schemas in the new cluster\n");
 
+	/*
+	 * We cannot process the template1 database concurrently with others,
+	 * because when it's transiently dropped, connection attempts would fail.
+	 * So handle it in a separate non-parallelized pass.
+	 */
 	for (dbnum = 0; dbnum < old_cluster.dbarr.ndbs; dbnum++)
 	{
 		char		sql_file_name[MAXPGPATH],
 					log_file_name[MAXPGPATH];
 		DbInfo	   *old_db = &old_cluster.dbarr.dbs[dbnum];
 		const char *create_opts;
-		const char *starting_db;
+
+		/* Process only template1 in this pass */
+		if (strcmp(old_db->db_name, "template1") != 0)
+			continue;
 
 		pg_log(PG_STATUS, "%s", old_db->db_name);
 
 		snprintf(sql_file_name, sizeof(sql_file_name), DB_DUMP_FILE_MASK, old_db->db_oid);
@@ -320,26 +328,55 @@ create_new_objects(void)
 		 * otherwise we would fail to propagate their database-level
 		 * properties.
 		 */
-		if (strcmp(old_db->db_name, "template1") == 0 ||
-			strcmp(old_db->db_name, "postgres") == 0)
-			create_opts = "--clean --create";
-		else
-			create_opts = "--create";
+		create_opts = "--clean --create";
+
+		exec_prog(log_file_name,
+				  NULL,
+				  true,
+				  true,
+				  "\"%s/pg_restore\" %s %s --exit-on-error --verbose "
+				  "--dbname postgres \"%s\"",
+				  new_cluster.bindir,
+				  cluster_conn_opts(&new_cluster),
+				  create_opts,
+				  sql_file_name);
 
-		/* When processing template1, we can't connect there to start with */
+		break;					/* done once we've processed template1 */
+	}
+
+	for (dbnum = 0; dbnum < old_cluster.dbarr.ndbs; dbnum++)
+	{
+		char		sql_file_name[MAXPGPATH],
+					log_file_name[MAXPGPATH];
+		DbInfo	   *old_db = &old_cluster.dbarr.dbs[dbnum];
+		const char *create_opts;
+
+		/* Skip template1 in this pass */
 		if (strcmp(old_db->db_name, "template1") == 0)
-			starting_db = "postgres";
+			continue;
+
+		pg_log(PG_STATUS, "%s", old_db->db_name);
+		snprintf(sql_file_name, sizeof(sql_file_name), DB_DUMP_FILE_MASK, old_db->db_oid);
+		snprintf(log_file_name, sizeof(log_file_name), DB_DUMP_LOG_FILE_MASK, old_db->db_oid);
+
+		/*
+		 * template1 and postgres databases will already exist in the target
+		 * installation, so tell pg_restore to drop and recreate them;
+		 * otherwise we would fail to propagate their database-level
+		 * properties.
+		 */
+		if (strcmp(old_db->db_name, "postgres") == 0)
+			create_opts = "--clean --create";
 		else
-			starting_db = "template1";
+			create_opts = "--create";
 
 		parallel_exec_prog(log_file_name,
 						   NULL,
 						   "\"%s/pg_restore\" %s %s --exit-on-error --verbose "
-						   "--dbname %s \"%s\"",
+						   "--dbname template1 \"%s\"",
 						   new_cluster.bindir,
 						   cluster_conn_opts(&new_cluster),
 						   create_opts,
-						   starting_db,
 						   sql_file_name);
 	}
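To make the resulting control flow easy to see at a glance, here is a
minimal, self-contained sketch of the two-pass structure.  The database
list and the helpers restore_serially()/restore_in_parallel() are
hypothetical stand-ins for pg_upgrade's dbarr loop, exec_prog(), and
parallel_exec_prog(); this is an illustration of the idea, not the
committed code.

    #include <stdio.h>
    #include <string.h>

    /* hypothetical contents of the old cluster */
    static const char *dbs[] = {"template1", "postgres", "mydb"};
    #define NDBS (sizeof(dbs) / sizeof(dbs[0]))

    static void
    restore_serially(const char *db)
    {
        /* stands in for exec_prog() running pg_restore --clean --create,
         * connected to the postgres database */
        printf("pass 1 (serial):   restore %s via postgres\n", db);
    }

    static void
    restore_in_parallel(const char *db)
    {
        /* stands in for parallel_exec_prog() running pg_restore,
         * connected to template1, which exists again by this point */
        printf("pass 2 (parallel): restore %s via template1\n", db);
    }

    int
    main(void)
    {
        size_t      i;

        /* Pass 1: template1 alone, so no parallel job can try to connect
         * to it while it is transiently dropped and re-created. */
        for (i = 0; i < NDBS; i++)
        {
            if (strcmp(dbs[i], "template1") != 0)
                continue;
            restore_serially(dbs[i]);
            break;              /* done once template1 is processed */
        }

        /* Pass 2: everything else, safe to launch in parallel, with
         * template1 as the stable starting connection database. */
        for (i = 0; i < NDBS; i++)
        {
            if (strcmp(dbs[i], "template1") == 0)
                continue;
            restore_in_parallel(dbs[i]);
        }
        return 0;
    }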