granicus.if.org Git - postgresql/commitdiff
pg_upgrade: check for clean server shutdowns
author: Bruce Momjian <bruce@momjian.us>
Sat, 28 Jul 2018 19:01:55 +0000 (15:01 -0400)
committer: Bruce Momjian <bruce@momjian.us>
Sat, 28 Jul 2018 19:01:55 +0000 (15:01 -0400)
Previously pg_upgrade checked for the pid file and started/stopped the
server to force a clean shutdown.  However, "pg_ctl -m immediate"
removes the pid file but doesn't do a clean shutdown, so check
pg_controldata for a clean shutdown too.

Diagnosed-by: Vimalraj A
Discussion: https://postgr.es/m/CAFKBAK5e4Q-oTUuPPJ56EU_d2Rzodq6GWKS3ncAk3xo7hAsOZg@mail.gmail.com

Backpatch-through: 9.3

src/bin/pg_upgrade/controldata.c
src/bin/pg_upgrade/pg_upgrade.c

index ca3db1a2f692e59eb4181aa43bf0354c4594303a..b1ab7d550cc24bae360f3bbeaa3e84b5cb042df1 100644 (file)
@@ -58,6 +58,7 @@ get_control_data(ClusterInfo *cluster, bool live_check)
        bool            got_large_object = false;
        bool            got_date_is_int = false;
        bool            got_data_checksum_version = false;
+       bool            got_cluster_state = false;
        char       *lc_collate = NULL;
        char       *lc_ctype = NULL;
        char       *lc_monetary = NULL;
@@ -422,6 +423,64 @@ get_control_data(ClusterInfo *cluster, bool live_check)
 
        pclose(output);
 
+       /*
+        * Check for clean shutdown
+        */
+
+       /* only pg_controldata outputs the cluster state */
+       snprintf(cmd, sizeof(cmd), "\"%s/pg_controldata\" \"%s\"",
+                        cluster->bindir, cluster->pgdata);
+       fflush(stdout);
+       fflush(stderr);
+
+       if ((output = popen(cmd, "r")) == NULL)
+               pg_fatal("could not get control data using %s: %s\n",
+                                cmd, strerror(errno));
+
+       /* we have the result of cmd in "output". so parse it line by line now */
+       while (fgets(bufin, sizeof(bufin), output))
+       {
+               if ((!live_check || cluster == &new_cluster) &&
+                       (p = strstr(bufin, "Database cluster state:")) != NULL)
+               {
+                       p = strchr(p, ':');
+
+                       if (p == NULL || strlen(p) <= 1)
+                               pg_fatal("%d: database cluster state problem\n", __LINE__);
+
+                       p++;                            /* remove ':' char */
+
+                       /*
+                        * We checked earlier for a postmaster lock file, and if we found
+                        * one, we tried to start/stop the server to replay the WAL.  However,
+                        * pg_ctl -m immediate doesn't leave a lock file, but does require
+                        * WAL replay, so we check here that the server was shut down cleanly,
+                        * from the controldata perspective.
+                        */
+                       /* remove leading spaces */
+                       while (*p == ' ')
+                               p++;
+                       if (strcmp(p, "shut down\n") != 0)
+                       {
+                               if (cluster == &old_cluster)
+                                       pg_fatal("The source cluster was not shut down cleanly.\n");
+                               else
+                                       pg_fatal("The target cluster was not shut down cleanly.\n");
+                       }
+                       got_cluster_state = true;
+               }
+       }
+
+       pclose(output);
+
+       if (!got_cluster_state)
+       {
+               if (cluster == &old_cluster)
+                       pg_fatal("The source cluster lacks cluster state information:\n");
+               else
+                       pg_fatal("The target cluster lacks cluster state information:\n");
+       }
+
        /*
         * Restore environment variables
         */
index acd6376d0a466d919e124dcbcbf0d8b22dce35d4..872ece67ece7fcdf40ef2d7e9e08df2065563bb0 100644 (file)
@@ -209,7 +209,8 @@ setup(char *argv0, bool *live_check)
                 * start, assume the server is running.  If the pid file is left over
                 * from a server crash, this also allows any committed transactions
                 * stored in the WAL to be replayed so they are not lost, because WAL
-                * files are not transferred from old to new servers.
+                * files are not transferred from old to new servers.  We later check
+                * for a clean shutdown.
                 */
                if (start_postmaster(&old_cluster, false))
                        stop_postmaster(false);