granicus.if.org Git - postgresql/commitdiff
pg_ctl promote
author Robert Haas <rhaas@postgresql.org>
Wed, 16 Feb 2011 02:28:48 +0000 (21:28 -0500)
committer Robert Haas <rhaas@postgresql.org>
Wed, 16 Feb 2011 02:30:23 +0000 (21:30 -0500)
Fujii Masao, reviewed by Robert Haas, Stephen Frost, and Magnus Hagander.

doc/src/sgml/high-availability.sgml
doc/src/sgml/recovery-config.sgml
doc/src/sgml/ref/pg_ctl-ref.sgml
src/backend/access/transam/xlog.c
src/backend/postmaster/postmaster.c
src/bin/pg_ctl/pg_ctl.c
src/include/access/xlog.h

index a89296905b498366e9183e3deffd182b18a1b779..368c68867af90a84cec54c5047320fc196db06ef 100644 (file)
@@ -615,8 +615,9 @@ protocol to make nodes agree on a serializable transactional order.
    </para>
 
    <para>
-    Standby mode is exited and the server switches to normal operation,
-    when a trigger file is found (<varname>trigger_file</>). Before failover,
+    Standby mode is exited and the server switches to normal operation
+    when <command>pg_ctl promote</> is run or a trigger file is found
+    (<varname>trigger_file</>). Before failover,
     any WAL immediately available in the archive or in <filename>pg_xlog</> will be
     restored, but no attempt is made to connect to the master.
    </para>
@@ -685,11 +686,7 @@ protocol to make nodes agree on a serializable transactional order.
     If you're setting up the standby server for high availability purposes,
     set up WAL archiving, connections and authentication like the primary
     server, because the standby server will work as a primary server after
-    failover. You will also need to set <varname>trigger_file</> to make
-    it possible to fail over.
-    If you're setting up the standby server for reporting
-    purposes, with no plans to fail over to it, <varname>trigger_file</>
-    is not required.
+    failover.
    </para>
 
    <para>
@@ -710,7 +707,6 @@ protocol to make nodes agree on a serializable transactional order.
 standby_mode = 'on'
 primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass'
 restore_command = 'cp /path/to/archive/%f %p'
-trigger_file = '/path/to/trigger_file'
 archive_cleanup_command = 'pg_archivecleanup /path/to/archive %r'
 </programlisting>
    </para>
@@ -949,13 +945,15 @@ primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass'
    </para>
 
    <para>
-    To trigger failover of a log-shipping standby server, create a trigger
+    To trigger failover of a log-shipping standby server,
+    run <command>pg_ctl promote</> or create a trigger
     file with the filename and path specified by the <varname>trigger_file</>
-    setting in <filename>recovery.conf</>. If <varname>trigger_file</> is
-    not given, there is no way to exit recovery in the standby and promote
-    it to a master. That can be useful for e.g reporting servers that are
+    setting in <filename>recovery.conf</>. If you're planning to use
+    <command>pg_ctl promote</> to fail over, <varname>trigger_file</> is
+    not required. If you're setting up reporting servers that are
     only used to offload read-only queries from the primary, not for high
-    availability purposes.
+    availability purposes, you don't need to exit recovery in the standby
+    and promote it to a master.
    </para>
   </sect1>
 
index 0ccd65e3ee3dae3615eef77d44d4de5266e3ce65..602fbe2c76e82943cc0d767ad4ba4cec3ffdaf34 100644 (file)
@@ -343,8 +343,8 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"'  # Windows
         <listitem>
          <para>
           Specifies a trigger file whose presence ends recovery in the
-          standby. If no trigger file is specified, the standby never exits
-          recovery.
+          standby.  Even if this value is not set, you can still promote
+          the standby using <command>pg_ctl promote</>.
           This setting has no effect if <varname>standby_mode</> is <literal>off</>.
          </para>
         </listitem>
index 28f415da24b00528ca422595732af8ceca547666..307f66b8da3cd00bab1d4a8e347ad29636ed2252 100644 (file)
@@ -75,6 +75,13 @@ PostgreSQL documentation
    <arg>-o <replaceable>options</replaceable></arg>
   </cmdsynopsis>
 
+  <cmdsynopsis>
+   <command>pg_ctl</command>
+   <arg choice="plain">promote</arg>
+   <arg>-s</arg>
+   <arg>-D <replaceable>datadir</replaceable></arg>
+  </cmdsynopsis>
+
   <cmdsynopsis>
    <command>pg_ctl</command>
    <arg choice="plain">reload</arg>
@@ -183,6 +190,12 @@ PostgreSQL documentation
    command-line options.
   </para>
 
+  <para>
+   In <option>promote</option> mode, the standby server that is
+   running in the specified data directory is commanded to exit
+   recovery and begin read-write operations.
+  </para>
+
   <para>
    <option>reload</option> mode simply sends the
    <command>postgres</command> process a <systemitem>SIGHUP</>
index 4dc8dc6e3911152277172b9193797f781ccfe691..6fdaaff91407e11e3fa808c64ce7e67aa743a8f6 100644 (file)
@@ -62,6 +62,7 @@
 /* File path names (all relative to $PGDATA) */
 #define RECOVERY_COMMAND_FILE  "recovery.conf"
 #define RECOVERY_COMMAND_DONE  "recovery.done"
+#define PROMOTE_SIGNAL_FILE    "promote"
 
 
 /* User-settable parameters */
@@ -565,6 +566,7 @@ typedef struct xl_restore_point
  */
 static volatile sig_atomic_t got_SIGHUP = false;
 static volatile sig_atomic_t shutdown_requested = false;
+static volatile sig_atomic_t promote_triggered = false;
 
 /*
  * Flag set when executing a restore command, to tell SIGTERM signal handler
@@ -9669,6 +9671,14 @@ StartupProcSigUsr1Handler(SIGNAL_ARGS)
        latch_sigusr1_handler();
 }
 
+/*
+ * SIGUSR2: set flag to finish recovery.
+ *
+ * The handler only records the request in promote_triggered and calls
+ * WakeupRecovery(); the flag is acted on later, when
+ * CheckForStandbyTrigger() finds it set.
+ *
+ * NOTE(review): errno is not saved and restored around WakeupRecovery();
+ * confirm that this cannot disturb the interrupted code.
+ */
+static void
+StartupProcTriggerHandler(SIGNAL_ARGS)
+{
+       promote_triggered = true;
+       WakeupRecovery();
+}
+
 /* SIGHUP: set flag to re-read config file at next convenient time */
 static void
 StartupProcSigHupHandler(SIGNAL_ARGS)
@@ -9746,7 +9756,7 @@ StartupProcessMain(void)
                pqsignal(SIGALRM, SIG_IGN);
        pqsignal(SIGPIPE, SIG_IGN);
        pqsignal(SIGUSR1, StartupProcSigUsr1Handler);
-       pqsignal(SIGUSR2, SIG_IGN);
+       pqsignal(SIGUSR2, StartupProcTriggerHandler);
 
        /*
         * Reset some signals that are accepted by postmaster but not here
@@ -10192,9 +10202,9 @@ emode_for_corrupt_record(int emode, XLogRecPtr RecPtr)
 }
 
 /*
- * Check to see if the trigger file exists. If it does, request postmaster
- * to shut down walreceiver, wait for it to exit, remove the trigger
- * file, and return true.
+ * Check to see whether the user-specified trigger file exists and whether a
+ * promote request has arrived.  If either condition holds, request postmaster
+ * to shut down walreceiver, wait for it to exit, and return true.
  */
 static bool
 CheckForStandbyTrigger(void)
@@ -10205,6 +10215,16 @@ CheckForStandbyTrigger(void)
        if (triggered)
                return true;
 
+       if (promote_triggered)
+       {
+               ereport(LOG,
+                               (errmsg("received promote request")));
+               ShutdownWalRcv();
+               promote_triggered = false;
+               triggered = true;
+               return true;
+       }
+
        if (TriggerFile == NULL)
                return false;
 
@@ -10220,6 +10240,27 @@ CheckForStandbyTrigger(void)
        return false;
 }
 
+/*
+ * Check to see if a promote request has arrived. Should be
+ * called by postmaster after receiving SIGUSR1.
+ *
+ * A request is indicated by the existence of PROMOTE_SIGNAL_FILE (a path
+ * relative to $PGDATA), which pg_ctl promote creates before sending the
+ * signal. The file is removed as soon as it has been seen.
+ *
+ * Returns true if the signal file existed, false otherwise.
+ */
+bool
+CheckPromoteSignal(void)
+{
+       struct stat stat_buf;
+
+       if (stat(PROMOTE_SIGNAL_FILE, &stat_buf) == 0)
+       {
+               /*
+                * Since we are in a signal handler, it's not safe
+                * to elog. We silently ignore any error from unlink.
+                */
+               unlink(PROMOTE_SIGNAL_FILE);
+               return true;
+       }
+       return false;
+}
+
 /*
  * Wake up startup process to replay newly arrived WAL, or to notice that
  * failover has been requested.
index 8f77d1bfc9754433b971cafcbd6c628cd3f4b347..997af5bf07243e8239cc389dee5980c72de0f3ae 100644 (file)
@@ -4284,6 +4284,14 @@ sigusr1_handler(SIGNAL_ARGS)
                WalReceiverPID = StartWalReceiver();
        }
 
+       if (CheckPromoteSignal() && StartupPID != 0 &&
+               (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
+                pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY))
+       {
+               /* Tell startup process to finish recovery */
+               signal_child(StartupPID, SIGUSR2);
+       }
+
        PG_SETMASK(&UnBlockSig);
 
        errno = save_errno;
index 6c87f158f3f3e8215620cf17455cbdf77dc93382..2fab5c98fdc3a67ca056c62b7b174182f770d40b 100644 (file)
@@ -62,6 +62,7 @@ typedef enum
        START_COMMAND,
        STOP_COMMAND,
        RESTART_COMMAND,
+       PROMOTE_COMMAND,
        RELOAD_COMMAND,
        STATUS_COMMAND,
        KILL_COMMAND,
@@ -96,6 +97,7 @@ static char postopts_file[MAXPGPATH];
 static char pid_file[MAXPGPATH];
 static char backup_file[MAXPGPATH];
 static char recovery_file[MAXPGPATH];
+static char promote_file[MAXPGPATH];
 
 #if defined(WIN32) || defined(__CYGWIN__)
 static DWORD pgctl_start_type = SERVICE_AUTO_START;
@@ -124,6 +126,7 @@ static void do_init(void);
 static void do_start(void);
 static void do_stop(void);
 static void do_restart(void);
+static void do_promote(void);
 static void do_reload(void);
 static void do_status(void);
 static void do_kill(pgpid_t pid);
@@ -872,7 +875,7 @@ do_stop(void)
 
 
 /*
- *     restart/reload routines
+ *     restart/promote/reload routines
  */
 
 static void
@@ -965,6 +968,66 @@ do_restart(void)
        do_start();
 }
 
+static void
+do_promote(void)
+{
+       FILE       *prmfile;
+       pgpid_t         pid;
+       struct stat statbuf;
+
+       pid = get_pgpid();
+
+       if (pid == 0)                           /* no pid file */
+       {
+               write_stderr(_("%s: PID file \"%s\" does not exist\n"), progname, pid_file);
+               write_stderr(_("Is server running?\n"));
+               exit(1);
+       }
+       else if (pid < 0)                       /* standalone backend, not postmaster */
+       {
+               pid = -pid;
+               write_stderr(_("%s: cannot promote server; "
+                                          "single-user server is running (PID: %ld)\n"),
+                                        progname, pid);
+               exit(1);
+       }
+
+       /*
+        * If recovery.conf doesn't exist, the server is not in standby mode,
+        * so there is nothing to promote: report the error and exit.
+        */
+       if (stat(recovery_file, &statbuf) != 0)
+       {
+               write_stderr(_("%s: cannot promote server; "
+                                          "server is not in standby mode\n"),
+                                        progname);
+               exit(1);
+       }
+       /*
+        * Create an empty promote signal file in the data directory. The
+        * postmaster looks for this file (see CheckPromoteSignal) when it
+        * receives SIGUSR1.
+        */
+       if ((prmfile = fopen(promote_file, "w")) == NULL)
+       {
+               write_stderr(_("%s: could not create promote signal file \"%s\": %s\n"),
+                                        progname, promote_file, strerror(errno));
+               exit(1);
+       }
+       if (fclose(prmfile))
+       {
+               write_stderr(_("%s: could not write promote signal file \"%s\": %s\n"),
+                                        progname, promote_file, strerror(errno));
+               exit(1);
+       }
+
+       sig = SIGUSR1;                          /* tells the postmaster to check
+                                                                * for the promote signal file */
+       if (kill((pid_t) pid, sig) != 0)
+       {
+               write_stderr(_("%s: could not send promote signal (PID: %ld): %s\n"),
+                                        progname, pid, strerror(errno));
+               if (unlink(promote_file) != 0)  /* don't leave a stale request behind */
+                       write_stderr(_("%s: could not remove promote signal file \"%s\": %s\n"),
+                                                progname, promote_file, strerror(errno));
+               exit(1);
+       }
+
+       print_msg(_("server promoting\n"));
+}
+
 
 static void
 do_reload(void)
@@ -1617,7 +1680,7 @@ do_advice(void)
 static void
 do_help(void)
 {
-       printf(_("%s is a utility to start, stop, restart, reload configuration files,\n"
+       printf(_("%s is a utility to start, stop, restart, promote, reload configuration files,\n"
                         "report the status of a PostgreSQL server, or signal a PostgreSQL process.\n\n"), progname);
        printf(_("Usage:\n"));
        printf(_("  %s init[db]               [-D DATADIR] [-s] [-o \"OPTIONS\"]\n"), progname);
@@ -1625,6 +1688,7 @@ do_help(void)
        printf(_("  %s stop    [-W] [-t SECS] [-D DATADIR] [-s] [-m SHUTDOWN-MODE]\n"), progname);
        printf(_("  %s restart [-w] [-t SECS] [-D DATADIR] [-s] [-m SHUTDOWN-MODE]\n"
                         "                 [-o \"OPTIONS\"]\n"), progname);
+       printf(_("  %s promote [-D DATADIR] [-s]\n"), progname);
        printf(_("  %s reload  [-D DATADIR] [-s]\n"), progname);
        printf(_("  %s status  [-D DATADIR]\n"), progname);
        printf(_("  %s kill    SIGNALNAME PID\n"), progname);
@@ -1950,6 +2014,8 @@ main(int argc, char **argv)
                                ctl_command = STOP_COMMAND;
                        else if (strcmp(argv[optind], "restart") == 0)
                                ctl_command = RESTART_COMMAND;
+                       else if (strcmp(argv[optind], "promote") == 0)
+                               ctl_command = PROMOTE_COMMAND;
                        else if (strcmp(argv[optind], "reload") == 0)
                                ctl_command = RELOAD_COMMAND;
                        else if (strcmp(argv[optind], "status") == 0)
@@ -2036,6 +2102,7 @@ main(int argc, char **argv)
                snprintf(pid_file, MAXPGPATH, "%s/postmaster.pid", pg_data);
                snprintf(backup_file, MAXPGPATH, "%s/backup_label", pg_data);
                snprintf(recovery_file, MAXPGPATH, "%s/recovery.conf", pg_data);
+               snprintf(promote_file, MAXPGPATH, "%s/promote", pg_data);
        }
 
        switch (ctl_command)
@@ -2055,6 +2122,9 @@ main(int argc, char **argv)
                case RESTART_COMMAND:
                        do_restart();
                        break;
+               case PROMOTE_COMMAND:
+                       do_promote();
+                       break;
                case RELOAD_COMMAND:
                        do_reload();
                        break;
index 1803d5ab2010962ea66411e62c74b694625dd327..7cd07a25d222e1f9c261559897f889e68822ec36 100644 (file)
@@ -313,6 +313,7 @@ extern TimeLineID GetRecoveryTargetTLI(void);
 
 extern void HandleStartupProcInterrupts(void);
 extern void StartupProcessMain(void);
+extern bool CheckPromoteSignal(void);
 extern void WakeupRecovery(void);
 
 /*