]> granicus.if.org Git - postgresql/blob - src/backend/postmaster/postmaster.c
Support host names in pg_hba.conf
[postgresql] / src / backend / postmaster / postmaster.c
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  *        This program acts as a clearing house for requests to the
5  *        POSTGRES system.      Frontend programs send a startup message
6  *        to the Postmaster and the postmaster uses the info in the
7  *        message to set up a backend process.
8  *
9  *        The postmaster also manages system-wide operations such as
10  *        startup and shutdown. The postmaster itself doesn't do those
11  *        operations, mind you --- it just forks off a subprocess to do them
12  *        at the right times.  It also takes care of resetting the system
13  *        if a backend crashes.
14  *
15  *        The postmaster process creates the shared memory and semaphore
16  *        pools during startup, but as a rule does not touch them itself.
17  *        In particular, it is not a member of the PGPROC array of backends
18  *        and so it cannot participate in lock-manager operations.      Keeping
19  *        the postmaster away from shared memory operations makes it simpler
20  *        and more reliable.  The postmaster is almost always able to recover
21  *        from crashes of individual backends by resetting shared memory;
22  *        if it did much with shared memory then it would be prone to crashing
23  *        along with the backends.
24  *
25  *        When a request message is received, we now fork() immediately.
26  *        The child process performs authentication of the request, and
27  *        then becomes a backend if successful.  This allows the auth code
28  *        to be written in a simple single-threaded style (as opposed to the
29  *        crufty "poor man's multitasking" code that used to be needed).
30  *        More importantly, it ensures that blockages in non-multithreaded
31  *        libraries like SSL or PAM cannot cause denial of service to other
32  *        clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  *        src/backend/postmaster/postmaster.c
41  *
42  * NOTES
43  *
44  * Initialization:
45  *              The Postmaster sets up shared memory data structures
46  *              for the backends.
47  *
48  * Synchronization:
49  *              The Postmaster shares memory with the backends but should avoid
50  *              touching shared memory, so as not to become stuck if a crashing
51  *              backend screws up locks or shared memory.  Likewise, the Postmaster
52  *              should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  *              The Postmaster cleans up after backends if they have an emergency
56  *              exit and/or core dump.
57  *
58  * Error Reporting:
59  *              Use write_stderr() only for reporting "interactive" errors
60  *              (essentially, bogus arguments on the command line).  Once the
61  *              postmaster is launched, use ereport().  In particular, don't use
62  *              write_stderr() for anything that occurs after pmdaemonize.
63  *
64  *-------------------------------------------------------------------------
65  */
66
67 #include "postgres.h"
68
69 #include <unistd.h>
70 #include <signal.h>
71 #include <time.h>
72 #include <sys/wait.h>
73 #include <ctype.h>
74 #include <sys/stat.h>
75 #include <sys/socket.h>
76 #include <fcntl.h>
77 #include <sys/param.h>
78 #include <netinet/in.h>
79 #include <arpa/inet.h>
80 #include <netdb.h>
81 #include <limits.h>
82
83 #ifdef HAVE_SYS_SELECT_H
84 #include <sys/select.h>
85 #endif
86
87 #ifdef HAVE_GETOPT_H
88 #include <getopt.h>
89 #endif
90
91 #ifdef USE_BONJOUR
92 #include <dns_sd.h>
93 #endif
94
95 #include "access/transam.h"
96 #include "access/xlog.h"
97 #include "bootstrap/bootstrap.h"
98 #include "catalog/pg_control.h"
99 #include "lib/dllist.h"
100 #include "libpq/auth.h"
101 #include "libpq/ip.h"
102 #include "libpq/libpq.h"
103 #include "libpq/pqsignal.h"
104 #include "miscadmin.h"
105 #include "pgstat.h"
106 #include "postmaster/autovacuum.h"
107 #include "postmaster/fork_process.h"
108 #include "postmaster/pgarch.h"
109 #include "postmaster/postmaster.h"
110 #include "postmaster/syslogger.h"
111 #include "replication/walsender.h"
112 #include "storage/fd.h"
113 #include "storage/ipc.h"
114 #include "storage/pg_shmem.h"
115 #include "storage/pmsignal.h"
116 #include "storage/proc.h"
117 #include "tcop/tcopprot.h"
118 #include "utils/builtins.h"
119 #include "utils/datetime.h"
120 #include "utils/memutils.h"
121 #include "utils/ps_status.h"
122
123 #ifdef EXEC_BACKEND
124 #include "storage/spin.h"
125 #endif
126
127
128 /*
129  * List of active backends (or child processes anyway; we don't actually
130  * know whether a given child has become a backend or is still in the
131  * authorization phase).  This is used mainly to keep track of how many
132  * children we have and send them appropriate signals when necessary.
133  *
134  * "Special" children such as the startup, bgwriter and autovacuum launcher
135  * tasks are not in this list.  Autovacuum worker and walsender processes are
136  * in it. Also, "dead_end" children are in it: these are children launched just
137  * for the purpose of sending a friendly rejection message to a would-be
138  * client.      We must track them because they are attached to shared memory,
139  * but we know they will never become live backends.  dead_end children are
140  * not assigned a PMChildSlot.
141  */
142 typedef struct bkend
143 {
144         pid_t           pid;                    /* process ID of this child process */
145         long            cancel_key;             /* cancel key assigned to this backend */
146         int                     child_slot;             /* PMChildSlot number; dead_end children are not assigned one */
147         bool            is_autovacuum;  /* true if this is an autovacuum process */
148         bool            dead_end;               /* true if it will only send an error message and quit */
149         Dlelem          elem;                   /* this entry's list link in BackendList */
150 } Backend;
151
152 static Dllist *BackendList;
153
154 #ifdef EXEC_BACKEND
155 static Backend *ShmemBackendArray;
156 #endif
157
158 /* The port number, Unix-socket directory and addresses we listen on */
159 int                     PostPortNumber;
160 char       *UnixSocketDir;
161 char       *ListenAddresses;
162
163 /*
164  * ReservedBackends is the number of backends reserved for superuser use.
165  * This number is taken out of the pool size given by MaxBackends, so the
166  * number of backend slots available to non-superusers is
167  * (MaxBackends - ReservedBackends).  Note what this really means is
168  * "if there are <= ReservedBackends connections available, only superusers
169  * can make new connections" --- pre-existing superuser connections don't
170  * count against the limit.
171  */
172 int                     ReservedBackends;
173
174 /* The socket(s) we're listening to. */
175 #define MAXLISTEN       64
176 static pgsocket ListenSocket[MAXLISTEN];
177
178 /*
179  * Set by the -o option
180  */
181 static char ExtraOptions[MAXPGPATH];
182
183 /*
184  * These globals control the behavior of the postmaster in case some
185  * backend dumps core.  Normally, it kills all peers of the dead backend
186  * and reinitializes shared memory.  By specifying -T or -n, we can have
187  * the postmaster stop (rather than kill) peers and not reinitialize
188  * shared data structures.      (Reinit is currently dead code, though.)
189  */
190 static bool Reinit = true;              /* cleared by the -n switch */
191 static int      SendStop = false;       /* set by the -T switch; NOTE(review): declared int but only assigned true/false in the code visible here */
192
193 /* still more option variables */
194 bool            EnableSSL = false;
195 bool            SilentMode = false; /* silent_mode */
196
197 int                     PreAuthDelay = 0;
198 int                     AuthenticationTimeout = 60;
199
200 bool            log_hostname;           /* for ps display and logging */
201 bool            Log_connections = false;
202 bool            Db_user_namespace = false;
203
204 bool            enable_bonjour = false;
205 char       *bonjour_name;
206 bool            restart_after_crash = true;
207
208 /* PIDs of special child processes; 0 when not running */
209 static pid_t StartupPID = 0,
210                         BgWriterPID = 0,
211                         WalWriterPID = 0,
212                         WalReceiverPID = 0,
213                         AutoVacPID = 0,
214                         PgArchPID = 0,
215                         PgStatPID = 0,
216                         SysLoggerPID = 0;
217
218 /* Startup/shutdown state */
219 #define                 NoShutdown              0
220 #define                 SmartShutdown   1
221 #define                 FastShutdown    2
222
223 static int      Shutdown = NoShutdown;
224
225 static bool FatalError = false; /* T if recovering from backend crash */
226 static bool RecoveryError = false;              /* T if WAL recovery failed */
227
228 /*
229  * We use a simple state machine to control startup, shutdown, and
230  * crash recovery (which is rather like shutdown followed by startup).
231  *
232  * After doing all the postmaster initialization work, we enter PM_STARTUP
233  * state and the startup process is launched. The startup process begins by
234  * reading the control file and other preliminary initialization steps.
235  * In a normal startup, or after crash recovery, the startup process exits
236  * with exit code 0 and we switch to PM_RUN state.      However, archive recovery
237  * is handled specially since it takes much longer and we would like to support
238  * hot standby during archive recovery.
239  *
240  * When the startup process is ready to start archive recovery, it signals the
241  * postmaster, and we switch to PM_RECOVERY state. The background writer is
242  * launched, while the startup process continues applying WAL.  If Hot Standby
243  * is enabled, then, after reaching a consistent point in WAL redo, startup
244  * process signals us again, and we switch to PM_HOT_STANDBY state and
245  * begin accepting connections to perform read-only queries.  When archive
246  * recovery is finished, the startup process exits with exit code 0 and we
247  * switch to PM_RUN state.
248  *
249  * Normal child backends can only be launched when we are in PM_RUN or
250  * PM_HOT_STANDBY state.  (We also allow launch of normal
251  * child backends in PM_WAIT_BACKUP state, but only for superusers.)
252  * In other states we handle connection requests by launching "dead_end"
253  * child processes, which will simply send the client an error message and
254  * quit.  (We track these in the BackendList so that we can know when they
255  * are all gone; this is important because they're still connected to shared
256  * memory, and would interfere with an attempt to destroy the shmem segment,
257  * possibly leading to SHMALL failure when we try to make a new one.)
258  * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
259  * to drain out of the system, and therefore stop accepting connection
260  * requests at all until the last existing child has quit (which hopefully
261  * will not be very long).
262  *
263  * Notice that this state variable does not distinguish *why* we entered
264  * states later than PM_RUN --- Shutdown and FatalError must be consulted
265  * to find that out.  FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY,
266  * or PM_RUN states, nor in PM_SHUTDOWN states (because we don't enter those
267  * states when trying to recover from a crash).  It can be true in PM_STARTUP
268  * state, because we don't clear it until we've successfully started WAL redo.
269  * Similarly, RecoveryError means that we have crashed during recovery, and
270  * should not try to restart.
271  */
272 typedef enum
273 {
274         PM_INIT,                                        /* postmaster still initializing */
275         PM_STARTUP,                                     /* startup process launched; waiting for it */
276         PM_RECOVERY,                            /* archive recovery in progress */
277         PM_HOT_STANDBY,                         /* in recovery, accepting read-only queries */
278         PM_RUN,                                         /* normal "database is alive" state */
279         PM_WAIT_BACKUP,                         /* waiting for online backup mode to end; superusers may still connect */
280         PM_WAIT_READONLY,                       /* waiting for read only backends to exit */
281         PM_WAIT_BACKENDS,                       /* waiting for live backends to exit */
282         PM_SHUTDOWN,                            /* waiting for bgwriter to do shutdown ckpt */
283         PM_SHUTDOWN_2,                          /* waiting for archiver and walsenders to
284                                                                  * finish */
285         PM_WAIT_DEAD_END,                       /* draining dead_end children; no new connections accepted */
286         PM_NO_CHILDREN                          /* all important children have exited */
287 } PMState;
288
289 static PMState pmState = PM_INIT;
290
291 static bool ReachedNormalRunning = false;               /* T if we've reached PM_RUN */
292
293 bool            ClientAuthInProgress = false;           /* T during new-client
294                                                                                                  * authentication */
295
296 bool            redirection_done = false;       /* stderr redirected for syslogger? */
297
298 /* set when a START_AUTOVAC_LAUNCHER signal has been received */
299 static volatile sig_atomic_t start_autovac_launcher = false;
300
301 /* the launcher needs to be signalled to communicate some condition.  NOTE(review): this flag is volatile bool while the one above is volatile sig_atomic_t -- confirm whether it is ever written from a signal handler */
302 static volatile bool avlauncher_needs_signal = false;
303
304 /*
305  * State for assigning random salts and cancel keys.
306  * Also, the global MyCancelKey passes the cancel key assigned to a given
307  * backend from the postmaster to that backend (via fork).
308  */
309 static unsigned int random_seed = 0;
310 static struct timeval random_start_time;
311
312 extern char *optarg;
313 extern int      optind,
314                         opterr;
315
316 #ifdef HAVE_INT_OPTRESET
317 extern int      optreset;                       /* might not be declared by system headers */
318 #endif
319
320 #ifdef USE_BONJOUR
321 static DNSServiceRef bonjour_sdref = NULL;
322 #endif
323
324 /*
325  * postmaster.c - function prototypes
326  */
327 static void getInstallationPaths(const char *argv0);
328 static void checkDataDir(void);
329 static void pmdaemonize(void);
330 static Port *ConnCreate(int serverFd);
331 static void ConnFree(Port *port);
332 static void reset_shared(int port);
333 static void SIGHUP_handler(SIGNAL_ARGS);
334 static void pmdie(SIGNAL_ARGS);
335 static void reaper(SIGNAL_ARGS);
336 static void sigusr1_handler(SIGNAL_ARGS);
337 static void startup_die(SIGNAL_ARGS);
338 static void dummy_handler(SIGNAL_ARGS);
339 static void CleanupBackend(int pid, int exitstatus);
340 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
341 static void LogChildExit(int lev, const char *procname,
342                          int pid, int exitstatus);
343 static void PostmasterStateMachine(void);
344 static void BackendInitialize(Port *port);
345 static int      BackendRun(Port *port);
346 static void ExitPostmaster(int status);
347 static int      ServerLoop(void);
348 static int      BackendStartup(Port *port);
349 static int      ProcessStartupPacket(Port *port, bool SSLdone);
350 static void processCancelRequest(Port *port, void *pkt);
351 static int      initMasks(fd_set *rmask);
352 static void report_fork_failure_to_client(Port *port, int errnum);
353 static enum CAC_state canAcceptConnections(void);
354 static long PostmasterRandom(void);
355 static void RandomSalt(char *md5Salt);
356 static void signal_child(pid_t pid, int signal);
357 static bool SignalSomeChildren(int signal, int targets);
358
359 #define SignalChildren(sig)                        SignalSomeChildren(sig, BACKEND_TYPE_ALL)       /* target every backend type */
360 #define SignalAutovacWorkers(sig)  SignalSomeChildren(sig, BACKEND_TYPE_AUTOVAC)   /* target autovacuum workers only */
361
362 /*
363  * Backend type flags.  Each is a distinct bit, so they can be ORed together
364  * to form the target mask given to SignalSomeChildren() and CountChildren().
365  */
366 #define BACKEND_TYPE_NORMAL             0x0001  /* normal backend */
367 #define BACKEND_TYPE_AUTOVAC    0x0002  /* autovacuum worker process */
368 #define BACKEND_TYPE_WALSND             0x0004  /* walsender process */
369 #define BACKEND_TYPE_ALL                0x0007  /* OR of all the above; update if a type bit is added */
370
371 static int      CountChildren(int target);
372 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
373 static pid_t StartChildProcess(AuxProcType type);
374 static void StartAutovacuumWorker(void);
375
376 #ifdef EXEC_BACKEND
377
378 #ifdef WIN32
379 static pid_t win32_waitpid(int *exitstatus);
380 static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);
381
382 static HANDLE win32ChildQueue;
383
384 typedef struct                          /* WIN32 only; NOTE(review): looks like per-child state for pgwin32_deadchild_callback -- confirm */
385 {
386         HANDLE          waitHandle;             /* presumably the wait-registration handle -- TODO confirm */
387         HANDLE          procHandle;             /* presumably the child process handle -- TODO confirm */
388         DWORD           procId;                 /* presumably the child's process ID -- TODO confirm */
389 }       win32_deadchild_waitinfo;
390
391 HANDLE          PostmasterHandle;
392 #endif
393
394 static pid_t backend_forkexec(Port *port);
395 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
396
397 /* Type for a socket that can be inherited to a client process */
398 #ifdef WIN32
399 typedef struct
400 {
401         SOCKET          origsocket;             /* Original socket value, or PGINVALID_SOCKET
402                                                                  * if not a socket */
403         WSAPROTOCOL_INFO wsainfo;               /* presumably used to re-create the socket in the child -- TODO confirm */
404 }       InheritableSocket;
405 #else
406 typedef int InheritableSocket;          /* non-WIN32: a plain int is used */
407 #endif                                                  /* WIN32 */
408
409 typedef struct LWLock LWLock;   /* ugly kluge */
410
411 /*
412  * Structure containing all variables passed to exec'ed backends
413  */
414 typedef struct
415 {
416         Port            port;                   /* Port struct, stored by value */
417         InheritableSocket portsocket;   /* inheritable form of a socket; presumably port's socket -- confirm */
418         char            DataDir[MAXPGPATH];
419         pgsocket        ListenSocket[MAXLISTEN];        /* same element type and size as the file-level ListenSocket[] */
420         long            MyCancelKey;    /* cancel key handed from the postmaster to the backend */
421         int                     MyPMChildSlot;
422 #ifndef WIN32
423         unsigned long UsedShmemSegID;   /* type of this member differs by platform */
424 #else
425         HANDLE          UsedShmemSegID;
426 #endif
427         void       *UsedShmemSegAddr;
428         slock_t    *ShmemLock;
429         VariableCache ShmemVariableCache;
430         Backend    *ShmemBackendArray;  /* same name as the file-level EXEC_BACKEND array */
431         LWLock     *LWLockArray;
432         slock_t    *ProcStructLock;
433         PROC_HDR   *ProcGlobal;
434         PGPROC     *AuxiliaryProcs;
435         PMSignalData *PMSignalState;
436         InheritableSocket pgStatSock;
437         pid_t           PostmasterPid;
438         TimestampTz PgStartTime;
439         TimestampTz PgReloadTime;
440         bool            redirection_done;       /* same name as the global "stderr redirected for syslogger?" flag */
441 #ifdef WIN32
442         HANDLE          PostmasterHandle;
443         HANDLE          initial_signal_pipe;
444         HANDLE          syslogPipe[2];
445 #else
446         int                     syslogPipe[2];  /* non-WIN32: the pipe ends are plain ints */
447 #endif
448         char            my_exec_path[MAXPGPATH];
449         char            pkglib_path[MAXPGPATH];
450         char            ExtraOptions[MAXPGPATH];        /* same name and size as ExtraOptions, which is set by the -o option */
451 }       BackendParameters;
452
453 static void read_backend_variables(char *id, Port *port);
454 static void restore_backend_variables(BackendParameters * param, Port *port);
455
456 #ifndef WIN32
457 static bool save_backend_variables(BackendParameters * param, Port *port);
458 #else
459 static bool save_backend_variables(BackendParameters * param, Port *port,
460                                            HANDLE childProcess, pid_t childPid);
461 #endif
462
463 static void ShmemBackendArrayAdd(Backend *bn);
464 static void ShmemBackendArrayRemove(Backend *bn);
465 #endif   /* EXEC_BACKEND */
466
467 #define StartupDataBase()               StartChildProcess(StartupProcess)       /* each Start*() macro here calls StartChildProcess() with a fixed process-type argument */
468 #define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
469 #define StartWalWriter()                StartChildProcess(WalWriterProcess)
470 #define StartWalReceiver()              StartChildProcess(WalReceiverProcess)
471
472 /* Macros to test the raw exit status word of a child process */
473 #define EXIT_STATUS_0(st)  ((st) == 0)          /* true only if the whole status word is 0 */
474 #define EXIT_STATUS_1(st)  (WIFEXITED(st) && WEXITSTATUS(st) == 1)      /* true if the child exited normally with exit code 1 */
475
476
477 /*
478  * Postmaster main entry point
479  */
480 int
481 PostmasterMain(int argc, char *argv[])
482 {
483         int                     opt;
484         int                     status;
485         char       *userDoption = NULL;
486         int                     i;
487
488         MyProcPid = PostmasterPid = getpid();
489
490         MyStartTime = time(NULL);
491
492         IsPostmasterEnvironment = true;
493
494         /*
495          * for security, no dir or file created can be group or other accessible
496          */
497         umask((mode_t) 0077);
498
499         /*
500          * Fire up essential subsystems: memory management
501          */
502         MemoryContextInit();
503
504         /*
505          * By default, palloc() requests in the postmaster will be allocated in
506          * the PostmasterContext, which is space that can be recycled by backends.
507          * Allocated data that needs to be available to backends should be
508          * allocated in TopMemoryContext.
509          */
510         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
511                                                                                           "Postmaster",
512                                                                                           ALLOCSET_DEFAULT_MINSIZE,
513                                                                                           ALLOCSET_DEFAULT_INITSIZE,
514                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
515         MemoryContextSwitchTo(PostmasterContext);
516
517         /* Initialize paths to installation files */
518         getInstallationPaths(argv[0]);
519
520         /*
521          * Options setup
522          */
523         InitializeGUCOptions();
524
525         opterr = 1;
526
527         /*
528          * Parse command-line options.  CAUTION: keep this in sync with
529          * tcop/postgres.c (the option sets should not conflict) and with the
530          * common help() function in main/main.c.
531          */
532         while ((opt = getopt(argc, argv, "A:B:c:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:W:-:")) != -1)
533         {
534                 switch (opt)
535                 {
536                         case 'A':
537                                 SetConfigOption("debug_assertions", optarg, PGC_POSTMASTER, PGC_S_ARGV);
538                                 break;
539
540                         case 'B':
541                                 SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
542                                 break;
543
544                         case 'D':
545                                 userDoption = optarg;
546                                 break;
547
548                         case 'd':
549                                 set_debug_options(atoi(optarg), PGC_POSTMASTER, PGC_S_ARGV);
550                                 break;
551
552                         case 'E':
553                                 SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
554                                 break;
555
556                         case 'e':
557                                 SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
558                                 break;
559
560                         case 'F':
561                                 SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
562                                 break;
563
564                         case 'f':
565                                 if (!set_plan_disabling_options(optarg, PGC_POSTMASTER, PGC_S_ARGV))
566                                 {
567                                         write_stderr("%s: invalid argument for option -f: \"%s\"\n",
568                                                                  progname, optarg);
569                                         ExitPostmaster(1);
570                                 }
571                                 break;
572
573                         case 'h':
574                                 SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
575                                 break;
576
577                         case 'i':
578                                 SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
579                                 break;
580
581                         case 'j':
582                                 /* only used by interactive backend */
583                                 break;
584
585                         case 'k':
586                                 SetConfigOption("unix_socket_directory", optarg, PGC_POSTMASTER, PGC_S_ARGV);
587                                 break;
588
589                         case 'l':
590                                 SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
591                                 break;
592
593                         case 'N':
594                                 SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
595                                 break;
596
597                         case 'n':
598                                 /* Don't reinit shared mem after abnormal exit */
599                                 Reinit = false;
600                                 break;
601
602                         case 'O':
603                                 SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
604                                 break;
605
606                         case 'o':
607                                 /* Other options to pass to the backend on the command line */
608                                 snprintf(ExtraOptions + strlen(ExtraOptions),
609                                                  sizeof(ExtraOptions) - strlen(ExtraOptions),
610                                                  " %s", optarg);
611                                 break;
612
613                         case 'P':
614                                 SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
615                                 break;
616
617                         case 'p':
618                                 SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
619                                 break;
620
621                         case 'r':
622                                 /* only used by single-user backend */
623                                 break;
624
625                         case 'S':
626                                 SetConfigOption("work_mem", optarg, PGC_POSTMASTER, PGC_S_ARGV);
627                                 break;
628
629                         case 's':
630                                 SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
631                                 break;
632
633                         case 'T':
634
635                                 /*
636                                  * In the event that some backend dumps core, send SIGSTOP,
637                                  * rather than SIGQUIT, to all its peers.  This lets the wily
638                                  * post_hacker collect core dumps from everyone.
639                                  */
640                                 SendStop = true;
641                                 break;
642
643                         case 't':
644                                 {
645                                         const char *tmp = get_stats_option_name(optarg);
646
647                                         if (tmp)
648                                         {
649                                                 SetConfigOption(tmp, "true", PGC_POSTMASTER, PGC_S_ARGV);
650                                         }
651                                         else
652                                         {
653                                                 write_stderr("%s: invalid argument for option -t: \"%s\"\n",
654                                                                          progname, optarg);
655                                                 ExitPostmaster(1);
656                                         }
657                                         break;
658                                 }
659
660                         case 'W':
661                                 SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
662                                 break;
663
664                         case 'c':
665                         case '-':
666                                 {
667                                         char       *name,
668                                                            *value;
669
670                                         ParseLongOption(optarg, &name, &value);
671                                         if (!value)
672                                         {
673                                                 if (opt == '-')
674                                                         ereport(ERROR,
675                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
676                                                                          errmsg("--%s requires a value",
677                                                                                         optarg)));
678                                                 else
679                                                         ereport(ERROR,
680                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
681                                                                          errmsg("-c %s requires a value",
682                                                                                         optarg)));
683                                         }
684
685                                         SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
686                                         free(name);
687                                         if (value)
688                                                 free(value);
689                                         break;
690                                 }
691
692                         default:
693                                 write_stderr("Try \"%s --help\" for more information.\n",
694                                                          progname);
695                                 ExitPostmaster(1);
696                 }
697         }
698
699         /*
700          * Postmaster accepts no non-option switch arguments.
701          */
702         if (optind < argc)
703         {
704                 write_stderr("%s: invalid argument: \"%s\"\n",
705                                          progname, argv[optind]);
706                 write_stderr("Try \"%s --help\" for more information.\n",
707                                          progname);
708                 ExitPostmaster(1);
709         }
710
711         /*
712          * Locate the proper configuration files and data directory, and read
713          * postgresql.conf for the first time.
714          */
715         if (!SelectConfigFiles(userDoption, progname))
716                 ExitPostmaster(2);
717
718         /* Verify that DataDir looks reasonable */
719         checkDataDir();
720
721         /* And switch working directory into it */
722         ChangeToDataDir();
723
724         /*
725          * Check for invalid combinations of GUC settings.
726          */
727         if (ReservedBackends >= MaxBackends)
728         {
729                 write_stderr("%s: superuser_reserved_connections must be less than max_connections\n", progname);
730                 ExitPostmaster(1);
731         }
732         if (XLogArchiveMode && wal_level == WAL_LEVEL_MINIMAL)
733                 ereport(ERROR,
734                                 (errmsg("WAL archival (archive_mode=on) requires wal_level \"archive\" or \"hot_standby\"")));
735         if (max_wal_senders > 0 && wal_level == WAL_LEVEL_MINIMAL)
736                 ereport(ERROR,
737                                 (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"archive\" or \"hot_standby\"")));
738
739         /*
740          * Other one-time internal sanity checks can go here, if they are fast.
741          * (Put any slow processing further down, after postmaster.pid creation.)
742          */
743         if (!CheckDateTokenTables())
744         {
745                 write_stderr("%s: invalid datetoken tables, please fix\n", progname);
746                 ExitPostmaster(1);
747         }
748
749         /*
750          * Now that we are done processing the postmaster arguments, reset
751          * getopt(3) library so that it will work correctly in subprocesses.
752          */
753         optind = 1;
754 #ifdef HAVE_INT_OPTRESET
755         optreset = 1;                           /* some systems need this too */
756 #endif
757
758         /* For debugging: display postmaster environment */
759         {
760                 extern char **environ;
761                 char      **p;
762
763                 ereport(DEBUG3,
764                                 (errmsg_internal("%s: PostmasterMain: initial environ dump:",
765                                                                  progname)));
766                 ereport(DEBUG3,
767                          (errmsg_internal("-----------------------------------------")));
768                 for (p = environ; *p; ++p)
769                         ereport(DEBUG3,
770                                         (errmsg_internal("\t%s", *p)));
771                 ereport(DEBUG3,
772                          (errmsg_internal("-----------------------------------------")));
773         }
774
775         /*
776          * Fork away from controlling terminal, if silent_mode specified.
777          *
778          * Must do this before we grab any interlock files, else the interlocks
779          * will show the wrong PID.
780          */
781         if (SilentMode)
782                 pmdaemonize();
783
784         /*
785          * Create lockfile for data directory.
786          *
787          * We want to do this before we try to grab the input sockets, because the
788          * data directory interlock is more reliable than the socket-file
789          * interlock (thanks to whoever decided to put socket files in /tmp :-().
790          * For the same reason, it's best to grab the TCP socket(s) before the
791          * Unix socket.
792          */
793         CreateDataDirLockFile(true);
794
795         /*
796          * If timezone is not set, determine what the OS uses.  (In theory this
797          * should be done during GUC initialization, but because it can take as
798          * much as several seconds, we delay it until after we've created the
799          * postmaster.pid file.  This prevents problems with boot scripts that
800          * expect the pidfile to appear quickly.  Also, we avoid problems with
801          * trying to locate the timezone files too early in initialization.)
802          */
803         pg_timezone_initialize();
804
805         /*
806          * Likewise, init timezone_abbreviations if not already set.
807          */
808         pg_timezone_abbrev_initialize();
809
810         /*
811          * Initialize SSL library, if specified.
812          */
813 #ifdef USE_SSL
814         if (EnableSSL)
815                 secure_initialize();
816 #endif
817
818         /*
819          * process any libraries that should be preloaded at postmaster start
820          */
821         process_shared_preload_libraries();
822
823         /*
824          * Remove old temporary files.  At this point there can be no other
825          * Postgres processes running in this directory, so this should be safe.
826          */
827         RemovePgTempFiles();
828
829         /*
830          * Establish input sockets.
831          */
832         for (i = 0; i < MAXLISTEN; i++)
833                 ListenSocket[i] = PGINVALID_SOCKET;
834
835         if (ListenAddresses)
836         {
837                 char       *rawstring;
838                 List       *elemlist;
839                 ListCell   *l;
840                 int                     success = 0;
841
842                 /* Need a modifiable copy of ListenAddresses */
843                 rawstring = pstrdup(ListenAddresses);
844
845                 /* Parse string into list of identifiers */
846                 if (!SplitIdentifierString(rawstring, ',', &elemlist))
847                 {
848                         /* syntax error in list */
849                         ereport(FATAL,
850                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
851                                          errmsg("invalid list syntax for \"listen_addresses\"")));
852                 }
853
854                 foreach(l, elemlist)
855                 {
856                         char       *curhost = (char *) lfirst(l);
857
858                         if (strcmp(curhost, "*") == 0)
859                                 status = StreamServerPort(AF_UNSPEC, NULL,
860                                                                                   (unsigned short) PostPortNumber,
861                                                                                   UnixSocketDir,
862                                                                                   ListenSocket, MAXLISTEN);
863                         else
864                                 status = StreamServerPort(AF_UNSPEC, curhost,
865                                                                                   (unsigned short) PostPortNumber,
866                                                                                   UnixSocketDir,
867                                                                                   ListenSocket, MAXLISTEN);
868                         if (status == STATUS_OK)
869                                 success++;
870                         else
871                                 ereport(WARNING,
872                                                 (errmsg("could not create listen socket for \"%s\"",
873                                                                 curhost)));
874                 }
875
876                 if (!success && list_length(elemlist))
877                         ereport(FATAL,
878                                         (errmsg("could not create any TCP/IP sockets")));
879
880                 list_free(elemlist);
881                 pfree(rawstring);
882         }
883
884 #ifdef USE_BONJOUR
885         /* Register for Bonjour only if we opened TCP socket(s) */
886         if (enable_bonjour && ListenSocket[0] != PGINVALID_SOCKET)
887         {
888                 DNSServiceErrorType err;
889
890                 /*
891                  * We pass 0 for interface_index, which will result in registering on
892                  * all "applicable" interfaces.  It's not entirely clear from the
893                  * DNS-SD docs whether this would be appropriate if we have bound to
894                  * just a subset of the available network interfaces.
895                  */
896                 err = DNSServiceRegister(&bonjour_sdref,
897                                                                  0,
898                                                                  0,
899                                                                  bonjour_name,
900                                                                  "_postgresql._tcp.",
901                                                                  NULL,
902                                                                  NULL,
903                                                                  htons(PostPortNumber),
904                                                                  0,
905                                                                  NULL,
906                                                                  NULL,
907                                                                  NULL);
908                 if (err != kDNSServiceErr_NoError)
909                         elog(LOG, "DNSServiceRegister() failed: error code %ld",
910                                  (long) err);
911
912                 /*
913                  * We don't bother to read the mDNS daemon's reply, and we expect that
914                  * it will automatically terminate our registration when the socket is
915                  * closed at postmaster termination.  So there's nothing more to be
916                  * done here.  However, the bonjour_sdref is kept around so that
917                  * forked children can close their copies of the socket.
918                  */
919         }
920 #endif
921
922 #ifdef HAVE_UNIX_SOCKETS
923         status = StreamServerPort(AF_UNIX, NULL,
924                                                           (unsigned short) PostPortNumber,
925                                                           UnixSocketDir,
926                                                           ListenSocket, MAXLISTEN);
927         if (status != STATUS_OK)
928                 ereport(WARNING,
929                                 (errmsg("could not create Unix-domain socket")));
930 #endif
931
932         /*
933          * check that we have some socket to listen on
934          */
935         if (ListenSocket[0] == PGINVALID_SOCKET)
936                 ereport(FATAL,
937                                 (errmsg("no socket created for listening")));
938
939         /*
940          * Set up shared memory and semaphores.
941          */
942         reset_shared(PostPortNumber);
943
944         /*
945          * Estimate number of openable files.  This must happen after setting up
946          * semaphores, because on some platforms semaphores count as open files.
947          */
948         set_max_safe_fds();
949
950         /*
951          * Initialize the list of active backends.
952          */
953         BackendList = DLNewList();
954
955 #ifdef WIN32
956
957         /*
958          * Initialize I/O completion port used to deliver list of dead children.
959          */
960         win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
961         if (win32ChildQueue == NULL)
962                 ereport(FATAL,
963                    (errmsg("could not create I/O completion port for child queue")));
964
965         /*
966          * Set up a handle that child processes can use to check whether the
967          * postmaster is still running.
968          */
969         if (DuplicateHandle(GetCurrentProcess(),
970                                                 GetCurrentProcess(),
971                                                 GetCurrentProcess(),
972                                                 &PostmasterHandle,
973                                                 0,
974                                                 TRUE,
975                                                 DUPLICATE_SAME_ACCESS) == 0)
976                 ereport(FATAL,
977                                 (errmsg_internal("could not duplicate postmaster handle: error code %d",
978                                                                  (int) GetLastError())));
979 #endif
980
981         /*
982          * Record postmaster options.  We delay this till now to avoid recording
983          * bogus options (eg, NBuffers too high for available memory).
984          */
985         if (!CreateOptsFile(argc, argv, my_exec_path))
986                 ExitPostmaster(1);
987
988 #ifdef EXEC_BACKEND
989         /* Write out nondefault GUC settings for child processes to use */
990         write_nondefault_variables(PGC_POSTMASTER);
991 #endif
992
993         /*
994          * Write the external PID file if requested
995          */
996         if (external_pid_file)
997         {
998                 FILE       *fpidfile = fopen(external_pid_file, "w");
999
1000                 if (fpidfile)
1001                 {
1002                         fprintf(fpidfile, "%d\n", MyProcPid);
1003                         fclose(fpidfile);
1004                         /* Should we remove the pid file on postmaster exit? */
1005                 }
1006                 else
1007                         write_stderr("%s: could not write external PID file \"%s\": %s\n",
1008                                                  progname, external_pid_file, strerror(errno));
1009         }
1010
1011         /*
1012          * Set up signal handlers for the postmaster process.
1013          *
1014          * CAUTION: when changing this list, check for side-effects on the signal
1015          * handling setup of child processes.  See tcop/postgres.c,
1016          * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
1017          * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c, and
1018          * postmaster/syslogger.c.
1019          */
1020         pqinitmask();
1021         PG_SETMASK(&BlockSig);
1022
1023         pqsignal(SIGHUP, SIGHUP_handler);       /* reread config file and have
1024                                                                                  * children do same */
1025         pqsignal(SIGINT, pmdie);        /* send SIGTERM and shut down */
1026         pqsignal(SIGQUIT, pmdie);       /* send SIGQUIT and die */
1027         pqsignal(SIGTERM, pmdie);       /* wait for children and shut down */
1028         pqsignal(SIGALRM, SIG_IGN); /* ignored */
1029         pqsignal(SIGPIPE, SIG_IGN); /* ignored */
1030         pqsignal(SIGUSR1, sigusr1_handler); /* message from child process */
1031         pqsignal(SIGUSR2, dummy_handler);       /* unused, reserve for children */
1032         pqsignal(SIGCHLD, reaper);      /* handle child termination */
1033         pqsignal(SIGTTIN, SIG_IGN); /* ignored */
1034         pqsignal(SIGTTOU, SIG_IGN); /* ignored */
1035         /* ignore SIGXFSZ, so that ulimit violations work like disk full */
1036 #ifdef SIGXFSZ
1037         pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
1038 #endif
1039
1040         /*
1041          * If enabled, start up syslogger collection subprocess
1042          */
1043         SysLoggerPID = SysLogger_Start();
1044
1045         /*
1046          * Reset whereToSendOutput from DestDebug (its starting state) to
1047          * DestNone. This stops ereport from sending log messages to stderr unless
1048          * Log_destination permits.  We don't do this until the postmaster is
1049          * fully launched, since startup failures may as well be reported to
1050          * stderr.
1051          */
1052         whereToSendOutput = DestNone;
1053
1054         /*
1055          * Initialize stats collection subsystem (this does NOT start the
1056          * collector process!)
1057          */
1058         pgstat_init();
1059
1060         /*
1061          * Initialize the autovacuum subsystem (again, no process start yet)
1062          */
1063         autovac_init();
1064
1065         /*
1066          * Load configuration files for client authentication.
1067          */
1068         if (!load_hba())
1069         {
1070                 /*
1071                  * It makes no sense to continue if we fail to load the HBA file,
1072                  * since there is no way to connect to the database in this case.
1073                  */
1074                 ereport(FATAL,
1075                                 (errmsg("could not load pg_hba.conf")));
1076         }
1077         load_ident();
1078
1079         /*
1080          * Remember postmaster startup time
1081          */
1082         PgStartTime = GetCurrentTimestamp();
1083         /* PostmasterRandom wants its own copy */
1084         gettimeofday(&random_start_time, NULL);
1085
1086         /*
1087          * We're ready to rock and roll...
1088          */
1089         StartupPID = StartupDataBase();
1090         Assert(StartupPID != 0);
1091         pmState = PM_STARTUP;
1092
1093         status = ServerLoop();
1094
1095         /*
1096          * ServerLoop probably shouldn't ever return, but if it does, close down.
1097          */
1098         ExitPostmaster(status != STATUS_OK);
1099
1100         return 0;                                       /* not reached */
1101 }
1102
1103
1104 /*
1105  * Compute and check the directory paths to files that are part of the
1106  * installation (as deduced from the postgres executable's own location)
1107  */
1108 static void
1109 getInstallationPaths(const char *argv0)
1110 {
1111         DIR                *pdir;
1112
1113         /* Locate the postgres executable itself */
1114         if (find_my_exec(argv0, my_exec_path) < 0)
1115                 elog(FATAL, "%s: could not locate my own executable path", argv0);
1116
1117 #ifdef EXEC_BACKEND
1118         /* Locate executable backend before we change working directory */
1119         if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1120                                                 postgres_exec_path) < 0)
1121                 ereport(FATAL,
1122                                 (errmsg("%s: could not locate matching postgres executable",
1123                                                 argv0)));
1124 #endif
1125
1126         /*
1127          * Locate the pkglib directory --- this has to be set early in case we try
1128          * to load any modules from it in response to postgresql.conf entries.
1129          */
1130         get_pkglib_path(my_exec_path, pkglib_path);
1131
1132         /*
1133          * Verify that there's a readable directory there; otherwise the Postgres
1134          * installation is incomplete or corrupt.  (A typical cause of this
1135          * failure is that the postgres executable has been moved or hardlinked to
1136          * some directory that's not a sibling of the installation lib/
1137          * directory.)
1138          */
1139         pdir = AllocateDir(pkglib_path);
1140         if (pdir == NULL)
1141                 ereport(ERROR,
1142                                 (errcode_for_file_access(),
1143                                  errmsg("could not open directory \"%s\": %m",
1144                                                 pkglib_path),
1145                                  errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1146                                                  my_exec_path)));
1147         FreeDir(pdir);
1148
1149         /*
1150          * XXX is it worth similarly checking the share/ directory?  If the lib/
1151          * directory is there, then share/ probably is too.
1152          */
1153 }
1154
1155
1156 /*
1157  * Validate the proposed data directory
1158  */
1159 static void
1160 checkDataDir(void)
1161 {
1162         char            path[MAXPGPATH];
1163         FILE       *fp;
1164         struct stat stat_buf;
1165
1166         Assert(DataDir);
1167
1168         if (stat(DataDir, &stat_buf) != 0)
1169         {
1170                 if (errno == ENOENT)
1171                         ereport(FATAL,
1172                                         (errcode_for_file_access(),
1173                                          errmsg("data directory \"%s\" does not exist",
1174                                                         DataDir)));
1175                 else
1176                         ereport(FATAL,
1177                                         (errcode_for_file_access(),
1178                                  errmsg("could not read permissions of directory \"%s\": %m",
1179                                                 DataDir)));
1180         }
1181
1182         /* eventual chdir would fail anyway, but let's test ... */
1183         if (!S_ISDIR(stat_buf.st_mode))
1184                 ereport(FATAL,
1185                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1186                                  errmsg("specified data directory \"%s\" is not a directory",
1187                                                 DataDir)));
1188
1189         /*
1190          * Check that the directory belongs to my userid; if not, reject.
1191          *
1192          * This check is an essential part of the interlock that prevents two
1193          * postmasters from starting in the same directory (see CreateLockFile()).
1194          * Do not remove or weaken it.
1195          *
1196          * XXX can we safely enable this check on Windows?
1197          */
1198 #if !defined(WIN32) && !defined(__CYGWIN__)
1199         if (stat_buf.st_uid != geteuid())
1200                 ereport(FATAL,
1201                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1202                                  errmsg("data directory \"%s\" has wrong ownership",
1203                                                 DataDir),
1204                                  errhint("The server must be started by the user that owns the data directory.")));
1205 #endif
1206
1207         /*
1208          * Check if the directory has group or world access.  If so, reject.
1209          *
1210          * It would be possible to allow weaker constraints (for example, allow
1211          * group access) but we cannot make a general assumption that that is
1212          * okay; for example there are platforms where nearly all users
1213          * customarily belong to the same group.  Perhaps this test should be
1214          * configurable.
1215          *
1216          * XXX temporarily suppress check when on Windows, because there may not
1217          * be proper support for Unix-y file permissions.  Need to think of a
1218          * reasonable check to apply on Windows.
1219          */
1220 #if !defined(WIN32) && !defined(__CYGWIN__)
1221         if (stat_buf.st_mode & (S_IRWXG | S_IRWXO))
1222                 ereport(FATAL,
1223                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1224                                  errmsg("data directory \"%s\" has group or world access",
1225                                                 DataDir),
1226                                  errdetail("Permissions should be u=rwx (0700).")));
1227 #endif
1228
1229         /* Look for PG_VERSION before looking for pg_control */
1230         ValidatePgVersion(DataDir);
1231
1232         snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1233
1234         fp = AllocateFile(path, PG_BINARY_R);
1235         if (fp == NULL)
1236         {
1237                 write_stderr("%s: could not find the database system\n"
1238                                          "Expected to find it in the directory \"%s\",\n"
1239                                          "but could not open file \"%s\": %s\n",
1240                                          progname, DataDir, path, strerror(errno));
1241                 ExitPostmaster(2);
1242         }
1243         FreeFile(fp);
1244 }
1245
1246
1247 /*
1248  * Fork away from the controlling terminal (silent_mode option)
1249  *
1250  * Since this requires disconnecting from stdin/stdout/stderr (in case they're
1251  * linked to the terminal), we re-point stdin to /dev/null and stdout/stderr
1252  * to "postmaster.log" in the data directory, where we're already chdir'd.
1253  */
1254 static void
1255 pmdaemonize(void)
1256 {
1257 #ifndef WIN32
1258         const char *pmlogname = "postmaster.log";
1259         int                     dvnull;
1260         int                     pmlog;
1261         pid_t           pid;
1262         int                     res;
1263
1264         /*
1265          * Make sure we can open the files we're going to redirect to.  If this
1266          * fails, we want to complain before disconnecting.  Mention the full path
1267          * of the logfile in the error message, even though we address it by
1268          * relative path.
1269          */
1270         dvnull = open(DEVNULL, O_RDONLY, 0);
1271         if (dvnull < 0)
1272         {
1273                 write_stderr("%s: could not open file \"%s\": %s\n",
1274                                          progname, DEVNULL, strerror(errno));
1275                 ExitPostmaster(1);
1276         }
1277         pmlog = open(pmlogname, O_CREAT | O_WRONLY | O_APPEND, 0600);
1278         if (pmlog < 0)
1279         {
1280                 write_stderr("%s: could not open log file \"%s/%s\": %s\n",
1281                                          progname, DataDir, pmlogname, strerror(errno));
1282                 ExitPostmaster(1);
1283         }
1284
1285         /*
1286          * Okay to fork.
1287          */
1288         pid = fork_process();
1289         if (pid == (pid_t) -1)
1290         {
1291                 write_stderr("%s: could not fork background process: %s\n",
1292                                          progname, strerror(errno));
1293                 ExitPostmaster(1);
1294         }
1295         else if (pid)
1296         {                                                       /* parent */
1297                 /* Parent should just exit, without doing any atexit cleanup */
1298                 _exit(0);
1299         }
1300
1301         MyProcPid = PostmasterPid = getpid();           /* reset PID vars to child */
1302
1303         MyStartTime = time(NULL);
1304
1305         /*
1306          * Some systems use setsid() to dissociate from the TTY's process group,
1307          * while on others it depends on stdin/stdout/stderr.  Do both if
1308          * possible.
1309          */
1310 #ifdef HAVE_SETSID
1311         if (setsid() < 0)
1312         {
1313                 write_stderr("%s: could not dissociate from controlling TTY: %s\n",
1314                                          progname, strerror(errno));
1315                 ExitPostmaster(1);
1316         }
1317 #endif
1318
1319         /*
1320          * Reassociate stdin/stdout/stderr.  fork_process() cleared any pending
1321          * output, so this should be safe.      The only plausible error is EINTR,
1322          * which just means we should retry.
1323          */
1324         do
1325         {
1326                 res = dup2(dvnull, 0);
1327         } while (res < 0 && errno == EINTR);
1328         close(dvnull);
1329         do
1330         {
1331                 res = dup2(pmlog, 1);
1332         } while (res < 0 && errno == EINTR);
1333         do
1334         {
1335                 res = dup2(pmlog, 2);
1336         } while (res < 0 && errno == EINTR);
1337         close(pmlog);
1338 #else                                                   /* WIN32 */
1339         /* not supported */
1340         elog(FATAL, "silent_mode is not supported under Windows");
1341 #endif   /* WIN32 */
1342 }
1343
1344
1345 /*
1346  * Main idle loop of postmaster
1347  */
1348 static int
1349 ServerLoop(void)
1350 {
1351         fd_set          readmask;
1352         int                     nSockets;
1353         time_t          now,
1354                                 last_touch_time;
1355
1356         last_touch_time = time(NULL);
1357
1358         nSockets = initMasks(&readmask);
1359
1360         for (;;)
1361         {
1362                 fd_set          rmask;
1363                 int                     selres;
1364
1365                 /*
1366                  * Wait for a connection request to arrive.
1367                  *
1368                  * We wait at most one minute, to ensure that the other background
1369                  * tasks handled below get done even when no requests are arriving.
1370                  *
1371                  * If we are in PM_WAIT_DEAD_END state, then we don't want to accept
1372                  * any new connections, so we don't call select() at all; just sleep
1373                  * for a little bit with signals unblocked.
1374                  */
1375                 memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1376
1377                 PG_SETMASK(&UnBlockSig);
1378
1379                 if (pmState == PM_WAIT_DEAD_END)
1380                 {
1381                         pg_usleep(100000L); /* 100 msec seems reasonable */
1382                         selres = 0;
1383                 }
1384                 else
1385                 {
1386                         /* must set timeout each time; some OSes change it! */
1387                         struct timeval timeout;
1388
1389                         timeout.tv_sec = 60;
1390                         timeout.tv_usec = 0;
1391
1392                         selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1393                 }
1394
1395                 /*
1396                  * Block all signals until we wait again.  (This makes it safe for our
1397                  * signal handlers to do nontrivial work.)
1398                  */
1399                 PG_SETMASK(&BlockSig);
1400
1401                 /* Now check the select() result */
1402                 if (selres < 0)
1403                 {
1404                         if (errno != EINTR && errno != EWOULDBLOCK)
1405                         {
1406                                 ereport(LOG,
1407                                                 (errcode_for_socket_access(),
1408                                                  errmsg("select() failed in postmaster: %m")));
1409                                 return STATUS_ERROR;
1410                         }
1411                 }
1412
1413                 /*
1414                  * New connection pending on any of our sockets? If so, fork a child
1415                  * process to deal with it.
1416                  */
1417                 if (selres > 0)
1418                 {
1419                         int                     i;
1420
1421                         for (i = 0; i < MAXLISTEN; i++)
1422                         {
1423                                 if (ListenSocket[i] == PGINVALID_SOCKET)
1424                                         break;
1425                                 if (FD_ISSET(ListenSocket[i], &rmask))
1426                                 {
1427                                         Port       *port;
1428
1429                                         port = ConnCreate(ListenSocket[i]);
1430                                         if (port)
1431                                         {
1432                                                 BackendStartup(port);
1433
1434                                                 /*
1435                                                  * We no longer need the open socket or port structure
1436                                                  * in this process
1437                                                  */
1438                                                 StreamClose(port->sock);
1439                                                 ConnFree(port);
1440                                         }
1441                                 }
1442                         }
1443                 }
1444
1445                 /* If we have lost the log collector, try to start a new one */
1446                 if (SysLoggerPID == 0 && Logging_collector)
1447                         SysLoggerPID = SysLogger_Start();
1448
1449                 /*
1450                  * If no background writer process is running, and we are not in a
1451                  * state that prevents it, start one.  It doesn't matter if this
1452                  * fails, we'll just try again later.
1453                  */
1454                 if (BgWriterPID == 0 &&
1455                         (pmState == PM_RUN || pmState == PM_RECOVERY ||
1456                          pmState == PM_HOT_STANDBY))
1457                         BgWriterPID = StartBackgroundWriter();
1458
1459                 /*
1460                  * Likewise, if we have lost the walwriter process, try to start a new
1461                  * one.
1462                  */
1463                 if (WalWriterPID == 0 && pmState == PM_RUN)
1464                         WalWriterPID = StartWalWriter();
1465
1466                 /* If we have lost the autovacuum launcher, try to start a new one */
1467                 if (AutoVacPID == 0 &&
1468                         (AutoVacuumingActive() || start_autovac_launcher) &&
1469                         pmState == PM_RUN)
1470                 {
1471                         AutoVacPID = StartAutoVacLauncher();
1472                         if (AutoVacPID != 0)
1473                                 start_autovac_launcher = false; /* signal processed */
1474                 }
1475
1476                 /* If we have lost the archiver, try to start a new one */
1477                 if (XLogArchivingActive() && PgArchPID == 0 && pmState == PM_RUN)
1478                         PgArchPID = pgarch_start();
1479
1480                 /* If we have lost the stats collector, try to start a new one */
1481                 if (PgStatPID == 0 && pmState == PM_RUN)
1482                         PgStatPID = pgstat_start();
1483
1484                 /* If we need to signal the autovacuum launcher, do so now */
1485                 if (avlauncher_needs_signal)
1486                 {
1487                         avlauncher_needs_signal = false;
1488                         if (AutoVacPID != 0)
1489                                 kill(AutoVacPID, SIGUSR2);
1490                 }
1491
1492                 /*
1493                  * Touch the socket and lock file every 58 minutes, to ensure that
1494                  * they are not removed by overzealous /tmp-cleaning tasks.  We assume
1495                  * no one runs cleaners with cutoff times of less than an hour ...
1496                  */
1497                 now = time(NULL);
1498                 if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1499                 {
1500                         TouchSocketFile();
1501                         TouchSocketLockFile();
1502                         last_touch_time = now;
1503                 }
1504         }
1505 }
1506
1507
1508 /*
1509  * Initialise the masks for select() for the ports we are listening on.
1510  * Return the number of sockets to listen on.
1511  */
1512 static int
1513 initMasks(fd_set *rmask)
1514 {
1515         int                     maxsock = -1;
1516         int                     i;
1517
1518         FD_ZERO(rmask);
1519
1520         for (i = 0; i < MAXLISTEN; i++)
1521         {
1522                 int                     fd = ListenSocket[i];
1523
1524                 if (fd == PGINVALID_SOCKET)
1525                         break;
1526                 FD_SET(fd, rmask);
1527
1528                 if (fd > maxsock)
1529                         maxsock = fd;
1530         }
1531
1532         return maxsock + 1;
1533 }
1534
1535
1536 /*
1537  * Read a client's startup packet and do something according to it.
1538  *
1539  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1540  * not return at all.
1541  *
1542  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1543  * if that's what you want.  Return STATUS_ERROR if you don't want to
1544  * send anything to the client, which would typically be appropriate
1545  * if we detect a communications failure.)
1546  */
1547 static int
1548 ProcessStartupPacket(Port *port, bool SSLdone)
1549 {
1550         int32           len;
1551         void       *buf;
1552         ProtocolVersion proto;
1553         MemoryContext oldcontext;
1554
1555         if (pq_getbytes((char *) &len, 4) == EOF)
1556         {
1557                 /*
1558                  * EOF after SSLdone probably means the client didn't like our
1559                  * response to NEGOTIATE_SSL_CODE.      That's not an error condition, so
1560                  * don't clutter the log with a complaint.
1561                  */
1562                 if (!SSLdone)
1563                         ereport(COMMERROR,
1564                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1565                                          errmsg("incomplete startup packet")));
1566                 return STATUS_ERROR;
1567         }
1568
1569         len = ntohl(len);
1570         len -= 4;
1571
1572         if (len < (int32) sizeof(ProtocolVersion) ||
1573                 len > MAX_STARTUP_PACKET_LENGTH)
1574         {
1575                 ereport(COMMERROR,
1576                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1577                                  errmsg("invalid length of startup packet")));
1578                 return STATUS_ERROR;
1579         }
1580
1581         /*
1582          * Allocate at least the size of an old-style startup packet, plus one
1583          * extra byte, and make sure all are zeroes.  This ensures we will have
1584          * null termination of all strings, in both fixed- and variable-length
1585          * packet layouts.
1586          */
1587         if (len <= (int32) sizeof(StartupPacket))
1588                 buf = palloc0(sizeof(StartupPacket) + 1);
1589         else
1590                 buf = palloc0(len + 1);
1591
1592         if (pq_getbytes(buf, len) == EOF)
1593         {
1594                 ereport(COMMERROR,
1595                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1596                                  errmsg("incomplete startup packet")));
1597                 return STATUS_ERROR;
1598         }
1599
1600         /*
1601          * The first field is either a protocol version number or a special
1602          * request code.
1603          */
1604         port->proto = proto = ntohl(*((ProtocolVersion *) buf));
1605
1606         if (proto == CANCEL_REQUEST_CODE)
1607         {
1608                 processCancelRequest(port, buf);
1609                 /* Not really an error, but we don't want to proceed further */
1610                 return STATUS_ERROR;
1611         }
1612
1613         if (proto == NEGOTIATE_SSL_CODE && !SSLdone)
1614         {
1615                 char            SSLok;
1616
1617 #ifdef USE_SSL
1618                 /* No SSL when disabled or on Unix sockets */
1619                 if (!EnableSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
1620                         SSLok = 'N';
1621                 else
1622                         SSLok = 'S';            /* Support for SSL */
1623 #else
1624                 SSLok = 'N';                    /* No support for SSL */
1625 #endif
1626
1627 retry1:
1628                 if (send(port->sock, &SSLok, 1, 0) != 1)
1629                 {
1630                         if (errno == EINTR)
1631                                 goto retry1;    /* if interrupted, just retry */
1632                         ereport(COMMERROR,
1633                                         (errcode_for_socket_access(),
1634                                          errmsg("failed to send SSL negotiation response: %m")));
1635                         return STATUS_ERROR;    /* close the connection */
1636                 }
1637
1638 #ifdef USE_SSL
1639                 if (SSLok == 'S' && secure_open_server(port) == -1)
1640                         return STATUS_ERROR;
1641 #endif
1642                 /* regular startup packet, cancel, etc packet should follow... */
1643                 /* but not another SSL negotiation request */
1644                 return ProcessStartupPacket(port, true);
1645         }
1646
1647         /* Could add additional special packet types here */
1648
1649         /*
1650          * Set FrontendProtocol now so that ereport() knows what format to send if
1651          * we fail during startup.
1652          */
1653         FrontendProtocol = proto;
1654
1655         /* Check we can handle the protocol the frontend is using. */
1656
1657         if (PG_PROTOCOL_MAJOR(proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) ||
1658                 PG_PROTOCOL_MAJOR(proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) ||
1659                 (PG_PROTOCOL_MAJOR(proto) == PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) &&
1660                  PG_PROTOCOL_MINOR(proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST)))
1661                 ereport(FATAL,
1662                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1663                                  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
1664                                                 PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
1665                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST),
1666                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST),
1667                                                 PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST))));
1668
1669         /*
1670          * Now fetch parameters out of startup packet and save them into the Port
1671          * structure.  All data structures attached to the Port struct must be
1672          * allocated in TopMemoryContext so that they will remain available in a
1673          * running backend (even after PostmasterContext is destroyed).  We need
1674          * not worry about leaking this storage on failure, since we aren't in the
1675          * postmaster process anymore.
1676          */
1677         oldcontext = MemoryContextSwitchTo(TopMemoryContext);
1678
1679         if (PG_PROTOCOL_MAJOR(proto) >= 3)
1680         {
1681                 int32           offset = sizeof(ProtocolVersion);
1682
1683                 /*
1684                  * Scan packet body for name/option pairs.      We can assume any string
1685                  * beginning within the packet body is null-terminated, thanks to
1686                  * zeroing extra byte above.
1687                  */
1688                 port->guc_options = NIL;
1689
1690                 while (offset < len)
1691                 {
1692                         char       *nameptr = ((char *) buf) + offset;
1693                         int32           valoffset;
1694                         char       *valptr;
1695
1696                         if (*nameptr == '\0')
1697                                 break;                  /* found packet terminator */
1698                         valoffset = offset + strlen(nameptr) + 1;
1699                         if (valoffset >= len)
1700                                 break;                  /* missing value, will complain below */
1701                         valptr = ((char *) buf) + valoffset;
1702
1703                         if (strcmp(nameptr, "database") == 0)
1704                                 port->database_name = pstrdup(valptr);
1705                         else if (strcmp(nameptr, "user") == 0)
1706                                 port->user_name = pstrdup(valptr);
1707                         else if (strcmp(nameptr, "options") == 0)
1708                                 port->cmdline_options = pstrdup(valptr);
1709                         else if (strcmp(nameptr, "replication") == 0)
1710                         {
1711                                 if (!parse_bool(valptr, &am_walsender))
1712                                         ereport(FATAL,
1713                                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1714                                                          errmsg("invalid value for boolean option \"replication\"")));
1715                         }
1716                         else
1717                         {
1718                                 /* Assume it's a generic GUC option */
1719                                 port->guc_options = lappend(port->guc_options,
1720                                                                                         pstrdup(nameptr));
1721                                 port->guc_options = lappend(port->guc_options,
1722                                                                                         pstrdup(valptr));
1723                         }
1724                         offset = valoffset + strlen(valptr) + 1;
1725                 }
1726
1727                 /*
1728                  * If we didn't find a packet terminator exactly at the end of the
1729                  * given packet length, complain.
1730                  */
1731                 if (offset != len - 1)
1732                         ereport(FATAL,
1733                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1734                                          errmsg("invalid startup packet layout: expected terminator as last byte")));
1735         }
1736         else
1737         {
1738                 /*
1739                  * Get the parameters from the old-style, fixed-width-fields startup
1740                  * packet as C strings.  The packet destination was cleared first so a
1741                  * short packet has zeros silently added.  We have to be prepared to
1742                  * truncate the pstrdup result for oversize fields, though.
1743                  */
1744                 StartupPacket *packet = (StartupPacket *) buf;
1745
1746                 port->database_name = pstrdup(packet->database);
1747                 if (strlen(port->database_name) > sizeof(packet->database))
1748                         port->database_name[sizeof(packet->database)] = '\0';
1749                 port->user_name = pstrdup(packet->user);
1750                 if (strlen(port->user_name) > sizeof(packet->user))
1751                         port->user_name[sizeof(packet->user)] = '\0';
1752                 port->cmdline_options = pstrdup(packet->options);
1753                 if (strlen(port->cmdline_options) > sizeof(packet->options))
1754                         port->cmdline_options[sizeof(packet->options)] = '\0';
1755                 port->guc_options = NIL;
1756         }
1757
1758         /* Check a user name was given. */
1759         if (port->user_name == NULL || port->user_name[0] == '\0')
1760                 ereport(FATAL,
1761                                 (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
1762                          errmsg("no PostgreSQL user name specified in startup packet")));
1763
1764         /* The database defaults to the user name. */
1765         if (port->database_name == NULL || port->database_name[0] == '\0')
1766                 port->database_name = pstrdup(port->user_name);
1767
1768         if (Db_user_namespace)
1769         {
1770                 /*
1771                  * If user@, it is a global user, remove '@'. We only want to do this
1772                  * if there is an '@' at the end and no earlier in the user string or
1773                  * they may fake as a local user of another database attaching to this
1774                  * database.
1775                  */
1776                 if (strchr(port->user_name, '@') ==
1777                         port->user_name + strlen(port->user_name) - 1)
1778                         *strchr(port->user_name, '@') = '\0';
1779                 else
1780                 {
1781                         /* Append '@' and dbname */
1782                         char       *db_user;
1783
1784                         db_user = palloc(strlen(port->user_name) +
1785                                                          strlen(port->database_name) + 2);
1786                         sprintf(db_user, "%s@%s", port->user_name, port->database_name);
1787                         port->user_name = db_user;
1788                 }
1789         }
1790
1791         /*
1792          * Truncate given database and user names to length of a Postgres name.
1793          * This avoids lookup failures when overlength names are given.
1794          */
1795         if (strlen(port->database_name) >= NAMEDATALEN)
1796                 port->database_name[NAMEDATALEN - 1] = '\0';
1797         if (strlen(port->user_name) >= NAMEDATALEN)
1798                 port->user_name[NAMEDATALEN - 1] = '\0';
1799
1800         /* Walsender is not related to a particular database */
1801         if (am_walsender)
1802                 port->database_name[0] = '\0';
1803
1804         /*
1805          * Done putting stuff in TopMemoryContext.
1806          */
1807         MemoryContextSwitchTo(oldcontext);
1808
1809         /*
1810          * If we're going to reject the connection due to database state, say so
1811          * now instead of wasting cycles on an authentication exchange. (This also
1812          * allows a pg_ping utility to be written.)
1813          */
1814         switch (port->canAcceptConnections)
1815         {
1816                 case CAC_STARTUP:
1817                         ereport(FATAL,
1818                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1819                                          errmsg("the database system is starting up")));
1820                         break;
1821                 case CAC_SHUTDOWN:
1822                         ereport(FATAL,
1823                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1824                                          errmsg("the database system is shutting down")));
1825                         break;
1826                 case CAC_RECOVERY:
1827                         ereport(FATAL,
1828                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1829                                          errmsg("the database system is in recovery mode")));
1830                         break;
1831                 case CAC_TOOMANY:
1832                         ereport(FATAL,
1833                                         (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
1834                                          errmsg("sorry, too many clients already")));
1835                         break;
1836                 case CAC_WAITBACKUP:
1837                         /* OK for now, will check in InitPostgres */
1838                         break;
1839                 case CAC_OK:
1840                         break;
1841         }
1842
1843         return STATUS_OK;
1844 }
1845
1846
1847 /*
1848  * The client has sent a cancel request packet, not a normal
1849  * start-a-new-connection packet.  Perform the necessary processing.
1850  * Nothing is sent back to the client.
1851  */
1852 static void
1853 processCancelRequest(Port *port, void *pkt)
1854 {
1855         CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
1856         int                     backendPID;
1857         long            cancelAuthCode;
1858         Backend    *bp;
1859
1860 #ifndef EXEC_BACKEND
1861         Dlelem     *curr;
1862 #else
1863         int                     i;
1864 #endif
1865
1866         backendPID = (int) ntohl(canc->backendPID);
1867         cancelAuthCode = (long) ntohl(canc->cancelAuthCode);
1868
1869         /*
1870          * See if we have a matching backend.  In the EXEC_BACKEND case, we can no
1871          * longer access the postmaster's own backend list, and must rely on the
1872          * duplicate array in shared memory.
1873          */
1874 #ifndef EXEC_BACKEND
1875         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
1876         {
1877                 bp = (Backend *) DLE_VAL(curr);
1878 #else
1879         for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
1880         {
1881                 bp = (Backend *) &ShmemBackendArray[i];
1882 #endif
1883                 if (bp->pid == backendPID)
1884                 {
1885                         if (bp->cancel_key == cancelAuthCode)
1886                         {
1887                                 /* Found a match; signal that backend to cancel current op */
1888                                 ereport(DEBUG2,
1889                                                 (errmsg_internal("processing cancel request: sending SIGINT to process %d",
1890                                                                                  backendPID)));
1891                                 signal_child(bp->pid, SIGINT);
1892                         }
1893                         else
1894                                 /* Right PID, wrong key: no way, Jose */
1895                                 ereport(LOG,
1896                                                 (errmsg("wrong key in cancel request for process %d",
1897                                                                 backendPID)));
1898                         return;
1899                 }
1900         }
1901
1902         /* No matching backend */
1903         ereport(LOG,
1904                         (errmsg("PID %d in cancel request did not match any process",
1905                                         backendPID)));
1906 }
1907
1908 /*
1909  * canAcceptConnections --- check to see if database state allows connections.
1910  */
1911 static enum CAC_state
1912 canAcceptConnections(void)
1913 {
1914         /*
1915          * Can't start backends when in startup/shutdown/inconsistent recovery
1916          * state.
1917          *
1918          * In state PM_WAIT_BACKUP only superusers can connect (this must be
1919          * allowed so that a superuser can end online backup mode); we return
1920          * CAC_WAITBACKUP code to indicate that this must be checked later.
1921          */
1922         if (pmState != PM_RUN)
1923         {
1924                 if (pmState == PM_WAIT_BACKUP)
1925                         return CAC_WAITBACKUP;          /* allow superusers only */
1926                 if (Shutdown > NoShutdown)
1927                         return CAC_SHUTDOWN;    /* shutdown is pending */
1928                 if (!FatalError &&
1929                         (pmState == PM_STARTUP ||
1930                          pmState == PM_RECOVERY))
1931                         return CAC_STARTUP; /* normal startup */
1932                 if (!FatalError &&
1933                         pmState == PM_HOT_STANDBY)
1934                         return CAC_OK;          /* connection OK during hot standby */
1935                 return CAC_RECOVERY;    /* else must be crash recovery */
1936         }
1937
1938         /*
1939          * Don't start too many children.
1940          *
1941          * We allow more connections than we can have backends here because some
1942          * might still be authenticating; they might fail auth, or some existing
1943          * backend might exit before the auth cycle is completed. The exact
1944          * MaxBackends limit is enforced when a new backend tries to join the
1945          * shared-inval backend array.
1946          *
1947          * The limit here must match the sizes of the per-child-process arrays;
1948          * see comments for MaxLivePostmasterChildren().
1949          */
1950         if (CountChildren(BACKEND_TYPE_ALL) >= MaxLivePostmasterChildren())
1951                 return CAC_TOOMANY;
1952
1953         return CAC_OK;
1954 }
1955
1956
1957 /*
1958  * ConnCreate -- create a local connection data structure
1959  */
1960 static Port *
1961 ConnCreate(int serverFd)
1962 {
1963         Port       *port;
1964
1965         if (!(port = (Port *) calloc(1, sizeof(Port))))
1966         {
1967                 ereport(LOG,
1968                                 (errcode(ERRCODE_OUT_OF_MEMORY),
1969                                  errmsg("out of memory")));
1970                 ExitPostmaster(1);
1971         }
1972
1973         if (StreamConnection(serverFd, port) != STATUS_OK)
1974         {
1975                 if (port->sock >= 0)
1976                         StreamClose(port->sock);
1977                 ConnFree(port);
1978                 port = NULL;
1979         }
1980         else
1981         {
1982                 /*
1983                  * Precompute password salt values to use for this connection. It's
1984                  * slightly annoying to do this long in advance of knowing whether
1985                  * we'll need 'em or not, but we must do the random() calls before we
1986                  * fork, not after.  Else the postmaster's random sequence won't get
1987                  * advanced, and all backends would end up using the same salt...
1988                  */
1989                 RandomSalt(port->md5Salt);
1990         }
1991
1992         /*
1993          * Allocate GSSAPI specific state struct
1994          */
1995 #ifndef EXEC_BACKEND
1996 #if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
1997         port->gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo));
1998         if (!port->gss)
1999         {
2000                 ereport(LOG,
2001                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2002                                  errmsg("out of memory")));
2003                 ExitPostmaster(1);
2004         }
2005 #endif
2006 #endif
2007
2008         return port;
2009 }
2010
2011
2012 /*
2013  * ConnFree -- free a local connection data structure
2014  */
2015 static void
2016 ConnFree(Port *conn)
2017 {
2018 #ifdef USE_SSL
2019         secure_close(conn);
2020 #endif
2021         if (conn->gss)
2022                 free(conn->gss);
2023         free(conn);
2024 }
2025
2026
2027 /*
2028  * ClosePostmasterPorts -- close all the postmaster's open sockets
2029  *
2030  * This is called during child process startup to release file descriptors
2031  * that are not needed by that child process.  The postmaster still has
2032  * them open, of course.
2033  *
2034  * Note: we pass am_syslogger as a boolean because we don't want to set
2035  * the global variable yet when this is called.
2036  */
2037 void
2038 ClosePostmasterPorts(bool am_syslogger)
2039 {
2040         int                     i;
2041
2042         /* Close the listen sockets */
2043         for (i = 0; i < MAXLISTEN; i++)
2044         {
2045                 if (ListenSocket[i] != PGINVALID_SOCKET)
2046                 {
2047                         StreamClose(ListenSocket[i]);
2048                         ListenSocket[i] = PGINVALID_SOCKET;
2049                 }
2050         }
2051
2052         /* If using syslogger, close the read side of the pipe */
2053         if (!am_syslogger)
2054         {
2055 #ifndef WIN32
2056                 if (syslogPipe[0] >= 0)
2057                         close(syslogPipe[0]);
2058                 syslogPipe[0] = -1;
2059 #else
2060                 if (syslogPipe[0])
2061                         CloseHandle(syslogPipe[0]);
2062                 syslogPipe[0] = 0;
2063 #endif
2064         }
2065
2066 #ifdef USE_BONJOUR
2067         /* If using Bonjour, close the connection to the mDNS daemon */
2068         if (bonjour_sdref)
2069                 close(DNSServiceRefSockFD(bonjour_sdref));
2070 #endif
2071 }
2072
2073
/*
 * reset_shared -- (re)build shared memory and semaphores
 *
 * Thin wrapper that hands the port number on to
 * CreateSharedMemoryAndSemaphores(), passing false as its first argument.
 */
static void
reset_shared(int port)
{
	/*
	 * The IPC keys are derived from the port number (when SysV shmem and/or
	 * semaphores are in use), so each "cycle of life" normally ends up with
	 * the same keys.  That in turn helps us clean up dead IPC objects left
	 * behind if the postmaster crashed and was restarted.
	 */
	CreateSharedMemoryAndSemaphores(false, port);
}
2090
2091
2092 /*
2093  * SIGHUP -- reread config files, and tell children to do same
2094  */
2095 static void
2096 SIGHUP_handler(SIGNAL_ARGS)
2097 {
2098         int                     save_errno = errno;
2099
2100         PG_SETMASK(&BlockSig);
2101
2102         if (Shutdown <= SmartShutdown)
2103         {
2104                 ereport(LOG,
2105                                 (errmsg("received SIGHUP, reloading configuration files")));
2106                 ProcessConfigFile(PGC_SIGHUP);
2107                 SignalChildren(SIGHUP);
2108                 if (StartupPID != 0)
2109                         signal_child(StartupPID, SIGHUP);
2110                 if (BgWriterPID != 0)
2111                         signal_child(BgWriterPID, SIGHUP);
2112                 if (WalWriterPID != 0)
2113                         signal_child(WalWriterPID, SIGHUP);
2114                 if (WalReceiverPID != 0)
2115                         signal_child(WalReceiverPID, SIGHUP);
2116                 if (AutoVacPID != 0)
2117                         signal_child(AutoVacPID, SIGHUP);
2118                 if (PgArchPID != 0)
2119                         signal_child(PgArchPID, SIGHUP);
2120                 if (SysLoggerPID != 0)
2121                         signal_child(SysLoggerPID, SIGHUP);
2122                 if (PgStatPID != 0)
2123                         signal_child(PgStatPID, SIGHUP);
2124
2125                 /* Reload authentication config files too */
2126                 if (!load_hba())
2127                         ereport(WARNING,
2128                                         (errmsg("pg_hba.conf not reloaded")));
2129
2130                 load_ident();
2131
2132 #ifdef EXEC_BACKEND
2133                 /* Update the starting-point file for future children */
2134                 write_nondefault_variables(PGC_SIGHUP);
2135 #endif
2136         }
2137
2138         PG_SETMASK(&UnBlockSig);
2139
2140         errno = save_errno;
2141 }
2142
2143
2144 /*
2145  * pmdie -- signal handler for processing various postmaster signals.
      *
      * Dispatches on postgres_signal_arg:
      *
      *      SIGTERM   Smart shutdown.  If pmState is one of the states tested in
      *                the code, autovacuum workers, the autovacuum launcher and
      *                the walwriter are sent SIGTERM and pmState becomes
      *                PM_WAIT_BACKUP (from PM_RUN) or PM_WAIT_READONLY (from the
      *                other tested states).
      *      SIGINT    Fast shutdown.  The startup process and walreceiver are
      *                sent SIGTERM.  From PM_RECOVERY only the state changes;
      *                from the other tested states normal backends, autovacuum
      *                processes and the walwriter are sent SIGTERM as well.
      *                In both cases pmState becomes PM_WAIT_BACKENDS.
      *      SIGQUIT   Immediate shutdown.  Every child tracked here is sent
      *                SIGQUIT and ExitPostmaster(0) is called.
      *
      * Any other signal number matches no case, so only the DEBUG2 message is
      * emitted.  SIGTERM and SIGINT are ignored when Shutdown already records a
      * request of equal or greater strength; SIGQUIT is always acted upon.
      * After a SIGTERM or SIGINT request is recorded, PostmasterStateMachine()
      * is called to take whatever next step is already possible.
      *
      * The handler installs BlockSig on entry and UnBlockSig on exit, and
      * restores the errno value seen on entry before returning.
2146  */
2147 static void
2148 pmdie(SIGNAL_ARGS)
2149 {
             /* saved here and written back just before returning */
2150         int                     save_errno = errno;
2151
2152         PG_SETMASK(&BlockSig);
2153
2154         ereport(DEBUG2,
2155                         (errmsg_internal("postmaster received signal %d",
2156                                                          postgres_signal_arg)));
2157
2158         switch (postgres_signal_arg)
2159         {
2160                 case SIGTERM:
2161
2162                         /*
2163                          * Smart Shutdown:
2164                          *
2165                          * Wait for children to end their work, then shut down.
2166                          */
                             /* nothing to do if an equal or stronger request is pending */
2167                         if (Shutdown >= SmartShutdown)
2168                                 break;
2169                         Shutdown = SmartShutdown;
2170                         ereport(LOG,
2171                                         (errmsg("received smart shutdown request")));
2172
2173                         if (pmState == PM_RUN || pmState == PM_RECOVERY ||
2174                                 pmState == PM_HOT_STANDBY || pmState == PM_STARTUP)
2175                         {
2176                                 /* autovacuum workers are told to shut down immediately */
2177                                 SignalAutovacWorkers(SIGTERM);
2178                                 /* and the autovac launcher too */
2179                                 if (AutoVacPID != 0)
2180                                         signal_child(AutoVacPID, SIGTERM);
2181                                 /* and the walwriter too */
2182                                 if (WalWriterPID != 0)
2183                                         signal_child(WalWriterPID, SIGTERM);
2184
2185                                 /*
2186                                  * If we're in recovery, we can't kill the startup process
2187                                  * right away, because at present doing so does not release
2188                                  * its locks.  We might want to change this in a future
2189                                  * release.  For the time being, the PM_WAIT_READONLY state
2190                                  * indicates that we're waiting for the regular (read only)
2191                                  * backends to die off; once they do, we'll kill the startup
2192                                  * and walreceiver processes.
2193                                  */
2194                                 pmState = (pmState == PM_RUN) ?
2195                                         PM_WAIT_BACKUP : PM_WAIT_READONLY;
2196                         }
2197
2198                         /*
2199                          * Now wait for online backup mode to end and backends to exit. If
2200                          * that is already the case, PostmasterStateMachine will take the
2201                          * next step.
2202                          */
2203                         PostmasterStateMachine();
2204                         break;
2205
2206                 case SIGINT:
2207
2208                         /*
2209                          * Fast Shutdown:
2210                          *
2211                          * Abort all children with SIGTERM (rollback active transactions
2212                          * and exit) and shut down when they are gone.
2213                          */
                             /* nothing to do if an equal or stronger request is pending */
2214                         if (Shutdown >= FastShutdown)
2215                                 break;
2216                         Shutdown = FastShutdown;
2217                         ereport(LOG,
2218                                         (errmsg("received fast shutdown request")));
2219
                             /* these two are signalled regardless of the current pmState */
2220                         if (StartupPID != 0)
2221                                 signal_child(StartupPID, SIGTERM);
2222                         if (WalReceiverPID != 0)
2223                                 signal_child(WalReceiverPID, SIGTERM);
2224                         if (pmState == PM_RECOVERY)
2225                         {
2226                                 /* only bgwriter is active in this state */
2227                                 pmState = PM_WAIT_BACKENDS;
2228                         }
2229                         else if (pmState == PM_RUN ||
2230                                          pmState == PM_WAIT_BACKUP ||
2231                                          pmState == PM_WAIT_READONLY ||
2232                                          pmState == PM_WAIT_BACKENDS ||
2233                                          pmState == PM_HOT_STANDBY)
2234                         {
2235                                 ereport(LOG,
2236                                                 (errmsg("aborting any active transactions")));
2237                                 /* shut down all backends and autovac workers */
2238                                 SignalSomeChildren(SIGTERM,
2239                                                                  BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC);
2240                                 /* and the autovac launcher too */
2241                                 if (AutoVacPID != 0)
2242                                         signal_child(AutoVacPID, SIGTERM);
2243                                 /* and the walwriter too */
2244                                 if (WalWriterPID != 0)
2245                                         signal_child(WalWriterPID, SIGTERM);
2246                                 pmState = PM_WAIT_BACKENDS;
2247                         }
2248
2249                         /*
2250                          * Now wait for backends to exit.  If there are none,
2251                          * PostmasterStateMachine will take the next step.
2252                          */
2253                         PostmasterStateMachine();
2254                         break;
2255
2256                 case SIGQUIT:
2257
2258                         /*
2259                          * Immediate Shutdown:
2260                          *
2261                          * abort all children with SIGQUIT and exit without attempt to
2262                          * properly shut down data base system.
                              *
                              * Unlike the two cases above there is no test of Shutdown
                              * here, so this request is honored even when another shutdown
                              * is already in progress.
2263                          */
2264                         ereport(LOG,
2265                                         (errmsg("received immediate shutdown request")));
2266                         SignalChildren(SIGQUIT);
2267                         if (StartupPID != 0)
2268                                 signal_child(StartupPID, SIGQUIT);
2269                         if (BgWriterPID != 0)
2270                                 signal_child(BgWriterPID, SIGQUIT);
2271                         if (WalWriterPID != 0)
2272                                 signal_child(WalWriterPID, SIGQUIT);
2273                         if (WalReceiverPID != 0)
2274                                 signal_child(WalReceiverPID, SIGQUIT);
2275                         if (AutoVacPID != 0)
2276                                 signal_child(AutoVacPID, SIGQUIT);
2277                         if (PgArchPID != 0)
2278                                 signal_child(PgArchPID, SIGQUIT);
2279                         if (PgStatPID != 0)
2280                                 signal_child(PgStatPID, SIGQUIT);
                             /*
                              * NOTE(review): ExitPostmaster() presumably does not return,
                              * which would make the break below unreachable -- confirm
                              * against its definition.
                              */
2281                         ExitPostmaster(0);
2282                         break;
2283         }
2284
2285         PG_SETMASK(&UnBlockSig);
2286
2287         errno = save_errno;
2288 }
2289
2290 /*
2291  * Reaper -- signal handler to cleanup after a child process dies.
      *
      * Repeatedly calls LOOPTEST() to fetch the pid and status of one exited
      * child, until it reports none left (pid <= 0).  Each pid is compared, in
      * this order, against the startup process, bgwriter, WAL writer, WAL
      * receiver, autovacuum launcher, archiver, statistics collector and system
      * logger; the first match handles the exit and continues with the next
      * child.  A pid matching none of them is passed to CleanupBackend().
      *
      * Once the loop ends, PostmasterStateMachine() is called a single time to
      * act on whatever state changes the exits caused.
      *
      * The handler installs BlockSig on entry and UnBlockSig on exit, and
      * restores the errno value seen on entry before returning.
2292  */
2293 static void
2294 reaper(SIGNAL_ARGS)
2295 {
2296         int                     save_errno = errno;
2297         int                     pid;                    /* process id of dead child process */
2298         int                     exitstatus;             /* its exit status */
2299
2300         /* These macros hide platform variations in getting child status */
             /*
              * LOOPTEST() stores the next dead child's pid in "pid" and is true
              * while that pid is positive.  LOOPHEADER() copies the platform's
              * status value into "exitstatus"; on WIN32 it is empty because
              * win32_waitpid() is handed &exitstatus directly.
              */
2301 #ifdef HAVE_WAITPID
2302         int                     status;                 /* child exit status */
2303
2304 #define LOOPTEST()              ((pid = waitpid(-1, &status, WNOHANG)) > 0)
2305 #define LOOPHEADER()    (exitstatus = status)
2306 #else                                                   /* !HAVE_WAITPID */
2307 #ifndef WIN32
2308         union wait      status;                 /* child exit status */
2309
2310 #define LOOPTEST()              ((pid = wait3(&status, WNOHANG, NULL)) > 0)
2311 #define LOOPHEADER()    (exitstatus = status.w_status)
2312 #else                                                   /* WIN32 */
2313 #define LOOPTEST()              ((pid = win32_waitpid(&exitstatus)) > 0)
2314 #define LOOPHEADER()
2315 #endif   /* WIN32 */
2316 #endif   /* HAVE_WAITPID */
2317
2318         PG_SETMASK(&BlockSig);
2319
2320         ereport(DEBUG4,
2321                         (errmsg_internal("reaping dead processes")));
2322
2323         while (LOOPTEST())
2324         {
2325                 LOOPHEADER();
2326
2327                 /*
2328                  * Check if this child was a startup process.
2329                  */
2330                 if (pid == StartupPID)
2331                 {
2332                         StartupPID = 0;
2333
2334                         /*
2335                          * Unexpected exit of startup process (including FATAL exit)
2336                          * during PM_STARTUP is treated as catastrophic. There are no
2337                          * other processes running yet, so we can just exit.
2338                          */
2339                         if (pmState == PM_STARTUP && !EXIT_STATUS_0(exitstatus))
2340                         {
2341                                 LogChildExit(LOG, _("startup process"),
2342                                                          pid, exitstatus);
2343                                 ereport(LOG,
2344                                 (errmsg("aborting startup due to startup process failure")));
2345                                 ExitPostmaster(1);
2346                         }
2347
2348                         /*
2349                          * Startup process exited in response to a shutdown request (or it
2350                          * completed normally regardless of the shutdown request).
2351                          */
2352                         if (Shutdown > NoShutdown &&
2353                                 (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
2354                         {
2355                                 pmState = PM_WAIT_BACKENDS;
2356                                 /* PostmasterStateMachine logic does the rest */
2357                                 continue;
2358                         }
2359
2360                         /*
2361                          * Any unexpected exit (including FATAL exit) of the startup
2362                          * process is treated as a crash, except that we don't want to
2363                          * reinitialize.
2364                          */
2365                         if (!EXIT_STATUS_0(exitstatus))
2366                         {
2367                                 RecoveryError = true;
2368                                 HandleChildCrash(pid, exitstatus,
2369                                                                  _("startup process"));
2370                                 continue;
2371                         }
2372
2373                         /*
2374                          * Startup succeeded, commence normal operations
2375                          */
2376                         FatalError = false;
2377                         ReachedNormalRunning = true;
2378                         pmState = PM_RUN;
2379
2380                         /*
2381                          * Crank up the background writer, if we didn't do that already
2382                          * when we entered consistent recovery state.  It doesn't matter
2383                          * if this fails, we'll just try again later.
2384                          */
2385                         if (BgWriterPID == 0)
2386                                 BgWriterPID = StartBackgroundWriter();
2387
2388                         /*
2389                          * Likewise, start other special children as needed.  In a restart
2390                          * situation, some of them may be alive already.
2391                          */
2392                         if (WalWriterPID == 0)
2393                                 WalWriterPID = StartWalWriter();
2394                         if (AutoVacuumingActive() && AutoVacPID == 0)
2395                                 AutoVacPID = StartAutoVacLauncher();
2396                         if (XLogArchivingActive() && PgArchPID == 0)
2397                                 PgArchPID = pgarch_start();
2398                         if (PgStatPID == 0)
2399                                 PgStatPID = pgstat_start();
2400
2401                         /* at this point we are really open for business */
2402                         ereport(LOG,
2403                                  (errmsg("database system is ready to accept connections")));
2404
2405                         continue;
2406                 }
2407
2408                 /*
2409                  * Was it the bgwriter?
2410                  */
2411                 if (pid == BgWriterPID)
2412                 {
2413                         BgWriterPID = 0;
2414                         if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
2415                         {
2416                                 /*
2417                                  * OK, we saw normal exit of the bgwriter after it's been told
2418                                  * to shut down.  We expect that it wrote a shutdown
2419                                  * checkpoint.  (If for some reason it didn't, recovery will
2420                                  * occur on next postmaster start.)
2421                                  *
2422                                  * At this point we should have no normal backend children
2423                                  * left (else we'd not be in PM_SHUTDOWN state) but we might
2424                                  * have dead_end children to wait for.
2425                                  *
2426                                  * If we have an archiver subprocess, tell it to do a last
2427                                  * archive cycle and quit. Likewise, if we have walsender
2428                                  * processes, tell them to send any remaining WAL and quit.
2429                                  */
2430                                 Assert(Shutdown > NoShutdown);
2431
2432                                 /* Waken archiver for the last time */
2433                                 if (PgArchPID != 0)
2434                                         signal_child(PgArchPID, SIGUSR2);
2435
2436                                 /*
2437                                  * Waken walsenders for the last time. No regular backends
2438                                  * should be around anymore.
2439                                  */
2440                                 SignalChildren(SIGUSR2);
2441
2442                                 pmState = PM_SHUTDOWN_2;
2443
2444                                 /*
2445                                  * We can also shut down the stats collector now; there's
2446                                  * nothing left for it to do.
2447                                  */
2448                                 if (PgStatPID != 0)
2449                                         signal_child(PgStatPID, SIGQUIT);
2450                         }
2451                         else
2452                         {
2453                                 /*
2454                                  * Any unexpected exit of the bgwriter (including FATAL exit)
2455                                  * is treated as a crash.
2456                                  */
2457                                 HandleChildCrash(pid, exitstatus,
2458                                                                  _("background writer process"));
2459                         }
2460
2461                         continue;
2462                 }
2463
2464                 /*
2465                  * Was it the wal writer?  Normal exit can be ignored; we'll start a
2466                  * new one at the next iteration of the postmaster's main loop, if
2467                  * necessary.  Any other exit condition is treated as a crash.
2468                  */
2469                 if (pid == WalWriterPID)
2470                 {
2471                         WalWriterPID = 0;
2472                         if (!EXIT_STATUS_0(exitstatus))
2473                                 HandleChildCrash(pid, exitstatus,
2474                                                                  _("WAL writer process"));
2475                         continue;
2476                 }
2477
2478                 /*
2479                  * Was it the wal receiver?  If exit status is zero (normal) or one
2480                  * (FATAL exit), we assume everything is all right just like normal
2481                  * backends.
2482                  */
2483                 if (pid == WalReceiverPID)
2484                 {
2485                         WalReceiverPID = 0;
2486                         if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
2487                                 HandleChildCrash(pid, exitstatus,
2488                                                                  _("WAL receiver process"));
2489                         continue;
2490                 }
2491
2492                 /*
2493                  * Was it the autovacuum launcher?      Normal exit can be ignored; we'll
2494                  * start a new one at the next iteration of the postmaster's main
2495                  * loop, if necessary.  Any other exit condition is treated as a
2496                  * crash.
2497                  */
2498                 if (pid == AutoVacPID)
2499                 {
2500                         AutoVacPID = 0;
2501                         if (!EXIT_STATUS_0(exitstatus))
2502                                 HandleChildCrash(pid, exitstatus,
2503                                                                  _("autovacuum launcher process"));
2504                         continue;
2505                 }
2506
2507                 /*
2508                  * Was it the archiver?  If so, just try to start a new one; no need
2509                  * to force reset of the rest of the system.  (If fail, we'll try
2510                  * again in future cycles of the main loop.).  Unless we were waiting
2511                  * for it to shut down; don't restart it in that case, and
2512                  * PostmasterStateMachine() will advance to the next shutdown step.
2513                  */
2514                 if (pid == PgArchPID)
2515                 {
2516                         PgArchPID = 0;
2517                         if (!EXIT_STATUS_0(exitstatus))
2518                                 LogChildExit(LOG, _("archiver process"),
2519                                                          pid, exitstatus);
                             /* restarted only in PM_RUN, i.e. not while shutting down */
2520                         if (XLogArchivingActive() && pmState == PM_RUN)
2521                                 PgArchPID = pgarch_start();
2522                         continue;
2523                 }
2524
2525                 /*
2526                  * Was it the statistics collector?  If so, just try to start a new
2527                  * one; no need to force reset of the rest of the system.  (If fail,
2528                  * we'll try again in future cycles of the main loop.)
2529                  */
2530                 if (pid == PgStatPID)
2531                 {
2532                         PgStatPID = 0;
2533                         if (!EXIT_STATUS_0(exitstatus))
2534                                 LogChildExit(LOG, _("statistics collector process"),
2535                                                          pid, exitstatus);
2536                         if (pmState == PM_RUN)
2537                                 PgStatPID = pgstat_start();
2538                         continue;
2539                 }
2540
2541                 /* Was it the system logger?  If so, try to start a new one */
2542                 if (pid == SysLoggerPID)
2543                 {
2544                         SysLoggerPID = 0;
2545                         /* for safety's sake, launch new logger *first* */
2546                         SysLoggerPID = SysLogger_Start();
2547                         if (!EXIT_STATUS_0(exitstatus))
2548                                 LogChildExit(LOG, _("system logger process"),
2549                                                          pid, exitstatus);
2550                         continue;
2551                 }
2552
2553                 /*
2554                  * Else do standard backend child cleanup.
2555                  */
2556                 CleanupBackend(pid, exitstatus);
2557         }                                                       /* loop over pending child-death reports */
2558
2559         /*
2560          * After cleaning out the SIGCHLD queue, see if we have any state changes
2561          * or actions to make.
2562          */
2563         PostmasterStateMachine();
2564
2565         /* Done with signal handler */
2566         PG_SETMASK(&UnBlockSig);
2567
2568         errno = save_errno;
2569 }
2570
2571
2572 /*
2573  * CleanupBackend -- cleanup after terminated backend.
2574  *
2575  * Remove all local state associated with backend.
      *
      *      pid             process id of the child that exited
      *      exitstatus      its exit status as collected by the caller
      *
      * An exit status other than 0 or 1 hands the child to HandleChildCrash()
      * and returns at once; HandleChildCrash() removes the BackendList entry
      * itself.  Otherwise BackendList is searched for the entry whose pid
      * matches; that entry is unlinked and freed.  If no entry matches, nothing
      * further is done.
2576  */
2577 static void
2578 CleanupBackend(int pid,
2579                            int exitstatus)      /* child's exit status. */
2580 {
2581         Dlelem     *curr;
2582
2583         LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
2584
2585         /*
2586          * If a backend dies in an ugly way then we must signal all other backends
2587          * to quickdie.  If exit status is zero (normal) or one (FATAL exit), we
2588          * assume everything is all right and proceed to remove the backend from
2589          * the active backend list.
2590          */
2591 #ifdef WIN32
2592         /*
2593          * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal
2594          * case, since that sometimes happens under load when the process fails
2595          * to start properly (long before it starts using shared memory).
2596          * Microsoft reports it is related to mutex failure:
2597          *    http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
              *
              * The event is logged at LOG level with the real status, and only
              * then is the status rewritten to 0 so the crash test below passes.
2598          */
2599         if (exitstatus == ERROR_WAIT_NO_CHILDREN)
2600         {
2601                 LogChildExit(LOG, _("server process"), pid, exitstatus);
2602                 exitstatus = 0;
2603         }
2604 #endif
2605
2606         if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
2607         {
2608                 HandleChildCrash(pid, exitstatus, _("server process"));
2609                 return;
2610         }
2611
             /* Locate this child's entry; the loop stops at the first pid match */
2612         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2613         {
2614                 Backend    *bp = (Backend *) DLE_VAL(curr);
2615
2616                 if (bp->pid == pid)
2617                 {
                             /* the slot release and shmem removal are skipped for dead_end children */
2618                         if (!bp->dead_end)
2619                         {
2620                                 if (!ReleasePostmasterChildSlot(bp->child_slot))
2621                                 {
2622                                         /*
2623                                          * Uh-oh, the child failed to clean itself up.  Treat as a
2624                                          * crash after all.
2625                                          */
2626                                         HandleChildCrash(pid, exitstatus, _("server process"));
2627                                         return;
2628                                 }
2629 #ifdef EXEC_BACKEND
2630                                 ShmemBackendArrayRemove(bp);
2631 #endif
2632                         }
2633                         DLRemove(curr);
2634                         free(bp);
2635                         break;
2636                 }
2637         }
2638 }
2639
2640 /*
2641  * HandleChildCrash -- cleanup after failed backend, startup process,
2642  * bgwriter, walwriter, walreceiver, or autovacuum launcher.
2643  *
2644  * The objectives here are to clean up our local state about the child
2645  * process, and to signal all other remaining children to quickdie.
      *
      *      pid             process id of the child that failed
      *      exitstatus      its exit status, passed on to LogChildExit()
      *      procname        noun phrase naming the child, used in the log entry
      *
      * Every log entry and every signal sent below is guarded by !FatalError,
      * and FatalError is set to true at the end.  So while FatalError stays
      * true, later calls only clean up local state (the BackendList entry and
      * the matching special-process PID variable) and send nothing.
      *
      * Regular backends and the startup process, bgwriter, walwriter,
      * walreceiver and autovacuum launcher get SIGSTOP when SendStop is set,
      * else SIGQUIT.  The archiver and statistics collector always get SIGQUIT.
2646  */
2647 static void
2648 HandleChildCrash(int pid, int exitstatus, const char *procname)
2649 {
2650         Dlelem     *curr,
2651                            *next;
2652         Backend    *bp;
2653
2654         /*
2655          * Make log entry unless there was a previous crash (if so, nonzero exit
2656          * status is to be expected in SIGQUIT response; don't clutter log)
2657          */
2658         if (!FatalError)
2659         {
2660                 LogChildExit(LOG, procname, pid, exitstatus);
2661                 ereport(LOG,
2662                                 (errmsg("terminating any other active server processes")));
2663         }
2664
2665         /* Process regular backends */
2666         for (curr = DLGetHead(BackendList); curr; curr = next)
2667         {
                     /* fetch the successor first: curr may be removed below */
2668                 next = DLGetSucc(curr);
2669                 bp = (Backend *) DLE_VAL(curr);
2670                 if (bp->pid == pid)
2671                 {
2672                         /*
2673                          * Found entry for freshly-dead backend, so remove it.
2674                          */
2675                         if (!bp->dead_end)
2676                         {
                                     /* result deliberately ignored: this is already the crash path */
2677                                 (void) ReleasePostmasterChildSlot(bp->child_slot);
2678 #ifdef EXEC_BACKEND
2679                                 ShmemBackendArrayRemove(bp);
2680 #endif
2681                         }
2682                         DLRemove(curr);
2683                         free(bp);
2684                         /* Keep looping so we can signal remaining backends */
2685                 }
2686                 else
2687                 {
2688                         /*
2689                          * This backend is still alive.  Unless we did so already, tell it
2690                          * to commit hara-kiri.
2691                          *
2692                          * SIGQUIT is the special signal that says exit without proc_exit
2693                          * and let the user know what's going on. But if SendStop is set
2694                          * (-s on command line), then we send SIGSTOP instead, so that we
2695                          * can get core dumps from all backends by hand.
2696                          *
2697                          * We could exclude dead_end children here, but at least in the
2698                          * SIGSTOP case it seems better to include them.
2699                          */
2700                         if (!FatalError)
2701                         {
2702                                 ereport(DEBUG2,
2703                                                 (errmsg_internal("sending %s to process %d",
2704                                                                                  (SendStop ? "SIGSTOP" : "SIGQUIT"),
2705                                                                                  (int) bp->pid)));
2706                                 signal_child(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
2707                         }
2708                 }
2709         }
2710
             /*
              * Each special process below follows the same pattern: if it is the
              * child that failed, just forget its PID; otherwise, if it exists
              * and this is the first crash, signal it.
              */
2711         /* Take care of the startup process too */
2712         if (pid == StartupPID)
2713                 StartupPID = 0;
2714         else if (StartupPID != 0 && !FatalError)
2715         {
2716                 ereport(DEBUG2,
2717                                 (errmsg_internal("sending %s to process %d",
2718                                                                  (SendStop ? "SIGSTOP" : "SIGQUIT"),
2719                                                                  (int) StartupPID)));
2720                 signal_child(StartupPID, (SendStop ? SIGSTOP : SIGQUIT));
2721         }
2722
2723         /* Take care of the bgwriter too */
2724         if (pid == BgWriterPID)
2725                 BgWriterPID = 0;
2726         else if (BgWriterPID != 0 && !FatalError)
2727         {
2728                 ereport(DEBUG2,
2729                                 (errmsg_internal("sending %s to process %d",
2730                                                                  (SendStop ? "SIGSTOP" : "SIGQUIT"),
2731                                                                  (int) BgWriterPID)));
2732                 signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
2733         }
2734
2735         /* Take care of the walwriter too */
2736         if (pid == WalWriterPID)
2737                 WalWriterPID = 0;
2738         else if (WalWriterPID != 0 && !FatalError)
2739         {
2740                 ereport(DEBUG2,
2741                                 (errmsg_internal("sending %s to process %d",
2742                                                                  (SendStop ? "SIGSTOP" : "SIGQUIT"),
2743                                                                  (int) WalWriterPID)));
2744                 signal_child(WalWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
2745         }
2746
2747         /* Take care of the walreceiver too */
2748         if (pid == WalReceiverPID)
2749                 WalReceiverPID = 0;
2750         else if (WalReceiverPID != 0 && !FatalError)
2751         {
2752                 ereport(DEBUG2,
2753                                 (errmsg_internal("sending %s to process %d",
2754                                                                  (SendStop ? "SIGSTOP" : "SIGQUIT"),
2755                                                                  (int) WalReceiverPID)));
2756                 signal_child(WalReceiverPID, (SendStop ? SIGSTOP : SIGQUIT));
2757         }
2758
2759         /* Take care of the autovacuum launcher too */
2760         if (pid == AutoVacPID)
2761                 AutoVacPID = 0;
2762         else if (AutoVacPID != 0 && !FatalError)
2763         {
2764                 ereport(DEBUG2,
2765                                 (errmsg_internal("sending %s to process %d",
2766                                                                  (SendStop ? "SIGSTOP" : "SIGQUIT"),
2767                                                                  (int) AutoVacPID)));
2768                 signal_child(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT));
2769         }
2770
2771         /*
2772          * Force a power-cycle of the pgarch process too.  (This isn't absolutely
2773          * necessary, but it seems like a good idea for robustness, and it
2774          * simplifies the state-machine logic in the case where a shutdown request
2775          * arrives during crash processing.)
2776          */
2777         if (PgArchPID != 0 && !FatalError)
2778         {
2779                 ereport(DEBUG2,
2780                                 (errmsg_internal("sending %s to process %d",
2781                                                                  "SIGQUIT",
2782                                                                  (int) PgArchPID)));
2783                 signal_child(PgArchPID, SIGQUIT);
2784         }
2785
2786         /*
2787          * Force a power-cycle of the pgstat process too.  (This isn't absolutely
2788          * necessary, but it seems like a good idea for robustness, and it
2789          * simplifies the state-machine logic in the case where a shutdown request
2790          * arrives during crash processing.)
2791          */
2792         if (PgStatPID != 0 && !FatalError)
2793         {
2794                 ereport(DEBUG2,
2795                                 (errmsg_internal("sending %s to process %d",
2796                                                                  "SIGQUIT",
2797                                                                  (int) PgStatPID)));
2798                 signal_child(PgStatPID, SIGQUIT);
2799                 allow_immediate_pgstat_restart();
2800         }
2801
2802         /* We do NOT restart the syslogger */
2803
             /* From now on the !FatalError guards above suppress logging and signalling */
2804         FatalError = true;
2805         /* We now transit into a state of waiting for children to die */
             /* pmState is left unchanged when it is not one of the states tested here */
2806         if (pmState == PM_RECOVERY ||
2807                 pmState == PM_HOT_STANDBY ||
2808                 pmState == PM_RUN ||
2809                 pmState == PM_WAIT_BACKUP ||
2810                 pmState == PM_WAIT_READONLY ||
2811                 pmState == PM_SHUTDOWN)
2812                 pmState = PM_WAIT_BACKENDS;
2813 }
2814
2815 /*
2816  * Log the death of a child process.
      *
      *      lev             ereport level at which the message is emitted
      *      procname        noun phrase describing the child, substituted for %s
      *      pid             process id of the child
      *      exitstatus      status value, decoded with WIFEXITED / WIFSIGNALED
      *
      * Exactly one message is emitted:
      *      - WIFEXITED:   "exited with exit code", using WEXITSTATUS
      *      - WIFSIGNALED: "was terminated by ...", using WTERMSIG; the wording
      *        is chosen at compile time (exception code on WIN32, signal number
      *        plus sys_siglist name where that is declared, else number only)
      *      - otherwise:   "exited with unrecognized status", raw value
      *
      * The if / else-if / else chain has no braces, so each preprocessor branch
      * in the WIFSIGNALED arm must supply exactly one ereport() statement.
2817  */
2818 static void
2819 LogChildExit(int lev, const char *procname, int pid, int exitstatus)
2820 {
2821         if (WIFEXITED(exitstatus))
2822                 ereport(lev,
2823
2824                 /*------
2825                   translator: %s is a noun phrase describing a child process, such as
2826                   "server process" */
2827                                 (errmsg("%s (PID %d) exited with exit code %d",
2828                                                 procname, pid, WEXITSTATUS(exitstatus))));
2829         else if (WIFSIGNALED(exitstatus))
2830 #if defined(WIN32)
2831                 ereport(lev,
2832
2833                 /*------
2834                   translator: %s is a noun phrase describing a child process, such as
2835                   "server process" */
2836                                 (errmsg("%s (PID %d) was terminated by exception 0x%X",
2837                                                 procname, pid, WTERMSIG(exitstatus)),
2838                                  errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value.")));
2839 #elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST
2840         ereport(lev,
2841
2842         /*------
2843           translator: %s is a noun phrase describing a child process, such as
2844           "server process" */
2845                         (errmsg("%s (PID %d) was terminated by signal %d: %s",
2846                                         procname, pid, WTERMSIG(exitstatus),
2847                                         WTERMSIG(exitstatus) < NSIG ?
2848                                         sys_siglist[WTERMSIG(exitstatus)] : "(unknown)")));
2849 #else
2850                 ereport(lev,
2851
2852                 /*------
2853                   translator: %s is a noun phrase describing a child process, such as
2854                   "server process" */
2855                                 (errmsg("%s (PID %d) was terminated by signal %d",
2856                                                 procname, pid, WTERMSIG(exitstatus))));
2857 #endif
2858         else
2859                 ereport(lev,
2860
2861                 /*------
2862                   translator: %s is a noun phrase describing a child process, such as
2863                   "server process" */
2864                                 (errmsg("%s (PID %d) exited with unrecognized status %d",
2865                                                 procname, pid, exitstatus)));
2866 }
2867
2868 /*
2869  * Advance the postmaster's state machine and take actions as appropriate
2870  *
2871  * This is common code for pmdie() and reaper(), which receive the signals
2872  * that might mean we need to change state.
2873  */
2874 static void
2875 PostmasterStateMachine(void)
2876 {
2877         if (pmState == PM_WAIT_BACKUP)
2878         {
2879                 /*
2880                  * PM_WAIT_BACKUP state ends when online backup mode is not active.
2881                  */
2882                 if (!BackupInProgress())
2883                         pmState = PM_WAIT_BACKENDS;
2884         }
2885
2886         if (pmState == PM_WAIT_READONLY)
2887         {
2888                 /*
2889                  * PM_WAIT_READONLY state ends when we have no regular backends that
2890                  * have been started during recovery.  We kill the startup and
2891                  * walreceiver processes and transition to PM_WAIT_BACKENDS.  Ideally,
2892                  * we might like to kill these processes first and then wait for
2893                  * backends to die off, but that doesn't work at present because
2894                  * killing the startup process doesn't release its locks.
2895                  */
2896                 if (CountChildren(BACKEND_TYPE_NORMAL) == 0)
2897                 {
2898                         if (StartupPID != 0)
2899                                 signal_child(StartupPID, SIGTERM);
2900                         if (WalReceiverPID != 0)
2901                                 signal_child(WalReceiverPID, SIGTERM);
2902                         pmState = PM_WAIT_BACKENDS;
2903                 }
2904         }
2905
2906         /*
2907          * If we are in a state-machine state that implies waiting for backends to
2908          * exit, see if they're all gone, and change state if so.
2909          */
2910         if (pmState == PM_WAIT_BACKENDS)
2911         {
2912                 /*
2913                  * PM_WAIT_BACKENDS state ends when we have no regular backends
2914                  * (including autovac workers) and no walwriter or autovac launcher.
2915                  * If we are doing crash recovery then we expect the bgwriter to exit
2916                  * too, otherwise not.  The archiver, stats, and syslogger processes
2917                  * are disregarded since they are not connected to shared memory; we
2918                  * also disregard dead_end children here. Walsenders are also
2919                  * disregarded, they will be terminated later after writing the
2920                  * checkpoint record, like the archiver process.
2921                  */
2922                 if (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC) == 0 &&
2923                         StartupPID == 0 &&
2924                         WalReceiverPID == 0 &&
2925                         (BgWriterPID == 0 || !FatalError) &&
2926                         WalWriterPID == 0 &&
2927                         AutoVacPID == 0)
2928                 {
2929                         if (FatalError)
2930                         {
2931                                 /*
2932                                  * Start waiting for dead_end children to die.  This state
2933                                  * change causes ServerLoop to stop creating new ones.
2934                                  */
2935                                 pmState = PM_WAIT_DEAD_END;
2936
2937                                 /*
2938                                  * We already SIGQUIT'd the archiver and stats processes, if
2939                                  * any, when we entered FatalError state.
2940                                  */
2941                         }
2942                         else
2943                         {
2944                                 /*
2945                                  * If we get here, we are proceeding with normal shutdown. All
2946                                  * the regular children are gone, and it's time to tell the
2947                                  * bgwriter to do a shutdown checkpoint.
2948                                  */
2949                                 Assert(Shutdown > NoShutdown);
2950                                 /* Start the bgwriter if not running */
2951                                 if (BgWriterPID == 0)
2952                                         BgWriterPID = StartBackgroundWriter();
2953                                 /* And tell it to shut down */
2954                                 if (BgWriterPID != 0)
2955                                 {
2956                                         signal_child(BgWriterPID, SIGUSR2);
2957                                         pmState = PM_SHUTDOWN;
2958                                 }
2959                                 else
2960                                 {
2961                                         /*
2962                                          * If we failed to fork a bgwriter, just shut down. Any
2963                                          * required cleanup will happen at next restart. We set
2964                                          * FatalError so that an "abnormal shutdown" message gets
2965                                          * logged when we exit.
2966                                          */
2967                                         FatalError = true;
2968                                         pmState = PM_WAIT_DEAD_END;
2969
2970                                         /* Kill the walsenders, archiver and stats collector too */
2971                                         SignalSomeChildren(SIGQUIT, BACKEND_TYPE_ALL);
2972                                         if (PgArchPID != 0)
2973                                                 signal_child(PgArchPID, SIGQUIT);
2974                                         if (PgStatPID != 0)
2975                                                 signal_child(PgStatPID, SIGQUIT);
2976                                 }
2977                         }
2978                 }
2979         }
2980
2981         if (pmState == PM_SHUTDOWN_2)
2982         {
2983                 /*
2984                  * PM_SHUTDOWN_2 state ends when there's no other children than
2985                  * dead_end children left. There shouldn't be any regular backends
2986                  * left by now anyway; what we're really waiting for is walsenders and
2987                  * archiver.
2988                  *
2989                  * Walreceiver should normally be dead by now, but not when a fast
2990                  * shutdown is performed during recovery.
2991                  */
2992                 if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0 &&
2993                         WalReceiverPID == 0)
2994                 {
2995                         pmState = PM_WAIT_DEAD_END;
2996                 }
2997         }
2998
2999         if (pmState == PM_WAIT_DEAD_END)
3000         {
3001                 /*
3002                  * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3003                  * (ie, no dead_end children remain), and the archiver and stats
3004                  * collector are gone too.
3005                  *
3006                  * The reason we wait for those two is to protect them against a new
3007                  * postmaster starting conflicting subprocesses; this isn't an
3008                  * ironclad protection, but it at least helps in the
3009                  * shutdown-and-immediately-restart scenario.  Note that they have
3010                  * already been sent appropriate shutdown signals, either during a
3011                  * normal state transition leading up to PM_WAIT_DEAD_END, or during
3012                  * FatalError processing.
3013                  */
3014                 if (DLGetHead(BackendList) == NULL &&
3015                         PgArchPID == 0 && PgStatPID == 0)
3016                 {
3017                         /* These other guys should be dead already */
3018                         Assert(StartupPID == 0);
3019                         Assert(WalReceiverPID == 0);
3020                         Assert(BgWriterPID == 0);
3021                         Assert(WalWriterPID == 0);
3022                         Assert(AutoVacPID == 0);
3023                         /* syslogger is not considered here */
3024                         pmState = PM_NO_CHILDREN;
3025                 }
3026         }
3027
3028         /*
3029          * If we've been told to shut down, we exit as soon as there are no
3030          * remaining children.  If there was a crash, cleanup will occur at the
3031          * next startup.  (Before PostgreSQL 8.3, we tried to recover from the
3032          * crash before exiting, but that seems unwise if we are quitting because
3033          * we got SIGTERM from init --- there may well not be time for recovery
3034          * before init decides to SIGKILL us.)
3035          *
3036          * Note that the syslogger continues to run.  It will exit when it sees
3037          * EOF on its input pipe, which happens when there are no more upstream
3038          * processes.
3039          */
3040         if (Shutdown > NoShutdown && pmState == PM_NO_CHILDREN)
3041         {
3042                 if (FatalError)
3043                 {
3044                         ereport(LOG, (errmsg("abnormal database system shutdown")));
3045                         ExitPostmaster(1);
3046                 }
3047                 else
3048                 {
3049                         /*
3050                          * Terminate backup mode to avoid recovery after a clean fast
3051                          * shutdown.  Since a backup can only be taken during normal
3052                          * running (and not, for example, while running under Hot Standby)
3053                          * it only makes sense to do this if we reached normal running. If
3054                          * we're still in recovery, the backup file is one we're
3055                          * recovering *from*, and we must keep it around so that recovery
3056                          * restarts from the right place.
3057                          */
3058                         if (ReachedNormalRunning)
3059                                 CancelBackup();
3060
3061                         /* Normal exit from the postmaster is here */
3062                         ExitPostmaster(0);
3063                 }
3064         }
3065
3066         /*
3067          * If recovery failed, or the user does not want an automatic restart after
3068          * backend crashes, wait for all non-syslogger children to exit, and then
3069          * exit postmaster. We don't try to reinitialize when recovery fails,
3070          * because more than likely it will just fail again and we will keep trying
3071          * forever.
3072          */
3073         if (pmState == PM_NO_CHILDREN && (RecoveryError || !restart_after_crash))
3074                 ExitPostmaster(1);
3075
3076         /*
3077          * If we need to recover from a crash, wait for all non-syslogger children
3078          * to exit, then reset shmem and StartupDataBase.
3079          */
3080         if (FatalError && pmState == PM_NO_CHILDREN)
3081         {
3082                 ereport(LOG,
3083                                 (errmsg("all server processes terminated; reinitializing")));
3084
3085                 shmem_exit(1);
3086                 reset_shared(PostPortNumber);
3087
3088                 StartupPID = StartupDataBase();
3089                 Assert(StartupPID != 0);
3090                 pmState = PM_STARTUP;
3091         }
3092 }
3093
3094
3095 /*
3096  * Send a signal to a postmaster child process
3097  *
3098  * On systems that have setsid(), each child process sets itself up as a
3099  * process group leader.  For signals that are generally interpreted in the
3100  * appropriate fashion, we signal the entire process group not just the
3101  * direct child process.  This allows us to, for example, SIGQUIT a blocked
3102  * archive_recovery script, or SIGINT a script being run by a backend via
3103  * system().
3104  *
3105  * There is a race condition for recently-forked children: they might not
3106  * have executed setsid() yet.  So we signal the child directly as well as
3107  * the group.  We assume such a child will handle the signal before trying
3108  * to spawn any grandchild processes.  We also assume that signaling the
3109  * child twice will not cause any problems.
3110  */
3111 static void
3112 signal_child(pid_t pid, int signal)
3113 {
3114         if (kill(pid, signal) < 0)
3115                 elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
3116 #ifdef HAVE_SETSID
3117         switch (signal)
3118         {
3119                 case SIGINT:
3120                 case SIGTERM:
3121                 case SIGQUIT:
3122                 case SIGSTOP:
3123                         if (kill(-pid, signal) < 0)
3124                                 elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
3125                         break;
3126                 default:
3127                         break;
3128         }
3129 #endif
3130 }
3131
3132 /*
3133  * Send a signal to the targeted children (but NOT special children;
3134  * dead_end children are never signaled, either).
3135  */
3136 static bool
3137 SignalSomeChildren(int signal, int target)
3138 {
3139         Dlelem     *curr;
3140         bool            signaled = false;
3141
3142         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
3143         {
3144                 Backend    *bp = (Backend *) DLE_VAL(curr);
3145
3146                 if (bp->dead_end)
3147                         continue;
3148                 if (!(target & BACKEND_TYPE_NORMAL) && !bp->is_autovacuum)
3149                         continue;
3150                 if (!(target & BACKEND_TYPE_AUTOVAC) && bp->is_autovacuum)
3151                         continue;
3152                 if (!(target & BACKEND_TYPE_WALSND) &&
3153                         IsPostmasterChildWalSender(bp->child_slot))
3154                         continue;
3155
3156                 ereport(DEBUG4,
3157                                 (errmsg_internal("sending signal %d to process %d",
3158                                                                  signal, (int) bp->pid)));
3159                 signal_child(bp->pid, signal);
3160                 signaled = true;
3161         }
3162         return signaled;
3163 }
3164
3165 /*
3166  * BackendStartup -- start backend process
3167  *
3168  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
3169  *
3170  * Note: if you change this code, also consider StartAutovacuumWorker.
3171  */
3172 static int
3173 BackendStartup(Port *port)
3174 {
3175         Backend    *bn;                         /* for backend cleanup */
3176         pid_t           pid;
3177
3178         /*
3179          * Create backend data structure.  Better before the fork() so we can
3180          * handle failure cleanly.
3181          */
3182         bn = (Backend *) malloc(sizeof(Backend));
3183         if (!bn)
3184         {
3185                 ereport(LOG,
3186                                 (errcode(ERRCODE_OUT_OF_MEMORY),
3187                                  errmsg("out of memory")));
3188                 return STATUS_ERROR;
3189         }
3190
3191         /*
3192          * Compute the cancel key that will be assigned to this backend. The
3193          * backend will have its own copy in the forked-off process' value of
3194          * MyCancelKey, so that it can transmit the key to the frontend.
3195          */
3196         MyCancelKey = PostmasterRandom();
3197         bn->cancel_key = MyCancelKey;
3198
3199         /* Pass down canAcceptConnections state */
3200         port->canAcceptConnections = canAcceptConnections();
3201         bn->dead_end = (port->canAcceptConnections != CAC_OK &&
3202                                         port->canAcceptConnections != CAC_WAITBACKUP);
3203
3204         /*
3205          * Unless it's a dead_end child, assign it a child slot number
3206          */
3207         if (!bn->dead_end)
3208                 bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
3209         else
3210                 bn->child_slot = 0;
3211
3212 #ifdef EXEC_BACKEND
3213         pid = backend_forkexec(port);
3214 #else                                                   /* !EXEC_BACKEND */
3215         pid = fork_process();
3216         if (pid == 0)                           /* child */
3217         {
3218                 free(bn);
3219
3220                 /*
3221                  * Let's clean up ourselves as the postmaster child, and close the
3222                  * postmaster's listen sockets.  (In EXEC_BACKEND case this is all
3223                  * done in SubPostmasterMain.)
3224                  */
3225                 IsUnderPostmaster = true;               /* we are a postmaster subprocess now */
3226
3227                 MyProcPid = getpid();   /* reset MyProcPid */
3228
3229                 MyStartTime = time(NULL);
3230
3231                 /* We don't want the postmaster's proc_exit() handlers */
3232                 on_exit_reset();
3233
3234                 /* Close the postmaster's sockets */
3235                 ClosePostmasterPorts(false);
3236
3237                 /* Perform additional initialization and collect startup packet */
3238                 BackendInitialize(port);
3239
3240                 /* And run the backend */
3241                 proc_exit(BackendRun(port));
3242         }
3243 #endif   /* EXEC_BACKEND */
3244
3245         if (pid < 0)
3246         {
3247                 /* in parent, fork failed */
3248                 int                     save_errno = errno;
3249
3250                 if (!bn->dead_end)
3251                         (void) ReleasePostmasterChildSlot(bn->child_slot);
3252                 free(bn);
3253                 errno = save_errno;
3254                 ereport(LOG,
3255                                 (errmsg("could not fork new process for connection: %m")));
3256                 report_fork_failure_to_client(port, save_errno);
3257                 return STATUS_ERROR;
3258         }
3259
3260         /* in parent, successful fork */
3261         ereport(DEBUG2,
3262                         (errmsg_internal("forked new backend, pid=%d socket=%d",
3263                                                          (int) pid, port->sock)));
3264
3265         /*
3266          * Everything's been successful, it's safe to add this backend to our list
3267          * of backends.
3268          */
3269         bn->pid = pid;
3270         bn->is_autovacuum = false;
3271         DLInitElem(&bn->elem, bn);
3272         DLAddHead(BackendList, &bn->elem);
3273 #ifdef EXEC_BACKEND
3274         if (!bn->dead_end)
3275                 ShmemBackendArrayAdd(bn);
3276 #endif
3277
3278         return STATUS_OK;
3279 }
3280
3281 /*
3282  * Try to report backend fork() failure to client before we close the
3283  * connection.  Since we do not care to risk blocking the postmaster on
3284  * this connection, we set the connection to non-blocking and try only once.
3285  *
3286  * This is grungy special-purpose code; we cannot use backend libpq since
3287  * it's not up and running.
3288  */
3289 static void
3290 report_fork_failure_to_client(Port *port, int errnum)
3291 {
3292         char            buffer[1000];
3293         int                     rc;
3294
3295         /* Format the error message packet (always V2 protocol) */
3296         snprintf(buffer, sizeof(buffer), "E%s%s\n",
3297                          _("could not fork new process for connection: "),
3298                          strerror(errnum));
3299
3300         /* Set port to non-blocking.  Don't do send() if this fails */
3301         if (!pg_set_noblock(port->sock))
3302                 return;
3303
3304         /* We'll retry after EINTR, but ignore all other failures */
3305         do
3306         {
3307                 rc = send(port->sock, buffer, strlen(buffer) + 1, 0);
3308         } while (rc < 0 && errno == EINTR);
3309 }
3310
3311
3312 /*
3313  * BackendInitialize -- initialize an interactive (postmaster-child)
3314  *                              backend process, and collect the client's startup packet.
3315  *
3316  * returns: nothing.  Will not return at all if there's any failure.
3317  *
3318  * Note: this code does not depend on having any access to shared memory.
3319  * In the EXEC_BACKEND case, we are physically attached to shared memory
3320  * but have not yet set up most of our local pointers to shmem structures.
3321  */
3322 static void
3323 BackendInitialize(Port *port)
3324 {
3325         int                     status;
3326         char            remote_host[NI_MAXHOST];
3327         char            remote_port[NI_MAXSERV];
3328         char            remote_ps_data[NI_MAXHOST];
3329
3330         /* Save port etc. for ps status */
3331         MyProcPort = port;
3332
3333         /*
3334          * PreAuthDelay is a debugging aid for investigating problems in the
3335          * authentication cycle: it can be set in postgresql.conf to allow time to
3336          * attach to the newly-forked backend with a debugger.  (See also
3337          * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it
3338          * is not honored until after authentication.)
3339          */
3340         if (PreAuthDelay > 0)
3341                 pg_usleep(PreAuthDelay * 1000000L);
3342
3343         /* This flag will remain set until InitPostgres finishes authentication */
3344         ClientAuthInProgress = true;    /* limit visibility of log messages */
3345
3346         /* save process start time */
3347         port->SessionStartTime = GetCurrentTimestamp();
3348         MyStartTime = timestamptz_to_time_t(port->SessionStartTime);
3349
3350         /* set these to empty in case they are needed before we set them up */
3351         port->remote_host = "";
3352         port->remote_port = "";
3353
3354         /*
3355          * Initialize libpq and enable reporting of ereport errors to the client.
3356          * Must do this now because authentication uses libpq to send messages.
3357          */
3358         pq_init();                                      /* initialize libpq to talk to client */
3359         whereToSendOutput = DestRemote;         /* now safe to ereport to client */
3360
3361         /*
3362          * If possible, make this process a group leader, so that the postmaster
3363          * can signal any child processes too.  (We do this now on the off chance
3364          * that something might spawn a child process during authentication.)
3365          */
3366 #ifdef HAVE_SETSID
3367         if (setsid() < 0)
3368                 elog(FATAL, "setsid() failed: %m");
3369 #endif
3370
3371         /*
3372          * We arrange for a simple exit(1) if we receive SIGTERM or SIGQUIT or
3373          * timeout while trying to collect the startup packet.  Otherwise the
3374          * postmaster cannot shutdown the database FAST or IMMED cleanly if a
3375          * buggy client fails to send the packet promptly.
3376          */
3377         pqsignal(SIGTERM, startup_die);
3378         pqsignal(SIGQUIT, startup_die);
3379         pqsignal(SIGALRM, startup_die);
3380         PG_SETMASK(&StartupBlockSig);
3381
3382         /*
3383          * Get the remote host name and port for logging and status display.
3384          */
3385         remote_host[0] = '\0';
3386         remote_port[0] = '\0';
3387         if (pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
3388                                                    remote_host, sizeof(remote_host),
3389                                                    remote_port, sizeof(remote_port),
3390                                            (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV))
3391         {
3392                 int                     ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
3393                                                                                          remote_host, sizeof(remote_host),
3394                                                                                          remote_port, sizeof(remote_port),
3395                                                                                          NI_NUMERICHOST | NI_NUMERICSERV);
3396
3397                 if (ret)
3398                         ereport(WARNING,
3399                                         (errmsg_internal("pg_getnameinfo_all() failed: %s",
3400                                                                          gai_strerror(ret))));
3401         }
3402         if (remote_port[0] == '\0')
3403                 snprintf(remote_ps_data, sizeof(remote_ps_data), "%s", remote_host);
3404         else
3405                 snprintf(remote_ps_data, sizeof(remote_ps_data), "%s(%s)", remote_host, remote_port);
3406
3407         if (Log_connections)
3408         {
3409                 if (remote_port[0])
3410                         ereport(LOG,
3411                                         (errmsg("connection received: host=%s port=%s",
3412                                                         remote_host,
3413                                                         remote_port)));
3414                 else
3415                         ereport(LOG,
3416                                         (errmsg("connection received: host=%s",
3417                                                         remote_host)));
3418         }
3419
3420         /*
3421          * save remote_host and remote_port in port structure
3422          */
3423         port->remote_host = strdup(remote_host);
3424         port->remote_port = strdup(remote_port);
3425         if (log_hostname)
3426                 port->remote_hostname = port->remote_host;
3427
3428         /*
3429          * Ready to begin client interaction.  We will give up and exit(1) after a
3430          * time delay, so that a broken client can't hog a connection
3431          * indefinitely.  PreAuthDelay and any DNS interactions above don't count
3432          * against the time limit.
3433          */
3434         if (!enable_sig_alarm(AuthenticationTimeout * 1000, false))
3435                 elog(FATAL, "could not set timer for startup packet timeout");
3436
3437         /*
3438          * Receive the startup packet (which might turn out to be a cancel request
3439          * packet).
3440          */
3441         status = ProcessStartupPacket(port, false);
3442
3443         /*
3444          * Stop here if it was bad or a cancel packet.  ProcessStartupPacket
3445          * already did any appropriate error reporting.
3446          */
3447         if (status != STATUS_OK)
3448                 proc_exit(0);
3449
3450         /*
3451          * Now that we have the user and database name, we can set the process
3452          * title for ps.  It's good to do this as early as possible in startup.
3453          *
3454          * For a walsender, the ps display is set in the following form:
3455          *
3456          * postgres: wal sender process <user> <host> <activity>
3457          *
3458          * To achieve that, we pass "wal sender process" as username and username
3459          * as dbname to init_ps_display(). XXX: should add a new variant of
3460          * init_ps_display() to avoid abusing the parameters like this.
3461          */
3462         if (am_walsender)
3463                 init_ps_display("wal sender process", port->user_name, remote_ps_data,
3464                                                 update_process_title ? "authentication" : "");
3465         else
3466                 init_ps_display(port->user_name, port->database_name, remote_ps_data,
3467                                                 update_process_title ? "authentication" : "");
3468
3469         /*
3470          * Disable the timeout, and prevent SIGTERM/SIGQUIT again.
3471          */
3472         if (!disable_sig_alarm(false))
3473                 elog(FATAL, "could not disable timer for startup packet timeout");
3474         PG_SETMASK(&BlockSig);
3475 }
3476
3477
3478 /*
3479  * BackendRun -- set up the backend's argument list and invoke PostgresMain()
3480  *
3481  * returns:
3482  *              Shouldn't return at all.
3483  *              If PostgresMain() fails, return status.
3484  */
3485 static int
3486 BackendRun(Port *port)
3487 {
3488         char      **av;
3489         int                     maxac;
3490         int                     ac;
3491         long            secs;
3492         int                     usecs;
3493         int                     i;
3494
3495         /*
3496          * Don't want backend to be able to see the postmaster random number
3497          * generator state.  We have to clobber the static random_seed *and* start
3498          * a new random sequence in the random() library function.
3499          */
3500         random_seed = 0;
3501         random_start_time.tv_usec = 0;
3502         /* slightly hacky way to get integer microseconds part of timestamptz */
3503         TimestampDifference(0, port->SessionStartTime, &secs, &usecs);
3504         srandom((unsigned int) (MyProcPid ^ usecs));
3505
3506         /*
3507          * Now, build the argv vector that will be given to PostgresMain.
3508          *
3509          * The maximum possible number of commandline arguments that could come
3510          * from ExtraOptions is (strlen(ExtraOptions) + 1) / 2; see
3511          * pg_split_opts().
3512          */
3513         maxac = 5;                                      /* for fixed args supplied below */
3514         maxac += (strlen(ExtraOptions) + 1) / 2;
3515
3516         av = (char **) MemoryContextAlloc(TopMemoryContext,
3517                                                                           maxac * sizeof(char *));
3518         ac = 0;
3519
3520         av[ac++] = "postgres";
3521
3522         /*
3523          * Pass any backend switches specified with -o on the postmaster's own
3524          * command line.  We assume these are secure.  (It's OK to mangle
3525          * ExtraOptions now, since we're safely inside a subprocess.)
3526          */
3527         pg_split_opts(av, &ac, ExtraOptions);
3528
3529         /*
3530          * Tell the backend which database to use.
3531          */
3532         av[ac++] = port->database_name;
3533
3534         av[ac] = NULL;
3535
3536         Assert(ac < maxac);
3537
3538         /*
3539          * Debug: print arguments being passed to backend
3540          */
3541         ereport(DEBUG3,
3542                         (errmsg_internal("%s child[%d]: starting with (",
3543                                                          progname, (int) getpid())));
3544         for (i = 0; i < ac; ++i)
3545                 ereport(DEBUG3,
3546                                 (errmsg_internal("\t%s", av[i])));
3547         ereport(DEBUG3,
3548                         (errmsg_internal(")")));
3549
3550         /*
3551          * Make sure we aren't in PostmasterContext anymore.  (We can't delete it
3552          * just yet, though, because InitPostgres will need the HBA data.)
3553          */
3554         MemoryContextSwitchTo(TopMemoryContext);
3555
3556         return (PostgresMain(ac, av, port->user_name));
3557 }
3558
3559
3560 #ifdef EXEC_BACKEND
3561
3562 /*
3563  * postmaster_forkexec -- fork and exec a postmaster subprocess
3564  *
3565  * The caller must have set up the argv array already, except for argv[2]
3566  * which will be filled with the name of the temp variable file.
3567  *
3568  * Returns the child process PID, or -1 on fork failure (a suitable error
3569  * message has been logged on failure).
3570  *
3571  * All uses of this routine will dispatch to SubPostmasterMain in the
3572  * child process.
3573  */
3574 pid_t
3575 postmaster_forkexec(int argc, char *argv[])
3576 {
3577         Port            port;
3578
3579         /* This entry point passes dummy values for the Port variables */
3580         memset(&port, 0, sizeof(port));
3581         return internal_forkexec(argc, argv, &port);
3582 }
3583
3584 /*
3585  * backend_forkexec -- fork/exec off a backend process
3586  *
3587  * Some operating systems (WIN32) don't have fork() so we have to simulate
3588  * it by storing parameters that need to be passed to the child and
3589  * then create a new child process.
3590  *
3591  * returns the pid of the fork/exec'd process, or -1 on failure
3592  */
3593 static pid_t
3594 backend_forkexec(Port *port)
3595 {
3596         char       *av[4];
3597         int                     ac = 0;
3598
3599         av[ac++] = "postgres";
3600         av[ac++] = "--forkbackend";
3601         av[ac++] = NULL;                        /* filled in by internal_forkexec */
3602
3603         av[ac] = NULL;
3604         Assert(ac < lengthof(av));
3605
3606         return internal_forkexec(ac, av, port);
3607 }
3608
3609 #ifndef WIN32
3610
3611 /*
3612  * internal_forkexec non-win32 implementation
3613  *
3614  * - writes out backend variables to the parameter file
3615  * - fork():s, and then exec():s the child process
3616  */
3617 static pid_t
3618 internal_forkexec(int argc, char *argv[], Port *port)
3619 {
3620         static unsigned long tmpBackendFileNum = 0;
3621         pid_t           pid;
3622         char            tmpfilename[MAXPGPATH];
3623         BackendParameters param;
3624         FILE       *fp;
3625
3626         if (!save_backend_variables(&param, port))
3627                 return -1;                              /* log made by save_backend_variables */
3628
3629         /* Calculate name for temp file */
3630         snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
3631                          PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX,
3632                          MyProcPid, ++tmpBackendFileNum);
3633
3634         /* Open file */
3635         fp = AllocateFile(tmpfilename, PG_BINARY_W);
3636         if (!fp)
3637         {
3638                 /* As in OpenTemporaryFile, try to make the temp-file directory */
3639                 mkdir(PG_TEMP_FILES_DIR, S_IRWXU);
3640
3641                 fp = AllocateFile(tmpfilename, PG_BINARY_W);
3642                 if (!fp)
3643                 {
3644                         ereport(LOG,
3645                                         (errcode_for_file_access(),
3646                                          errmsg("could not create file \"%s\": %m",
3647                                                         tmpfilename)));
3648                         return -1;
3649                 }
3650         }
3651
3652         if (fwrite(&param, sizeof(param), 1, fp) != 1)
3653         {
3654                 ereport(LOG,
3655                                 (errcode_for_file_access(),
3656                                  errmsg("could not write to file \"%s\": %m", tmpfilename)));
3657                 FreeFile(fp);
3658                 return -1;
3659         }
3660
3661         /* Release file */
3662         if (FreeFile(fp))
3663         {
3664                 ereport(LOG,
3665                                 (errcode_for_file_access(),
3666                                  errmsg("could not write to file \"%s\": %m", tmpfilename)));
3667                 return -1;
3668         }
3669
3670         /* Make sure caller set up argv properly */
3671         Assert(argc >= 3);
3672         Assert(argv[argc] == NULL);
3673         Assert(strncmp(argv[1], "--fork", 6) == 0);
3674         Assert(argv[2] == NULL);
3675
3676         /* Insert temp file name after --fork argument */
3677         argv[2] = tmpfilename;
3678
3679         /* Fire off execv in child */
3680         if ((pid = fork_process()) == 0)
3681         {
3682                 if (execv(postgres_exec_path, argv) < 0)
3683                 {
3684                         ereport(LOG,
3685                                         (errmsg("could not execute server process \"%s\": %m",
3686                                                         postgres_exec_path)));
3687                         /* We're already in the child process here, can't return */
3688                         exit(1);
3689                 }
3690         }
3691
3692         return pid;                                     /* Parent returns pid, or -1 on fork failure */
3693 }
3694 #else                                                   /* WIN32 */
3695
3696 /*
3697  * internal_forkexec win32 implementation
3698  *
3699  * - starts backend using CreateProcess(), in suspended state
3700  * - writes out backend variables to the parameter file
3701  *      - during this, duplicates handles and sockets required for
3702  *        inheritance into the new process
3703  * - resumes execution of the new process once the backend parameter
3704  *       file is complete.
3705  */
3706 static pid_t
3707 internal_forkexec(int argc, char *argv[], Port *port)
3708 {
3709         STARTUPINFO si;
3710         PROCESS_INFORMATION pi;
3711         int                     i;
3712         int                     j;
3713         char            cmdLine[MAXPGPATH * 2];
3714         HANDLE          paramHandle;
3715         BackendParameters *param;
3716         SECURITY_ATTRIBUTES sa;
3717         char            paramHandleStr[32];
3718         win32_deadchild_waitinfo *childinfo;
3719
3720         /* Make sure caller set up argv properly */
3721         Assert(argc >= 3);
3722         Assert(argv[argc] == NULL);
3723         Assert(strncmp(argv[1], "--fork", 6) == 0);
3724         Assert(argv[2] == NULL);
3725
3726         /* Set up shared memory for parameter passing */
3727         ZeroMemory(&sa, sizeof(sa));
3728         sa.nLength = sizeof(sa);
3729         sa.bInheritHandle = TRUE;
3730         paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
3731                                                                         &sa,
3732                                                                         PAGE_READWRITE,
3733                                                                         0,
3734                                                                         sizeof(BackendParameters),
3735                                                                         NULL);
3736         if (paramHandle == INVALID_HANDLE_VALUE)
3737         {
3738                 elog(LOG, "could not create backend parameter file mapping: error code %d",
3739                          (int) GetLastError());
3740                 return -1;
3741         }
3742
3743         param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
3744         if (!param)
3745         {
3746                 elog(LOG, "could not map backend parameter memory: error code %d",
3747                          (int) GetLastError());
3748                 CloseHandle(paramHandle);
3749                 return -1;
3750         }
3751
3752         /* Insert temp file name after --fork argument */
3753         sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
3754         argv[2] = paramHandleStr;
3755
3756         /* Format the cmd line */
3757         cmdLine[sizeof(cmdLine) - 1] = '\0';
3758         cmdLine[sizeof(cmdLine) - 2] = '\0';
3759         snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
3760         i = 0;
3761         while (argv[++i] != NULL)
3762         {
3763                 j = strlen(cmdLine);
3764                 snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
3765         }
3766         if (cmdLine[sizeof(cmdLine) - 2] != '\0')
3767         {
3768                 elog(LOG, "subprocess command line too long");
3769                 return -1;
3770         }
3771
3772         memset(&pi, 0, sizeof(pi));
3773         memset(&si, 0, sizeof(si));
3774         si.cb = sizeof(si);
3775
3776         /*
3777          * Create the subprocess in a suspended state. This will be resumed later,
3778          * once we have written out the parameter file.
3779          */
3780         if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
3781                                            NULL, NULL, &si, &pi))
3782         {
3783                 elog(LOG, "CreateProcess call failed: %m (error code %d)",
3784                          (int) GetLastError());
3785                 return -1;
3786         }
3787
3788         if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
3789         {
3790                 /*
3791                  * log made by save_backend_variables, but we have to clean up the
3792                  * mess with the half-started process
3793                  */
3794                 if (!TerminateProcess(pi.hProcess, 255))
3795                         ereport(LOG,
3796                                         (errmsg_internal("could not terminate unstarted process: error code %d",
3797                                                                          (int) GetLastError())));
3798                 CloseHandle(pi.hProcess);
3799                 CloseHandle(pi.hThread);
3800                 return -1;                              /* log made by save_backend_variables */
3801         }
3802
3803         /* Drop the parameter shared memory that is now inherited to the backend */
3804         if (!UnmapViewOfFile(param))
3805                 elog(LOG, "could not unmap view of backend parameter file: error code %d",
3806                          (int) GetLastError());
3807         if (!CloseHandle(paramHandle))
3808                 elog(LOG, "could not close handle to backend parameter file: error code %d",
3809                          (int) GetLastError());
3810
3811         /*
3812          * Reserve the memory region used by our main shared memory segment before
3813          * we resume the child process.
3814          */
3815         if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
3816         {
3817                 /*
3818                  * Failed to reserve the memory, so terminate the newly created
3819                  * process and give up.
3820                  */
3821                 if (!TerminateProcess(pi.hProcess, 255))
3822                         ereport(LOG,
3823                                         (errmsg_internal("could not terminate process that failed to reserve memory: error code %d",
3824                                                                          (int) GetLastError())));
3825                 CloseHandle(pi.hProcess);
3826                 CloseHandle(pi.hThread);
3827                 return -1;                              /* logging done made by
3828                                                                  * pgwin32_ReserveSharedMemoryRegion() */
3829         }
3830
3831         /*
3832          * Now that the backend variables are written out, we start the child
3833          * thread so it can start initializing while we set up the rest of the
3834          * parent state.
3835          */
3836         if (ResumeThread(pi.hThread) == -1)
3837         {
3838                 if (!TerminateProcess(pi.hProcess, 255))
3839                 {
3840                         ereport(LOG,
3841                                         (errmsg_internal("could not terminate unstartable process: error code %d",
3842                                                                          (int) GetLastError())));
3843                         CloseHandle(pi.hProcess);
3844                         CloseHandle(pi.hThread);
3845                         return -1;
3846                 }
3847                 CloseHandle(pi.hProcess);
3848                 CloseHandle(pi.hThread);
3849                 ereport(LOG,
3850                                 (errmsg_internal("could not resume thread of unstarted process: error code %d",
3851                                                                  (int) GetLastError())));
3852                 return -1;
3853         }
3854
3855         /*
3856          * Queue a waiter for to signal when this child dies. The wait will be
3857          * handled automatically by an operating system thread pool.
3858          *
3859          * Note: use malloc instead of palloc, since it needs to be thread-safe.
3860          * Struct will be free():d from the callback function that runs on a
3861          * different thread.
3862          */
3863         childinfo = malloc(sizeof(win32_deadchild_waitinfo));
3864         if (!childinfo)
3865                 ereport(FATAL,
3866                                 (errcode(ERRCODE_OUT_OF_MEMORY),
3867                                  errmsg("out of memory")));
3868
3869         childinfo->procHandle = pi.hProcess;
3870         childinfo->procId = pi.dwProcessId;
3871
3872         if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
3873                                                                          pi.hProcess,
3874                                                                          pgwin32_deadchild_callback,
3875                                                                          childinfo,
3876                                                                          INFINITE,
3877                                                                 WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
3878                 ereport(FATAL,
3879                 (errmsg_internal("could not register process for wait: error code %d",
3880                                                  (int) GetLastError())));
3881
3882         /* Don't close pi.hProcess here - the wait thread needs access to it */
3883
3884         CloseHandle(pi.hThread);
3885
3886         return pi.dwProcessId;
3887 }
3888 #endif   /* WIN32 */
3889
3890
3891 /*
3892  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
3893  *                      to what it would be if we'd simply forked on Unix, and then
3894  *                      dispatch to the appropriate place.
3895  *
3896  * The first two command line arguments are expected to be "--forkFOO"
3897  * (where FOO indicates which postmaster child we are to become), and
3898  * the name of a variables file that we can read to load data that would
3899  * have been inherited by fork() on Unix.  Remaining arguments go to the
3900  * subprocess FooMain() routine.
3901  */
3902 int
3903 SubPostmasterMain(int argc, char *argv[])
3904 {
3905         Port            port;
3906
3907         /* Do this sooner rather than later... */
3908         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
3909
3910         MyProcPid = getpid();           /* reset MyProcPid */
3911
3912         MyStartTime = time(NULL);
3913
3914         /*
3915          * make sure stderr is in binary mode before anything can possibly be
3916          * written to it, in case it's actually the syslogger pipe, so the pipe
3917          * chunking protocol isn't disturbed. Non-logpipe data gets translated on
3918          * redirection (e.g. via pg_ctl -l) anyway.
3919          */
3920 #ifdef WIN32
3921         _setmode(fileno(stderr), _O_BINARY);
3922 #endif
3923
3924         /* Lose the postmaster's on-exit routines (really a no-op) */
3925         on_exit_reset();
3926
3927         /* In EXEC_BACKEND case we will not have inherited these settings */
3928         IsPostmasterEnvironment = true;
3929         whereToSendOutput = DestNone;
3930
3931         /* Setup essential subsystems (to ensure elog() behaves sanely) */
3932         MemoryContextInit();
3933         InitializeGUCOptions();
3934
3935         /* Read in the variables file */
3936         memset(&port, 0, sizeof(Port));
3937         read_backend_variables(argv[2], &port);
3938
3939         /*
3940          * Set up memory area for GSS information. Mirrors the code in ConnCreate
3941          * for the non-exec case.
3942          */
3943 #if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
3944         port.gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo));
3945         if (!port.gss)
3946                 ereport(FATAL,
3947                                 (errcode(ERRCODE_OUT_OF_MEMORY),
3948                                  errmsg("out of memory")));
3949 #endif
3950
3951         /* Check we got appropriate args */
3952         if (argc < 3)
3953                 elog(FATAL, "invalid subpostmaster invocation");
3954
3955         /*
3956          * If appropriate, physically re-attach to shared memory segment. We want
3957          * to do this before going any further to ensure that we can attach at the
3958          * same address the postmaster used.
3959          */
3960         if (strcmp(argv[1], "--forkbackend") == 0 ||
3961                 strcmp(argv[1], "--forkavlauncher") == 0 ||
3962                 strcmp(argv[1], "--forkavworker") == 0 ||
3963                 strcmp(argv[1], "--forkboot") == 0)
3964                 PGSharedMemoryReAttach();
3965
3966         /* autovacuum needs this set before calling InitProcess */
3967         if (strcmp(argv[1], "--forkavlauncher") == 0)
3968                 AutovacuumLauncherIAm();
3969         if (strcmp(argv[1], "--forkavworker") == 0)
3970                 AutovacuumWorkerIAm();
3971
3972         /*
3973          * Start our win32 signal implementation. This has to be done after we
3974          * read the backend variables, because we need to pick up the signal pipe
3975          * from the parent process.
3976          */
3977 #ifdef WIN32
3978         pgwin32_signal_initialize();
3979 #endif
3980
3981         /* In EXEC_BACKEND case we will not have inherited these settings */
3982         pqinitmask();
3983         PG_SETMASK(&BlockSig);
3984
3985         /* Read in remaining GUC variables */
3986         read_nondefault_variables();
3987
3988         /*
3989          * Reload any libraries that were preloaded by the postmaster.  Since we
3990          * exec'd this process, those libraries didn't come along with us; but we
3991          * should load them into all child processes to be consistent with the
3992          * non-EXEC_BACKEND behavior.
3993          */
3994         process_shared_preload_libraries();
3995
3996         /* Run backend or appropriate child */
3997         if (strcmp(argv[1], "--forkbackend") == 0)
3998         {
3999                 Assert(argc == 3);              /* shouldn't be any more args */
4000
4001                 /* Close the postmaster's sockets */
4002                 ClosePostmasterPorts(false);
4003
4004                 /*
4005                  * Need to reinitialize the SSL library in the backend, since the
4006                  * context structures contain function pointers and cannot be passed
4007                  * through the parameter file.
4008                  *
4009                  * XXX should we do this in all child processes?  For the moment it's
4010                  * enough to do it in backend children.
4011                  */
4012 #ifdef USE_SSL
4013                 if (EnableSSL)
4014                         secure_initialize();
4015 #endif
4016
4017                 /*
4018                  * Perform additional initialization and collect startup packet.
4019                  *
4020                  * We want to do this before InitProcess() for a couple of reasons: 1.
4021                  * so that we aren't eating up a PGPROC slot while waiting on the
4022                  * client. 2. so that if InitProcess() fails due to being out of
4023                  * PGPROC slots, we have already initialized libpq and are able to
4024                  * report the error to the client.
4025                  */
4026                 BackendInitialize(&port);
4027
4028                 /* Restore basic shared memory pointers */
4029                 InitShmemAccess(UsedShmemSegAddr);
4030
4031                 /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4032                 InitProcess();
4033
4034                 /*
4035                  * Attach process to shared data structures.  If testing EXEC_BACKEND
4036                  * on Linux, you must run this as root before starting the postmaster:
4037                  *
4038                  * echo 0 >/proc/sys/kernel/randomize_va_space
4039                  *
4040                  * This prevents a randomized stack base address that causes child
4041                  * shared memory to be at a different address than the parent, making
4042                  * it impossible to attached to shared memory.  Return the value to
4043                  * '1' when finished.
4044                  */
4045                 CreateSharedMemoryAndSemaphores(false, 0);
4046
4047                 /* And run the backend */
4048                 proc_exit(BackendRun(&port));
4049         }
4050         if (strcmp(argv[1], "--forkboot") == 0)
4051         {
4052                 /* Close the postmaster's sockets */
4053                 ClosePostmasterPorts(false);
4054
4055                 /* Restore basic shared memory pointers */
4056                 InitShmemAccess(UsedShmemSegAddr);
4057
4058                 /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4059                 InitAuxiliaryProcess();
4060
4061                 /* Attach process to shared data structures */
4062                 CreateSharedMemoryAndSemaphores(false, 0);
4063
4064                 AuxiliaryProcessMain(argc - 2, argv + 2);
4065                 proc_exit(0);
4066         }
4067         if (strcmp(argv[1], "--forkavlauncher") == 0)
4068         {
4069                 /* Close the postmaster's sockets */
4070                 ClosePostmasterPorts(false);
4071
4072                 /* Restore basic shared memory pointers */
4073                 InitShmemAccess(UsedShmemSegAddr);
4074
4075                 /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4076                 InitProcess();
4077
4078                 /* Attach process to shared data structures */
4079                 CreateSharedMemoryAndSemaphores(false, 0);
4080
4081                 AutoVacLauncherMain(argc - 2, argv + 2);
4082                 proc_exit(0);
4083         }
4084         if (strcmp(argv[1], "--forkavworker") == 0)
4085         {
4086                 /* Close the postmaster's sockets */
4087                 ClosePostmasterPorts(false);
4088
4089                 /* Restore basic shared memory pointers */
4090                 InitShmemAccess(UsedShmemSegAddr);
4091
4092                 /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4093                 InitProcess();
4094
4095                 /* Attach process to shared data structures */
4096                 CreateSharedMemoryAndSemaphores(false, 0);
4097
4098                 AutoVacWorkerMain(argc - 2, argv + 2);
4099                 proc_exit(0);
4100         }
4101         if (strcmp(argv[1], "--forkarch") == 0)
4102         {
4103                 /* Close the postmaster's sockets */
4104                 ClosePostmasterPorts(false);
4105
4106                 /* Do not want to attach to shared memory */
4107
4108                 PgArchiverMain(argc, argv);
4109                 proc_exit(0);
4110         }
4111         if (strcmp(argv[1], "--forkcol") == 0)
4112         {
4113                 /* Close the postmaster's sockets */
4114                 ClosePostmasterPorts(false);
4115
4116                 /* Do not want to attach to shared memory */
4117
4118                 PgstatCollectorMain(argc, argv);
4119                 proc_exit(0);
4120         }
4121         if (strcmp(argv[1], "--forklog") == 0)
4122         {
4123                 /* Close the postmaster's sockets */
4124                 ClosePostmasterPorts(true);
4125
4126                 /* Do not want to attach to shared memory */
4127
4128                 SysLoggerMain(argc, argv);
4129                 proc_exit(0);
4130         }
4131
4132         return 1;                                       /* shouldn't get here */
4133 }
4134 #endif   /* EXEC_BACKEND */
4135
4136
/*
 * ExitPostmaster -- single exit point for the postmaster
 *
 * Never call exit() directly --- always leave through this routine!
 *
 * status: exit status, handed on unchanged to proc_exit().
 */
static void
ExitPostmaster(int status)
{
	/*
	 * Ideally this would clean up shared memory and kill all backends;
	 * at present nothing of the sort is done here.
	 *
	 * Open question carried over from the original author: when the
	 * postmaster dies, should the backends all be killed?  "Probably not"
	 * was the first answer; "MUST" was the reply (vadim 05-10-1999).
	 */

	proc_exit(status);
}
4156
4157 /*
4158  * sigusr1_handler - handle signal conditions from child processes
4159  */
4160 static void
4161 sigusr1_handler(SIGNAL_ARGS)
4162 {
4163         int                     save_errno = errno;
4164
4165         PG_SETMASK(&BlockSig);
4166
4167         /*
4168          * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
4169          * unexpected states. If the startup process quickly starts up, completes
4170          * recovery, exits, we might process the death of the startup process
4171          * first. We don't want to go back to recovery in that case.
4172          */
4173         if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_STARTED) &&
4174                 pmState == PM_STARTUP)
4175         {
4176                 /* WAL redo has started. We're out of reinitialization. */
4177                 FatalError = false;
4178
4179                 /*
4180                  * Crank up the background writer.      It doesn't matter if this fails,
4181                  * we'll just try again later.
4182                  */
4183                 Assert(BgWriterPID == 0);
4184                 BgWriterPID = StartBackgroundWriter();
4185
4186                 pmState = PM_RECOVERY;
4187         }
4188         if (CheckPostmasterSignal(PMSIGNAL_BEGIN_HOT_STANDBY) &&
4189                 pmState == PM_RECOVERY)
4190         {
4191                 /*
4192                  * Likewise, start other special children as needed.
4193                  */
4194                 Assert(PgStatPID == 0);
4195                 PgStatPID = pgstat_start();
4196
4197                 ereport(LOG,
4198                 (errmsg("database system is ready to accept read only connections")));
4199
4200                 pmState = PM_HOT_STANDBY;
4201         }
4202
4203         if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER) &&
4204                 PgArchPID != 0)
4205         {
4206                 /*
4207                  * Send SIGUSR1 to archiver process, to wake it up and begin archiving
4208                  * next transaction log file.
4209                  */
4210                 signal_child(PgArchPID, SIGUSR1);
4211         }
4212
4213         if (CheckPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE) &&
4214                 SysLoggerPID != 0)
4215         {
4216                 /* Tell syslogger to rotate logfile */
4217                 signal_child(SysLoggerPID, SIGUSR1);
4218         }
4219
4220         if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER))
4221         {
4222                 /*
4223                  * Start one iteration of the autovacuum daemon, even if autovacuuming
4224                  * is nominally not enabled.  This is so we can have an active defense
4225                  * against transaction ID wraparound.  We set a flag for the main loop
4226                  * to do it rather than trying to do it here --- this is because the
4227                  * autovac process itself may send the signal, and we want to handle
4228                  * that by launching another iteration as soon as the current one
4229                  * completes.
4230                  */
4231                 start_autovac_launcher = true;
4232         }
4233
4234         if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER))
4235         {
4236                 /* The autovacuum launcher wants us to start a worker process. */
4237                 StartAutovacuumWorker();
4238         }
4239
4240         if (CheckPostmasterSignal(PMSIGNAL_START_WALRECEIVER) &&
4241                 WalReceiverPID == 0 &&
4242                 (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
4243                  pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY))
4244         {
4245                 /* Startup Process wants us to start the walreceiver process. */
4246                 WalReceiverPID = StartWalReceiver();
4247         }
4248
4249         PG_SETMASK(&UnBlockSig);
4250
4251         errno = save_errno;
4252 }
4253
/*
 * startup_die -- signal handler used while the startup packet is being
 * processed; invoked on timeout or on a shutdown signal from the
 * postmaster.  Cleans up and exits with status 1.
 *
 * XXX: a possible future improvement would be to try sending a message
 * explaining why we are disconnecting.  The difficulty is making sure
 * that doing so neither blocks nor disturbs SSL initialization.  In
 * practice, a client that has wedged here probably couldn't do anything
 * with the message anyway.
 */
static void
startup_die(SIGNAL_ARGS)
{
	/* proc_exit is relied upon for the cleanup */
	proc_exit(1);
}
4268
/*
 * dummy_handler -- signal handler that does nothing
 *
 * Installed for signals the postmaster itself has no use for but that
 * backends do use.  Were the postmaster to SIG_IGN such signals, a newly
 * started backend might drop one that arrives before it has been able to
 * reconfigure its signal processing.  (See notes in tcop/postgres.c.)
 */
static void
dummy_handler(SIGNAL_ARGS)
{
	/* intentionally empty */
}
4282
/*
 * RandomSalt -- fill in a 4-byte salt
 *
 * md5Salt: output buffer; bytes [0..3] are written, each from a separate
 *			PostmasterRandom() draw.
 */
static void
RandomSalt(char *md5Salt)
{
	int			pos;

	/*
	 * Reducing modulo 255 gives up one possible byte value but makes sure
	 * every bit of the random() value takes part in the result.  Adding one
	 * on top of that avoids ever producing a null byte.
	 */
	for (pos = 0; pos < 4; pos++)
	{
		long		draw = PostmasterRandom();

		md5Salt[pos] = (draw % 255) + 1;
	}
}
4305
4306 /*
4307  * PostmasterRandom
4308  */
4309 static long
4310 PostmasterRandom(void)
4311 {
4312         /*
4313          * Select a random seed at the time of first receiving a request.
4314          */
4315         if (random_seed == 0)
4316         {
4317                 do
4318                 {
4319                         struct timeval random_stop_time;
4320
4321                         gettimeofday(&random_stop_time, NULL);
4322
4323                         /*
4324                          * We are not sure how much precision is in tv_usec, so we swap
4325                          * the high and low 16 bits of 'random_stop_time' and XOR them
4326                          * with 'random_start_time'. On the off chance that the result is
4327                          * 0, we loop until it isn't.
4328                          */
4329                         random_seed = random_start_time.tv_usec ^
4330                                 ((random_stop_time.tv_usec << 16) |
4331                                  ((random_stop_time.tv_usec >> 16) & 0xffff));
4332                 }
4333                 while (random_seed == 0);
4334
4335                 srandom(random_seed);
4336         }
4337
4338         return random();
4339 }
4340
4341 /*
4342  * Count up number of child processes of specified types (dead_end chidren
4343  * are always excluded).
4344  */
4345 static int
4346 CountChildren(int target)
4347 {
4348         Dlelem     *curr;
4349         int                     cnt = 0;
4350
4351         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
4352         {
4353                 Backend    *bp = (Backend *) DLE_VAL(curr);
4354
4355                 if (bp->dead_end)
4356                         continue;
4357                 if (!(target & BACKEND_TYPE_NORMAL) && !bp->is_autovacuum)
4358                         continue;
4359                 if (!(target & BACKEND_TYPE_AUTOVAC) && bp->is_autovacuum)
4360                         continue;
4361                 if (!(target & BACKEND_TYPE_WALSND) &&
4362                         IsPostmasterChildWalSender(bp->child_slot))
4363                         continue;
4364
4365                 cnt++;
4366         }
4367         return cnt;
4368 }
4369
4370
4371 /*
4372  * StartChildProcess -- start an auxiliary process for the postmaster
4373  *
4374  * xlop determines what kind of child will be started.  All child types
4375  * initially go to AuxiliaryProcessMain, which will handle common setup.
4376  *
4377  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
4378  * to start subprocess.
4379  */
4380 static pid_t
4381 StartChildProcess(AuxProcType type)
4382 {
4383         pid_t           pid;
4384         char       *av[10];
4385         int                     ac = 0;
4386         char            typebuf[32];
4387
4388         /*
4389          * Set up command-line arguments for subprocess
4390          */
4391         av[ac++] = "postgres";
4392
4393 #ifdef EXEC_BACKEND
4394         av[ac++] = "--forkboot";
4395         av[ac++] = NULL;                        /* filled in by postmaster_forkexec */
4396 #endif
4397
4398         snprintf(typebuf, sizeof(typebuf), "-x%d", type);
4399         av[ac++] = typebuf;
4400
4401         av[ac] = NULL;
4402         Assert(ac < lengthof(av));
4403
4404 #ifdef EXEC_BACKEND
4405         pid = postmaster_forkexec(ac, av);
4406 #else                                                   /* !EXEC_BACKEND */
4407         pid = fork_process();
4408
4409         if (pid == 0)                           /* child */
4410         {
4411                 IsUnderPostmaster = true;               /* we are a postmaster subprocess now */
4412
4413                 /* Close the postmaster's sockets */
4414                 ClosePostmasterPorts(false);
4415
4416                 /* Lose the postmaster's on-exit routines and port connections */
4417                 on_exit_reset();
4418
4419                 /* Release postmaster's working memory context */
4420                 MemoryContextSwitchTo(TopMemoryContext);
4421                 MemoryContextDelete(PostmasterContext);
4422                 PostmasterContext = NULL;
4423
4424                 AuxiliaryProcessMain(ac, av);
4425                 ExitPostmaster(0);
4426         }
4427 #endif   /* EXEC_BACKEND */
4428
4429         if (pid < 0)
4430         {
4431                 /* in parent, fork failed */
4432                 int                     save_errno = errno;
4433
4434                 errno = save_errno;
4435                 switch (type)
4436                 {
4437                         case StartupProcess:
4438                                 ereport(LOG,
4439                                                 (errmsg("could not fork startup process: %m")));
4440                                 break;
4441                         case BgWriterProcess:
4442                                 ereport(LOG,
4443                                    (errmsg("could not fork background writer process: %m")));
4444                                 break;
4445                         case WalWriterProcess:
4446                                 ereport(LOG,
4447                                                 (errmsg("could not fork WAL writer process: %m")));
4448                                 break;
4449                         case WalReceiverProcess:
4450                                 ereport(LOG,
4451                                                 (errmsg("could not fork WAL receiver process: %m")));
4452                                 break;
4453                         default:
4454                                 ereport(LOG,
4455                                                 (errmsg("could not fork process: %m")));
4456                                 break;
4457                 }
4458
4459                 /*
4460                  * fork failure is fatal during startup, but there's no need to choke
4461                  * immediately if starting other child types fails.
4462                  */
4463                 if (type == StartupProcess)
4464                         ExitPostmaster(1);
4465                 return 0;
4466         }
4467
4468         /*
4469          * in parent, successful fork
4470          */
4471         return pid;
4472 }
4473
4474 /*
4475  * StartAutovacuumWorker
4476  *              Start an autovac worker process.
4477  *
4478  * This function is here because it enters the resulting PID into the
4479  * postmaster's private backends list.
4480  *
4481  * NB -- this code very roughly matches BackendStartup.
4482  */
4483 static void
4484 StartAutovacuumWorker(void)
4485 {
4486         Backend    *bn;
4487
4488         /*
4489          * If not in condition to run a process, don't try, but handle it like a
4490          * fork failure.  This does not normally happen, since the signal is only
4491          * supposed to be sent by autovacuum launcher when it's OK to do it, but
4492          * we have to check to avoid race-condition problems during DB state
4493          * changes.
4494          */
4495         if (canAcceptConnections() == CAC_OK)
4496         {
4497                 bn = (Backend *) malloc(sizeof(Backend));
4498                 if (bn)
4499                 {
4500                         /*
4501                          * Compute the cancel key that will be assigned to this session.
4502                          * We probably don't need cancel keys for autovac workers, but
4503                          * we'd better have something random in the field to prevent
4504                          * unfriendly people from sending cancels to them.
4505                          */
4506                         MyCancelKey = PostmasterRandom();
4507                         bn->cancel_key = MyCancelKey;
4508
4509                         /* Autovac workers are not dead_end and need a child slot */
4510                         bn->dead_end = false;
4511                         bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
4512
4513                         bn->pid = StartAutoVacWorker();
4514                         if (bn->pid > 0)
4515                         {
4516                                 bn->is_autovacuum = true;
4517                                 DLInitElem(&bn->elem, bn);
4518                                 DLAddHead(BackendList, &bn->elem);
4519 #ifdef EXEC_BACKEND
4520                                 ShmemBackendArrayAdd(bn);
4521 #endif
4522                                 /* all OK */
4523                                 return;
4524                         }
4525
4526                         /*
4527                          * fork failed, fall through to report -- actual error message was
4528                          * logged by StartAutoVacWorker
4529                          */
4530                         (void) ReleasePostmasterChildSlot(bn->child_slot);
4531                         free(bn);
4532                 }
4533                 else
4534                         ereport(LOG,
4535                                         (errcode(ERRCODE_OUT_OF_MEMORY),
4536                                          errmsg("out of memory")));
4537         }
4538
4539         /*
4540          * Report the failure to the launcher, if it's running.  (If it's not, we
4541          * might not even be connected to shared memory, so don't try to call
4542          * AutoVacWorkerFailed.)  Note that we also need to signal it so that it
4543          * responds to the condition, but we don't do that here, instead waiting
4544          * for ServerLoop to do it.  This way we avoid a ping-pong signalling in
4545          * quick succession between the autovac launcher and postmaster in case
4546          * things get ugly.
4547          */
4548         if (AutoVacPID != 0)
4549         {
4550                 AutoVacWorkerFailed();
4551                 avlauncher_needs_signal = true;
4552         }
4553 }
4554
4555 /*
4556  * Create the opts file
4557  */
4558 static bool
4559 CreateOptsFile(int argc, char *argv[], char *fullprogname)
4560 {
4561         FILE       *fp;
4562         int                     i;
4563
4564 #define OPTS_FILE       "postmaster.opts"
4565
4566         if ((fp = fopen(OPTS_FILE, "w")) == NULL)
4567         {
4568                 elog(LOG, "could not create file \"%s\": %m", OPTS_FILE);
4569                 return false;
4570         }
4571
4572         fprintf(fp, "%s", fullprogname);
4573         for (i = 1; i < argc; i++)
4574                 fprintf(fp, " \"%s\"", argv[i]);
4575         fputs("\n", fp);
4576
4577         if (fclose(fp))
4578         {
4579                 elog(LOG, "could not write file \"%s\": %m", OPTS_FILE);
4580                 return false;
4581         }
4582
4583         return true;
4584 }
4585
4586
4587 /*
4588  * MaxLivePostmasterChildren
4589  *
4590  * This reports the number of entries needed in per-child-process arrays
4591  * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
4592  * These arrays include regular backends, autovac workers and walsenders,
4593  * but not special children nor dead_end children.      This allows the arrays
4594  * to have a fixed maximum size, to wit the same too-many-children limit
4595  * enforced by canAcceptConnections().  The exact value isn't too critical
4596  * as long as it's more than MaxBackends.
4597  */
4598 int
4599 MaxLivePostmasterChildren(void)
4600 {
4601         return 2 * MaxBackends;
4602 }
4603
4604
4605 #ifdef EXEC_BACKEND
4606
4607 /*
4608  * The following need to be available to the save/restore_backend_variables
4609  * functions
4610  */
4611 extern slock_t *ShmemLock;
4612 extern LWLock *LWLockArray;
4613 extern slock_t *ProcStructLock;
4614 extern PROC_HDR *ProcGlobal;
4615 extern PGPROC *AuxiliaryProcs;
4616 extern PMSignalData *PMSignalState;
4617 extern pgsocket pgStatSock;
4618
4619 #ifndef WIN32
4620 #define write_inheritable_socket(dest, src, childpid) ((*(dest) = (src)), true)
4621 #define read_inheritable_socket(dest, src) (*(dest) = *(src))
4622 #else
4623 static bool write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE child);
4624 static bool write_inheritable_socket(InheritableSocket * dest, SOCKET src,
4625                                                  pid_t childPid);
4626 static void read_inheritable_socket(SOCKET * dest, InheritableSocket * src);
4627 #endif
4628
4629
4630 /* Save critical backend variables into the BackendParameters struct */
4631 #ifndef WIN32
4632 static bool
4633 save_backend_variables(BackendParameters * param, Port *port)
4634 #else
4635 static bool
4636 save_backend_variables(BackendParameters * param, Port *port,
4637                                            HANDLE childProcess, pid_t childPid)
4638 #endif
4639 {
4640         memcpy(&param->port, port, sizeof(Port));
4641         if (!write_inheritable_socket(&param->portsocket, port->sock, childPid))
4642                 return false;
4643
4644         strlcpy(param->DataDir, DataDir, MAXPGPATH);
4645
4646         memcpy(&param->ListenSocket, &ListenSocket, sizeof(ListenSocket));
4647
4648         param->MyCancelKey = MyCancelKey;
4649         param->MyPMChildSlot = MyPMChildSlot;
4650
4651         param->UsedShmemSegID = UsedShmemSegID;
4652         param->UsedShmemSegAddr = UsedShmemSegAddr;
4653
4654         param->ShmemLock = ShmemLock;
4655         param->ShmemVariableCache = ShmemVariableCache;
4656         param->ShmemBackendArray = ShmemBackendArray;
4657
4658         param->LWLockArray = LWLockArray;
4659         param->ProcStructLock = ProcStructLock;
4660         param->ProcGlobal = ProcGlobal;
4661         param->AuxiliaryProcs = AuxiliaryProcs;
4662         param->PMSignalState = PMSignalState;
4663         if (!write_inheritable_socket(&param->pgStatSock, pgStatSock, childPid))
4664                 return false;
4665
4666         param->PostmasterPid = PostmasterPid;
4667         param->PgStartTime = PgStartTime;
4668         param->PgReloadTime = PgReloadTime;
4669
4670         param->redirection_done = redirection_done;
4671
4672 #ifdef WIN32
4673         param->PostmasterHandle = PostmasterHandle;
4674         if (!write_duplicated_handle(&param->initial_signal_pipe,
4675                                                                  pgwin32_create_signal_listener(childPid),
4676                                                                  childProcess))
4677                 return false;
4678 #endif
4679
4680         memcpy(&param->syslogPipe, &syslogPipe, sizeof(syslogPipe));
4681
4682         strlcpy(param->my_exec_path, my_exec_path, MAXPGPATH);
4683
4684         strlcpy(param->pkglib_path, pkglib_path, MAXPGPATH);
4685
4686         strlcpy(param->ExtraOptions, ExtraOptions, MAXPGPATH);
4687
4688         return true;
4689 }
4690
4691
4692 #ifdef WIN32
4693 /*
4694  * Duplicate a handle for usage in a child process, and write the child
4695  * process instance of the handle to the parameter file.
4696  */
4697 static bool
4698 write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE childProcess)
4699 {
4700         HANDLE          hChild = INVALID_HANDLE_VALUE;
4701
4702         if (!DuplicateHandle(GetCurrentProcess(),
4703                                                  src,
4704                                                  childProcess,
4705                                                  &hChild,
4706                                                  0,
4707                                                  TRUE,
4708                                                  DUPLICATE_CLOSE_SOURCE | DUPLICATE_SAME_ACCESS))
4709         {
4710                 ereport(LOG,
4711                                 (errmsg_internal("could not duplicate handle to be written to backend parameter file: error code %d",
4712                                                                  (int) GetLastError())));
4713                 return false;
4714         }
4715
4716         *dest = hChild;
4717         return true;
4718 }
4719
4720 /*
4721  * Duplicate a socket for usage in a child process, and write the resulting
4722  * structure to the parameter file.
4723  * This is required because a number of LSPs (Layered Service Providers) very
4724  * common on Windows (antivirus, firewalls, download managers etc) break
4725  * straight socket inheritance.
4726  */
4727 static bool
4728 write_inheritable_socket(InheritableSocket * dest, SOCKET src, pid_t childpid)
4729 {
4730         dest->origsocket = src;
4731         if (src != 0 && src != PGINVALID_SOCKET)
4732         {
4733                 /* Actual socket */
4734                 if (WSADuplicateSocket(src, childpid, &dest->wsainfo) != 0)
4735                 {
4736                         ereport(LOG,
4737                                         (errmsg("could not duplicate socket %d for use in backend: error code %d",
4738                                                         src, WSAGetLastError())));
4739                         return false;
4740                 }
4741         }
4742         return true;
4743 }
4744
4745 /*
4746  * Read a duplicate socket structure back, and get the socket descriptor.
4747  */
4748 static void
4749 read_inheritable_socket(SOCKET * dest, InheritableSocket * src)
4750 {
4751         SOCKET          s;
4752
4753         if (src->origsocket == PGINVALID_SOCKET || src->origsocket == 0)
4754         {
4755                 /* Not a real socket! */
4756                 *dest = src->origsocket;
4757         }
4758         else
4759         {
4760                 /* Actual socket, so create from structure */
4761                 s = WSASocket(FROM_PROTOCOL_INFO,
4762                                           FROM_PROTOCOL_INFO,
4763                                           FROM_PROTOCOL_INFO,
4764                                           &src->wsainfo,
4765                                           0,
4766                                           0);
4767                 if (s == INVALID_SOCKET)
4768                 {
4769                         write_stderr("could not create inherited socket: error code %d\n",
4770                                                  WSAGetLastError());
4771                         exit(1);
4772                 }
4773                 *dest = s;
4774
4775                 /*
4776                  * To make sure we don't get two references to the same socket, close
4777                  * the original one. (This would happen when inheritance actually
4778                  * works..
4779                  */
4780                 closesocket(src->origsocket);
4781         }
4782 }
4783 #endif
4784
4785 static void
4786 read_backend_variables(char *id, Port *port)
4787 {
4788         BackendParameters param;
4789
4790 #ifndef WIN32
4791         /* Non-win32 implementation reads from file */
4792         FILE       *fp;
4793
4794         /* Open file */
4795         fp = AllocateFile(id, PG_BINARY_R);
4796         if (!fp)
4797         {
4798                 write_stderr("could not read from backend variables file \"%s\": %s\n",
4799                                          id, strerror(errno));
4800                 exit(1);
4801         }
4802
4803         if (fread(&param, sizeof(param), 1, fp) != 1)
4804         {
4805                 write_stderr("could not read from backend variables file \"%s\": %s\n",
4806                                          id, strerror(errno));
4807                 exit(1);
4808         }
4809
4810         /* Release file */
4811         FreeFile(fp);
4812         if (unlink(id) != 0)
4813         {
4814                 write_stderr("could not remove file \"%s\": %s\n",
4815                                          id, strerror(errno));
4816                 exit(1);
4817         }
4818 #else
4819         /* Win32 version uses mapped file */
4820         HANDLE          paramHandle;
4821         BackendParameters *paramp;
4822
4823         paramHandle = (HANDLE) atol(id);
4824         paramp = MapViewOfFile(paramHandle, FILE_MAP_READ, 0, 0, 0);
4825         if (!paramp)
4826         {
4827                 write_stderr("could not map view of backend variables: error code %d\n",
4828                                          (int) GetLastError());
4829                 exit(1);
4830         }
4831
4832         memcpy(&param, paramp, sizeof(BackendParameters));
4833
4834         if (!UnmapViewOfFile(paramp))
4835         {
4836                 write_stderr("could not unmap view of backend variables: error code %d\n",
4837                                          (int) GetLastError());
4838                 exit(1);
4839         }
4840
4841         if (!CloseHandle(paramHandle))
4842         {
4843                 write_stderr("could not close handle to backend parameter variables: error code %d\n",
4844                                          (int) GetLastError());
4845                 exit(1);
4846         }
4847 #endif
4848
4849         restore_backend_variables(&param, port);
4850 }
4851
4852 /* Restore critical backend variables from the BackendParameters struct */
4853 static void
4854 restore_backend_variables(BackendParameters * param, Port *port)
4855 {
4856         memcpy(port, &param->port, sizeof(Port));
4857         read_inheritable_socket(&port->sock, &param->portsocket);
4858
4859         SetDataDir(param->DataDir);
4860
4861         memcpy(&ListenSocket, &param->ListenSocket, sizeof(ListenSocket));
4862
4863         MyCancelKey = param->MyCancelKey;
4864         MyPMChildSlot = param->MyPMChildSlot;
4865
4866         UsedShmemSegID = param->UsedShmemSegID;
4867         UsedShmemSegAddr = param->UsedShmemSegAddr;
4868
4869         ShmemLock = param->ShmemLock;
4870         ShmemVariableCache = param->ShmemVariableCache;
4871         ShmemBackendArray = param->ShmemBackendArray;
4872
4873         LWLockArray = param->LWLockArray;
4874         ProcStructLock = param->ProcStructLock;
4875         ProcGlobal = param->ProcGlobal;
4876         AuxiliaryProcs = param->AuxiliaryProcs;
4877         PMSignalState = param->PMSignalState;
4878         read_inheritable_socket(&pgStatSock, &param->pgStatSock);
4879
4880         PostmasterPid = param->PostmasterPid;
4881         PgStartTime = param->PgStartTime;
4882         PgReloadTime = param->PgReloadTime;
4883
4884         redirection_done = param->redirection_done;
4885
4886 #ifdef WIN32
4887         PostmasterHandle = param->PostmasterHandle;
4888         pgwin32_initial_signal_pipe = param->initial_signal_pipe;
4889 #endif
4890
4891         memcpy(&syslogPipe, &param->syslogPipe, sizeof(syslogPipe));
4892
4893         strlcpy(my_exec_path, param->my_exec_path, MAXPGPATH);
4894
4895         strlcpy(pkglib_path, param->pkglib_path, MAXPGPATH);
4896
4897         strlcpy(ExtraOptions, param->ExtraOptions, MAXPGPATH);
4898 }
4899
4900
4901 Size
4902 ShmemBackendArraySize(void)
4903 {
4904         return mul_size(MaxLivePostmasterChildren(), sizeof(Backend));
4905 }
4906
4907 void
4908 ShmemBackendArrayAllocation(void)
4909 {
4910         Size            size = ShmemBackendArraySize();
4911
4912         ShmemBackendArray = (Backend *) ShmemAlloc(size);
4913         /* Mark all slots as empty */
4914         memset(ShmemBackendArray, 0, size);
4915 }
4916
4917 static void
4918 ShmemBackendArrayAdd(Backend *bn)
4919 {
4920         /* The array slot corresponding to my PMChildSlot should be free */
4921         int                     i = bn->child_slot - 1;
4922
4923         Assert(ShmemBackendArray[i].pid == 0);
4924         ShmemBackendArray[i] = *bn;
4925 }
4926
4927 static void
4928 ShmemBackendArrayRemove(Backend *bn)
4929 {
4930         int                     i = bn->child_slot - 1;
4931
4932         Assert(ShmemBackendArray[i].pid == bn->pid);
4933         /* Mark the slot as empty */
4934         ShmemBackendArray[i].pid = 0;
4935 }
4936 #endif   /* EXEC_BACKEND */
4937
4938
4939 #ifdef WIN32
4940
4941 static pid_t
4942 win32_waitpid(int *exitstatus)
4943 {
4944         DWORD           dwd;
4945         ULONG_PTR       key;
4946         OVERLAPPED *ovl;
4947
4948         /*
4949          * Check if there are any dead children. If there are, return the pid of
4950          * the first one that died.
4951          */
4952         if (GetQueuedCompletionStatus(win32ChildQueue, &dwd, &key, &ovl, 0))
4953         {
4954                 *exitstatus = (int) key;
4955                 return dwd;
4956         }
4957
4958         return -1;
4959 }
4960
4961 /*
4962  * Note! Code below executes on a thread pool! All operations must
4963  * be thread safe! Note that elog() and friends must *not* be used.
4964  */
4965 static void WINAPI
4966 pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired)
4967 {
4968         win32_deadchild_waitinfo *childinfo = (win32_deadchild_waitinfo *) lpParameter;
4969         DWORD           exitcode;
4970
4971         if (TimerOrWaitFired)
4972                 return;                                 /* timeout. Should never happen, since we use
4973                                                                  * INFINITE as timeout value. */
4974
4975         /*
4976          * Remove handle from wait - required even though it's set to wait only
4977          * once
4978          */
4979         UnregisterWaitEx(childinfo->waitHandle, NULL);
4980
4981         if (!GetExitCodeProcess(childinfo->procHandle, &exitcode))
4982         {
4983                 /*
4984                  * Should never happen. Inform user and set a fixed exitcode.
4985                  */
4986                 write_stderr("could not read exit code for process\n");
4987                 exitcode = 255;
4988         }
4989
4990         if (!PostQueuedCompletionStatus(win32ChildQueue, childinfo->procId, (ULONG_PTR) exitcode, NULL))
4991                 write_stderr("could not post child completion status\n");
4992
4993         /*
4994          * Handle is per-process, so we close it here instead of in the
4995          * originating thread
4996          */
4997         CloseHandle(childinfo->procHandle);
4998
4999         /*
5000          * Free struct that was allocated before the call to
5001          * RegisterWaitForSingleObject()
5002          */
5003         free(childinfo);
5004
5005         /* Queue SIGCHLD signal */
5006         pg_queue_signal(SIGCHLD);
5007 }
5008
5009 #endif   /* WIN32 */