]> granicus.if.org Git - postgresql/blob - src/backend/postmaster/postmaster.c
Fix some typos I introduced in WIN32-only code late last night.
[postgresql] / src / backend / postmaster / postmaster.c
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  *        This program acts as a clearing house for requests to the
5  *        POSTGRES system.  Frontend programs send a startup message
6  *        to the Postmaster and the postmaster uses the info in the
7  *        message to set up a backend process.
8  *
9  *        The postmaster also manages system-wide operations such as
10  *        startup and shutdown. The postmaster itself doesn't do those
11  *        operations, mind you --- it just forks off a subprocess to do them
12  *        at the right times.  It also takes care of resetting the system
13  *        if a backend crashes.
14  *
15  *        The postmaster process creates the shared memory and semaphore
16  *        pools during startup, but as a rule does not touch them itself.
17  *        In particular, it is not a member of the PGPROC array of backends
18  *        and so it cannot participate in lock-manager operations.      Keeping
19  *        the postmaster away from shared memory operations makes it simpler
20  *        and more reliable.  The postmaster is almost always able to recover
21  *        from crashes of individual backends by resetting shared memory;
22  *        if it did much with shared memory then it would be prone to crashing
23  *        along with the backends.
24  *
25  *        When a request message is received, we now fork() immediately.
26  *        The child process performs authentication of the request, and
27  *        then becomes a backend if successful.  This allows the auth code
28  *        to be written in a simple single-threaded style (as opposed to the
29  *        crufty "poor man's multitasking" code that used to be needed).
30  *        More importantly, it ensures that blockages in non-multithreaded
31  *        libraries like SSL or PAM cannot cause denial of service to other
32  *        clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  *        $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.399 2004/05/28 15:14:03 tgl Exp $
41  *
42  * NOTES
43  *
44  * Initialization:
45  *              The Postmaster sets up a few shared memory data structures
46  *              for the backends.  It should at the very least initialize the
47  *              lock manager.
48  *
49  * Synchronization:
50  *              The Postmaster shares memory with the backends but should avoid
51  *              touching shared memory, so as not to become stuck if a crashing
52  *              backend screws up locks or shared memory.  Likewise, the Postmaster
53  *              should never block on messages from frontend clients.
54  *
55  * Garbage Collection:
56  *              The Postmaster cleans up after backends if they have an emergency
57  *              exit and/or core dump.
58  *
59  *-------------------------------------------------------------------------
60  */
61
62 #include "postgres.h"
63
64 #include <unistd.h>
65 #include <signal.h>
66 #include <sys/wait.h>
67 #include <ctype.h>
68 #include <sys/stat.h>
69 #include <sys/socket.h>
70 #include <errno.h>
71 #include <fcntl.h>
72 #include <sys/param.h>
73 #include <netinet/in.h>
74 #include <arpa/inet.h>
75 #include <netdb.h>
76 #include <limits.h>
77
78 #ifdef HAVE_SYS_SELECT_H
79 #include <sys/select.h>
80 #endif
81
82 #ifdef HAVE_GETOPT_H
83 #include <getopt.h>
84 #endif
85
86 #ifdef USE_RENDEZVOUS
87 #include <DNSServiceDiscovery/DNSServiceDiscovery.h>
88 #endif
89
90 #include "catalog/pg_database.h"
91 #include "commands/async.h"
92 #include "lib/dllist.h"
93 #include "libpq/auth.h"
94 #include "libpq/crypt.h"
95 #include "libpq/libpq.h"
96 #include "libpq/pqcomm.h"
97 #include "libpq/pqsignal.h"
98 #include "miscadmin.h"
99 #include "nodes/nodes.h"
100 #include "pgtime.h"
101 #include "storage/fd.h"
102 #include "storage/ipc.h"
103 #include "storage/pg_shmem.h"
104 #include "storage/pmsignal.h"
105 #include "storage/proc.h"
106 #include "storage/bufmgr.h"
107 #include "access/xlog.h"
108 #include "tcop/tcopprot.h"
109 #include "utils/guc.h"
110 #include "utils/memutils.h"
111 #include "utils/ps_status.h"
112 #include "bootstrap/bootstrap.h"
113 #include "pgstat.h"
114
115
116 #ifdef HAVE_SIGPROCMASK
117 sigset_t        UnBlockSig,
118                         BlockSig,
119                         AuthBlockSig;
120
121 #else
122 int                     UnBlockSig,
123                         BlockSig,
124                         AuthBlockSig;
125 #endif
126
127 /*
128  * List of active backends (or child processes anyway; we don't actually
129  * know whether a given child has become a backend or is still in the
130  * authorization phase).  This is used mainly to keep track of how many
131  * children we have and send them appropriate signals when necessary.
132  */
133 typedef struct bkend
134 {
135         pid_t           pid;                    /* process id of the child process */
136         long            cancel_key;             /* cancel key assigned to this child */
137 } Backend;
138
139 static Dllist *BackendList;
140
141 #ifdef EXEC_BACKEND
142 #define NUM_BACKENDARRAY_ELEMS (2*MaxBackends)
143 static Backend *ShmemBackendArray;
144 #endif
145
146 /* The port number, Unix-socket directory, and addresses we listen for connections on */
147 int                     PostPortNumber;
148 char       *UnixSocketDir;
149 char       *ListenAddresses;
150
151 /*
152  * MaxBackends is the limit on the number of backends we can start.
153  * Note that a larger MaxBackends value will increase the size of the
154  * shared memory area as well as cause the postmaster to grab more
155  * kernel semaphores, even if you never actually use that many
156  * backends.
157  */
158 int                     MaxBackends;
159
160 /*
161  * ReservedBackends is the number of backends reserved for superuser use.
162  * This number is taken out of the pool size given by MaxBackends, so the
163  * number of backend slots available to non-superusers is
164  * (MaxBackends - ReservedBackends).  Note what this really means is
165  * "if there are <= ReservedBackends connections available, only superusers
166  * can make new connections" --- pre-existing superuser connections don't
167  * count against the limit.
168  */
169 int                     ReservedBackends;
170
171
172 static const char *progname = NULL;
173
174 /* The socket(s) we're listening to. */
175 #define MAXLISTEN       10
176 static int      ListenSocket[MAXLISTEN];
177
178 /*
179  * Set by the -o option
180  */
181 static char ExtraOptions[MAXPGPATH];
182
183 /*
184  * These globals control the behavior of the postmaster in case some
185  * backend dumps core.  Normally, it kills all peers of the dead backend
186  * and reinitializes shared memory.  By specifying -s or -n, we can have
187  * the postmaster stop (rather than kill) peers and not reinitialize
188  * shared data structures.
189  */
190 static bool Reinit = true;		/* cleared by -n: don't reinit shared mem after abnormal exit */
191 static int      SendStop = false;	/* set by -s: send SIGSTOP rather than SIGQUIT to peers; NOTE(review): only assigned true/false in PostmasterMain -- confirm whether this could be bool like Reinit */
192
193 /* still more option variables */
194 bool            EnableSSL = false;
195 bool            SilentMode = false; /* silent mode (-S) */
196
197 int                     PreAuthDelay = 0;
198 int                     AuthenticationTimeout = 60;
199 int                     CheckPointTimeout = 300;
200 int                     CheckPointWarning = 30;
201 time_t          LastSignalledCheckpoint = 0;
202
203 bool            log_hostname;           /* for ps display and logging */
204 bool            Log_connections = false;
205 bool            Db_user_namespace = false;
206
207 char       *rendezvous_name;
208
209 /* list of library:init-function to be preloaded */
210 char       *preload_libraries_string = NULL;
211
212 /* Startup/shutdown state */
213 static pid_t StartupPID = 0,
214                         ShutdownPID = 0,
215                         CheckPointPID = 0,
216                         BgWriterPID = 0;
217 static time_t checkpointed = 0;
218
219 #define                 NoShutdown              0
220 #define                 SmartShutdown   1
221 #define                 FastShutdown    2
222
223 static int      Shutdown = NoShutdown;
224
225 static bool FatalError = false; /* T if recovering from backend crash */
226
227 bool            ClientAuthInProgress = false;           /* T during new-client
228                                                                                                  * authentication */
229
230 /*
231  * State for assigning random salts and cancel keys.
232  * Also, the global MyCancelKey passes the cancel key assigned to a given
233  * backend from the postmaster to that backend (via fork).
234  */
235
236 static unsigned int random_seed = 0;
237
238 static int      debug_flag = 0;
239
240 extern char *optarg;
241 extern int      optind,
242                         opterr;
243
244 #ifdef HAVE_INT_OPTRESET
245 extern int      optreset;
246 #endif
247
248 /*
249  * postmaster.c - function prototypes
250  */
251 static void checkDataDir(const char *checkdir);
252 #ifdef USE_RENDEZVOUS
253 static void reg_reply(DNSServiceRegistrationReplyErrorType errorCode,
254                                           void *context);
255 #endif
256 static void pmdaemonize(void);
257 static Port *ConnCreate(int serverFd);
258 static void ConnFree(Port *port);
259 static void reset_shared(unsigned short port);
260 static void SIGHUP_handler(SIGNAL_ARGS);
261 static void pmdie(SIGNAL_ARGS);
262 static void reaper(SIGNAL_ARGS);
263 static void sigusr1_handler(SIGNAL_ARGS);
264 static void dummy_handler(SIGNAL_ARGS);
265 static void CleanupProc(int pid, int exitstatus);
266 static void LogChildExit(int lev, const char *procname,
267                          int pid, int exitstatus);
268 static int      BackendRun(Port *port);
269 static void ExitPostmaster(int status);
270 static void usage(const char *);
271 static int      ServerLoop(void);
272 static int      BackendStartup(Port *port);
273 static int      ProcessStartupPacket(Port *port, bool SSLdone);
274 static void processCancelRequest(Port *port, void *pkt);
275 static int      initMasks(fd_set *rmask);
276 static void report_fork_failure_to_client(Port *port, int errnum);
277 static enum CAC_state canAcceptConnections(void);
278 static long PostmasterRandom(void);
279 static void RandomSalt(char *cryptSalt, char *md5Salt);
280 static void SignalChildren(int signal);
281 static int      CountChildren(void);
282 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
283 static pid_t SSDataBase(int xlop);
284 static void
285 postmaster_error(const char *fmt,...)
286 /* This lets gcc check the format string for consistency. */
287 __attribute__((format(printf, 1, 2)));
288
289 #ifdef EXEC_BACKEND
290
291 #ifdef WIN32
292 static pid_t win32_forkexec(const char *path, char *argv[]);
293 static void win32_AddChild(pid_t pid, HANDLE handle);
294 static void win32_RemoveChild(pid_t pid);
295 static pid_t win32_waitpid(int *exitstatus);
296 static DWORD WINAPI win32_sigchld_waiter(LPVOID param);
297
298 static pid_t *win32_childPIDArray;
299 static HANDLE *win32_childHNDArray;
300 static unsigned long win32_numChildren = 0;
301 #endif
302
303 static pid_t backend_forkexec(Port *port);
304 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
305
306 static void read_backend_variables(char *filename, Port *port);
307 static bool write_backend_variables(char *filename, Port *port);
308
309 static void ShmemBackendArrayAdd(Backend *bn);
310 static void ShmemBackendArrayRemove(pid_t pid);
311
312 #endif /* EXEC_BACKEND */
313
/*
 * Shorthands that call SSDataBase() with the matching BS_XLOG_* action
 * code.  Each expands to the SSDataBase() call itself, so its value is the
 * pid_t that SSDataBase() returns (presumably the pid of the subprocess
 * started for that action -- confirm against SSDataBase()).
 */
314 #define StartupDataBase()               SSDataBase(BS_XLOG_STARTUP)
315 #define CheckPointDataBase()    SSDataBase(BS_XLOG_CHECKPOINT)
316 #define StartBackgroundWriter() SSDataBase(BS_XLOG_BGWRITER)
317 #define ShutdownDataBase()              SSDataBase(BS_XLOG_SHUTDOWN)
318
319
320 /*
321  * Postmaster main entry point
322  */
323 int
324 PostmasterMain(int argc, char *argv[])
325 {
326         int                     opt;
327         int                     status;
328         char            original_extraoptions[MAXPGPATH];
329         char       *potential_DataDir = NULL;
330         int                     i;
331
332         *original_extraoptions = '\0';
333
334         progname = get_progname(argv[0]);
335
336         IsPostmasterEnvironment = true;
337
338         /*
339          * Catch standard options before doing much else.  This even works on
340          * systems without getopt_long.
341          */
342         if (argc > 1)
343         {
344                 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
345                 {
346                         usage(progname);
347                         ExitPostmaster(0);
348                 }
349                 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
350                 {
351                         puts("postmaster (PostgreSQL) " PG_VERSION);
352                         ExitPostmaster(0);
353                 }
354         }
355
356         /*
357          * for security, no dir or file created can be group or other
358          * accessible
359          */
360         umask((mode_t) 0077);
361
362         MyProcPid = PostmasterPid = getpid();
363
364         /*
365          * Fire up essential subsystems: memory management
366          */
367         MemoryContextInit();
368
369         /*
370          * By default, palloc() requests in the postmaster will be allocated
371          * in the PostmasterContext, which is space that can be recycled by
372          * backends.  Allocated data that needs to be available to backends
373          * should be allocated in TopMemoryContext.
374          */
375         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
376                                                                                           "Postmaster",
377                                                                                           ALLOCSET_DEFAULT_MINSIZE,
378                                                                                           ALLOCSET_DEFAULT_INITSIZE,
379                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
380         MemoryContextSwitchTo(PostmasterContext);
381
382         IgnoreSystemIndexes(false);
383
384         if (find_my_exec(argv[0], my_exec_path) < 0)
385                 elog(FATAL, "%s: could not locate my own executable path",
386                          argv[0]);
387
388         get_pkglib_path(my_exec_path, pkglib_path);
389
390         /*
391          * Options setup
392          */
393         InitializeGUCOptions();
394
395         potential_DataDir = getenv("PGDATA");           /* default value */
396
397         opterr = 1;
398
399         while ((opt = getopt(argc, argv, "A:a:B:b:c:D:d:Fh:ik:lm:MN:no:p:Ss-:")) != -1)
400         {
401                 switch (opt)
402                 {
403                         case 'A':
404 #ifdef USE_ASSERT_CHECKING
405                                 SetConfigOption("debug_assertions", optarg, PGC_POSTMASTER, PGC_S_ARGV);
406 #else
407                                 postmaster_error("assert checking is not compiled in");
408 #endif
409                                 break;
410                         case 'a':
411                                 /* Can no longer set authentication method. */
412                                 break;
413                         case 'B':
414                                 SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
415                                 break;
416                         case 'b':
417                                 /* Can no longer set the backend executable file to use. */
418                                 break;
419                         case 'D':
420                                 potential_DataDir = optarg;
421                                 break;
422                         case 'd':
423                                 {
424                                         /* Turn on debugging for the postmaster. */
425                                         char       *debugstr = palloc(strlen("debug") + strlen(optarg) + 1);
426
427                                         sprintf(debugstr, "debug%s", optarg);
428                                         SetConfigOption("log_min_messages", debugstr,
429                                                                         PGC_POSTMASTER, PGC_S_ARGV);
430                                         pfree(debugstr);
431                                         debug_flag = atoi(optarg);
432                                         break;
433                                 }
434                         case 'F':
435                                 SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
436                                 break;
437                         case 'h':
438                                 SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
439                                 break;
440                         case 'i':
441                                 SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
442                                 break;
443                         case 'k':
444                                 SetConfigOption("unix_socket_directory", optarg, PGC_POSTMASTER, PGC_S_ARGV);
445                                 break;
446 #ifdef USE_SSL
447                         case 'l':
448                                 SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
449                                 break;
450 #endif
451                         case 'm':
452                                 /* Multiplexed backends no longer supported. */
453                                 break;
454                         case 'M':
455
456                                 /*
457                                  * ignore this flag.  This may be passed in because the
458                                  * program was run as 'postgres -M' instead of
459                                  * 'postmaster'
460                                  */
461                                 break;
462                         case 'N':
463                                 /* The max number of backends to start. */
464                                 SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
465                                 break;
466                         case 'n':
467                                 /* Don't reinit shared mem after abnormal exit */
468                                 Reinit = false;
469                                 break;
470                         case 'o':
471
472                                 /*
473                                  * Other options to pass to the backend on the command
474                                  * line -- useful only for debugging.
475                                  */
476                                 strcat(ExtraOptions, " ");
477                                 strcat(ExtraOptions, optarg);
478                                 strcpy(original_extraoptions, optarg);
479                                 break;
480                         case 'p':
481                                 SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
482                                 break;
483                         case 'S':
484
485                                 /*
486                                  * Start in 'S'ilent mode (disassociate from controlling
487                                  * tty). You may also think of this as 'S'ysV mode since
488                                  * it's most badly needed on SysV-derived systems like
489                                  * SVR4 and HP-UX.
490                                  */
491                                 SetConfigOption("silent_mode", "true", PGC_POSTMASTER, PGC_S_ARGV);
492                                 break;
493                         case 's':
494
495                                 /*
496                                  * In the event that some backend dumps core, send
497                                  * SIGSTOP, rather than SIGQUIT, to all its peers.      This
498                                  * lets the wily post_hacker collect core dumps from
499                                  * everyone.
500                                  */
501                                 SendStop = true;
502                                 break;
503                         case 'c':
504                         case '-':
505                                 {
506                                         char       *name,
507                                                            *value;
508
509                                         ParseLongOption(optarg, &name, &value);
510                                         if (!value)
511                                         {
512                                                 if (opt == '-')
513                                                         ereport(ERROR,
514                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
515                                                                          errmsg("--%s requires a value",
516                                                                                         optarg)));
517                                                 else
518                                                         ereport(ERROR,
519                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
520                                                                          errmsg("-c %s requires a value",
521                                                                                         optarg)));
522                                         }
523
524                                         SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
525                                         free(name);
526                                         if (value)
527                                                 free(value);
528                                         break;
529                                 }
530
531                         default:
532                                 fprintf(stderr,
533                                         gettext("Try \"%s --help\" for more information.\n"),
534                                                 progname);
535                                 ExitPostmaster(1);
536                 }
537         }
538
539         /*
540          * Postmaster accepts no non-option switch arguments.
541          */
542         if (optind < argc)
543         {
544                 postmaster_error("invalid argument: \"%s\"", argv[optind]);
545                 fprintf(stderr,
546                                 gettext("Try \"%s --help\" for more information.\n"),
547                                 progname);
548                 ExitPostmaster(1);
549         }
550
551         /*
552          * Now we can set the data directory, and then read postgresql.conf.
553          */
554         checkDataDir(potential_DataDir);        /* issues error messages */
555         SetDataDir(potential_DataDir);
556
557         ProcessConfigFile(PGC_POSTMASTER);
558
559         /* If timezone is not set, determine what the OS uses */
560         pg_timezone_initialize();
561
562 #ifdef EXEC_BACKEND
563         write_nondefault_variables(PGC_POSTMASTER);
564 #endif
565
566         /*
567          * Check for invalid combinations of GUC settings.
568          */
569         if (NBuffers < 2 * MaxBackends || NBuffers < 16)
570         {
571                 /*
572                  * Do not accept -B so small that backends are likely to starve
573                  * for lack of buffers.  The specific choices here are somewhat
574                  * arbitrary.
575                  */
576                 postmaster_error("the number of buffers (-B) must be at least twice the number of allowed connections (-N) and at least 16");
577                 ExitPostmaster(1);
578         }
579
580         if (ReservedBackends >= MaxBackends)
581         {
582                 postmaster_error("superuser_reserved_connections must be less than max_connections");
583                 ExitPostmaster(1);
584         }
585
586         /*
587          * Other one-time internal sanity checks can go here.
588          */
589         if (!CheckDateTokenTables())
590         {
591                 postmaster_error("invalid datetoken tables, please fix");
592                 ExitPostmaster(1);
593         }
594
595         /*
596          * Now that we are done processing the postmaster arguments, reset
597          * getopt(3) library so that it will work correctly in subprocesses.
598          */
599         optind = 1;
600 #ifdef HAVE_INT_OPTRESET
601         optreset = 1;                           /* some systems need this too */
602 #endif
603
604         /* For debugging: display postmaster environment */
605         {
606                 extern char **environ;
607                 char      **p;
608
609                 ereport(DEBUG3,
610                         (errmsg_internal("%s: PostmasterMain: initial environ dump:",
611                                                          progname)));
612                 ereport(DEBUG3,
613                  (errmsg_internal("-----------------------------------------")));
614                 for (p = environ; *p; ++p)
615                         ereport(DEBUG3,
616                                         (errmsg_internal("\t%s", *p)));
617                 ereport(DEBUG3,
618                  (errmsg_internal("-----------------------------------------")));
619         }
620
621 #ifdef EXEC_BACKEND
622         if (find_other_exec(argv[0], "postgres", PG_VERSIONSTR,
623                                                 postgres_exec_path) < 0)
624                 ereport(FATAL,
625                                 (errmsg("%s: could not locate matching postgres executable",
626                                                 progname)));
627 #endif
628
629         /*
630          * Initialize SSL library, if specified.
631          */
632 #ifdef USE_SSL
633         if (EnableSSL)
634                 secure_initialize();
635 #endif
636
637         /*
638          * process any libraries that should be preloaded and optionally
639          * pre-initialized
640          */
641         if (preload_libraries_string)
642                 process_preload_libraries(preload_libraries_string);
643
644         /*
645          * Fork away from controlling terminal, if -S specified.
646          *
647          * Must do this before we grab any interlock files, else the interlocks
648          * will show the wrong PID.
649          */
650         if (SilentMode)
651                 pmdaemonize();
652
653         /*
654          * Create lockfile for data directory.
655          *
656          * We want to do this before we try to grab the input sockets, because
657          * the data directory interlock is more reliable than the socket-file
658          * interlock (thanks to whoever decided to put socket files in /tmp
659          * :-(). For the same reason, it's best to grab the TCP socket before
660          * the Unix socket.
661          */
662         CreateDataDirLockFile(DataDir, true);
663
664         /*
665          * Remove old temporary files.  At this point there can be no other
666          * Postgres processes running in this directory, so this should be
667          * safe.
668          */
669         RemovePgTempFiles();
670
671         /*
672          * Establish input sockets.
673          */
674         for (i = 0; i < MAXLISTEN; i++)
675                 ListenSocket[i] = -1;
676
677         if (ListenAddresses)
678         {
679                 char       *curhost,
680                                    *endptr;
681                 char            c;
682
683                 curhost = ListenAddresses;
684                 for (;;)
685                 {
686                         /* ignore whitespace */
687                         while (isspace((unsigned char) *curhost))
688                                 curhost++;
689                         if (*curhost == '\0')
690                                 break;
691                         endptr = curhost;
692                         while (*endptr != '\0' && !isspace((unsigned char) *endptr))
693                                 endptr++;
694                         c = *endptr;
695                         *endptr = '\0';
696                         if (strcmp(curhost, "*") == 0)
697                                 status = StreamServerPort(AF_UNSPEC, NULL,
698                                                                                   (unsigned short) PostPortNumber,
699                                                                                   UnixSocketDir,
700                                                                                   ListenSocket, MAXLISTEN);
701                         else
702                                 status = StreamServerPort(AF_UNSPEC, curhost,
703                                                                                   (unsigned short) PostPortNumber,
704                                                                                   UnixSocketDir,
705                                                                                   ListenSocket, MAXLISTEN);
706                         if (status != STATUS_OK)
707                                 ereport(WARNING,
708                                          (errmsg("could not create listen socket for \"%s\"",
709                                                          curhost)));
710                         *endptr = c;
711                         if (c != '\0')
712                                 curhost = endptr + 1;
713                         else
714                                 break;
715                 }
716         }
717
718 #ifdef USE_RENDEZVOUS
719         /* Register for Rendezvous only if we opened TCP socket(s) */
720         if (ListenSocket[0] != -1 && rendezvous_name != NULL)
721         {
722                 DNSServiceRegistrationCreate(rendezvous_name,
723                                                                          "_postgresql._tcp.",
724                                                                          "",
725                                                                          htonl(PostPortNumber),
726                                                                          "",
727                                                                  (DNSServiceRegistrationReply) reg_reply,
728                                                                          NULL);
729         }
730 #endif
731
732 #ifdef HAVE_UNIX_SOCKETS
733         status = StreamServerPort(AF_UNIX, NULL,
734                                                           (unsigned short) PostPortNumber,
735                                                           UnixSocketDir,
736                                                           ListenSocket, MAXLISTEN);
737         if (status != STATUS_OK)
738                 ereport(WARNING,
739                                 (errmsg("could not create Unix-domain socket")));
740 #endif
741
742         /*
743          * check that we have some socket to listen on
744          */
745         if (ListenSocket[0] == -1)
746                 ereport(FATAL,
747                                 (errmsg("no socket created for listening")));
748
749         XLOGPathInit();
750
751         /*
752          * Set up shared memory and semaphores.
753          */
754         reset_shared(PostPortNumber);
755
756         /*
757          * Estimate number of openable files.  This must happen after setting
758          * up semaphores, because on some platforms semaphores count as open
759          * files.
760          */
761         set_max_safe_fds();
762
763         /*
764          * Initialize the list of active backends.
765          */
766         BackendList = DLNewList();
767
768 #ifdef WIN32
769
770         /*
771          * Initialize the child pid/HANDLE arrays
772          */
773         win32_childPIDArray = (pid_t *) malloc(NUM_BACKENDARRAY_ELEMS * sizeof(pid_t));
774         win32_childHNDArray = (HANDLE *) malloc(NUM_BACKENDARRAY_ELEMS * sizeof(HANDLE));
775         if (!win32_childPIDArray || !win32_childHNDArray)
776                 ereport(FATAL,
777                                 (errcode(ERRCODE_OUT_OF_MEMORY),
778                                  errmsg("out of memory")));
779 #endif
780
781         /*
782          * Record postmaster options.  We delay this till now to avoid
783          * recording bogus options (eg, NBuffers too high for available
784          * memory).
785          */
786         if (!CreateOptsFile(argc, argv, my_exec_path))
787                 ExitPostmaster(1);
788
789         /*
790          * Set up signal handlers for the postmaster process.
791          *
792          * CAUTION: when changing this list, check for side-effects on the signal
793          * handling setup of child processes.  See tcop/postgres.c,
794          * bootstrap/bootstrap.c, and postmaster/pgstat.c.
795          */
796         pqinitmask();
797         PG_SETMASK(&BlockSig);
798
799         pqsignal(SIGHUP, SIGHUP_handler);       /* reread config file and have
800                                                                                  * children do same */
801         pqsignal(SIGINT, pmdie);        /* send SIGTERM and ShutdownDataBase */
802         pqsignal(SIGQUIT, pmdie);       /* send SIGQUIT and die */
803         pqsignal(SIGTERM, pmdie);       /* wait for children and ShutdownDataBase */
804         pqsignal(SIGALRM, SIG_IGN); /* ignored */
805         pqsignal(SIGPIPE, SIG_IGN); /* ignored */
806         pqsignal(SIGUSR1, sigusr1_handler); /* message from child process */
807         pqsignal(SIGUSR2, dummy_handler);       /* unused, reserve for children */
808         pqsignal(SIGCHLD, reaper);      /* handle child termination */
809         pqsignal(SIGTTIN, SIG_IGN); /* ignored */
810         pqsignal(SIGTTOU, SIG_IGN); /* ignored */
811         /* ignore SIGXFSZ, so that ulimit violations work like disk full */
812 #ifdef SIGXFSZ
813         pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
814 #endif
815
816         /*
817          * Reset whereToSendOutput from Debug (its starting state) to None.
818          * This prevents ereport from sending log messages to stderr unless
819          * the syslog/stderr switch permits.  We don't do this until the
820          * postmaster is fully launched, since startup failures may as well be
821          * reported to stderr.
822          */
823         whereToSendOutput = None;
824
825         /*
826          * On many platforms, the first call of localtime() incurs significant
827          * overhead to load timezone info from the system configuration files.
828          * By doing it once in the postmaster, we avoid having to do it in
829          * every started child process.  The savings are not huge, but they
830          * add up...
831          */
832         {
833                 time_t          now = time(NULL);
834
835                 (void) pg_localtime(&now);
836         }
837
838         /*
839          * Initialize and try to startup the statistics collector process
840          */
841         pgstat_init();
842         pgstat_start();
843
844         /*
845          * Load cached files for client authentication.
846          */
847         load_hba();
848         load_ident();
849         load_user();
850         load_group();
851
852         /*
853          * We're ready to rock and roll...
854          */
855         StartupPID = StartupDataBase();
856
857         status = ServerLoop();
858
859         /*
860          * ServerLoop probably shouldn't ever return, but if it does, close
861          * down.
862          */
863         ExitPostmaster(status != STATUS_OK);
864
865         return 0;                                       /* not reached */
866 }
867
868
869 /*
870  * Validate the proposed data directory
871  */
872 static void
873 checkDataDir(const char *checkdir)
874 {
875         char            path[MAXPGPATH];
876         FILE       *fp;
877         struct stat stat_buf;
878
879         if (checkdir == NULL)
880         {
881                 fprintf(stderr,
882                                 gettext("%s does not know where to find the database system data.\n"
883                                                 "You must specify the directory that contains the database system\n"
884                                                 "either by specifying the -D invocation option or by setting the\n"
885                                                 "PGDATA environment variable.\n"),
886                                 progname);
887                 ExitPostmaster(2);
888         }
889
890         if (stat(checkdir, &stat_buf) == -1)
891         {
892                 if (errno == ENOENT)
893                         ereport(FATAL,
894                                         (errcode_for_file_access(),
895                                          errmsg("data directory \"%s\" does not exist",
896                                                         checkdir)));
897                 else
898                         ereport(FATAL,
899                                         (errcode_for_file_access(),
900                          errmsg("could not read permissions of directory \"%s\": %m",
901                                         checkdir)));
902         }
903
904         /*
905          * Check if the directory has group or world access.  If so, reject.
906          *
907          * XXX temporarily suppress check when on Windows, because there may not
908          * be proper support for Unix-y file permissions.  Need to think of a
909          * reasonable check to apply on Windows.
910          */
911 #if !defined(__CYGWIN__) && !defined(WIN32)
912         if (stat_buf.st_mode & (S_IRWXG | S_IRWXO))
913                 ereport(FATAL,
914                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
915                                  errmsg("data directory \"%s\" has group or world access",
916                                                 checkdir),
917                                  errdetail("Permissions should be u=rwx (0700).")));
918 #endif
919
920         /* Look for PG_VERSION before looking for pg_control */
921         ValidatePgVersion(checkdir);
922
923         snprintf(path, sizeof(path), "%s/global/pg_control", checkdir);
924
925         fp = AllocateFile(path, PG_BINARY_R);
926         if (fp == NULL)
927         {
928                 fprintf(stderr,
929                                 gettext("%s: could not find the database system\n"
930                                                 "Expected to find it in the directory \"%s\",\n"
931                                                 "but could not open file \"%s\": %s\n"),
932                                 progname, checkdir, path, strerror(errno));
933                 ExitPostmaster(2);
934         }
935         FreeFile(fp);
936 }
937
938
939 #ifdef USE_RENDEZVOUS
940
941 /*
942  * empty callback function for DNSServiceRegistrationCreate()
943  */
944 static void
945 reg_reply(DNSServiceRegistrationReplyErrorType errorCode, void *context)
946 {
947
948 }
949
950 #endif /* USE_RENDEZVOUS */
951
952
953 /*
954  * Fork away from the controlling terminal (-S option)
955  */
956 static void
957 pmdaemonize(void)
958 {
959 #ifdef WIN32
960         /* not supported */
961         elog(FATAL, "SilentMode not supported under WIN32");
962 #else
963         int                     i;
964         pid_t           pid;
965
966 #ifdef LINUX_PROFILE
967         struct itimerval prof_itimer;
968 #endif
969
970 #ifdef LINUX_PROFILE
971         /* see comments in BackendStartup */
972         getitimer(ITIMER_PROF, &prof_itimer);
973 #endif
974
975         pid = fork();
976         if (pid == (pid_t) -1)
977         {
978                 postmaster_error("could not fork background process: %s",
979                                                  strerror(errno));
980                 ExitPostmaster(1);
981         }
982         else if (pid)
983         {                                                       /* parent */
984                 /* Parent should just exit, without doing any atexit cleanup */
985                 _exit(0);
986         }
987
988 #ifdef LINUX_PROFILE
989         setitimer(ITIMER_PROF, &prof_itimer, NULL);
990 #endif
991
992         MyProcPid = getpid();           /* reset MyProcPid to child */
993
994 /* GH: If there's no setsid(), we hopefully don't need silent mode.
995  * Until there's a better solution.
996  */
997 #ifdef HAVE_SETSID
998         if (setsid() < 0)
999         {
1000                 postmaster_error("could not dissociate from controlling TTY: %s",
1001                                                  strerror(errno));
1002                 ExitPostmaster(1);
1003         }
1004 #endif
1005         i = open(NULL_DEV, O_RDWR | PG_BINARY);
1006         dup2(i, 0);
1007         dup2(i, 1);
1008         dup2(i, 2);
1009         close(i);
1010 #endif
1011 }
1012
1013
/*
 * Print out help message
 *
 * Writes the command-line summary to stdout.  progname is substituted into
 * the two leading lines; every other line is a fixed, translatable string.
 * The -A and -l entries appear only when the corresponding feature
 * (USE_ASSERT_CHECKING, USE_SSL) is compiled in.
 */
static void
usage(const char *progname)
{
	/* Header: the only lines that need formatting */
	printf(gettext("%s is the PostgreSQL server.\n\n"), progname);
	printf(gettext("Usage:\n  %s [OPTION]...\n\n"), progname);

	/* Regular options */
	fputs(gettext("Options:\n"), stdout);
#ifdef USE_ASSERT_CHECKING
	fputs(gettext("  -A 1|0          enable/disable run-time assert checking\n"), stdout);
#endif
	fputs(gettext("  -B NBUFFERS     number of shared buffers\n"), stdout);
	fputs(gettext("  -c NAME=VALUE   set run-time parameter\n"), stdout);
	fputs(gettext("  -d 1-5          debugging level\n"), stdout);
	fputs(gettext("  -D DATADIR      database directory\n"), stdout);
	fputs(gettext("  -F              turn fsync off\n"), stdout);
	fputs(gettext("  -h HOSTNAME     host name or IP address to listen on\n"), stdout);
	fputs(gettext("  -i              enable TCP/IP connections\n"), stdout);
	fputs(gettext("  -k DIRECTORY    Unix-domain socket location\n"), stdout);
#ifdef USE_SSL
	fputs(gettext("  -l              enable SSL connections\n"), stdout);
#endif
	fputs(gettext("  -N MAX-CONNECT  maximum number of allowed connections\n"), stdout);
	fputs(gettext("  -o OPTIONS      pass \"OPTIONS\" to each server process\n"), stdout);
	fputs(gettext("  -p PORT         port number to listen on\n"), stdout);
	fputs(gettext("  -S              silent mode (start in background without logging output)\n"), stdout);
	fputs(gettext("  --help          show this help, then exit\n"), stdout);
	fputs(gettext("  --version       output version information, then exit\n"), stdout);

	/* Options intended for developers */
	fputs(gettext("\nDeveloper options:\n"), stdout);
	fputs(gettext("  -n              do not reinitialize shared memory after abnormal exit\n"), stdout);
	fputs(gettext("  -s              send SIGSTOP to all backend servers if one dies\n"), stdout);

	/* Trailer */
	fputs(gettext("\nPlease read the documentation for the complete list of run-time\n"
				  "configuration settings and how to set them on the command line or in\n"
				  "the configuration file.\n\n"
				  "Report bugs to <pgsql-bugs@postgresql.org>.\n"), stdout);
}
1053
1054
1055 /*
1056  * Main loop of postmaster
1057  */
1058 static int
1059 ServerLoop(void)
1060 {
1061         fd_set          readmask;
1062         int                     nSockets;
1063         struct timeval now,
1064                                 later;
1065         struct timezone tz;
1066         int                     i;
1067
1068         gettimeofday(&now, &tz);
1069
1070         nSockets = initMasks(&readmask);
1071
1072         for (;;)
1073         {
1074                 Port       *port;
1075                 fd_set          rmask;
1076                 struct timeval timeout;
1077
1078                 /*
1079                  * The timeout for the select() below is normally set on the basis
1080                  * of the time to the next checkpoint.  However, if for some
1081                  * reason we don't have a next-checkpoint time, time out after 60
1082                  * seconds. This keeps checkpoint scheduling from locking up when
1083                  * we get new connection requests infrequently (since we are
1084                  * likely to detect checkpoint completion just after enabling
1085                  * signals below, after we've already made the decision about how
1086                  * long to wait this time).
1087                  */
1088                 timeout.tv_sec = 60;
1089                 timeout.tv_usec = 0;
1090
1091                 if (CheckPointPID == 0 && checkpointed &&
1092                         StartupPID == 0 && Shutdown == NoShutdown &&
1093                         !FatalError && random_seed != 0)
1094                 {
1095                         time_t          now = time(NULL);
1096
1097                         if (CheckPointTimeout + checkpointed > now)
1098                         {
1099                                 /*
1100                                  * Not time for checkpoint yet, so set select timeout
1101                                  */
1102                                 timeout.tv_sec = CheckPointTimeout + checkpointed - now;
1103                         }
1104                         else
1105                         {
1106                                 /* Time to make the checkpoint... */
1107                                 CheckPointPID = CheckPointDataBase();
1108
1109                                 /*
1110                                  * if fork failed, schedule another try at 0.1 normal
1111                                  * delay
1112                                  */
1113                                 if (CheckPointPID == 0)
1114                                 {
1115                                         timeout.tv_sec = CheckPointTimeout / 10;
1116                                         checkpointed = now + timeout.tv_sec - CheckPointTimeout;
1117                                 }
1118                         }
1119                 }
1120
1121                 /*
1122                  * If no background writer process is running and we should do
1123                  * background writing, start one. It doesn't matter if this fails,
1124                  * we'll just try again later.
1125                  */
1126                 if (BgWriterPID == 0 && BgWriterPercent > 0 &&
1127                         StartupPID == 0 && Shutdown == NoShutdown &&
1128                         !FatalError && random_seed != 0)
1129                         BgWriterPID = StartBackgroundWriter();
1130
1131                 /*
1132                  * Wait for something to happen.
1133                  */
1134                 memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1135
1136                 PG_SETMASK(&UnBlockSig);
1137
1138                 if (select(nSockets, &rmask, NULL, NULL, &timeout) < 0)
1139                 {
1140                         PG_SETMASK(&BlockSig);
1141                         if (errno == EINTR || errno == EWOULDBLOCK)
1142                                 continue;
1143                         ereport(LOG,
1144                                         (errcode_for_socket_access(),
1145                                          errmsg("select() failed in postmaster: %m")));
1146                         return STATUS_ERROR;
1147                 }
1148
1149                 /*
1150                  * Block all signals until we wait again.  (This makes it safe for
1151                  * our signal handlers to do nontrivial work.)
1152                  */
1153                 PG_SETMASK(&BlockSig);
1154
1155                 /*
1156                  * Select a random seed at the time of first receiving a request.
1157                  */
1158                 while (random_seed == 0)
1159                 {
1160                         gettimeofday(&later, &tz);
1161
1162                         /*
1163                          * We are not sure how much precision is in tv_usec, so we
1164                          * swap the nibbles of 'later' and XOR them with 'now'. On the
1165                          * off chance that the result is 0, we loop until it isn't.
1166                          */
1167                         random_seed = now.tv_usec ^
1168                                 ((later.tv_usec << 16) |
1169                                  ((later.tv_usec >> 16) & 0xffff));
1170                 }
1171
1172                 /*
1173                  * New connection pending on any of our sockets? If so, fork a
1174                  * child process to deal with it.
1175                  */
1176                 for (i = 0; i < MAXLISTEN; i++)
1177                 {
1178                         if (ListenSocket[i] == -1)
1179                                 break;
1180                         if (FD_ISSET(ListenSocket[i], &rmask))
1181                         {
1182                                 port = ConnCreate(ListenSocket[i]);
1183                                 if (port)
1184                                 {
1185                                         BackendStartup(port);
1186
1187                                         /*
1188                                          * We no longer need the open socket or port structure
1189                                          * in this process
1190                                          */
1191                                         StreamClose(port->sock);
1192                                         ConnFree(port);
1193                                 }
1194                         }
1195                 }
1196
1197                 /* If we have lost the stats collector, try to start a new one */
1198                 if (!pgstat_is_running)
1199                         pgstat_start();
1200         }
1201 }
1202
1203
1204 /*
1205  * Initialise the masks for select() for the ports
1206  * we are listening on.  Return the number of sockets to listen on.
1207  */
1208 static int
1209 initMasks(fd_set *rmask)
1210 {
1211         int                     nsocks = -1;
1212         int                     i;
1213
1214         FD_ZERO(rmask);
1215
1216         for (i = 0; i < MAXLISTEN; i++)
1217         {
1218                 int                     fd = ListenSocket[i];
1219
1220                 if (fd == -1)
1221                         break;
1222                 FD_SET(fd, rmask);
1223                 if (fd > nsocks)
1224                         nsocks = fd;
1225         }
1226
1227         return nsocks + 1;
1228 }
1229
1230
1231 /*
1232  * Read the startup packet and do something according to it.
1233  *
1234  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1235  * not return at all.
1236  *
1237  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1238  * if that's what you want.  Return STATUS_ERROR if you don't want to
1239  * send anything to the client, which would typically be appropriate
1240  * if we detect a communications failure.)
1241  */
1242 static int
1243 ProcessStartupPacket(Port *port, bool SSLdone)
1244 {
1245         int32           len;
1246         void       *buf;
1247         ProtocolVersion proto;
1248         MemoryContext oldcontext;
1249
1250         if (pq_getbytes((char *) &len, 4) == EOF)
1251         {
1252                 /*
1253                  * EOF after SSLdone probably means the client didn't like our
1254                  * response to NEGOTIATE_SSL_CODE.      That's not an error condition,
1255                  * so don't clutter the log with a complaint.
1256                  */
1257                 if (!SSLdone)
1258                         ereport(COMMERROR,
1259                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1260                                          errmsg("incomplete startup packet")));
1261                 return STATUS_ERROR;
1262         }
1263
1264         len = ntohl(len);
1265         len -= 4;
1266
1267         if (len < (int32) sizeof(ProtocolVersion) ||
1268                 len > MAX_STARTUP_PACKET_LENGTH)
1269         {
1270                 ereport(COMMERROR,
1271                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1272                                  errmsg("invalid length of startup packet")));
1273                 return STATUS_ERROR;
1274         }
1275
1276         /*
1277          * Allocate at least the size of an old-style startup packet, plus one
1278          * extra byte, and make sure all are zeroes.  This ensures we will
1279          * have null termination of all strings, in both fixed- and
1280          * variable-length packet layouts.
1281          */
1282         if (len <= (int32) sizeof(StartupPacket))
1283                 buf = palloc0(sizeof(StartupPacket) + 1);
1284         else
1285                 buf = palloc0(len + 1);
1286
1287         if (pq_getbytes(buf, len) == EOF)
1288         {
1289                 ereport(COMMERROR,
1290                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1291                                  errmsg("incomplete startup packet")));
1292                 return STATUS_ERROR;
1293         }
1294
1295         /*
1296          * The first field is either a protocol version number or a special
1297          * request code.
1298          */
1299         port->proto = proto = ntohl(*((ProtocolVersion *) buf));
1300
1301         if (proto == CANCEL_REQUEST_CODE)
1302         {
1303                 processCancelRequest(port, buf);
1304                 return 127;                             /* XXX */
1305         }
1306
1307         if (proto == NEGOTIATE_SSL_CODE && !SSLdone)
1308         {
1309                 char            SSLok;
1310
1311 #ifdef USE_SSL
1312                 /* No SSL when disabled or on Unix sockets */
1313                 if (!EnableSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
1314                         SSLok = 'N';
1315                 else
1316                         SSLok = 'S';            /* Support for SSL */
1317 #else
1318                 SSLok = 'N';                    /* No support for SSL */
1319 #endif
1320                 if (send(port->sock, &SSLok, 1, 0) != 1)
1321                 {
1322                         ereport(COMMERROR,
1323                                         (errcode_for_socket_access(),
1324                                  errmsg("failed to send SSL negotiation response: %m")));
1325                         return STATUS_ERROR;    /* close the connection */
1326                 }
1327
1328 #ifdef USE_SSL
1329                 if (SSLok == 'S' && secure_open_server(port) == -1)
1330                         return STATUS_ERROR;
1331 #endif
1332                 /* regular startup packet, cancel, etc packet should follow... */
1333                 /* but not another SSL negotiation request */
1334                 return ProcessStartupPacket(port, true);
1335         }
1336
1337         /* Could add additional special packet types here */
1338
1339         /*
1340          * Set FrontendProtocol now so that ereport() knows what format to
1341          * send if we fail during startup.
1342          */
1343         FrontendProtocol = proto;
1344
1345         /* Check we can handle the protocol the frontend is using. */
1346
1347         if (PG_PROTOCOL_MAJOR(proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) ||
1348           PG_PROTOCOL_MAJOR(proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) ||
1349         (PG_PROTOCOL_MAJOR(proto) == PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) &&
1350          PG_PROTOCOL_MINOR(proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST)))
1351                 ereport(FATAL,
1352                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1353                                  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
1354                                           PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
1355                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST),
1356                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST),
1357                                                 PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST))));
1358
1359         /*
1360          * Now fetch parameters out of startup packet and save them into the
1361          * Port structure.      All data structures attached to the Port struct
1362          * must be allocated in TopMemoryContext so that they won't disappear
1363          * when we pass them to PostgresMain (see BackendRun).  We need not
1364          * worry about leaking this storage on failure, since we aren't in the
1365          * postmaster process anymore.
1366          */
1367         oldcontext = MemoryContextSwitchTo(TopMemoryContext);
1368
1369         if (PG_PROTOCOL_MAJOR(proto) >= 3)
1370         {
1371                 int32           offset = sizeof(ProtocolVersion);
1372
1373                 /*
1374                  * Scan packet body for name/option pairs.      We can assume any
1375                  * string beginning within the packet body is null-terminated,
1376                  * thanks to zeroing extra byte above.
1377                  */
1378                 port->guc_options = NIL;
1379
1380                 while (offset < len)
1381                 {
1382                         char       *nameptr = ((char *) buf) + offset;
1383                         int32           valoffset;
1384                         char       *valptr;
1385
1386                         if (*nameptr == '\0')
1387                                 break;                  /* found packet terminator */
1388                         valoffset = offset + strlen(nameptr) + 1;
1389                         if (valoffset >= len)
1390                                 break;                  /* missing value, will complain below */
1391                         valptr = ((char *) buf) + valoffset;
1392
1393                         if (strcmp(nameptr, "database") == 0)
1394                                 port->database_name = pstrdup(valptr);
1395                         else if (strcmp(nameptr, "user") == 0)
1396                                 port->user_name = pstrdup(valptr);
1397                         else if (strcmp(nameptr, "options") == 0)
1398                                 port->cmdline_options = pstrdup(valptr);
1399                         else
1400                         {
1401                                 /* Assume it's a generic GUC option */
1402                                 port->guc_options = lappend(port->guc_options,
1403                                                                                         pstrdup(nameptr));
1404                                 port->guc_options = lappend(port->guc_options,
1405                                                                                         pstrdup(valptr));
1406                         }
1407                         offset = valoffset + strlen(valptr) + 1;
1408                 }
1409
1410                 /*
1411                  * If we didn't find a packet terminator exactly at the end of the
1412                  * given packet length, complain.
1413                  */
1414                 if (offset != len - 1)
1415                         ereport(FATAL,
1416                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1417                                          errmsg("invalid startup packet layout: expected terminator as last byte")));
1418         }
1419         else
1420         {
1421                 /*
1422                  * Get the parameters from the old-style, fixed-width-fields
1423                  * startup packet as C strings.  The packet destination was
1424                  * cleared first so a short packet has zeros silently added.  We
1425                  * have to be prepared to truncate the pstrdup result for oversize
1426                  * fields, though.
1427                  */
1428                 StartupPacket *packet = (StartupPacket *) buf;
1429
1430                 port->database_name = pstrdup(packet->database);
1431                 if (strlen(port->database_name) > sizeof(packet->database))
1432                         port->database_name[sizeof(packet->database)] = '\0';
1433                 port->user_name = pstrdup(packet->user);
1434                 if (strlen(port->user_name) > sizeof(packet->user))
1435                         port->user_name[sizeof(packet->user)] = '\0';
1436                 port->cmdline_options = pstrdup(packet->options);
1437                 if (strlen(port->cmdline_options) > sizeof(packet->options))
1438                         port->cmdline_options[sizeof(packet->options)] = '\0';
1439                 port->guc_options = NIL;
1440         }
1441
1442         /* Check a user name was given. */
1443         if (port->user_name == NULL || port->user_name[0] == '\0')
1444                 ereport(FATAL,
1445                                 (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
1446                  errmsg("no PostgreSQL user name specified in startup packet")));
1447
1448         /* The database defaults to the user name. */
1449         if (port->database_name == NULL || port->database_name[0] == '\0')
1450                 port->database_name = pstrdup(port->user_name);
1451
1452         if (Db_user_namespace)
1453         {
1454                 /*
1455                  * If user@, it is a global user, remove '@'. We only want to do
1456                  * this if there is an '@' at the end and no earlier in the user
1457                  * string or they may fake as a local user of another database
1458                  * attaching to this database.
1459                  */
1460                 if (strchr(port->user_name, '@') ==
1461                         port->user_name + strlen(port->user_name) - 1)
1462                         *strchr(port->user_name, '@') = '\0';
1463                 else
1464                 {
1465                         /* Append '@' and dbname */
1466                         char       *db_user;
1467
1468                         db_user = palloc(strlen(port->user_name) +
1469                                                          strlen(port->database_name) + 2);
1470                         sprintf(db_user, "%s@%s", port->user_name, port->database_name);
1471                         port->user_name = db_user;
1472                 }
1473         }
1474
1475         /*
1476          * Truncate given database and user names to length of a Postgres
1477          * name.  This avoids lookup failures when overlength names are given.
1478          */
1479         if (strlen(port->database_name) >= NAMEDATALEN)
1480                 port->database_name[NAMEDATALEN - 1] = '\0';
1481         if (strlen(port->user_name) >= NAMEDATALEN)
1482                 port->user_name[NAMEDATALEN - 1] = '\0';
1483
1484         /*
1485          * Done putting stuff in TopMemoryContext.
1486          */
1487         MemoryContextSwitchTo(oldcontext);
1488
1489         /*
1490          * If we're going to reject the connection due to database state, say
1491          * so now instead of wasting cycles on an authentication exchange.
1492          * (This also allows a pg_ping utility to be written.)
1493          */
1494         switch (port->canAcceptConnections)
1495         {
1496                 case CAC_STARTUP:
1497                         ereport(FATAL,
1498                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1499                                          errmsg("the database system is starting up")));
1500                         break;
1501                 case CAC_SHUTDOWN:
1502                         ereport(FATAL,
1503                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1504                                          errmsg("the database system is shutting down")));
1505                         break;
1506                 case CAC_RECOVERY:
1507                         ereport(FATAL,
1508                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1509                                          errmsg("the database system is in recovery mode")));
1510                         break;
1511                 case CAC_TOOMANY:
1512                         ereport(FATAL,
1513                                         (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
1514                                          errmsg("sorry, too many clients already")));
1515                         break;
1516                 case CAC_OK:
1517                 default:
1518                         break;
1519         }
1520
1521         return STATUS_OK;
1522 }
1523
1524
1525 /*
1526  * The client has sent a cancel request packet, not a normal
1527  * start-a-new-connection packet.  Perform the necessary processing.
1528  * Nothing is sent back to the client.
1529  */
1530 static void
1531 processCancelRequest(Port *port, void *pkt)
1532 {
1533         CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
1534         int                     backendPID;
1535         long            cancelAuthCode;
1536         Backend    *bp;
1537 #ifndef EXEC_BACKEND
1538         Dlelem     *curr;
1539 #else
1540         int                     i;
1541 #endif
1542
1543         backendPID = (int) ntohl(canc->backendPID);
1544         cancelAuthCode = (long) ntohl(canc->cancelAuthCode);
1545
1546         if (backendPID == CheckPointPID)
1547         {
1548                 ereport(DEBUG2,
1549                                 (errmsg_internal("ignoring cancel request for checkpoint process %d",
1550                                                                  backendPID)));
1551                 return;
1552         }
1553         else if (backendPID == BgWriterPID)
1554         {
1555                 ereport(DEBUG2,
1556                                 (errmsg_internal("ignoring cancel request for bgwriter process %d",
1557                                                                  backendPID)));
1558                 return;
1559         }
1560
1561         /*
1562          * See if we have a matching backend.  In the EXEC_BACKEND case, we
1563          * can no longer access the postmaster's own backend list, and must
1564          * rely on the backup array in shared memory.
1565          */
1566 #ifndef EXEC_BACKEND
1567         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
1568         {
1569                 bp = (Backend *) DLE_VAL(curr);
1570 #else
1571         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
1572         {
1573                 bp = (Backend *) &ShmemBackendArray[i];
1574 #endif
1575                 if (bp->pid == backendPID)
1576                 {
1577                         if (bp->cancel_key == cancelAuthCode)
1578                         {
1579                                 /* Found a match; signal that backend to cancel current op */
1580                                 ereport(DEBUG2,
1581                                                 (errmsg_internal("processing cancel request: sending SIGINT to process %d",
1582                                                                                  backendPID)));
1583                                 kill(bp->pid, SIGINT);
1584                         }
1585                         else
1586                                 /* Right PID, wrong key: no way, Jose */
1587                                 ereport(DEBUG2,
1588                                                 (errmsg_internal("bad key in cancel request for process %d",
1589                                                                                  backendPID)));
1590                         return;
1591                 }
1592         }
1593
1594         /* No matching backend */
1595         ereport(DEBUG2,
1596                         (errmsg_internal("bad pid in cancel request for process %d",
1597                                                          backendPID)));
1598 }
1599
1600 /*
1601  * canAcceptConnections --- check to see if database state allows connections.
1602  */
1603 static enum CAC_state
1604 canAcceptConnections(void)
1605 {
1606         /* Can't start backends when in startup/shutdown/recovery state. */
1607         if (Shutdown > NoShutdown)
1608                 return CAC_SHUTDOWN;
1609         if (StartupPID)
1610                 return CAC_STARTUP;
1611         if (FatalError)
1612                 return CAC_RECOVERY;
1613
1614         /*
1615          * Don't start too many children.
1616          *
1617          * We allow more connections than we can have backends here because some
1618          * might still be authenticating; they might fail auth, or some
1619          * existing backend might exit before the auth cycle is completed. The
1620          * exact MaxBackends limit is enforced when a new backend tries to
1621          * join the shared-inval backend array.
1622          */
1623         if (CountChildren() >= 2 * MaxBackends)
1624                 return CAC_TOOMANY;
1625
1626         return CAC_OK;
1627 }
1628
1629
1630 /*
1631  * ConnCreate -- create a local connection data structure
1632  */
1633 static Port *
1634 ConnCreate(int serverFd)
1635 {
1636         Port       *port;
1637
1638         if (!(port = (Port *) calloc(1, sizeof(Port))))
1639         {
1640                 ereport(LOG,
1641                                 (errcode(ERRCODE_OUT_OF_MEMORY),
1642                                  errmsg("out of memory")));
1643                 ExitPostmaster(1);
1644         }
1645
1646         if (StreamConnection(serverFd, port) != STATUS_OK)
1647         {
1648                 StreamClose(port->sock);
1649                 ConnFree(port);
1650                 port = NULL;
1651         }
1652         else
1653         {
1654                 /*
1655                  * Precompute password salt values to use for this connection.
1656                  * It's slightly annoying to do this long in advance of knowing
1657                  * whether we'll need 'em or not, but we must do the random()
1658                  * calls before we fork, not after.  Else the postmaster's random
1659                  * sequence won't get advanced, and all backends would end up
1660                  * using the same salt...
1661                  */
1662                 RandomSalt(port->cryptSalt, port->md5Salt);
1663         }
1664
1665         return port;
1666 }
1667
1668
1669 /*
1670  * ConnFree -- free a local connection data structure
1671  */
1672 static void
1673 ConnFree(Port *conn)
1674 {
1675 #ifdef USE_SSL
1676         secure_close(conn);
1677 #endif
1678         free(conn);
1679 }
1680
1681
1682 /*
1683  * ClosePostmasterPorts -- close all the postmaster's open sockets
1684  *
1685  * This is called during child process startup to release file descriptors
1686  * that are not needed by that child process.  The postmaster still has
1687  * them open, of course.
1688  */
1689 void
1690 ClosePostmasterPorts(bool pgstat_too)
1691 {
1692         int                     i;
1693
1694         /* Close the listen sockets */
1695         for (i = 0; i < MAXLISTEN; i++)
1696         {
1697                 if (ListenSocket[i] != -1)
1698                 {
1699                         StreamClose(ListenSocket[i]);
1700                         ListenSocket[i] = -1;
1701                 }
1702         }
1703
1704         /* Close pgstat control sockets, unless we're starting pgstat itself */
1705         if (pgstat_too)
1706                 pgstat_close_sockets();
1707 }
1708
1709
1710 /*
1711  * reset_shared -- reset shared memory and semaphores
1712  */
1713 static void
1714 reset_shared(unsigned short port)
1715 {
1716         /*
1717          * Create or re-create shared memory and semaphores.
1718          *
1719          * Note: in each "cycle of life" we will normally assign the same IPC
1720          * keys (if using SysV shmem and/or semas), since the port number is
1721          * used to determine IPC keys.  This helps ensure that we will clean
1722          * up dead IPC objects if the postmaster crashes and is restarted.
1723          */
1724         CreateSharedMemoryAndSemaphores(false, MaxBackends, port);
1725 }
1726
1727
1728 /*
1729  * SIGHUP -- reread config files, and tell children to do same
1730  */
1731 static void
1732 SIGHUP_handler(SIGNAL_ARGS)
1733 {
1734         int                     save_errno = errno;
1735
1736         PG_SETMASK(&BlockSig);
1737
1738         if (Shutdown <= SmartShutdown)
1739         {
1740                 ereport(LOG,
1741                          (errmsg("received SIGHUP, reloading configuration files")));
1742                 ProcessConfigFile(PGC_SIGHUP);
1743                 SignalChildren(SIGHUP);
1744                 load_hba();
1745                 load_ident();
1746
1747 #ifdef EXEC_BACKEND
1748                 /* Update the starting-point file for future children */
1749                 write_nondefault_variables(PGC_SIGHUP);
1750 #endif
1751
1752                 /*
1753                  * Tell the background writer to terminate so that we will start a
1754                  * new one with a possibly changed config
1755                  */
1756                 if (BgWriterPID != 0)
1757                         kill(BgWriterPID, SIGTERM);
1758         }
1759
1760         PG_SETMASK(&UnBlockSig);
1761
1762         errno = save_errno;
1763 }
1764
1765
1766 /*
1767  * pmdie -- signal handler for processing various postmaster signals.
      *
      * The signal number is read from postgres_signal_arg and selects the
      * shutdown mode:
      *    SIGTERM - smart shutdown
      *    SIGINT  - fast shutdown
      *    SIGQUIT - immediate shutdown
      * Any other signal number falls through the switch with no action.
      *
      * The handler installs BlockSig as the signal mask while it works and
      * reinstalls UnBlockSig before returning.  errno is saved on entry and
      * restored on exit.  The SIGQUIT path ends in ExitPostmaster(0).
1768  */
1769 static void
1770 pmdie(SIGNAL_ARGS)
1771 {
1772         int                     save_errno = errno;
1773
1774         PG_SETMASK(&BlockSig);
1775
1776         ereport(DEBUG2,
1777                         (errmsg_internal("postmaster received signal %d",
1778                                                          postgres_signal_arg)));
1779
1780         switch (postgres_signal_arg)
1781         {
1782                 case SIGTERM:
1783
1784                         /*
1785                          * Smart Shutdown:
1786                          *
1787                          * Wait for children to end their work and ShutdownDataBase.
1788                          */
                             /* Nothing to do if the shutdown state is already SmartShutdown or beyond. */
1789                         if (Shutdown >= SmartShutdown)
1790                                 break;
1791                         Shutdown = SmartShutdown;
1792                         ereport(LOG,
1793                                         (errmsg("received smart shutdown request")));
1794
1795                         /* Must tell bgwriter to quit, or it never will... */
1796                         if (BgWriterPID != 0)
1797                                 kill(BgWriterPID, SIGTERM);
1798
                             /* While any entry remains in BackendList, the shutdown is left to reaper(). */
1799                         if (DLGetHead(BackendList)) /* let reaper() handle this */
1800                                 break;
1801
1802                         /*
1803                          * No children left. Shutdown data base system.
1804                          */
1805                         if (StartupPID > 0 || FatalError)       /* let reaper() handle
1806                                                                                                  * this */
1807                                 break;
                             /* In this mode an already-running shutdown process is treated as a fatal inconsistency. */
1808                         if (ShutdownPID > 0)
1809                         {
1810                                 elog(PANIC, "shutdown process %d already running",
1811                                          (int) ShutdownPID);
                                     /* presumably not reached if elog(PANIC) does not return -- TODO confirm */
1812                                 abort();
1813                         }
1814
                             /* Launch the shutdown process and remember its PID. */
1815                         ShutdownPID = ShutdownDataBase();
1816                         break;
1817
1818                 case SIGINT:
1819
1820                         /*
1821                          * Fast Shutdown:
1822                          *
1823                          * Abort all children with SIGTERM (rollback active transactions
1824                          * and exit) and ShutdownDataBase when they are gone.
1825                          */
                             /* Nothing to do if the shutdown state is already FastShutdown or beyond. */
1826                         if (Shutdown >= FastShutdown)
1827                                 break;
1828                         Shutdown = FastShutdown;
1829                         ereport(LOG,
1830                                         (errmsg("received fast shutdown request")));
1831
1832                         if (DLGetHead(BackendList))
1833                         {
                                     /* Children are signalled only when FatalError is not set. */
1834                                 if (!FatalError)
1835                                 {
1836                                         ereport(LOG,
1837                                                         (errmsg("aborting any active transactions")));
1838                                         SignalChildren(SIGTERM);
1839                                         /* reaper() does the rest */
1840                                 }
1841                                 break;
1842                         }
1843
1844                         /*
1845                          * No children left. Shutdown data base system.
1846                          *
1847                          * Unlike the previous case, it is not an error for the shutdown
1848                          * process to be running already (we could get SIGTERM
1849                          * followed shortly later by SIGINT).
1850                          */
1851                         if (StartupPID > 0 || FatalError)       /* let reaper() handle
1852                                                                                                  * this */
1853                                 break;
                             /* Start a shutdown process only if none is recorded yet. */
1854                         if (ShutdownPID == 0)
1855                                 ShutdownPID = ShutdownDataBase();
1856                         break;
1857
1858                 case SIGQUIT:
1859
1860                         /*
1861                          * Immediate Shutdown:
1862                          *
1863                          * abort all children with SIGQUIT and exit without attempt to
1864                          * properly shutdown data base system.
1865                          */
1866                         ereport(LOG,
1867                                         (errmsg("received immediate shutdown request")));
                             /* SIGQUIT goes to the shutdown process, the startup process and every listed backend. */
1868                         if (ShutdownPID > 0)
1869                                 kill(ShutdownPID, SIGQUIT);
1870                         if (StartupPID > 0)
1871                                 kill(StartupPID, SIGQUIT);
1872                         if (DLGetHead(BackendList))
1873                                 SignalChildren(SIGQUIT);
                             /* Exit with status 0 without waiting for the children here. */
1874                         ExitPostmaster(0);
1875                         break;
1876         }
1877
1878         PG_SETMASK(&UnBlockSig);
1879
1880         errno = save_errno;
1881 }
1882
1883 /*
1884  * Reaper -- signal handler to cleanup after a backend (child) dies.
      *
      * Collects exited children in a non-blocking loop and dispatches on
      * which child each one was:
      *    statistics collector - logged, then pgstat_start() is called
      *    shutdown process     - the postmaster exits (status 1 if the
      *                           child's exit status was nonzero, else 0)
      *    startup process      - nonzero status makes the postmaster exit;
      *                           otherwise startup/recovery is marked done
      *    anything else        - handed to CleanupProc()
      *
      * After the loop, if FatalError is set and no children remain, shared
      * memory is reset and a new startup process is launched.  Otherwise,
      * if a shutdown is pending and no children remain, the shutdown
      * process is launched.
      *
      * The handler installs BlockSig while it works and reinstalls
      * UnBlockSig at reaper_done; errno is saved on entry and restored on
      * exit.
1885  */
1886 static void
1887 reaper(SIGNAL_ARGS)
1888 {
1889         int                     save_errno = errno;
1890
             /* The type of "status" depends on which wait primitive is available; WIN32 needs none. */
1891 #ifdef HAVE_WAITPID
1892         int                     status;                 /* backend exit status */
1893
1894 #else
1895 #ifndef WIN32
1896         union wait      status;                 /* backend exit status */
1897 #endif
1898 #endif
1899         int                     exitstatus;
1900         int                     pid;                    /* process id of dead backend */
1901
1902         PG_SETMASK(&BlockSig);
1903
1904         ereport(DEBUG4,
1905                         (errmsg_internal("reaping dead processes")));
             /*
              * Three alternative loop headers follow (waitpid, wait3, or
              * win32_waitpid).  Each one opens the same loop body and leaves
              * the child's exit status in exitstatus.
              */
1906 #ifdef HAVE_WAITPID
1907         while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
1908         {
1909                 exitstatus = status;
1910 #else
1911 #ifndef WIN32
1912         while ((pid = wait3(&status, WNOHANG, NULL)) > 0)
1913         {
1914                 exitstatus = status.w_status;
1915 #else
1916         while ((pid = win32_waitpid(&exitstatus)) > 0)
1917         {
1918                 /*
1919                  * We need to do this here, and not in CleanupProc, since this is
1920                  * to be called on all children when we are done with them. Could
1921                  * move to LogChildExit, but that seems like asking for future
1922                  * trouble...
1923                  */
1924                 win32_RemoveChild(pid);
1925 #endif /* WIN32 */
1926 #endif /* HAVE_WAITPID */
1927
1928                 /*
1929                  * Check if this child was the statistics collector. If so, try to
1930                  * start a new one.  (If fail, we'll try again in future cycles of
1931                  * the main loop.)
1932                  */
1933                 if (pgstat_ispgstat(pid))
1934                 {
1935                         LogChildExit(LOG, gettext("statistics collector process"),
1936                                                  pid, exitstatus);
1937                         pgstat_start();
1938                         continue;
1939                 }
1940
1941                 /*
1942                  * Check if this child was a shutdown or startup process.
1943                  */
1944                 if (ShutdownPID > 0 && pid == ShutdownPID)
1945                 {
                             /* A nonzero status from the shutdown process is logged and the postmaster exits with status 1. */
1946                         if (exitstatus != 0)
1947                         {
1948                                 LogChildExit(LOG, gettext("shutdown process"),
1949                                                          pid, exitstatus);
1950                                 ExitPostmaster(1);
1951                         }
1952                         /* Normal postmaster exit is here */
1953                         ExitPostmaster(0);
1954                 }
1955
1956                 if (StartupPID > 0 && pid == StartupPID)
1957                 {
                             /* A nonzero status from the startup process aborts the postmaster. */
1958                         if (exitstatus != 0)
1959                         {
1960                                 LogChildExit(LOG, gettext("startup process"),
1961                                                          pid, exitstatus);
1962                                 ereport(LOG,
1963                                                 (errmsg("aborting startup due to startup process failure")));
1964                                 ExitPostmaster(1);
1965                         }
1966                         StartupPID = 0;
1967
1968                         /*
1969                          * Startup succeeded - we are done with system startup or recovery.
1970                          */
1971                         FatalError = false;
1972
1973                         /*
1974                          * Arrange for first checkpoint to occur after standard delay.
1975                          */
1976                         CheckPointPID = 0;
1977                         checkpointed = time(NULL);
1978
1979                         /*
1980                          * Go to shutdown mode if a shutdown request was pending.
1981                          */
1982                         if (Shutdown > NoShutdown)
1983                         {
1984                                 if (ShutdownPID > 0)
1985                                 {
1986                                         elog(PANIC, "startup process %d died while shutdown process %d already running",
1987                                                  pid, (int) ShutdownPID);
                                             /* presumably not reached if elog(PANIC) does not return -- TODO confirm */
1988                                         abort();
1989                                 }
1990                                 ShutdownPID = ShutdownDataBase();
1991                         }
1992
                             /*
                              * NOTE: this jumps out of the wait loop, so any other children
                              * that have already exited are not collected during this
                              * invocation of the handler.
                              */
1993                         goto reaper_done;
1994                 }
1995
1996                 /*
1997                  * Else do standard child cleanup.
1998                  */
1999                 CleanupProc(pid, exitstatus);
2000
2001         }                                                       /* loop over pending child-death reports */
2002
2003         if (FatalError)
2004         {
2005                 /*
2006                  * Wait for all children exit, then reset shmem and
2007                  * StartupDataBase.
2008                  */
2009                 if (DLGetHead(BackendList) || StartupPID > 0 || ShutdownPID > 0)
2010                         goto reaper_done;
2011                 ereport(LOG,
2012                         (errmsg("all server processes terminated; reinitializing")));
2013
                     /* Tear down and re-create shared memory and semaphores, then launch a startup process. */
2014                 shmem_exit(0);
2015                 reset_shared(PostPortNumber);
2016
2017                 StartupPID = StartupDataBase();
2018
2019                 goto reaper_done;
2020         }
2021
             /* A shutdown is pending: launch the shutdown process once no backend, startup or shutdown process remains. */
2022         if (Shutdown > NoShutdown)
2023         {
2024                 if (DLGetHead(BackendList))
2025                         goto reaper_done;
2026                 if (StartupPID > 0 || ShutdownPID > 0)
2027                         goto reaper_done;
2028                 ShutdownPID = ShutdownDataBase();
2029         }
2030
             /* Common exit: reinstall the unblocked signal mask and restore errno. */
2031 reaper_done:
2032         PG_SETMASK(&UnBlockSig);
2033
2034         errno = save_errno;
2035 }
2036
2037
2038 /*
2039  * CleanupProc -- cleanup after terminated backend.
2040  *
2041  * Remove all local state associated with backend.
2042  */
2043 static void
2044 CleanupProc(int pid,
2045                         int exitstatus)         /* child's exit status. */
2046 {
2047         Dlelem     *curr,
2048                            *next;
2049         Backend    *bp;
2050
2051         LogChildExit(DEBUG2, gettext("child process"), pid, exitstatus);
2052
2053         /*
2054          * If a backend dies in an ugly way (i.e. exit status not 0) then we
2055          * must signal all other backends to quickdie.  If exit status is zero
2056          * we assume everything is hunky dory and simply remove the backend
2057          * from the active backend list.
2058          */
2059         if (exitstatus == 0)
2060         {
2061                 curr = DLGetHead(BackendList);
2062                 while (curr)
2063                 {
2064                         bp = (Backend *) DLE_VAL(curr);
2065                         if (bp->pid == pid)
2066                         {
2067 #ifdef EXEC_BACKEND
2068                                 ShmemBackendArrayRemove(bp->pid);
2069 #endif
2070                                 DLRemove(curr);
2071                                 free(bp);
2072                                 DLFreeElem(curr);
2073                                 break;
2074                         }
2075                         curr = DLGetSucc(curr);
2076                 }
2077
2078                 if (pid == CheckPointPID)
2079                 {
2080                         CheckPointPID = 0;
2081                         if (!FatalError)
2082                         {
2083                                 checkpointed = time(NULL);
2084                         }
2085                 }
2086                 else if (pid == BgWriterPID)
2087                         BgWriterPID = 0;
2088                 else
2089                         pgstat_beterm(pid);
2090
2091                 return;
2092         }
2093
2094         /* below here we're dealing with a non-normal exit */
2095
2096         /* Make log entry unless we did so already */
2097         if (!FatalError)
2098         {
2099                 LogChildExit(LOG,
2100                                  (pid == CheckPointPID) ? gettext("checkpoint process") :
2101                                          (pid == BgWriterPID) ? gettext("bgwriter process") :
2102                                          gettext("server process"),
2103                                          pid, exitstatus);
2104                 ereport(LOG,
2105                           (errmsg("terminating any other active server processes")));
2106         }
2107
2108         curr = DLGetHead(BackendList);
2109         while (curr)
2110         {
2111                 next = DLGetSucc(curr);
2112                 bp = (Backend *) DLE_VAL(curr);
2113                 if (bp->pid != pid)
2114                 {
2115                         /*
2116                          * This backend is still alive.  Unless we did so already,
2117                          * tell it to commit hara-kiri.
2118                          *
2119                          * SIGQUIT is the special signal that says exit without proc_exit
2120                          * and let the user know what's going on. But if SendStop is
2121                          * set (-s on command line), then we send SIGSTOP instead, so
2122                          * that we can get core dumps from all backends by hand.
2123                          */
2124                         if (!FatalError)
2125                         {
2126                                 ereport(DEBUG2,
2127                                                 (errmsg_internal("sending %s to process %d",
2128                                                                           (SendStop ? "SIGSTOP" : "SIGQUIT"),
2129                                                                                  (int) bp->pid)));
2130                                 kill(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
2131                         }
2132                 }
2133                 else
2134                 {
2135                         /*
2136                          * Found entry for freshly-dead backend, so remove it.
2137                          */
2138 #ifdef EXEC_BACKEND
2139                         ShmemBackendArrayRemove(bp->pid);
2140 #endif
2141                         DLRemove(curr);
2142                         free(bp);
2143                         DLFreeElem(curr);
2144                 }
2145                 curr = next;
2146         }
2147
2148         if (pid == CheckPointPID)
2149         {
2150                 CheckPointPID = 0;
2151                 checkpointed = 0;
2152         }
2153         else if (pid == BgWriterPID)
2154                 BgWriterPID = 0;
2155         else
2156         {
2157                 /*
2158                  * Tell the collector about backend termination
2159                  */
2160                 pgstat_beterm(pid);
2161         }
2162
2163         FatalError = true;
2164 }
2165
/*
 * LogChildExit -- report how a child process ended.
 *
 * lev is the message level handed to ereport(), procname a noun phrase
 * naming the child, pid its process id and exitstatus its wait status.
 * The message says whether the child exited with an exit code, was
 * terminated by a signal, or ended with a status that is neither.
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
	if (WIFEXITED(exitstatus))
	{
		ereport(lev,

		/*
		 * translator: %s is a noun phrase describing a child process,
		 * such as "server process"
		 */
				(errmsg("%s (PID %d) exited with exit code %d",
						procname, pid, WEXITSTATUS(exitstatus))));
		return;
	}

	if (WIFSIGNALED(exitstatus))
	{
		ereport(lev,

		/*
		 * translator: %s is a noun phrase describing a child process,
		 * such as "server process"
		 */
				(errmsg("%s (PID %d) was terminated by signal %d",
						procname, pid, WTERMSIG(exitstatus))));
		return;
	}

	/* neither a normal exit nor a signal: report the raw status */
	ereport(lev,

	/*
	 * translator: %s is a noun phrase describing a child process,
	 * such as "server process"
	 */
			(errmsg("%s (PID %d) exited with unexpected status %d",
					procname, pid, exitstatus)));
}
2200
2201 /*
2202  * Send a signal to all backend children.
2203  */
2204 static void
2205 SignalChildren(int signal)
2206 {
2207         Dlelem     *curr,
2208                            *next;
2209         Backend    *bp;
2210
2211         curr = DLGetHead(BackendList);
2212         while (curr)
2213         {
2214                 next = DLGetSucc(curr);
2215                 bp = (Backend *) DLE_VAL(curr);
2216
2217                 if (bp->pid != MyProcPid)
2218                 {
2219                         ereport(DEBUG2,
2220                                         (errmsg_internal("sending signal %d to process %d",
2221                                                                          signal,
2222                                                                          (int) bp->pid)));
2223                         kill(bp->pid, signal);
2224                 }
2225
2226                 curr = next;
2227         }
2228 }
2229
2230 /*
2231  * BackendStartup -- start backend process
2232  *
2233  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
2234  */
2235 static int
2236 BackendStartup(Port *port)
2237 {
2238         Backend    *bn;                         /* for backend cleanup */
2239         pid_t           pid;
2240
2241 #ifdef LINUX_PROFILE
2242         struct itimerval prof_itimer;
2243 #endif
2244
2245         /*
2246          * Compute the cancel key that will be assigned to this backend. The
2247          * backend will have its own copy in the forked-off process' value of
2248          * MyCancelKey, so that it can transmit the key to the frontend.
2249          */
2250         MyCancelKey = PostmasterRandom();
2251
2252         /*
2253          * Make room for backend data structure.  Better before the fork() so
2254          * we can handle failure cleanly.
2255          */
2256         bn = (Backend *) malloc(sizeof(Backend));
2257         if (!bn)
2258         {
2259                 ereport(LOG,
2260                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2261                                  errmsg("out of memory")));
2262                 return STATUS_ERROR;
2263         }
2264
2265         /* Pass down canAcceptConnections state (kluge for EXEC_BACKEND case) */
2266         port->canAcceptConnections = canAcceptConnections();
2267
2268         /*
2269          * Flush stdio channels just before fork, to avoid double-output
2270          * problems. Ideally we'd use fflush(NULL) here, but there are still a
2271          * few non-ANSI stdio libraries out there (like SunOS 4.1.x) that
2272          * coredump if we do. Presently stdout and stderr are the only stdio
2273          * output channels used by the postmaster, so fflush'ing them should
2274          * be sufficient.
2275          */
2276         fflush(stdout);
2277         fflush(stderr);
2278
2279 #ifdef EXEC_BACKEND
2280
2281         pid = backend_forkexec(port);
2282
2283 #else /* !EXEC_BACKEND */
2284
2285 #ifdef LINUX_PROFILE
2286
2287         /*
2288          * Linux's fork() resets the profiling timer in the child process. If
2289          * we want to profile child processes then we need to save and restore
2290          * the timer setting.  This is a waste of time if not profiling,
2291          * however, so only do it if commanded by specific -DLINUX_PROFILE
2292          * switch.
2293          */
2294         getitimer(ITIMER_PROF, &prof_itimer);
2295 #endif
2296
2297 #ifdef __BEOS__
2298         /* Specific beos actions before backend startup */
2299         beos_before_backend_startup();
2300 #endif
2301
2302         pid = fork();
2303
2304         if (pid == 0)                           /* child */
2305         {
2306 #ifdef LINUX_PROFILE
2307                 setitimer(ITIMER_PROF, &prof_itimer, NULL);
2308 #endif
2309
2310 #ifdef __BEOS__
2311                 /* Specific beos backend startup actions */
2312                 beos_backend_startup();
2313 #endif
2314                 free(bn);
2315
2316                 proc_exit(BackendRun(port));
2317         }
2318
2319 #endif /* EXEC_BACKEND */
2320
2321         if (pid < 0)
2322         {
2323                 /* in parent, fork failed */
2324                 int                     save_errno = errno;
2325
2326 #ifdef __BEOS__
2327                 /* Specific beos backend startup actions */
2328                 beos_backend_startup_failed();
2329 #endif
2330                 free(bn);
2331                 errno = save_errno;
2332                 ereport(LOG,
2333                           (errmsg("could not fork new process for connection: %m")));
2334                 report_fork_failure_to_client(port, save_errno);
2335                 return STATUS_ERROR;
2336         }
2337
2338         /* in parent, successful fork */
2339         ereport(DEBUG2,
2340                         (errmsg_internal("forked new backend, pid=%d socket=%d",
2341                                                          (int) pid, port->sock)));
2342
2343         /*
2344          * Everything's been successful, it's safe to add this backend to our
2345          * list of backends.
2346          */
2347         bn->pid = pid;
2348         bn->cancel_key = MyCancelKey;
2349 #ifdef EXEC_BACKEND
2350         ShmemBackendArrayAdd(bn);
2351 #endif
2352         DLAddHead(BackendList, DLNewElem(bn));
2353
2354         return STATUS_OK;
2355 }
2356
2357 /*
2358  * Try to report backend fork() failure to client before we close the
2359  * connection.  Since we do not care to risk blocking the postmaster on
2360  * this connection, we set the connection to non-blocking and try only once.
2361  *
2362  * This is grungy special-purpose code; we cannot use backend libpq since
2363  * it's not up and running.
2364  */
2365 static void
2366 report_fork_failure_to_client(Port *port, int errnum)
2367 {
2368         char            buffer[1000];
2369
2370         /* Format the error message packet (always V2 protocol) */
2371         snprintf(buffer, sizeof(buffer), "E%s%s\n",
2372                          gettext("could not fork new process for connection: "),
2373                          strerror(errnum));
2374
2375         /* Set port to non-blocking.  Don't do send() if this fails */
2376         if (!set_noblock(port->sock))
2377                 return;
2378
2379         send(port->sock, buffer, strlen(buffer) + 1, 0);
2380 }
2381
2382
2383 /*
2384  * split_opts -- split a string of options and append it to an argv array
2385  *
2386  * NB: the string is destructively modified!
2387  *
2388  * Since no current POSTGRES arguments require any quoting characters,
2389  * we can use the simple-minded tactic of assuming each set of space-
2390  * delimited characters is a separate argv element.
2391  *
2392  * If you don't like that, well, we *used* to pass the whole option string
2393  * as ONE argument to execl(), which was even less intelligent...
2394  */
static void
split_opts(char **argv, int *argcp, char *s)
{
        char       *cursor = s;

        /* a NULL option string contributes nothing */
        if (cursor == NULL)
                return;

        for (;;)
        {
                /* step over any whitespace in front of the next word */
                while (isspace((unsigned char) *cursor))
                        cursor++;
                if (*cursor == '\0')
                        return;

                /* the word starts here; record it as the next argv entry */
                argv[*argcp] = cursor;
                *argcp += 1;

                /* advance to the end of the word */
                while (*cursor != '\0' && !isspace((unsigned char) *cursor))
                        cursor++;
                if (*cursor == '\0')
                        return;

                /* terminate the word in place and continue after it */
                *cursor++ = '\0';
        }
}
2411
2412
2413 /*
2414  * BackendRun -- perform authentication, and if successful,
2415  *                              set up the backend's argument list and invoke PostgresMain()
2416  *
2417  * returns:
2418  *              Shouldn't return at all.
2419  *              If PostgresMain() fails, return status.
2420  */
2421 static int
2422 BackendRun(Port *port)
2423 {
2424         int                     status;
2425         struct timeval now;
2426         struct timezone tz;
2427         char            remote_host[NI_MAXHOST];
2428         char            remote_port[NI_MAXSERV];
2429         char            remote_ps_data[NI_MAXHOST];
2430         char      **av;
2431         int                     maxac;
2432         int                     ac;
2433         char            debugbuf[32];
2434         char            protobuf[32];
2435         int                     i;
2436
2437         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
2438
2439         /*
2440          * Let's clean up ourselves as the postmaster child, and close the
2441          * postmaster's other sockets
2442          */
2443         ClosePostmasterPorts(true);
2444
2445         /* We don't want the postmaster's proc_exit() handlers */
2446         on_exit_reset();
2447
2448         /*
2449          * Signal handlers setting is moved to tcop/postgres...
2450          */
2451
2452         /* Save port etc. for ps status */
2453         MyProcPort = port;
2454
2455         /* Reset MyProcPid to new backend's pid */
2456         MyProcPid = getpid();
2457
2458         /*
2459          * PreAuthDelay is a debugging aid for investigating problems in the
2460          * authentication cycle: it can be set in postgresql.conf to allow
2461          * time to attach to the newly-forked backend with a debugger. (See
2462          * also the -W backend switch, which we allow clients to pass through
2463          * PGOPTIONS, but it is not honored until after authentication.)
2464          */
2465         if (PreAuthDelay > 0)
2466                 pg_usleep(PreAuthDelay * 1000000L);
2467
2468         ClientAuthInProgress = true;    /* limit visibility of log messages */
2469
2470         /* save start time for end of session reporting */
2471         gettimeofday(&(port->session_start), NULL);
2472
2473         /* set these to empty in case they are needed before we set them up */
2474         port->remote_host = "";
2475         port->remote_port = "";
2476         port->commandTag = "";
2477
2478         /*
2479          * Initialize libpq and enable reporting of ereport errors to the
2480          * client. Must do this now because authentication uses libpq to send
2481          * messages.
2482          */
2483         pq_init();                                      /* initialize libpq to talk to client */
2484         whereToSendOutput = Remote; /* now safe to ereport to client */
2485
2486         /*
2487          * We arrange for a simple exit(0) if we receive SIGTERM or SIGQUIT
2488          * during any client authentication related communication. Otherwise
2489          * the postmaster cannot shutdown the database FAST or IMMED cleanly
2490          * if a buggy client blocks a backend during authentication.
2491          */
2492         pqsignal(SIGTERM, authdie);
2493         pqsignal(SIGQUIT, authdie);
2494         pqsignal(SIGALRM, authdie);
2495         PG_SETMASK(&AuthBlockSig);
2496
2497         /*
2498          * Get the remote host name and port for logging and status display.
2499          */
2500         remote_host[0] = '\0';
2501         remote_port[0] = '\0';
2502         if (getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2503                                                 remote_host, sizeof(remote_host),
2504                                                 remote_port, sizeof(remote_port),
2505                                    (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV))
2506         {
2507                 int                     ret = getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2508                                                                                 remote_host, sizeof(remote_host),
2509                                                                                 remote_port, sizeof(remote_port),
2510                                                                                 NI_NUMERICHOST | NI_NUMERICSERV);
2511
2512                 if (ret)
2513                         ereport(WARNING,
2514                                         (errmsg("getnameinfo_all() failed: %s",
2515                                                         gai_strerror(ret))));
2516         }
2517         snprintf(remote_ps_data, sizeof(remote_ps_data),
2518                          remote_port[0] == '\0' ? "%s" : "%s(%s)",
2519                          remote_host, remote_port);
2520
2521         if (Log_connections)
2522                 ereport(LOG,
2523                                 (errmsg("connection received: host=%s port=%s",
2524                                                 remote_host, remote_port)));
2525
2526         /*
2527          * save remote_host and remote_port in port stucture
2528          */
2529         port->remote_host = strdup(remote_host);
2530         port->remote_port = strdup(remote_port);
2531
2532         /*
2533          * In EXEC_BACKEND case, we didn't inherit the contents of pg_hba.c
2534          * etcetera from the postmaster, and have to load them ourselves.
2535          * Build the PostmasterContext (which didn't exist before, in this
2536          * process) to contain the data.
2537          *
2538          * FIXME: [fork/exec] Ugh.  Is there a way around this overhead?
2539          */
2540 #ifdef EXEC_BACKEND
2541         Assert(PostmasterContext == NULL);
2542         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
2543                                                                                           "Postmaster",
2544                                                                                           ALLOCSET_DEFAULT_MINSIZE,
2545                                                                                           ALLOCSET_DEFAULT_INITSIZE,
2546                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
2547         MemoryContextSwitchTo(PostmasterContext);
2548
2549         load_hba();
2550         load_ident();
2551         load_user();
2552         load_group();
2553 #endif
2554
2555         /*
2556          * Ready to begin client interaction.  We will give up and exit(0)
2557          * after a time delay, so that a broken client can't hog a connection
2558          * indefinitely.  PreAuthDelay doesn't count against the time limit.
2559          */
2560         if (!enable_sig_alarm(AuthenticationTimeout * 1000, false))
2561                 elog(FATAL, "could not set timer for authorization timeout");
2562
2563         /*
2564          * Receive the startup packet (which might turn out to be a cancel
2565          * request packet).
2566          */
2567         status = ProcessStartupPacket(port, false);
2568
2569         if (status != STATUS_OK)
2570                 proc_exit(0);
2571
2572         /*
2573          * Now that we have the user and database name, we can set the process
2574          * title for ps.  It's good to do this as early as possible in
2575          * startup.
2576          */
2577         init_ps_display(port->user_name, port->database_name, remote_ps_data);
2578         set_ps_display("authentication");
2579
2580         /*
2581          * Now perform authentication exchange.
2582          */
2583         ClientAuthentication(port); /* might not return, if failure */
2584
2585         /*
2586          * Done with authentication.  Disable timeout, and prevent
2587          * SIGTERM/SIGQUIT again until backend startup is complete.
2588          */
2589         if (!disable_sig_alarm(false))
2590                 elog(FATAL, "could not disable timer for authorization timeout");
2591         PG_SETMASK(&BlockSig);
2592
2593         if (Log_connections)
2594                 ereport(LOG,
2595                                 (errmsg("connection authorized: user=%s database=%s",
2596                                                 port->user_name, port->database_name)));
2597
2598         /*
2599          * Don't want backend to be able to see the postmaster random number
2600          * generator state.  We have to clobber the static random_seed *and*
2601          * start a new random sequence in the random() library function.
2602          */
2603         random_seed = 0;
2604         gettimeofday(&now, &tz);
2605         srandom((unsigned int) now.tv_usec);
2606
2607
2608         /* ----------------
2609          * Now, build the argv vector that will be given to PostgresMain.
2610          *
2611          * The layout of the command line is
2612          *              postgres [secure switches] -p databasename [insecure switches]
2613          * where the switches after -p come from the client request.
2614          *
2615          * The maximum possible number of commandline arguments that could come
2616          * from ExtraOptions or port->cmdline_options is (strlen + 1) / 2; see
2617          * split_opts().
2618          * ----------------
2619          */
2620         maxac = 10;                                     /* for fixed args supplied below */
2621         maxac += (strlen(ExtraOptions) + 1) / 2;
2622         if (port->cmdline_options)
2623                 maxac += (strlen(port->cmdline_options) + 1) / 2;
2624
2625         av = (char **) MemoryContextAlloc(TopMemoryContext,
2626                                                                           maxac * sizeof(char *));
2627         ac = 0;
2628
2629         av[ac++] = "postgres";
2630
2631         /*
2632          * Pass the requested debugging level along to the backend.
2633          */
2634         if (debug_flag > 0)
2635         {
2636                 snprintf(debugbuf, sizeof(debugbuf), "-d%d", debug_flag);
2637                 av[ac++] = debugbuf;
2638         }
2639
2640         /*
2641          * Pass any backend switches specified with -o in the postmaster's own
2642          * command line.  We assume these are secure.  (It's OK to mangle
2643          * ExtraOptions now, since we're safely inside a subprocess.)
2644          */
2645         split_opts(av, &ac, ExtraOptions);
2646
2647         /* Tell the backend what protocol the frontend is using. */
2648         snprintf(protobuf, sizeof(protobuf), "-v%u", port->proto);
2649         av[ac++] = protobuf;
2650
2651         /*
2652          * Tell the backend it is being called from the postmaster, and which
2653          * database to use.  -p marks the end of secure switches.
2654          */
2655         av[ac++] = "-p";
2656         av[ac++] = port->database_name;
2657
2658         /*
2659          * Pass the (insecure) option switches from the connection request.
2660          * (It's OK to mangle port->cmdline_options now.)
2661          */
2662         if (port->cmdline_options)
2663                 split_opts(av, &ac, port->cmdline_options);
2664
2665         av[ac] = NULL;
2666
2667         Assert(ac < maxac);
2668
2669         /*
2670          * Release postmaster's working memory context so that backend can
2671          * recycle the space.  Note this does not trash *MyProcPort, because
2672          * ConnCreate() allocated that space with malloc() ... else we'd need
2673          * to copy the Port data here.  Also, subsidiary data such as the
2674          * username isn't lost either; see ProcessStartupPacket().
2675          */
2676         MemoryContextSwitchTo(TopMemoryContext);
2677         MemoryContextDelete(PostmasterContext);
2678         PostmasterContext = NULL;
2679
2680         /*
2681          * Debug: print arguments being passed to backend
2682          */
2683         ereport(DEBUG3,
2684                         (errmsg_internal("%s child[%d]: starting with (",
2685                                                          progname, getpid())));
2686         for (i = 0; i < ac; ++i)
2687                 ereport(DEBUG3,
2688                                 (errmsg_internal("\t%s", av[i])));
2689         ereport(DEBUG3,
2690                         (errmsg_internal(")")));
2691
2692         ClientAuthInProgress = false;           /* client_min_messages is active
2693                                                                                  * now */
2694
2695         return (PostgresMain(ac, av, port->user_name));
2696 }
2697
2698
2699 #ifdef EXEC_BACKEND
2700
2701 /*
2702  * postmaster_forkexec -- fork and exec a postmaster subprocess
2703  *
2704  * The caller must have set up the argv array already, except for argv[2]
2705  * which will be filled with the name of the temp variable file.
2706  *
2707  * Returns the child process PID, or -1 on fork failure (a suitable error
2708  * message has been logged on failure).
2709  *
2710  * All uses of this routine will dispatch to SubPostmasterMain in the
2711  * child process.
2712  */
2713 pid_t
2714 postmaster_forkexec(int argc, char *argv[])
2715 {
2716         Port            port;
2717
2718         /* This entry point passes dummy values for the Port variables */
2719         memset(&port, 0, sizeof(port));
2720         return internal_forkexec(argc, argv, &port);
2721 }
2722
2723 /*
2724  * backend_forkexec -- fork/exec off a backend process
2725  *
2726  * returns the pid of the fork/exec'd process, or -1 on failure
2727  */
2728 static pid_t
2729 backend_forkexec(Port *port)
2730 {
2731         char       *av[4];
2732         int                     ac = 0;
2733
2734         av[ac++] = "postgres";
2735         av[ac++] = "-forkbackend";
2736         av[ac++] = NULL;                        /* filled in by internal_forkexec */
2737
2738         av[ac] = NULL;
2739         Assert(ac < lengthof(av));
2740
2741         return internal_forkexec(ac, av, port);
2742 }
2743
2744 static pid_t
2745 internal_forkexec(int argc, char *argv[], Port *port)
2746 {
2747         pid_t           pid;
2748         char            tmpfilename[MAXPGPATH];
2749
2750         if (!write_backend_variables(tmpfilename, port))
2751                 return -1;                              /* log made by write_backend_variables */
2752
2753         /* Make sure caller set up argv properly */
2754         Assert(argc >= 3);
2755         Assert(argv[argc] == NULL);
2756         Assert(strncmp(argv[1], "-fork", 5) == 0);
2757         Assert(argv[2] == NULL);
2758
2759         /* Insert temp file name after -fork argument */
2760         argv[2] = tmpfilename;
2761
2762 #ifdef WIN32
2763         pid = win32_forkexec(postgres_exec_path, argv);
2764 #else
2765         /* Fire off execv in child */
2766         if ((pid = fork()) == 0)
2767         {
2768                 if (execv(postgres_exec_path, argv) < 0)
2769                 {
2770                         ereport(LOG,
2771                                         (errmsg("could not exec backend process \"%s\": %m",
2772                                                         postgres_exec_path)));
2773                         /* We're already in the child process here, can't return */
2774                         exit(1);
2775                 }
2776         }
2777 #endif
2778
2779         return pid;                                     /* Parent returns pid, or -1 on fork failure */
2780 }
2781
2782 /*
2783  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
2784  *                      to what it would be if we'd simply forked on Unix, and then
2785  *                      dispatch to the appropriate place.
2786  *
2787  * The first two command line arguments are expected to be "-forkFOO"
2788  * (where FOO indicates which postmaster child we are to become), and
2789  * the name of a variables file that we can read to load data that would
2790  * have been inherited by fork() on Unix.  Remaining arguments go to the
2791  * subprocess FooMain() routine.
2792  */
2793 int
2794 SubPostmasterMain(int argc, char *argv[])
2795 {
2796         Port            port;
2797
2798         /* Do this sooner rather than later... */
2799         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
2800
2801         MyProcPid = getpid();           /* reset MyProcPid */
2802
2803         /* In EXEC_BACKEND case we will not have inherited these settings */
2804         IsPostmasterEnvironment = true;
2805         whereToSendOutput = None;
2806         pqinitmask();
2807         PG_SETMASK(&BlockSig);
2808
2809         /* Setup essential subsystems */
2810         MemoryContextInit();
2811         InitializeGUCOptions();
2812
2813         /* Check we got appropriate args */
2814         if (argc < 3)
2815                 elog(FATAL, "invalid subpostmaster invocation");
2816
2817         /* Read in file-based context */
2818         memset(&port, 0, sizeof(Port));
2819         read_backend_variables(argv[2], &port);
2820         read_nondefault_variables();
2821
2822         /* Run backend or appropriate child */
2823         if (strcmp(argv[1], "-forkbackend") == 0)
2824         {
2825                 /* BackendRun will close sockets */
2826
2827                 /* Attach process to shared segments */
2828                 CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
2829
2830                 Assert(argc == 3);              /* shouldn't be any more args */
2831                 proc_exit(BackendRun(&port));
2832         }
2833         if (strcmp(argv[1], "-forkboot") == 0)
2834         {
2835                 /* Close the postmaster's sockets */
2836                 ClosePostmasterPorts(true);
2837
2838                 /* Attach process to shared segments */
2839                 CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
2840
2841                 BootstrapMain(argc - 2, argv + 2);
2842                 ExitPostmaster(0);
2843         }
2844         if (strcmp(argv[1], "-forkbuf") == 0)
2845         {
2846                 /* Close the postmaster's sockets */
2847                 ClosePostmasterPorts(false);
2848
2849                 /* Do not want to attach to shared memory */
2850
2851                 PgstatBufferMain(argc, argv);
2852                 ExitPostmaster(0);
2853         }
2854         if (strcmp(argv[1], "-forkcol") == 0)
2855         {
2856                 /*
2857                  * Do NOT close postmaster sockets here, because we are forking from
2858                  * pgstat buffer process, which already did it.
2859                  */
2860
2861                 /* Do not want to attach to shared memory */
2862
2863                 PgstatCollectorMain(argc, argv);
2864                 ExitPostmaster(0);
2865         }
2866
2867         return 1;                                       /* shouldn't get here */
2868 }
2869
2870 #endif /* EXEC_BACKEND */
2871
2872
2873 /*
2874  * ExitPostmaster -- cleanup
2875  *
2876  * Do NOT call exit() directly --- always go through here!
2877  */
2878 static void
2879 ExitPostmaster(int status)
2880 {
2881         /* should cleanup shared memory and kill all backends */
2882
2883         /*
2884          * Not sure of the semantics here.      When the Postmaster dies, should
2885          * the backends all be killed? probably not.
2886          *
2887          * MUST         -- vadim 05-10-1999
2888          */
2889         /* Should I use true instead? */
2890         ClosePostmasterPorts(false);
2891
2892         proc_exit(status);
2893 }
2894
2895 /*
2896  * sigusr1_handler - handle signal conditions from child processes
2897  */
2898 static void
2899 sigusr1_handler(SIGNAL_ARGS)
2900 {
2901         int                     save_errno = errno;
2902
2903         PG_SETMASK(&BlockSig);
2904
2905         if (CheckPostmasterSignal(PMSIGNAL_DO_CHECKPOINT))
2906         {
2907                 if (CheckPointWarning != 0)
2908                 {
2909                         /*
2910                          * This only times checkpoints forced by running out of
2911                          * segment files.  Other checkpoints could reduce the
2912                          * frequency of forced checkpoints.
2913                          */
2914                         time_t          now = time(NULL);
2915
2916                         if (LastSignalledCheckpoint != 0)
2917                         {
2918                                 int                     elapsed_secs = now - LastSignalledCheckpoint;
2919
2920                                 if (elapsed_secs < CheckPointWarning)
2921                                         ereport(LOG,
2922                                                         (errmsg("checkpoints are occurring too frequently (%d seconds apart)",
2923                                                                         elapsed_secs),
2924                                                          errhint("Consider increasing the configuration parameter \"checkpoint_segments\".")));
2925                         }
2926                         LastSignalledCheckpoint = now;
2927                 }
2928
2929                 /*
2930                  * Request to schedule a checkpoint
2931                  *
2932                  * Ignore request if checkpoint is already running or checkpointing
2933                  * is currently disabled
2934                  */
2935                 if (CheckPointPID == 0 && checkpointed &&
2936                         StartupPID == 0 && Shutdown == NoShutdown &&
2937                         !FatalError && random_seed != 0)
2938                 {
2939                         CheckPointPID = CheckPointDataBase();
2940                         /* note: if fork fails, CheckPointPID stays 0; nothing happens */
2941                 }
2942         }
2943
2944         if (CheckPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE))
2945         {
2946                 /*
2947                  * Password or group file has changed.
2948                  */
2949                 load_user();
2950                 load_group();
2951         }
2952
2953         if (CheckPostmasterSignal(PMSIGNAL_WAKEN_CHILDREN))
2954         {
2955                 /*
2956                  * Send SIGUSR1 to all children (triggers
2957                  * CatchupInterruptHandler). See storage/ipc/sinval[adt].c for the
2958                  * use of this.
2959                  */
2960                 if (Shutdown == NoShutdown)
2961                         SignalChildren(SIGUSR1);
2962         }
2963
2964         PG_SETMASK(&UnBlockSig);
2965
2966         errno = save_errno;
2967 }
2968
2969
/*
 * dummy_handler - signal handler that does nothing
 *
 * Installed for signals the postmaster itself has no use for but that
 * backends do use.  Setting such a signal to SIG_IGN in the postmaster
 * could make a freshly started backend lose a signal that arrives before
 * the backend has set up its own signal handling.  (See notes in
 * postgres.c.)
 */
static void
dummy_handler(SIGNAL_ARGS)
{
        /* nothing to do */
}
2982
2983
/*
 * CharRemap: turn an integer into one character of the 62-symbol alphabet
 * used for crypt(3) salts.
 *
 * The magnitude of ch is reduced modulo 62 and mapped as follows:
 *              0..25  -> 'A'..'Z'
 *              26..51 -> 'a'..'z'
 *              52..61 -> '0'..'9'
 *
 * Any long value is accepted, including negative ones.
 */
static char
CharRemap(long ch)
{
        unsigned long idx;

        /*
         * Compute |ch| in unsigned arithmetic.  Negating a signed long
         * directly overflows (undefined behavior) when ch is LONG_MIN; the
         * unsigned subtraction is well defined for every input.
         */
        if (ch < 0)
                idx = 0UL - (unsigned long) ch;
        else
                idx = (unsigned long) ch;
        idx %= 62;

        if (idx < 26)
                return (char) ('A' + idx);

        idx -= 26;
        if (idx < 26)
                return (char) ('a' + idx);

        idx -= 26;
        return (char) ('0' + idx);
}
3005
/*
 * RandomSalt -- fill in a two-character crypt salt and a four-byte MD5 salt
 *
 * cryptSalt receives two characters and md5Salt receives four bytes; all
 * of them are derived from PostmasterRandom().
 */
static void
RandomSalt(char *cryptSalt, char *md5Salt)
{
        long            value = PostmasterRandom();
        int                     k;

        cryptSalt[0] = CharRemap(value % 62);
        cryptSalt[1] = CharRemap(value / 62);

        /*
         * Reusing the first random value for the leading MD5 salt byte is
         * fine, because the client is only ever sent one of the two salts.
         * The remaining bytes each get fresh random bits.
         *
         * The modulus is 255 rather than 256: one possible byte value is given
         * up so that every bit of the random() value influences the result.
         * Adding one on top of that keeps null bytes out of the salt.
         */
        md5Salt[0] = (value % 255) + 1;
        for (k = 1; k < 4; k++)
        {
                value = PostmasterRandom();
                md5Salt[k] = (value % 255) + 1;
        }
}
3034
3035 /*
3036  * PostmasterRandom
3037  */
3038 static long
3039 PostmasterRandom(void)
3040 {
3041         static bool initialized = false;
3042
3043         if (!initialized)
3044         {
3045                 Assert(random_seed != 0);
3046                 srandom(random_seed);
3047                 initialized = true;
3048         }
3049
3050         return random();
3051 }
3052
3053 /*
3054  * Count up number of child processes.
3055  */
3056 static int
3057 CountChildren(void)
3058 {
3059         Dlelem     *curr;
3060         Backend    *bp;
3061         int                     cnt = 0;
3062
3063         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
3064         {
3065                 bp = (Backend *) DLE_VAL(curr);
3066                 if (bp->pid != MyProcPid)
3067                         cnt++;
3068         }
3069         /* Checkpoint and bgwriter will be in the list, discount them */
3070         if (CheckPointPID != 0)
3071                 cnt--;
3072         if (BgWriterPID != 0)
3073                 cnt--;
3074         return cnt;
3075 }
3076
3077
3078 /*
3079  * SSDataBase -- start a non-backend child process for the postmaster
3080  *
3081  * xlop determines what kind of child will be started.  All child types
3082  * initially go to BootstrapMain, which will handle common setup.
3083  *
3084  * Return value of SSDataBase is subprocess' PID, or 0 if failed to start
3085  * subprocess (0 is returned only for checkpoint/bgwriter cases).
3086  */
3087 static pid_t
3088 SSDataBase(int xlop)
3089 {
3090         Backend    *bn;
3091         pid_t           pid;
3092         char       *av[10];
3093         int                     ac = 0;
3094         char            xlbuf[32];
3095 #ifdef LINUX_PROFILE
3096         struct itimerval prof_itimer;
3097 #endif
3098
3099         /*
3100          * Set up command-line arguments for subprocess
3101          */
3102         av[ac++] = "postgres";
3103
3104 #ifdef EXEC_BACKEND
3105         av[ac++] = "-forkboot";
3106         av[ac++] = NULL;                        /* filled in by postmaster_forkexec */
3107 #endif
3108
3109         snprintf(xlbuf, sizeof(xlbuf), "-x%d", xlop);
3110         av[ac++] = xlbuf;
3111
3112         av[ac++] = "-p";
3113         av[ac++] = "template1";
3114
3115         av[ac] = NULL;
3116         Assert(ac < lengthof(av));
3117
3118         /*
3119          * Flush stdio channels (see comments in BackendStartup)
3120          */
3121         fflush(stdout);
3122         fflush(stderr);
3123
3124 #ifdef EXEC_BACKEND
3125
3126         pid = postmaster_forkexec(ac, av);
3127
3128 #else /* !EXEC_BACKEND */
3129
3130 #ifdef LINUX_PROFILE
3131         /* see comments in BackendStartup */
3132         getitimer(ITIMER_PROF, &prof_itimer);
3133 #endif
3134
3135 #ifdef __BEOS__
3136         /* Specific beos actions before backend startup */
3137         beos_before_backend_startup();
3138 #endif
3139
3140         pid = fork();
3141
3142         if (pid == 0)                           /* child */
3143         {
3144 #ifdef LINUX_PROFILE
3145                 setitimer(ITIMER_PROF, &prof_itimer, NULL);
3146 #endif
3147
3148 #ifdef __BEOS__
3149                 /* Specific beos actions after backend startup */
3150                 beos_backend_startup();
3151 #endif
3152
3153                 IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
3154
3155                 /* Close the postmaster's sockets */
3156                 ClosePostmasterPorts(true);
3157
3158                 /* Lose the postmaster's on-exit routines and port connections */
3159                 on_exit_reset();
3160
3161                 BootstrapMain(ac, av);
3162                 ExitPostmaster(0);
3163         }
3164
3165 #endif /* EXEC_BACKEND */
3166
3167         if (pid < 0)
3168         {
3169                 /* in parent, fork failed */
3170                 int                     save_errno = errno;
3171
3172 #ifdef __BEOS__
3173                 /* Specific beos actions before backend startup */
3174                 beos_backend_startup_failed();
3175 #endif
3176                 errno = save_errno;
3177                 switch (xlop)
3178                 {
3179                         case BS_XLOG_STARTUP:
3180                                 ereport(LOG,
3181                                                 (errmsg("could not fork startup process: %m")));
3182                                 break;
3183                         case BS_XLOG_CHECKPOINT:
3184                                 ereport(LOG,
3185                                           (errmsg("could not fork checkpoint process: %m")));
3186                                 break;
3187                         case BS_XLOG_BGWRITER:
3188                                 ereport(LOG,
3189                                                 (errmsg("could not fork bgwriter process: %m")));
3190                                 break;
3191                         case BS_XLOG_SHUTDOWN:
3192                                 ereport(LOG,
3193                                                 (errmsg("could not fork shutdown process: %m")));
3194                                 break;
3195                         default:
3196                                 ereport(LOG,
3197                                                 (errmsg("could not fork process: %m")));
3198                                 break;
3199                 }
3200
3201                 /*
3202                  * fork failure is fatal during startup/shutdown, but there's no
3203                  * need to choke if a routine checkpoint or starting a background
3204                  * writer fails.
3205                  */
3206                 if (xlop == BS_XLOG_CHECKPOINT)
3207                         return 0;
3208                 if (xlop == BS_XLOG_BGWRITER)
3209                         return 0;
3210                 ExitPostmaster(1);
3211         }
3212
3213         /*
3214          * in parent, successful fork
3215          *
3216          * The startup and shutdown processes are not considered normal
3217          * backends, but the checkpoint and bgwriter processes are. They must
3218          * be added to the list of backends.
3219          */
3220         if (xlop == BS_XLOG_CHECKPOINT || xlop == BS_XLOG_BGWRITER)
3221         {
3222                 if (!(bn = (Backend *) malloc(sizeof(Backend))))
3223                 {
3224                         ereport(LOG,
3225                                         (errcode(ERRCODE_OUT_OF_MEMORY),
3226                                          errmsg("out of memory")));
3227                         ExitPostmaster(1);
3228                 }
3229
3230                 bn->pid = pid;
3231                 bn->cancel_key = PostmasterRandom();
3232 #ifdef EXEC_BACKEND
3233                 ShmemBackendArrayAdd(bn);
3234 #endif
3235                 DLAddHead(BackendList, DLNewElem(bn));
3236
3237                 /*
3238                  * Since this code is executed periodically, it's a fine place to
3239                  * do other actions that should happen every now and then on no
3240                  * particular schedule.  Such as...
3241                  */
3242                 TouchSocketFile();
3243                 TouchSocketLockFile();
3244         }
3245
3246         return pid;
3247 }
3248
3249
3250 /*
3251  * Create the opts file
3252  */
3253 static bool
3254 CreateOptsFile(int argc, char *argv[], char *fullprogname)
3255 {
3256         char            filename[MAXPGPATH];
3257         FILE       *fp;
3258         int                     i;
3259
3260         snprintf(filename, sizeof(filename), "%s/postmaster.opts", DataDir);
3261
3262         if ((fp = fopen(filename, "w")) == NULL)
3263         {
3264                 elog(LOG, "could not create file \"%s\": %m", filename);
3265                 return false;
3266         }
3267
3268         fprintf(fp, "%s", fullprogname);
3269         for (i = 1; i < argc; i++)
3270                 fprintf(fp, " '%s'", argv[i]);
3271         fputs("\n", fp);
3272
3273         if (fclose(fp))
3274         {
3275                 elog(LOG, "could not write file \"%s\": %m", filename);
3276                 return false;
3277         }
3278
3279         return true;
3280 }
3281
3282 /*
3283  * This should be used only for reporting "interactive" errors (essentially,
3284  * bogus arguments on the command line).  Once the postmaster is launched,
3285  * use ereport.  In particular, don't use this for anything that occurs
3286  * after pmdaemonize.
3287  */
3288 static void
3289 postmaster_error(const char *fmt,...)
3290 {
3291         va_list         ap;
3292
3293         fprintf(stderr, "%s: ", progname);
3294         va_start(ap, fmt);
3295         vfprintf(stderr, gettext(fmt), ap);
3296         va_end(ap);
3297         fprintf(stderr, "\n");
3298 }
3299
3300
3301 #ifdef EXEC_BACKEND
3302
/*
 * The following need to be available to the read/write_backend_variables
 * functions.  They are declared here by hand rather than pulled in from
 * headers; LWLock is only forward-declared because nothing below needs to
 * look inside it.
 */
#include "storage/spin.h"

extern slock_t *ShmemLock;
extern slock_t *ShmemIndexLock;
extern void *ShmemIndexAlloc;
typedef struct LWLock LWLock;
extern LWLock *LWLockArray;
extern slock_t *ProcStructLock;
extern int      pgStatSock;

/*
 * Dump or restore the raw bytes of a variable to/from a stdio stream.
 * "var" must be an lvalue, since its address and sizeof are taken.  Each
 * macro evaluates to the fwrite/fread result, i.e. 1 when the whole
 * variable was transferred.
 */
#define write_var(var,fp) fwrite((void*)&(var),sizeof(var),1,fp)
#define read_var(var,fp)  fread((void*)&(var),sizeof(var),1,fp)
3319
3320 static bool
3321 write_backend_variables(char *filename, Port *port)
3322 {
3323         static unsigned long tmpBackendFileNum = 0;
3324         FILE       *fp;
3325         char            str_buf[MAXPGPATH];
3326
3327         /* Calculate name for temp file in caller's buffer */
3328         Assert(DataDir);
3329         snprintf(filename, MAXPGPATH, "%s/%s/%s.backend_var.%lu",
3330                          DataDir, PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX,
3331                          ++tmpBackendFileNum);
3332
3333         /* Open file */
3334         fp = AllocateFile(filename, PG_BINARY_W);
3335         if (!fp)
3336         {
3337                 /* As per OpenTemporaryFile... */
3338                 char            dirname[MAXPGPATH];
3339
3340                 snprintf(dirname, MAXPGPATH, "%s/%s", DataDir, PG_TEMP_FILES_DIR);
3341                 mkdir(dirname, S_IRWXU);
3342
3343                 fp = AllocateFile(filename, PG_BINARY_W);
3344                 if (!fp)
3345                 {
3346                         ereport(LOG,
3347                                         (errcode_for_file_access(),
3348                                          errmsg("could not create file \"%s\": %m",
3349                                                         filename)));
3350                         return false;
3351                 }
3352         }
3353
3354         /* Write vars */
3355         write_var(port->sock, fp);
3356         write_var(port->proto, fp);
3357         write_var(port->laddr, fp);
3358         write_var(port->raddr, fp);
3359         write_var(port->canAcceptConnections, fp);
3360         write_var(port->cryptSalt, fp);
3361         write_var(port->md5Salt, fp);
3362
3363         /*
3364          * XXX FIXME later: writing these strings as MAXPGPATH bytes always is
3365          * probably a waste of resources
3366          */
3367
3368         StrNCpy(str_buf, DataDir, MAXPGPATH);
3369         fwrite((void *) str_buf, MAXPGPATH, 1, fp);
3370
3371         write_var(MyCancelKey, fp);
3372
3373         write_var(UsedShmemSegID, fp);
3374         write_var(UsedShmemSegAddr, fp);
3375
3376         write_var(ShmemLock, fp);
3377         write_var(ShmemIndexLock, fp);
3378         write_var(ShmemVariableCache, fp);
3379         write_var(ShmemIndexAlloc, fp);
3380         write_var(ShmemBackendArray, fp);
3381
3382         write_var(LWLockArray, fp);
3383         write_var(ProcStructLock, fp);
3384         write_var(pgStatSock, fp);
3385
3386         write_var(debug_flag, fp);
3387         write_var(PostmasterPid, fp);
3388
3389         fwrite((void *) my_exec_path, MAXPGPATH, 1, fp);
3390
3391         fwrite((void *) ExtraOptions, sizeof(ExtraOptions), 1, fp);
3392
3393         StrNCpy(str_buf, setlocale(LC_COLLATE, NULL), MAXPGPATH);
3394         fwrite((void *) str_buf, MAXPGPATH, 1, fp);
3395         StrNCpy(str_buf, setlocale(LC_CTYPE, NULL), MAXPGPATH);
3396         fwrite((void *) str_buf, MAXPGPATH, 1, fp);
3397
3398         /* Release file */
3399         if (FreeFile(fp))
3400         {
3401                 ereport(ERROR,
3402                                 (errcode_for_file_access(),
3403                                  errmsg("could not write to file \"%s\": %m", filename)));
3404                 return false;
3405         }
3406
3407         return true;
3408 }
3409
3410 static void
3411 read_backend_variables(char *filename, Port *port)
3412 {
3413         FILE       *fp;
3414         char            str_buf[MAXPGPATH];
3415
3416         /* Open file */
3417         fp = AllocateFile(filename, PG_BINARY_R);
3418         if (!fp)
3419                 ereport(FATAL,
3420                                 (errcode_for_file_access(),
3421                                  errmsg("could not read from backend variables file \"%s\": %m",
3422                                                 filename)));
3423
3424         /* Read vars */
3425         read_var(port->sock, fp);
3426         read_var(port->proto, fp);
3427         read_var(port->laddr, fp);
3428         read_var(port->raddr, fp);
3429         read_var(port->canAcceptConnections, fp);
3430         read_var(port->cryptSalt, fp);
3431         read_var(port->md5Salt, fp);
3432
3433         fread((void *) str_buf, MAXPGPATH, 1, fp);
3434         SetDataDir(str_buf);
3435
3436         read_var(MyCancelKey, fp);
3437
3438         read_var(UsedShmemSegID, fp);
3439         read_var(UsedShmemSegAddr, fp);
3440
3441         read_var(ShmemLock, fp);
3442         read_var(ShmemIndexLock, fp);
3443         read_var(ShmemVariableCache, fp);
3444         read_var(ShmemIndexAlloc, fp);
3445         read_var(ShmemBackendArray, fp);
3446
3447         read_var(LWLockArray, fp);
3448         read_var(ProcStructLock, fp);
3449         read_var(pgStatSock, fp);
3450
3451         read_var(debug_flag, fp);
3452         read_var(PostmasterPid, fp);
3453
3454         fread((void *) my_exec_path, MAXPGPATH, 1, fp);
3455
3456         fread((void *) ExtraOptions, sizeof(ExtraOptions), 1, fp);
3457
3458         fread((void *) str_buf, MAXPGPATH, 1, fp);
3459         setlocale(LC_COLLATE, str_buf);
3460         fread((void *) str_buf, MAXPGPATH, 1, fp);
3461         setlocale(LC_CTYPE, str_buf);
3462
3463         /* Release file */
3464         FreeFile(fp);
3465         if (unlink(filename) != 0)
3466                 ereport(WARNING,
3467                                 (errcode_for_file_access(),
3468                                  errmsg("could not remove file \"%s\": %m", filename)));
3469 }
3470
3471
3472 size_t
3473 ShmemBackendArraySize(void)
3474 {
3475         return (NUM_BACKENDARRAY_ELEMS * sizeof(Backend));
3476 }
3477
3478 void
3479 ShmemBackendArrayAllocation(void)
3480 {
3481         size_t          size = ShmemBackendArraySize();
3482
3483         ShmemBackendArray = (Backend *) ShmemAlloc(size);
3484         memset(ShmemBackendArray, 0, size);
3485 }
3486
3487 static void
3488 ShmemBackendArrayAdd(Backend *bn)
3489 {
3490         int                     i;
3491
3492         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3493         {
3494                 /* Find an empty slot */
3495                 if (ShmemBackendArray[i].pid == 0)
3496                 {
3497                         ShmemBackendArray[i] = *bn;
3498                         return;
3499                 }
3500         }
3501
3502         ereport(FATAL,
3503                         (errmsg_internal("unable to add backend entry")));
3504 }
3505
3506 static void
3507 ShmemBackendArrayRemove(pid_t pid)
3508 {
3509         int                     i;
3510
3511         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3512         {
3513                 if (ShmemBackendArray[i].pid == pid)
3514                 {
3515                         /* Mark the slot as empty */
3516                         ShmemBackendArray[i].pid = 0;
3517                         return;
3518                 }
3519         }
3520
3521         ereport(WARNING,
3522                         (errmsg_internal("unable to find backend entry with pid %d",
3523                                                          (int) pid)));
3524 }
3525
3526 #endif /* EXEC_BACKEND */
3527
3528
3529 #ifdef WIN32
3530
3531 static pid_t
3532 win32_forkexec(const char *path, char *argv[])
3533 {
3534         STARTUPINFO si;
3535         PROCESS_INFORMATION pi;
3536         int                     i;
3537         int                     j;
3538         char            cmdLine[MAXPGPATH * 2];
3539         HANDLE          childHandleCopy;
3540         HANDLE          waiterThread;
3541
3542         /* Format the cmd line */
3543         cmdLine[sizeof(cmdLine)-1] = '\0';
3544         cmdLine[sizeof(cmdLine)-2] = '\0';
3545         snprintf(cmdLine, sizeof(cmdLine)-1, "\"%s\"", path);
3546         i = 0;
3547         while (argv[++i] != NULL)
3548         {
3549                 j = strlen(cmdLine);
3550                 snprintf(cmdLine+j, sizeof(cmdLine)-1-j, " \"%s\"", argv[i]);
3551         }
3552         if (cmdLine[sizeof(cmdLine)-2] != '\0')
3553         {
3554                 elog(LOG, "subprocess command line too long");
3555                 return -1;
3556         }
3557
3558         memset(&pi, 0, sizeof(pi));
3559         memset(&si, 0, sizeof(si));
3560         si.cb = sizeof(si);
3561         if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, 0, NULL, NULL, &si, &pi))
3562         {
3563                 elog(LOG, "CreateProcess call failed (%d): %m", (int) GetLastError());
3564                 return -1;
3565         }
3566
3567         if (!IsUnderPostmaster)
3568         {
3569                 /* We are the Postmaster creating a child... */
3570                 win32_AddChild(pi.dwProcessId, pi.hProcess);
3571         }
3572
3573         if (!DuplicateHandle(GetCurrentProcess(),
3574                                                  pi.hProcess,
3575                                                  GetCurrentProcess(),
3576                                                  &childHandleCopy,
3577                                                  0,
3578                                                  FALSE,
3579                                                  DUPLICATE_SAME_ACCESS))
3580                 ereport(FATAL,
3581                                 (errmsg_internal("failed to duplicate child handle: %d",
3582                                                                  (int) GetLastError())));
3583
3584         waiterThread = CreateThread(NULL, 64 * 1024, win32_sigchld_waiter,
3585                                                                 (LPVOID) childHandleCopy, 0, NULL);
3586         if (!waiterThread)
3587                 ereport(FATAL,
3588                                 (errmsg_internal("failed to create sigchld waiter thread: %d",
3589                                                                  (int) GetLastError())));
3590         CloseHandle(waiterThread);
3591
3592         if (IsUnderPostmaster)
3593                 CloseHandle(pi.hProcess);
3594         CloseHandle(pi.hThread);
3595
3596         return pi.dwProcessId;
3597 }
3598
3599 /*
3600  * Note: The following three functions must not be interrupted (eg. by signals).
3601  *      As the Postgres Win32 signalling architecture (currently) requires polling,
3602  *      or APC checking functions which aren't used here, this is not an issue.
3603  *
3604  *      We keep two separate arrays, instead of a single array of pid/HANDLE structs,
3605  *      to avoid having to re-create a handle array for WaitForMultipleObjects on
3606  *      each call to win32_waitpid.
3607  */
3608
3609 static void
3610 win32_AddChild(pid_t pid, HANDLE handle)
3611 {
3612         Assert(win32_childPIDArray && win32_childHNDArray);
3613         if (win32_numChildren < NUM_BACKENDARRAY_ELEMS)
3614         {
3615                 win32_childPIDArray[win32_numChildren] = pid;
3616                 win32_childHNDArray[win32_numChildren] = handle;
3617                 ++win32_numChildren;
3618         }
3619         else
3620                 ereport(FATAL,
3621                                 (errmsg_internal("unable to add child entry with pid %lu",
3622                                                                  (unsigned long) pid)));
3623 }
3624
3625 static void
3626 win32_RemoveChild(pid_t pid)
3627 {
3628         int                     i;
3629
3630         Assert(win32_childPIDArray && win32_childHNDArray);
3631
3632         for (i = 0; i < win32_numChildren; i++)
3633         {
3634                 if (win32_childPIDArray[i] == pid)
3635                 {
3636                         CloseHandle(win32_childHNDArray[i]);
3637
3638                         /* Swap last entry into the "removed" one */
3639                         --win32_numChildren;
3640                         win32_childPIDArray[i] = win32_childPIDArray[win32_numChildren];
3641                         win32_childHNDArray[i] = win32_childHNDArray[win32_numChildren];
3642                         return;
3643                 }
3644         }
3645
3646         ereport(WARNING,
3647                         (errmsg_internal("unable to find child entry with pid %lu",
3648                                                          (unsigned long) pid)));
3649 }
3650
3651 static pid_t
3652 win32_waitpid(int *exitstatus)
3653 {
3654         Assert(win32_childPIDArray && win32_childHNDArray);
3655         elog(DEBUG3, "waiting on %lu children", win32_numChildren);
3656
3657         if (win32_numChildren > 0)
3658         {
3659                 /*
3660                  * Note: Do NOT use WaitForMultipleObjectsEx, as we don't want to
3661                  * run queued APCs here.
3662                  */
3663                 int                     index;
3664                 DWORD           exitCode;
3665                 DWORD           ret = WaitForMultipleObjects(win32_numChildren, win32_childHNDArray, FALSE, 0);
3666
3667                 switch (ret)
3668                 {
3669                         case WAIT_FAILED:
3670                                 ereport(ERROR,
3671                                    (errmsg_internal("failed to wait on %lu children: %i",
3672                                                           win32_numChildren, (int) GetLastError())));
3673                                 /* Fall through to WAIT_TIMEOUTs return */
3674
3675                         case WAIT_TIMEOUT:
3676                                 /* No children have finished */
3677                                 return -1;
3678
3679                         default:
3680
3681                                 /*
3682                                  * Get the exit code, and return the PID of, the
3683                                  * respective process
3684                                  */
3685                                 index = ret - WAIT_OBJECT_0;
3686                                 Assert(index >= 0 && index < win32_numChildren);
3687                                 if (!GetExitCodeProcess(win32_childHNDArray[index], &exitCode))
3688
3689                                         /*
3690                                          * If we get this far, this should never happen, but,
3691                                          * then again... No choice other than to assume a
3692                                          * catastrophic failure.
3693                                          */
3694                                         ereport(FATAL,
3695                                                         (errmsg_internal("failed to get exit code for child %lu",
3696                                                                                    win32_childPIDArray[index])));
3697                                 *exitstatus = (int) exitCode;
3698                                 return win32_childPIDArray[index];
3699                 }
3700         }
3701
3702         /* No children */
3703         return -1;
3704 }
3705
3706 /* Note! Code belows executes on separate threads, one for
3707    each child process created */
3708 static DWORD WINAPI
3709 win32_sigchld_waiter(LPVOID param)
3710 {
3711         HANDLE          procHandle = (HANDLE) param;
3712
3713         DWORD           r = WaitForSingleObject(procHandle, INFINITE);
3714
3715         if (r == WAIT_OBJECT_0)
3716                 pg_queue_signal(SIGCHLD);
3717         else
3718                 fprintf(stderr, "ERROR: Failed to wait on child process handle: %i\n",
3719                                 (int) GetLastError());
3720         CloseHandle(procHandle);
3721         return 0;
3722 }
3723
3724 #endif /* WIN32 */