]> granicus.if.org Git - postgresql/blob - src/backend/postmaster/postmaster.c
Add 'int' cast for getpid() because some Solaris releases return long
[postgresql] / src / backend / postmaster / postmaster.c
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  *        This program acts as a clearing house for requests to the
5  *        POSTGRES system.      Frontend programs send a startup message
6  *        to the Postmaster and the postmaster uses the info in the
7  *        message to set up a backend process.
8  *
9  *        The postmaster also manages system-wide operations such as
10  *        startup and shutdown. The postmaster itself doesn't do those
11  *        operations, mind you --- it just forks off a subprocess to do them
12  *        at the right times.  It also takes care of resetting the system
13  *        if a backend crashes.
14  *
15  *        The postmaster process creates the shared memory and semaphore
16  *        pools during startup, but as a rule does not touch them itself.
17  *        In particular, it is not a member of the PGPROC array of backends
18  *        and so it cannot participate in lock-manager operations.      Keeping
19  *        the postmaster away from shared memory operations makes it simpler
20  *        and more reliable.  The postmaster is almost always able to recover
21  *        from crashes of individual backends by resetting shared memory;
22  *        if it did much with shared memory then it would be prone to crashing
23  *        along with the backends.
24  *
25  *        When a request message is received, we now fork() immediately.
26  *        The child process performs authentication of the request, and
27  *        then becomes a backend if successful.  This allows the auth code
28  *        to be written in a simple single-threaded style (as opposed to the
29  *        crufty "poor man's multitasking" code that used to be needed).
30  *        More importantly, it ensures that blockages in non-multithreaded
31  *        libraries like SSL or PAM cannot cause denial of service to other
32  *        clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  *        $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.433 2004/10/14 20:23:45 momjian Exp $
41  *
42  * NOTES
43  *
44  * Initialization:
45  *              The Postmaster sets up shared memory data structures
46  *              for the backends.
47  *
48  * Synchronization:
49  *              The Postmaster shares memory with the backends but should avoid
50  *              touching shared memory, so as not to become stuck if a crashing
51  *              backend screws up locks or shared memory.  Likewise, the Postmaster
52  *              should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  *              The Postmaster cleans up after backends if they have an emergency
56  *              exit and/or core dump.
57  *
58  * Error Reporting:
59  *              Use write_stderr() only for reporting "interactive" errors
60  *              (essentially, bogus arguments on the command line).  Once the
61  *              postmaster is launched, use ereport().  In particular, don't use
62  *              write_stderr() for anything that occurs after pmdaemonize.
63  *
64  *-------------------------------------------------------------------------
65  */
66
67 #include "postgres.h"
68
69 #include <unistd.h>
70 #include <signal.h>
71 #include <time.h>
72 #include <sys/wait.h>
73 #include <ctype.h>
74 #include <sys/stat.h>
75 #include <sys/socket.h>
76 #include <fcntl.h>
77 #include <sys/param.h>
78 #include <netinet/in.h>
79 #include <arpa/inet.h>
80 #include <netdb.h>
81 #include <limits.h>
82
83 #ifdef HAVE_SYS_SELECT_H
84 #include <sys/select.h>
85 #endif
86
87 #ifdef HAVE_GETOPT_H
88 #include <getopt.h>
89 #endif
90
91 #ifdef USE_RENDEZVOUS
92 #include <DNSServiceDiscovery/DNSServiceDiscovery.h>
93 #endif
94
95 #include "catalog/pg_database.h"
96 #include "commands/async.h"
97 #include "lib/dllist.h"
98 #include "libpq/auth.h"
99 #include "libpq/crypt.h"
100 #include "libpq/libpq.h"
101 #include "libpq/pqcomm.h"
102 #include "libpq/pqsignal.h"
103 #include "miscadmin.h"
104 #include "nodes/nodes.h"
105 #include "postmaster/postmaster.h"
106 #include "postmaster/pgarch.h"
107 #include "postmaster/syslogger.h"
108 #include "storage/fd.h"
109 #include "storage/ipc.h"
110 #include "storage/pg_shmem.h"
111 #include "storage/pmsignal.h"
112 #include "storage/proc.h"
113 #include "storage/bufmgr.h"
114 #include "access/xlog.h"
115 #include "tcop/tcopprot.h"
116 #include "utils/builtins.h"
117 #include "utils/guc.h"
118 #include "utils/memutils.h"
119 #include "utils/ps_status.h"
120 #include "bootstrap/bootstrap.h"
121 #include "pgstat.h"
122
123
124 /*
125  * List of active backends (or child processes anyway; we don't actually
126  * know whether a given child has become a backend or is still in the
127  * authorization phase).  This is used mainly to keep track of how many
128  * children we have and send them appropriate signals when necessary.
129  *
130  * "Special" children such as the startup and bgwriter tasks are not in
131  * this list.
132  */
133 typedef struct bkend
134 {
135         pid_t           pid;                    /* process ID of the child process */
136         long            cancel_key;             /* cancel key assigned to this child by the postmaster */
137 } Backend;
138
139 static Dllist *BackendList;             /* list of Backend entries; created with DLNewList() in PostmasterMain */
140
141 #ifdef EXEC_BACKEND
142 #define NUM_BACKENDARRAY_ELEMS (2*MaxBackends)  /* twice MaxBackends; also sizes the WIN32 child PID/HANDLE arrays */
143 static Backend *ShmemBackendArray;      /* presumably a shared-memory copy of the backend list for exec'd children --- TODO confirm */
144 #endif
145
146 /* The port number we are listening for connections on, plus related socket settings */
147 int                     PostPortNumber; /* cast to unsigned short and passed to StreamServerPort() */
148 char       *UnixSocketDir;      /* directory argument passed to StreamServerPort() */
149 char       *ListenAddresses;    /* comma-separated host list; "*" makes us pass a NULL host */
150
151 /*
152  * ReservedBackends is the number of backends reserved for superuser use.
153  * This number is taken out of the pool size given by MaxBackends, so
154  * the number of backend slots available to non-superusers is
155  * (MaxBackends - ReservedBackends).  Note what this really means is
156  * "if there are <= ReservedBackends connections available, only superusers
157  * can make new connections" --- pre-existing superuser connections don't
158  * count against the limit.
159  */
160 int                     ReservedBackends;
161
162
163 static const char *progname = NULL;     /* set from get_progname(argv[0]) in PostmasterMain */
164
165 /* The socket(s) we're listening to.  Unused slots hold -1. */
166 #define MAXLISTEN       10
167 static int      ListenSocket[MAXLISTEN];
168
169 /*
170  * Set by the -o option; each -o argument is appended, preceded by a space
171  */
172 static char ExtraOptions[MAXPGPATH];
173
174 /*
175  * These globals control the behavior of the postmaster in case some
176  * backend dumps core.  Normally, it kills all peers of the dead backend
177  * and reinitializes shared memory.  By specifying -s or -n, we can have
178  * the postmaster stop (rather than kill) peers and not reinitialize
179  * shared data structures.
180  */
181 static bool Reinit = true;              /* cleared by -n: don't reinit shared memory after abnormal exit */
182 static bool SendStop = false;   /* set by -s: send SIGSTOP, rather than SIGQUIT, to peers of a crashed backend */
183
184 /* still more option variables */
185 bool            EnableSSL = false;      /* if true (and USE_SSL), secure_initialize() is called at startup */
186 bool            SilentMode = false; /* silent mode (-S): detach via pmdaemonize() */
187
188 int                     PreAuthDelay = 0;       /* NOTE(review): units not visible here, presumably seconds --- confirm */
189 int                     AuthenticationTimeout = 60;     /* NOTE(review): presumably seconds --- confirm */
190
191 bool            log_hostname;           /* for ps display and logging */
192 bool            Log_connections = false;
193 bool            Db_user_namespace = false;
194
195 char       *rendezvous_name;    /* name registered via DNSServiceRegistrationCreate() when USE_RENDEZVOUS */
196
197 /* list of library:init-function to be preloaded */
198 char       *preload_libraries_string = NULL;
199
200 /* PIDs of special child processes; 0 when not running */
201 static pid_t StartupPID = 0,
202                         BgWriterPID = 0,
203                         PgArchPID = 0,
204                         PgStatPID = 0,
205                         SysLoggerPID = 0;
206
207 /* Startup/shutdown state */
208 #define                 NoShutdown              0
209 #define                 SmartShutdown   1
210 #define                 FastShutdown    2
211
212 static int      Shutdown = NoShutdown;  /* current state: one of the three values above */
213
214 static bool FatalError = false; /* T if recovering from backend crash */
215
216 bool            ClientAuthInProgress = false;           /* T during new-client
217                                                                                                  * authentication */
218
219 /*
220  * State for assigning random salts and cancel keys.
221  * Also, the global MyCancelKey passes the cancel key assigned to a given
222  * backend from the postmaster to that backend (via fork).
223  */
224 static unsigned int random_seed = 0;
225
226 static int      debug_flag = 0; /* numeric level given with -d (atoi of its argument) */
227
228 extern char *optarg;
229 extern int      optind,
230                         opterr;
231
232 #ifdef HAVE_INT_OPTRESET
233 extern int      optreset;
234 #endif
235
236 /*
237  * postmaster.c - function prototypes
238  */
239 static void checkDataDir(void);
240
241 #ifdef USE_RENDEZVOUS
242 static void reg_reply(DNSServiceRegistrationReplyErrorType errorCode,
243                   void *context);
244 #endif
245 static void pmdaemonize(void);
246 static Port *ConnCreate(int serverFd);
247 static void ConnFree(Port *port);
248 static void reset_shared(unsigned short port);
249 static void SIGHUP_handler(SIGNAL_ARGS);
250 static void pmdie(SIGNAL_ARGS);
251 static void reaper(SIGNAL_ARGS);
252 static void sigusr1_handler(SIGNAL_ARGS);
253 static void dummy_handler(SIGNAL_ARGS);
254 static void CleanupBackend(int pid, int exitstatus);
255 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
256 static void LogChildExit(int lev, const char *procname,
257                          int pid, int exitstatus);
258 static int      BackendRun(Port *port);
259 static void ExitPostmaster(int status);
260 static void usage(const char *);
261 static int      ServerLoop(void);
262 static int      BackendStartup(Port *port);
263 static int      ProcessStartupPacket(Port *port, bool SSLdone);
264 static void processCancelRequest(Port *port, void *pkt);
265 static int      initMasks(fd_set *rmask);
266 static void report_fork_failure_to_client(Port *port, int errnum);
267 static enum CAC_state canAcceptConnections(void);
268 static long PostmasterRandom(void);
269 static void RandomSalt(char *cryptSalt, char *md5Salt);
270 static void SignalChildren(int signal);
271 static int      CountChildren(void);
272 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
273 static pid_t StartChildProcess(int xlop);
274
275 #ifdef EXEC_BACKEND
276
277 #ifdef WIN32
278 static pid_t win32_forkexec(const char *path, char *argv[]);
279 static void win32_AddChild(pid_t pid, HANDLE handle);
280 static void win32_RemoveChild(pid_t pid);
281 static pid_t win32_waitpid(int *exitstatus);
282 static DWORD WINAPI win32_sigchld_waiter(LPVOID param);
283
284 static pid_t *win32_childPIDArray;
285 static HANDLE *win32_childHNDArray;
286 static unsigned long win32_numChildren = 0;
287
288 HANDLE          PostmasterHandle;
289 #endif
290
291 static pid_t backend_forkexec(Port *port);
292 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
293
294 static void read_backend_variables(char *filename, Port *port);
295 static bool write_backend_variables(char *filename, Port *port);
296
297 static void ShmemBackendArrayAdd(Backend *bn);
298 static void ShmemBackendArrayRemove(pid_t pid);
299 #endif   /* EXEC_BACKEND */
300
301 #define StartupDataBase()               StartChildProcess(BS_XLOG_STARTUP)      /* yields StartChildProcess()'s pid_t result */
302 #define StartBackgroundWriter() StartChildProcess(BS_XLOG_BGWRITER)     /* same, with xlop = BS_XLOG_BGWRITER */
303
304
305 /*
306  * Postmaster main entry point
307  */
308 int
309 PostmasterMain(int argc, char *argv[])
310 {
311         int                     opt;
312         int                     status;
313         char       *userDoption = NULL;
314         int                     i;
315
316         progname = get_progname(argv[0]);
317
318         MyProcPid = PostmasterPid = getpid();
319
320         IsPostmasterEnvironment = true;
321
322         /*
323          * Catch standard options before doing much else.  This even works on
324          * systems without getopt_long.
325          */
326         if (argc > 1)
327         {
328                 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
329                 {
330                         usage(progname);
331                         ExitPostmaster(0);
332                 }
333                 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
334                 {
335                         puts("postmaster (PostgreSQL) " PG_VERSION);
336                         ExitPostmaster(0);
337                 }
338         }
339
340         /*
341          * for security, no dir or file created can be group or other
342          * accessible
343          */
344         umask((mode_t) 0077);
345
346         /*
347          * Fire up essential subsystems: memory management
348          */
349         MemoryContextInit();
350
351         /*
352          * By default, palloc() requests in the postmaster will be allocated
353          * in the PostmasterContext, which is space that can be recycled by
354          * backends.  Allocated data that needs to be available to backends
355          * should be allocated in TopMemoryContext.
356          */
357         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
358                                                                                           "Postmaster",
359                                                                                           ALLOCSET_DEFAULT_MINSIZE,
360                                                                                           ALLOCSET_DEFAULT_INITSIZE,
361                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
362         MemoryContextSwitchTo(PostmasterContext);
363
364         IgnoreSystemIndexes(false);
365
366         if (find_my_exec(argv[0], my_exec_path) < 0)
367                 elog(FATAL, "%s: could not locate my own executable path",
368                          argv[0]);
369
370         get_pkglib_path(my_exec_path, pkglib_path);
371
372         /*
373          * Options setup
374          */
375         InitializeGUCOptions();
376
377         opterr = 1;
378
379         while ((opt = getopt(argc, argv, "A:a:B:b:c:D:d:Fh:ik:lm:MN:no:p:Ss-:")) != -1)
380         {
381                 switch (opt)
382                 {
383                         case 'A':
384 #ifdef USE_ASSERT_CHECKING
385                                 SetConfigOption("debug_assertions", optarg, PGC_POSTMASTER, PGC_S_ARGV);
386 #else
387                                 write_stderr("%s: assert checking is not compiled in\n", progname);
388 #endif
389                                 break;
390                         case 'a':
391                                 /* Can no longer set authentication method. */
392                                 break;
393                         case 'B':
394                                 SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
395                                 break;
396                         case 'b':
397                                 /* Can no longer set the backend executable file to use. */
398                                 break;
399                         case 'D':
400                                 userDoption = optarg;
401                                 break;
402                         case 'd':
403                                 {
404                                         /* Turn on debugging for the postmaster. */
405                                         char       *debugstr = palloc(strlen("debug") + strlen(optarg) + 1);
406
407                                         sprintf(debugstr, "debug%s", optarg);
408                                         SetConfigOption("log_min_messages", debugstr,
409                                                                         PGC_POSTMASTER, PGC_S_ARGV);
410                                         pfree(debugstr);
411                                         debug_flag = atoi(optarg);
412                                         break;
413                                 }
414                         case 'F':
415                                 SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
416                                 break;
417                         case 'h':
418                                 SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
419                                 break;
420                         case 'i':
421                                 SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
422                                 break;
423                         case 'k':
424                                 SetConfigOption("unix_socket_directory", optarg, PGC_POSTMASTER, PGC_S_ARGV);
425                                 break;
426 #ifdef USE_SSL
427                         case 'l':
428                                 SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
429                                 break;
430 #endif
431                         case 'm':
432                                 /* Multiplexed backends no longer supported. */
433                                 break;
434                         case 'M':
435
436                                 /*
437                                  * ignore this flag.  This may be passed in because the
438                                  * program was run as 'postgres -M' instead of
439                                  * 'postmaster'
440                                  */
441                                 break;
442                         case 'N':
443                                 /* The max number of backends to start. */
444                                 SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
445                                 break;
446                         case 'n':
447                                 /* Don't reinit shared mem after abnormal exit */
448                                 Reinit = false;
449                                 break;
450                         case 'o':
451
452                                 /*
453                                  * Other options to pass to the backend on the command
454                                  * line
455                                  */
456                                 snprintf(ExtraOptions + strlen(ExtraOptions),
457                                                  sizeof(ExtraOptions) - strlen(ExtraOptions),
458                                                  " %s", optarg);
459                                 break;
460                         case 'p':
461                                 SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
462                                 break;
463                         case 'S':
464
465                                 /*
466                                  * Start in 'S'ilent mode (disassociate from controlling
467                                  * tty). You may also think of this as 'S'ysV mode since
468                                  * it's most badly needed on SysV-derived systems like
469                                  * SVR4 and HP-UX.
470                                  */
471                                 SetConfigOption("silent_mode", "true", PGC_POSTMASTER, PGC_S_ARGV);
472                                 break;
473                         case 's':
474
475                                 /*
476                                  * In the event that some backend dumps core, send
477                                  * SIGSTOP, rather than SIGQUIT, to all its peers.      This
478                                  * lets the wily post_hacker collect core dumps from
479                                  * everyone.
480                                  */
481                                 SendStop = true;
482                                 break;
483                         case 'c':
484                         case '-':
485                                 {
486                                         char       *name,
487                                                            *value;
488
489                                         ParseLongOption(optarg, &name, &value);
490                                         if (!value)
491                                         {
492                                                 if (opt == '-')
493                                                         ereport(ERROR,
494                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
495                                                                          errmsg("--%s requires a value",
496                                                                                         optarg)));
497                                                 else
498                                                         ereport(ERROR,
499                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
500                                                                          errmsg("-c %s requires a value",
501                                                                                         optarg)));
502                                         }
503
504                                         SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
505                                         free(name);
506                                         if (value)
507                                                 free(value);
508                                         break;
509                                 }
510
511                         default:
512                                 write_stderr("Try \"%s --help\" for more information.\n",
513                                                          progname);
514                                 ExitPostmaster(1);
515                 }
516         }
517
518         /*
519          * Postmaster accepts no non-option switch arguments.
520          */
521         if (optind < argc)
522         {
523                 write_stderr("%s: invalid argument: \"%s\"\n",
524                                          progname, argv[optind]);
525                 write_stderr("Try \"%s --help\" for more information.\n",
526                                          progname);
527                 ExitPostmaster(1);
528         }
529
530         /*
531          * Locate the proper configuration files and data directory, and
532          * read postgresql.conf for the first time.
533          */
534         if (!SelectConfigFiles(userDoption, progname))
535                 ExitPostmaster(2);
536
537         /* Verify that DataDir looks reasonable */
538         checkDataDir();
539
540         /*
541          * Check for invalid combinations of GUC settings.
542          */
543         if (NBuffers < 2 * MaxBackends || NBuffers < 16)
544         {
545                 /*
546                  * Do not accept -B so small that backends are likely to starve
547                  * for lack of buffers.  The specific choices here are somewhat
548                  * arbitrary.
549                  */
550                 write_stderr("%s: the number of buffers (-B) must be at least twice the number of allowed connections (-N) and at least 16\n", progname);
551                 ExitPostmaster(1);
552         }
553
554         if (ReservedBackends >= MaxBackends)
555         {
556                 write_stderr("%s: superuser_reserved_connections must be less than max_connections\n", progname);
557                 ExitPostmaster(1);
558         }
559
560         /*
561          * Other one-time internal sanity checks can go here.
562          */
563         if (!CheckDateTokenTables())
564         {
565                 write_stderr("%s: invalid datetoken tables, please fix\n", progname);
566                 ExitPostmaster(1);
567         }
568
569         /*
570          * Now that we are done processing the postmaster arguments, reset
571          * getopt(3) library so that it will work correctly in subprocesses.
572          */
573         optind = 1;
574 #ifdef HAVE_INT_OPTRESET
575         optreset = 1;                           /* some systems need this too */
576 #endif
577
578         /* For debugging: display postmaster environment */
579         {
580                 extern char **environ;
581                 char      **p;
582
583                 ereport(DEBUG3,
584                         (errmsg_internal("%s: PostmasterMain: initial environ dump:",
585                                                          progname)));
586                 ereport(DEBUG3,
587                  (errmsg_internal("-----------------------------------------")));
588                 for (p = environ; *p; ++p)
589                         ereport(DEBUG3,
590                                         (errmsg_internal("\t%s", *p)));
591                 ereport(DEBUG3,
592                  (errmsg_internal("-----------------------------------------")));
593         }
594
595 #ifdef EXEC_BACKEND
596         if (find_other_exec(argv[0], "postgres", PG_VERSIONSTR,
597                                                 postgres_exec_path) < 0)
598                 ereport(FATAL,
599                          (errmsg("%s: could not locate matching postgres executable",
600                                          progname)));
601 #endif
602
603         /*
604          * Initialize SSL library, if specified.
605          */
606 #ifdef USE_SSL
607         if (EnableSSL)
608                 secure_initialize();
609 #endif
610
611         /*
612          * process any libraries that should be preloaded and optionally
613          * pre-initialized
614          */
615         if (preload_libraries_string)
616                 process_preload_libraries(preload_libraries_string);
617
618         /*
619          * Fork away from controlling terminal, if -S specified.
620          *
621          * Must do this before we grab any interlock files, else the interlocks
622          * will show the wrong PID.
623          */
624         if (SilentMode)
625                 pmdaemonize();
626
627         /*
628          * Create lockfile for data directory.
629          *
630          * We want to do this before we try to grab the input sockets, because
631          * the data directory interlock is more reliable than the socket-file
632          * interlock (thanks to whoever decided to put socket files in /tmp
633          * :-(). For the same reason, it's best to grab the TCP socket(s)
634          * before the Unix socket.
635          */
636         CreateDataDirLockFile(DataDir, true);
637
638         /*
639          * Remove old temporary files.  At this point there can be no other
640          * Postgres processes running in this directory, so this should be
641          * safe.
642          */
643         RemovePgTempFiles();
644
645         /*
646          * Establish input sockets.
647          */
648         for (i = 0; i < MAXLISTEN; i++)
649                 ListenSocket[i] = -1;
650
651         if (ListenAddresses)
652         {
653                 char       *rawstring;
654                 List       *elemlist;
655                 ListCell   *l;
656
657                 /* Need a modifiable copy of ListenAddresses */
658                 rawstring = pstrdup(ListenAddresses);
659
660                 /* Parse string into list of identifiers */
661                 if (!SplitIdentifierString(rawstring, ',', &elemlist))
662                 {
663                         /* syntax error in list */
664                         ereport(FATAL,
665                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
666                                 errmsg("invalid list syntax for \"listen_addresses\"")));
667                 }
668
669                 foreach(l, elemlist)
670                 {
671                         char       *curhost = (char *) lfirst(l);
672
673                         if (strcmp(curhost, "*") == 0)
674                                 status = StreamServerPort(AF_UNSPEC, NULL,
675                                                                                   (unsigned short) PostPortNumber,
676                                                                                   UnixSocketDir,
677                                                                                   ListenSocket, MAXLISTEN);
678                         else
679                                 status = StreamServerPort(AF_UNSPEC, curhost,
680                                                                                   (unsigned short) PostPortNumber,
681                                                                                   UnixSocketDir,
682                                                                                   ListenSocket, MAXLISTEN);
683                         if (status != STATUS_OK)
684                                 ereport(WARNING,
685                                          (errmsg("could not create listen socket for \"%s\"",
686                                                          curhost)));
687                 }
688
689                 list_free(elemlist);
690                 pfree(rawstring);
691         }
692
693 #ifdef USE_RENDEZVOUS
694         /* Register for Rendezvous only if we opened TCP socket(s) */
695         if (ListenSocket[0] != -1 && rendezvous_name != NULL)
696         {
697                 DNSServiceRegistrationCreate(rendezvous_name,
698                                                                          "_postgresql._tcp.",
699                                                                          "",
700                                                                          htonl(PostPortNumber),
701                                                                          "",
702                                                                  (DNSServiceRegistrationReply) reg_reply,
703                                                                          NULL);
704         }
705 #endif
706
707 #ifdef HAVE_UNIX_SOCKETS
708         status = StreamServerPort(AF_UNIX, NULL,
709                                                           (unsigned short) PostPortNumber,
710                                                           UnixSocketDir,
711                                                           ListenSocket, MAXLISTEN);
712         if (status != STATUS_OK)
713                 ereport(WARNING,
714                                 (errmsg("could not create Unix-domain socket")));
715 #endif
716
717         /*
718          * check that we have some socket to listen on
719          */
720         if (ListenSocket[0] == -1)
721                 ereport(FATAL,
722                                 (errmsg("no socket created for listening")));
723
724         XLOGPathInit();
725
726         /*
727          * Set up shared memory and semaphores.
728          */
729         reset_shared(PostPortNumber);
730
731         /*
732          * Estimate number of openable files.  This must happen after setting
733          * up semaphores, because on some platforms semaphores count as open
734          * files.
735          */
736         set_max_safe_fds();
737
738         /*
739          * Initialize the list of active backends.
740          */
741         BackendList = DLNewList();
742
743 #ifdef WIN32
744
745         /*
746          * Initialize the child pid/HANDLE arrays for signal handling.
747          */
748         win32_childPIDArray = (pid_t *)
749                 malloc(NUM_BACKENDARRAY_ELEMS * sizeof(pid_t));
750         win32_childHNDArray = (HANDLE *)
751                 malloc(NUM_BACKENDARRAY_ELEMS * sizeof(HANDLE));
752         if (!win32_childPIDArray || !win32_childHNDArray)
753                 ereport(FATAL,
754                                 (errcode(ERRCODE_OUT_OF_MEMORY),
755                                  errmsg("out of memory")));
756
757         /*
758          * Set up a handle that child processes can use to check whether the
759          * postmaster is still running.
760          */
761         if (DuplicateHandle(GetCurrentProcess(),
762                                                 GetCurrentProcess(),
763                                                 GetCurrentProcess(),
764                                                 &PostmasterHandle,
765                                                 0,
766                                                 TRUE,
767                                                 DUPLICATE_SAME_ACCESS) == 0)
768                 ereport(FATAL,
769                         (errmsg_internal("could not duplicate postmaster handle: %d",
770                                                          (int) GetLastError())));
771 #endif
772
773         /*
774          * Record postmaster options.  We delay this till now to avoid
775          * recording bogus options (eg, NBuffers too high for available
776          * memory).
777          */
778         if (!CreateOptsFile(argc, argv, my_exec_path))
779                 ExitPostmaster(1);
780
781 #ifdef EXEC_BACKEND
782         write_nondefault_variables(PGC_POSTMASTER);
783 #endif
784
785         /*
786          * Write the external PID file if requested
787          */
788         if (external_pid_file)
789         {
790                 FILE       *fpidfile = fopen(external_pid_file, "w");
791
792                 if (fpidfile)
793                 {
794                         fprintf(fpidfile, "%d\n", MyProcPid);
795                         fclose(fpidfile);
796                         /* Should we remove the pid file on postmaster exit? */
797                 }
798                 else
799                         write_stderr("%s: could not write external PID file \"%s\": %s\n",
800                                                  progname, external_pid_file, strerror(errno));
801         }
802
803         /*
804          * Set up signal handlers for the postmaster process.
805          *
806          * CAUTION: when changing this list, check for side-effects on the signal
807          * handling setup of child processes.  See tcop/postgres.c,
808          * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/pgarch.c,
809          * postmaster/pgstat.c, and postmaster/syslogger.c.
810          */
811         pqinitmask();
812         PG_SETMASK(&BlockSig);
813
814         pqsignal(SIGHUP, SIGHUP_handler);       /* reread config file and have
815                                                                                  * children do same */
816         pqsignal(SIGINT, pmdie);        /* send SIGTERM and shut down */
817         pqsignal(SIGQUIT, pmdie);       /* send SIGQUIT and die */
818         pqsignal(SIGTERM, pmdie);       /* wait for children and shut down */
819         pqsignal(SIGALRM, SIG_IGN); /* ignored */
820         pqsignal(SIGPIPE, SIG_IGN); /* ignored */
821         pqsignal(SIGUSR1, sigusr1_handler); /* message from child process */
822         pqsignal(SIGUSR2, dummy_handler);       /* unused, reserve for children */
823         pqsignal(SIGCHLD, reaper);      /* handle child termination */
824         pqsignal(SIGTTIN, SIG_IGN); /* ignored */
825         pqsignal(SIGTTOU, SIG_IGN); /* ignored */
826         /* ignore SIGXFSZ, so that ulimit violations work like disk full */
827 #ifdef SIGXFSZ
828         pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
829 #endif
830
831         /*
832          * If enabled, start up syslogger collection subprocess
833          */
834         SysLoggerPID = SysLogger_Start();
835
836         /*
837          * Reset whereToSendOutput from Debug (its starting state) to None.
838          * This stops ereport from sending log messages to stderr unless
839          * Log_destination permits.  We don't do this until the postmaster is
840          * fully launched, since startup failures may as well be reported to
841          * stderr.
842          */
843         whereToSendOutput = None;
844
845         /*
846          * Initialize the statistics collector stuff
847          */
848         pgstat_init();
849
850         /*
851          * Load cached files for client authentication.
852          */
853         load_hba();
854         load_ident();
855         load_user();
856         load_group();
857
858         /*
859          * We're ready to rock and roll...
860          */
861         StartupPID = StartupDataBase();
862
863         status = ServerLoop();
864
865         /*
866          * ServerLoop probably shouldn't ever return, but if it does, close
867          * down.
868          */
869         ExitPostmaster(status != STATUS_OK);
870
871         return 0;                                       /* not reached */
872 }
873
874
875 /*
876  * Validate the proposed data directory
877  */
878 static void
879 checkDataDir(void)
880 {
881         char            path[MAXPGPATH];
882         FILE       *fp;
883         struct stat stat_buf;
884
885         Assert(DataDir);
886
887         if (stat(DataDir, &stat_buf) != 0)
888         {
889                 if (errno == ENOENT)
890                         ereport(FATAL,
891                                         (errcode_for_file_access(),
892                                          errmsg("data directory \"%s\" does not exist",
893                                                         DataDir)));
894                 else
895                         ereport(FATAL,
896                                         (errcode_for_file_access(),
897                          errmsg("could not read permissions of directory \"%s\": %m",
898                                         DataDir)));
899         }
900
901         /*
902          * Check if the directory has group or world access.  If so, reject.
903          *
904          * XXX temporarily suppress check when on Windows, because there may not
905          * be proper support for Unix-y file permissions.  Need to think of a
906          * reasonable check to apply on Windows.
907          */
908 #if !defined(WIN32) && !defined(__CYGWIN__)
909         if (stat_buf.st_mode & (S_IRWXG | S_IRWXO))
910                 ereport(FATAL,
911                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
912                                  errmsg("data directory \"%s\" has group or world access",
913                                                 DataDir),
914                                  errdetail("Permissions should be u=rwx (0700).")));
915 #endif
916
917         /* Look for PG_VERSION before looking for pg_control */
918         ValidatePgVersion(DataDir);
919
920         snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
921
922         fp = AllocateFile(path, PG_BINARY_R);
923         if (fp == NULL)
924         {
925                 write_stderr("%s: could not find the database system\n"
926                                          "Expected to find it in the directory \"%s\",\n"
927                                          "but could not open file \"%s\": %s\n",
928                                          progname, DataDir, path, strerror(errno));
929                 ExitPostmaster(2);
930         }
931         FreeFile(fp);
932 }
933
934
935 #ifdef USE_RENDEZVOUS
936
937 /*
938  * empty callback function for DNSServiceRegistrationCreate()
939  */
940 static void
941 reg_reply(DNSServiceRegistrationReplyErrorType errorCode, void *context)
942 {
943
944 }
945 #endif   /* USE_RENDEZVOUS */
946
947
948 /*
949  * Fork away from the controlling terminal (-S option)
950  */
951 static void
952 pmdaemonize(void)
953 {
954 #ifndef WIN32
955         int                     i;
956         pid_t           pid;
957
958 #ifdef LINUX_PROFILE
959         struct itimerval prof_itimer;
960 #endif
961
962 #ifdef LINUX_PROFILE
963         /* see comments in BackendStartup */
964         getitimer(ITIMER_PROF, &prof_itimer);
965 #endif
966
967         pid = fork();
968         if (pid == (pid_t) -1)
969         {
970                 write_stderr("%s: could not fork background process: %s\n",
971                                          progname, strerror(errno));
972                 ExitPostmaster(1);
973         }
974         else if (pid)
975         {                                                       /* parent */
976                 /* Parent should just exit, without doing any atexit cleanup */
977                 _exit(0);
978         }
979
980 #ifdef LINUX_PROFILE
981         setitimer(ITIMER_PROF, &prof_itimer, NULL);
982 #endif
983
984         MyProcPid = PostmasterPid = getpid();           /* reset PID vars to child */
985
986 /* GH: If there's no setsid(), we hopefully don't need silent mode.
987  * Until there's a better solution.
988  */
989 #ifdef HAVE_SETSID
990         if (setsid() < 0)
991         {
992                 write_stderr("%s: could not dissociate from controlling TTY: %s\n",
993                                          progname, strerror(errno));
994                 ExitPostmaster(1);
995         }
996 #endif
997         i = open(NULL_DEV, O_RDWR);
998         dup2(i, 0);
999         dup2(i, 1);
1000         dup2(i, 2);
1001         close(i);
1002 #else                                                   /* WIN32 */
1003         /* not supported */
1004         elog(FATAL, "SilentMode not supported under WIN32");
1005 #endif   /* WIN32 */
1006 }
1007
1008
/*
 * Print out help message
 *
 * Writes the command-line summary to stdout.  "progname" is substituted
 * into the banner and the usage line.  Each message goes through its own
 * gettext() call.  The -A and -l lines appear only when the build defines
 * USE_ASSERT_CHECKING and USE_SSL respectively.
 */
static void
usage(const char *progname)
{
	/* Banner and synopsis */
	printf(gettext("%s is the PostgreSQL server.\n\n"), progname);
	printf(gettext("Usage:\n  %s [OPTION]...\n\n"), progname);

	/* Ordinary options */
	printf(gettext("Options:\n"));
#ifdef USE_ASSERT_CHECKING
	printf(gettext("  -A 1|0          enable/disable run-time assert checking\n"));
#endif
	printf(gettext("  -B NBUFFERS     number of shared buffers\n"));
	printf(gettext("  -c NAME=VALUE   set run-time parameter\n"));
	printf(gettext("  -d 1-5          debugging level\n"));
	printf(gettext("  -D DATADIR      database directory\n"));
	printf(gettext("  -F              turn fsync off\n"));
	printf(gettext("  -h HOSTNAME     host name or IP address to listen on\n"));
	printf(gettext("  -i              enable TCP/IP connections\n"));
	printf(gettext("  -k DIRECTORY    Unix-domain socket location\n"));
#ifdef USE_SSL
	printf(gettext("  -l              enable SSL connections\n"));
#endif
	printf(gettext("  -N MAX-CONNECT  maximum number of allowed connections\n"));
	printf(gettext("  -o OPTIONS      pass \"OPTIONS\" to each server process\n"));
	printf(gettext("  -p PORT         port number to listen on\n"));
	printf(gettext("  -S              silent mode (start in background without logging output)\n"));
	printf(gettext("  --help          show this help, then exit\n"));
	printf(gettext("  --version       output version information, then exit\n"));

	/* Options meant for developers */
	printf(gettext("\nDeveloper options:\n"));
	printf(gettext("  -n              do not reinitialize shared memory after abnormal exit\n"));
	printf(gettext("  -s              send SIGSTOP to all backend servers if one dies\n"));

	/* Pointer to the documentation and the bug-report address */
	printf(gettext("\nPlease read the documentation for the complete list of run-time\n"
				   "configuration settings and how to set them on the command line or in\n"
				   "the configuration file.\n\n"
				   "Report bugs to <pgsql-bugs@postgresql.org>.\n"));
}
1048
1049
1050 /*
1051  * Main idle loop of postmaster
1052  */
1053 static int
1054 ServerLoop(void)
1055 {
1056         fd_set          readmask;
1057         int                     nSockets;
1058         time_t          now,
1059                                 last_touch_time;
1060         struct timeval earlier,
1061                                 later;
1062         struct timezone tz;
1063
1064         gettimeofday(&earlier, &tz);
1065         last_touch_time = time(NULL);
1066
1067         nSockets = initMasks(&readmask);
1068
1069         for (;;)
1070         {
1071                 Port       *port;
1072                 fd_set          rmask;
1073                 struct timeval timeout;
1074                 int                     selres;
1075                 int                     i;
1076
1077                 /*
1078                  * Wait for something to happen.
1079                  *
1080                  * We wait at most one minute, to ensure that the other background
1081                  * tasks handled below get done even when no requests are
1082                  * arriving.
1083                  */
1084                 memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1085
1086                 timeout.tv_sec = 60;
1087                 timeout.tv_usec = 0;
1088
1089                 PG_SETMASK(&UnBlockSig);
1090
1091                 selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1092
1093                 /*
1094                  * Block all signals until we wait again.  (This makes it safe for
1095                  * our signal handlers to do nontrivial work.)
1096                  */
1097                 PG_SETMASK(&BlockSig);
1098
1099                 if (selres < 0)
1100                 {
1101                         if (errno != EINTR && errno != EWOULDBLOCK)
1102                         {
1103                                 ereport(LOG,
1104                                                 (errcode_for_socket_access(),
1105                                                  errmsg("select() failed in postmaster: %m")));
1106                                 return STATUS_ERROR;
1107                         }
1108                 }
1109
1110                 /*
1111                  * New connection pending on any of our sockets? If so, fork a
1112                  * child process to deal with it.
1113                  */
1114                 if (selres > 0)
1115                 {
1116                         /*
1117                          * Select a random seed at the time of first receiving a
1118                          * request.
1119                          */
1120                         while (random_seed == 0)
1121                         {
1122                                 gettimeofday(&later, &tz);
1123
1124                                 /*
1125                                  * We are not sure how much precision is in tv_usec, so we
1126                                  * swap the high and low 16 bits of 'later' and XOR them with
1127                                  * 'earlier'. On the off chance that the result is 0, we
1128                                  * loop until it isn't.
1129                                  */
1130                                 random_seed = earlier.tv_usec ^
1131                                         ((later.tv_usec << 16) |
1132                                          ((later.tv_usec >> 16) & 0xffff));
1133                         }
1134
1135                         for (i = 0; i < MAXLISTEN; i++)
1136                         {
1137                                 if (ListenSocket[i] == -1)
1138                                         break;
1139                                 if (FD_ISSET(ListenSocket[i], &rmask))
1140                                 {
1141                                         port = ConnCreate(ListenSocket[i]);
1142                                         if (port)
1143                                         {
1144                                                 BackendStartup(port);
1145
1146                                                 /*
1147                                                  * We no longer need the open socket or port
1148                                                  * structure in this process
1149                                                  */
1150                                                 StreamClose(port->sock);
1151                                                 ConnFree(port);
1152                                         }
1153                                 }
1154                         }
1155                 }
1156
1157                 /* If we have lost the system logger, try to start a new one */
1158                 if (SysLoggerPID == 0 && Redirect_stderr)
1159                         SysLoggerPID = SysLogger_Start();
1160
1161                 /*
1162                  * If no background writer process is running, and we are not in a
1163                  * state that prevents it, start one.  It doesn't matter if this
1164                  * fails, we'll just try again later.
1165                  */
1166                 if (BgWriterPID == 0 && StartupPID == 0 && !FatalError)
1167                 {
1168                         BgWriterPID = StartBackgroundWriter();
1169                         /* If shutdown is pending, set it going */
1170                         if (Shutdown > NoShutdown && BgWriterPID != 0)
1171                                 kill(BgWriterPID, SIGUSR2);
1172                 }
1173
1174                 /* If we have lost the archiver, try to start a new one */
1175                 if (XLogArchivingActive() && PgArchPID == 0 &&
1176                         StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
1177                         PgArchPID = pgarch_start();
1178
1179                 /* If we have lost the stats collector, try to start a new one */
1180                 if (PgStatPID == 0 &&
1181                         StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
1182                         PgStatPID = pgstat_start();
1183
1184                 /*
1185                  * Touch the socket and lock file at least every ten minutes, to
1186                  * ensure that they are not removed by overzealous /tmp-cleaning
1187                  * tasks.
1188                  */
1189                 now = time(NULL);
1190                 if (now - last_touch_time >= 10 * 60)
1191                 {
1192                         TouchSocketFile();
1193                         TouchSocketLockFile();
1194                         last_touch_time = now;
1195                 }
1196         }
1197 }
1198
1199
1200 /*
1201  * Initialise the masks for select() for the ports we are listening on.
1202  * Return the number of sockets to listen on.
1203  */
1204 static int
1205 initMasks(fd_set *rmask)
1206 {
1207         int                     nsocks = -1;
1208         int                     i;
1209
1210         FD_ZERO(rmask);
1211
1212         for (i = 0; i < MAXLISTEN; i++)
1213         {
1214                 int                     fd = ListenSocket[i];
1215
1216                 if (fd == -1)
1217                         break;
1218                 FD_SET(fd, rmask);
1219                 if (fd > nsocks)
1220                         nsocks = fd;
1221         }
1222
1223         return nsocks + 1;
1224 }
1225
1226
1227 /*
1228  * Read the startup packet and do something according to it.
1229  *
1230  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1231  * not return at all.
1232  *
1233  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1234  * if that's what you want.  Return STATUS_ERROR if you don't want to
1235  * send anything to the client, which would typically be appropriate
1236  * if we detect a communications failure.)
1237  */
1238 static int
1239 ProcessStartupPacket(Port *port, bool SSLdone)
1240 {
1241         int32           len;
1242         void       *buf;
1243         ProtocolVersion proto;
1244         MemoryContext oldcontext;
1245
1246         if (pq_getbytes((char *) &len, 4) == EOF)
1247         {
1248                 /*
1249                  * EOF after SSLdone probably means the client didn't like our
1250                  * response to NEGOTIATE_SSL_CODE.      That's not an error condition,
1251                  * so don't clutter the log with a complaint.
1252                  */
1253                 if (!SSLdone)
1254                         ereport(COMMERROR,
1255                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1256                                          errmsg("incomplete startup packet")));
1257                 return STATUS_ERROR;
1258         }
1259
1260         len = ntohl(len);
1261         len -= 4;
1262
1263         if (len < (int32) sizeof(ProtocolVersion) ||
1264                 len > MAX_STARTUP_PACKET_LENGTH)
1265         {
1266                 ereport(COMMERROR,
1267                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1268                                  errmsg("invalid length of startup packet")));
1269                 return STATUS_ERROR;
1270         }
1271
1272         /*
1273          * Allocate at least the size of an old-style startup packet, plus one
1274          * extra byte, and make sure all are zeroes.  This ensures we will
1275          * have null termination of all strings, in both fixed- and
1276          * variable-length packet layouts.
1277          */
1278         if (len <= (int32) sizeof(StartupPacket))
1279                 buf = palloc0(sizeof(StartupPacket) + 1);
1280         else
1281                 buf = palloc0(len + 1);
1282
1283         if (pq_getbytes(buf, len) == EOF)
1284         {
1285                 ereport(COMMERROR,
1286                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1287                                  errmsg("incomplete startup packet")));
1288                 return STATUS_ERROR;
1289         }
1290
1291         /*
1292          * The first field is either a protocol version number or a special
1293          * request code.
1294          */
1295         port->proto = proto = ntohl(*((ProtocolVersion *) buf));
1296
1297         if (proto == CANCEL_REQUEST_CODE)
1298         {
1299                 processCancelRequest(port, buf);
1300                 return 127;                             /* XXX */
1301         }
1302
1303         if (proto == NEGOTIATE_SSL_CODE && !SSLdone)
1304         {
1305                 char            SSLok;
1306
1307 #ifdef USE_SSL
1308                 /* No SSL when disabled or on Unix sockets */
1309                 if (!EnableSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
1310                         SSLok = 'N';
1311                 else
1312                         SSLok = 'S';            /* Support for SSL */
1313 #else
1314                 SSLok = 'N';                    /* No support for SSL */
1315 #endif
1316                 if (send(port->sock, &SSLok, 1, 0) != 1)
1317                 {
1318                         ereport(COMMERROR,
1319                                         (errcode_for_socket_access(),
1320                                  errmsg("failed to send SSL negotiation response: %m")));
1321                         return STATUS_ERROR;    /* close the connection */
1322                 }
1323
1324 #ifdef USE_SSL
1325                 if (SSLok == 'S' && secure_open_server(port) == -1)
1326                         return STATUS_ERROR;
1327 #endif
1328                 /* regular startup packet, cancel, etc packet should follow... */
1329                 /* but not another SSL negotiation request */
1330                 return ProcessStartupPacket(port, true);
1331         }
1332
1333         /* Could add additional special packet types here */
1334
1335         /*
1336          * Set FrontendProtocol now so that ereport() knows what format to
1337          * send if we fail during startup.
1338          */
1339         FrontendProtocol = proto;
1340
1341         /* Check we can handle the protocol the frontend is using. */
1342
1343         if (PG_PROTOCOL_MAJOR(proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) ||
1344           PG_PROTOCOL_MAJOR(proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) ||
1345         (PG_PROTOCOL_MAJOR(proto) == PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) &&
1346          PG_PROTOCOL_MINOR(proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST)))
1347                 ereport(FATAL,
1348                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1349                                  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
1350                                           PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
1351                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST),
1352                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST),
1353                                                 PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST))));
1354
1355         /*
1356          * Now fetch parameters out of startup packet and save them into the
1357          * Port structure.      All data structures attached to the Port struct
1358          * must be allocated in TopMemoryContext so that they won't disappear
1359          * when we pass them to PostgresMain (see BackendRun).  We need not
1360          * worry about leaking this storage on failure, since we aren't in the
1361          * postmaster process anymore.
1362          */
1363         oldcontext = MemoryContextSwitchTo(TopMemoryContext);
1364
1365         if (PG_PROTOCOL_MAJOR(proto) >= 3)
1366         {
1367                 int32           offset = sizeof(ProtocolVersion);
1368
1369                 /*
1370                  * Scan packet body for name/option pairs.      We can assume any
1371                  * string beginning within the packet body is null-terminated,
1372                  * thanks to zeroing extra byte above.
1373                  */
1374                 port->guc_options = NIL;
1375
1376                 while (offset < len)
1377                 {
1378                         char       *nameptr = ((char *) buf) + offset;
1379                         int32           valoffset;
1380                         char       *valptr;
1381
1382                         if (*nameptr == '\0')
1383                                 break;                  /* found packet terminator */
1384                         valoffset = offset + strlen(nameptr) + 1;
1385                         if (valoffset >= len)
1386                                 break;                  /* missing value, will complain below */
1387                         valptr = ((char *) buf) + valoffset;
1388
1389                         if (strcmp(nameptr, "database") == 0)
1390                                 port->database_name = pstrdup(valptr);
1391                         else if (strcmp(nameptr, "user") == 0)
1392                                 port->user_name = pstrdup(valptr);
1393                         else if (strcmp(nameptr, "options") == 0)
1394                                 port->cmdline_options = pstrdup(valptr);
1395                         else
1396                         {
1397                                 /* Assume it's a generic GUC option */
1398                                 port->guc_options = lappend(port->guc_options,
1399                                                                                         pstrdup(nameptr));
1400                                 port->guc_options = lappend(port->guc_options,
1401                                                                                         pstrdup(valptr));
1402                         }
1403                         offset = valoffset + strlen(valptr) + 1;
1404                 }
1405
1406                 /*
1407                  * If we didn't find a packet terminator exactly at the end of the
1408                  * given packet length, complain.
1409                  */
1410                 if (offset != len - 1)
1411                         ereport(FATAL,
1412                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1413                                          errmsg("invalid startup packet layout: expected terminator as last byte")));
1414         }
1415         else
1416         {
1417                 /*
1418                  * Get the parameters from the old-style, fixed-width-fields
1419                  * startup packet as C strings.  The packet destination was
1420                  * cleared first so a short packet has zeros silently added.  We
1421                  * have to be prepared to truncate the pstrdup result for oversize
1422                  * fields, though.
1423                  */
1424                 StartupPacket *packet = (StartupPacket *) buf;
1425
1426                 port->database_name = pstrdup(packet->database);
1427                 if (strlen(port->database_name) > sizeof(packet->database))
1428                         port->database_name[sizeof(packet->database)] = '\0';
1429                 port->user_name = pstrdup(packet->user);
1430                 if (strlen(port->user_name) > sizeof(packet->user))
1431                         port->user_name[sizeof(packet->user)] = '\0';
1432                 port->cmdline_options = pstrdup(packet->options);
1433                 if (strlen(port->cmdline_options) > sizeof(packet->options))
1434                         port->cmdline_options[sizeof(packet->options)] = '\0';
1435                 port->guc_options = NIL;
1436         }
1437
1438         /* Check a user name was given. */
1439         if (port->user_name == NULL || port->user_name[0] == '\0')
1440                 ereport(FATAL,
1441                                 (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
1442                  errmsg("no PostgreSQL user name specified in startup packet")));
1443
1444         /* The database defaults to the user name. */
1445         if (port->database_name == NULL || port->database_name[0] == '\0')
1446                 port->database_name = pstrdup(port->user_name);
1447
1448         if (Db_user_namespace)
1449         {
1450                 /*
1451                  * If user@, it is a global user, remove '@'. We only want to do
1452                  * this if there is an '@' at the end and no earlier in the user
1453                  * string or they may fake as a local user of another database
1454                  * attaching to this database.
1455                  */
1456                 if (strchr(port->user_name, '@') ==
1457                         port->user_name + strlen(port->user_name) - 1)
1458                         *strchr(port->user_name, '@') = '\0';
1459                 else
1460                 {
1461                         /* Append '@' and dbname */
1462                         char       *db_user;
1463
1464                         db_user = palloc(strlen(port->user_name) +
1465                                                          strlen(port->database_name) + 2);
1466                         sprintf(db_user, "%s@%s", port->user_name, port->database_name);
1467                         port->user_name = db_user;
1468                 }
1469         }
1470
1471         /*
1472          * Truncate given database and user names to length of a Postgres
1473          * name.  This avoids lookup failures when overlength names are given.
1474          */
1475         if (strlen(port->database_name) >= NAMEDATALEN)
1476                 port->database_name[NAMEDATALEN - 1] = '\0';
1477         if (strlen(port->user_name) >= NAMEDATALEN)
1478                 port->user_name[NAMEDATALEN - 1] = '\0';
1479
1480         /*
1481          * Done putting stuff in TopMemoryContext.
1482          */
1483         MemoryContextSwitchTo(oldcontext);
1484
1485         /*
1486          * If we're going to reject the connection due to database state, say
1487          * so now instead of wasting cycles on an authentication exchange.
1488          * (This also allows a pg_ping utility to be written.)
1489          */
1490         switch (port->canAcceptConnections)
1491         {
1492                 case CAC_STARTUP:
1493                         ereport(FATAL,
1494                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1495                                          errmsg("the database system is starting up")));
1496                         break;
1497                 case CAC_SHUTDOWN:
1498                         ereport(FATAL,
1499                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1500                                          errmsg("the database system is shutting down")));
1501                         break;
1502                 case CAC_RECOVERY:
1503                         ereport(FATAL,
1504                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1505                                          errmsg("the database system is in recovery mode")));
1506                         break;
1507                 case CAC_TOOMANY:
1508                         ereport(FATAL,
1509                                         (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
1510                                          errmsg("sorry, too many clients already")));
1511                         break;
1512                 case CAC_OK:
1513                 default:
1514                         break;
1515         }
1516
1517         return STATUS_OK;
1518 }
1519
1520
1521 /*
1522  * The client has sent a cancel request packet, not a normal
1523  * start-a-new-connection packet.  Perform the necessary processing.
1524  * Nothing is sent back to the client.
1525  */
1526 static void
1527 processCancelRequest(Port *port, void *pkt)
1528 {
1529         CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
1530         int                     backendPID;
1531         long            cancelAuthCode;
1532         Backend    *bp;
1533
1534 #ifndef EXEC_BACKEND
1535         Dlelem     *curr;
1536
1537 #else
1538         int                     i;
1539 #endif
1540
1541         backendPID = (int) ntohl(canc->backendPID);
1542         cancelAuthCode = (long) ntohl(canc->cancelAuthCode);
1543
1544         /*
1545          * See if we have a matching backend.  In the EXEC_BACKEND case, we
1546          * can no longer access the postmaster's own backend list, and must
1547          * rely on the duplicate array in shared memory.
1548          */
1549 #ifndef EXEC_BACKEND
1550         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
1551         {
1552                 bp = (Backend *) DLE_VAL(curr);
1553 #else
1554         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
1555         {
1556                 bp = (Backend *) &ShmemBackendArray[i];
1557 #endif
1558                 if (bp->pid == backendPID)
1559                 {
1560                         if (bp->cancel_key == cancelAuthCode)
1561                         {
1562                                 /* Found a match; signal that backend to cancel current op */
1563                                 ereport(DEBUG2,
1564                                                 (errmsg_internal("processing cancel request: sending SIGINT to process %d",
1565                                                                                  backendPID)));
1566                                 kill(bp->pid, SIGINT);
1567                         }
1568                         else
1569                                 /* Right PID, wrong key: no way, Jose */
1570                                 ereport(DEBUG2,
1571                                                 (errmsg_internal("bad key in cancel request for process %d",
1572                                                                                  backendPID)));
1573                         return;
1574                 }
1575         }
1576
1577         /* No matching backend */
1578         ereport(DEBUG2,
1579                         (errmsg_internal("bad pid in cancel request for process %d",
1580                                                          backendPID)));
1581 }
1582
1583 /*
1584  * canAcceptConnections --- check to see if database state allows connections.
1585  */
1586 static enum CAC_state
1587 canAcceptConnections(void)
1588 {
1589         /* Can't start backends when in startup/shutdown/recovery state. */
1590         if (Shutdown > NoShutdown)
1591                 return CAC_SHUTDOWN;
1592         if (StartupPID)
1593                 return CAC_STARTUP;
1594         if (FatalError)
1595                 return CAC_RECOVERY;
1596
1597         /*
1598          * Don't start too many children.
1599          *
1600          * We allow more connections than we can have backends here because some
1601          * might still be authenticating; they might fail auth, or some
1602          * existing backend might exit before the auth cycle is completed. The
1603          * exact MaxBackends limit is enforced when a new backend tries to
1604          * join the shared-inval backend array.
1605          */
1606         if (CountChildren() >= 2 * MaxBackends)
1607                 return CAC_TOOMANY;
1608
1609         return CAC_OK;
1610 }
1611
1612
1613 /*
1614  * ConnCreate -- create a local connection data structure
1615  */
1616 static Port *
1617 ConnCreate(int serverFd)
1618 {
1619         Port       *port;
1620
1621         if (!(port = (Port *) calloc(1, sizeof(Port))))
1622         {
1623                 ereport(LOG,
1624                                 (errcode(ERRCODE_OUT_OF_MEMORY),
1625                                  errmsg("out of memory")));
1626                 ExitPostmaster(1);
1627         }
1628
1629         if (StreamConnection(serverFd, port) != STATUS_OK)
1630         {
1631                 StreamClose(port->sock);
1632                 ConnFree(port);
1633                 port = NULL;
1634         }
1635         else
1636         {
1637                 /*
1638                  * Precompute password salt values to use for this connection.
1639                  * It's slightly annoying to do this long in advance of knowing
1640                  * whether we'll need 'em or not, but we must do the random()
1641                  * calls before we fork, not after.  Else the postmaster's random
1642                  * sequence won't get advanced, and all backends would end up
1643                  * using the same salt...
1644                  */
1645                 RandomSalt(port->cryptSalt, port->md5Salt);
1646         }
1647
1648         return port;
1649 }
1650
1651
1652 /*
1653  * ConnFree -- free a local connection data structure
1654  */
1655 static void
1656 ConnFree(Port *conn)
1657 {
1658 #ifdef USE_SSL
1659         secure_close(conn);
1660 #endif
1661         free(conn);
1662 }
1663
1664
1665 /*
1666  * ClosePostmasterPorts -- close all the postmaster's open sockets
1667  *
1668  * This is called during child process startup to release file descriptors
1669  * that are not needed by that child process.  The postmaster still has
1670  * them open, of course.
1671  *
1672  * Note: we pass am_syslogger as a boolean because we don't want to set
1673  * the global variable yet when this is called.
1674  */
1675 void
1676 ClosePostmasterPorts(bool am_syslogger)
1677 {
1678         int                     i;
1679
1680         /* Close the listen sockets */
1681         for (i = 0; i < MAXLISTEN; i++)
1682         {
1683                 if (ListenSocket[i] != -1)
1684                 {
1685                         StreamClose(ListenSocket[i]);
1686                         ListenSocket[i] = -1;
1687                 }
1688         }
1689
1690         /* If using syslogger, close the read side of the pipe */
1691         if (!am_syslogger)
1692         {
1693 #ifndef WIN32
1694                 if (syslogPipe[0] >= 0)
1695                         close(syslogPipe[0]);
1696                 syslogPipe[0] = -1;
1697 #else
1698                 if (syslogPipe[0])
1699                         CloseHandle(syslogPipe[0]);
1700                 syslogPipe[0] = 0;
1701 #endif
1702         }
1703 }
1704
1705
1706 /*
1707  * reset_shared -- reset shared memory and semaphores
1708  */
1709 static void
1710 reset_shared(unsigned short port)
1711 {
1712         /*
1713          * Create or re-create shared memory and semaphores.
1714          *
1715          * Note: in each "cycle of life" we will normally assign the same IPC
1716          * keys (if using SysV shmem and/or semas), since the port number is
1717          * used to determine IPC keys.  This helps ensure that we will clean
1718          * up dead IPC objects if the postmaster crashes and is restarted.
1719          */
1720         CreateSharedMemoryAndSemaphores(false, MaxBackends, port);
1721 }
1722
1723
1724 /*
1725  * SIGHUP -- reread config files, and tell children to do same
1726  */
1727 static void
1728 SIGHUP_handler(SIGNAL_ARGS)
1729 {
1730         int                     save_errno = errno;
1731
1732         PG_SETMASK(&BlockSig);
1733
1734         if (Shutdown <= SmartShutdown)
1735         {
1736                 ereport(LOG,
1737                          (errmsg("received SIGHUP, reloading configuration files")));
1738                 ProcessConfigFile(PGC_SIGHUP);
1739                 SignalChildren(SIGHUP);
1740                 if (BgWriterPID != 0)
1741                         kill(BgWriterPID, SIGHUP);
1742                 if (PgArchPID != 0)
1743                         kill(PgArchPID, SIGHUP);
1744                 if (SysLoggerPID != 0)
1745                         kill(SysLoggerPID, SIGHUP);
1746                 /* PgStatPID does not currently need SIGHUP */
1747                 load_hba();
1748                 load_ident();
1749
1750 #ifdef EXEC_BACKEND
1751                 /* Update the starting-point file for future children */
1752                 write_nondefault_variables(PGC_SIGHUP);
1753 #endif
1754         }
1755
1756         PG_SETMASK(&UnBlockSig);
1757
1758         errno = save_errno;
1759 }
1760
1761
1762 /*
1763  * pmdie -- signal handler for processing various postmaster signals.
1764  */
1765 static void
1766 pmdie(SIGNAL_ARGS)
1767 {
1768         int                     save_errno = errno;
1769
1770         PG_SETMASK(&BlockSig);
1771
1772         ereport(DEBUG2,
1773                         (errmsg_internal("postmaster received signal %d",
1774                                                          postgres_signal_arg)));
1775
1776         switch (postgres_signal_arg)
1777         {
1778                 case SIGTERM:
1779
1780                         /*
1781                          * Smart Shutdown:
1782                          *
1783                          * Wait for children to end their work, then shut down.
1784                          */
1785                         if (Shutdown >= SmartShutdown)
1786                                 break;
1787                         Shutdown = SmartShutdown;
1788                         ereport(LOG,
1789                                         (errmsg("received smart shutdown request")));
1790
1791                         if (DLGetHead(BackendList))
1792                                 break;                  /* let reaper() handle this */
1793
1794                         /*
1795                          * No children left. Begin shutdown of data base system.
1796                          */
1797                         if (StartupPID != 0 || FatalError)
1798                                 break;                  /* let reaper() handle this */
1799                         /* Start the bgwriter if not running */
1800                         if (BgWriterPID == 0)
1801                                 BgWriterPID = StartBackgroundWriter();
1802                         /* And tell it to shut down */
1803                         if (BgWriterPID != 0)
1804                                 kill(BgWriterPID, SIGUSR2);
1805                         /* Tell pgarch to shut down too; nothing left for it to do */
1806                         if (PgArchPID != 0)
1807                                 kill(PgArchPID, SIGQUIT);
1808                         /* Tell pgstat to shut down too; nothing left for it to do */
1809                         if (PgStatPID != 0)
1810                                 kill(PgStatPID, SIGQUIT);
1811                         break;
1812
1813                 case SIGINT:
1814
1815                         /*
1816                          * Fast Shutdown:
1817                          *
1818                          * Abort all children with SIGTERM (rollback active transactions
1819                          * and exit) and shut down when they are gone.
1820                          */
1821                         if (Shutdown >= FastShutdown)
1822                                 break;
1823                         Shutdown = FastShutdown;
1824                         ereport(LOG,
1825                                         (errmsg("received fast shutdown request")));
1826
1827                         if (DLGetHead(BackendList))
1828                         {
1829                                 if (!FatalError)
1830                                 {
1831                                         ereport(LOG,
1832                                                         (errmsg("aborting any active transactions")));
1833                                         SignalChildren(SIGTERM);
1834                                         /* reaper() does the rest */
1835                                 }
1836                                 break;
1837                         }
1838
1839                         /*
1840                          * No children left. Begin shutdown of data base system.
1841                          *
1842                          * Note: if we previously got SIGTERM then we may send SIGUSR2 to
1843                          * the bgwriter a second time here.  This should be harmless.
1844                          */
1845                         if (StartupPID != 0 || FatalError)
1846                                 break;                  /* let reaper() handle this */
1847                         /* Start the bgwriter if not running */
1848                         if (BgWriterPID == 0)
1849                                 BgWriterPID = StartBackgroundWriter();
1850                         /* And tell it to shut down */
1851                         if (BgWriterPID != 0)
1852                                 kill(BgWriterPID, SIGUSR2);
1853                         /* Tell pgarch to shut down too; nothing left for it to do */
1854                         if (PgArchPID != 0)
1855                                 kill(PgArchPID, SIGQUIT);
1856                         /* Tell pgstat to shut down too; nothing left for it to do */
1857                         if (PgStatPID != 0)
1858                                 kill(PgStatPID, SIGQUIT);
1859                         break;
1860
1861                 case SIGQUIT:
1862
1863                         /*
1864                          * Immediate Shutdown:
1865                          *
1866                          * abort all children with SIGQUIT and exit without attempt to
1867                          * properly shut down data base system.
1868                          */
1869                         ereport(LOG,
1870                                         (errmsg("received immediate shutdown request")));
1871                         if (StartupPID != 0)
1872                                 kill(StartupPID, SIGQUIT);
1873                         if (BgWriterPID != 0)
1874                                 kill(BgWriterPID, SIGQUIT);
1875                         if (PgArchPID != 0)
1876                                 kill(PgArchPID, SIGQUIT);
1877                         if (PgStatPID != 0)
1878                                 kill(PgStatPID, SIGQUIT);
1879                         if (DLGetHead(BackendList))
1880                                 SignalChildren(SIGQUIT);
1881                         ExitPostmaster(0);
1882                         break;
1883         }
1884
1885         PG_SETMASK(&UnBlockSig);
1886
1887         errno = save_errno;
1888 }
1889
1890 /*
1891  * Reaper -- signal handler to cleanup after a backend (child) dies.
1892  */
1893 static void
1894 reaper(SIGNAL_ARGS)
1895 {
1896         int                     save_errno = errno;
1897
1898 #ifdef HAVE_WAITPID
1899         int                     status;                 /* backend exit status */
1900
1901 #else
1902 #ifndef WIN32
1903         union wait      status;                 /* backend exit status */
1904 #endif
1905 #endif
1906         int                     exitstatus;
1907         int                     pid;                    /* process id of dead backend */
1908
1909         PG_SETMASK(&BlockSig);
1910
1911         ereport(DEBUG4,
1912                         (errmsg_internal("reaping dead processes")));
1913 #ifdef HAVE_WAITPID
1914         while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
1915         {
1916                 exitstatus = status;
1917 #else
1918 #ifndef WIN32
1919         while ((pid = wait3(&status, WNOHANG, NULL)) > 0)
1920         {
1921                 exitstatus = status.w_status;
1922 #else
1923         while ((pid = win32_waitpid(&exitstatus)) > 0)
1924         {
1925                 /*
1926                  * We need to do this here, and not in CleanupBackend, since this
1927                  * is to be called on all children when we are done with them.
1928                  * Could move to LogChildExit, but that seems like asking for
1929                  * future trouble...
1930                  */
1931                 win32_RemoveChild(pid);
1932 #endif   /* WIN32 */
1933 #endif   /* HAVE_WAITPID */
1934
1935                 /*
1936                  * Check if this child was a startup process.
1937                  */
1938                 if (StartupPID != 0 && pid == StartupPID)
1939                 {
1940                         StartupPID = 0;
1941                         if (exitstatus != 0)
1942                         {
1943                                 LogChildExit(LOG, gettext("startup process"),
1944                                                          pid, exitstatus);
1945                                 ereport(LOG,
1946                                                 (errmsg("aborting startup due to startup process failure")));
1947                                 ExitPostmaster(1);
1948                         }
1949
1950                         /*
1951                          * Startup succeeded - we are done with system startup or
1952                          * recovery.
1953                          */
1954                         FatalError = false;
1955
1956                         /*
1957                          * Crank up the background writer.      It doesn't matter if this
1958                          * fails, we'll just try again later.
1959                          */
1960                         Assert(BgWriterPID == 0);
1961                         BgWriterPID = StartBackgroundWriter();
1962
1963                         /*
1964                          * Go to shutdown mode if a shutdown request was pending.
1965                          * Otherwise, try to start the archiver and stats collector
1966                          * too.
1967                          */
1968                         if (Shutdown > NoShutdown && BgWriterPID != 0)
1969                                 kill(BgWriterPID, SIGUSR2);
1970                         else if (Shutdown == NoShutdown)
1971                         {
1972                                 if (XLogArchivingActive() && PgArchPID == 0)
1973                                         PgArchPID = pgarch_start();
1974                                 if (PgStatPID == 0)
1975                                         PgStatPID = pgstat_start();
1976                         }
1977
1978                         continue;
1979                 }
1980
1981                 /*
1982                  * Was it the bgwriter?
1983                  */
1984                 if (BgWriterPID != 0 && pid == BgWriterPID)
1985                 {
1986                         BgWriterPID = 0;
1987                         if (exitstatus == 0 && Shutdown > NoShutdown &&
1988                                 !FatalError && !DLGetHead(BackendList))
1989                         {
1990                                 /*
1991                                  * Normal postmaster exit is here: we've seen normal exit
1992                                  * of the bgwriter after it's been told to shut down. We
1993                                  * expect that it wrote a shutdown checkpoint.  (If for
1994                                  * some reason it didn't, recovery will occur on next
1995                                  * postmaster start.)
1996                                  *
1997                                  * Note: we do not wait around for exit of the archiver or
1998                                  * stats processes.  They've been sent SIGQUIT by this
1999                                  * point, and in any case contain logic to commit
2000                                  * hara-kiri if they notice the postmaster is gone.
2001                                  */
2002                                 ExitPostmaster(0);
2003                         }
2004
2005                         /*
2006                          * Any unexpected exit of the bgwriter is treated as a crash.
2007                          */
2008                         HandleChildCrash(pid, exitstatus,
2009                                                          gettext("background writer process"));
2010                         continue;
2011                 }
2012
2013                 /*
2014                  * Was it the archiver?  If so, just try to start a new one; no
2015                  * need to force reset of the rest of the system.  (If fail, we'll
2016                  * try again in future cycles of the main loop.)
2017                  */
2018                 if (PgArchPID != 0 && pid == PgArchPID)
2019                 {
2020                         PgArchPID = 0;
2021                         if (exitstatus != 0)
2022                                 LogChildExit(LOG, gettext("archiver process"),
2023                                                          pid, exitstatus);
2024                         if (XLogArchivingActive() &&
2025                                 StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
2026                                 PgArchPID = pgarch_start();
2027                         continue;
2028                 }
2029
2030                 /*
2031                  * Was it the statistics collector?  If so, just try to start a
2032                  * new one; no need to force reset of the rest of the system.  (If
2033                  * fail, we'll try again in future cycles of the main loop.)
2034                  */
2035                 if (PgStatPID != 0 && pid == PgStatPID)
2036                 {
2037                         PgStatPID = 0;
2038                         if (exitstatus != 0)
2039                                 LogChildExit(LOG, gettext("statistics collector process"),
2040                                                          pid, exitstatus);
2041                         if (StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
2042                                 PgStatPID = pgstat_start();
2043                         continue;
2044                 }
2045
2046                 /* Was it the system logger? try to start a new one */
2047                 if (SysLoggerPID != 0 && pid == SysLoggerPID)
2048                 {
2049                         SysLoggerPID = 0;
2050                         /* for safety's sake, launch new logger *first* */
2051                         SysLoggerPID = SysLogger_Start();
2052                         if (exitstatus != 0)
2053                                 LogChildExit(LOG, gettext("system logger process"),
2054                                                          pid, exitstatus);
2055                         continue;
2056                 }
2057
2058                 /*
2059                  * Else do standard backend child cleanup.
2060                  */
2061                 CleanupBackend(pid, exitstatus);
2062         }                                                       /* loop over pending child-death reports */
2063
2064         if (FatalError)
2065         {
2066                 /*
2067                  * Wait for all important children to exit, then reset shmem and
2068                  * StartupDataBase.  (We can ignore the archiver and stats
2069                  * processes here since they are not connected to shmem.)
2070                  */
2071                 if (DLGetHead(BackendList) || StartupPID != 0 || BgWriterPID != 0)
2072                         goto reaper_done;
2073                 ereport(LOG,
2074                         (errmsg("all server processes terminated; reinitializing")));
2075
2076                 shmem_exit(0);
2077                 reset_shared(PostPortNumber);
2078
2079                 StartupPID = StartupDataBase();
2080
2081                 goto reaper_done;
2082         }
2083
2084         if (Shutdown > NoShutdown)
2085         {
2086                 if (DLGetHead(BackendList) || StartupPID != 0)
2087                         goto reaper_done;
2088                 /* Start the bgwriter if not running */
2089                 if (BgWriterPID == 0)
2090                         BgWriterPID = StartBackgroundWriter();
2091                 /* And tell it to shut down */
2092                 if (BgWriterPID != 0)
2093                         kill(BgWriterPID, SIGUSR2);
2094                 /* Tell pgarch to shut down too; nothing left for it to do */
2095                 if (PgArchPID != 0)
2096                         kill(PgArchPID, SIGQUIT);
2097                 /* Tell pgstat to shut down too; nothing left for it to do */
2098                 if (PgStatPID != 0)
2099                         kill(PgStatPID, SIGQUIT);
2100         }
2101
2102 reaper_done:
2103         PG_SETMASK(&UnBlockSig);
2104
2105         errno = save_errno;
2106 }
2107
2108
2109 /*
2110  * CleanupBackend -- cleanup after terminated backend.
2111  *
2112  * Remove all local state associated with backend.
2113  */
2114 static void
2115 CleanupBackend(int pid,
2116                            int exitstatus)      /* child's exit status. */
2117 {
2118         Dlelem     *curr;
2119
2120         LogChildExit(DEBUG2, gettext("server process"), pid, exitstatus);
2121
2122         /*
2123          * If a backend dies in an ugly way (i.e. exit status not 0) then we
2124          * must signal all other backends to quickdie.  If exit status is zero
2125          * we assume everything is hunky dory and simply remove the backend
2126          * from the active backend list.
2127          */
2128         if (exitstatus != 0)
2129         {
2130                 HandleChildCrash(pid, exitstatus, gettext("server process"));
2131                 return;
2132         }
2133
2134         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2135         {
2136                 Backend    *bp = (Backend *) DLE_VAL(curr);
2137
2138                 if (bp->pid == pid)
2139                 {
2140                         DLRemove(curr);
2141                         free(bp);
2142                         DLFreeElem(curr);
2143 #ifdef EXEC_BACKEND
2144                         ShmemBackendArrayRemove(pid);
2145 #endif
2146                         /* Tell the collector about backend termination */
2147                         pgstat_beterm(pid);
2148                         break;
2149                 }
2150         }
2151 }
2152
2153 /*
2154  * HandleChildCrash -- cleanup after failed backend or bgwriter.
2155  *
2156  * The objectives here are to clean up our local state about the child
2157  * process, and to signal all other remaining children to quickdie.
2158  */
2159 static void
2160 HandleChildCrash(int pid, int exitstatus, const char *procname)
2161 {
2162         Dlelem     *curr,
2163                            *next;
2164         Backend    *bp;
2165
2166         /*
2167          * Make log entry unless there was a previous crash (if so, nonzero
2168          * exit status is to be expected in SIGQUIT response; don't clutter
2169          * log)
2170          */
2171         if (!FatalError)
2172         {
2173                 LogChildExit(LOG, procname, pid, exitstatus);
2174                 ereport(LOG,
2175                           (errmsg("terminating any other active server processes")));
2176         }
2177
2178         /* Process regular backends */
2179         for (curr = DLGetHead(BackendList); curr; curr = next)
2180         {
2181                 next = DLGetSucc(curr);
2182                 bp = (Backend *) DLE_VAL(curr);
2183                 if (bp->pid == pid)
2184                 {
2185                         /*
2186                          * Found entry for freshly-dead backend, so remove it.
2187                          */
2188                         DLRemove(curr);
2189                         free(bp);
2190                         DLFreeElem(curr);
2191 #ifdef EXEC_BACKEND
2192                         ShmemBackendArrayRemove(pid);
2193 #endif
2194                         /* Tell the collector about backend termination */
2195                         pgstat_beterm(pid);
2196                         /* Keep looping so we can signal remaining backends */
2197                 }
2198                 else
2199                 {
2200                         /*
2201                          * This backend is still alive.  Unless we did so already,
2202                          * tell it to commit hara-kiri.
2203                          *
2204                          * SIGQUIT is the special signal that says exit without proc_exit
2205                          * and let the user know what's going on. But if SendStop is
2206                          * set (-s on command line), then we send SIGSTOP instead, so
2207                          * that we can get core dumps from all backends by hand.
2208                          */
2209                         if (!FatalError)
2210                         {
2211                                 ereport(DEBUG2,
2212                                                 (errmsg_internal("sending %s to process %d",
2213                                                                           (SendStop ? "SIGSTOP" : "SIGQUIT"),
2214                                                                                  (int) bp->pid)));
2215                                 kill(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
2216                         }
2217                 }
2218         }
2219
2220         /* Take care of the bgwriter too */
2221         if (pid == BgWriterPID)
2222                 BgWriterPID = 0;
2223         else if (BgWriterPID != 0 && !FatalError)
2224         {
2225                 ereport(DEBUG2,
2226                                 (errmsg_internal("sending %s to process %d",
2227                                                                  (SendStop ? "SIGSTOP" : "SIGQUIT"),
2228                                                                  (int) BgWriterPID)));
2229                 kill(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
2230         }
2231
2232         /* Force a power-cycle of the pgarch process too */
2233         /* (Shouldn't be necessary, but just for luck) */
2234         if (PgArchPID != 0 && !FatalError)
2235         {
2236                 ereport(DEBUG2,
2237                                 (errmsg_internal("sending %s to process %d",
2238                                                                  "SIGQUIT",
2239                                                                  (int) PgArchPID)));
2240                 kill(PgArchPID, SIGQUIT);
2241         }
2242
2243         /* Force a power-cycle of the pgstat processes too */
2244         /* (Shouldn't be necessary, but just for luck) */
2245         if (PgStatPID != 0 && !FatalError)
2246         {
2247                 ereport(DEBUG2,
2248                                 (errmsg_internal("sending %s to process %d",
2249                                                                  "SIGQUIT",
2250                                                                  (int) PgStatPID)));
2251                 kill(PgStatPID, SIGQUIT);
2252         }
2253
2254         /* We do NOT restart the syslogger */
2255
2256         FatalError = true;
2257 }
2258
/*
 * LogChildExit -- report how a child process ended.
 *
 * One message is emitted at level "lev".  It names the process
 * ("procname", PID "pid") and decodes "exitstatus" as a normal exit, a
 * termination by signal, or (if neither applies) the raw status value.
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
	/* Normal exit: show the exit code */
	if (WIFEXITED(exitstatus))
	{
		ereport(lev,

		/*
		 * translator: %s is a noun phrase describing a child process,
		 * such as "server process"
		 */
				(errmsg("%s (PID %d) exited with exit code %d",
						procname, pid, WEXITSTATUS(exitstatus))));
		return;
	}

	/* Killed by a signal: show the signal number */
	if (WIFSIGNALED(exitstatus))
	{
		ereport(lev,

		/*
		 * translator: %s is a noun phrase describing a child process,
		 * such as "server process"
		 */
				(errmsg("%s (PID %d) was terminated by signal %d",
						procname, pid, WTERMSIG(exitstatus))));
		return;
	}

	/* Neither of the above: just print the status word as-is */
	ereport(lev,

	/*
	 * translator: %s is a noun phrase describing a child process,
	 * such as "server process"
	 */
			(errmsg("%s (PID %d) exited with unexpected status %d",
					procname, pid, exitstatus)));
}
2293
2294 /*
2295  * Send a signal to all backend children (but NOT special children)
2296  */
2297 static void
2298 SignalChildren(int signal)
2299 {
2300         Dlelem     *curr;
2301
2302         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2303         {
2304                 Backend    *bp = (Backend *) DLE_VAL(curr);
2305
2306                 ereport(DEBUG4,
2307                                 (errmsg_internal("sending signal %d to process %d",
2308                                                                  signal, (int) bp->pid)));
2309                 kill(bp->pid, signal);
2310         }
2311 }
2312
2313 /*
2314  * BackendStartup -- start backend process
2315  *
2316  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
2317  */
2318 static int
2319 BackendStartup(Port *port)
2320 {
2321         Backend    *bn;                         /* for backend cleanup */
2322         pid_t           pid;
2323
2324 #ifdef LINUX_PROFILE
2325         struct itimerval prof_itimer;
2326 #endif
2327
2328         /*
2329          * Compute the cancel key that will be assigned to this backend. The
2330          * backend will have its own copy in the forked-off process' value of
2331          * MyCancelKey, so that it can transmit the key to the frontend.
2332          */
2333         MyCancelKey = PostmasterRandom();
2334
2335         /*
2336          * Make room for backend data structure.  Better before the fork() so
2337          * we can handle failure cleanly.
2338          */
2339         bn = (Backend *) malloc(sizeof(Backend));
2340         if (!bn)
2341         {
2342                 ereport(LOG,
2343                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2344                                  errmsg("out of memory")));
2345                 return STATUS_ERROR;
2346         }
2347
2348         /* Pass down canAcceptConnections state (kluge for EXEC_BACKEND case) */
2349         port->canAcceptConnections = canAcceptConnections();
2350
2351         /*
2352          * Flush stdio channels just before fork, to avoid double-output
2353          * problems. Ideally we'd use fflush(NULL) here, but there are still a
2354          * few non-ANSI stdio libraries out there (like SunOS 4.1.x) that
2355          * coredump if we do. Presently stdout and stderr are the only stdio
2356          * output channels used by the postmaster, so fflush'ing them should
2357          * be sufficient.
2358          */
2359         fflush(stdout);
2360         fflush(stderr);
2361
2362 #ifdef EXEC_BACKEND
2363
2364         pid = backend_forkexec(port);
2365
2366 #else                                                   /* !EXEC_BACKEND */
2367
2368 #ifdef LINUX_PROFILE
2369
2370         /*
2371          * Linux's fork() resets the profiling timer in the child process. If
2372          * we want to profile child processes then we need to save and restore
2373          * the timer setting.  This is a waste of time if not profiling,
2374          * however, so only do it if commanded by specific -DLINUX_PROFILE
2375          * switch.
2376          */
2377         getitimer(ITIMER_PROF, &prof_itimer);
2378 #endif
2379
2380 #ifdef __BEOS__
2381         /* Specific beos actions before backend startup */
2382         beos_before_backend_startup();
2383 #endif
2384
2385         pid = fork();
2386
2387         if (pid == 0)                           /* child */
2388         {
2389 #ifdef LINUX_PROFILE
2390                 setitimer(ITIMER_PROF, &prof_itimer, NULL);
2391 #endif
2392
2393 #ifdef __BEOS__
2394                 /* Specific beos backend startup actions */
2395                 beos_backend_startup();
2396 #endif
2397                 free(bn);
2398
2399                 proc_exit(BackendRun(port));
2400         }
2401 #endif   /* EXEC_BACKEND */
2402
2403         if (pid < 0)
2404         {
2405                 /* in parent, fork failed */
2406                 int                     save_errno = errno;
2407
2408 #ifdef __BEOS__
2409                 /* Specific beos backend startup actions */
2410                 beos_backend_startup_failed();
2411 #endif
2412                 free(bn);
2413                 errno = save_errno;
2414                 ereport(LOG,
2415                           (errmsg("could not fork new process for connection: %m")));
2416                 report_fork_failure_to_client(port, save_errno);
2417                 return STATUS_ERROR;
2418         }
2419
2420         /* in parent, successful fork */
2421         ereport(DEBUG2,
2422                         (errmsg_internal("forked new backend, pid=%d socket=%d",
2423                                                          (int) pid, port->sock)));
2424
2425         /*
2426          * Everything's been successful, it's safe to add this backend to our
2427          * list of backends.
2428          */
2429         bn->pid = pid;
2430         bn->cancel_key = MyCancelKey;
2431         DLAddHead(BackendList, DLNewElem(bn));
2432 #ifdef EXEC_BACKEND
2433         ShmemBackendArrayAdd(bn);
2434 #endif
2435
2436         return STATUS_OK;
2437 }
2438
2439 /*
2440  * Try to report backend fork() failure to client before we close the
2441  * connection.  Since we do not care to risk blocking the postmaster on
2442  * this connection, we set the connection to non-blocking and try only once.
2443  *
2444  * This is grungy special-purpose code; we cannot use backend libpq since
2445  * it's not up and running.
2446  */
2447 static void
2448 report_fork_failure_to_client(Port *port, int errnum)
2449 {
2450         char            buffer[1000];
2451
2452         /* Format the error message packet (always V2 protocol) */
2453         snprintf(buffer, sizeof(buffer), "E%s%s\n",
2454                          gettext("could not fork new process for connection: "),
2455                          strerror(errnum));
2456
2457         /* Set port to non-blocking.  Don't do send() if this fails */
2458         if (!set_noblock(port->sock))
2459                 return;
2460
2461         send(port->sock, buffer, strlen(buffer) + 1, 0);
2462 }
2463
2464
/*
 * split_opts -- break an option string into words, appending each word
 *				 to an argv array
 *
 * argv:  array receiving pointers to the words; the caller must provide
 *		  enough room.
 * argcp: in/out index of the next free argv slot.
 * s:	  the option string, or NULL (in which case nothing happens).
 *
 * NB: the string is modified in place!  The whitespace character that
 * ends each word is overwritten with '\0', and the argv entries point
 * into the string.
 *
 * No current POSTGRES argument needs quoting, so every run of
 * non-whitespace characters is simply taken as one argv element.
 *
 * (Crude, yes --- but we *used* to hand the whole option string to
 * execl() as ONE argument, which was even less intelligent...)
 */
static void
split_opts(char **argv, int *argcp, char *s)
{
	char	   *cursor = s;

	if (cursor == NULL)
		return;

	for (;;)
	{
		/* step over whitespace in front of the next word */
		while (isspace((unsigned char) *cursor))
			cursor++;
		if (*cursor == '\0')
			return;

		/* record where the word starts */
		argv[*argcp] = cursor;
		*argcp += 1;

		/* find the end of the word */
		while (*cursor != '\0' && !isspace((unsigned char) *cursor))
			cursor++;
		if (*cursor == '\0')
			return;

		/* terminate the word and move on */
		*cursor++ = '\0';
	}
}
2493
2494
2495 /*
2496  * BackendRun -- perform authentication, and if successful,
2497  *                              set up the backend's argument list and invoke PostgresMain()
2498  *
2499  * returns:
2500  *              Shouldn't return at all.
2501  *              If PostgresMain() fails, return status.
2502  */
2503 static int
2504 BackendRun(Port *port)
2505 {
2506         int                     status;
2507         char            remote_host[NI_MAXHOST];
2508         char            remote_port[NI_MAXSERV];
2509         char            remote_ps_data[NI_MAXHOST];
2510         char      **av;
2511         int                     maxac;
2512         int                     ac;
2513         char            debugbuf[32];
2514         char            protobuf[32];
2515         int                     i;
2516
2517         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
2518
2519         /*
2520          * Let's clean up ourselves as the postmaster child, and close the
2521          * postmaster's listen sockets
2522          */
2523         ClosePostmasterPorts(false);
2524
2525         /* We don't want the postmaster's proc_exit() handlers */
2526         on_exit_reset();
2527
2528         /*
2529          * Signal handlers setting is moved to tcop/postgres...
2530          */
2531
2532         /* Save port etc. for ps status */
2533         MyProcPort = port;
2534
2535         /* Reset MyProcPid to new backend's pid */
2536         MyProcPid = getpid();
2537
2538         /*
2539          * PreAuthDelay is a debugging aid for investigating problems in the
2540          * authentication cycle: it can be set in postgresql.conf to allow
2541          * time to attach to the newly-forked backend with a debugger. (See
2542          * also the -W backend switch, which we allow clients to pass through
2543          * PGOPTIONS, but it is not honored until after authentication.)
2544          */
2545         if (PreAuthDelay > 0)
2546                 pg_usleep(PreAuthDelay * 1000000L);
2547
2548         ClientAuthInProgress = true;    /* limit visibility of log messages */
2549
2550         /* save start time for end of session reporting */
2551         gettimeofday(&(port->session_start), NULL);
2552
2553         /* set these to empty in case they are needed before we set them up */
2554         port->remote_host = "";
2555         port->remote_port = "";
2556         port->commandTag = "";
2557
2558         /*
2559          * Initialize libpq and enable reporting of ereport errors to the
2560          * client. Must do this now because authentication uses libpq to send
2561          * messages.
2562          */
2563         pq_init();                                      /* initialize libpq to talk to client */
2564         whereToSendOutput = Remote; /* now safe to ereport to client */
2565
2566         /*
2567          * We arrange for a simple exit(0) if we receive SIGTERM or SIGQUIT
2568          * during any client authentication related communication. Otherwise
2569          * the postmaster cannot shutdown the database FAST or IMMED cleanly
2570          * if a buggy client blocks a backend during authentication.
2571          */
2572         pqsignal(SIGTERM, authdie);
2573         pqsignal(SIGQUIT, authdie);
2574         pqsignal(SIGALRM, authdie);
2575         PG_SETMASK(&AuthBlockSig);
2576
2577         /*
2578          * Get the remote host name and port for logging and status display.
2579          */
2580         remote_host[0] = '\0';
2581         remote_port[0] = '\0';
2582         if (getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2583                                                 remote_host, sizeof(remote_host),
2584                                                 remote_port, sizeof(remote_port),
2585                                    (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV))
2586         {
2587                 int                     ret = getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2588                                                                                 remote_host, sizeof(remote_host),
2589                                                                                 remote_port, sizeof(remote_port),
2590                                                                                 NI_NUMERICHOST | NI_NUMERICSERV);
2591
2592                 if (ret)
2593                         ereport(WARNING,
2594                                         (errmsg("getnameinfo_all() failed: %s",
2595                                                         gai_strerror(ret))));
2596         }
2597         snprintf(remote_ps_data, sizeof(remote_ps_data),
2598                          remote_port[0] == '\0' ? "%s" : "%s(%s)",
2599                          remote_host, remote_port);
2600
2601         if (Log_connections)
2602                 ereport(LOG,
2603                                 (errmsg("connection received: host=%s port=%s",
2604                                                 remote_host, remote_port)));
2605
2606         /*
2607          * save remote_host and remote_port in port stucture
2608          */
2609         port->remote_host = strdup(remote_host);
2610         port->remote_port = strdup(remote_port);
2611
2612         /*
2613          * In EXEC_BACKEND case, we didn't inherit the contents of pg_hba.c
2614          * etcetera from the postmaster, and have to load them ourselves.
2615          * Build the PostmasterContext (which didn't exist before, in this
2616          * process) to contain the data.
2617          *
2618          * FIXME: [fork/exec] Ugh.      Is there a way around this overhead?
2619          */
2620 #ifdef EXEC_BACKEND
2621         Assert(PostmasterContext == NULL);
2622         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
2623                                                                                           "Postmaster",
2624                                                                                           ALLOCSET_DEFAULT_MINSIZE,
2625                                                                                           ALLOCSET_DEFAULT_INITSIZE,
2626                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
2627         MemoryContextSwitchTo(PostmasterContext);
2628
2629         load_hba();
2630         load_ident();
2631         load_user();
2632         load_group();
2633 #endif
2634
2635         /*
2636          * Ready to begin client interaction.  We will give up and exit(0)
2637          * after a time delay, so that a broken client can't hog a connection
2638          * indefinitely.  PreAuthDelay doesn't count against the time limit.
2639          */
2640         if (!enable_sig_alarm(AuthenticationTimeout * 1000, false))
2641                 elog(FATAL, "could not set timer for authorization timeout");
2642
2643         /*
2644          * Receive the startup packet (which might turn out to be a cancel
2645          * request packet).
2646          */
2647         status = ProcessStartupPacket(port, false);
2648
2649         if (status != STATUS_OK)
2650                 proc_exit(0);
2651
2652         /*
2653          * Now that we have the user and database name, we can set the process
2654          * title for ps.  It's good to do this as early as possible in
2655          * startup.
2656          */
2657         init_ps_display(port->user_name, port->database_name, remote_ps_data);
2658         set_ps_display("authentication");
2659
2660         /*
2661          * Now perform authentication exchange.
2662          */
2663         ClientAuthentication(port); /* might not return, if failure */
2664
2665         /*
2666          * Done with authentication.  Disable timeout, and prevent
2667          * SIGTERM/SIGQUIT again until backend startup is complete.
2668          */
2669         if (!disable_sig_alarm(false))
2670                 elog(FATAL, "could not disable timer for authorization timeout");
2671         PG_SETMASK(&BlockSig);
2672
2673         if (Log_connections)
2674                 ereport(LOG,
2675                                 (errmsg("connection authorized: user=%s database=%s",
2676                                                 port->user_name, port->database_name)));
2677
2678         /*
2679          * Don't want backend to be able to see the postmaster random number
2680          * generator state.  We have to clobber the static random_seed *and*
2681          * start a new random sequence in the random() library function.
2682          */
2683         random_seed = 0;
2684         srandom((unsigned int) (MyProcPid ^ port->session_start.tv_usec));
2685
2686         /* ----------------
2687          * Now, build the argv vector that will be given to PostgresMain.
2688          *
2689          * The layout of the command line is
2690          *              postgres [secure switches] -p databasename [insecure switches]
2691          * where the switches after -p come from the client request.
2692          *
2693          * The maximum possible number of commandline arguments that could come
2694          * from ExtraOptions or port->cmdline_options is (strlen + 1) / 2; see
2695          * split_opts().
2696          * ----------------
2697          */
2698         maxac = 10;                                     /* for fixed args supplied below */
2699         maxac += (strlen(ExtraOptions) + 1) / 2;
2700         if (port->cmdline_options)
2701                 maxac += (strlen(port->cmdline_options) + 1) / 2;
2702
2703         av = (char **) MemoryContextAlloc(TopMemoryContext,
2704                                                                           maxac * sizeof(char *));
2705         ac = 0;
2706
2707         av[ac++] = "postgres";
2708
2709         /*
2710          * Pass the requested debugging level along to the backend.
2711          */
2712         if (debug_flag > 0)
2713         {
2714                 snprintf(debugbuf, sizeof(debugbuf), "-d%d", debug_flag);
2715                 av[ac++] = debugbuf;
2716         }
2717
2718         /*
2719          * Pass any backend switches specified with -o in the postmaster's own
2720          * command line.  We assume these are secure.  (It's OK to mangle
2721          * ExtraOptions now, since we're safely inside a subprocess.)
2722          */
2723         split_opts(av, &ac, ExtraOptions);
2724
2725         /* Tell the backend what protocol the frontend is using. */
2726         snprintf(protobuf, sizeof(protobuf), "-v%u", port->proto);
2727         av[ac++] = protobuf;
2728
2729         /*
2730          * Tell the backend it is being called from the postmaster, and which
2731          * database to use.  -p marks the end of secure switches.
2732          */
2733         av[ac++] = "-p";
2734         av[ac++] = port->database_name;
2735
2736         /*
2737          * Pass the (insecure) option switches from the connection request.
2738          * (It's OK to mangle port->cmdline_options now.)
2739          */
2740         if (port->cmdline_options)
2741                 split_opts(av, &ac, port->cmdline_options);
2742
2743         av[ac] = NULL;
2744
2745         Assert(ac < maxac);
2746
2747         /*
2748          * Release postmaster's working memory context so that backend can
2749          * recycle the space.  Note this does not trash *MyProcPort, because
2750          * ConnCreate() allocated that space with malloc() ... else we'd need
2751          * to copy the Port data here.  Also, subsidiary data such as the
2752          * username isn't lost either; see ProcessStartupPacket().
2753          */
2754         MemoryContextSwitchTo(TopMemoryContext);
2755         MemoryContextDelete(PostmasterContext);
2756         PostmasterContext = NULL;
2757
2758         /*
2759          * Debug: print arguments being passed to backend
2760          */
2761         ereport(DEBUG3,
2762                         (errmsg_internal("%s child[%d]: starting with (",
2763                                                          progname, (int)getpid())));
2764         for (i = 0; i < ac; ++i)
2765                 ereport(DEBUG3,
2766                                 (errmsg_internal("\t%s", av[i])));
2767         ereport(DEBUG3,
2768                         (errmsg_internal(")")));
2769
2770         ClientAuthInProgress = false;           /* client_min_messages is active
2771                                                                                  * now */
2772
2773         return (PostgresMain(ac, av, port->user_name));
2774 }
2775
2776
2777 #ifdef EXEC_BACKEND
2778
2779 /*
2780  * postmaster_forkexec -- fork and exec a postmaster subprocess
2781  *
2782  * The caller must have set up the argv array already, except for argv[2]
2783  * which will be filled with the name of the temp variable file.
2784  *
2785  * Returns the child process PID, or -1 on fork failure (a suitable error
2786  * message has been logged on failure).
2787  *
2788  * All uses of this routine will dispatch to SubPostmasterMain in the
2789  * child process.
2790  */
2791 pid_t
2792 postmaster_forkexec(int argc, char *argv[])
2793 {
2794         Port            port;
2795
2796         /* This entry point passes dummy values for the Port variables */
2797         memset(&port, 0, sizeof(port));
2798         return internal_forkexec(argc, argv, &port);
2799 }
2800
2801 /*
2802  * backend_forkexec -- fork/exec off a backend process
2803  *
2804  * returns the pid of the fork/exec'd process, or -1 on failure
2805  */
2806 static pid_t
2807 backend_forkexec(Port *port)
2808 {
2809         char       *av[4];
2810         int                     ac = 0;
2811
2812         av[ac++] = "postgres";
2813         av[ac++] = "-forkbackend";
2814         av[ac++] = NULL;                        /* filled in by internal_forkexec */
2815
2816         av[ac] = NULL;
2817         Assert(ac < lengthof(av));
2818
2819         return internal_forkexec(ac, av, port);
2820 }
2821
2822 static pid_t
2823 internal_forkexec(int argc, char *argv[], Port *port)
2824 {
2825         pid_t           pid;
2826         char            tmpfilename[MAXPGPATH];
2827
2828         if (!write_backend_variables(tmpfilename, port))
2829                 return -1;                              /* log made by write_backend_variables */
2830
2831         /* Make sure caller set up argv properly */
2832         Assert(argc >= 3);
2833         Assert(argv[argc] == NULL);
2834         Assert(strncmp(argv[1], "-fork", 5) == 0);
2835         Assert(argv[2] == NULL);
2836
2837         /* Insert temp file name after -fork argument */
2838         argv[2] = tmpfilename;
2839
2840 #ifdef WIN32
2841         pid = win32_forkexec(postgres_exec_path, argv);
2842 #else
2843         /* Fire off execv in child */
2844         if ((pid = fork()) == 0)
2845         {
2846                 if (execv(postgres_exec_path, argv) < 0)
2847                 {
2848                         ereport(LOG,
2849                                         (errmsg("could not exec backend process \"%s\": %m",
2850                                                         postgres_exec_path)));
2851                         /* We're already in the child process here, can't return */
2852                         exit(1);
2853                 }
2854         }
2855 #endif
2856
2857         return pid;                                     /* Parent returns pid, or -1 on fork
2858                                                                  * failure */
2859 }
2860
2861 /*
2862  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
2863  *                      to what it would be if we'd simply forked on Unix, and then
2864  *                      dispatch to the appropriate place.
2865  *
2866  * The first two command line arguments are expected to be "-forkFOO"
2867  * (where FOO indicates which postmaster child we are to become), and
2868  * the name of a variables file that we can read to load data that would
2869  * have been inherited by fork() on Unix.  Remaining arguments go to the
2870  * subprocess FooMain() routine.
2871  */
2872 int
2873 SubPostmasterMain(int argc, char *argv[])
2874 {
2875         Port            port;
2876
2877         /* Do this sooner rather than later... */
2878         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
2879
2880         MyProcPid = getpid();           /* reset MyProcPid */
2881
2882         /* In EXEC_BACKEND case we will not have inherited these settings */
2883         IsPostmasterEnvironment = true;
2884         whereToSendOutput = None;
2885         pqinitmask();
2886         PG_SETMASK(&BlockSig);
2887
2888         /* Setup essential subsystems */
2889         MemoryContextInit();
2890         InitializeGUCOptions();
2891
2892         /* Check we got appropriate args */
2893         if (argc < 3)
2894                 elog(FATAL, "invalid subpostmaster invocation");
2895
2896         /* Read in file-based context */
2897         memset(&port, 0, sizeof(Port));
2898         read_backend_variables(argv[2], &port);
2899         read_nondefault_variables();
2900
2901         /* Run backend or appropriate child */
2902         if (strcmp(argv[1], "-forkbackend") == 0)
2903         {
2904                 /* BackendRun will close sockets */
2905
2906                 /* Attach process to shared segments */
2907                 CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
2908
2909 #ifdef USE_SSL
2910                 /*
2911                  *      Need to reinitialize the SSL library in the backend,
2912                  *      since the context structures contain function pointers
2913                  *      and cannot be passed through the parameter file.
2914                  */
2915                 if (EnableSSL)
2916                         secure_initialize();
2917 #endif
2918
2919                 Assert(argc == 3);              /* shouldn't be any more args */
2920                 proc_exit(BackendRun(&port));
2921         }
2922         if (strcmp(argv[1], "-forkboot") == 0)
2923         {
2924                 /* Close the postmaster's sockets */
2925                 ClosePostmasterPorts(false);
2926
2927                 /* Attach process to shared segments */
2928                 CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
2929
2930                 BootstrapMain(argc - 2, argv + 2);
2931                 proc_exit(0);
2932         }
2933         if (strcmp(argv[1], "-forkarch") == 0)
2934         {
2935                 /* Close the postmaster's sockets */
2936                 ClosePostmasterPorts(false);
2937
2938                 /* Do not want to attach to shared memory */
2939
2940                 PgArchiverMain(argc, argv);
2941                 proc_exit(0);
2942         }
2943         if (strcmp(argv[1], "-forkbuf") == 0)
2944         {
2945                 /* Close the postmaster's sockets */
2946                 ClosePostmasterPorts(false);
2947
2948                 /* Do not want to attach to shared memory */
2949
2950                 PgstatBufferMain(argc, argv);
2951                 proc_exit(0);
2952         }
2953         if (strcmp(argv[1], "-forkcol") == 0)
2954         {
2955                 /*
2956                  * Do NOT close postmaster sockets here, because we are forking
2957                  * from pgstat buffer process, which already did it.
2958                  */
2959
2960                 /* Do not want to attach to shared memory */
2961
2962                 PgstatCollectorMain(argc, argv);
2963                 proc_exit(0);
2964         }
2965         if (strcmp(argv[1], "-forklog") == 0)
2966         {
2967                 /* Close the postmaster's sockets */
2968                 ClosePostmasterPorts(true);
2969
2970                 /* Do not want to attach to shared memory */
2971
2972                 SysLoggerMain(argc, argv);
2973                 proc_exit(0);
2974         }
2975
2976         return 1;                                       /* shouldn't get here */
2977 }
2978 #endif   /* EXEC_BACKEND */
2979
2980
/*
 * ExitPostmaster -- common exit point for the postmaster
 *
 * Never call exit() directly --- always come through this routine!
 *
 * At present this simply passes "status" on to proc_exit().
 */
static void
ExitPostmaster(int status)
{
	/*
	 * Intended duties: clean up shared memory and kill all backends.
	 *
	 * Open question carried over from earlier revisions: should all the
	 * backends be killed when the postmaster dies?  Probably not.
	 *
	 * MUST		-- vadim 05-10-1999
	 */

	proc_exit(status);
}
3000
3001 /*
3002  * sigusr1_handler - handle signal conditions from child processes
3003  */
3004 static void
3005 sigusr1_handler(SIGNAL_ARGS)
3006 {
3007         int                     save_errno = errno;
3008
3009         PG_SETMASK(&BlockSig);
3010
3011         if (CheckPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE))
3012         {
3013                 /*
3014                  * Password or group file has changed.
3015                  */
3016                 load_user();
3017                 load_group();
3018         }
3019
3020         if (CheckPostmasterSignal(PMSIGNAL_WAKEN_CHILDREN))
3021         {
3022                 /*
3023                  * Send SIGUSR1 to all children (triggers
3024                  * CatchupInterruptHandler). See storage/ipc/sinval[adt].c for the
3025                  * use of this.
3026                  */
3027                 if (Shutdown <= SmartShutdown)
3028                         SignalChildren(SIGUSR1);
3029         }
3030
3031         if (PgArchPID != 0 && Shutdown == NoShutdown)
3032         {
3033                 if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER))
3034                 {
3035                         /*
3036                          * Send SIGUSR1 to archiver process, to wake it up and begin
3037                          * archiving next transaction log file.
3038                          */
3039                         kill(PgArchPID, SIGUSR1);
3040                 }
3041         }
3042
3043         PG_SETMASK(&UnBlockSig);
3044
3045         errno = save_errno;
3046 }
3047
3048
/*
 * dummy_handler -- signal handler that deliberately does nothing
 *
 * Installed for signals the postmaster itself has no use for but that
 * backends do use.  Setting such signals to SIG_IGN in the postmaster
 * could make a newly started backend drop a signal that arrives before
 * it has reconfigured its own signal processing.  (See notes in
 * tcop/postgres.c.)
 */
static void
dummy_handler(SIGNAL_ARGS)
{
	/* intentionally empty */
}
3062
3063
/*
 * CharRemap: produce the textual encoding of an integer per crypt(3)
 * conventions.
 *
 * The magnitude of ch is reduced modulo 62 and mapped as follows:
 *		 0..25 -> 'A'..'Z'
 *		26..51 -> 'a'..'z'
 *		52..61 -> '0'..'9'
 *
 * Any long value is accepted.  The magnitude is computed in unsigned
 * arithmetic because negating LONG_MIN as a signed long overflows, which
 * is undefined behavior.
 */
static char
CharRemap(long ch)
{
	unsigned long magnitude;

	/* 0UL - (unsigned long) ch is well defined even for LONG_MIN */
	if (ch < 0)
		magnitude = 0UL - (unsigned long) ch;
	else
		magnitude = (unsigned long) ch;

	magnitude %= 62;

	if (magnitude < 26)
		return (char) ('A' + magnitude);

	magnitude -= 26;
	if (magnitude < 26)
		return (char) ('a' + magnitude);

	magnitude -= 26;
	return (char) ('0' + magnitude);
}
3085
/*
 * RandomSalt -- fill in a fresh crypt salt and a fresh MD5 salt
 *
 * Writes two characters to cryptSalt and four bytes to md5Salt.  Neither
 * buffer is null-terminated here.
 */
static void
RandomSalt(char *cryptSalt, char *md5Salt)
{
	long		bits = PostmasterRandom();
	int			k;

	cryptSalt[0] = CharRemap(bits % 62);
	cryptSalt[1] = CharRemap(bits / 62);

	/*
	 * The first random value may be reused for one MD5 salt byte, because
	 * only one of the two salts is ever sent to the client.  The remaining
	 * bytes each need a new random value.
	 *
	 * Reducing modulo 255 gives up one possible byte value but lets every
	 * bit of the random() result influence the outcome; adding one then
	 * guarantees that no null byte is generated.
	 */
	md5Salt[0] = (bits % 255) + 1;
	for (k = 1; k < 4; k++)
	{
		bits = PostmasterRandom();
		md5Salt[k] = (bits % 255) + 1;
	}
}
3114
3115 /*
3116  * PostmasterRandom
3117  */
3118 static long
3119 PostmasterRandom(void)
3120 {
3121         static bool initialized = false;
3122
3123         if (!initialized)
3124         {
3125                 Assert(random_seed != 0);
3126                 srandom(random_seed);
3127                 initialized = true;
3128         }
3129
3130         return random();
3131 }
3132
3133 /*
3134  * Count up number of child processes (regular backends only)
3135  */
3136 static int
3137 CountChildren(void)
3138 {
3139         Dlelem     *curr;
3140         int                     cnt = 0;
3141
3142         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
3143                 cnt++;
3144         return cnt;
3145 }
3146
3147
3148 /*
3149  * StartChildProcess -- start a non-backend child process for the postmaster
3150  *
3151  * xlog determines what kind of child will be started.  All child types
3152  * initially go to BootstrapMain, which will handle common setup.
3153  *
3154  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
3155  * to start subprocess.
3156  */
3157 static pid_t
3158 StartChildProcess(int xlop)
3159 {
3160         pid_t           pid;
3161         char       *av[10];
3162         int                     ac = 0;
3163         char            xlbuf[32];
3164
3165 #ifdef LINUX_PROFILE
3166         struct itimerval prof_itimer;
3167 #endif
3168
3169         /*
3170          * Set up command-line arguments for subprocess
3171          */
3172         av[ac++] = "postgres";
3173
3174 #ifdef EXEC_BACKEND
3175         av[ac++] = "-forkboot";
3176         av[ac++] = NULL;                        /* filled in by postmaster_forkexec */
3177 #endif
3178
3179         snprintf(xlbuf, sizeof(xlbuf), "-x%d", xlop);
3180         av[ac++] = xlbuf;
3181
3182         av[ac++] = "-p";
3183         av[ac++] = "template1";
3184
3185         av[ac] = NULL;
3186         Assert(ac < lengthof(av));
3187
3188         /*
3189          * Flush stdio channels (see comments in BackendStartup)
3190          */
3191         fflush(stdout);
3192         fflush(stderr);
3193
3194 #ifdef EXEC_BACKEND
3195
3196         pid = postmaster_forkexec(ac, av);
3197
3198 #else                                                   /* !EXEC_BACKEND */
3199
3200 #ifdef LINUX_PROFILE
3201         /* see comments in BackendStartup */
3202         getitimer(ITIMER_PROF, &prof_itimer);
3203 #endif
3204
3205 #ifdef __BEOS__
3206         /* Specific beos actions before backend startup */
3207         beos_before_backend_startup();
3208 #endif
3209
3210         pid = fork();
3211
3212         if (pid == 0)                           /* child */
3213         {
3214 #ifdef LINUX_PROFILE
3215                 setitimer(ITIMER_PROF, &prof_itimer, NULL);
3216 #endif
3217
3218 #ifdef __BEOS__
3219                 /* Specific beos actions after backend startup */
3220                 beos_backend_startup();
3221 #endif
3222
3223                 IsUnderPostmaster = true;               /* we are a postmaster subprocess
3224                                                                                  * now */
3225
3226                 /* Close the postmaster's sockets */
3227                 ClosePostmasterPorts(false);
3228
3229                 /* Lose the postmaster's on-exit routines and port connections */
3230                 on_exit_reset();
3231
3232                 /* Release postmaster's working memory context */
3233                 MemoryContextSwitchTo(TopMemoryContext);
3234                 MemoryContextDelete(PostmasterContext);
3235                 PostmasterContext = NULL;
3236
3237                 BootstrapMain(ac, av);
3238                 ExitPostmaster(0);
3239         }
3240 #endif   /* EXEC_BACKEND */
3241
3242         if (pid < 0)
3243         {
3244                 /* in parent, fork failed */
3245                 int                     save_errno = errno;
3246
3247 #ifdef __BEOS__
3248                 /* Specific beos actions before backend startup */
3249                 beos_backend_startup_failed();
3250 #endif
3251                 errno = save_errno;
3252                 switch (xlop)
3253                 {
3254                         case BS_XLOG_STARTUP:
3255                                 ereport(LOG,
3256                                                 (errmsg("could not fork startup process: %m")));
3257                                 break;
3258                         case BS_XLOG_BGWRITER:
3259                                 ereport(LOG,
3260                                 (errmsg("could not fork background writer process: %m")));
3261                                 break;
3262                         default:
3263                                 ereport(LOG,
3264                                                 (errmsg("could not fork process: %m")));
3265                                 break;
3266                 }
3267
3268                 /*
3269                  * fork failure is fatal during startup, but there's no need to
3270                  * choke immediately if starting other child types fails.
3271                  */
3272                 if (xlop == BS_XLOG_STARTUP)
3273                         ExitPostmaster(1);
3274                 return 0;
3275         }
3276
3277         /*
3278          * in parent, successful fork
3279          */
3280         return pid;
3281 }
3282
3283
3284 /*
3285  * Create the opts file
3286  */
3287 static bool
3288 CreateOptsFile(int argc, char *argv[], char *fullprogname)
3289 {
3290         char            filename[MAXPGPATH];
3291         FILE       *fp;
3292         int                     i;
3293
3294         snprintf(filename, sizeof(filename), "%s/postmaster.opts", DataDir);
3295
3296         if ((fp = fopen(filename, "w")) == NULL)
3297         {
3298                 elog(LOG, "could not create file \"%s\": %m", filename);
3299                 return false;
3300         }
3301
3302         fprintf(fp, "%s", fullprogname);
3303         for (i = 1; i < argc; i++)
3304                 fprintf(fp, " '%s'", argv[i]);
3305         fputs("\n", fp);
3306
3307         if (fclose(fp))
3308         {
3309                 elog(LOG, "could not write file \"%s\": %m", filename);
3310                 return false;
3311         }
3312
3313         return true;
3314 }
3315
3316
3317 #ifdef EXEC_BACKEND
3318
3319 /*
3320  * The following need to be available to the read/write_backend_variables
3321  * functions
3322  */
3323 #include "storage/spin.h"
3324
3325 extern slock_t *ShmemLock;
3326 extern slock_t *ShmemIndexLock;
3327 extern void *ShmemIndexAlloc;
3328 typedef struct LWLock LWLock;
3329 extern LWLock *LWLockArray;
3330 extern slock_t *ProcStructLock;
3331 extern int      pgStatSock;
3332
3333 #define write_var(var,fp) fwrite((void*)&(var),sizeof(var),1,fp)
3334 #define read_var(var,fp)  fread((void*)&(var),sizeof(var),1,fp)
3335 #define write_array_var(var,fp) fwrite((void*)(var),sizeof(var),1,fp)
3336 #define read_array_var(var,fp)  fread((void*)(var),sizeof(var),1,fp)
3337
3338 static bool
3339 write_backend_variables(char *filename, Port *port)
3340 {
3341         static unsigned long tmpBackendFileNum = 0;
3342         FILE       *fp;
3343         char            str_buf[MAXPGPATH];
3344
3345         /* Calculate name for temp file in caller's buffer */
3346         Assert(DataDir);
3347         snprintf(filename, MAXPGPATH, "%s/%s/%s.backend_var.%d.%lu",
3348                          DataDir, PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX,
3349                          MyProcPid, ++tmpBackendFileNum);
3350
3351         /* Open file */
3352         fp = AllocateFile(filename, PG_BINARY_W);
3353         if (!fp)
3354         {
3355                 /* As per OpenTemporaryFile... */
3356                 char            dirname[MAXPGPATH];
3357
3358                 snprintf(dirname, MAXPGPATH, "%s/%s", DataDir, PG_TEMP_FILES_DIR);
3359                 mkdir(dirname, S_IRWXU);
3360
3361                 fp = AllocateFile(filename, PG_BINARY_W);
3362                 if (!fp)
3363                 {
3364                         ereport(LOG,
3365                                         (errcode_for_file_access(),
3366                                          errmsg("could not create file \"%s\": %m",
3367                                                         filename)));
3368                         return false;
3369                 }
3370         }
3371
3372         /* Write vars */
3373         write_var(port->sock, fp);
3374         write_var(port->proto, fp);
3375         write_var(port->laddr, fp);
3376         write_var(port->raddr, fp);
3377         write_var(port->canAcceptConnections, fp);
3378         write_var(port->cryptSalt, fp);
3379         write_var(port->md5Salt, fp);
3380
3381         /*
3382          * XXX FIXME later: writing these strings as MAXPGPATH bytes always is
3383          * probably a waste of resources
3384          */
3385
3386         StrNCpy(str_buf, DataDir, MAXPGPATH);
3387         write_array_var(str_buf, fp);
3388
3389         write_array_var(ListenSocket, fp);
3390
3391         write_var(MyCancelKey, fp);
3392
3393         write_var(UsedShmemSegID, fp);
3394         write_var(UsedShmemSegAddr, fp);
3395
3396         write_var(ShmemLock, fp);
3397         write_var(ShmemIndexLock, fp);
3398         write_var(ShmemVariableCache, fp);
3399         write_var(ShmemIndexAlloc, fp);
3400         write_var(ShmemBackendArray, fp);
3401
3402         write_var(LWLockArray, fp);
3403         write_var(ProcStructLock, fp);
3404         write_var(pgStatSock, fp);
3405
3406         write_var(debug_flag, fp);
3407         write_var(PostmasterPid, fp);
3408 #ifdef WIN32
3409         write_var(PostmasterHandle, fp);
3410 #endif
3411
3412         write_var(syslogPipe[0], fp);
3413         write_var(syslogPipe[1], fp);
3414
3415         StrNCpy(str_buf, my_exec_path, MAXPGPATH);
3416         write_array_var(str_buf, fp);
3417
3418         write_array_var(ExtraOptions, fp);
3419
3420         StrNCpy(str_buf, setlocale(LC_COLLATE, NULL), MAXPGPATH);
3421         write_array_var(str_buf, fp);
3422         StrNCpy(str_buf, setlocale(LC_CTYPE, NULL), MAXPGPATH);
3423         write_array_var(str_buf, fp);
3424
3425         /* Release file */
3426         if (FreeFile(fp))
3427         {
3428                 ereport(ERROR,
3429                                 (errcode_for_file_access(),
3430                                  errmsg("could not write to file \"%s\": %m", filename)));
3431                 return false;
3432         }
3433
3434         return true;
3435 }
3436
3437 static void
3438 read_backend_variables(char *filename, Port *port)
3439 {
3440         FILE       *fp;
3441         char            str_buf[MAXPGPATH];
3442
3443         /* Open file */
3444         fp = AllocateFile(filename, PG_BINARY_R);
3445         if (!fp)
3446                 ereport(FATAL,
3447                                 (errcode_for_file_access(),
3448                   errmsg("could not read from backend variables file \"%s\": %m",
3449                                  filename)));
3450
3451         /* Read vars */
3452         read_var(port->sock, fp);
3453         read_var(port->proto, fp);
3454         read_var(port->laddr, fp);
3455         read_var(port->raddr, fp);
3456         read_var(port->canAcceptConnections, fp);
3457         read_var(port->cryptSalt, fp);
3458         read_var(port->md5Salt, fp);
3459
3460         read_array_var(str_buf, fp);
3461         SetDataDir(str_buf);
3462
3463         read_array_var(ListenSocket, fp);
3464
3465         read_var(MyCancelKey, fp);
3466
3467         read_var(UsedShmemSegID, fp);
3468         read_var(UsedShmemSegAddr, fp);
3469
3470         read_var(ShmemLock, fp);
3471         read_var(ShmemIndexLock, fp);
3472         read_var(ShmemVariableCache, fp);
3473         read_var(ShmemIndexAlloc, fp);
3474         read_var(ShmemBackendArray, fp);
3475
3476         read_var(LWLockArray, fp);
3477         read_var(ProcStructLock, fp);
3478         read_var(pgStatSock, fp);
3479
3480         read_var(debug_flag, fp);
3481         read_var(PostmasterPid, fp);
3482 #ifdef WIN32
3483         read_var(PostmasterHandle, fp);
3484 #endif
3485
3486         read_var(syslogPipe[0], fp);
3487         read_var(syslogPipe[1], fp);
3488
3489         read_array_var(str_buf, fp);
3490         StrNCpy(my_exec_path, str_buf, MAXPGPATH);
3491
3492         read_array_var(ExtraOptions, fp);
3493
3494         read_array_var(str_buf, fp);
3495         setlocale(LC_COLLATE, str_buf);
3496         read_array_var(str_buf, fp);
3497         setlocale(LC_CTYPE, str_buf);
3498
3499         /* Release file */
3500         FreeFile(fp);
3501         if (unlink(filename) != 0)
3502                 ereport(WARNING,
3503                                 (errcode_for_file_access(),
3504                                  errmsg("could not remove file \"%s\": %m", filename)));
3505 }
3506
3507
3508 size_t
3509 ShmemBackendArraySize(void)
3510 {
3511         return (NUM_BACKENDARRAY_ELEMS * sizeof(Backend));
3512 }
3513
3514 void
3515 ShmemBackendArrayAllocation(void)
3516 {
3517         size_t          size = ShmemBackendArraySize();
3518
3519         ShmemBackendArray = (Backend *) ShmemAlloc(size);
3520         /* Mark all slots as empty */
3521         memset(ShmemBackendArray, 0, size);
3522 }
3523
3524 static void
3525 ShmemBackendArrayAdd(Backend *bn)
3526 {
3527         int                     i;
3528
3529         /* Find an empty slot */
3530         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3531         {
3532                 if (ShmemBackendArray[i].pid == 0)
3533                 {
3534                         ShmemBackendArray[i] = *bn;
3535                         return;
3536                 }
3537         }
3538
3539         ereport(FATAL,
3540                         (errmsg_internal("no free slots in shmem backend array")));
3541 }
3542
3543 static void
3544 ShmemBackendArrayRemove(pid_t pid)
3545 {
3546         int                     i;
3547
3548         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3549         {
3550                 if (ShmemBackendArray[i].pid == pid)
3551                 {
3552                         /* Mark the slot as empty */
3553                         ShmemBackendArray[i].pid = 0;
3554                         return;
3555                 }
3556         }
3557
3558         ereport(WARNING,
3559                         (errmsg_internal("could not find backend entry with pid %d",
3560                                                          (int) pid)));
3561 }
3562 #endif   /* EXEC_BACKEND */
3563
3564
3565 #ifdef WIN32
3566
3567 static pid_t
3568 win32_forkexec(const char *path, char *argv[])
3569 {
3570         STARTUPINFO si;
3571         PROCESS_INFORMATION pi;
3572         int                     i;
3573         int                     j;
3574         char            cmdLine[MAXPGPATH * 2];
3575         HANDLE          childHandleCopy;
3576         HANDLE          waiterThread;
3577
3578         /* Format the cmd line */
3579         cmdLine[sizeof(cmdLine) - 1] = '\0';
3580         cmdLine[sizeof(cmdLine) - 2] = '\0';
3581         snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", path);
3582         i = 0;
3583         while (argv[++i] != NULL)
3584         {
3585                 j = strlen(cmdLine);
3586                 snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
3587         }
3588         if (cmdLine[sizeof(cmdLine) - 2] != '\0')
3589         {
3590                 elog(LOG, "subprocess command line too long");
3591                 return -1;
3592         }
3593
3594         memset(&pi, 0, sizeof(pi));
3595         memset(&si, 0, sizeof(si));
3596         si.cb = sizeof(si);
3597         if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, 0, NULL, NULL, &si, &pi))
3598         {
3599                 elog(LOG, "CreateProcess call failed (%d): %m", (int) GetLastError());
3600                 return -1;
3601         }
3602
3603         if (!IsUnderPostmaster)
3604         {
3605                 /* We are the Postmaster creating a child... */
3606                 win32_AddChild(pi.dwProcessId, pi.hProcess);
3607         }
3608
3609         if (DuplicateHandle(GetCurrentProcess(),
3610                                                 pi.hProcess,
3611                                                 GetCurrentProcess(),
3612                                                 &childHandleCopy,
3613                                                 0,
3614                                                 FALSE,
3615                                                 DUPLICATE_SAME_ACCESS) == 0)
3616                 ereport(FATAL,
3617                                 (errmsg_internal("could not duplicate child handle: %d",
3618                                                                  (int) GetLastError())));
3619
3620         waiterThread = CreateThread(NULL, 64 * 1024, win32_sigchld_waiter,
3621                                                                 (LPVOID) childHandleCopy, 0, NULL);
3622         if (!waiterThread)
3623                 ereport(FATAL,
3624                    (errmsg_internal("could not create sigchld waiter thread: %d",
3625                                                         (int) GetLastError())));
3626         CloseHandle(waiterThread);
3627
3628         if (IsUnderPostmaster)
3629                 CloseHandle(pi.hProcess);
3630         CloseHandle(pi.hThread);
3631
3632         return pi.dwProcessId;
3633 }
3634
3635 /*
3636  * Note: The following three functions must not be interrupted (eg. by
3637  * signals).  As the Postgres Win32 signalling architecture (currently)
3638  * requires polling, or APC checking functions which aren't used here, this
3639  * is not an issue.
3640  *
3641  * We keep two separate arrays, instead of a single array of pid/HANDLE
3642  * structs, to avoid having to re-create a handle array for
3643  * WaitForMultipleObjects on each call to win32_waitpid.
3644  */
3645
3646 static void
3647 win32_AddChild(pid_t pid, HANDLE handle)
3648 {
3649         Assert(win32_childPIDArray && win32_childHNDArray);
3650         if (win32_numChildren < NUM_BACKENDARRAY_ELEMS)
3651         {
3652                 win32_childPIDArray[win32_numChildren] = pid;
3653                 win32_childHNDArray[win32_numChildren] = handle;
3654                 ++win32_numChildren;
3655         }
3656         else
3657                 ereport(FATAL,
3658                                 (errmsg_internal("no room for child entry with pid %lu",
3659                                                                  (unsigned long) pid)));
3660 }
3661
3662 static void
3663 win32_RemoveChild(pid_t pid)
3664 {
3665         int                     i;
3666
3667         Assert(win32_childPIDArray && win32_childHNDArray);
3668
3669         for (i = 0; i < win32_numChildren; i++)
3670         {
3671                 if (win32_childPIDArray[i] == pid)
3672                 {
3673                         CloseHandle(win32_childHNDArray[i]);
3674
3675                         /* Swap last entry into the "removed" one */
3676                         --win32_numChildren;
3677                         win32_childPIDArray[i] = win32_childPIDArray[win32_numChildren];
3678                         win32_childHNDArray[i] = win32_childHNDArray[win32_numChildren];
3679                         return;
3680                 }
3681         }
3682
3683         ereport(WARNING,
3684                         (errmsg_internal("could not find child entry with pid %lu",
3685                                                          (unsigned long) pid)));
3686 }
3687
3688 static pid_t
3689 win32_waitpid(int *exitstatus)
3690 {
3691         /*
3692          * Note: Do NOT use WaitForMultipleObjectsEx, as we don't want to run
3693          * queued APCs here.
3694          */
3695         int                     index;
3696         DWORD           exitCode;
3697         DWORD           ret;
3698         unsigned long offset;
3699
3700         Assert(win32_childPIDArray && win32_childHNDArray);
3701         elog(DEBUG3, "waiting on %lu children", win32_numChildren);
3702
3703         for (offset = 0; offset < win32_numChildren; offset += MAXIMUM_WAIT_OBJECTS)
3704         {
3705                 unsigned long num = min(MAXIMUM_WAIT_OBJECTS, win32_numChildren - offset);
3706
3707                 ret = WaitForMultipleObjects(num, &win32_childHNDArray[offset], FALSE, 0);
3708                 switch (ret)
3709                 {
3710                         case WAIT_FAILED:
3711                                 ereport(LOG,
3712                                                 (errmsg_internal("failed to wait on %lu of %lu children: %d",
3713                                                  num, win32_numChildren, (int) GetLastError())));
3714                                 return -1;
3715
3716                         case WAIT_TIMEOUT:
3717                                 /* No children (in this chunk) have finished */
3718                                 break;
3719
3720                         default:
3721
3722                                 /*
3723                                  * Get the exit code, and return the PID of, the
3724                                  * respective process
3725                                  */
3726                                 index = offset + ret - WAIT_OBJECT_0;
3727                                 Assert(index >= 0 && index < win32_numChildren);
3728                                 if (!GetExitCodeProcess(win32_childHNDArray[index], &exitCode))
3729                                 {
3730                                         /*
3731                                          * If we get this far, this should never happen, but,
3732                                          * then again... No choice other than to assume a
3733                                          * catastrophic failure.
3734                                          */
3735                                         ereport(FATAL,
3736                                                         (errmsg_internal("failed to get exit code for child %lu",
3737                                                                                    win32_childPIDArray[index])));
3738                                 }
3739                                 *exitstatus = (int) exitCode;
3740                                 return win32_childPIDArray[index];
3741                 }
3742         }
3743
3744         /* No children have finished */
3745         return -1;
3746 }
3747
3748 /*
3749  * Note! Code below executes on separate threads, one for
3750  * each child process created
3751  */
3752 static DWORD WINAPI
3753 win32_sigchld_waiter(LPVOID param)
3754 {
3755         HANDLE          procHandle = (HANDLE) param;
3756
3757         DWORD           r = WaitForSingleObject(procHandle, INFINITE);
3758
3759         if (r == WAIT_OBJECT_0)
3760                 pg_queue_signal(SIGCHLD);
3761         else
3762                 write_stderr("could not wait on child process handle: %d\n",
3763                                          (int) GetLastError());
3764         CloseHandle(procHandle);
3765         return 0;
3766 }
3767
3768 #endif   /* WIN32 */