]> granicus.if.org Git - postgresql/blob - src/backend/postmaster/postmaster.c
Fix Win32 problems with signals and sockets, by making the forkexec code
[postgresql] / src / backend / postmaster / postmaster.c
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  *        This program acts as a clearing house for requests to the
5  *        POSTGRES system.      Frontend programs send a startup message
6  *        to the Postmaster and the postmaster uses the info in the
7  *        message to setup a backend process.
8  *
9  *        The postmaster also manages system-wide operations such as
10  *        startup and shutdown. The postmaster itself doesn't do those
11  *        operations, mind you --- it just forks off a subprocess to do them
12  *        at the right times.  It also takes care of resetting the system
13  *        if a backend crashes.
14  *
15  *        The postmaster process creates the shared memory and semaphore
16  *        pools during startup, but as a rule does not touch them itself.
17  *        In particular, it is not a member of the PGPROC array of backends
18  *        and so it cannot participate in lock-manager operations.      Keeping
19  *        the postmaster away from shared memory operations makes it simpler
20  *        and more reliable.  The postmaster is almost always able to recover
21  *        from crashes of individual backends by resetting shared memory;
22  *        if it did much with shared memory then it would be prone to crashing
23  *        along with the backends.
24  *
25  *        When a request message is received, we now fork() immediately.
26  *        The child process performs authentication of the request, and
27  *        then becomes a backend if successful.  This allows the auth code
28  *        to be written in a simple single-threaded style (as opposed to the
29  *        crufty "poor man's multitasking" code that used to be needed).
30  *        More importantly, it ensures that blockages in non-multithreaded
31  *        libraries like SSL or PAM cannot cause denial of service to other
32  *        clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  *        $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.439 2004/11/17 00:14:12 tgl Exp $
41  *
42  * NOTES
43  *
44  * Initialization:
45  *              The Postmaster sets up shared memory data structures
46  *              for the backends.
47  *
48  * Synchronization:
49  *              The Postmaster shares memory with the backends but should avoid
50  *              touching shared memory, so as not to become stuck if a crashing
51  *              backend screws up locks or shared memory.  Likewise, the Postmaster
52  *              should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  *              The Postmaster cleans up after backends if they have an emergency
56  *              exit and/or core dump.
57  *
58  * Error Reporting:
59  *              Use write_stderr() only for reporting "interactive" errors
60  *              (essentially, bogus arguments on the command line).  Once the
61  *              postmaster is launched, use ereport().  In particular, don't use
62  *              write_stderr() for anything that occurs after pmdaemonize.
63  *
64  *-------------------------------------------------------------------------
65  */
66
67 #include "postgres.h"
68
69 #include <unistd.h>
70 #include <signal.h>
71 #include <time.h>
72 #include <sys/wait.h>
73 #include <ctype.h>
74 #include <sys/stat.h>
75 #include <sys/socket.h>
76 #include <fcntl.h>
77 #include <sys/param.h>
78 #include <netinet/in.h>
79 #include <arpa/inet.h>
80 #include <netdb.h>
81 #include <limits.h>
82
83 #ifdef HAVE_SYS_SELECT_H
84 #include <sys/select.h>
85 #endif
86
87 #ifdef HAVE_GETOPT_H
88 #include <getopt.h>
89 #endif
90
91 #ifdef USE_RENDEZVOUS
92 #include <DNSServiceDiscovery/DNSServiceDiscovery.h>
93 #endif
94
95 #include "catalog/pg_database.h"
96 #include "commands/async.h"
97 #include "lib/dllist.h"
98 #include "libpq/auth.h"
99 #include "libpq/crypt.h"
100 #include "libpq/libpq.h"
101 #include "libpq/pqcomm.h"
102 #include "libpq/pqsignal.h"
103 #include "miscadmin.h"
104 #include "nodes/nodes.h"
105 #include "postmaster/postmaster.h"
106 #include "postmaster/pgarch.h"
107 #include "postmaster/syslogger.h"
108 #include "storage/fd.h"
109 #include "storage/ipc.h"
110 #include "storage/pg_shmem.h"
111 #include "storage/pmsignal.h"
112 #include "storage/proc.h"
113 #include "storage/bufmgr.h"
114 #include "access/xlog.h"
115 #include "tcop/tcopprot.h"
116 #include "utils/builtins.h"
117 #include "utils/guc.h"
118 #include "utils/memutils.h"
119 #include "utils/ps_status.h"
120 #include "bootstrap/bootstrap.h"
121 #include "pgstat.h"
122
123 #ifdef EXEC_BACKEND
124 #include "storage/spin.h"
125 #endif
126
127
128 /*
129  * Entry in the list of active backends (or child processes anyway; we
130  * don't actually know whether a given child has become a backend or is
131  * still in the authentication phase).  The list is used mainly to keep
132  * track of how many children we have and to signal them when necessary.
133  *
134  * "Special" children such as the startup and bgwriter tasks are not in
135  * this list.
136  */
137 typedef struct bkend
138 {
139         pid_t           pid;                    /* process ID of the child */
140         long            cancel_key;             /* cancel key assigned to this backend */
141 } Backend;
142
143 static Dllist *BackendList;
144
145 #ifdef EXEC_BACKEND
146 #define NUM_BACKENDARRAY_ELEMS (2*MaxBackends)
147 static Backend *ShmemBackendArray;
148 #endif
149
150 /* The port number, socket directory, and addresses we listen on */
151 int                     PostPortNumber;
152 char       *UnixSocketDir;
153 char       *ListenAddresses;
154
155 /*
156  * ReservedBackends is the number of backends reserved for superuser use.
157  * This number is taken out of the pool size given by MaxBackends, so the
158  * number of backend slots available to non-superusers is
159  * (MaxBackends - ReservedBackends).  Note that what this really means is
160  * "if there are <= ReservedBackends connections available, only superusers
161  * can make new connections" --- pre-existing superuser connections don't
162  * count against the limit.
163  */
164 int                     ReservedBackends;
165
166
167 static const char *progname = NULL;
168
169 /* The socket(s) we're listening to. */
170 #define MAXLISTEN       10
171 static int      ListenSocket[MAXLISTEN];
172
173 /*
174  * Set by the -o option
175  */
176 static char ExtraOptions[MAXPGPATH];
177
178 /*
179  * These globals control how the postmaster reacts when some backend
180  * dumps core.  Normally, it kills all peers of the dead backend and
181  * reinitializes shared memory.  -n clears Reinit, so shared data
182  * structures are not reinitialized; -s sets SendStop, so peers are sent
183  * SIGSTOP rather than SIGQUIT.  Both are boolean flags.
184  */
185 static bool Reinit = true;
186 static bool SendStop = false;
187
188 /* still more option variables */
189 bool            EnableSSL = false;
190 bool            SilentMode = false; /* silent mode (-S) */
191
192 int                     PreAuthDelay = 0;
193 int                     AuthenticationTimeout = 60;
194
195 bool            log_hostname;           /* for ps display and logging */
196 bool            Log_connections = false;
197 bool            Db_user_namespace = false;
198
199 char       *rendezvous_name;
200
201 /* list of library:init-function to be preloaded */
202 char       *preload_libraries_string = NULL;
203
204 /* PIDs of special child processes; 0 when not running */
205 static pid_t StartupPID = 0,
206                         BgWriterPID = 0,
207                         PgArchPID = 0,
208                         PgStatPID = 0,
209                         SysLoggerPID = 0;
210
211 /* Startup/shutdown state */
212 #define                 NoShutdown              0
213 #define                 SmartShutdown   1
214 #define                 FastShutdown    2
215
216 static int      Shutdown = NoShutdown;
217
218 static bool FatalError = false; /* T if recovering from backend crash */
219
220 bool            ClientAuthInProgress = false;           /* T during new-client
221                                                                                                  * authentication */
222
223 /*
224  * State for assigning random salts and cancel keys.
225  * Also, the global MyCancelKey passes the cancel key assigned to a given
226  * backend from the postmaster to that backend (via fork).
227  */
228 static unsigned int random_seed = 0;
229
230 extern char *optarg;
231 extern int      optind,
232                         opterr;
233
234 #ifdef HAVE_INT_OPTRESET
235 extern int      optreset;
236 #endif
237
238 /*
239  * postmaster.c - function prototypes
240  */
241 static void checkDataDir(void);
242
243 #ifdef USE_RENDEZVOUS
244 static void reg_reply(DNSServiceRegistrationReplyErrorType errorCode,
245                   void *context);
246 #endif
247 static void pmdaemonize(void);
248 static Port *ConnCreate(int serverFd);
249 static void ConnFree(Port *port);
250 static void reset_shared(unsigned short port);
251 static void SIGHUP_handler(SIGNAL_ARGS);
252 static void pmdie(SIGNAL_ARGS);
253 static void reaper(SIGNAL_ARGS);
254 static void sigusr1_handler(SIGNAL_ARGS);
255 static void dummy_handler(SIGNAL_ARGS);
256 static void CleanupBackend(int pid, int exitstatus);
257 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
258 static void LogChildExit(int lev, const char *procname,
259                          int pid, int exitstatus);
260 static int      BackendRun(Port *port);
261 static void ExitPostmaster(int status);
262 static void usage(const char *);
263 static int      ServerLoop(void);
264 static int      BackendStartup(Port *port);
265 static int      ProcessStartupPacket(Port *port, bool SSLdone);
266 static void processCancelRequest(Port *port, void *pkt);
267 static int      initMasks(fd_set *rmask);
268 static void report_fork_failure_to_client(Port *port, int errnum);
269 static enum CAC_state canAcceptConnections(void);
270 static long PostmasterRandom(void);
271 static void RandomSalt(char *cryptSalt, char *md5Salt);
272 static void SignalChildren(int signal);
273 static int      CountChildren(void);
274 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
275 static pid_t StartChildProcess(int xlop);
276
277 #ifdef EXEC_BACKEND
278
279 #ifdef WIN32
280 static void win32_AddChild(pid_t pid, HANDLE handle);
281 static void win32_RemoveChild(pid_t pid);
282 static pid_t win32_waitpid(int *exitstatus);
283 static DWORD WINAPI win32_sigchld_waiter(LPVOID param);
284
285 static pid_t *win32_childPIDArray;
286 static HANDLE *win32_childHNDArray;
287 static unsigned long win32_numChildren = 0;
288
289 HANDLE          PostmasterHandle;
290 #endif
291
292 static pid_t backend_forkexec(Port *port);
293 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
294
295 /* Type for a socket that can be inherited by an exec'ed child process */
296 #ifdef WIN32
297 typedef struct
298 {
299         SOCKET origsocket; /* Original socket value, or -1 if not a socket */
300         WSAPROTOCOL_INFO wsainfo; /* NOTE(review): presumably filled in by WSADuplicateSocket -- confirm */
301 } InheritableSocket;
302 #else   /* !WIN32 */
303 typedef int InheritableSocket;
304 #endif   /* WIN32 */
305
306 typedef struct LWLock LWLock;   /* ugly kluge */
307
308 /*
309  * Structure containing all the variables passed to exec'ed backends
310  */
311 typedef struct
312 {
313         Port port;
314         InheritableSocket portsocket; /* presumably port's socket in inheritable form -- confirm */
315         char DataDir[MAXPGPATH];
316         int ListenSocket[MAXLISTEN];
317         long MyCancelKey;
318         unsigned long UsedShmemSegID;
319         void *UsedShmemSegAddr;
320         slock_t *ShmemLock;
321         slock_t *ShmemIndexLock;
322         VariableCache ShmemVariableCache;
323         void *ShmemIndexAlloc;
324         Backend *ShmemBackendArray;
325         LWLock *LWLockArray;
326         slock_t *ProcStructLock;
327         InheritableSocket pgStatSock;
328         InheritableSocket pgStatPipe0;
329         InheritableSocket pgStatPipe1;
330         pid_t PostmasterPid;
331 #ifdef WIN32
332         HANDLE PostmasterHandle;
333         HANDLE initial_signal_pipe;
334         HANDLE syslogPipe[2];
335 #else   /* !WIN32 */
336         int syslogPipe[2];
337 #endif   /* WIN32 */
338         char my_exec_path[MAXPGPATH];
339         char ExtraOptions[MAXPGPATH];
340         char lc_collate[MAXPGPATH];
341         char lc_ctype[MAXPGPATH];
342 } BackendParameters;
343
344 static void read_backend_variables(char *id, Port *port);
345 static void restore_backend_variables(BackendParameters *param, Port *port);
346 #ifndef WIN32
347 static bool save_backend_variables(BackendParameters *param, Port *port);
348 #else
349 static bool save_backend_variables(BackendParameters *param, Port *port,
350                                                                    HANDLE childProcess, pid_t childPid);
351 #endif
352
353 static void ShmemBackendArrayAdd(Backend *bn);
354 static void ShmemBackendArrayRemove(pid_t pid);
355
356 #endif   /* EXEC_BACKEND */
357
358 #define StartupDataBase()               StartChildProcess(BS_XLOG_STARTUP)
359 #define StartBackgroundWriter() StartChildProcess(BS_XLOG_BGWRITER)
360
361
362 /*
363  * Postmaster main entry point
364  */
365 int
366 PostmasterMain(int argc, char *argv[])
367 {
368         int                     opt;
369         int                     status;
370         char       *userDoption = NULL;
371         int                     i;
372
373         /* This will call exit() if strdup() fails. */
374         progname = get_progname(argv[0]);       
375
376         MyProcPid = PostmasterPid = getpid();
377
378         IsPostmasterEnvironment = true;
379
380         /*
381          * Catch standard options before doing much else.  This even works on
382          * systems without getopt_long.
383          */
384         if (argc > 1)
385         {
386                 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
387                 {
388                         usage(progname);
389                         ExitPostmaster(0);
390                 }
391                 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
392                 {
393                         puts("postmaster (PostgreSQL) " PG_VERSION);
394                         ExitPostmaster(0);
395                 }
396         }
397
398 #ifdef WIN32
399         /* Start our win32 signal implementation */
400         pgwin32_signal_initialize();
401 #endif
402
403         /*
404          * for security, no dir or file created can be group or other
405          * accessible
406          */
407         umask((mode_t) 0077);
408
409         /*
410          * Fire up essential subsystems: memory management
411          */
412         MemoryContextInit();
413
414         /*
415          * By default, palloc() requests in the postmaster will be allocated
416          * in the PostmasterContext, which is space that can be recycled by
417          * backends.  Allocated data that needs to be available to backends
418          * should be allocated in TopMemoryContext.
419          */
420         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
421                                                                                           "Postmaster",
422                                                                                           ALLOCSET_DEFAULT_MINSIZE,
423                                                                                           ALLOCSET_DEFAULT_INITSIZE,
424                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
425         MemoryContextSwitchTo(PostmasterContext);
426
427         IgnoreSystemIndexes(false);
428
429         if (find_my_exec(argv[0], my_exec_path) < 0)
430                 elog(FATAL, "%s: could not locate my own executable path",
431                          argv[0]);
432
433         get_pkglib_path(my_exec_path, pkglib_path);
434
435         /*
436          * Options setup
437          */
438         InitializeGUCOptions();
439
440         opterr = 1;
441
442         while ((opt = getopt(argc, argv, "A:a:B:b:c:D:d:Fh:ik:lm:MN:no:p:Ss-:")) != -1)
443         {
444                 switch (opt)
445                 {
446                         case 'A':
447 #ifdef USE_ASSERT_CHECKING
448                                 SetConfigOption("debug_assertions", optarg, PGC_POSTMASTER, PGC_S_ARGV);
449 #else
450                                 write_stderr("%s: assert checking is not compiled in\n", progname);
451 #endif
452                                 break;
453                         case 'a':
454                                 /* Can no longer set authentication method. */
455                                 break;
456                         case 'B':
457                                 SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
458                                 break;
459                         case 'b':
460                                 /* Can no longer set the backend executable file to use. */
461                                 break;
462                         case 'D':
463                                 userDoption = optarg;
464                                 break;
465                         case 'd':
466                                 set_debug_options(atoi(optarg), PGC_POSTMASTER, PGC_S_ARGV);
467                                 break;
468                         case 'F':
469                                 SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
470                                 break;
471                         case 'h':
472                                 SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
473                                 break;
474                         case 'i':
475                                 SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
476                                 break;
477                         case 'k':
478                                 SetConfigOption("unix_socket_directory", optarg, PGC_POSTMASTER, PGC_S_ARGV);
479                                 break;
480 #ifdef USE_SSL
481                         case 'l':
482                                 SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
483                                 break;
484 #endif
485                         case 'm':
486                                 /* Multiplexed backends no longer supported. */
487                                 break;
488                         case 'M':
489
490                                 /*
491                                  * ignore this flag.  This may be passed in because the
492                                  * program was run as 'postgres -M' instead of
493                                  * 'postmaster'
494                                  */
495                                 break;
496                         case 'N':
497                                 /* The max number of backends to start. */
498                                 SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
499                                 break;
500                         case 'n':
501                                 /* Don't reinit shared mem after abnormal exit */
502                                 Reinit = false;
503                                 break;
504                         case 'o':
505
506                                 /*
507                                  * Other options to pass to the backend on the command
508                                  * line
509                                  */
510                                 snprintf(ExtraOptions + strlen(ExtraOptions),
511                                                  sizeof(ExtraOptions) - strlen(ExtraOptions),
512                                                  " %s", optarg);
513                                 break;
514                         case 'p':
515                                 SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
516                                 break;
517                         case 'S':
518
519                                 /*
520                                  * Start in 'S'ilent mode (disassociate from controlling
521                                  * tty). You may also think of this as 'S'ysV mode since
522                                  * it's most badly needed on SysV-derived systems like
523                                  * SVR4 and HP-UX.
524                                  */
525                                 SetConfigOption("silent_mode", "true", PGC_POSTMASTER, PGC_S_ARGV);
526                                 break;
527                         case 's':
528
529                                 /*
530                                  * In the event that some backend dumps core, send
531                                  * SIGSTOP, rather than SIGQUIT, to all its peers.      This
532                                  * lets the wily post_hacker collect core dumps from
533                                  * everyone.
534                                  */
535                                 SendStop = true;
536                                 break;
537                         case 'c':
538                         case '-':
539                                 {
540                                         char       *name,
541                                                            *value;
542
543                                         ParseLongOption(optarg, &name, &value);
544                                         if (!value)
545                                         {
546                                                 if (opt == '-')
547                                                         ereport(ERROR,
548                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
549                                                                          errmsg("--%s requires a value",
550                                                                                         optarg)));
551                                                 else
552                                                         ereport(ERROR,
553                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
554                                                                          errmsg("-c %s requires a value",
555                                                                                         optarg)));
556                                         }
557
558                                         SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
559                                         free(name);
560                                         if (value)
561                                                 free(value);
562                                         break;
563                                 }
564
565                         default:
566                                 write_stderr("Try \"%s --help\" for more information.\n",
567                                                          progname);
568                                 ExitPostmaster(1);
569                 }
570         }
571
572         /*
573          * Postmaster accepts no non-option switch arguments.
574          */
575         if (optind < argc)
576         {
577                 write_stderr("%s: invalid argument: \"%s\"\n",
578                                          progname, argv[optind]);
579                 write_stderr("Try \"%s --help\" for more information.\n",
580                                          progname);
581                 ExitPostmaster(1);
582         }
583
584         /*
585          * Locate the proper configuration files and data directory, and
586          * read postgresql.conf for the first time.
587          */
588         if (!SelectConfigFiles(userDoption, progname))
589                 ExitPostmaster(2);
590
591         /* Verify that DataDir looks reasonable */
592         checkDataDir();
593
594         /*
595          * Check for invalid combinations of GUC settings.
596          */
597         if (NBuffers < 2 * MaxBackends || NBuffers < 16)
598         {
599                 /*
600                  * Do not accept -B so small that backends are likely to starve
601                  * for lack of buffers.  The specific choices here are somewhat
602                  * arbitrary.
603                  */
604                 write_stderr("%s: the number of buffers (-B) must be at least twice the number of allowed connections (-N) and at least 16\n", progname);
605                 ExitPostmaster(1);
606         }
607
608         if (ReservedBackends >= MaxBackends)
609         {
610                 write_stderr("%s: superuser_reserved_connections must be less than max_connections\n", progname);
611                 ExitPostmaster(1);
612         }
613
614         /*
615          * Other one-time internal sanity checks can go here.
616          */
617         if (!CheckDateTokenTables())
618         {
619                 write_stderr("%s: invalid datetoken tables, please fix\n", progname);
620                 ExitPostmaster(1);
621         }
622
623         /*
624          * Now that we are done processing the postmaster arguments, reset
625          * getopt(3) library so that it will work correctly in subprocesses.
626          */
627         optind = 1;
628 #ifdef HAVE_INT_OPTRESET
629         optreset = 1;                           /* some systems need this too */
630 #endif
631
632         /* For debugging: display postmaster environment */
633         {
634                 extern char **environ;
635                 char      **p;
636
637                 ereport(DEBUG3,
638                         (errmsg_internal("%s: PostmasterMain: initial environ dump:",
639                                                          progname)));
640                 ereport(DEBUG3,
641                  (errmsg_internal("-----------------------------------------")));
642                 for (p = environ; *p; ++p)
643                         ereport(DEBUG3,
644                                         (errmsg_internal("\t%s", *p)));
645                 ereport(DEBUG3,
646                  (errmsg_internal("-----------------------------------------")));
647         }
648
649 #ifdef EXEC_BACKEND
650         if (find_other_exec(argv[0], "postgres", PG_VERSIONSTR,
651                                                 postgres_exec_path) < 0)
652                 ereport(FATAL,
653                          (errmsg("%s: could not locate matching postgres executable",
654                                          progname)));
655 #endif
656
657         /*
658          * Initialize SSL library, if specified.
659          */
660 #ifdef USE_SSL
661         if (EnableSSL)
662                 secure_initialize();
663 #endif
664
665         /*
666          * process any libraries that should be preloaded and optionally
667          * pre-initialized
668          */
669         if (preload_libraries_string)
670                 process_preload_libraries(preload_libraries_string);
671
672         /*
673          * Fork away from controlling terminal, if -S specified.
674          *
675          * Must do this before we grab any interlock files, else the interlocks
676          * will show the wrong PID.
677          */
678         if (SilentMode)
679                 pmdaemonize();
680
681         /*
682          * Create lockfile for data directory.
683          *
684          * We want to do this before we try to grab the input sockets, because
685          * the data directory interlock is more reliable than the socket-file
686          * interlock (thanks to whoever decided to put socket files in /tmp
687          * :-(). For the same reason, it's best to grab the TCP socket(s)
688          * before the Unix socket.
689          */
690         CreateDataDirLockFile(DataDir, true);
691
692         /*
693          * Remove old temporary files.  At this point there can be no other
694          * Postgres processes running in this directory, so this should be
695          * safe.
696          */
697         RemovePgTempFiles();
698
699         /*
700          * Establish input sockets.
701          */
702         for (i = 0; i < MAXLISTEN; i++)
703                 ListenSocket[i] = -1;
704
705         if (ListenAddresses)
706         {
707                 char       *rawstring;
708                 List       *elemlist;
709                 ListCell   *l;
710
711                 /* Need a modifiable copy of ListenAddresses */
712                 rawstring = pstrdup(ListenAddresses);
713
714                 /* Parse string into list of identifiers */
715                 if (!SplitIdentifierString(rawstring, ',', &elemlist))
716                 {
717                         /* syntax error in list */
718                         ereport(FATAL,
719                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
720                                 errmsg("invalid list syntax for \"listen_addresses\"")));
721                 }
722
723                 foreach(l, elemlist)
724                 {
725                         char       *curhost = (char *) lfirst(l);
726
727                         if (strcmp(curhost, "*") == 0)
728                                 status = StreamServerPort(AF_UNSPEC, NULL,
729                                                                                   (unsigned short) PostPortNumber,
730                                                                                   UnixSocketDir,
731                                                                                   ListenSocket, MAXLISTEN);
732                         else
733                                 status = StreamServerPort(AF_UNSPEC, curhost,
734                                                                                   (unsigned short) PostPortNumber,
735                                                                                   UnixSocketDir,
736                                                                                   ListenSocket, MAXLISTEN);
737                         if (status != STATUS_OK)
738                                 ereport(WARNING,
739                                          (errmsg("could not create listen socket for \"%s\"",
740                                                          curhost)));
741                 }
742
743                 list_free(elemlist);
744                 pfree(rawstring);
745         }
746
747 #ifdef USE_RENDEZVOUS
748         /* Register for Rendezvous only if we opened TCP socket(s) */
749         if (ListenSocket[0] != -1 && rendezvous_name != NULL)
750         {
751                 DNSServiceRegistrationCreate(rendezvous_name,
752                                                                          "_postgresql._tcp.",
753                                                                          "",
754                                                                          htonl(PostPortNumber),
755                                                                          "",
756                                                                  (DNSServiceRegistrationReply) reg_reply,
757                                                                          NULL);
758         }
759 #endif
760
761 #ifdef HAVE_UNIX_SOCKETS
762         status = StreamServerPort(AF_UNIX, NULL,
763                                                           (unsigned short) PostPortNumber,
764                                                           UnixSocketDir,
765                                                           ListenSocket, MAXLISTEN);
766         if (status != STATUS_OK)
767                 ereport(WARNING,
768                                 (errmsg("could not create Unix-domain socket")));
769 #endif
770
771         /*
772          * check that we have some socket to listen on
773          */
774         if (ListenSocket[0] == -1)
775                 ereport(FATAL,
776                                 (errmsg("no socket created for listening")));
777
778         XLOGPathInit();
779
780         /*
781          * Set up shared memory and semaphores.
782          */
783         reset_shared(PostPortNumber);
784
785         /*
786          * Estimate number of openable files.  This must happen after setting
787          * up semaphores, because on some platforms semaphores count as open
788          * files.
789          */
790         set_max_safe_fds();
791
792         /*
793          * Initialize the list of active backends.
794          */
795         BackendList = DLNewList();
796
797 #ifdef WIN32
798
799         /*
800          * Initialize the child pid/HANDLE arrays for signal handling.
801          */
802         win32_childPIDArray = (pid_t *)
803                 malloc(NUM_BACKENDARRAY_ELEMS * sizeof(pid_t));
804         win32_childHNDArray = (HANDLE *)
805                 malloc(NUM_BACKENDARRAY_ELEMS * sizeof(HANDLE));
806         if (!win32_childPIDArray || !win32_childHNDArray)
807                 ereport(FATAL,
808                                 (errcode(ERRCODE_OUT_OF_MEMORY),
809                                  errmsg("out of memory")));
810
811         /*
812          * Set up a handle that child processes can use to check whether the
813          * postmaster is still running.
814          */
815         if (DuplicateHandle(GetCurrentProcess(),
816                                                 GetCurrentProcess(),
817                                                 GetCurrentProcess(),
818                                                 &PostmasterHandle,
819                                                 0,
820                                                 TRUE,
821                                                 DUPLICATE_SAME_ACCESS) == 0)
822                 ereport(FATAL,
823                         (errmsg_internal("could not duplicate postmaster handle: error code %d",
824                                                          (int) GetLastError())));
825 #endif
826
827         /*
828          * Record postmaster options.  We delay this till now to avoid
829          * recording bogus options (eg, NBuffers too high for available
830          * memory).
831          */
832         if (!CreateOptsFile(argc, argv, my_exec_path))
833                 ExitPostmaster(1);
834
835 #ifdef EXEC_BACKEND
836         write_nondefault_variables(PGC_POSTMASTER);
837 #endif
838
839         /*
840          * Write the external PID file if requested
841          */
842         if (external_pid_file)
843         {
844                 FILE       *fpidfile = fopen(external_pid_file, "w");
845
846                 if (fpidfile)
847                 {
848                         fprintf(fpidfile, "%d\n", MyProcPid);
849                         fclose(fpidfile);
850                         /* Should we remove the pid file on postmaster exit? */
851                 }
852                 else
853                         write_stderr("%s: could not write external PID file \"%s\": %s\n",
854                                                  progname, external_pid_file, strerror(errno));
855         }
856
857         /*
858          * Set up signal handlers for the postmaster process.
859          *
860          * CAUTION: when changing this list, check for side-effects on the signal
861          * handling setup of child processes.  See tcop/postgres.c,
862          * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/pgarch.c,
863          * postmaster/pgstat.c, and postmaster/syslogger.c.
864          */
865         pqinitmask();
866         PG_SETMASK(&BlockSig);
867
868         pqsignal(SIGHUP, SIGHUP_handler);       /* reread config file and have
869                                                                                  * children do same */
870         pqsignal(SIGINT, pmdie);        /* send SIGTERM and shut down */
871         pqsignal(SIGQUIT, pmdie);       /* send SIGQUIT and die */
872         pqsignal(SIGTERM, pmdie);       /* wait for children and shut down */
873         pqsignal(SIGALRM, SIG_IGN); /* ignored */
874         pqsignal(SIGPIPE, SIG_IGN); /* ignored */
875         pqsignal(SIGUSR1, sigusr1_handler); /* message from child process */
876         pqsignal(SIGUSR2, dummy_handler);       /* unused, reserve for children */
877         pqsignal(SIGCHLD, reaper);      /* handle child termination */
878         pqsignal(SIGTTIN, SIG_IGN); /* ignored */
879         pqsignal(SIGTTOU, SIG_IGN); /* ignored */
880         /* ignore SIGXFSZ, so that ulimit violations work like disk full */
881 #ifdef SIGXFSZ
882         pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
883 #endif
884
885         /*
886          * If enabled, start up syslogger collection subprocess
887          */
888         SysLoggerPID = SysLogger_Start();
889
890         /*
891          * Reset whereToSendOutput from Debug (its starting state) to None.
892          * This stops ereport from sending log messages to stderr unless
893          * Log_destination permits.  We don't do this until the postmaster is
894          * fully launched, since startup failures may as well be reported to
895          * stderr.
896          */
897         whereToSendOutput = None;
898
899         /*
900          * Initialize the statistics collector stuff
901          */
902         pgstat_init();
903
904         /*
905          * Load cached files for client authentication.
906          */
907         load_hba();
908         load_ident();
909         load_user();
910         load_group();
911
912         /*
913          * We're ready to rock and roll...
914          */
915         StartupPID = StartupDataBase();
916
917         status = ServerLoop();
918
919         /*
920          * ServerLoop probably shouldn't ever return, but if it does, close
921          * down.
922          */
923         ExitPostmaster(status != STATUS_OK);
924
925         return 0;                                       /* not reached */
926 }
927
928
929 /*
930  * Validate the proposed data directory
931  */
932 static void
933 checkDataDir(void)
934 {
935         char            path[MAXPGPATH];
936         FILE       *fp;
937         struct stat stat_buf;
938
939         Assert(DataDir);
940
941         if (stat(DataDir, &stat_buf) != 0)
942         {
943                 if (errno == ENOENT)
944                         ereport(FATAL,
945                                         (errcode_for_file_access(),
946                                          errmsg("data directory \"%s\" does not exist",
947                                                         DataDir)));
948                 else
949                         ereport(FATAL,
950                                         (errcode_for_file_access(),
951                          errmsg("could not read permissions of directory \"%s\": %m",
952                                         DataDir)));
953         }
954
955         /*
956          * Check if the directory has group or world access.  If so, reject.
957          *
958          * XXX temporarily suppress check when on Windows, because there may not
959          * be proper support for Unix-y file permissions.  Need to think of a
960          * reasonable check to apply on Windows.
961          */
962 #if !defined(WIN32) && !defined(__CYGWIN__)
963         if (stat_buf.st_mode & (S_IRWXG | S_IRWXO))
964                 ereport(FATAL,
965                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
966                                  errmsg("data directory \"%s\" has group or world access",
967                                                 DataDir),
968                                  errdetail("Permissions should be u=rwx (0700).")));
969 #endif
970
971         /* Look for PG_VERSION before looking for pg_control */
972         ValidatePgVersion(DataDir);
973
974         snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
975
976         fp = AllocateFile(path, PG_BINARY_R);
977         if (fp == NULL)
978         {
979                 write_stderr("%s: could not find the database system\n"
980                                          "Expected to find it in the directory \"%s\",\n"
981                                          "but could not open file \"%s\": %s\n",
982                                          progname, DataDir, path, strerror(errno));
983                 ExitPostmaster(2);
984         }
985         FreeFile(fp);
986 }
987
988
989 #ifdef USE_RENDEZVOUS
990
991 /*
992  * empty callback function for DNSServiceRegistrationCreate()
993  */
994 static void
995 reg_reply(DNSServiceRegistrationReplyErrorType errorCode, void *context)
996 {
997
998 }
999 #endif   /* USE_RENDEZVOUS */
1000
1001
1002 /*
1003  * Fork away from the controlling terminal (-S option)
1004  */
1005 static void
1006 pmdaemonize(void)
1007 {
1008 #ifndef WIN32
1009         int                     i;
1010         pid_t           pid;
1011
1012 #ifdef LINUX_PROFILE
1013         struct itimerval prof_itimer;
1014 #endif
1015
1016 #ifdef LINUX_PROFILE
1017         /* see comments in BackendStartup */
1018         getitimer(ITIMER_PROF, &prof_itimer);
1019 #endif
1020
1021         pid = fork();
1022         if (pid == (pid_t) -1)
1023         {
1024                 write_stderr("%s: could not fork background process: %s\n",
1025                                          progname, strerror(errno));
1026                 ExitPostmaster(1);
1027         }
1028         else if (pid)
1029         {                                                       /* parent */
1030                 /* Parent should just exit, without doing any atexit cleanup */
1031                 _exit(0);
1032         }
1033
1034 #ifdef LINUX_PROFILE
1035         setitimer(ITIMER_PROF, &prof_itimer, NULL);
1036 #endif
1037
1038         MyProcPid = PostmasterPid = getpid();           /* reset PID vars to child */
1039
1040 /* GH: If there's no setsid(), we hopefully don't need silent mode.
1041  * Until there's a better solution.
1042  */
1043 #ifdef HAVE_SETSID
1044         if (setsid() < 0)
1045         {
1046                 write_stderr("%s: could not dissociate from controlling TTY: %s\n",
1047                                          progname, strerror(errno));
1048                 ExitPostmaster(1);
1049         }
1050 #endif
1051         i = open(NULL_DEV, O_RDWR);
1052         dup2(i, 0);
1053         dup2(i, 1);
1054         dup2(i, 2);
1055         close(i);
1056 #else                                                   /* WIN32 */
1057         /* not supported */
1058         elog(FATAL, "SilentMode not supported under WIN32");
1059 #endif   /* WIN32 */
1060 }
1061
1062
/*
 * usage
 *		Print the command-line help text on stdout.
 *
 * progname is substituted into the banner and the synopsis line.  Each
 * message is passed through gettext() separately, so the literals below
 * must be left exactly as they are.
 */
static void
usage(const char *progname)
{
	/* banner and synopsis */
	printf(gettext("%s is the PostgreSQL server.\n\n"), progname);
	printf(gettext("Usage:\n  %s [OPTION]...\n\n"), progname);

	/* general options; -A and -l appear only in builds that support them */
	printf(gettext("Options:\n"));
#ifdef USE_ASSERT_CHECKING
	printf(gettext("  -A 1|0          enable/disable run-time assert checking\n"));
#endif
	printf(gettext("  -B NBUFFERS     number of shared buffers\n"));
	printf(gettext("  -c NAME=VALUE   set run-time parameter\n"));
	printf(gettext("  -d 1-5          debugging level\n"));
	printf(gettext("  -D DATADIR      database directory\n"));
	printf(gettext("  -F              turn fsync off\n"));
	printf(gettext("  -h HOSTNAME     host name or IP address to listen on\n"));
	printf(gettext("  -i              enable TCP/IP connections\n"));
	printf(gettext("  -k DIRECTORY    Unix-domain socket location\n"));
#ifdef USE_SSL
	printf(gettext("  -l              enable SSL connections\n"));
#endif
	printf(gettext("  -N MAX-CONNECT  maximum number of allowed connections\n"));
	printf(gettext("  -o OPTIONS      pass \"OPTIONS\" to each server process\n"));
	printf(gettext("  -p PORT         port number to listen on\n"));
	printf(gettext("  -S              silent mode (start in background without logging output)\n"));
	printf(gettext("  --help          show this help, then exit\n"));
	printf(gettext("  --version       output version information, then exit\n"));

	/* developer options */
	printf(gettext("\nDeveloper options:\n"));
	printf(gettext("  -n              do not reinitialize shared memory after abnormal exit\n"));
	printf(gettext("  -s              send SIGSTOP to all backend servers if one dies\n"));

	/* closing pointer to the documentation and the bug-report address */
	printf(gettext("\nPlease read the documentation for the complete list of run-time\n"
				   "configuration settings and how to set them on the command line or in\n"
				   "the configuration file.\n\n"
				   "Report bugs to <pgsql-bugs@postgresql.org>.\n"));
}
1102
1103
1104 /*
1105  * Main idle loop of postmaster
1106  */
1107 static int
1108 ServerLoop(void)
1109 {
1110         fd_set          readmask;
1111         int                     nSockets;
1112         time_t          now,
1113                                 last_touch_time;
1114         struct timeval earlier,
1115                                 later;
1116         struct timezone tz;
1117
1118         gettimeofday(&earlier, &tz);
1119         last_touch_time = time(NULL);
1120
1121         nSockets = initMasks(&readmask);
1122
1123         for (;;)
1124         {
1125                 Port       *port;
1126                 fd_set          rmask;
1127                 struct timeval timeout;
1128                 int                     selres;
1129                 int                     i;
1130
1131                 /*
1132                  * Wait for something to happen.
1133                  *
1134                  * We wait at most one minute, to ensure that the other background
1135                  * tasks handled below get done even when no requests are
1136                  * arriving.
1137                  */
1138                 memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1139
1140                 timeout.tv_sec = 60;
1141                 timeout.tv_usec = 0;
1142
1143                 PG_SETMASK(&UnBlockSig);
1144
1145                 selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1146
1147                 /*
1148                  * Block all signals until we wait again.  (This makes it safe for
1149                  * our signal handlers to do nontrivial work.)
1150                  */
1151                 PG_SETMASK(&BlockSig);
1152
1153                 if (selres < 0)
1154                 {
1155                         if (errno != EINTR && errno != EWOULDBLOCK)
1156                         {
1157                                 ereport(LOG,
1158                                                 (errcode_for_socket_access(),
1159                                                  errmsg("select() failed in postmaster: %m")));
1160                                 return STATUS_ERROR;
1161                         }
1162                 }
1163
1164                 /*
1165                  * New connection pending on any of our sockets? If so, fork a
1166                  * child process to deal with it.
1167                  */
1168                 if (selres > 0)
1169                 {
1170                         /*
1171                          * Select a random seed at the time of first receiving a
1172                          * request.
1173                          */
1174                         while (random_seed == 0)
1175                         {
1176                                 gettimeofday(&later, &tz);
1177
1178                                 /*
1179                                  * We are not sure how much precision is in tv_usec, so we
1180                                  * swap the high and low 16 bits of 'later' and XOR them with
1181                                  * 'earlier'. On the off chance that the result is 0, we
1182                                  * loop until it isn't.
1183                                  */
1184                                 random_seed = earlier.tv_usec ^
1185                                         ((later.tv_usec << 16) |
1186                                          ((later.tv_usec >> 16) & 0xffff));
1187                         }
1188
1189                         for (i = 0; i < MAXLISTEN; i++)
1190                         {
1191                                 if (ListenSocket[i] == -1)
1192                                         break;
1193                                 if (FD_ISSET(ListenSocket[i], &rmask))
1194                                 {
1195                                         port = ConnCreate(ListenSocket[i]);
1196                                         if (port)
1197                                         {
1198                                                 BackendStartup(port);
1199
1200                                                 /*
1201                                                  * We no longer need the open socket or port
1202                                                  * structure in this process
1203                                                  */
1204                                                 StreamClose(port->sock);
1205                                                 ConnFree(port);
1206                                         }
1207                                 }
1208                         }
1209                 }
1210
1211                 /* If we have lost the system logger, try to start a new one */
1212                 if (SysLoggerPID == 0 && Redirect_stderr)
1213                         SysLoggerPID = SysLogger_Start();
1214
1215                 /*
1216                  * If no background writer process is running, and we are not in a
1217                  * state that prevents it, start one.  It doesn't matter if this
1218                  * fails, we'll just try again later.
1219                  */
1220                 if (BgWriterPID == 0 && StartupPID == 0 && !FatalError)
1221                 {
1222                         BgWriterPID = StartBackgroundWriter();
1223                         /* If shutdown is pending, set it going */
1224                         if (Shutdown > NoShutdown && BgWriterPID != 0)
1225                                 kill(BgWriterPID, SIGUSR2);
1226                 }
1227
1228                 /* If we have lost the archiver, try to start a new one */
1229                 if (XLogArchivingActive() && PgArchPID == 0 &&
1230                         StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
1231                         PgArchPID = pgarch_start();
1232
1233                 /* If we have lost the stats collector, try to start a new one */
1234                 if (PgStatPID == 0 &&
1235                         StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
1236                         PgStatPID = pgstat_start();
1237
1238                 /*
1239                  * Touch the socket and lock file at least every ten minutes, to
1240                  * ensure that they are not removed by overzealous /tmp-cleaning
1241                  * tasks.
1242                  */
1243                 now = time(NULL);
1244                 if (now - last_touch_time >= 10 * 60)
1245                 {
1246                         TouchSocketFile();
1247                         TouchSocketLockFile();
1248                         last_touch_time = now;
1249                 }
1250         }
1251 }
1252
1253
1254 /*
1255  * Initialise the masks for select() for the ports we are listening on.
1256  * Return the number of sockets to listen on.
1257  */
1258 static int
1259 initMasks(fd_set *rmask)
1260 {
1261         int                     nsocks = -1;
1262         int                     i;
1263
1264         FD_ZERO(rmask);
1265
1266         for (i = 0; i < MAXLISTEN; i++)
1267         {
1268                 int                     fd = ListenSocket[i];
1269
1270                 if (fd == -1)
1271                         break;
1272                 FD_SET(fd, rmask);
1273                 if (fd > nsocks)
1274                         nsocks = fd;
1275         }
1276
1277         return nsocks + 1;
1278 }
1279
1280
1281 /*
1282  * Read the startup packet and do something according to it.
1283  *
1284  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1285  * not return at all.
1286  *
1287  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1288  * if that's what you want.  Return STATUS_ERROR if you don't want to
1289  * send anything to the client, which would typically be appropriate
1290  * if we detect a communications failure.)
1291  */
1292 static int
1293 ProcessStartupPacket(Port *port, bool SSLdone)
1294 {
1295         int32           len;
1296         void       *buf;
1297         ProtocolVersion proto;
1298         MemoryContext oldcontext;
1299
1300         if (pq_getbytes((char *) &len, 4) == EOF)
1301         {
1302                 /*
1303                  * EOF after SSLdone probably means the client didn't like our
1304                  * response to NEGOTIATE_SSL_CODE.      That's not an error condition,
1305                  * so don't clutter the log with a complaint.
1306                  */
1307                 if (!SSLdone)
1308                         ereport(COMMERROR,
1309                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1310                                          errmsg("incomplete startup packet")));
1311                 return STATUS_ERROR;
1312         }
1313
1314         len = ntohl(len);
1315         len -= 4;
1316
1317         if (len < (int32) sizeof(ProtocolVersion) ||
1318                 len > MAX_STARTUP_PACKET_LENGTH)
1319         {
1320                 ereport(COMMERROR,
1321                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1322                                  errmsg("invalid length of startup packet")));
1323                 return STATUS_ERROR;
1324         }
1325
1326         /*
1327          * Allocate at least the size of an old-style startup packet, plus one
1328          * extra byte, and make sure all are zeroes.  This ensures we will
1329          * have null termination of all strings, in both fixed- and
1330          * variable-length packet layouts.
1331          */
1332         if (len <= (int32) sizeof(StartupPacket))
1333                 buf = palloc0(sizeof(StartupPacket) + 1);
1334         else
1335                 buf = palloc0(len + 1);
1336
1337         if (pq_getbytes(buf, len) == EOF)
1338         {
1339                 ereport(COMMERROR,
1340                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1341                                  errmsg("incomplete startup packet")));
1342                 return STATUS_ERROR;
1343         }
1344
1345         /*
1346          * The first field is either a protocol version number or a special
1347          * request code.
1348          */
1349         port->proto = proto = ntohl(*((ProtocolVersion *) buf));
1350
1351         if (proto == CANCEL_REQUEST_CODE)
1352         {
1353                 processCancelRequest(port, buf);
1354                 return 127;                             /* XXX */
1355         }
1356
1357         if (proto == NEGOTIATE_SSL_CODE && !SSLdone)
1358         {
1359                 char            SSLok;
1360
1361 #ifdef USE_SSL
1362                 /* No SSL when disabled or on Unix sockets */
1363                 if (!EnableSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
1364                         SSLok = 'N';
1365                 else
1366                         SSLok = 'S';            /* Support for SSL */
1367 #else
1368                 SSLok = 'N';                    /* No support for SSL */
1369 #endif
1370                 if (send(port->sock, &SSLok, 1, 0) != 1)
1371                 {
1372                         ereport(COMMERROR,
1373                                         (errcode_for_socket_access(),
1374                                  errmsg("failed to send SSL negotiation response: %m")));
1375                         return STATUS_ERROR;    /* close the connection */
1376                 }
1377
1378 #ifdef USE_SSL
1379                 if (SSLok == 'S' && secure_open_server(port) == -1)
1380                         return STATUS_ERROR;
1381 #endif
1382                 /* regular startup packet, cancel, etc packet should follow... */
1383                 /* but not another SSL negotiation request */
1384                 return ProcessStartupPacket(port, true);
1385         }
1386
1387         /* Could add additional special packet types here */
1388
1389         /*
1390          * Set FrontendProtocol now so that ereport() knows what format to
1391          * send if we fail during startup.
1392          */
1393         FrontendProtocol = proto;
1394
1395         /* Check we can handle the protocol the frontend is using. */
1396
1397         if (PG_PROTOCOL_MAJOR(proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) ||
1398           PG_PROTOCOL_MAJOR(proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) ||
1399         (PG_PROTOCOL_MAJOR(proto) == PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) &&
1400          PG_PROTOCOL_MINOR(proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST)))
1401                 ereport(FATAL,
1402                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1403                                  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
1404                                           PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
1405                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST),
1406                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST),
1407                                                 PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST))));
1408
1409         /*
1410          * Now fetch parameters out of startup packet and save them into the
1411          * Port structure.      All data structures attached to the Port struct
1412          * must be allocated in TopMemoryContext so that they won't disappear
1413          * when we pass them to PostgresMain (see BackendRun).  We need not
1414          * worry about leaking this storage on failure, since we aren't in the
1415          * postmaster process anymore.
1416          */
1417         oldcontext = MemoryContextSwitchTo(TopMemoryContext);
1418
1419         if (PG_PROTOCOL_MAJOR(proto) >= 3)
1420         {
1421                 int32           offset = sizeof(ProtocolVersion);
1422
1423                 /*
1424                  * Scan packet body for name/option pairs.      We can assume any
1425                  * string beginning within the packet body is null-terminated,
1426                  * thanks to zeroing extra byte above.
1427                  */
1428                 port->guc_options = NIL;
1429
1430                 while (offset < len)
1431                 {
1432                         char       *nameptr = ((char *) buf) + offset;
1433                         int32           valoffset;
1434                         char       *valptr;
1435
1436                         if (*nameptr == '\0')
1437                                 break;                  /* found packet terminator */
1438                         valoffset = offset + strlen(nameptr) + 1;
1439                         if (valoffset >= len)
1440                                 break;                  /* missing value, will complain below */
1441                         valptr = ((char *) buf) + valoffset;
1442
1443                         if (strcmp(nameptr, "database") == 0)
1444                                 port->database_name = pstrdup(valptr);
1445                         else if (strcmp(nameptr, "user") == 0)
1446                                 port->user_name = pstrdup(valptr);
1447                         else if (strcmp(nameptr, "options") == 0)
1448                                 port->cmdline_options = pstrdup(valptr);
1449                         else
1450                         {
1451                                 /* Assume it's a generic GUC option */
1452                                 port->guc_options = lappend(port->guc_options,
1453                                                                                         pstrdup(nameptr));
1454                                 port->guc_options = lappend(port->guc_options,
1455                                                                                         pstrdup(valptr));
1456                         }
1457                         offset = valoffset + strlen(valptr) + 1;
1458                 }
1459
1460                 /*
1461                  * If we didn't find a packet terminator exactly at the end of the
1462                  * given packet length, complain.
1463                  */
1464                 if (offset != len - 1)
1465                         ereport(FATAL,
1466                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1467                                          errmsg("invalid startup packet layout: expected terminator as last byte")));
1468         }
1469         else
1470         {
1471                 /*
1472                  * Get the parameters from the old-style, fixed-width-fields
1473                  * startup packet as C strings.  The packet destination was
1474                  * cleared first so a short packet has zeros silently added.  We
1475                  * have to be prepared to truncate the pstrdup result for oversize
1476                  * fields, though.
1477                  */
1478                 StartupPacket *packet = (StartupPacket *) buf;
1479
1480                 port->database_name = pstrdup(packet->database);
1481                 if (strlen(port->database_name) > sizeof(packet->database))
1482                         port->database_name[sizeof(packet->database)] = '\0';
1483                 port->user_name = pstrdup(packet->user);
1484                 if (strlen(port->user_name) > sizeof(packet->user))
1485                         port->user_name[sizeof(packet->user)] = '\0';
1486                 port->cmdline_options = pstrdup(packet->options);
1487                 if (strlen(port->cmdline_options) > sizeof(packet->options))
1488                         port->cmdline_options[sizeof(packet->options)] = '\0';
1489                 port->guc_options = NIL;
1490         }
1491
1492         /* Check a user name was given. */
1493         if (port->user_name == NULL || port->user_name[0] == '\0')
1494                 ereport(FATAL,
1495                                 (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
1496                  errmsg("no PostgreSQL user name specified in startup packet")));
1497
1498         /* The database defaults to the user name. */
1499         if (port->database_name == NULL || port->database_name[0] == '\0')
1500                 port->database_name = pstrdup(port->user_name);
1501
1502         if (Db_user_namespace)
1503         {
1504                 /*
1505                  * If user@, it is a global user, remove '@'. We only want to do
1506                  * this if there is an '@' at the end and no earlier in the user
1507                  * string or they may fake as a local user of another database
1508                  * attaching to this database.
1509                  */
1510                 if (strchr(port->user_name, '@') ==
1511                         port->user_name + strlen(port->user_name) - 1)
1512                         *strchr(port->user_name, '@') = '\0';
1513                 else
1514                 {
1515                         /* Append '@' and dbname */
1516                         char       *db_user;
1517
1518                         db_user = palloc(strlen(port->user_name) +
1519                                                          strlen(port->database_name) + 2);
1520                         sprintf(db_user, "%s@%s", port->user_name, port->database_name);
1521                         port->user_name = db_user;
1522                 }
1523         }
1524
1525         /*
1526          * Truncate given database and user names to length of a Postgres
1527          * name.  This avoids lookup failures when overlength names are given.
1528          */
1529         if (strlen(port->database_name) >= NAMEDATALEN)
1530                 port->database_name[NAMEDATALEN - 1] = '\0';
1531         if (strlen(port->user_name) >= NAMEDATALEN)
1532                 port->user_name[NAMEDATALEN - 1] = '\0';
1533
1534         /*
1535          * Done putting stuff in TopMemoryContext.
1536          */
1537         MemoryContextSwitchTo(oldcontext);
1538
1539         /*
1540          * If we're going to reject the connection due to database state, say
1541          * so now instead of wasting cycles on an authentication exchange.
1542          * (This also allows a pg_ping utility to be written.)
1543          */
1544         switch (port->canAcceptConnections)
1545         {
1546                 case CAC_STARTUP:
1547                         ereport(FATAL,
1548                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1549                                          errmsg("the database system is starting up")));
1550                         break;
1551                 case CAC_SHUTDOWN:
1552                         ereport(FATAL,
1553                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1554                                          errmsg("the database system is shutting down")));
1555                         break;
1556                 case CAC_RECOVERY:
1557                         ereport(FATAL,
1558                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1559                                          errmsg("the database system is in recovery mode")));
1560                         break;
1561                 case CAC_TOOMANY:
1562                         ereport(FATAL,
1563                                         (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
1564                                          errmsg("sorry, too many clients already")));
1565                         break;
1566                 case CAC_OK:
1567                 default:
1568                         break;
1569         }
1570
1571         return STATUS_OK;
1572 }
1573
1574
1575 /*
1576  * The client has sent a cancel request packet, not a normal
1577  * start-a-new-connection packet.  Perform the necessary processing.
1578  * Nothing is sent back to the client.
1579  */
1580 static void
1581 processCancelRequest(Port *port, void *pkt)
1582 {
1583         CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
1584         int                     backendPID;
1585         long            cancelAuthCode;
1586         Backend    *bp;
1587
1588 #ifndef EXEC_BACKEND
1589         Dlelem     *curr;
1590
1591 #else
1592         int                     i;
1593 #endif
1594
1595         backendPID = (int) ntohl(canc->backendPID);
1596         cancelAuthCode = (long) ntohl(canc->cancelAuthCode);
1597
1598         /*
1599          * See if we have a matching backend.  In the EXEC_BACKEND case, we
1600          * can no longer access the postmaster's own backend list, and must
1601          * rely on the duplicate array in shared memory.
1602          */
1603 #ifndef EXEC_BACKEND
1604         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
1605         {
1606                 bp = (Backend *) DLE_VAL(curr);
1607 #else
1608         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
1609         {
1610                 bp = (Backend *) &ShmemBackendArray[i];
1611 #endif
1612                 if (bp->pid == backendPID)
1613                 {
1614                         if (bp->cancel_key == cancelAuthCode)
1615                         {
1616                                 /* Found a match; signal that backend to cancel current op */
1617                                 ereport(DEBUG2,
1618                                                 (errmsg_internal("processing cancel request: sending SIGINT to process %d",
1619                                                                                  backendPID)));
1620                                 kill(bp->pid, SIGINT);
1621                         }
1622                         else
1623                                 /* Right PID, wrong key: no way, Jose */
1624                                 ereport(DEBUG2,
1625                                                 (errmsg_internal("bad key in cancel request for process %d",
1626                                                                                  backendPID)));
1627                         return;
1628                 }
1629         }
1630
1631         /* No matching backend */
1632         ereport(DEBUG2,
1633                         (errmsg_internal("bad pid in cancel request for process %d",
1634                                                          backendPID)));
1635 }
1636
1637 /*
1638  * canAcceptConnections --- check to see if database state allows connections.
1639  */
1640 static enum CAC_state
1641 canAcceptConnections(void)
1642 {
1643         /* Can't start backends when in startup/shutdown/recovery state. */
1644         if (Shutdown > NoShutdown)
1645                 return CAC_SHUTDOWN;
1646         if (StartupPID)
1647                 return CAC_STARTUP;
1648         if (FatalError)
1649                 return CAC_RECOVERY;
1650
1651         /*
1652          * Don't start too many children.
1653          *
1654          * We allow more connections than we can have backends here because some
1655          * might still be authenticating; they might fail auth, or some
1656          * existing backend might exit before the auth cycle is completed. The
1657          * exact MaxBackends limit is enforced when a new backend tries to
1658          * join the shared-inval backend array.
1659          */
1660         if (CountChildren() >= 2 * MaxBackends)
1661                 return CAC_TOOMANY;
1662
1663         return CAC_OK;
1664 }
1665
1666
1667 /*
1668  * ConnCreate -- create a local connection data structure
1669  */
1670 static Port *
1671 ConnCreate(int serverFd)
1672 {
1673         Port       *port;
1674
1675         if (!(port = (Port *) calloc(1, sizeof(Port))))
1676         {
1677                 ereport(LOG,
1678                                 (errcode(ERRCODE_OUT_OF_MEMORY),
1679                                  errmsg("out of memory")));
1680                 ExitPostmaster(1);
1681         }
1682
1683         if (StreamConnection(serverFd, port) != STATUS_OK)
1684         {
1685                 StreamClose(port->sock);
1686                 ConnFree(port);
1687                 port = NULL;
1688         }
1689         else
1690         {
1691                 /*
1692                  * Precompute password salt values to use for this connection.
1693                  * It's slightly annoying to do this long in advance of knowing
1694                  * whether we'll need 'em or not, but we must do the random()
1695                  * calls before we fork, not after.  Else the postmaster's random
1696                  * sequence won't get advanced, and all backends would end up
1697                  * using the same salt...
1698                  */
1699                 RandomSalt(port->cryptSalt, port->md5Salt);
1700         }
1701
1702         return port;
1703 }
1704
1705
1706 /*
1707  * ConnFree -- free a local connection data structure
1708  */
1709 static void
1710 ConnFree(Port *conn)
1711 {
1712 #ifdef USE_SSL
1713         secure_close(conn);
1714 #endif
1715         free(conn);
1716 }
1717
1718
1719 /*
1720  * ClosePostmasterPorts -- close all the postmaster's open sockets
1721  *
1722  * This is called during child process startup to release file descriptors
1723  * that are not needed by that child process.  The postmaster still has
1724  * them open, of course.
1725  *
1726  * Note: we pass am_syslogger as a boolean because we don't want to set
1727  * the global variable yet when this is called.
1728  */
1729 void
1730 ClosePostmasterPorts(bool am_syslogger)
1731 {
1732         int                     i;
1733
1734         /* Close the listen sockets */
1735         for (i = 0; i < MAXLISTEN; i++)
1736         {
1737                 if (ListenSocket[i] != -1)
1738                 {
1739                         StreamClose(ListenSocket[i]);
1740                         ListenSocket[i] = -1;
1741                 }
1742         }
1743
1744         /* If using syslogger, close the read side of the pipe */
1745         if (!am_syslogger)
1746         {
1747 #ifndef WIN32
1748                 if (syslogPipe[0] >= 0)
1749                         close(syslogPipe[0]);
1750                 syslogPipe[0] = -1;
1751 #else
1752                 if (syslogPipe[0])
1753                         CloseHandle(syslogPipe[0]);
1754                 syslogPipe[0] = 0;
1755 #endif
1756         }
1757 }
1758
1759
1760 /*
1761  * reset_shared -- reset shared memory and semaphores
1762  */
1763 static void
1764 reset_shared(unsigned short port)
1765 {
1766         /*
1767          * Create or re-create shared memory and semaphores.
1768          *
1769          * Note: in each "cycle of life" we will normally assign the same IPC
1770          * keys (if using SysV shmem and/or semas), since the port number is
1771          * used to determine IPC keys.  This helps ensure that we will clean
1772          * up dead IPC objects if the postmaster crashes and is restarted.
1773          */
1774         CreateSharedMemoryAndSemaphores(false, MaxBackends, port);
1775 }
1776
1777
1778 /*
1779  * SIGHUP -- reread config files, and tell children to do same
1780  */
1781 static void
1782 SIGHUP_handler(SIGNAL_ARGS)
1783 {
1784         int                     save_errno = errno;
1785
1786         PG_SETMASK(&BlockSig);
1787
1788         if (Shutdown <= SmartShutdown)
1789         {
1790                 ereport(LOG,
1791                          (errmsg("received SIGHUP, reloading configuration files")));
1792                 ProcessConfigFile(PGC_SIGHUP);
1793                 SignalChildren(SIGHUP);
1794                 if (BgWriterPID != 0)
1795                         kill(BgWriterPID, SIGHUP);
1796                 if (PgArchPID != 0)
1797                         kill(PgArchPID, SIGHUP);
1798                 if (SysLoggerPID != 0)
1799                         kill(SysLoggerPID, SIGHUP);
1800                 /* PgStatPID does not currently need SIGHUP */
1801                 load_hba();
1802                 load_ident();
1803
1804 #ifdef EXEC_BACKEND
1805                 /* Update the starting-point file for future children */
1806                 write_nondefault_variables(PGC_SIGHUP);
1807 #endif
1808         }
1809
1810         PG_SETMASK(&UnBlockSig);
1811
1812         errno = save_errno;
1813 }
1814
1815
1816 /*
1817  * pmdie -- signal handler for processing various postmaster signals.
1818  */
1819 static void
1820 pmdie(SIGNAL_ARGS)
1821 {
1822         int                     save_errno = errno;
1823
1824         PG_SETMASK(&BlockSig);
1825
1826         ereport(DEBUG2,
1827                         (errmsg_internal("postmaster received signal %d",
1828                                                          postgres_signal_arg)));
1829
1830         switch (postgres_signal_arg)
1831         {
1832                 case SIGTERM:
1833
1834                         /*
1835                          * Smart Shutdown:
1836                          *
1837                          * Wait for children to end their work, then shut down.
1838                          */
1839                         if (Shutdown >= SmartShutdown)
1840                                 break;
1841                         Shutdown = SmartShutdown;
1842                         ereport(LOG,
1843                                         (errmsg("received smart shutdown request")));
1844
1845                         if (DLGetHead(BackendList))
1846                                 break;                  /* let reaper() handle this */
1847
1848                         /*
1849                          * No children left. Begin shutdown of data base system.
1850                          */
1851                         if (StartupPID != 0 || FatalError)
1852                                 break;                  /* let reaper() handle this */
1853                         /* Start the bgwriter if not running */
1854                         if (BgWriterPID == 0)
1855                                 BgWriterPID = StartBackgroundWriter();
1856                         /* And tell it to shut down */
1857                         if (BgWriterPID != 0)
1858                                 kill(BgWriterPID, SIGUSR2);
1859                         /* Tell pgarch to shut down too; nothing left for it to do */
1860                         if (PgArchPID != 0)
1861                                 kill(PgArchPID, SIGQUIT);
1862                         /* Tell pgstat to shut down too; nothing left for it to do */
1863                         if (PgStatPID != 0)
1864                                 kill(PgStatPID, SIGQUIT);
1865                         break;
1866
1867                 case SIGINT:
1868
1869                         /*
1870                          * Fast Shutdown:
1871                          *
1872                          * Abort all children with SIGTERM (rollback active transactions
1873                          * and exit) and shut down when they are gone.
1874                          */
1875                         if (Shutdown >= FastShutdown)
1876                                 break;
1877                         Shutdown = FastShutdown;
1878                         ereport(LOG,
1879                                         (errmsg("received fast shutdown request")));
1880
1881                         if (DLGetHead(BackendList))
1882                         {
1883                                 if (!FatalError)
1884                                 {
1885                                         ereport(LOG,
1886                                                         (errmsg("aborting any active transactions")));
1887                                         SignalChildren(SIGTERM);
1888                                         /* reaper() does the rest */
1889                                 }
1890                                 break;
1891                         }
1892
1893                         /*
1894                          * No children left. Begin shutdown of data base system.
1895                          *
1896                          * Note: if we previously got SIGTERM then we may send SIGUSR2 to
1897                          * the bgwriter a second time here.  This should be harmless.
1898                          */
1899                         if (StartupPID != 0 || FatalError)
1900                                 break;                  /* let reaper() handle this */
1901                         /* Start the bgwriter if not running */
1902                         if (BgWriterPID == 0)
1903                                 BgWriterPID = StartBackgroundWriter();
1904                         /* And tell it to shut down */
1905                         if (BgWriterPID != 0)
1906                                 kill(BgWriterPID, SIGUSR2);
1907                         /* Tell pgarch to shut down too; nothing left for it to do */
1908                         if (PgArchPID != 0)
1909                                 kill(PgArchPID, SIGQUIT);
1910                         /* Tell pgstat to shut down too; nothing left for it to do */
1911                         if (PgStatPID != 0)
1912                                 kill(PgStatPID, SIGQUIT);
1913                         break;
1914
1915                 case SIGQUIT:
1916
1917                         /*
1918                          * Immediate Shutdown:
1919                          *
1920                          * abort all children with SIGQUIT and exit without attempt to
1921                          * properly shut down data base system.
1922                          */
1923                         ereport(LOG,
1924                                         (errmsg("received immediate shutdown request")));
1925                         if (StartupPID != 0)
1926                                 kill(StartupPID, SIGQUIT);
1927                         if (BgWriterPID != 0)
1928                                 kill(BgWriterPID, SIGQUIT);
1929                         if (PgArchPID != 0)
1930                                 kill(PgArchPID, SIGQUIT);
1931                         if (PgStatPID != 0)
1932                                 kill(PgStatPID, SIGQUIT);
1933                         if (DLGetHead(BackendList))
1934                                 SignalChildren(SIGQUIT);
1935                         ExitPostmaster(0);
1936                         break;
1937         }
1938
1939         PG_SETMASK(&UnBlockSig);
1940
1941         errno = save_errno;
1942 }
1943
1944 /*
1945  * Reaper -- signal handler to cleanup after a backend (child) dies.
1946  */
1947 static void
1948 reaper(SIGNAL_ARGS)
1949 {
1950         int                     save_errno = errno;
1951
1952 #ifdef HAVE_WAITPID
1953         int                     status;                 /* backend exit status */
1954
1955 #else
1956 #ifndef WIN32
1957         union wait      status;                 /* backend exit status */
1958 #endif
1959 #endif
1960         int                     exitstatus;
1961         int                     pid;                    /* process id of dead backend */
1962
1963         PG_SETMASK(&BlockSig);
1964
1965         ereport(DEBUG4,
1966                         (errmsg_internal("reaping dead processes")));
1967 #ifdef HAVE_WAITPID
1968         while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
1969         {
1970                 exitstatus = status;
1971 #else
1972 #ifndef WIN32
1973         while ((pid = wait3(&status, WNOHANG, NULL)) > 0)
1974         {
1975                 exitstatus = status.w_status;
1976 #else
1977         while ((pid = win32_waitpid(&exitstatus)) > 0)
1978         {
1979                 /*
1980                  * We need to do this here, and not in CleanupBackend, since this
1981                  * is to be called on all children when we are done with them.
1982                  * Could move to LogChildExit, but that seems like asking for
1983                  * future trouble...
1984                  */
1985                 win32_RemoveChild(pid);
1986 #endif   /* WIN32 */
1987 #endif   /* HAVE_WAITPID */
1988
1989                 /*
1990                  * Check if this child was a startup process.
1991                  */
1992                 if (StartupPID != 0 && pid == StartupPID)
1993                 {
1994                         StartupPID = 0;
1995                         if (exitstatus != 0)
1996                         {
1997                                 LogChildExit(LOG, gettext("startup process"),
1998                                                          pid, exitstatus);
1999                                 ereport(LOG,
2000                                                 (errmsg("aborting startup due to startup process failure")));
2001                                 ExitPostmaster(1);
2002                         }
2003
2004                         /*
2005                          * Startup succeeded - we are done with system startup or
2006                          * recovery.
2007                          */
2008                         FatalError = false;
2009
2010                         /*
2011                          * Crank up the background writer.      It doesn't matter if this
2012                          * fails, we'll just try again later.
2013                          */
2014                         Assert(BgWriterPID == 0);
2015                         BgWriterPID = StartBackgroundWriter();
2016
2017                         /*
2018                          * Go to shutdown mode if a shutdown request was pending.
2019                          * Otherwise, try to start the archiver and stats collector
2020                          * too.
2021                          */
2022                         if (Shutdown > NoShutdown && BgWriterPID != 0)
2023                                 kill(BgWriterPID, SIGUSR2);
2024                         else if (Shutdown == NoShutdown)
2025                         {
2026                                 if (XLogArchivingActive() && PgArchPID == 0)
2027                                         PgArchPID = pgarch_start();
2028                                 if (PgStatPID == 0)
2029                                         PgStatPID = pgstat_start();
2030                         }
2031
2032                         continue;
2033                 }
2034
2035                 /*
2036                  * Was it the bgwriter?
2037                  */
2038                 if (BgWriterPID != 0 && pid == BgWriterPID)
2039                 {
2040                         BgWriterPID = 0;
2041                         if (exitstatus == 0 && Shutdown > NoShutdown &&
2042                                 !FatalError && !DLGetHead(BackendList))
2043                         {
2044                                 /*
2045                                  * Normal postmaster exit is here: we've seen normal exit
2046                                  * of the bgwriter after it's been told to shut down. We
2047                                  * expect that it wrote a shutdown checkpoint.  (If for
2048                                  * some reason it didn't, recovery will occur on next
2049                                  * postmaster start.)
2050                                  *
2051                                  * Note: we do not wait around for exit of the archiver or
2052                                  * stats processes.  They've been sent SIGQUIT by this
2053                                  * point, and in any case contain logic to commit
2054                                  * hara-kiri if they notice the postmaster is gone.
2055                                  */
2056                                 ExitPostmaster(0);
2057                         }
2058
2059                         /*
2060                          * Any unexpected exit of the bgwriter is treated as a crash.
2061                          */
2062                         HandleChildCrash(pid, exitstatus,
2063                                                          gettext("background writer process"));
2064                         continue;
2065                 }
2066
2067                 /*
2068                  * Was it the archiver?  If so, just try to start a new one; no
2069                  * need to force reset of the rest of the system.  (If fail, we'll
2070                  * try again in future cycles of the main loop.)
2071                  */
2072                 if (PgArchPID != 0 && pid == PgArchPID)
2073                 {
2074                         PgArchPID = 0;
2075                         if (exitstatus != 0)
2076                                 LogChildExit(LOG, gettext("archiver process"),
2077                                                          pid, exitstatus);
2078                         if (XLogArchivingActive() &&
2079                                 StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
2080                                 PgArchPID = pgarch_start();
2081                         continue;
2082                 }
2083
2084                 /*
2085                  * Was it the statistics collector?  If so, just try to start a
2086                  * new one; no need to force reset of the rest of the system.  (If
2087                  * fail, we'll try again in future cycles of the main loop.)
2088                  */
2089                 if (PgStatPID != 0 && pid == PgStatPID)
2090                 {
2091                         PgStatPID = 0;
2092                         if (exitstatus != 0)
2093                                 LogChildExit(LOG, gettext("statistics collector process"),
2094                                                          pid, exitstatus);
2095                         if (StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
2096                                 PgStatPID = pgstat_start();
2097                         continue;
2098                 }
2099
2100                 /* Was it the system logger? try to start a new one */
2101                 if (SysLoggerPID != 0 && pid == SysLoggerPID)
2102                 {
2103                         SysLoggerPID = 0;
2104                         /* for safety's sake, launch new logger *first* */
2105                         SysLoggerPID = SysLogger_Start();
2106                         if (exitstatus != 0)
2107                                 LogChildExit(LOG, gettext("system logger process"),
2108                                                          pid, exitstatus);
2109                         continue;
2110                 }
2111
2112                 /*
2113                  * Else do standard backend child cleanup.
2114                  */
2115                 CleanupBackend(pid, exitstatus);
2116         }                                                       /* loop over pending child-death reports */
2117
2118         if (FatalError)
2119         {
2120                 /*
2121                  * Wait for all important children to exit, then reset shmem and
2122                  * StartupDataBase.  (We can ignore the archiver and stats
2123                  * processes here since they are not connected to shmem.)
2124                  */
2125                 if (DLGetHead(BackendList) || StartupPID != 0 || BgWriterPID != 0)
2126                         goto reaper_done;
2127                 ereport(LOG,
2128                         (errmsg("all server processes terminated; reinitializing")));
2129
2130                 shmem_exit(0);
2131                 reset_shared(PostPortNumber);
2132
2133                 StartupPID = StartupDataBase();
2134
2135                 goto reaper_done;
2136         }
2137
2138         if (Shutdown > NoShutdown)
2139         {
2140                 if (DLGetHead(BackendList) || StartupPID != 0)
2141                         goto reaper_done;
2142                 /* Start the bgwriter if not running */
2143                 if (BgWriterPID == 0)
2144                         BgWriterPID = StartBackgroundWriter();
2145                 /* And tell it to shut down */
2146                 if (BgWriterPID != 0)
2147                         kill(BgWriterPID, SIGUSR2);
2148                 /* Tell pgarch to shut down too; nothing left for it to do */
2149                 if (PgArchPID != 0)
2150                         kill(PgArchPID, SIGQUIT);
2151                 /* Tell pgstat to shut down too; nothing left for it to do */
2152                 if (PgStatPID != 0)
2153                         kill(PgStatPID, SIGQUIT);
2154         }
2155
2156 reaper_done:
2157         PG_SETMASK(&UnBlockSig);
2158
2159         errno = save_errno;
2160 }
2161
2162
2163 /*
2164  * CleanupBackend -- cleanup after terminated backend.
2165  *
2166  * Remove all local state associated with backend.
2167  */
2168 static void
2169 CleanupBackend(int pid,
2170                            int exitstatus)      /* child's exit status. */
2171 {
2172         Dlelem     *curr;
2173
2174         LogChildExit(DEBUG2, gettext("server process"), pid, exitstatus);
2175
2176         /*
2177          * If a backend dies in an ugly way (i.e. exit status not 0) then we
2178          * must signal all other backends to quickdie.  If exit status is zero
2179          * we assume everything is hunky dory and simply remove the backend
2180          * from the active backend list.
2181          */
2182         if (exitstatus != 0)
2183         {
2184                 HandleChildCrash(pid, exitstatus, gettext("server process"));
2185                 return;
2186         }
2187
2188         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2189         {
2190                 Backend    *bp = (Backend *) DLE_VAL(curr);
2191
2192                 if (bp->pid == pid)
2193                 {
2194                         DLRemove(curr);
2195                         free(bp);
2196                         DLFreeElem(curr);
2197 #ifdef EXEC_BACKEND
2198                         ShmemBackendArrayRemove(pid);
2199 #endif
2200                         /* Tell the collector about backend termination */
2201                         pgstat_beterm(pid);
2202                         break;
2203                 }
2204         }
2205 }
2206
2207 /*
2208  * HandleChildCrash -- cleanup after failed backend or bgwriter.
2209  *
2210  * The objectives here are to clean up our local state about the child
2211  * process, and to signal all other remaining children to quickdie.
2212  */
2213 static void
2214 HandleChildCrash(int pid, int exitstatus, const char *procname)
2215 {
2216         Dlelem     *curr,
2217                            *next;
2218         Backend    *bp;
2219
2220         /*
2221          * Make log entry unless there was a previous crash (if so, nonzero
2222          * exit status is to be expected in SIGQUIT response; don't clutter
2223          * log)
2224          */
2225         if (!FatalError)
2226         {
2227                 LogChildExit(LOG, procname, pid, exitstatus);
2228                 ereport(LOG,
2229                           (errmsg("terminating any other active server processes")));
2230         }
2231
2232         /* Process regular backends */
2233         for (curr = DLGetHead(BackendList); curr; curr = next)
2234         {
2235                 next = DLGetSucc(curr);
2236                 bp = (Backend *) DLE_VAL(curr);
2237                 if (bp->pid == pid)
2238                 {
2239                         /*
2240                          * Found entry for freshly-dead backend, so remove it.
2241                          */
2242                         DLRemove(curr);
2243                         free(bp);
2244                         DLFreeElem(curr);
2245 #ifdef EXEC_BACKEND
2246                         ShmemBackendArrayRemove(pid);
2247 #endif
2248                         /* Tell the collector about backend termination */
2249                         pgstat_beterm(pid);
2250                         /* Keep looping so we can signal remaining backends */
2251                 }
2252                 else
2253                 {
2254                         /*
2255                          * This backend is still alive.  Unless we did so already,
2256                          * tell it to commit hara-kiri.
2257                          *
2258                          * SIGQUIT is the special signal that says exit without proc_exit
2259                          * and let the user know what's going on. But if SendStop is
2260                          * set (-s on command line), then we send SIGSTOP instead, so
2261                          * that we can get core dumps from all backends by hand.
2262                          */
2263                         if (!FatalError)
2264                         {
2265                                 ereport(DEBUG2,
2266                                                 (errmsg_internal("sending %s to process %d",
2267                                                                           (SendStop ? "SIGSTOP" : "SIGQUIT"),
2268                                                                                  (int) bp->pid)));
2269                                 kill(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
2270                         }
2271                 }
2272         }
2273
2274         /* Take care of the bgwriter too */
2275         if (pid == BgWriterPID)
2276                 BgWriterPID = 0;
2277         else if (BgWriterPID != 0 && !FatalError)
2278         {
2279                 ereport(DEBUG2,
2280                                 (errmsg_internal("sending %s to process %d",
2281                                                                  (SendStop ? "SIGSTOP" : "SIGQUIT"),
2282                                                                  (int) BgWriterPID)));
2283                 kill(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
2284         }
2285
2286         /* Force a power-cycle of the pgarch process too */
2287         /* (Shouldn't be necessary, but just for luck) */
2288         if (PgArchPID != 0 && !FatalError)
2289         {
2290                 ereport(DEBUG2,
2291                                 (errmsg_internal("sending %s to process %d",
2292                                                                  "SIGQUIT",
2293                                                                  (int) PgArchPID)));
2294                 kill(PgArchPID, SIGQUIT);
2295         }
2296
2297         /* Force a power-cycle of the pgstat processes too */
2298         /* (Shouldn't be necessary, but just for luck) */
2299         if (PgStatPID != 0 && !FatalError)
2300         {
2301                 ereport(DEBUG2,
2302                                 (errmsg_internal("sending %s to process %d",
2303                                                                  "SIGQUIT",
2304                                                                  (int) PgStatPID)));
2305                 kill(PgStatPID, SIGQUIT);
2306         }
2307
2308         /* We do NOT restart the syslogger */
2309
2310         FatalError = true;
2311 }
2312
/*
 * LogChildExit -- report how a child process ended.
 *
 * lev is the ereport level to use, procname a noun phrase naming the kind
 * of child, pid its process ID and exitstatus the wait status collected
 * for it.  The wording depends on whether the child exited normally, was
 * killed by a signal, or ended in some way we do not recognize.
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
	if (WIFEXITED(exitstatus))
	{
		ereport(lev,

		/*
		 * translator: %s is a noun phrase describing a child process,
		 * such as "server process"
		 */
				(errmsg("%s (PID %d) exited with exit code %d",
						procname, pid, WEXITSTATUS(exitstatus))));
		return;
	}

	if (WIFSIGNALED(exitstatus))
	{
		ereport(lev,

		/*
		 * translator: %s is a noun phrase describing a child process,
		 * such as "server process"
		 */
				(errmsg("%s (PID %d) was terminated by signal %d",
						procname, pid, WTERMSIG(exitstatus))));
		return;
	}

	/* Neither a normal exit nor a signal: show the raw status */
	ereport(lev,

	/*
	 * translator: %s is a noun phrase describing a child process,
	 * such as "server process"
	 */
			(errmsg("%s (PID %d) exited with unexpected status %d",
					procname, pid, exitstatus)));
}
2347
2348 /*
2349  * Send a signal to all backend children (but NOT special children)
2350  */
2351 static void
2352 SignalChildren(int signal)
2353 {
2354         Dlelem     *curr;
2355
2356         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2357         {
2358                 Backend    *bp = (Backend *) DLE_VAL(curr);
2359
2360                 ereport(DEBUG4,
2361                                 (errmsg_internal("sending signal %d to process %d",
2362                                                                  signal, (int) bp->pid)));
2363                 kill(bp->pid, signal);
2364         }
2365 }
2366
2367 /*
2368  * BackendStartup -- start backend process
2369  *
2370  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
2371  */
2372 static int
2373 BackendStartup(Port *port)
2374 {
2375         Backend    *bn;                         /* for backend cleanup */
2376         pid_t           pid;
2377
2378 #ifdef LINUX_PROFILE
2379         struct itimerval prof_itimer;
2380 #endif
2381
2382         /*
2383          * Compute the cancel key that will be assigned to this backend. The
2384          * backend will have its own copy in the forked-off process' value of
2385          * MyCancelKey, so that it can transmit the key to the frontend.
2386          */
2387         MyCancelKey = PostmasterRandom();
2388
2389         /*
2390          * Make room for backend data structure.  Better before the fork() so
2391          * we can handle failure cleanly.
2392          */
2393         bn = (Backend *) malloc(sizeof(Backend));
2394         if (!bn)
2395         {
2396                 ereport(LOG,
2397                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2398                                  errmsg("out of memory")));
2399                 return STATUS_ERROR;
2400         }
2401
2402         /* Pass down canAcceptConnections state (kluge for EXEC_BACKEND case) */
2403         port->canAcceptConnections = canAcceptConnections();
2404
2405         /*
2406          * Flush stdio channels just before fork, to avoid double-output
2407          * problems. Ideally we'd use fflush(NULL) here, but there are still a
2408          * few non-ANSI stdio libraries out there (like SunOS 4.1.x) that
2409          * coredump if we do. Presently stdout and stderr are the only stdio
2410          * output channels used by the postmaster, so fflush'ing them should
2411          * be sufficient.
2412          */
2413         fflush(stdout);
2414         fflush(stderr);
2415
2416 #ifdef EXEC_BACKEND
2417
2418         pid = backend_forkexec(port);
2419
2420 #else                                                   /* !EXEC_BACKEND */
2421
2422 #ifdef LINUX_PROFILE
2423
2424         /*
2425          * Linux's fork() resets the profiling timer in the child process. If
2426          * we want to profile child processes then we need to save and restore
2427          * the timer setting.  This is a waste of time if not profiling,
2428          * however, so only do it if commanded by specific -DLINUX_PROFILE
2429          * switch.
2430          */
2431         getitimer(ITIMER_PROF, &prof_itimer);
2432 #endif
2433
2434 #ifdef __BEOS__
2435         /* Specific beos actions before backend startup */
2436         beos_before_backend_startup();
2437 #endif
2438
2439         pid = fork();
2440
2441         if (pid == 0)                           /* child */
2442         {
2443 #ifdef LINUX_PROFILE
2444                 setitimer(ITIMER_PROF, &prof_itimer, NULL);
2445 #endif
2446
2447 #ifdef __BEOS__
2448                 /* Specific beos backend startup actions */
2449                 beos_backend_startup();
2450 #endif
2451                 free(bn);
2452
2453                 proc_exit(BackendRun(port));
2454         }
2455 #endif   /* EXEC_BACKEND */
2456
2457         if (pid < 0)
2458         {
2459                 /* in parent, fork failed */
2460                 int                     save_errno = errno;
2461
2462 #ifdef __BEOS__
2463                 /* Specific beos backend startup actions */
2464                 beos_backend_startup_failed();
2465 #endif
2466                 free(bn);
2467                 errno = save_errno;
2468                 ereport(LOG,
2469                           (errmsg("could not fork new process for connection: %m")));
2470                 report_fork_failure_to_client(port, save_errno);
2471                 return STATUS_ERROR;
2472         }
2473
2474         /* in parent, successful fork */
2475         ereport(DEBUG2,
2476                         (errmsg_internal("forked new backend, pid=%d socket=%d",
2477                                                          (int) pid, port->sock)));
2478
2479         /*
2480          * Everything's been successful, it's safe to add this backend to our
2481          * list of backends.
2482          */
2483         bn->pid = pid;
2484         bn->cancel_key = MyCancelKey;
2485         DLAddHead(BackendList, DLNewElem(bn));
2486 #ifdef EXEC_BACKEND
2487         ShmemBackendArrayAdd(bn);
2488 #endif
2489
2490         return STATUS_OK;
2491 }
2492
2493 /*
2494  * Try to report backend fork() failure to client before we close the
2495  * connection.  Since we do not care to risk blocking the postmaster on
2496  * this connection, we set the connection to non-blocking and try only once.
2497  *
2498  * This is grungy special-purpose code; we cannot use backend libpq since
2499  * it's not up and running.
2500  */
2501 static void
2502 report_fork_failure_to_client(Port *port, int errnum)
2503 {
2504         char            buffer[1000];
2505
2506         /* Format the error message packet (always V2 protocol) */
2507         snprintf(buffer, sizeof(buffer), "E%s%s\n",
2508                          gettext("could not fork new process for connection: "),
2509                          strerror(errnum));
2510
2511         /* Set port to non-blocking.  Don't do send() if this fails */
2512         if (!set_noblock(port->sock))
2513                 return;
2514
2515         send(port->sock, buffer, strlen(buffer) + 1, 0);
2516 }
2517
2518
/*
 * split_opts -- break an option string into words and append them to argv
 *
 * argv is the array to append to, *argcp the index of the next free slot
 * (advanced once per word stored), and s the option string; a NULL or
 * empty s adds nothing.
 *
 * NB: the string is destructively modified!  Each word is terminated in
 * place, and the argv entries point into s rather than at copies.
 *
 * No current POSTGRES argument needs quoting characters, so the
 * simple-minded rule "every whitespace-delimited run of characters is one
 * argv element" is sufficient.
 *
 * If you don't like that, well, we *used* to pass the whole option string
 * as ONE argument to execl(), which was even less intelligent...
 */
static void
split_opts(char **argv, int *argcp, char *s)
{
	char	   *cursor = s;

	if (cursor == NULL)
		return;

	for (;;)
	{
		/* Step over whitespace in front of the next word */
		while (isspace((unsigned char) *cursor))
			cursor++;
		if (*cursor == '\0')
			return;				/* nothing but whitespace was left */

		/* A word starts here: record it */
		argv[*argcp] = cursor;
		*argcp += 1;

		/* Find the end of the word */
		while (*cursor != '\0' && !isspace((unsigned char) *cursor))
			cursor++;
		if (*cursor == '\0')
			return;				/* the word ran up to the end of the string */

		/* Terminate the word in place and continue after it */
		*cursor = '\0';
		cursor++;
	}
}
2547
2548
2549 /*
2550  * BackendRun -- perform authentication, and if successful,
2551  *                              set up the backend's argument list and invoke PostgresMain()
2552  *
2553  * returns:
2554  *              Shouldn't return at all.
2555  *              If PostgresMain() fails, return status.
2556  */
2557 static int
2558 BackendRun(Port *port)
2559 {
2560         int                     status;
2561         char            remote_host[NI_MAXHOST];
2562         char            remote_port[NI_MAXSERV];
2563         char            remote_ps_data[NI_MAXHOST];
2564         char      **av;
2565         int                     maxac;
2566         int                     ac;
2567         char            protobuf[32];
2568         int                     i;
2569
2570         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
2571
2572         /*
2573          * Let's clean up ourselves as the postmaster child, and close the
2574          * postmaster's listen sockets
2575          */
2576         ClosePostmasterPorts(false);
2577
2578         /* We don't want the postmaster's proc_exit() handlers */
2579         on_exit_reset();
2580
2581         /*
2582          * Signal handlers setting is moved to tcop/postgres...
2583          */
2584
2585         /* Save port etc. for ps status */
2586         MyProcPort = port;
2587
2588         /* Reset MyProcPid to new backend's pid */
2589         MyProcPid = getpid();
2590
2591         /*
2592          * PreAuthDelay is a debugging aid for investigating problems in the
2593          * authentication cycle: it can be set in postgresql.conf to allow
2594          * time to attach to the newly-forked backend with a debugger. (See
2595          * also the -W backend switch, which we allow clients to pass through
2596          * PGOPTIONS, but it is not honored until after authentication.)
2597          */
2598         if (PreAuthDelay > 0)
2599                 pg_usleep(PreAuthDelay * 1000000L);
2600
2601         ClientAuthInProgress = true;    /* limit visibility of log messages */
2602
2603         /* save start time for end of session reporting */
2604         gettimeofday(&(port->session_start), NULL);
2605
2606         /* set these to empty in case they are needed before we set them up */
2607         port->remote_host = "";
2608         port->remote_port = "";
2609         port->commandTag = "";
2610
2611         /*
2612          * Initialize libpq and enable reporting of ereport errors to the
2613          * client. Must do this now because authentication uses libpq to send
2614          * messages.
2615          */
2616         pq_init();                                      /* initialize libpq to talk to client */
2617         whereToSendOutput = Remote; /* now safe to ereport to client */
2618
2619         /*
2620          * We arrange for a simple exit(0) if we receive SIGTERM or SIGQUIT
2621          * during any client authentication related communication. Otherwise
2622          * the postmaster cannot shutdown the database FAST or IMMED cleanly
2623          * if a buggy client blocks a backend during authentication.
2624          */
2625         pqsignal(SIGTERM, authdie);
2626         pqsignal(SIGQUIT, authdie);
2627         pqsignal(SIGALRM, authdie);
2628         PG_SETMASK(&AuthBlockSig);
2629
2630         /*
2631          * Get the remote host name and port for logging and status display.
2632          */
2633         remote_host[0] = '\0';
2634         remote_port[0] = '\0';
2635         if (getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2636                                                 remote_host, sizeof(remote_host),
2637                                                 remote_port, sizeof(remote_port),
2638                                    (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV))
2639         {
2640                 int                     ret = getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2641                                                                                 remote_host, sizeof(remote_host),
2642                                                                                 remote_port, sizeof(remote_port),
2643                                                                                 NI_NUMERICHOST | NI_NUMERICSERV);
2644
2645                 if (ret)
2646                         ereport(WARNING,
2647                                         (errmsg("getnameinfo_all() failed: %s",
2648                                                         gai_strerror(ret))));
2649         }
2650         snprintf(remote_ps_data, sizeof(remote_ps_data),
2651                          remote_port[0] == '\0' ? "%s" : "%s(%s)",
2652                          remote_host, remote_port);
2653
2654         if (Log_connections)
2655                 ereport(LOG,
2656                                 (errmsg("connection received: host=%s port=%s",
2657                                                 remote_host, remote_port)));
2658
2659         /*
2660          * save remote_host and remote_port in port stucture
2661          */
2662         port->remote_host = strdup(remote_host);
2663         port->remote_port = strdup(remote_port);
2664
2665         /*
2666          * In EXEC_BACKEND case, we didn't inherit the contents of pg_hba.c
2667          * etcetera from the postmaster, and have to load them ourselves.
2668          * Build the PostmasterContext (which didn't exist before, in this
2669          * process) to contain the data.
2670          *
2671          * FIXME: [fork/exec] Ugh.      Is there a way around this overhead?
2672          */
2673 #ifdef EXEC_BACKEND
2674         Assert(PostmasterContext == NULL);
2675         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
2676                                                                                           "Postmaster",
2677                                                                                           ALLOCSET_DEFAULT_MINSIZE,
2678                                                                                           ALLOCSET_DEFAULT_INITSIZE,
2679                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
2680         MemoryContextSwitchTo(PostmasterContext);
2681
2682         load_hba();
2683         load_ident();
2684         load_user();
2685         load_group();
2686 #endif
2687
2688         /*
2689          * Ready to begin client interaction.  We will give up and exit(0)
2690          * after a time delay, so that a broken client can't hog a connection
2691          * indefinitely.  PreAuthDelay doesn't count against the time limit.
2692          */
2693         if (!enable_sig_alarm(AuthenticationTimeout * 1000, false))
2694                 elog(FATAL, "could not set timer for authorization timeout");
2695
2696         /*
2697          * Receive the startup packet (which might turn out to be a cancel
2698          * request packet).
2699          */
2700         status = ProcessStartupPacket(port, false);
2701
2702         if (status != STATUS_OK)
2703                 proc_exit(0);
2704
2705         /*
2706          * Now that we have the user and database name, we can set the process
2707          * title for ps.  It's good to do this as early as possible in
2708          * startup.
2709          */
2710         init_ps_display(port->user_name, port->database_name, remote_ps_data);
2711         set_ps_display("authentication");
2712
2713         /*
2714          * Now perform authentication exchange.
2715          */
2716         ClientAuthentication(port); /* might not return, if failure */
2717
2718         /*
2719          * Done with authentication.  Disable timeout, and prevent
2720          * SIGTERM/SIGQUIT again until backend startup is complete.
2721          */
2722         if (!disable_sig_alarm(false))
2723                 elog(FATAL, "could not disable timer for authorization timeout");
2724         PG_SETMASK(&BlockSig);
2725
2726         if (Log_connections)
2727                 ereport(LOG,
2728                                 (errmsg("connection authorized: user=%s database=%s",
2729                                                 port->user_name, port->database_name)));
2730
2731         /*
2732          * Don't want backend to be able to see the postmaster random number
2733          * generator state.  We have to clobber the static random_seed *and*
2734          * start a new random sequence in the random() library function.
2735          */
2736         random_seed = 0;
2737         srandom((unsigned int) (MyProcPid ^ port->session_start.tv_usec));
2738
2739         /* ----------------
2740          * Now, build the argv vector that will be given to PostgresMain.
2741          *
2742          * The layout of the command line is
2743          *              postgres [secure switches] -p databasename [insecure switches]
2744          * where the switches after -p come from the client request.
2745          *
2746          * The maximum possible number of commandline arguments that could come
2747          * from ExtraOptions or port->cmdline_options is (strlen + 1) / 2; see
2748          * split_opts().
2749          * ----------------
2750          */
2751         maxac = 10;                                     /* for fixed args supplied below */
2752         maxac += (strlen(ExtraOptions) + 1) / 2;
2753         if (port->cmdline_options)
2754                 maxac += (strlen(port->cmdline_options) + 1) / 2;
2755
2756         av = (char **) MemoryContextAlloc(TopMemoryContext,
2757                                                                           maxac * sizeof(char *));
2758         ac = 0;
2759
2760         av[ac++] = "postgres";
2761
2762         /*
2763          * Pass any backend switches specified with -o in the postmaster's own
2764          * command line.  We assume these are secure.  (It's OK to mangle
2765          * ExtraOptions now, since we're safely inside a subprocess.)
2766          */
2767         split_opts(av, &ac, ExtraOptions);
2768
2769         /* Tell the backend what protocol the frontend is using. */
2770         snprintf(protobuf, sizeof(protobuf), "-v%u", port->proto);
2771         av[ac++] = protobuf;
2772
2773         /*
2774          * Tell the backend it is being called from the postmaster, and which
2775          * database to use.  -p marks the end of secure switches.
2776          */
2777         av[ac++] = "-p";
2778         av[ac++] = port->database_name;
2779
2780         /*
2781          * Pass the (insecure) option switches from the connection request.
2782          * (It's OK to mangle port->cmdline_options now.)
2783          */
2784         if (port->cmdline_options)
2785                 split_opts(av, &ac, port->cmdline_options);
2786
2787         av[ac] = NULL;
2788
2789         Assert(ac < maxac);
2790
2791         /*
2792          * Release postmaster's working memory context so that backend can
2793          * recycle the space.  Note this does not trash *MyProcPort, because
2794          * ConnCreate() allocated that space with malloc() ... else we'd need
2795          * to copy the Port data here.  Also, subsidiary data such as the
2796          * username isn't lost either; see ProcessStartupPacket().
2797          */
2798         MemoryContextSwitchTo(TopMemoryContext);
2799         MemoryContextDelete(PostmasterContext);
2800         PostmasterContext = NULL;
2801
2802         /*
2803          * Debug: print arguments being passed to backend
2804          */
2805         ereport(DEBUG3,
2806                         (errmsg_internal("%s child[%d]: starting with (",
2807                                                          progname, (int)getpid())));
2808         for (i = 0; i < ac; ++i)
2809                 ereport(DEBUG3,
2810                                 (errmsg_internal("\t%s", av[i])));
2811         ereport(DEBUG3,
2812                         (errmsg_internal(")")));
2813
2814         ClientAuthInProgress = false;           /* client_min_messages is active
2815                                                                                  * now */
2816
2817         return (PostgresMain(ac, av, port->user_name));
2818 }
2819
2820
2821 #ifdef EXEC_BACKEND
2822
2823 /*
2824  * postmaster_forkexec -- fork and exec a postmaster subprocess
2825  *
2826  * The caller must have set up the argv array already, except for argv[2]
2827  * which will be filled with the name of the temp variable file.
2828  *
2829  * Returns the child process PID, or -1 on fork failure (a suitable error
2830  * message has been logged on failure).
2831  *
2832  * All uses of this routine will dispatch to SubPostmasterMain in the
2833  * child process.
2834  */
2835 pid_t
2836 postmaster_forkexec(int argc, char *argv[])
2837 {
2838         Port            port;
2839
2840         /* This entry point passes dummy values for the Port variables */
2841         memset(&port, 0, sizeof(port));
2842         return internal_forkexec(argc, argv, &port);
2843 }
2844
2845 /*
2846  * backend_forkexec -- fork/exec off a backend process
2847  *
2848  * returns the pid of the fork/exec'd process, or -1 on failure
2849  */
2850 static pid_t
2851 backend_forkexec(Port *port)
2852 {
2853         char       *av[4];
2854         int                     ac = 0;
2855
2856         av[ac++] = "postgres";
2857         av[ac++] = "-forkbackend";
2858         av[ac++] = NULL;                        /* filled in by internal_forkexec */
2859
2860         av[ac] = NULL;
2861         Assert(ac < lengthof(av));
2862
2863         return internal_forkexec(ac, av, port);
2864 }
2865
2866 #ifndef WIN32
2867
2868 /*
2869  * internal_forkexec non-win32 implementation
2870  *
2871  * - writes out backend variables to the parameter file
2872  * - fork():s, and then exec():s the child process
2873  */
2874 static pid_t
2875 internal_forkexec(int argc, char *argv[], Port *port)
2876 {
2877         static unsigned long tmpBackendFileNum = 0;
2878         pid_t           pid;
2879         char            tmpfilename[MAXPGPATH];
2880         BackendParameters param;
2881         FILE       *fp;
2882
2883         if (!save_backend_variables(&param, port))
2884                 return -1;                              /* log made by save_backend_variables */
2885
2886         /* Calculate name for temp file */
2887         Assert(DataDir);
2888         snprintf(tmpfilename, MAXPGPATH, "%s/%s/%s.backend_var.%d.%lu",
2889                          DataDir, PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX,
2890                          MyProcPid, ++tmpBackendFileNum);
2891
2892         /* Open file */
2893         fp = AllocateFile(tmpfilename, PG_BINARY_W);
2894         if (!fp)
2895         {
2896                 /* As per OpenTemporaryFile... */
2897                 char            dirname[MAXPGPATH];
2898
2899                 snprintf(dirname, MAXPGPATH, "%s/%s", DataDir, PG_TEMP_FILES_DIR);
2900                 mkdir(dirname, S_IRWXU);
2901
2902                 fp = AllocateFile(tmpfilename, PG_BINARY_W);
2903                 if (!fp)
2904                 {
2905                         ereport(LOG,
2906                                         (errcode_for_file_access(),
2907                                          errmsg("could not create file \"%s\": %m",
2908                                                         tmpfilename)));
2909                         return -1;
2910                 }
2911         }
2912
2913         if (fwrite(&param, sizeof(param), 1, fp) != 1)
2914         {
2915                 ereport(LOG,
2916                                 (errcode_for_file_access(),
2917                                  errmsg("could not write to file \"%s\": %m", tmpfilename)));
2918                 FreeFile(fp);
2919                 return -1;
2920         }
2921
2922         /* Release file */
2923         if (FreeFile(fp))
2924         {
2925                 ereport(LOG,
2926                                 (errcode_for_file_access(),
2927                                  errmsg("could not write to file \"%s\": %m", tmpfilename)));
2928                 return -1;
2929         }
2930
2931         /* Make sure caller set up argv properly */
2932         Assert(argc >= 3);
2933         Assert(argv[argc] == NULL);
2934         Assert(strncmp(argv[1], "-fork", 5) == 0);
2935         Assert(argv[2] == NULL);
2936
2937         /* Insert temp file name after -fork argument */
2938         argv[2] = tmpfilename;
2939
2940         /* Fire off execv in child */
2941         if ((pid = fork()) == 0)
2942         {
2943                 if (execv(postgres_exec_path, argv) < 0)
2944                 {
2945                         ereport(LOG,
2946                                         (errmsg("could not execute server process \"%s\": %m",
2947                                                         postgres_exec_path)));
2948                         /* We're already in the child process here, can't return */
2949                         exit(1);
2950                 }
2951         }
2952
2953         return pid;                                     /* Parent returns pid, or -1 on fork
2954                                                                  * failure */
2955 }
2956
2957 #else /* WIN32 */
2958
2959 /*
2960  * internal_forkexec win32 implementation
2961  *
2962  * - starts backend using CreateProcess(), in suspended state
2963  * - writes out backend variables to the parameter file
2964  *  - during this, duplicates handles and sockets required for
2965  *    inheritance into the new process
2966  * - resumes execution of the new process once the backend parameter
2967  *   file is complete.
2968  */
2969 static pid_t
2970 internal_forkexec(int argc, char *argv[], Port *port)
2971 {
2972         STARTUPINFO si;
2973         PROCESS_INFORMATION pi;
2974         int                     i;
2975         int                     j;
2976         char            cmdLine[MAXPGPATH * 2];
2977         HANDLE          childHandleCopy;
2978         HANDLE          waiterThread;
2979         HANDLE      paramHandle;
2980         BackendParameters *param;
2981         SECURITY_ATTRIBUTES sa;
2982         char        paramHandleStr[32];
2983
2984         /* Make sure caller set up argv properly */
2985         Assert(argc >= 3);
2986         Assert(argv[argc] == NULL);
2987         Assert(strncmp(argv[1], "-fork", 5) == 0);
2988         Assert(argv[2] == NULL);
2989
2990         /* Set up shared memory for parameter passing */
2991         ZeroMemory(&sa,sizeof(sa));
2992         sa.nLength = sizeof(sa);
2993         sa.bInheritHandle = TRUE;
2994         paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
2995                                                                         &sa,
2996                                                                         PAGE_READWRITE,
2997                                                                         0,
2998                                                                         sizeof(BackendParameters),
2999                                                                         NULL);
3000         if (paramHandle == INVALID_HANDLE_VALUE)
3001         {
3002                 elog(LOG, "could not create backend parameter file mapping: error code %d",
3003                          (int) GetLastError());
3004                 return -1;
3005         }
3006
3007         param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
3008         if (!param)
3009         {
3010                 elog(LOG, "could not map backend parameter memory: error code %d",
3011                          (int) GetLastError());
3012                 CloseHandle(paramHandle);
3013                 return -1;
3014         }
3015
3016         /* Insert temp file name after -fork argument */
3017         sprintf(paramHandleStr, "%lu", (DWORD)paramHandle);
3018         argv[2] = paramHandleStr;
3019
3020         /* Format the cmd line */
3021         cmdLine[sizeof(cmdLine) - 1] = '\0';
3022         cmdLine[sizeof(cmdLine) - 2] = '\0';
3023         snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
3024         i = 0;
3025         while (argv[++i] != NULL)
3026         {
3027                 j = strlen(cmdLine);
3028                 snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
3029         }
3030         if (cmdLine[sizeof(cmdLine) - 2] != '\0')
3031         {
3032                 elog(LOG, "subprocess command line too long");
3033                 return -1;
3034         }
3035
3036         memset(&pi, 0, sizeof(pi));
3037         memset(&si, 0, sizeof(si));
3038         si.cb = sizeof(si);
3039         /*
3040          * Create the subprocess in a suspended state. This will be resumed
3041          * later, once we have written out the parameter file.
3042          */
3043         if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
3044                                            NULL, NULL, &si, &pi))
3045         {
3046                 elog(LOG, "CreateProcess call failed: %m (error code %d)",
3047                          (int) GetLastError());
3048                 return -1;
3049         }
3050
3051         if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
3052         {
3053                 /*
3054                  * log made by save_backend_variables, but we have to clean
3055                  * up the mess with the half-started process
3056                  */
3057                 if (!TerminateProcess(pi.hProcess, 255))
3058                         ereport(ERROR,
3059                                         (errmsg_internal("could not terminate unstarted process: error code %d",
3060                                                                          (int) GetLastError())));
3061                 CloseHandle(pi.hProcess);
3062                 CloseHandle(pi.hThread);
3063                 return -1;                              /* log made by save_backend_variables */
3064         }
3065
3066         /* Drop the shared memory that is now inherited to the backend */
3067         if (!UnmapViewOfFile(param))
3068                 elog(LOG, "could not unmap view of backend parameter file: error code %d",
3069                          (int) GetLastError());
3070         if (!CloseHandle(paramHandle))
3071                 elog(LOG, "could not close handle to backend parameter file: error code %d",
3072                          (int) GetLastError());
3073
3074         /*
3075          * Now that the backend variables are written out, we start the
3076          * child thread so it can start initializing while we set up
3077          * the rest of the parent state.
3078          */
3079         if (ResumeThread(pi.hThread) == -1)
3080         {
3081                 if (!TerminateProcess(pi.hProcess, 255))
3082                 {
3083                         ereport(ERROR,
3084                                         (errmsg_internal("could not terminate unstartable process: error code %d",
3085                                                                          (int) GetLastError())));
3086                         CloseHandle(pi.hProcess);
3087                         CloseHandle(pi.hThread);
3088                         return -1;
3089                 }
3090                 CloseHandle(pi.hProcess);
3091                 CloseHandle(pi.hThread);
3092                 ereport(ERROR,
3093                                 (errmsg_internal("could not resume thread of unstarted process: error code %d",
3094                                                                  (int) GetLastError())));
3095                 return -1;
3096         }
3097
3098         if (!IsUnderPostmaster)
3099         {
3100                 /* We are the Postmaster creating a child... */
3101                 win32_AddChild(pi.dwProcessId, pi.hProcess);
3102         }
3103
3104         /* Set up the thread to handle the SIGCHLD for this process */
3105         if (DuplicateHandle(GetCurrentProcess(),
3106                                                 pi.hProcess,
3107                                                 GetCurrentProcess(),
3108                                                 &childHandleCopy,
3109                                                 0,
3110                                                 FALSE,
3111                                                 DUPLICATE_SAME_ACCESS) == 0)
3112                 ereport(FATAL,
3113                                 (errmsg_internal("could not duplicate child handle: error code %d",
3114                                                                  (int) GetLastError())));
3115
3116         waiterThread = CreateThread(NULL, 64 * 1024, win32_sigchld_waiter,
3117                                                                 (LPVOID) childHandleCopy, 0, NULL);
3118         if (!waiterThread)
3119                 ereport(FATAL,
3120                    (errmsg_internal("could not create sigchld waiter thread: error code %d",
3121                                                         (int) GetLastError())));
3122         CloseHandle(waiterThread);
3123
3124         if (IsUnderPostmaster)
3125                 CloseHandle(pi.hProcess);
3126         CloseHandle(pi.hThread);
3127
3128         return pi.dwProcessId;
3129 }
3130
3131 #endif /* WIN32 */
3132
3133
3134 /*
3135  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
3136  *                      to what it would be if we'd simply forked on Unix, and then
3137  *                      dispatch to the appropriate place.
3138  *
3139  * The first two command line arguments are expected to be "-forkFOO"
3140  * (where FOO indicates which postmaster child we are to become), and
3141  * the name of a variables file that we can read to load data that would
3142  * have been inherited by fork() on Unix.  Remaining arguments go to the
3143  * subprocess FooMain() routine.
3144  */
3145 int
3146 SubPostmasterMain(int argc, char *argv[])
3147 {
3148         Port            port;
3149
3150         /* Do this sooner rather than later... */
3151         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
3152
3153         MyProcPid = getpid();           /* reset MyProcPid */
3154
3155         /* Read in file-based context */
3156         memset(&port, 0, sizeof(Port));
3157         read_backend_variables(argv[2], &port);
3158
3159         /*
3160          * Start our win32 signal implementation. This has to be done
3161          * after we read the backend variables, because we need to pick
3162          * up the signal pipe from the parent process.
3163          */
3164 #ifdef WIN32
3165         pgwin32_signal_initialize();
3166 #endif
3167
3168         /* In EXEC_BACKEND case we will not have inherited these settings */
3169         IsPostmasterEnvironment = true;
3170         whereToSendOutput = None;
3171         pqinitmask();
3172         PG_SETMASK(&BlockSig);
3173
3174         /* Setup essential subsystems */
3175         MemoryContextInit();
3176         InitializeGUCOptions();
3177
3178         /* Check we got appropriate args */
3179         if (argc < 3)
3180                 elog(FATAL, "invalid subpostmaster invocation");
3181
3182         /* Read in remaining GUC variables */
3183         read_nondefault_variables();
3184
3185         /* Run backend or appropriate child */
3186         if (strcmp(argv[1], "-forkbackend") == 0)
3187         {
3188                 /* BackendRun will close sockets */
3189
3190                 /* Attach process to shared segments */
3191                 CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
3192
3193 #ifdef USE_SSL
3194                 /*
3195                  *      Need to reinitialize the SSL library in the backend,
3196                  *      since the context structures contain function pointers
3197                  *      and cannot be passed through the parameter file.
3198                  */
3199                 if (EnableSSL)
3200                         secure_initialize();
3201 #endif
3202
3203                 Assert(argc == 3);              /* shouldn't be any more args */
3204                 proc_exit(BackendRun(&port));
3205         }
3206         if (strcmp(argv[1], "-forkboot") == 0)
3207         {
3208                 /* Close the postmaster's sockets */
3209                 ClosePostmasterPorts(false);
3210
3211                 /* Attach process to shared segments */
3212                 CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
3213
3214                 BootstrapMain(argc - 2, argv + 2);
3215                 proc_exit(0);
3216         }
3217         if (strcmp(argv[1], "-forkarch") == 0)
3218         {
3219                 /* Close the postmaster's sockets */
3220                 ClosePostmasterPorts(false);
3221
3222                 /* Do not want to attach to shared memory */
3223
3224                 PgArchiverMain(argc, argv);
3225                 proc_exit(0);
3226         }
3227         if (strcmp(argv[1], "-forkbuf") == 0)
3228         {
3229                 /* Close the postmaster's sockets */
3230                 ClosePostmasterPorts(false);
3231
3232                 /* Do not want to attach to shared memory */
3233
3234                 PgstatBufferMain(argc, argv);
3235                 proc_exit(0);
3236         }
3237         if (strcmp(argv[1], "-forkcol") == 0)
3238         {
3239                 /*
3240                  * Do NOT close postmaster sockets here, because we are forking
3241                  * from pgstat buffer process, which already did it.
3242                  */
3243
3244                 /* Do not want to attach to shared memory */
3245
3246                 PgstatCollectorMain(argc, argv);
3247                 proc_exit(0);
3248         }
3249         if (strcmp(argv[1], "-forklog") == 0)
3250         {
3251                 /* Close the postmaster's sockets */
3252                 ClosePostmasterPorts(true);
3253
3254                 /* Do not want to attach to shared memory */
3255
3256                 SysLoggerMain(argc, argv);
3257                 proc_exit(0);
3258         }
3259
3260         return 1;                                       /* shouldn't get here */
3261 }
3262 #endif   /* EXEC_BACKEND */
3263
3264
/*
 * ExitPostmaster -- common exit path for the postmaster
 *
 * All postmaster exits are supposed to be funneled through this routine
 * instead of calling exit() directly.  At present it just hands the
 * status code to proc_exit().
 */
static void
ExitPostmaster(int status)
{
	/*
	 * In principle this should also clean up shared memory and kill all
	 * backends.
	 *
	 * Historical note kept from the original source: it was unclear
	 * whether backends should be killed when the postmaster dies
	 * ("probably not"), answered with "MUST -- vadim 05-10-1999".
	 */
	proc_exit(status);
}
3284
3285 /*
3286  * sigusr1_handler - handle signal conditions from child processes
3287  */
3288 static void
3289 sigusr1_handler(SIGNAL_ARGS)
3290 {
3291         int                     save_errno = errno;
3292
3293         PG_SETMASK(&BlockSig);
3294
3295         if (CheckPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE))
3296         {
3297                 /*
3298                  * Password or group file has changed.
3299                  */
3300                 load_user();
3301                 load_group();
3302         }
3303
3304         if (CheckPostmasterSignal(PMSIGNAL_WAKEN_CHILDREN))
3305         {
3306                 /*
3307                  * Send SIGUSR1 to all children (triggers
3308                  * CatchupInterruptHandler). See storage/ipc/sinval[adt].c for the
3309                  * use of this.
3310                  */
3311                 if (Shutdown <= SmartShutdown)
3312                         SignalChildren(SIGUSR1);
3313         }
3314
3315         if (PgArchPID != 0 && Shutdown == NoShutdown)
3316         {
3317                 if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER))
3318                 {
3319                         /*
3320                          * Send SIGUSR1 to archiver process, to wake it up and begin
3321                          * archiving next transaction log file.
3322                          */
3323                         kill(PgArchPID, SIGUSR1);
3324                 }
3325         }
3326
3327         PG_SETMASK(&UnBlockSig);
3328
3329         errno = save_errno;
3330 }
3331
3332
/*
 * dummy_handler -- signal handler that deliberately does nothing
 *
 * Installed for signals the postmaster itself has no use for but which
 * backends do use.  Setting such signals to SIG_IGN in the postmaster
 * could make a freshly started backend lose a signal that arrives before
 * it has reconfigured its own signal handling.  (See notes in
 * tcop/postgres.c.)
 */
static void
dummy_handler(SIGNAL_ARGS)
{
	/* intentionally empty */
}
3346
3347
/*
 * CharRemap: produce the textual encoding of an integer per crypt(3)
 * conventions.
 *
 * The argument is first folded into the range 0..61 (absolute value of
 * ch modulo 62).  Values 0..25 map to 'A'..'Z', 26..51 to 'a'..'z' and
 * 52..61 to '0'..'9'.
 */
static char
CharRemap(long ch)
{
	/*
	 * Reduce before taking the absolute value: the remainder is always in
	 * -61..61, so negating it cannot overflow, whereas negating the raw
	 * argument would be undefined for LONG_MIN.  For every other input the
	 * result is the same as negate-then-reduce.
	 */
	ch = ch % 62;
	if (ch < 0)
		ch = -ch;

	if (ch < 26)
		return 'A' + ch;

	ch -= 26;
	if (ch < 26)
		return 'a' + ch;

	ch -= 26;
	return '0' + ch;
}
3369
/*
 * RandomSalt -- fill in a 2-character crypt salt and a 4-byte MD5 salt
 *
 * cryptSalt receives two characters produced by CharRemap(); md5Salt
 * receives four bytes, each in the range 1..255.
 */
static void
RandomSalt(char *cryptSalt, char *md5Salt)
{
	long		rnd = PostmasterRandom();
	int			i;

	cryptSalt[0] = CharRemap(rnd % 62);
	cryptSalt[1] = CharRemap(rnd / 62);

	/*
	 * The first random value may be reused for MD5 salt byte 0, since only
	 * one of the two salts will be sent to the client.  Each remaining
	 * byte gets a fresh random value.
	 *
	 * We use % 255, sacrificing one possible byte value, so as to ensure
	 * that all bits of the random() value participate in the result.
	 * Adding one avoids generating any null bytes.
	 */
	md5Salt[0] = (rnd % 255) + 1;
	for (i = 1; i < 4; i++)
	{
		rnd = PostmasterRandom();
		md5Salt[i] = (rnd % 255) + 1;
	}
}
3398
3399 /*
3400  * PostmasterRandom
3401  */
3402 static long
3403 PostmasterRandom(void)
3404 {
3405         static bool initialized = false;
3406
3407         if (!initialized)
3408         {
3409                 Assert(random_seed != 0);
3410                 srandom(random_seed);
3411                 initialized = true;
3412         }
3413
3414         return random();
3415 }
3416
3417 /*
3418  * Count up number of child processes (regular backends only)
3419  */
3420 static int
3421 CountChildren(void)
3422 {
3423         Dlelem     *curr;
3424         int                     cnt = 0;
3425
3426         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
3427                 cnt++;
3428         return cnt;
3429 }
3430
3431
3432 /*
3433  * StartChildProcess -- start a non-backend child process for the postmaster
3434  *
3435  * xlog determines what kind of child will be started.  All child types
3436  * initially go to BootstrapMain, which will handle common setup.
3437  *
3438  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
3439  * to start subprocess.
3440  */
3441 static pid_t
3442 StartChildProcess(int xlop)
3443 {
3444         pid_t           pid;
3445         char       *av[10];
3446         int                     ac = 0;
3447         char            xlbuf[32];
3448
3449 #ifdef LINUX_PROFILE
3450         struct itimerval prof_itimer;
3451 #endif
3452
3453         /*
3454          * Set up command-line arguments for subprocess
3455          */
3456         av[ac++] = "postgres";
3457
3458 #ifdef EXEC_BACKEND
3459         av[ac++] = "-forkboot";
3460         av[ac++] = NULL;                        /* filled in by postmaster_forkexec */
3461 #endif
3462
3463         snprintf(xlbuf, sizeof(xlbuf), "-x%d", xlop);
3464         av[ac++] = xlbuf;
3465
3466         av[ac++] = "-p";
3467         av[ac++] = "template1";
3468
3469         av[ac] = NULL;
3470         Assert(ac < lengthof(av));
3471
3472         /*
3473          * Flush stdio channels (see comments in BackendStartup)
3474          */
3475         fflush(stdout);
3476         fflush(stderr);
3477
3478 #ifdef EXEC_BACKEND
3479
3480         pid = postmaster_forkexec(ac, av);
3481
3482 #else                                                   /* !EXEC_BACKEND */
3483
3484 #ifdef LINUX_PROFILE
3485         /* see comments in BackendStartup */
3486         getitimer(ITIMER_PROF, &prof_itimer);
3487 #endif
3488
3489 #ifdef __BEOS__
3490         /* Specific beos actions before backend startup */
3491         beos_before_backend_startup();
3492 #endif
3493
3494         pid = fork();
3495
3496         if (pid == 0)                           /* child */
3497         {
3498 #ifdef LINUX_PROFILE
3499                 setitimer(ITIMER_PROF, &prof_itimer, NULL);
3500 #endif
3501
3502 #ifdef __BEOS__
3503                 /* Specific beos actions after backend startup */
3504                 beos_backend_startup();
3505 #endif
3506
3507                 IsUnderPostmaster = true;               /* we are a postmaster subprocess
3508                                                                                  * now */
3509
3510                 /* Close the postmaster's sockets */
3511                 ClosePostmasterPorts(false);
3512
3513                 /* Lose the postmaster's on-exit routines and port connections */
3514                 on_exit_reset();
3515
3516                 /* Release postmaster's working memory context */
3517                 MemoryContextSwitchTo(TopMemoryContext);
3518                 MemoryContextDelete(PostmasterContext);
3519                 PostmasterContext = NULL;
3520
3521                 BootstrapMain(ac, av);
3522                 ExitPostmaster(0);
3523         }
3524 #endif   /* EXEC_BACKEND */
3525
3526         if (pid < 0)
3527         {
3528                 /* in parent, fork failed */
3529                 int                     save_errno = errno;
3530
3531 #ifdef __BEOS__
3532                 /* Specific beos actions before backend startup */
3533                 beos_backend_startup_failed();
3534 #endif
3535                 errno = save_errno;
3536                 switch (xlop)
3537                 {
3538                         case BS_XLOG_STARTUP:
3539                                 ereport(LOG,
3540                                                 (errmsg("could not fork startup process: %m")));
3541                                 break;
3542                         case BS_XLOG_BGWRITER:
3543                                 ereport(LOG,
3544                                 (errmsg("could not fork background writer process: %m")));
3545                                 break;
3546                         default:
3547                                 ereport(LOG,
3548                                                 (errmsg("could not fork process: %m")));
3549                                 break;
3550                 }
3551
3552                 /*
3553                  * fork failure is fatal during startup, but there's no need to
3554                  * choke immediately if starting other child types fails.
3555                  */
3556                 if (xlop == BS_XLOG_STARTUP)
3557                         ExitPostmaster(1);
3558                 return 0;
3559         }
3560
3561         /*
3562          * in parent, successful fork
3563          */
3564         return pid;
3565 }
3566
3567
3568 /*
3569  * Create the opts file
3570  */
3571 static bool
3572 CreateOptsFile(int argc, char *argv[], char *fullprogname)
3573 {
3574         char            filename[MAXPGPATH];
3575         FILE       *fp;
3576         int                     i;
3577
3578         snprintf(filename, sizeof(filename), "%s/postmaster.opts", DataDir);
3579
3580         if ((fp = fopen(filename, "w")) == NULL)
3581         {
3582                 elog(LOG, "could not create file \"%s\": %m", filename);
3583                 return false;
3584         }
3585
3586         fprintf(fp, "%s", fullprogname);
3587         for (i = 1; i < argc; i++)
3588                 fprintf(fp, " %s%s%s", SYSTEMQUOTE, argv[i], SYSTEMQUOTE);
3589         fputs("\n", fp);
3590
3591         if (fclose(fp))
3592         {
3593                 elog(LOG, "could not write file \"%s\": %m", filename);
3594                 return false;
3595         }
3596
3597         return true;
3598 }
3599
3600
3601 #ifdef EXEC_BACKEND
3602
3603 /*
3604  * The following need to be available to the save/restore_backend_variables
3605  * functions
3606  */
3607 extern slock_t *ShmemLock;
3608 extern slock_t *ShmemIndexLock;
3609 extern void *ShmemIndexAlloc;
3610 extern LWLock *LWLockArray;
3611 extern slock_t *ProcStructLock;
3612 extern int      pgStatSock;
3613 extern int pgStatPipe[2];
3614
3615 #ifndef WIN32
3616 #define write_inheritable_socket(dest, src, childpid) (*(dest) = (src))
3617 #define read_inheritable_socket(dest, src) (*(dest) = *(src))
3618 #else
3619 static void write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE child);
3620 static void write_inheritable_socket(InheritableSocket *dest, SOCKET src,
3621                                                                          pid_t childPid);
3622 static void read_inheritable_socket(SOCKET *dest, InheritableSocket *src);
3623 #endif
3624
3625
3626 /* Save critical backend variables into the BackendParameters struct */
3627 #ifndef WIN32
3628 static bool
3629 save_backend_variables(BackendParameters *param, Port *port)
3630 #else
3631 static bool
3632 save_backend_variables(BackendParameters *param, Port *port,
3633                                            HANDLE childProcess, pid_t childPid)
3634 #endif
3635 {
3636         memcpy(&param->port, port, sizeof(Port));
3637         write_inheritable_socket(&param->portsocket, port->sock, childPid);
3638
3639         StrNCpy(param->DataDir, DataDir, MAXPGPATH);
3640
3641         memcpy(&param->ListenSocket, &ListenSocket, sizeof(ListenSocket));
3642
3643         param->MyCancelKey = MyCancelKey;
3644
3645         param->UsedShmemSegID = UsedShmemSegID;
3646         param->UsedShmemSegAddr = UsedShmemSegAddr;
3647
3648         param->ShmemLock = ShmemLock;
3649         param->ShmemIndexLock = ShmemIndexLock;
3650         param->ShmemVariableCache = ShmemVariableCache;
3651         param->ShmemIndexAlloc = ShmemIndexAlloc;
3652         param->ShmemBackendArray = ShmemBackendArray;
3653
3654         param->LWLockArray = LWLockArray;
3655         param->ProcStructLock = ProcStructLock;
3656         write_inheritable_socket(&param->pgStatSock, pgStatSock, childPid);
3657         write_inheritable_socket(&param->pgStatPipe0, pgStatPipe[0], childPid);
3658         write_inheritable_socket(&param->pgStatPipe1, pgStatPipe[1], childPid);
3659
3660         param->PostmasterPid = PostmasterPid;
3661
3662 #ifdef WIN32
3663         param->PostmasterHandle = PostmasterHandle;
3664         write_duplicated_handle(&param->initial_signal_pipe,
3665                                                         pgwin32_create_signal_listener(childPid),
3666                                                         childProcess);
3667 #endif
3668
3669         memcpy(&param->syslogPipe, &syslogPipe, sizeof(syslogPipe));
3670
3671         StrNCpy(param->my_exec_path, my_exec_path, MAXPGPATH);
3672
3673         StrNCpy(param->ExtraOptions, ExtraOptions, MAXPGPATH);
3674
3675         StrNCpy(param->lc_collate, setlocale(LC_COLLATE, NULL), MAXPGPATH);
3676         StrNCpy(param->lc_ctype, setlocale(LC_CTYPE, NULL), MAXPGPATH);
3677
3678         return true;
3679 }
3680
3681
3682 #ifdef WIN32
3683 /*
3684  * Duplicate a handle for usage in a child process, and write the child
3685  * process instance of the handle to the parameter file.
3686  */
3687 static void
3688 write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE childProcess)
3689 {
3690         HANDLE hChild = INVALID_HANDLE_VALUE;
3691
3692         if (!DuplicateHandle(GetCurrentProcess(),
3693                                                  src,
3694                                                  childProcess,
3695                                                  &hChild,
3696                                                  0,
3697                                                  TRUE,
3698                                                  DUPLICATE_CLOSE_SOURCE | DUPLICATE_SAME_ACCESS))
3699                 ereport(ERROR,
3700                                 (errmsg_internal("could not duplicate handle to be written to backend parameter file: error code %d",
3701                                                                  (int) GetLastError())));
3702
3703         *dest = hChild;
3704 }
3705
3706 /*
3707  * Duplicate a socket for usage in a child process, and write the resulting
3708  * structure to the parameter file.
3709  * This is required because a number of LSPs (Layered Service Providers) very
3710  * common on Windows (antivirus, firewalls, download managers etc) break
3711  * straight socket inheritance.
3712  */
3713 static void
3714 write_inheritable_socket(InheritableSocket *dest, SOCKET src, pid_t childpid)
3715 {
3716         dest->origsocket = src;
3717         if (src != 0 && src != -1)
3718         {
3719                 /* Actual socket */
3720                 if (WSADuplicateSocket(src, childpid, &dest->wsainfo) != 0)
3721                         ereport(ERROR,
3722                                         (errmsg("could not duplicate socket %d for use in backend: error code %d",
3723                                                         src, WSAGetLastError())));
3724         }
3725 }
3726
3727 /*
3728  * Read a duplicate socket structure back, and get the socket descriptor.
3729  */
3730 static void
3731 read_inheritable_socket(SOCKET *dest, InheritableSocket *src)
3732 {
3733         SOCKET s;
3734
3735         if (src->origsocket == -1  || src->origsocket == 0)
3736         {
3737                 /* Not a real socket! */
3738                 *dest = src->origsocket;
3739         }
3740         else
3741         {
3742                 /* Actual socket, so create from structure */
3743                 s = WSASocket(FROM_PROTOCOL_INFO,
3744                                           FROM_PROTOCOL_INFO,
3745                                           FROM_PROTOCOL_INFO,
3746                                           &src->wsainfo,
3747                                           0,
3748                                           0);
3749                 if (s == INVALID_SOCKET)
3750                 {
3751                         write_stderr("could not create inherited socket: error code %d\n",
3752                                                  WSAGetLastError());
3753                         exit(1);
3754                 }
3755                 *dest = s;
3756
3757                 /*
3758                  * To make sure we don't get two references to the same socket,
3759                  * close the original one. (This would happen when inheritance
3760                  * actually works..
3761                  */
3762                 closesocket(src->origsocket);
3763         }
3764 }
3765 #endif
3766
3767 static void
3768 read_backend_variables(char *id, Port *port)
3769 {
3770 #ifndef WIN32
3771         /* Non-win32 implementation reads from file */
3772         FILE *fp;
3773         BackendParameters param;
3774
3775         /* Open file */
3776         fp = AllocateFile(id, PG_BINARY_R);
3777         if (!fp)
3778         {
3779                 write_stderr("could not read from backend variables file \"%s\": %s\n",
3780                                          id, strerror(errno));
3781                 exit(1);
3782         }
3783
3784         if (fread(&param, sizeof(param), 1, fp) != 1)
3785         {
3786                 write_stderr("could not read from backend variables file \"%s\": %s\n",
3787                                          id, strerror(errno));
3788                 exit(1);
3789         }
3790
3791         /* Release file */
3792         FreeFile(fp);
3793         if (unlink(id) != 0)
3794         {
3795                 write_stderr("could not remove file \"%s\": %s\n",
3796                                          id, strerror(errno));
3797                 exit(1);
3798         }
3799
3800         restore_backend_variables(&param, port);
3801 #else
3802         /* Win32 version uses mapped file */
3803         HANDLE paramHandle;
3804         BackendParameters *param;
3805
3806         paramHandle = (HANDLE)atol(id);
3807         param = MapViewOfFile(paramHandle, FILE_MAP_READ, 0, 0, 0);
3808         if (!param)
3809         {
3810                 write_stderr("could not map view of backend variables: error code %d\n",
3811                                          (int) GetLastError());
3812                 exit(1);
3813         }
3814
3815         restore_backend_variables(param, port);
3816
3817         if (!UnmapViewOfFile(param))
3818         {
3819                 write_stderr("could not unmap view of backend variables: error code %d\n",
3820                                          (int) GetLastError());
3821                 exit(1);
3822         }
3823
3824         if (!CloseHandle(paramHandle))
3825         {
3826                 write_stderr("could not close handle to backend parameter variables: error code %d\n",
3827                                          (int) GetLastError());
3828                 exit(1);
3829         }
3830 #endif
3831 }
3832
3833 /* Restore critical backend variables from the BackendParameters struct */
3834 static void
3835 restore_backend_variables(BackendParameters *param, Port *port)
3836 {
3837         memcpy(port, &param->port, sizeof(Port));
3838         read_inheritable_socket(&port->sock, &param->portsocket);
3839
3840         SetDataDir(param->DataDir);
3841
3842         memcpy(&ListenSocket, &param->ListenSocket, sizeof(ListenSocket));
3843
3844         MyCancelKey = param->MyCancelKey;
3845
3846         UsedShmemSegID = param->UsedShmemSegID;
3847         UsedShmemSegAddr = param->UsedShmemSegAddr;
3848
3849         ShmemLock = param->ShmemLock;
3850         ShmemIndexLock = param->ShmemIndexLock;
3851         ShmemVariableCache = param->ShmemVariableCache;
3852         ShmemIndexAlloc = param->ShmemIndexAlloc;
3853         ShmemBackendArray = param->ShmemBackendArray;
3854
3855         LWLockArray = param->LWLockArray;
3856         ProcStructLock = param->ProcStructLock;
3857         read_inheritable_socket(&pgStatSock, &param->pgStatSock);
3858         read_inheritable_socket(&pgStatPipe[0], &param->pgStatPipe0);
3859         read_inheritable_socket(&pgStatPipe[1], &param->pgStatPipe1);
3860
3861         PostmasterPid = param->PostmasterPid;
3862
3863 #ifdef WIN32
3864         PostmasterHandle = param->PostmasterHandle;
3865         pgwin32_initial_signal_pipe = param->initial_signal_pipe;
3866 #endif
3867
3868         memcpy(&syslogPipe, &param->syslogPipe, sizeof(syslogPipe));
3869
3870         StrNCpy(my_exec_path, param->my_exec_path, MAXPGPATH);
3871
3872         StrNCpy(ExtraOptions, param->ExtraOptions, MAXPGPATH);
3873
3874         setlocale(LC_COLLATE, param->lc_collate);
3875         setlocale(LC_CTYPE, param->lc_ctype);
3876 }
3877
3878
3879 size_t
3880 ShmemBackendArraySize(void)
3881 {
3882         return (NUM_BACKENDARRAY_ELEMS * sizeof(Backend));
3883 }
3884
3885 void
3886 ShmemBackendArrayAllocation(void)
3887 {
3888         size_t          size = ShmemBackendArraySize();
3889
3890         ShmemBackendArray = (Backend *) ShmemAlloc(size);
3891         /* Mark all slots as empty */
3892         memset(ShmemBackendArray, 0, size);
3893 }
3894
3895 static void
3896 ShmemBackendArrayAdd(Backend *bn)
3897 {
3898         int                     i;
3899
3900         /* Find an empty slot */
3901         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3902         {
3903                 if (ShmemBackendArray[i].pid == 0)
3904                 {
3905                         ShmemBackendArray[i] = *bn;
3906                         return;
3907                 }
3908         }
3909
3910         ereport(FATAL,
3911                         (errmsg_internal("no free slots in shmem backend array")));
3912 }
3913
3914 static void
3915 ShmemBackendArrayRemove(pid_t pid)
3916 {
3917         int                     i;
3918
3919         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3920         {
3921                 if (ShmemBackendArray[i].pid == pid)
3922                 {
3923                         /* Mark the slot as empty */
3924                         ShmemBackendArray[i].pid = 0;
3925                         return;
3926                 }
3927         }
3928
3929         ereport(WARNING,
3930                         (errmsg_internal("could not find backend entry with pid %d",
3931                                                          (int) pid)));
3932 }
3933 #endif   /* EXEC_BACKEND */
3934
3935
3936 #ifdef WIN32
3937
3938 /*
3939  * Note: The following three functions must not be interrupted (eg. by
3940  * signals).  As the Postgres Win32 signalling architecture (currently)
3941  * requires polling, or APC checking functions which aren't used here, this
3942  * is not an issue.
3943  *
3944  * We keep two separate arrays, instead of a single array of pid/HANDLE
3945  * structs, to avoid having to re-create a handle array for
3946  * WaitForMultipleObjects on each call to win32_waitpid.
3947  */
3948
3949 static void
3950 win32_AddChild(pid_t pid, HANDLE handle)
3951 {
3952         Assert(win32_childPIDArray && win32_childHNDArray);
3953         if (win32_numChildren < NUM_BACKENDARRAY_ELEMS)
3954         {
3955                 win32_childPIDArray[win32_numChildren] = pid;
3956                 win32_childHNDArray[win32_numChildren] = handle;
3957                 ++win32_numChildren;
3958         }
3959         else
3960                 ereport(FATAL,
3961                                 (errmsg_internal("no room for child entry with pid %lu",
3962                                                                  (unsigned long) pid)));
3963 }
3964
3965 static void
3966 win32_RemoveChild(pid_t pid)
3967 {
3968         int                     i;
3969
3970         Assert(win32_childPIDArray && win32_childHNDArray);
3971
3972         for (i = 0; i < win32_numChildren; i++)
3973         {
3974                 if (win32_childPIDArray[i] == pid)
3975                 {
3976                         CloseHandle(win32_childHNDArray[i]);
3977
3978                         /* Swap last entry into the "removed" one */
3979                         --win32_numChildren;
3980                         win32_childPIDArray[i] = win32_childPIDArray[win32_numChildren];
3981                         win32_childHNDArray[i] = win32_childHNDArray[win32_numChildren];
3982                         return;
3983                 }
3984         }
3985
3986         ereport(WARNING,
3987                         (errmsg_internal("could not find child entry with pid %lu",
3988                                                          (unsigned long) pid)));
3989 }
3990
3991 static pid_t
3992 win32_waitpid(int *exitstatus)
3993 {
3994         /*
3995          * Note: Do NOT use WaitForMultipleObjectsEx, as we don't want to run
3996          * queued APCs here.
3997          */
3998         int                     index;
3999         DWORD           exitCode;
4000         DWORD           ret;
4001         unsigned long offset;
4002
4003         Assert(win32_childPIDArray && win32_childHNDArray);
4004         elog(DEBUG3, "waiting on %lu children", win32_numChildren);
4005
4006         for (offset = 0; offset < win32_numChildren; offset += MAXIMUM_WAIT_OBJECTS)
4007         {
4008                 unsigned long num = Min(MAXIMUM_WAIT_OBJECTS, win32_numChildren - offset);
4009
4010                 ret = WaitForMultipleObjects(num, &win32_childHNDArray[offset], FALSE, 0);
4011                 switch (ret)
4012                 {
4013                         case WAIT_FAILED:
4014                                 ereport(LOG,
4015                                                 (errmsg_internal("failed to wait on %lu of %lu children: error code %d",
4016                                                  num, win32_numChildren, (int) GetLastError())));
4017                                 return -1;
4018
4019                         case WAIT_TIMEOUT:
4020                                 /* No children (in this chunk) have finished */
4021                                 break;
4022
4023                         default:
4024
4025                                 /*
4026                                  * Get the exit code, and return the PID of, the
4027                                  * respective process
4028                                  */
4029                                 index = offset + ret - WAIT_OBJECT_0;
4030                                 Assert(index >= 0 && index < win32_numChildren);
4031                                 if (!GetExitCodeProcess(win32_childHNDArray[index], &exitCode))
4032                                 {
4033                                         /*
4034                                          * If we get this far, this should never happen, but,
4035                                          * then again... No choice other than to assume a
4036                                          * catastrophic failure.
4037                                          */
4038                                         ereport(FATAL,
4039                                                         (errmsg_internal("failed to get exit code for child %lu",
4040                                                                                          (DWORD)win32_childPIDArray[index])));
4041                                 }
4042                                 *exitstatus = (int) exitCode;
4043                                 return win32_childPIDArray[index];
4044                 }
4045         }
4046
4047         /* No children have finished */
4048         return -1;
4049 }
4050
4051 /*
4052  * Note! Code below executes on separate threads, one for
4053  * each child process created
4054  */
4055 static DWORD WINAPI
4056 win32_sigchld_waiter(LPVOID param)
4057 {
4058         HANDLE          procHandle = (HANDLE) param;
4059
4060         DWORD           r = WaitForSingleObject(procHandle, INFINITE);
4061
4062         if (r == WAIT_OBJECT_0)
4063                 pg_queue_signal(SIGCHLD);
4064         else
4065                 write_stderr("could not wait on child process handle: error code %d\n",
4066                                          (int) GetLastError());
4067         CloseHandle(procHandle);
4068         return 0;
4069 }
4070
4071 #endif   /* WIN32 */