]> granicus.if.org Git - postgresql/blob - src/backend/postmaster/postmaster.c
Refactor EXEC_BACKEND code so that postmaster child processes reattach
[postgresql] / src / backend / postmaster / postmaster.c
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  *        This program acts as a clearing house for requests to the
5  *        POSTGRES system.      Frontend programs send a startup message
6  *        to the Postmaster and the postmaster uses the info in the
7  *        message to setup a backend process.
8  *
9  *        The postmaster also manages system-wide operations such as
10  *        startup and shutdown. The postmaster itself doesn't do those
11  *        operations, mind you --- it just forks off a subprocess to do them
12  *        at the right times.  It also takes care of resetting the system
13  *        if a backend crashes.
14  *
15  *        The postmaster process creates the shared memory and semaphore
16  *        pools during startup, but as a rule does not touch them itself.
17  *        In particular, it is not a member of the PGPROC array of backends
18  *        and so it cannot participate in lock-manager operations.      Keeping
19  *        the postmaster away from shared memory operations makes it simpler
20  *        and more reliable.  The postmaster is almost always able to recover
21  *        from crashes of individual backends by resetting shared memory;
22  *        if it did much with shared memory then it would be prone to crashing
23  *        along with the backends.
24  *
25  *        When a request message is received, we now fork() immediately.
26  *        The child process performs authentication of the request, and
27  *        then becomes a backend if successful.  This allows the auth code
28  *        to be written in a simple single-threaded style (as opposed to the
29  *        crufty "poor man's multitasking" code that used to be needed).
30  *        More importantly, it ensures that blockages in non-multithreaded
31  *        libraries like SSL or PAM cannot cause denial of service to other
32  *        clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  *        $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.441 2004/12/29 21:36:03 tgl Exp $
41  *
42  * NOTES
43  *
44  * Initialization:
45  *              The Postmaster sets up shared memory data structures
46  *              for the backends.
47  *
48  * Synchronization:
49  *              The Postmaster shares memory with the backends but should avoid
50  *              touching shared memory, so as not to become stuck if a crashing
51  *              backend screws up locks or shared memory.  Likewise, the Postmaster
52  *              should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  *              The Postmaster cleans up after backends if they have an emergency
56  *              exit and/or core dump.
57  *
58  * Error Reporting:
59  *              Use write_stderr() only for reporting "interactive" errors
60  *              (essentially, bogus arguments on the command line).  Once the
61  *              postmaster is launched, use ereport().  In particular, don't use
62  *              write_stderr() for anything that occurs after pmdaemonize.
63  *
64  *-------------------------------------------------------------------------
65  */
66
67 #include "postgres.h"
68
69 #include <unistd.h>
70 #include <signal.h>
71 #include <time.h>
72 #include <sys/wait.h>
73 #include <ctype.h>
74 #include <sys/stat.h>
75 #include <sys/socket.h>
76 #include <fcntl.h>
77 #include <sys/param.h>
78 #include <netinet/in.h>
79 #include <arpa/inet.h>
80 #include <netdb.h>
81 #include <limits.h>
82
83 #ifdef HAVE_SYS_SELECT_H
84 #include <sys/select.h>
85 #endif
86
87 #ifdef HAVE_GETOPT_H
88 #include <getopt.h>
89 #endif
90
91 #ifdef USE_RENDEZVOUS
92 #include <DNSServiceDiscovery/DNSServiceDiscovery.h>
93 #endif
94
95 #include "catalog/pg_database.h"
96 #include "commands/async.h"
97 #include "lib/dllist.h"
98 #include "libpq/auth.h"
99 #include "libpq/crypt.h"
100 #include "libpq/libpq.h"
101 #include "libpq/pqcomm.h"
102 #include "libpq/pqsignal.h"
103 #include "miscadmin.h"
104 #include "nodes/nodes.h"
105 #include "postmaster/postmaster.h"
106 #include "postmaster/pgarch.h"
107 #include "postmaster/syslogger.h"
108 #include "storage/fd.h"
109 #include "storage/ipc.h"
110 #include "storage/pg_shmem.h"
111 #include "storage/pmsignal.h"
112 #include "storage/proc.h"
113 #include "storage/bufmgr.h"
114 #include "access/xlog.h"
115 #include "tcop/tcopprot.h"
116 #include "utils/builtins.h"
117 #include "utils/guc.h"
118 #include "utils/memutils.h"
119 #include "utils/ps_status.h"
120 #include "bootstrap/bootstrap.h"
121 #include "pgstat.h"
122
123 #ifdef EXEC_BACKEND
124 #include "storage/spin.h"
125 #endif
126
127
128 /*
129  * List of active backends (or child processes anyway; we don't actually
130  * know whether a given child has become a backend or is still in the
131  * authorization phase).  This is used mainly to keep track of how many
132  * children we have and send them appropriate signals when necessary.
133  *
134  * "Special" children such as the startup and bgwriter tasks are not in
135  * this list.
136  */
137 typedef struct bkend
138 {
139         pid_t           pid;                    /* process id of the child (backend) process */
140         long            cancel_key;             /* cancel key assigned to this backend */
141 } Backend;
142
143 static Dllist *BackendList;
144
145 #ifdef EXEC_BACKEND
146 #define NUM_BACKENDARRAY_ELEMS (2*MaxBackends)
147 static Backend *ShmemBackendArray;
148 #endif
149
150 /* Listening configuration, consumed by PostmasterMain() when it creates the listen sockets */
151 int                     PostPortNumber;     /* port number passed to StreamServerPort() */
152 char       *UnixSocketDir;                  /* directory argument passed to StreamServerPort() */
153 char       *ListenAddresses;                /* comma-separated host list; an entry of "*" is passed on as a NULL host */
154
155 /*
156  * ReservedBackends is the number of backends reserved for superuser use.
157  * This number is taken out of the pool size given by MaxBackends, so the
158  * number of backend slots available to non-superusers is
159  * (MaxBackends - ReservedBackends).  Note what this really means is
160  * "if there are <= ReservedBackends connections available, only superusers
161  * can make new connections" --- pre-existing superuser connections don't
162  * count against the limit.
163  */
164 int                     ReservedBackends;
165
166
167 static const char *progname = NULL;
168
169 /* The socket(s) we're listening to. */
170 #define MAXLISTEN       10
171 static int      ListenSocket[MAXLISTEN];
172
173 /*
174  * Set by the -o option
175  */
176 static char ExtraOptions[MAXPGPATH];
177
178 /*
179  * These globals control the behavior of the postmaster in case some
180  * backend dumps core.  Normally, it kills all peers of the dead backend
181  * and reinitializes shared memory.  By specifying -s we can have the
182  * postmaster stop (rather than kill) peers, and by specifying -n we can
183  * have it not reinitialize shared data structures.
184  */
185 static bool Reinit = true;              /* cleared by the -n switch */
186 static int      SendStop = false;       /* set by the -s switch; declared int, but only true/false is assigned in the code visible here */
187
188 /* still more option variables */
189 bool            EnableSSL = false;
190 bool            SilentMode = false; /* silent mode (-S) */
191
192 int                     PreAuthDelay = 0;
193 int                     AuthenticationTimeout = 60;
194
195 bool            log_hostname;           /* for ps display and logging */
196 bool            Log_connections = false;
197 bool            Db_user_namespace = false;
198
199 char       *rendezvous_name;
200
201 /* list of library:init-function to be preloaded */
202 char       *preload_libraries_string = NULL;
203
204 /* PIDs of special child processes; 0 when not running */
205 static pid_t StartupPID = 0,
206                         BgWriterPID = 0,
207                         PgArchPID = 0,
208                         PgStatPID = 0,
209                         SysLoggerPID = 0;
210
211 /* Startup/shutdown state */
212 #define                 NoShutdown              0
213 #define                 SmartShutdown   1
214 #define                 FastShutdown    2
215
216 static int      Shutdown = NoShutdown;
217
218 static bool FatalError = false; /* T if recovering from backend crash */
219
220 bool            ClientAuthInProgress = false;           /* T during new-client
221                                                                                                  * authentication */
222
223 /*
224  * State for assigning random salts and cancel keys.
225  * Also, the global MyCancelKey passes the cancel key assigned to a given
226  * backend from the postmaster to that backend (via fork).
227  */
228 static unsigned int random_seed = 0;
229
230 extern char *optarg;
231 extern int      optind,
232                         opterr;
233
234 #ifdef HAVE_INT_OPTRESET
235 extern int      optreset;
236 #endif
237
238 /*
239  * postmaster.c - function prototypes
240  */
241 static void checkDataDir(void);
242
243 #ifdef USE_RENDEZVOUS
244 static void reg_reply(DNSServiceRegistrationReplyErrorType errorCode,
245                   void *context);
246 #endif
247 static void pmdaemonize(void);
248 static Port *ConnCreate(int serverFd);
249 static void ConnFree(Port *port);
250 static void reset_shared(unsigned short port);
251 static void SIGHUP_handler(SIGNAL_ARGS);
252 static void pmdie(SIGNAL_ARGS);
253 static void reaper(SIGNAL_ARGS);
254 static void sigusr1_handler(SIGNAL_ARGS);
255 static void dummy_handler(SIGNAL_ARGS);
256 static void CleanupBackend(int pid, int exitstatus);
257 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
258 static void LogChildExit(int lev, const char *procname,
259                          int pid, int exitstatus);
260 static int      BackendRun(Port *port);
261 static void ExitPostmaster(int status);
262 static void usage(const char *);
263 static int      ServerLoop(void);
264 static int      BackendStartup(Port *port);
265 static int      ProcessStartupPacket(Port *port, bool SSLdone);
266 static void processCancelRequest(Port *port, void *pkt);
267 static int      initMasks(fd_set *rmask);
268 static void report_fork_failure_to_client(Port *port, int errnum);
269 static enum CAC_state canAcceptConnections(void);
270 static long PostmasterRandom(void);
271 static void RandomSalt(char *cryptSalt, char *md5Salt);
272 static void SignalChildren(int signal);
273 static int      CountChildren(void);
274 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
275 static pid_t StartChildProcess(int xlop);
276
277 #ifdef EXEC_BACKEND
278
279 #ifdef WIN32
280 static void win32_AddChild(pid_t pid, HANDLE handle);
281 static void win32_RemoveChild(pid_t pid);
282 static pid_t win32_waitpid(int *exitstatus);
283 static DWORD WINAPI win32_sigchld_waiter(LPVOID param);
284
285 static pid_t *win32_childPIDArray;
286 static HANDLE *win32_childHNDArray;
287 static unsigned long win32_numChildren = 0;
288
289 HANDLE          PostmasterHandle;
290 #endif
291
292 static pid_t backend_forkexec(Port *port);
293 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
294
295 /* Type for a socket in a form that can be passed on to an exec'ed child process (used for the socket fields of BackendParameters) */
296 #ifdef WIN32
297 typedef struct
298 {
299         SOCKET origsocket; /* Original socket value, or -1 if not a socket */
300         WSAPROTOCOL_INFO wsainfo; /* protocol info accompanying origsocket -- presumably what the child uses to re-open it; TODO confirm */
301 } InheritableSocket;
302 #else
303 typedef int InheritableSocket; /* without WIN32 this is just an int */
304 #endif
305
306 typedef struct LWLock LWLock;   /* ugly kluge */
307
308 /*
309  * Structure containing all variables passed to exec'ed backends; each field is named after the variable it carries (see the *_backend_variables() prototypes that follow)
310  */
311 typedef struct
312 {
313         Port port; /* the Port struct, stored by value */
314         InheritableSocket portsocket; /* inheritable socket -- presumably the one belonging to port; TODO confirm */
315         char DataDir[MAXPGPATH]; /* data directory path, held in a fixed-size buffer */
316         int ListenSocket[MAXLISTEN]; /* same dimensions as the postmaster's ListenSocket[] array */
317         long MyCancelKey; /* cancel key the postmaster assigned to this backend */
318         unsigned long UsedShmemSegID; /* NOTE(review): this and the Shmem*/lock pointer fields below are defined outside this chunk -- presumably shared-memory bookkeeping; confirm */
319         void *UsedShmemSegAddr;
320         slock_t *ShmemLock;
321         slock_t *ShmemIndexLock;
322         VariableCache ShmemVariableCache;
323         void *ShmemIndexAlloc;
324         Backend *ShmemBackendArray; /* value of the file-level ShmemBackendArray pointer */
325         LWLock *LWLockArray;
326         slock_t *ProcStructLock;
327         InheritableSocket pgStatSock; /* presumably the statistics collector's socket and pipe ends (these three fields); TODO confirm */
328         InheritableSocket pgStatPipe0;
329         InheritableSocket pgStatPipe1;
330         pid_t PostmasterPid; /* the global is set from getpid() in PostmasterMain() */
331 #ifdef WIN32
332         HANDLE PostmasterHandle; /* WIN32 only, as is initial_signal_pipe */
333         HANDLE initial_signal_pipe;
334         HANDLE syslogPipe[2]; /* pair of HANDLEs on WIN32 */
335 #else
336         int syslogPipe[2]; /* pair of ints elsewhere */
337 #endif
338         char my_exec_path[MAXPGPATH]; /* the global is filled in by find_my_exec() in PostmasterMain() */
339         char ExtraOptions[MAXPGPATH]; /* text accumulated from -o switches */
340         char lc_collate[MAXPGPATH]; /* presumably locale settings (this and lc_ctype); TODO confirm */
341         char lc_ctype[MAXPGPATH];
342 } BackendParameters;
343
344 static void read_backend_variables(char *id, Port *port);
345 static void restore_backend_variables(BackendParameters *param, Port *port);
346 #ifndef WIN32
347 static bool save_backend_variables(BackendParameters *param, Port *port);
348 #else
349 static bool save_backend_variables(BackendParameters *param, Port *port,
350                                                                    HANDLE childProcess, pid_t childPid);
351 #endif
352
353 static void ShmemBackendArrayAdd(Backend *bn);
354 static void ShmemBackendArrayRemove(pid_t pid);
355
356 #endif   /* EXEC_BACKEND */
357
358 #define StartupDataBase()               StartChildProcess(BS_XLOG_STARTUP)
359 #define StartBackgroundWriter() StartChildProcess(BS_XLOG_BGWRITER)
360
361
362 /*
363  * Postmaster main entry point
364  */
365 int
366 PostmasterMain(int argc, char *argv[])
367 {
368         int                     opt;
369         int                     status;
370         char       *userDoption = NULL;
371         int                     i;
372
373         /* This will call exit() if strdup() fails. */
374         progname = get_progname(argv[0]);       
375
376         MyProcPid = PostmasterPid = getpid();
377
378         IsPostmasterEnvironment = true;
379
380         /*
381          * Catch standard options before doing much else.  This even works on
382          * systems without getopt_long.
383          */
384         if (argc > 1)
385         {
386                 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
387                 {
388                         usage(progname);
389                         ExitPostmaster(0);
390                 }
391                 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
392                 {
393                         puts("postmaster (PostgreSQL) " PG_VERSION);
394                         ExitPostmaster(0);
395                 }
396         }
397
398 #ifdef WIN32
399         /* Start our win32 signal implementation */
400         pgwin32_signal_initialize();
401 #endif
402
403         /*
404          * for security, no dir or file created can be group or other
405          * accessible
406          */
407         umask((mode_t) 0077);
408
409         /*
410          * Fire up essential subsystems: memory management
411          */
412         MemoryContextInit();
413
414         /*
415          * By default, palloc() requests in the postmaster will be allocated
416          * in the PostmasterContext, which is space that can be recycled by
417          * backends.  Allocated data that needs to be available to backends
418          * should be allocated in TopMemoryContext.
419          */
420         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
421                                                                                           "Postmaster",
422                                                                                           ALLOCSET_DEFAULT_MINSIZE,
423                                                                                           ALLOCSET_DEFAULT_INITSIZE,
424                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
425         MemoryContextSwitchTo(PostmasterContext);
426
427         IgnoreSystemIndexes(false);
428
429         if (find_my_exec(argv[0], my_exec_path) < 0)
430                 elog(FATAL, "%s: could not locate my own executable path",
431                          argv[0]);
432
433         get_pkglib_path(my_exec_path, pkglib_path);
434
435         /*
436          * Options setup
437          */
438         InitializeGUCOptions();
439
440         opterr = 1;
441
442         while ((opt = getopt(argc, argv, "A:a:B:b:c:D:d:Fh:ik:lm:MN:no:p:Ss-:")) != -1)
443         {
444                 switch (opt)
445                 {
446                         case 'A':
447 #ifdef USE_ASSERT_CHECKING
448                                 SetConfigOption("debug_assertions", optarg, PGC_POSTMASTER, PGC_S_ARGV);
449 #else
450                                 write_stderr("%s: assert checking is not compiled in\n", progname);
451 #endif
452                                 break;
453                         case 'a':
454                                 /* Can no longer set authentication method. */
455                                 break;
456                         case 'B':
457                                 SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
458                                 break;
459                         case 'b':
460                                 /* Can no longer set the backend executable file to use. */
461                                 break;
462                         case 'D':
463                                 userDoption = optarg;
464                                 break;
465                         case 'd':
466                                 set_debug_options(atoi(optarg), PGC_POSTMASTER, PGC_S_ARGV);
467                                 break;
468                         case 'F':
469                                 SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
470                                 break;
471                         case 'h':
472                                 SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
473                                 break;
474                         case 'i':
475                                 SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
476                                 break;
477                         case 'k':
478                                 SetConfigOption("unix_socket_directory", optarg, PGC_POSTMASTER, PGC_S_ARGV);
479                                 break;
480 #ifdef USE_SSL
481                         case 'l':
482                                 SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
483                                 break;
484 #endif
485                         case 'm':
486                                 /* Multiplexed backends no longer supported. */
487                                 break;
488                         case 'M':
489
490                                 /*
491                                  * ignore this flag.  This may be passed in because the
492                                  * program was run as 'postgres -M' instead of
493                                  * 'postmaster'
494                                  */
495                                 break;
496                         case 'N':
497                                 /* The max number of backends to start. */
498                                 SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
499                                 break;
500                         case 'n':
501                                 /* Don't reinit shared mem after abnormal exit */
502                                 Reinit = false;
503                                 break;
504                         case 'o':
505
506                                 /*
507                                  * Other options to pass to the backend on the command
508                                  * line
509                                  */
510                                 snprintf(ExtraOptions + strlen(ExtraOptions),
511                                                  sizeof(ExtraOptions) - strlen(ExtraOptions),
512                                                  " %s", optarg);
513                                 break;
514                         case 'p':
515                                 SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
516                                 break;
517                         case 'S':
518
519                                 /*
520                                  * Start in 'S'ilent mode (disassociate from controlling
521                                  * tty). You may also think of this as 'S'ysV mode since
522                                  * it's most badly needed on SysV-derived systems like
523                                  * SVR4 and HP-UX.
524                                  */
525                                 SetConfigOption("silent_mode", "true", PGC_POSTMASTER, PGC_S_ARGV);
526                                 break;
527                         case 's':
528
529                                 /*
530                                  * In the event that some backend dumps core, send
531                                  * SIGSTOP, rather than SIGQUIT, to all its peers.      This
532                                  * lets the wily post_hacker collect core dumps from
533                                  * everyone.
534                                  */
535                                 SendStop = true;
536                                 break;
537                         case 'c':
538                         case '-':
539                                 {
540                                         char       *name,
541                                                            *value;
542
543                                         ParseLongOption(optarg, &name, &value);
544                                         if (!value)
545                                         {
546                                                 if (opt == '-')
547                                                         ereport(ERROR,
548                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
549                                                                          errmsg("--%s requires a value",
550                                                                                         optarg)));
551                                                 else
552                                                         ereport(ERROR,
553                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
554                                                                          errmsg("-c %s requires a value",
555                                                                                         optarg)));
556                                         }
557
558                                         SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
559                                         free(name);
560                                         if (value)
561                                                 free(value);
562                                         break;
563                                 }
564
565                         default:
566                                 write_stderr("Try \"%s --help\" for more information.\n",
567                                                          progname);
568                                 ExitPostmaster(1);
569                 }
570         }
571
572         /*
573          * Postmaster accepts no non-option switch arguments.
574          */
575         if (optind < argc)
576         {
577                 write_stderr("%s: invalid argument: \"%s\"\n",
578                                          progname, argv[optind]);
579                 write_stderr("Try \"%s --help\" for more information.\n",
580                                          progname);
581                 ExitPostmaster(1);
582         }
583
584         /*
585          * Locate the proper configuration files and data directory, and
586          * read postgresql.conf for the first time.
587          */
588         if (!SelectConfigFiles(userDoption, progname))
589                 ExitPostmaster(2);
590
591         /* Verify that DataDir looks reasonable */
592         checkDataDir();
593
594         /*
595          * Check for invalid combinations of GUC settings.
596          */
597         if (NBuffers < 2 * MaxBackends || NBuffers < 16)
598         {
599                 /*
600                  * Do not accept -B so small that backends are likely to starve
601                  * for lack of buffers.  The specific choices here are somewhat
602                  * arbitrary.
603                  */
604                 write_stderr("%s: the number of buffers (-B) must be at least twice the number of allowed connections (-N) and at least 16\n", progname);
605                 ExitPostmaster(1);
606         }
607
608         if (ReservedBackends >= MaxBackends)
609         {
610                 write_stderr("%s: superuser_reserved_connections must be less than max_connections\n", progname);
611                 ExitPostmaster(1);
612         }
613
614         /*
615          * Other one-time internal sanity checks can go here.
616          */
617         if (!CheckDateTokenTables())
618         {
619                 write_stderr("%s: invalid datetoken tables, please fix\n", progname);
620                 ExitPostmaster(1);
621         }
622
623         /*
624          * Now that we are done processing the postmaster arguments, reset
625          * getopt(3) library so that it will work correctly in subprocesses.
626          */
627         optind = 1;
628 #ifdef HAVE_INT_OPTRESET
629         optreset = 1;                           /* some systems need this too */
630 #endif
631
632         /* For debugging: display postmaster environment */
633         {
634                 extern char **environ;
635                 char      **p;
636
637                 ereport(DEBUG3,
638                         (errmsg_internal("%s: PostmasterMain: initial environ dump:",
639                                                          progname)));
640                 ereport(DEBUG3,
641                  (errmsg_internal("-----------------------------------------")));
642                 for (p = environ; *p; ++p)
643                         ereport(DEBUG3,
644                                         (errmsg_internal("\t%s", *p)));
645                 ereport(DEBUG3,
646                  (errmsg_internal("-----------------------------------------")));
647         }
648
649 #ifdef EXEC_BACKEND
650         if (find_other_exec(argv[0], "postgres", PG_VERSIONSTR,
651                                                 postgres_exec_path) < 0)
652                 ereport(FATAL,
653                          (errmsg("%s: could not locate matching postgres executable",
654                                          progname)));
655 #endif
656
657         /*
658          * Initialize SSL library, if specified.
659          */
660 #ifdef USE_SSL
661         if (EnableSSL)
662                 secure_initialize();
663 #endif
664
665         /*
666          * process any libraries that should be preloaded and optionally
667          * pre-initialized
668          */
669         if (preload_libraries_string)
670                 process_preload_libraries(preload_libraries_string);
671
672         /*
673          * Fork away from controlling terminal, if -S specified.
674          *
675          * Must do this before we grab any interlock files, else the interlocks
676          * will show the wrong PID.
677          */
678         if (SilentMode)
679                 pmdaemonize();
680
681         /*
682          * Create lockfile for data directory.
683          *
684          * We want to do this before we try to grab the input sockets, because
685          * the data directory interlock is more reliable than the socket-file
686          * interlock (thanks to whoever decided to put socket files in /tmp
687          * :-(). For the same reason, it's best to grab the TCP socket(s)
688          * before the Unix socket.
689          */
690         CreateDataDirLockFile(DataDir, true);
691
692         /*
693          * Remove old temporary files.  At this point there can be no other
694          * Postgres processes running in this directory, so this should be
695          * safe.
696          */
697         RemovePgTempFiles();
698
699         /*
700          * Establish input sockets.
701          */
702         for (i = 0; i < MAXLISTEN; i++)
703                 ListenSocket[i] = -1;
704
705         if (ListenAddresses)
706         {
707                 char       *rawstring;
708                 List       *elemlist;
709                 ListCell   *l;
710
711                 /* Need a modifiable copy of ListenAddresses */
712                 rawstring = pstrdup(ListenAddresses);
713
714                 /* Parse string into list of identifiers */
715                 if (!SplitIdentifierString(rawstring, ',', &elemlist))
716                 {
717                         /* syntax error in list */
718                         ereport(FATAL,
719                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
720                                 errmsg("invalid list syntax for \"listen_addresses\"")));
721                 }
722
723                 foreach(l, elemlist)
724                 {
725                         char       *curhost = (char *) lfirst(l);
726
727                         if (strcmp(curhost, "*") == 0)
728                                 status = StreamServerPort(AF_UNSPEC, NULL,
729                                                                                   (unsigned short) PostPortNumber,
730                                                                                   UnixSocketDir,
731                                                                                   ListenSocket, MAXLISTEN);
732                         else
733                                 status = StreamServerPort(AF_UNSPEC, curhost,
734                                                                                   (unsigned short) PostPortNumber,
735                                                                                   UnixSocketDir,
736                                                                                   ListenSocket, MAXLISTEN);
737                         if (status != STATUS_OK)
738                                 ereport(WARNING,
739                                          (errmsg("could not create listen socket for \"%s\"",
740                                                          curhost)));
741                 }
742
743                 list_free(elemlist);
744                 pfree(rawstring);
745         }
746
747 #ifdef USE_RENDEZVOUS
748         /* Register for Rendezvous only if we opened TCP socket(s) */
749         if (ListenSocket[0] != -1 && rendezvous_name != NULL)
750         {
751                 DNSServiceRegistrationCreate(rendezvous_name,
752                                                                          "_postgresql._tcp.",
753                                                                          "",
754                                                                          htonl(PostPortNumber),
755                                                                          "",
756                                                                  (DNSServiceRegistrationReply) reg_reply,
757                                                                          NULL);
758         }
759 #endif
760
761 #ifdef HAVE_UNIX_SOCKETS
762         status = StreamServerPort(AF_UNIX, NULL,
763                                                           (unsigned short) PostPortNumber,
764                                                           UnixSocketDir,
765                                                           ListenSocket, MAXLISTEN);
766         if (status != STATUS_OK)
767                 ereport(WARNING,
768                                 (errmsg("could not create Unix-domain socket")));
769 #endif
770
771         /*
772          * check that we have some socket to listen on
773          */
774         if (ListenSocket[0] == -1)
775                 ereport(FATAL,
776                                 (errmsg("no socket created for listening")));
777
778         XLOGPathInit();
779
780         /*
781          * Set up shared memory and semaphores.
782          */
783         reset_shared(PostPortNumber);
784
785         /*
786          * Estimate number of openable files.  This must happen after setting
787          * up semaphores, because on some platforms semaphores count as open
788          * files.
789          */
790         set_max_safe_fds();
791
792         /*
793          * Initialize the list of active backends.
794          */
795         BackendList = DLNewList();
796
797 #ifdef WIN32
798
799         /*
800          * Initialize the child pid/HANDLE arrays for signal handling.
801          */
802         win32_childPIDArray = (pid_t *)
803                 malloc(NUM_BACKENDARRAY_ELEMS * sizeof(pid_t));
804         win32_childHNDArray = (HANDLE *)
805                 malloc(NUM_BACKENDARRAY_ELEMS * sizeof(HANDLE));
806         if (!win32_childPIDArray || !win32_childHNDArray)
807                 ereport(FATAL,
808                                 (errcode(ERRCODE_OUT_OF_MEMORY),
809                                  errmsg("out of memory")));
810
811         /*
812          * Set up a handle that child processes can use to check whether the
813          * postmaster is still running.
814          */
815         if (DuplicateHandle(GetCurrentProcess(),
816                                                 GetCurrentProcess(),
817                                                 GetCurrentProcess(),
818                                                 &PostmasterHandle,
819                                                 0,
820                                                 TRUE,
821                                                 DUPLICATE_SAME_ACCESS) == 0)
822                 ereport(FATAL,
823                         (errmsg_internal("could not duplicate postmaster handle: error code %d",
824                                                          (int) GetLastError())));
825 #endif
826
827         /*
828          * Record postmaster options.  We delay this till now to avoid
829          * recording bogus options (eg, NBuffers too high for available
830          * memory).
831          */
832         if (!CreateOptsFile(argc, argv, my_exec_path))
833                 ExitPostmaster(1);
834
835 #ifdef EXEC_BACKEND
836         write_nondefault_variables(PGC_POSTMASTER);
837 #endif
838
839         /*
840          * Write the external PID file if requested
841          */
842         if (external_pid_file)
843         {
844                 FILE       *fpidfile = fopen(external_pid_file, "w");
845
846                 if (fpidfile)
847                 {
848                         fprintf(fpidfile, "%d\n", MyProcPid);
849                         fclose(fpidfile);
850                         /* Should we remove the pid file on postmaster exit? */
851                 }
852                 else
853                         write_stderr("%s: could not write external PID file \"%s\": %s\n",
854                                                  progname, external_pid_file, strerror(errno));
855         }
856
857         /*
858          * Set up signal handlers for the postmaster process.
859          *
860          * CAUTION: when changing this list, check for side-effects on the signal
861          * handling setup of child processes.  See tcop/postgres.c,
862          * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/pgarch.c,
863          * postmaster/pgstat.c, and postmaster/syslogger.c.
864          */
865         pqinitmask();
866         PG_SETMASK(&BlockSig);
867
868         pqsignal(SIGHUP, SIGHUP_handler);       /* reread config file and have
869                                                                                  * children do same */
870         pqsignal(SIGINT, pmdie);        /* send SIGTERM and shut down */
871         pqsignal(SIGQUIT, pmdie);       /* send SIGQUIT and die */
872         pqsignal(SIGTERM, pmdie);       /* wait for children and shut down */
873         pqsignal(SIGALRM, SIG_IGN); /* ignored */
874         pqsignal(SIGPIPE, SIG_IGN); /* ignored */
875         pqsignal(SIGUSR1, sigusr1_handler); /* message from child process */
876         pqsignal(SIGUSR2, dummy_handler);       /* unused, reserve for children */
877         pqsignal(SIGCHLD, reaper);      /* handle child termination */
878         pqsignal(SIGTTIN, SIG_IGN); /* ignored */
879         pqsignal(SIGTTOU, SIG_IGN); /* ignored */
880         /* ignore SIGXFSZ, so that ulimit violations work like disk full */
881 #ifdef SIGXFSZ
882         pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
883 #endif
884
885         /*
886          * If enabled, start up syslogger collection subprocess
887          */
888         SysLoggerPID = SysLogger_Start();
889
890         /*
891          * Reset whereToSendOutput from Debug (its starting state) to None.
892          * This stops ereport from sending log messages to stderr unless
893          * Log_destination permits.  We don't do this until the postmaster is
894          * fully launched, since startup failures may as well be reported to
895          * stderr.
896          */
897         whereToSendOutput = None;
898
899         /*
900          * Initialize the statistics collector stuff
901          */
902         pgstat_init();
903
904         /*
905          * Load cached files for client authentication.
906          */
907         load_hba();
908         load_ident();
909         load_user();
910         load_group();
911
912         /*
913          * We're ready to rock and roll...
914          */
915         StartupPID = StartupDataBase();
916
917         status = ServerLoop();
918
919         /*
920          * ServerLoop probably shouldn't ever return, but if it does, close
921          * down.
922          */
923         ExitPostmaster(status != STATUS_OK);
924
925         return 0;                                       /* not reached */
926 }
927
928
929 /*
930  * Validate the proposed data directory
931  */
932 static void
933 checkDataDir(void)
934 {
935         char            path[MAXPGPATH];
936         FILE       *fp;
937         struct stat stat_buf;
938
939         Assert(DataDir);
940
941         if (stat(DataDir, &stat_buf) != 0)
942         {
943                 if (errno == ENOENT)
944                         ereport(FATAL,
945                                         (errcode_for_file_access(),
946                                          errmsg("data directory \"%s\" does not exist",
947                                                         DataDir)));
948                 else
949                         ereport(FATAL,
950                                         (errcode_for_file_access(),
951                          errmsg("could not read permissions of directory \"%s\": %m",
952                                         DataDir)));
953         }
954
955         /*
956          * Check if the directory has group or world access.  If so, reject.
957          *
958          * XXX temporarily suppress check when on Windows, because there may not
959          * be proper support for Unix-y file permissions.  Need to think of a
960          * reasonable check to apply on Windows.
961          */
962 #if !defined(WIN32) && !defined(__CYGWIN__)
963         if (stat_buf.st_mode & (S_IRWXG | S_IRWXO))
964                 ereport(FATAL,
965                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
966                                  errmsg("data directory \"%s\" has group or world access",
967                                                 DataDir),
968                                  errdetail("Permissions should be u=rwx (0700).")));
969 #endif
970
971         /* Look for PG_VERSION before looking for pg_control */
972         ValidatePgVersion(DataDir);
973
974         snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
975
976         fp = AllocateFile(path, PG_BINARY_R);
977         if (fp == NULL)
978         {
979                 write_stderr("%s: could not find the database system\n"
980                                          "Expected to find it in the directory \"%s\",\n"
981                                          "but could not open file \"%s\": %s\n",
982                                          progname, DataDir, path, strerror(errno));
983                 ExitPostmaster(2);
984         }
985         FreeFile(fp);
986 }
987
988
#ifdef USE_RENDEZVOUS

/*
 * reg_reply
 *		Do-nothing reply callback for DNSServiceRegistrationCreate().
 *
 * Both arguments are accepted only to satisfy the callback signature and
 * are ignored.
 */
static void
reg_reply(DNSServiceRegistrationReplyErrorType errorCode, void *context)
{
	/* intentionally empty: nothing is done with the registration reply */
	(void) errorCode;
	(void) context;
}

#endif   /* USE_RENDEZVOUS */
1000
1001
1002 /*
1003  * Fork away from the controlling terminal (-S option)
1004  */
1005 static void
1006 pmdaemonize(void)
1007 {
1008 #ifndef WIN32
1009         int                     i;
1010         pid_t           pid;
1011
1012 #ifdef LINUX_PROFILE
1013         struct itimerval prof_itimer;
1014 #endif
1015
1016 #ifdef LINUX_PROFILE
1017         /* see comments in BackendStartup */
1018         getitimer(ITIMER_PROF, &prof_itimer);
1019 #endif
1020
1021         pid = fork();
1022         if (pid == (pid_t) -1)
1023         {
1024                 write_stderr("%s: could not fork background process: %s\n",
1025                                          progname, strerror(errno));
1026                 ExitPostmaster(1);
1027         }
1028         else if (pid)
1029         {                                                       /* parent */
1030                 /* Parent should just exit, without doing any atexit cleanup */
1031                 _exit(0);
1032         }
1033
1034 #ifdef LINUX_PROFILE
1035         setitimer(ITIMER_PROF, &prof_itimer, NULL);
1036 #endif
1037
1038         MyProcPid = PostmasterPid = getpid();           /* reset PID vars to child */
1039
1040 /* GH: If there's no setsid(), we hopefully don't need silent mode.
1041  * Until there's a better solution.
1042  */
1043 #ifdef HAVE_SETSID
1044         if (setsid() < 0)
1045         {
1046                 write_stderr("%s: could not dissociate from controlling TTY: %s\n",
1047                                          progname, strerror(errno));
1048                 ExitPostmaster(1);
1049         }
1050 #endif
1051         i = open(NULL_DEV, O_RDWR);
1052         dup2(i, 0);
1053         dup2(i, 1);
1054         dup2(i, 2);
1055         close(i);
1056 #else                                                   /* WIN32 */
1057         /* not supported */
1058         elog(FATAL, "SilentMode not supported under WIN32");
1059 #endif   /* WIN32 */
1060 }
1061
1062
/*
 * usage
 *		Write the command-line help text to stdout.
 *
 * progname is substituted into the first two lines.  Each message is passed
 * through gettext() as its own literal, so the set of translatable strings
 * is exactly one per call below.  The -A line is emitted only in
 * assert-enabled builds and the -l line only in SSL-enabled builds.
 */
static void
usage(const char *progname)
{
	/* The only two messages that carry a format argument */
	printf(gettext("%s is the PostgreSQL server.\n\n"), progname);
	printf(gettext("Usage:\n  %s [OPTION]...\n\n"), progname);

	/* Regular options */
	fputs(gettext("Options:\n"), stdout);
#ifdef USE_ASSERT_CHECKING
	fputs(gettext("  -A 1|0          enable/disable run-time assert checking\n"), stdout);
#endif
	fputs(gettext("  -B NBUFFERS     number of shared buffers\n"), stdout);
	fputs(gettext("  -c NAME=VALUE   set run-time parameter\n"), stdout);
	fputs(gettext("  -d 1-5          debugging level\n"), stdout);
	fputs(gettext("  -D DATADIR      database directory\n"), stdout);
	fputs(gettext("  -F              turn fsync off\n"), stdout);
	fputs(gettext("  -h HOSTNAME     host name or IP address to listen on\n"), stdout);
	fputs(gettext("  -i              enable TCP/IP connections\n"), stdout);
	fputs(gettext("  -k DIRECTORY    Unix-domain socket location\n"), stdout);
#ifdef USE_SSL
	fputs(gettext("  -l              enable SSL connections\n"), stdout);
#endif
	fputs(gettext("  -N MAX-CONNECT  maximum number of allowed connections\n"), stdout);
	fputs(gettext("  -o OPTIONS      pass \"OPTIONS\" to each server process\n"), stdout);
	fputs(gettext("  -p PORT         port number to listen on\n"), stdout);
	fputs(gettext("  -S              silent mode (start in background without logging output)\n"), stdout);
	fputs(gettext("  --help          show this help, then exit\n"), stdout);
	fputs(gettext("  --version       output version information, then exit\n"), stdout);

	/* Options intended for developers */
	fputs(gettext("\nDeveloper options:\n"), stdout);
	fputs(gettext("  -n              do not reinitialize shared memory after abnormal exit\n"), stdout);
	fputs(gettext("  -s              send SIGSTOP to all backend servers if one dies\n"), stdout);

	/* Closing pointer to the documentation and the bug-report address */
	fputs(gettext("\nPlease read the documentation for the complete list of run-time\n"
				  "configuration settings and how to set them on the command line or in\n"
				  "the configuration file.\n\n"
				  "Report bugs to <pgsql-bugs@postgresql.org>.\n"), stdout);
}
1102
1103
1104 /*
1105  * Main idle loop of postmaster
1106  */
1107 static int
1108 ServerLoop(void)
1109 {
1110         fd_set          readmask;
1111         int                     nSockets;
1112         time_t          now,
1113                                 last_touch_time;
1114         struct timeval earlier,
1115                                 later;
1116         struct timezone tz;
1117
1118         gettimeofday(&earlier, &tz);
1119         last_touch_time = time(NULL);
1120
1121         nSockets = initMasks(&readmask);
1122
1123         for (;;)
1124         {
1125                 Port       *port;
1126                 fd_set          rmask;
1127                 struct timeval timeout;
1128                 int                     selres;
1129                 int                     i;
1130
1131                 /*
1132                  * Wait for something to happen.
1133                  *
1134                  * We wait at most one minute, to ensure that the other background
1135                  * tasks handled below get done even when no requests are
1136                  * arriving.
1137                  */
1138                 memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1139
1140                 timeout.tv_sec = 60;
1141                 timeout.tv_usec = 0;
1142
1143                 PG_SETMASK(&UnBlockSig);
1144
1145                 selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1146
1147                 /*
1148                  * Block all signals until we wait again.  (This makes it safe for
1149                  * our signal handlers to do nontrivial work.)
1150                  */
1151                 PG_SETMASK(&BlockSig);
1152
1153                 if (selres < 0)
1154                 {
1155                         if (errno != EINTR && errno != EWOULDBLOCK)
1156                         {
1157                                 ereport(LOG,
1158                                                 (errcode_for_socket_access(),
1159                                                  errmsg("select() failed in postmaster: %m")));
1160                                 return STATUS_ERROR;
1161                         }
1162                 }
1163
1164                 /*
1165                  * New connection pending on any of our sockets? If so, fork a
1166                  * child process to deal with it.
1167                  */
1168                 if (selres > 0)
1169                 {
1170                         /*
1171                          * Select a random seed at the time of first receiving a
1172                          * request.
1173                          */
1174                         while (random_seed == 0)
1175                         {
1176                                 gettimeofday(&later, &tz);
1177
1178                                 /*
1179                                  * We are not sure how much precision is in tv_usec, so we
1180                                  * swap the high and low 16 bits of 'later' and XOR them with
1181                                  * 'earlier'. On the off chance that the result is 0, we
1182                                  * loop until it isn't.
1183                                  */
1184                                 random_seed = earlier.tv_usec ^
1185                                         ((later.tv_usec << 16) |
1186                                          ((later.tv_usec >> 16) & 0xffff));
1187                         }
1188
1189                         for (i = 0; i < MAXLISTEN; i++)
1190                         {
1191                                 if (ListenSocket[i] == -1)
1192                                         break;
1193                                 if (FD_ISSET(ListenSocket[i], &rmask))
1194                                 {
1195                                         port = ConnCreate(ListenSocket[i]);
1196                                         if (port)
1197                                         {
1198                                                 BackendStartup(port);
1199
1200                                                 /*
1201                                                  * We no longer need the open socket or port
1202                                                  * structure in this process
1203                                                  */
1204                                                 StreamClose(port->sock);
1205                                                 ConnFree(port);
1206                                         }
1207                                 }
1208                         }
1209                 }
1210
1211                 /* If we have lost the system logger, try to start a new one */
1212                 if (SysLoggerPID == 0 && Redirect_stderr)
1213                         SysLoggerPID = SysLogger_Start();
1214
1215                 /*
1216                  * If no background writer process is running, and we are not in a
1217                  * state that prevents it, start one.  It doesn't matter if this
1218                  * fails, we'll just try again later.
1219                  */
1220                 if (BgWriterPID == 0 && StartupPID == 0 && !FatalError)
1221                 {
1222                         BgWriterPID = StartBackgroundWriter();
1223                         /* If shutdown is pending, set it going */
1224                         if (Shutdown > NoShutdown && BgWriterPID != 0)
1225                                 kill(BgWriterPID, SIGUSR2);
1226                 }
1227
1228                 /* If we have lost the archiver, try to start a new one */
1229                 if (XLogArchivingActive() && PgArchPID == 0 &&
1230                         StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
1231                         PgArchPID = pgarch_start();
1232
1233                 /* If we have lost the stats collector, try to start a new one */
1234                 if (PgStatPID == 0 &&
1235                         StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
1236                         PgStatPID = pgstat_start();
1237
1238                 /*
1239                  * Touch the socket and lock file at least every ten minutes, to
1240                  * ensure that they are not removed by overzealous /tmp-cleaning
1241                  * tasks.
1242                  */
1243                 now = time(NULL);
1244                 if (now - last_touch_time >= 10 * 60)
1245                 {
1246                         TouchSocketFile();
1247                         TouchSocketLockFile();
1248                         last_touch_time = now;
1249                 }
1250         }
1251 }
1252
1253
1254 /*
1255  * Initialise the masks for select() for the ports we are listening on.
1256  * Return the number of sockets to listen on.
1257  */
1258 static int
1259 initMasks(fd_set *rmask)
1260 {
1261         int                     nsocks = -1;
1262         int                     i;
1263
1264         FD_ZERO(rmask);
1265
1266         for (i = 0; i < MAXLISTEN; i++)
1267         {
1268                 int                     fd = ListenSocket[i];
1269
1270                 if (fd == -1)
1271                         break;
1272                 FD_SET(fd, rmask);
1273                 if (fd > nsocks)
1274                         nsocks = fd;
1275         }
1276
1277         return nsocks + 1;
1278 }
1279
1280
1281 /*
1282  * Read the startup packet and do something according to it.
1283  *
1284  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1285  * not return at all.
1286  *
1287  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1288  * if that's what you want.  Return STATUS_ERROR if you don't want to
1289  * send anything to the client, which would typically be appropriate
1290  * if we detect a communications failure.)
1291  */
1292 static int
1293 ProcessStartupPacket(Port *port, bool SSLdone)
1294 {
1295         int32           len;
1296         void       *buf;
1297         ProtocolVersion proto;
1298         MemoryContext oldcontext;
1299
1300         if (pq_getbytes((char *) &len, 4) == EOF)
1301         {
1302                 /*
1303                  * EOF after SSLdone probably means the client didn't like our
1304                  * response to NEGOTIATE_SSL_CODE.      That's not an error condition,
1305                  * so don't clutter the log with a complaint.
1306                  */
1307                 if (!SSLdone)
1308                         ereport(COMMERROR,
1309                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1310                                          errmsg("incomplete startup packet")));
1311                 return STATUS_ERROR;
1312         }
1313
1314         len = ntohl(len);
1315         len -= 4;
1316
1317         if (len < (int32) sizeof(ProtocolVersion) ||
1318                 len > MAX_STARTUP_PACKET_LENGTH)
1319         {
1320                 ereport(COMMERROR,
1321                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1322                                  errmsg("invalid length of startup packet")));
1323                 return STATUS_ERROR;
1324         }
1325
1326         /*
1327          * Allocate at least the size of an old-style startup packet, plus one
1328          * extra byte, and make sure all are zeroes.  This ensures we will
1329          * have null termination of all strings, in both fixed- and
1330          * variable-length packet layouts.
1331          */
1332         if (len <= (int32) sizeof(StartupPacket))
1333                 buf = palloc0(sizeof(StartupPacket) + 1);
1334         else
1335                 buf = palloc0(len + 1);
1336
1337         if (pq_getbytes(buf, len) == EOF)
1338         {
1339                 ereport(COMMERROR,
1340                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1341                                  errmsg("incomplete startup packet")));
1342                 return STATUS_ERROR;
1343         }
1344
1345         /*
1346          * The first field is either a protocol version number or a special
1347          * request code.
1348          */
1349         port->proto = proto = ntohl(*((ProtocolVersion *) buf));
1350
1351         if (proto == CANCEL_REQUEST_CODE)
1352         {
1353                 processCancelRequest(port, buf);
1354                 return 127;                             /* XXX */
1355         }
1356
1357         if (proto == NEGOTIATE_SSL_CODE && !SSLdone)
1358         {
1359                 char            SSLok;
1360
1361 #ifdef USE_SSL
1362                 /* No SSL when disabled or on Unix sockets */
1363                 if (!EnableSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
1364                         SSLok = 'N';
1365                 else
1366                         SSLok = 'S';            /* Support for SSL */
1367 #else
1368                 SSLok = 'N';                    /* No support for SSL */
1369 #endif
1370                 if (send(port->sock, &SSLok, 1, 0) != 1)
1371                 {
1372                         ereport(COMMERROR,
1373                                         (errcode_for_socket_access(),
1374                                  errmsg("failed to send SSL negotiation response: %m")));
1375                         return STATUS_ERROR;    /* close the connection */
1376                 }
1377
1378 #ifdef USE_SSL
1379                 if (SSLok == 'S' && secure_open_server(port) == -1)
1380                         return STATUS_ERROR;
1381 #endif
1382                 /* regular startup packet, cancel, etc packet should follow... */
1383                 /* but not another SSL negotiation request */
1384                 return ProcessStartupPacket(port, true);
1385         }
1386
1387         /* Could add additional special packet types here */
1388
1389         /*
1390          * Set FrontendProtocol now so that ereport() knows what format to
1391          * send if we fail during startup.
1392          */
1393         FrontendProtocol = proto;
1394
1395         /* Check we can handle the protocol the frontend is using. */
1396
1397         if (PG_PROTOCOL_MAJOR(proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) ||
1398           PG_PROTOCOL_MAJOR(proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) ||
1399         (PG_PROTOCOL_MAJOR(proto) == PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) &&
1400          PG_PROTOCOL_MINOR(proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST)))
1401                 ereport(FATAL,
1402                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1403                                  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
1404                                           PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
1405                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST),
1406                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST),
1407                                                 PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST))));
1408
1409         /*
1410          * Now fetch parameters out of startup packet and save them into the
1411          * Port structure.      All data structures attached to the Port struct
1412          * must be allocated in TopMemoryContext so that they won't disappear
1413          * when we pass them to PostgresMain (see BackendRun).  We need not
1414          * worry about leaking this storage on failure, since we aren't in the
1415          * postmaster process anymore.
1416          */
1417         oldcontext = MemoryContextSwitchTo(TopMemoryContext);
1418
1419         if (PG_PROTOCOL_MAJOR(proto) >= 3)
1420         {
1421                 int32           offset = sizeof(ProtocolVersion);
1422
1423                 /*
1424                  * Scan packet body for name/option pairs.      We can assume any
1425                  * string beginning within the packet body is null-terminated,
1426                  * thanks to zeroing extra byte above.
1427                  */
1428                 port->guc_options = NIL;
1429
1430                 while (offset < len)
1431                 {
1432                         char       *nameptr = ((char *) buf) + offset;
1433                         int32           valoffset;
1434                         char       *valptr;
1435
1436                         if (*nameptr == '\0')
1437                                 break;                  /* found packet terminator */
1438                         valoffset = offset + strlen(nameptr) + 1;
1439                         if (valoffset >= len)
1440                                 break;                  /* missing value, will complain below */
1441                         valptr = ((char *) buf) + valoffset;
1442
1443                         if (strcmp(nameptr, "database") == 0)
1444                                 port->database_name = pstrdup(valptr);
1445                         else if (strcmp(nameptr, "user") == 0)
1446                                 port->user_name = pstrdup(valptr);
1447                         else if (strcmp(nameptr, "options") == 0)
1448                                 port->cmdline_options = pstrdup(valptr);
1449                         else
1450                         {
1451                                 /* Assume it's a generic GUC option */
1452                                 port->guc_options = lappend(port->guc_options,
1453                                                                                         pstrdup(nameptr));
1454                                 port->guc_options = lappend(port->guc_options,
1455                                                                                         pstrdup(valptr));
1456                         }
1457                         offset = valoffset + strlen(valptr) + 1;
1458                 }
1459
1460                 /*
1461                  * If we didn't find a packet terminator exactly at the end of the
1462                  * given packet length, complain.
1463                  */
1464                 if (offset != len - 1)
1465                         ereport(FATAL,
1466                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1467                                          errmsg("invalid startup packet layout: expected terminator as last byte")));
1468         }
1469         else
1470         {
1471                 /*
1472                  * Get the parameters from the old-style, fixed-width-fields
1473                  * startup packet as C strings.  The packet destination was
1474                  * cleared first so a short packet has zeros silently added.  We
1475                  * have to be prepared to truncate the pstrdup result for oversize
1476                  * fields, though.
1477                  */
1478                 StartupPacket *packet = (StartupPacket *) buf;
1479
1480                 port->database_name = pstrdup(packet->database);
1481                 if (strlen(port->database_name) > sizeof(packet->database))
1482                         port->database_name[sizeof(packet->database)] = '\0';
1483                 port->user_name = pstrdup(packet->user);
1484                 if (strlen(port->user_name) > sizeof(packet->user))
1485                         port->user_name[sizeof(packet->user)] = '\0';
1486                 port->cmdline_options = pstrdup(packet->options);
1487                 if (strlen(port->cmdline_options) > sizeof(packet->options))
1488                         port->cmdline_options[sizeof(packet->options)] = '\0';
1489                 port->guc_options = NIL;
1490         }
1491
1492         /* Check a user name was given. */
1493         if (port->user_name == NULL || port->user_name[0] == '\0')
1494                 ereport(FATAL,
1495                                 (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
1496                  errmsg("no PostgreSQL user name specified in startup packet")));
1497
1498         /* The database defaults to the user name. */
1499         if (port->database_name == NULL || port->database_name[0] == '\0')
1500                 port->database_name = pstrdup(port->user_name);
1501
1502         if (Db_user_namespace)
1503         {
1504                 /*
1505                  * If user@, it is a global user, remove '@'. We only want to do
1506                  * this if there is an '@' at the end and no earlier in the user
1507                  * string or they may fake as a local user of another database
1508                  * attaching to this database.
1509                  */
1510                 if (strchr(port->user_name, '@') ==
1511                         port->user_name + strlen(port->user_name) - 1)
1512                         *strchr(port->user_name, '@') = '\0';
1513                 else
1514                 {
1515                         /* Append '@' and dbname */
1516                         char       *db_user;
1517
1518                         db_user = palloc(strlen(port->user_name) +
1519                                                          strlen(port->database_name) + 2);
1520                         sprintf(db_user, "%s@%s", port->user_name, port->database_name);
1521                         port->user_name = db_user;
1522                 }
1523         }
1524
1525         /*
1526          * Truncate given database and user names to length of a Postgres
1527          * name.  This avoids lookup failures when overlength names are given.
1528          */
1529         if (strlen(port->database_name) >= NAMEDATALEN)
1530                 port->database_name[NAMEDATALEN - 1] = '\0';
1531         if (strlen(port->user_name) >= NAMEDATALEN)
1532                 port->user_name[NAMEDATALEN - 1] = '\0';
1533
1534         /*
1535          * Done putting stuff in TopMemoryContext.
1536          */
1537         MemoryContextSwitchTo(oldcontext);
1538
1539         /*
1540          * If we're going to reject the connection due to database state, say
1541          * so now instead of wasting cycles on an authentication exchange.
1542          * (This also allows a pg_ping utility to be written.)
1543          */
1544         switch (port->canAcceptConnections)
1545         {
1546                 case CAC_STARTUP:
1547                         ereport(FATAL,
1548                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1549                                          errmsg("the database system is starting up")));
1550                         break;
1551                 case CAC_SHUTDOWN:
1552                         ereport(FATAL,
1553                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1554                                          errmsg("the database system is shutting down")));
1555                         break;
1556                 case CAC_RECOVERY:
1557                         ereport(FATAL,
1558                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1559                                          errmsg("the database system is in recovery mode")));
1560                         break;
1561                 case CAC_TOOMANY:
1562                         ereport(FATAL,
1563                                         (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
1564                                          errmsg("sorry, too many clients already")));
1565                         break;
1566                 case CAC_OK:
1567                 default:
1568                         break;
1569         }
1570
1571         return STATUS_OK;
1572 }
1573
1574
1575 /*
1576  * The client has sent a cancel request packet, not a normal
1577  * start-a-new-connection packet.  Perform the necessary processing.
1578  * Nothing is sent back to the client.
1579  */
1580 static void
1581 processCancelRequest(Port *port, void *pkt)
1582 {
1583         CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
1584         int                     backendPID;
1585         long            cancelAuthCode;
1586         Backend    *bp;
1587 #ifndef EXEC_BACKEND
1588         Dlelem     *curr;
1589 #else
1590         int                     i;
1591 #endif
1592
1593         backendPID = (int) ntohl(canc->backendPID);
1594         cancelAuthCode = (long) ntohl(canc->cancelAuthCode);
1595
1596         /*
1597          * See if we have a matching backend.  In the EXEC_BACKEND case, we
1598          * can no longer access the postmaster's own backend list, and must
1599          * rely on the duplicate array in shared memory.
1600          */
1601 #ifndef EXEC_BACKEND
1602         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
1603         {
1604                 bp = (Backend *) DLE_VAL(curr);
1605 #else
1606         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
1607         {
1608                 bp = (Backend *) &ShmemBackendArray[i];
1609 #endif
1610                 if (bp->pid == backendPID)
1611                 {
1612                         if (bp->cancel_key == cancelAuthCode)
1613                         {
1614                                 /* Found a match; signal that backend to cancel current op */
1615                                 ereport(DEBUG2,
1616                                                 (errmsg_internal("processing cancel request: sending SIGINT to process %d",
1617                                                                                  backendPID)));
1618                                 kill(bp->pid, SIGINT);
1619                         }
1620                         else
1621                                 /* Right PID, wrong key: no way, Jose */
1622                                 ereport(DEBUG2,
1623                                                 (errmsg_internal("bad key in cancel request for process %d",
1624                                                                                  backendPID)));
1625                         return;
1626                 }
1627         }
1628
1629         /* No matching backend */
1630         ereport(DEBUG2,
1631                         (errmsg_internal("bad pid in cancel request for process %d",
1632                                                          backendPID)));
1633 }
1634
1635 /*
1636  * canAcceptConnections --- check to see if database state allows connections.
1637  */
1638 static enum CAC_state
1639 canAcceptConnections(void)
1640 {
1641         /* Can't start backends when in startup/shutdown/recovery state. */
1642         if (Shutdown > NoShutdown)
1643                 return CAC_SHUTDOWN;
1644         if (StartupPID)
1645                 return CAC_STARTUP;
1646         if (FatalError)
1647                 return CAC_RECOVERY;
1648
1649         /*
1650          * Don't start too many children.
1651          *
1652          * We allow more connections than we can have backends here because some
1653          * might still be authenticating; they might fail auth, or some
1654          * existing backend might exit before the auth cycle is completed. The
1655          * exact MaxBackends limit is enforced when a new backend tries to
1656          * join the shared-inval backend array.
1657          */
1658         if (CountChildren() >= 2 * MaxBackends)
1659                 return CAC_TOOMANY;
1660
1661         return CAC_OK;
1662 }
1663
1664
1665 /*
1666  * ConnCreate -- create a local connection data structure
1667  */
1668 static Port *
1669 ConnCreate(int serverFd)
1670 {
1671         Port       *port;
1672
1673         if (!(port = (Port *) calloc(1, sizeof(Port))))
1674         {
1675                 ereport(LOG,
1676                                 (errcode(ERRCODE_OUT_OF_MEMORY),
1677                                  errmsg("out of memory")));
1678                 ExitPostmaster(1);
1679         }
1680
1681         if (StreamConnection(serverFd, port) != STATUS_OK)
1682         {
1683                 StreamClose(port->sock);
1684                 ConnFree(port);
1685                 port = NULL;
1686         }
1687         else
1688         {
1689                 /*
1690                  * Precompute password salt values to use for this connection.
1691                  * It's slightly annoying to do this long in advance of knowing
1692                  * whether we'll need 'em or not, but we must do the random()
1693                  * calls before we fork, not after.  Else the postmaster's random
1694                  * sequence won't get advanced, and all backends would end up
1695                  * using the same salt...
1696                  */
1697                 RandomSalt(port->cryptSalt, port->md5Salt);
1698         }
1699
1700         return port;
1701 }
1702
1703
1704 /*
1705  * ConnFree -- free a local connection data structure
1706  */
1707 static void
1708 ConnFree(Port *conn)
1709 {
1710 #ifdef USE_SSL
1711         secure_close(conn);
1712 #endif
1713         free(conn);
1714 }
1715
1716
1717 /*
1718  * ClosePostmasterPorts -- close all the postmaster's open sockets
1719  *
1720  * This is called during child process startup to release file descriptors
1721  * that are not needed by that child process.  The postmaster still has
1722  * them open, of course.
1723  *
1724  * Note: we pass am_syslogger as a boolean because we don't want to set
1725  * the global variable yet when this is called.
1726  */
1727 void
1728 ClosePostmasterPorts(bool am_syslogger)
1729 {
1730         int                     i;
1731
1732         /* Close the listen sockets */
1733         for (i = 0; i < MAXLISTEN; i++)
1734         {
1735                 if (ListenSocket[i] != -1)
1736                 {
1737                         StreamClose(ListenSocket[i]);
1738                         ListenSocket[i] = -1;
1739                 }
1740         }
1741
1742         /* If using syslogger, close the read side of the pipe */
1743         if (!am_syslogger)
1744         {
1745 #ifndef WIN32
1746                 if (syslogPipe[0] >= 0)
1747                         close(syslogPipe[0]);
1748                 syslogPipe[0] = -1;
1749 #else
1750                 if (syslogPipe[0])
1751                         CloseHandle(syslogPipe[0]);
1752                 syslogPipe[0] = 0;
1753 #endif
1754         }
1755 }
1756
1757
1758 /*
1759  * reset_shared -- reset shared memory and semaphores
1760  */
1761 static void
1762 reset_shared(unsigned short port)
1763 {
1764         /*
1765          * Create or re-create shared memory and semaphores.
1766          *
1767          * Note: in each "cycle of life" we will normally assign the same IPC
1768          * keys (if using SysV shmem and/or semas), since the port number is
1769          * used to determine IPC keys.  This helps ensure that we will clean
1770          * up dead IPC objects if the postmaster crashes and is restarted.
1771          */
1772         CreateSharedMemoryAndSemaphores(false, MaxBackends, port);
1773 }
1774
1775
1776 /*
1777  * SIGHUP -- reread config files, and tell children to do same
1778  */
1779 static void
1780 SIGHUP_handler(SIGNAL_ARGS)
1781 {
1782         int                     save_errno = errno;
1783
1784         PG_SETMASK(&BlockSig);
1785
1786         if (Shutdown <= SmartShutdown)
1787         {
1788                 ereport(LOG,
1789                          (errmsg("received SIGHUP, reloading configuration files")));
1790                 ProcessConfigFile(PGC_SIGHUP);
1791                 SignalChildren(SIGHUP);
1792                 if (BgWriterPID != 0)
1793                         kill(BgWriterPID, SIGHUP);
1794                 if (PgArchPID != 0)
1795                         kill(PgArchPID, SIGHUP);
1796                 if (SysLoggerPID != 0)
1797                         kill(SysLoggerPID, SIGHUP);
1798                 /* PgStatPID does not currently need SIGHUP */
1799                 load_hba();
1800                 load_ident();
1801
1802 #ifdef EXEC_BACKEND
1803                 /* Update the starting-point file for future children */
1804                 write_nondefault_variables(PGC_SIGHUP);
1805 #endif
1806         }
1807
1808         PG_SETMASK(&UnBlockSig);
1809
1810         errno = save_errno;
1811 }
1812
1813
1814 /*
1815  * pmdie -- signal handler for processing various postmaster signals.
1816  */
1817 static void
1818 pmdie(SIGNAL_ARGS)
1819 {
1820         int                     save_errno = errno;
1821
1822         PG_SETMASK(&BlockSig);
1823
1824         ereport(DEBUG2,
1825                         (errmsg_internal("postmaster received signal %d",
1826                                                          postgres_signal_arg)));
1827
1828         switch (postgres_signal_arg)
1829         {
1830                 case SIGTERM:
1831
1832                         /*
1833                          * Smart Shutdown:
1834                          *
1835                          * Wait for children to end their work, then shut down.
1836                          */
1837                         if (Shutdown >= SmartShutdown)
1838                                 break;
1839                         Shutdown = SmartShutdown;
1840                         ereport(LOG,
1841                                         (errmsg("received smart shutdown request")));
1842
1843                         if (DLGetHead(BackendList))
1844                                 break;                  /* let reaper() handle this */
1845
1846                         /*
1847                          * No children left. Begin shutdown of data base system.
1848                          */
1849                         if (StartupPID != 0 || FatalError)
1850                                 break;                  /* let reaper() handle this */
1851                         /* Start the bgwriter if not running */
1852                         if (BgWriterPID == 0)
1853                                 BgWriterPID = StartBackgroundWriter();
1854                         /* And tell it to shut down */
1855                         if (BgWriterPID != 0)
1856                                 kill(BgWriterPID, SIGUSR2);
1857                         /* Tell pgarch to shut down too; nothing left for it to do */
1858                         if (PgArchPID != 0)
1859                                 kill(PgArchPID, SIGQUIT);
1860                         /* Tell pgstat to shut down too; nothing left for it to do */
1861                         if (PgStatPID != 0)
1862                                 kill(PgStatPID, SIGQUIT);
1863                         break;
1864
1865                 case SIGINT:
1866
1867                         /*
1868                          * Fast Shutdown:
1869                          *
1870                          * Abort all children with SIGTERM (rollback active transactions
1871                          * and exit) and shut down when they are gone.
1872                          */
1873                         if (Shutdown >= FastShutdown)
1874                                 break;
1875                         Shutdown = FastShutdown;
1876                         ereport(LOG,
1877                                         (errmsg("received fast shutdown request")));
1878
1879                         if (DLGetHead(BackendList))
1880                         {
1881                                 if (!FatalError)
1882                                 {
1883                                         ereport(LOG,
1884                                                         (errmsg("aborting any active transactions")));
1885                                         SignalChildren(SIGTERM);
1886                                         /* reaper() does the rest */
1887                                 }
1888                                 break;
1889                         }
1890
1891                         /*
1892                          * No children left. Begin shutdown of data base system.
1893                          *
1894                          * Note: if we previously got SIGTERM then we may send SIGUSR2 to
1895                          * the bgwriter a second time here.  This should be harmless.
1896                          */
1897                         if (StartupPID != 0 || FatalError)
1898                                 break;                  /* let reaper() handle this */
1899                         /* Start the bgwriter if not running */
1900                         if (BgWriterPID == 0)
1901                                 BgWriterPID = StartBackgroundWriter();
1902                         /* And tell it to shut down */
1903                         if (BgWriterPID != 0)
1904                                 kill(BgWriterPID, SIGUSR2);
1905                         /* Tell pgarch to shut down too; nothing left for it to do */
1906                         if (PgArchPID != 0)
1907                                 kill(PgArchPID, SIGQUIT);
1908                         /* Tell pgstat to shut down too; nothing left for it to do */
1909                         if (PgStatPID != 0)
1910                                 kill(PgStatPID, SIGQUIT);
1911                         break;
1912
1913                 case SIGQUIT:
1914
1915                         /*
1916                          * Immediate Shutdown:
1917                          *
1918                          * abort all children with SIGQUIT and exit without attempt to
1919                          * properly shut down data base system.
1920                          */
1921                         ereport(LOG,
1922                                         (errmsg("received immediate shutdown request")));
1923                         if (StartupPID != 0)
1924                                 kill(StartupPID, SIGQUIT);
1925                         if (BgWriterPID != 0)
1926                                 kill(BgWriterPID, SIGQUIT);
1927                         if (PgArchPID != 0)
1928                                 kill(PgArchPID, SIGQUIT);
1929                         if (PgStatPID != 0)
1930                                 kill(PgStatPID, SIGQUIT);
1931                         if (DLGetHead(BackendList))
1932                                 SignalChildren(SIGQUIT);
1933                         ExitPostmaster(0);
1934                         break;
1935         }
1936
1937         PG_SETMASK(&UnBlockSig);
1938
1939         errno = save_errno;
1940 }
1941
1942 /*
1943  * Reaper -- signal handler to cleanup after a backend (child) dies.
1944  */
1945 static void
1946 reaper(SIGNAL_ARGS)
1947 {
1948         int                     save_errno = errno;
1949
1950 #ifdef HAVE_WAITPID
1951         int                     status;                 /* backend exit status */
1952
1953 #else
1954 #ifndef WIN32
1955         union wait      status;                 /* backend exit status */
1956 #endif
1957 #endif
1958         int                     exitstatus;
1959         int                     pid;                    /* process id of dead backend */
1960
1961         PG_SETMASK(&BlockSig);
1962
1963         ereport(DEBUG4,
1964                         (errmsg_internal("reaping dead processes")));
1965 #ifdef HAVE_WAITPID
1966         while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
1967         {
1968                 exitstatus = status;
1969 #else
1970 #ifndef WIN32
1971         while ((pid = wait3(&status, WNOHANG, NULL)) > 0)
1972         {
1973                 exitstatus = status.w_status;
1974 #else
1975         while ((pid = win32_waitpid(&exitstatus)) > 0)
1976         {
1977                 /*
1978                  * We need to do this here, and not in CleanupBackend, since this
1979                  * is to be called on all children when we are done with them.
1980                  * Could move to LogChildExit, but that seems like asking for
1981                  * future trouble...
1982                  */
1983                 win32_RemoveChild(pid);
1984 #endif   /* WIN32 */
1985 #endif   /* HAVE_WAITPID */
1986
1987                 /*
1988                  * Check if this child was a startup process.
1989                  */
1990                 if (StartupPID != 0 && pid == StartupPID)
1991                 {
1992                         StartupPID = 0;
1993                         if (exitstatus != 0)
1994                         {
1995                                 LogChildExit(LOG, gettext("startup process"),
1996                                                          pid, exitstatus);
1997                                 ereport(LOG,
1998                                                 (errmsg("aborting startup due to startup process failure")));
1999                                 ExitPostmaster(1);
2000                         }
2001
2002                         /*
2003                          * Startup succeeded - we are done with system startup or
2004                          * recovery.
2005                          */
2006                         FatalError = false;
2007
2008                         /*
2009                          * Crank up the background writer.      It doesn't matter if this
2010                          * fails, we'll just try again later.
2011                          */
2012                         Assert(BgWriterPID == 0);
2013                         BgWriterPID = StartBackgroundWriter();
2014
2015                         /*
2016                          * Go to shutdown mode if a shutdown request was pending.
2017                          * Otherwise, try to start the archiver and stats collector
2018                          * too.
2019                          */
2020                         if (Shutdown > NoShutdown && BgWriterPID != 0)
2021                                 kill(BgWriterPID, SIGUSR2);
2022                         else if (Shutdown == NoShutdown)
2023                         {
2024                                 if (XLogArchivingActive() && PgArchPID == 0)
2025                                         PgArchPID = pgarch_start();
2026                                 if (PgStatPID == 0)
2027                                         PgStatPID = pgstat_start();
2028                         }
2029
2030                         continue;
2031                 }
2032
2033                 /*
2034                  * Was it the bgwriter?
2035                  */
2036                 if (BgWriterPID != 0 && pid == BgWriterPID)
2037                 {
2038                         BgWriterPID = 0;
2039                         if (exitstatus == 0 && Shutdown > NoShutdown &&
2040                                 !FatalError && !DLGetHead(BackendList))
2041                         {
2042                                 /*
2043                                  * Normal postmaster exit is here: we've seen normal exit
2044                                  * of the bgwriter after it's been told to shut down. We
2045                                  * expect that it wrote a shutdown checkpoint.  (If for
2046                                  * some reason it didn't, recovery will occur on next
2047                                  * postmaster start.)
2048                                  *
2049                                  * Note: we do not wait around for exit of the archiver or
2050                                  * stats processes.  They've been sent SIGQUIT by this
2051                                  * point, and in any case contain logic to commit
2052                                  * hara-kiri if they notice the postmaster is gone.
2053                                  */
2054                                 ExitPostmaster(0);
2055                         }
2056
2057                         /*
2058                          * Any unexpected exit of the bgwriter is treated as a crash.
2059                          */
2060                         HandleChildCrash(pid, exitstatus,
2061                                                          gettext("background writer process"));
2062                         continue;
2063                 }
2064
2065                 /*
2066                  * Was it the archiver?  If so, just try to start a new one; no
2067                  * need to force reset of the rest of the system.  (If fail, we'll
2068                  * try again in future cycles of the main loop.)
2069                  */
2070                 if (PgArchPID != 0 && pid == PgArchPID)
2071                 {
2072                         PgArchPID = 0;
2073                         if (exitstatus != 0)
2074                                 LogChildExit(LOG, gettext("archiver process"),
2075                                                          pid, exitstatus);
2076                         if (XLogArchivingActive() &&
2077                                 StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
2078                                 PgArchPID = pgarch_start();
2079                         continue;
2080                 }
2081
2082                 /*
2083                  * Was it the statistics collector?  If so, just try to start a
2084                  * new one; no need to force reset of the rest of the system.  (If
2085                  * fail, we'll try again in future cycles of the main loop.)
2086                  */
2087                 if (PgStatPID != 0 && pid == PgStatPID)
2088                 {
2089                         PgStatPID = 0;
2090                         if (exitstatus != 0)
2091                                 LogChildExit(LOG, gettext("statistics collector process"),
2092                                                          pid, exitstatus);
2093                         if (StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
2094                                 PgStatPID = pgstat_start();
2095                         continue;
2096                 }
2097
2098                 /* Was it the system logger? try to start a new one */
2099                 if (SysLoggerPID != 0 && pid == SysLoggerPID)
2100                 {
2101                         SysLoggerPID = 0;
2102                         /* for safety's sake, launch new logger *first* */
2103                         SysLoggerPID = SysLogger_Start();
2104                         if (exitstatus != 0)
2105                                 LogChildExit(LOG, gettext("system logger process"),
2106                                                          pid, exitstatus);
2107                         continue;
2108                 }
2109
2110                 /*
2111                  * Else do standard backend child cleanup.
2112                  */
2113                 CleanupBackend(pid, exitstatus);
2114         }                                                       /* loop over pending child-death reports */
2115
2116         if (FatalError)
2117         {
2118                 /*
2119                  * Wait for all important children to exit, then reset shmem and
2120                  * StartupDataBase.  (We can ignore the archiver and stats
2121                  * processes here since they are not connected to shmem.)
2122                  */
2123                 if (DLGetHead(BackendList) || StartupPID != 0 || BgWriterPID != 0)
2124                         goto reaper_done;
2125                 ereport(LOG,
2126                         (errmsg("all server processes terminated; reinitializing")));
2127
2128                 shmem_exit(0);
2129                 reset_shared(PostPortNumber);
2130
2131                 StartupPID = StartupDataBase();
2132
2133                 goto reaper_done;
2134         }
2135
2136         if (Shutdown > NoShutdown)
2137         {
2138                 if (DLGetHead(BackendList) || StartupPID != 0)
2139                         goto reaper_done;
2140                 /* Start the bgwriter if not running */
2141                 if (BgWriterPID == 0)
2142                         BgWriterPID = StartBackgroundWriter();
2143                 /* And tell it to shut down */
2144                 if (BgWriterPID != 0)
2145                         kill(BgWriterPID, SIGUSR2);
2146                 /* Tell pgarch to shut down too; nothing left for it to do */
2147                 if (PgArchPID != 0)
2148                         kill(PgArchPID, SIGQUIT);
2149                 /* Tell pgstat to shut down too; nothing left for it to do */
2150                 if (PgStatPID != 0)
2151                         kill(PgStatPID, SIGQUIT);
2152         }
2153
2154 reaper_done:
2155         PG_SETMASK(&UnBlockSig);
2156
2157         errno = save_errno;
2158 }
2159
2160
2161 /*
2162  * CleanupBackend -- cleanup after terminated backend.
2163  *
2164  * Remove all local state associated with backend.
2165  */
2166 static void
2167 CleanupBackend(int pid,
2168                            int exitstatus)      /* child's exit status. */
2169 {
2170         Dlelem     *curr;
2171
2172         LogChildExit(DEBUG2, gettext("server process"), pid, exitstatus);
2173
2174         /*
2175          * If a backend dies in an ugly way (i.e. exit status not 0) then we
2176          * must signal all other backends to quickdie.  If exit status is zero
2177          * we assume everything is hunky dory and simply remove the backend
2178          * from the active backend list.
2179          */
2180         if (exitstatus != 0)
2181         {
2182                 HandleChildCrash(pid, exitstatus, gettext("server process"));
2183                 return;
2184         }
2185
2186         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2187         {
2188                 Backend    *bp = (Backend *) DLE_VAL(curr);
2189
2190                 if (bp->pid == pid)
2191                 {
2192                         DLRemove(curr);
2193                         free(bp);
2194                         DLFreeElem(curr);
2195 #ifdef EXEC_BACKEND
2196                         ShmemBackendArrayRemove(pid);
2197 #endif
2198                         /* Tell the collector about backend termination */
2199                         pgstat_beterm(pid);
2200                         break;
2201                 }
2202         }
2203 }
2204
2205 /*
2206  * HandleChildCrash -- cleanup after failed backend or bgwriter.
2207  *
2208  * The objectives here are to clean up our local state about the child
2209  * process, and to signal all other remaining children to quickdie.
2210  */
2211 static void
2212 HandleChildCrash(int pid, int exitstatus, const char *procname)
2213 {
2214         Dlelem     *curr,
2215                            *next;
2216         Backend    *bp;
2217
2218         /*
2219          * Make log entry unless there was a previous crash (if so, nonzero
2220          * exit status is to be expected in SIGQUIT response; don't clutter
2221          * log)
2222          */
2223         if (!FatalError)
2224         {
2225                 LogChildExit(LOG, procname, pid, exitstatus);
2226                 ereport(LOG,
2227                           (errmsg("terminating any other active server processes")));
2228         }
2229
2230         /* Process regular backends */
2231         for (curr = DLGetHead(BackendList); curr; curr = next)
2232         {
2233                 next = DLGetSucc(curr);
2234                 bp = (Backend *) DLE_VAL(curr);
2235                 if (bp->pid == pid)
2236                 {
2237                         /*
2238                          * Found entry for freshly-dead backend, so remove it.
2239                          */
2240                         DLRemove(curr);
2241                         free(bp);
2242                         DLFreeElem(curr);
2243 #ifdef EXEC_BACKEND
2244                         ShmemBackendArrayRemove(pid);
2245 #endif
2246                         /* Tell the collector about backend termination */
2247                         pgstat_beterm(pid);
2248                         /* Keep looping so we can signal remaining backends */
2249                 }
2250                 else
2251                 {
2252                         /*
2253                          * This backend is still alive.  Unless we did so already,
2254                          * tell it to commit hara-kiri.
2255                          *
2256                          * SIGQUIT is the special signal that says exit without proc_exit
2257                          * and let the user know what's going on. But if SendStop is
2258                          * set (-s on command line), then we send SIGSTOP instead, so
2259                          * that we can get core dumps from all backends by hand.
2260                          */
2261                         if (!FatalError)
2262                         {
2263                                 ereport(DEBUG2,
2264                                                 (errmsg_internal("sending %s to process %d",
2265                                                                           (SendStop ? "SIGSTOP" : "SIGQUIT"),
2266                                                                                  (int) bp->pid)));
2267                                 kill(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
2268                         }
2269                 }
2270         }
2271
2272         /* Take care of the bgwriter too */
2273         if (pid == BgWriterPID)
2274                 BgWriterPID = 0;
2275         else if (BgWriterPID != 0 && !FatalError)
2276         {
2277                 ereport(DEBUG2,
2278                                 (errmsg_internal("sending %s to process %d",
2279                                                                  (SendStop ? "SIGSTOP" : "SIGQUIT"),
2280                                                                  (int) BgWriterPID)));
2281                 kill(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
2282         }
2283
2284         /* Force a power-cycle of the pgarch process too */
2285         /* (Shouldn't be necessary, but just for luck) */
2286         if (PgArchPID != 0 && !FatalError)
2287         {
2288                 ereport(DEBUG2,
2289                                 (errmsg_internal("sending %s to process %d",
2290                                                                  "SIGQUIT",
2291                                                                  (int) PgArchPID)));
2292                 kill(PgArchPID, SIGQUIT);
2293         }
2294
2295         /* Force a power-cycle of the pgstat processes too */
2296         /* (Shouldn't be necessary, but just for luck) */
2297         if (PgStatPID != 0 && !FatalError)
2298         {
2299                 ereport(DEBUG2,
2300                                 (errmsg_internal("sending %s to process %d",
2301                                                                  "SIGQUIT",
2302                                                                  (int) PgStatPID)));
2303                 kill(PgStatPID, SIGQUIT);
2304         }
2305
2306         /* We do NOT restart the syslogger */
2307
2308         FatalError = true;
2309 }
2310
/*
 * Log the death of a child process.
 *
 * Emits exactly one message at level "lev".  The wording depends on how
 * exitstatus decodes: a normal exit reports the exit code, death by
 * signal reports the signal number, and anything else reports the raw
 * status value.
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
	if (WIFEXITED(exitstatus))
	{
		ereport(lev,

		/*
		 * translator: %s is a noun phrase describing a child process,
		 * such as "server process"
		 */
				(errmsg("%s (PID %d) exited with exit code %d",
						procname, pid, WEXITSTATUS(exitstatus))));
		return;
	}

	if (WIFSIGNALED(exitstatus))
	{
		ereport(lev,

		/*
		 * translator: %s is a noun phrase describing a child process,
		 * such as "server process"
		 */
				(errmsg("%s (PID %d) was terminated by signal %d",
						procname, pid, WTERMSIG(exitstatus))));
		return;
	}

	/* Neither a normal exit nor a signal: report the status verbatim */
	ereport(lev,

	/*
	 * translator: %s is a noun phrase describing a child process,
	 * such as "server process"
	 */
			(errmsg("%s (PID %d) exited with unexpected status %d",
					procname, pid, exitstatus)));
}
2345
2346 /*
2347  * Send a signal to all backend children (but NOT special children)
2348  */
2349 static void
2350 SignalChildren(int signal)
2351 {
2352         Dlelem     *curr;
2353
2354         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2355         {
2356                 Backend    *bp = (Backend *) DLE_VAL(curr);
2357
2358                 ereport(DEBUG4,
2359                                 (errmsg_internal("sending signal %d to process %d",
2360                                                                  signal, (int) bp->pid)));
2361                 kill(bp->pid, signal);
2362         }
2363 }
2364
2365 /*
2366  * BackendStartup -- start backend process
2367  *
2368  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
2369  */
2370 static int
2371 BackendStartup(Port *port)
2372 {
2373         Backend    *bn;                         /* for backend cleanup */
2374         pid_t           pid;
2375
2376 #ifdef LINUX_PROFILE
2377         struct itimerval prof_itimer;
2378 #endif
2379
2380         /*
2381          * Compute the cancel key that will be assigned to this backend. The
2382          * backend will have its own copy in the forked-off process' value of
2383          * MyCancelKey, so that it can transmit the key to the frontend.
2384          */
2385         MyCancelKey = PostmasterRandom();
2386
2387         /*
2388          * Make room for backend data structure.  Better before the fork() so
2389          * we can handle failure cleanly.
2390          */
2391         bn = (Backend *) malloc(sizeof(Backend));
2392         if (!bn)
2393         {
2394                 ereport(LOG,
2395                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2396                                  errmsg("out of memory")));
2397                 return STATUS_ERROR;
2398         }
2399
2400         /* Pass down canAcceptConnections state (kluge for EXEC_BACKEND case) */
2401         port->canAcceptConnections = canAcceptConnections();
2402
2403         /*
2404          * Flush stdio channels just before fork, to avoid double-output
2405          * problems. Ideally we'd use fflush(NULL) here, but there are still a
2406          * few non-ANSI stdio libraries out there (like SunOS 4.1.x) that
2407          * coredump if we do. Presently stdout and stderr are the only stdio
2408          * output channels used by the postmaster, so fflush'ing them should
2409          * be sufficient.
2410          */
2411         fflush(stdout);
2412         fflush(stderr);
2413
2414 #ifdef EXEC_BACKEND
2415
2416         pid = backend_forkexec(port);
2417
2418 #else                                                   /* !EXEC_BACKEND */
2419
2420 #ifdef LINUX_PROFILE
2421
2422         /*
2423          * Linux's fork() resets the profiling timer in the child process. If
2424          * we want to profile child processes then we need to save and restore
2425          * the timer setting.  This is a waste of time if not profiling,
2426          * however, so only do it if commanded by specific -DLINUX_PROFILE
2427          * switch.
2428          */
2429         getitimer(ITIMER_PROF, &prof_itimer);
2430 #endif
2431
2432 #ifdef __BEOS__
2433         /* Specific beos actions before backend startup */
2434         beos_before_backend_startup();
2435 #endif
2436
2437         pid = fork();
2438
2439         if (pid == 0)                           /* child */
2440         {
2441 #ifdef LINUX_PROFILE
2442                 setitimer(ITIMER_PROF, &prof_itimer, NULL);
2443 #endif
2444
2445 #ifdef __BEOS__
2446                 /* Specific beos backend startup actions */
2447                 beos_backend_startup();
2448 #endif
2449                 free(bn);
2450
2451                 proc_exit(BackendRun(port));
2452         }
2453 #endif   /* EXEC_BACKEND */
2454
2455         if (pid < 0)
2456         {
2457                 /* in parent, fork failed */
2458                 int                     save_errno = errno;
2459
2460 #ifdef __BEOS__
2461                 /* Specific beos backend startup actions */
2462                 beos_backend_startup_failed();
2463 #endif
2464                 free(bn);
2465                 errno = save_errno;
2466                 ereport(LOG,
2467                           (errmsg("could not fork new process for connection: %m")));
2468                 report_fork_failure_to_client(port, save_errno);
2469                 return STATUS_ERROR;
2470         }
2471
2472         /* in parent, successful fork */
2473         ereport(DEBUG2,
2474                         (errmsg_internal("forked new backend, pid=%d socket=%d",
2475                                                          (int) pid, port->sock)));
2476
2477         /*
2478          * Everything's been successful, it's safe to add this backend to our
2479          * list of backends.
2480          */
2481         bn->pid = pid;
2482         bn->cancel_key = MyCancelKey;
2483         DLAddHead(BackendList, DLNewElem(bn));
2484 #ifdef EXEC_BACKEND
2485         ShmemBackendArrayAdd(bn);
2486 #endif
2487
2488         return STATUS_OK;
2489 }
2490
2491 /*
2492  * Try to report backend fork() failure to client before we close the
2493  * connection.  Since we do not care to risk blocking the postmaster on
2494  * this connection, we set the connection to non-blocking and try only once.
2495  *
2496  * This is grungy special-purpose code; we cannot use backend libpq since
2497  * it's not up and running.
2498  */
2499 static void
2500 report_fork_failure_to_client(Port *port, int errnum)
2501 {
2502         char            buffer[1000];
2503
2504         /* Format the error message packet (always V2 protocol) */
2505         snprintf(buffer, sizeof(buffer), "E%s%s\n",
2506                          gettext("could not fork new process for connection: "),
2507                          strerror(errnum));
2508
2509         /* Set port to non-blocking.  Don't do send() if this fails */
2510         if (!set_noblock(port->sock))
2511                 return;
2512
2513         send(port->sock, buffer, strlen(buffer) + 1, 0);
2514 }
2515
2516
/*
 * split_opts -- split a string of options and append it to an argv array
 *
 * Every maximal run of non-whitespace characters in s becomes one argv
 * element, stored at argv[*argcp]; *argcp is advanced for each element
 * appended.  A NULL or all-whitespace s appends nothing.  No terminating
 * NULL entry is written, and the caller must supply an argv array that is
 * large enough.
 *
 * NB: the string is destructively modified!  The argv entries point into
 * s, whose separating whitespace is overwritten with NUL bytes.
 *
 * Since no current POSTGRES arguments require any quoting characters,
 * treating each whitespace-delimited word as its own argument is good
 * enough.  (We *used* to pass the whole option string as ONE argument to
 * execl(), which was even less intelligent...)
 */
static void
split_opts(char **argv, int *argcp, char *s)
{
	char	   *cursor = s;

	if (cursor == NULL)
		return;

	for (;;)
	{
		/* Step over whitespace in front of the next word */
		while (isspace((unsigned char) *cursor))
			cursor++;
		if (*cursor == '\0')
			return;

		/* Record where the word starts */
		argv[*argcp] = cursor;
		*argcp += 1;

		/* Find the end of the word */
		while (*cursor != '\0' && !isspace((unsigned char) *cursor))
			cursor++;
		if (*cursor == '\0')
			return;

		/* Terminate the word in place and move past the separator */
		*cursor++ = '\0';
	}
}
2545
2546
2547 /*
2548  * BackendRun -- perform authentication, and if successful,
2549  *                              set up the backend's argument list and invoke PostgresMain()
2550  *
2551  * returns:
2552  *              Shouldn't return at all.
2553  *              If PostgresMain() fails, return status.
2554  */
2555 static int
2556 BackendRun(Port *port)
2557 {
2558         int                     status;
2559         char            remote_host[NI_MAXHOST];
2560         char            remote_port[NI_MAXSERV];
2561         char            remote_ps_data[NI_MAXHOST];
2562         char      **av;
2563         int                     maxac;
2564         int                     ac;
2565         char            protobuf[32];
2566         int                     i;
2567
2568         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
2569
2570         /*
2571          * Let's clean up ourselves as the postmaster child, and close the
2572          * postmaster's listen sockets
2573          */
2574         ClosePostmasterPorts(false);
2575
2576         /* We don't want the postmaster's proc_exit() handlers */
2577         on_exit_reset();
2578
2579         /*
2580          * Signal handlers setting is moved to tcop/postgres...
2581          */
2582
2583         /* Save port etc. for ps status */
2584         MyProcPort = port;
2585
2586         /* Reset MyProcPid to new backend's pid */
2587         MyProcPid = getpid();
2588
2589         /*
2590          * PreAuthDelay is a debugging aid for investigating problems in the
2591          * authentication cycle: it can be set in postgresql.conf to allow
2592          * time to attach to the newly-forked backend with a debugger. (See
2593          * also the -W backend switch, which we allow clients to pass through
2594          * PGOPTIONS, but it is not honored until after authentication.)
2595          */
2596         if (PreAuthDelay > 0)
2597                 pg_usleep(PreAuthDelay * 1000000L);
2598
2599         ClientAuthInProgress = true;    /* limit visibility of log messages */
2600
2601         /* save start time for end of session reporting */
2602         gettimeofday(&(port->session_start), NULL);
2603
2604         /* set these to empty in case they are needed before we set them up */
2605         port->remote_host = "";
2606         port->remote_port = "";
2607         port->commandTag = "";
2608
2609         /*
2610          * Initialize libpq and enable reporting of ereport errors to the
2611          * client. Must do this now because authentication uses libpq to send
2612          * messages.
2613          */
2614         pq_init();                                      /* initialize libpq to talk to client */
2615         whereToSendOutput = Remote; /* now safe to ereport to client */
2616
2617         /*
2618          * We arrange for a simple exit(0) if we receive SIGTERM or SIGQUIT
2619          * during any client authentication related communication. Otherwise
2620          * the postmaster cannot shutdown the database FAST or IMMED cleanly
2621          * if a buggy client blocks a backend during authentication.
2622          */
2623         pqsignal(SIGTERM, authdie);
2624         pqsignal(SIGQUIT, authdie);
2625         pqsignal(SIGALRM, authdie);
2626         PG_SETMASK(&AuthBlockSig);
2627
2628         /*
2629          * Get the remote host name and port for logging and status display.
2630          */
2631         remote_host[0] = '\0';
2632         remote_port[0] = '\0';
2633         if (getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2634                                                 remote_host, sizeof(remote_host),
2635                                                 remote_port, sizeof(remote_port),
2636                                    (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV))
2637         {
2638                 int                     ret = getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2639                                                                                 remote_host, sizeof(remote_host),
2640                                                                                 remote_port, sizeof(remote_port),
2641                                                                                 NI_NUMERICHOST | NI_NUMERICSERV);
2642
2643                 if (ret)
2644                         ereport(WARNING,
2645                                         (errmsg("getnameinfo_all() failed: %s",
2646                                                         gai_strerror(ret))));
2647         }
2648         snprintf(remote_ps_data, sizeof(remote_ps_data),
2649                          remote_port[0] == '\0' ? "%s" : "%s(%s)",
2650                          remote_host, remote_port);
2651
2652         if (Log_connections)
2653                 ereport(LOG,
2654                                 (errmsg("connection received: host=%s port=%s",
2655                                                 remote_host, remote_port)));
2656
2657         /*
2658          * save remote_host and remote_port in port stucture
2659          */
2660         port->remote_host = strdup(remote_host);
2661         port->remote_port = strdup(remote_port);
2662
2663         /*
2664          * In EXEC_BACKEND case, we didn't inherit the contents of pg_hba.c
2665          * etcetera from the postmaster, and have to load them ourselves.
2666          * Build the PostmasterContext (which didn't exist before, in this
2667          * process) to contain the data.
2668          *
2669          * FIXME: [fork/exec] Ugh.      Is there a way around this overhead?
2670          */
2671 #ifdef EXEC_BACKEND
2672         Assert(PostmasterContext == NULL);
2673         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
2674                                                                                           "Postmaster",
2675                                                                                           ALLOCSET_DEFAULT_MINSIZE,
2676                                                                                           ALLOCSET_DEFAULT_INITSIZE,
2677                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
2678         MemoryContextSwitchTo(PostmasterContext);
2679
2680         load_hba();
2681         load_ident();
2682         load_user();
2683         load_group();
2684 #endif
2685
2686         /*
2687          * Ready to begin client interaction.  We will give up and exit(0)
2688          * after a time delay, so that a broken client can't hog a connection
2689          * indefinitely.  PreAuthDelay doesn't count against the time limit.
2690          */
2691         if (!enable_sig_alarm(AuthenticationTimeout * 1000, false))
2692                 elog(FATAL, "could not set timer for authorization timeout");
2693
2694         /*
2695          * Receive the startup packet (which might turn out to be a cancel
2696          * request packet).
2697          */
2698         status = ProcessStartupPacket(port, false);
2699
2700         if (status != STATUS_OK)
2701                 proc_exit(0);
2702
2703         /*
2704          * Now that we have the user and database name, we can set the process
2705          * title for ps.  It's good to do this as early as possible in
2706          * startup.
2707          */
2708         init_ps_display(port->user_name, port->database_name, remote_ps_data);
2709         set_ps_display("authentication");
2710
2711         /*
2712          * Now perform authentication exchange.
2713          */
2714         ClientAuthentication(port); /* might not return, if failure */
2715
2716         /*
2717          * Done with authentication.  Disable timeout, and prevent
2718          * SIGTERM/SIGQUIT again until backend startup is complete.
2719          */
2720         if (!disable_sig_alarm(false))
2721                 elog(FATAL, "could not disable timer for authorization timeout");
2722         PG_SETMASK(&BlockSig);
2723
2724         if (Log_connections)
2725                 ereport(LOG,
2726                                 (errmsg("connection authorized: user=%s database=%s",
2727                                                 port->user_name, port->database_name)));
2728
2729         /*
2730          * Don't want backend to be able to see the postmaster random number
2731          * generator state.  We have to clobber the static random_seed *and*
2732          * start a new random sequence in the random() library function.
2733          */
2734         random_seed = 0;
2735         srandom((unsigned int) (MyProcPid ^ port->session_start.tv_usec));
2736
2737         /* ----------------
2738          * Now, build the argv vector that will be given to PostgresMain.
2739          *
2740          * The layout of the command line is
2741          *              postgres [secure switches] -p databasename [insecure switches]
2742          * where the switches after -p come from the client request.
2743          *
2744          * The maximum possible number of commandline arguments that could come
2745          * from ExtraOptions or port->cmdline_options is (strlen + 1) / 2; see
2746          * split_opts().
2747          * ----------------
2748          */
2749         maxac = 10;                                     /* for fixed args supplied below */
2750         maxac += (strlen(ExtraOptions) + 1) / 2;
2751         if (port->cmdline_options)
2752                 maxac += (strlen(port->cmdline_options) + 1) / 2;
2753
2754         av = (char **) MemoryContextAlloc(TopMemoryContext,
2755                                                                           maxac * sizeof(char *));
2756         ac = 0;
2757
2758         av[ac++] = "postgres";
2759
2760         /*
2761          * Pass any backend switches specified with -o in the postmaster's own
2762          * command line.  We assume these are secure.  (It's OK to mangle
2763          * ExtraOptions now, since we're safely inside a subprocess.)
2764          */
2765         split_opts(av, &ac, ExtraOptions);
2766
2767         /* Tell the backend what protocol the frontend is using. */
2768         snprintf(protobuf, sizeof(protobuf), "-v%u", port->proto);
2769         av[ac++] = protobuf;
2770
2771         /*
2772          * Tell the backend it is being called from the postmaster, and which
2773          * database to use.  -p marks the end of secure switches.
2774          */
2775         av[ac++] = "-p";
2776         av[ac++] = port->database_name;
2777
2778         /*
2779          * Pass the (insecure) option switches from the connection request.
2780          * (It's OK to mangle port->cmdline_options now.)
2781          */
2782         if (port->cmdline_options)
2783                 split_opts(av, &ac, port->cmdline_options);
2784
2785         av[ac] = NULL;
2786
2787         Assert(ac < maxac);
2788
2789         /*
2790          * Release postmaster's working memory context so that backend can
2791          * recycle the space.  Note this does not trash *MyProcPort, because
2792          * ConnCreate() allocated that space with malloc() ... else we'd need
2793          * to copy the Port data here.  Also, subsidiary data such as the
2794          * username isn't lost either; see ProcessStartupPacket().
2795          */
2796         MemoryContextSwitchTo(TopMemoryContext);
2797         MemoryContextDelete(PostmasterContext);
2798         PostmasterContext = NULL;
2799
2800         /*
2801          * Debug: print arguments being passed to backend
2802          */
2803         ereport(DEBUG3,
2804                         (errmsg_internal("%s child[%d]: starting with (",
2805                                                          progname, (int)getpid())));
2806         for (i = 0; i < ac; ++i)
2807                 ereport(DEBUG3,
2808                                 (errmsg_internal("\t%s", av[i])));
2809         ereport(DEBUG3,
2810                         (errmsg_internal(")")));
2811
2812         ClientAuthInProgress = false;           /* client_min_messages is active
2813                                                                                  * now */
2814
2815         return (PostgresMain(ac, av, port->user_name));
2816 }
2817
2818
2819 #ifdef EXEC_BACKEND
2820
2821 /*
2822  * postmaster_forkexec -- fork and exec a postmaster subprocess
2823  *
2824  * The caller must have set up the argv array already, except for argv[2]
2825  * which will be filled with the name of the temp variable file.
2826  *
2827  * Returns the child process PID, or -1 on fork failure (a suitable error
2828  * message has been logged on failure).
2829  *
2830  * All uses of this routine will dispatch to SubPostmasterMain in the
2831  * child process.
2832  */
2833 pid_t
2834 postmaster_forkexec(int argc, char *argv[])
2835 {
2836         Port            port;
2837
2838         /* This entry point passes dummy values for the Port variables */
2839         memset(&port, 0, sizeof(port));
2840         return internal_forkexec(argc, argv, &port);
2841 }
2842
2843 /*
2844  * backend_forkexec -- fork/exec off a backend process
2845  *
2846  * returns the pid of the fork/exec'd process, or -1 on failure
2847  */
2848 static pid_t
2849 backend_forkexec(Port *port)
2850 {
2851         char       *av[4];
2852         int                     ac = 0;
2853
2854         av[ac++] = "postgres";
2855         av[ac++] = "-forkbackend";
2856         av[ac++] = NULL;                        /* filled in by internal_forkexec */
2857
2858         av[ac] = NULL;
2859         Assert(ac < lengthof(av));
2860
2861         return internal_forkexec(ac, av, port);
2862 }
2863
2864 #ifndef WIN32
2865
2866 /*
2867  * internal_forkexec non-win32 implementation
2868  *
2869  * - writes out backend variables to the parameter file
2870  * - fork():s, and then exec():s the child process
2871  */
2872 static pid_t
2873 internal_forkexec(int argc, char *argv[], Port *port)
2874 {
2875         static unsigned long tmpBackendFileNum = 0;
2876         pid_t           pid;
2877         char            tmpfilename[MAXPGPATH];
2878         BackendParameters param;
2879         FILE       *fp;
2880
2881         if (!save_backend_variables(&param, port))
2882                 return -1;                              /* log made by save_backend_variables */
2883
2884         /* Calculate name for temp file */
2885         Assert(DataDir);
2886         snprintf(tmpfilename, MAXPGPATH, "%s/%s/%s.backend_var.%d.%lu",
2887                          DataDir, PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX,
2888                          MyProcPid, ++tmpBackendFileNum);
2889
2890         /* Open file */
2891         fp = AllocateFile(tmpfilename, PG_BINARY_W);
2892         if (!fp)
2893         {
2894                 /* As per OpenTemporaryFile... */
2895                 char            dirname[MAXPGPATH];
2896
2897                 snprintf(dirname, MAXPGPATH, "%s/%s", DataDir, PG_TEMP_FILES_DIR);
2898                 mkdir(dirname, S_IRWXU);
2899
2900                 fp = AllocateFile(tmpfilename, PG_BINARY_W);
2901                 if (!fp)
2902                 {
2903                         ereport(LOG,
2904                                         (errcode_for_file_access(),
2905                                          errmsg("could not create file \"%s\": %m",
2906                                                         tmpfilename)));
2907                         return -1;
2908                 }
2909         }
2910
2911         if (fwrite(&param, sizeof(param), 1, fp) != 1)
2912         {
2913                 ereport(LOG,
2914                                 (errcode_for_file_access(),
2915                                  errmsg("could not write to file \"%s\": %m", tmpfilename)));
2916                 FreeFile(fp);
2917                 return -1;
2918         }
2919
2920         /* Release file */
2921         if (FreeFile(fp))
2922         {
2923                 ereport(LOG,
2924                                 (errcode_for_file_access(),
2925                                  errmsg("could not write to file \"%s\": %m", tmpfilename)));
2926                 return -1;
2927         }
2928
2929         /* Make sure caller set up argv properly */
2930         Assert(argc >= 3);
2931         Assert(argv[argc] == NULL);
2932         Assert(strncmp(argv[1], "-fork", 5) == 0);
2933         Assert(argv[2] == NULL);
2934
2935         /* Insert temp file name after -fork argument */
2936         argv[2] = tmpfilename;
2937
2938         /* Fire off execv in child */
2939         if ((pid = fork()) == 0)
2940         {
2941                 if (execv(postgres_exec_path, argv) < 0)
2942                 {
2943                         ereport(LOG,
2944                                         (errmsg("could not execute server process \"%s\": %m",
2945                                                         postgres_exec_path)));
2946                         /* We're already in the child process here, can't return */
2947                         exit(1);
2948                 }
2949         }
2950
2951         return pid;                                     /* Parent returns pid, or -1 on fork
2952                                                                  * failure */
2953 }
2954
2955 #else /* WIN32 */
2956
2957 /*
2958  * internal_forkexec win32 implementation
2959  *
2960  * - starts backend using CreateProcess(), in suspended state
2961  * - writes out backend variables to the parameter file
2962  *  - during this, duplicates handles and sockets required for
2963  *    inheritance into the new process
2964  * - resumes execution of the new process once the backend parameter
2965  *   file is complete.
2966  */
2967 static pid_t
2968 internal_forkexec(int argc, char *argv[], Port *port)
2969 {
2970         STARTUPINFO si;
2971         PROCESS_INFORMATION pi;
2972         int                     i;
2973         int                     j;
2974         char            cmdLine[MAXPGPATH * 2];
2975         HANDLE          childHandleCopy;
2976         HANDLE          waiterThread;
2977         HANDLE      paramHandle;
2978         BackendParameters *param;
2979         SECURITY_ATTRIBUTES sa;
2980         char        paramHandleStr[32];
2981
2982         /* Make sure caller set up argv properly */
2983         Assert(argc >= 3);
2984         Assert(argv[argc] == NULL);
2985         Assert(strncmp(argv[1], "-fork", 5) == 0);
2986         Assert(argv[2] == NULL);
2987
2988         /* Set up shared memory for parameter passing */
2989         ZeroMemory(&sa,sizeof(sa));
2990         sa.nLength = sizeof(sa);
2991         sa.bInheritHandle = TRUE;
2992         paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
2993                                                                         &sa,
2994                                                                         PAGE_READWRITE,
2995                                                                         0,
2996                                                                         sizeof(BackendParameters),
2997                                                                         NULL);
2998         if (paramHandle == INVALID_HANDLE_VALUE)
2999         {
3000                 elog(LOG, "could not create backend parameter file mapping: error code %d",
3001                          (int) GetLastError());
3002                 return -1;
3003         }
3004
3005         param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
3006         if (!param)
3007         {
3008                 elog(LOG, "could not map backend parameter memory: error code %d",
3009                          (int) GetLastError());
3010                 CloseHandle(paramHandle);
3011                 return -1;
3012         }
3013
3014         /* Insert temp file name after -fork argument */
3015         sprintf(paramHandleStr, "%lu", (DWORD)paramHandle);
3016         argv[2] = paramHandleStr;
3017
3018         /* Format the cmd line */
3019         cmdLine[sizeof(cmdLine) - 1] = '\0';
3020         cmdLine[sizeof(cmdLine) - 2] = '\0';
3021         snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
3022         i = 0;
3023         while (argv[++i] != NULL)
3024         {
3025                 j = strlen(cmdLine);
3026                 snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
3027         }
3028         if (cmdLine[sizeof(cmdLine) - 2] != '\0')
3029         {
3030                 elog(LOG, "subprocess command line too long");
3031                 return -1;
3032         }
3033
3034         memset(&pi, 0, sizeof(pi));
3035         memset(&si, 0, sizeof(si));
3036         si.cb = sizeof(si);
3037         /*
3038          * Create the subprocess in a suspended state. This will be resumed
3039          * later, once we have written out the parameter file.
3040          */
3041         if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
3042                                            NULL, NULL, &si, &pi))
3043         {
3044                 elog(LOG, "CreateProcess call failed: %m (error code %d)",
3045                          (int) GetLastError());
3046                 return -1;
3047         }
3048
3049         if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
3050         {
3051                 /*
3052                  * log made by save_backend_variables, but we have to clean
3053                  * up the mess with the half-started process
3054                  */
3055                 if (!TerminateProcess(pi.hProcess, 255))
3056                         ereport(ERROR,
3057                                         (errmsg_internal("could not terminate unstarted process: error code %d",
3058                                                                          (int) GetLastError())));
3059                 CloseHandle(pi.hProcess);
3060                 CloseHandle(pi.hThread);
3061                 return -1;                              /* log made by save_backend_variables */
3062         }
3063
3064         /* Drop the shared memory that is now inherited to the backend */
3065         if (!UnmapViewOfFile(param))
3066                 elog(LOG, "could not unmap view of backend parameter file: error code %d",
3067                          (int) GetLastError());
3068         if (!CloseHandle(paramHandle))
3069                 elog(LOG, "could not close handle to backend parameter file: error code %d",
3070                          (int) GetLastError());
3071
3072         /*
3073          * Now that the backend variables are written out, we start the
3074          * child thread so it can start initializing while we set up
3075          * the rest of the parent state.
3076          */
3077         if (ResumeThread(pi.hThread) == -1)
3078         {
3079                 if (!TerminateProcess(pi.hProcess, 255))
3080                 {
3081                         ereport(ERROR,
3082                                         (errmsg_internal("could not terminate unstartable process: error code %d",
3083                                                                          (int) GetLastError())));
3084                         CloseHandle(pi.hProcess);
3085                         CloseHandle(pi.hThread);
3086                         return -1;
3087                 }
3088                 CloseHandle(pi.hProcess);
3089                 CloseHandle(pi.hThread);
3090                 ereport(ERROR,
3091                                 (errmsg_internal("could not resume thread of unstarted process: error code %d",
3092                                                                  (int) GetLastError())));
3093                 return -1;
3094         }
3095
3096         if (!IsUnderPostmaster)
3097         {
3098                 /* We are the Postmaster creating a child... */
3099                 win32_AddChild(pi.dwProcessId, pi.hProcess);
3100         }
3101
3102         /* Set up the thread to handle the SIGCHLD for this process */
3103         if (DuplicateHandle(GetCurrentProcess(),
3104                                                 pi.hProcess,
3105                                                 GetCurrentProcess(),
3106                                                 &childHandleCopy,
3107                                                 0,
3108                                                 FALSE,
3109                                                 DUPLICATE_SAME_ACCESS) == 0)
3110                 ereport(FATAL,
3111                                 (errmsg_internal("could not duplicate child handle: error code %d",
3112                                                                  (int) GetLastError())));
3113
3114         waiterThread = CreateThread(NULL, 64 * 1024, win32_sigchld_waiter,
3115                                                                 (LPVOID) childHandleCopy, 0, NULL);
3116         if (!waiterThread)
3117                 ereport(FATAL,
3118                    (errmsg_internal("could not create sigchld waiter thread: error code %d",
3119                                                         (int) GetLastError())));
3120         CloseHandle(waiterThread);
3121
3122         if (IsUnderPostmaster)
3123                 CloseHandle(pi.hProcess);
3124         CloseHandle(pi.hThread);
3125
3126         return pi.dwProcessId;
3127 }
3128
3129 #endif /* WIN32 */
3130
3131
3132 /*
3133  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
3134  *                      to what it would be if we'd simply forked on Unix, and then
3135  *                      dispatch to the appropriate place.
3136  *
3137  * The first two command line arguments are expected to be "-forkFOO"
3138  * (where FOO indicates which postmaster child we are to become), and
3139  * the name of a variables file that we can read to load data that would
3140  * have been inherited by fork() on Unix.  Remaining arguments go to the
3141  * subprocess FooMain() routine.
3142  */
3143 int
3144 SubPostmasterMain(int argc, char *argv[])
3145 {
3146         Port            port;
3147
3148         /* Do this sooner rather than later... */
3149         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
3150
3151         MyProcPid = getpid();           /* reset MyProcPid */
3152
3153         /* In EXEC_BACKEND case we will not have inherited these settings */
3154         IsPostmasterEnvironment = true;
3155         whereToSendOutput = None;
3156
3157         /* Setup essential subsystems (to ensure elog() behaves sanely) */
3158         MemoryContextInit();
3159         InitializeGUCOptions();
3160
3161         /* Read in the variables file */
3162         memset(&port, 0, sizeof(Port));
3163         read_backend_variables(argv[2], &port);
3164
3165         /* Check we got appropriate args */
3166         if (argc < 3)
3167                 elog(FATAL, "invalid subpostmaster invocation");
3168
3169         /*
3170          * If appropriate, physically re-attach to shared memory segment.
3171          * We want to do this before going any further to ensure that we
3172          * can attach at the same address the postmaster used.
3173          */
3174         if (strcmp(argv[1], "-forkbackend") == 0 ||
3175                 strcmp(argv[1], "-forkboot") == 0)
3176                 PGSharedMemoryReAttach();
3177
3178         /*
3179          * Start our win32 signal implementation. This has to be done
3180          * after we read the backend variables, because we need to pick
3181          * up the signal pipe from the parent process.
3182          */
3183 #ifdef WIN32
3184         pgwin32_signal_initialize();
3185 #endif
3186
3187         /* In EXEC_BACKEND case we will not have inherited these settings */
3188         pqinitmask();
3189         PG_SETMASK(&BlockSig);
3190
3191         /* Read in remaining GUC variables */
3192         read_nondefault_variables();
3193
3194         /* Run backend or appropriate child */
3195         if (strcmp(argv[1], "-forkbackend") == 0)
3196         {
3197                 /* BackendRun will close sockets */
3198
3199                 /* Attach process to shared data structures */
3200                 CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
3201
3202 #ifdef USE_SSL
3203                 /*
3204                  *      Need to reinitialize the SSL library in the backend,
3205                  *      since the context structures contain function pointers
3206                  *      and cannot be passed through the parameter file.
3207                  */
3208                 if (EnableSSL)
3209                         secure_initialize();
3210 #endif
3211
3212                 Assert(argc == 3);              /* shouldn't be any more args */
3213                 proc_exit(BackendRun(&port));
3214         }
3215         if (strcmp(argv[1], "-forkboot") == 0)
3216         {
3217                 /* Close the postmaster's sockets */
3218                 ClosePostmasterPorts(false);
3219
3220                 /* Attach process to shared data structures */
3221                 CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
3222
3223                 BootstrapMain(argc - 2, argv + 2);
3224                 proc_exit(0);
3225         }
3226         if (strcmp(argv[1], "-forkarch") == 0)
3227         {
3228                 /* Close the postmaster's sockets */
3229                 ClosePostmasterPorts(false);
3230
3231                 /* Do not want to attach to shared memory */
3232
3233                 PgArchiverMain(argc, argv);
3234                 proc_exit(0);
3235         }
3236         if (strcmp(argv[1], "-forkbuf") == 0)
3237         {
3238                 /* Close the postmaster's sockets */
3239                 ClosePostmasterPorts(false);
3240
3241                 /* Do not want to attach to shared memory */
3242
3243                 PgstatBufferMain(argc, argv);
3244                 proc_exit(0);
3245         }
3246         if (strcmp(argv[1], "-forkcol") == 0)
3247         {
3248                 /*
3249                  * Do NOT close postmaster sockets here, because we are forking
3250                  * from pgstat buffer process, which already did it.
3251                  */
3252
3253                 /* Do not want to attach to shared memory */
3254
3255                 PgstatCollectorMain(argc, argv);
3256                 proc_exit(0);
3257         }
3258         if (strcmp(argv[1], "-forklog") == 0)
3259         {
3260                 /* Close the postmaster's sockets */
3261                 ClosePostmasterPorts(true);
3262
3263                 /* Do not want to attach to shared memory */
3264
3265                 SysLoggerMain(argc, argv);
3266                 proc_exit(0);
3267         }
3268
3269         return 1;                                       /* shouldn't get here */
3270 }
3271
3272 #endif   /* EXEC_BACKEND */
3273
3274
/*
 * ExitPostmaster -- the postmaster's single exit path
 *
 * Never call exit() directly; always leave through this routine.
 */
static void
ExitPostmaster(int status)
{
	/*
	 * XXX: this should clean up shared memory and kill all backends, but
	 * currently it only hands the status to proc_exit().
	 *
	 * The semantics are unsettled: when the postmaster dies, should the
	 * backends all be killed?  The original note said "probably not";
	 * vadim (05-10-1999) answered that they MUST be.
	 */
	proc_exit(status);
}
3294
3295 /*
3296  * sigusr1_handler - handle signal conditions from child processes
3297  */
3298 static void
3299 sigusr1_handler(SIGNAL_ARGS)
3300 {
3301         int                     save_errno = errno;
3302
3303         PG_SETMASK(&BlockSig);
3304
3305         if (CheckPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE))
3306         {
3307                 /*
3308                  * Password or group file has changed.
3309                  */
3310                 load_user();
3311                 load_group();
3312         }
3313
3314         if (CheckPostmasterSignal(PMSIGNAL_WAKEN_CHILDREN))
3315         {
3316                 /*
3317                  * Send SIGUSR1 to all children (triggers
3318                  * CatchupInterruptHandler). See storage/ipc/sinval[adt].c for the
3319                  * use of this.
3320                  */
3321                 if (Shutdown <= SmartShutdown)
3322                         SignalChildren(SIGUSR1);
3323         }
3324
3325         if (PgArchPID != 0 && Shutdown == NoShutdown)
3326         {
3327                 if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER))
3328                 {
3329                         /*
3330                          * Send SIGUSR1 to archiver process, to wake it up and begin
3331                          * archiving next transaction log file.
3332                          */
3333                         kill(PgArchPID, SIGUSR1);
3334                 }
3335         }
3336
3337         PG_SETMASK(&UnBlockSig);
3338
3339         errno = save_errno;
3340 }
3341
3342
/*
 * dummy_handler -- signal handler that does nothing
 *
 * Installed for signals the postmaster itself has no use for but that
 * backends do use.  Were the postmaster to SIG_IGN such a signal, a newly
 * started backend might drop one that arrives before it has been able to
 * reconfigure its own signal processing.  (See notes in tcop/postgres.c.)
 */
static void
dummy_handler(SIGNAL_ARGS)
{
	/* intentionally empty */
}
3356
3357
/*
 * CharRemap: produce the textual encoding of an integer per crypt(3)
 * conventions.
 *
 * The argument is first folded into the range 0..61 (sign dropped, then
 * taken modulo 62), so any long value is acceptable.  The mapping is
 *		 0..25 -> 'A'..'Z'
 *		26..51 -> 'a'..'z'
 *		52..61 -> '0'..'9'
 */
static char
CharRemap(long ch)
{
	/*
	 * Reduce before taking the magnitude.  Negating first would overflow
	 * (undefined behavior) for LONG_MIN, whereas the remainder always lies
	 * in -61..61 and can be negated safely.  With truncating division this
	 * yields the same value as abs(ch) % 62 for every other input.
	 */
	ch %= 62;
	if (ch < 0)
		ch = -ch;

	if (ch < 26)
		return (char) ('A' + ch);

	ch -= 26;
	if (ch < 26)
		return (char) ('a' + ch);

	ch -= 26;
	return (char) ('0' + ch);
}
3379
/*
 * RandomSalt -- fill in fresh salt values for crypt and MD5 authentication
 *
 * Writes two characters to cryptSalt and four bytes to md5Salt.
 */
static void
RandomSalt(char *cryptSalt, char *md5Salt)
{
	long		r = PostmasterRandom();
	int			i;

	cryptSalt[0] = CharRemap(r % 62);
	cryptSalt[1] = CharRemap(r / 62);

	/*
	 * The first random value may be reused for one MD5 salt byte, since
	 * only one of the two salts will be sent to the client.  The other
	 * three bytes each need more random bits.
	 *
	 * Taking the value % 255 gives up one possible byte value, but makes
	 * all bits of the random() value participate in the result.  Adding
	 * one then guarantees that no null byte is generated.
	 */
	md5Salt[0] = (r % 255) + 1;
	for (i = 1; i < 4; i++)
	{
		r = PostmasterRandom();
		md5Salt[i] = (r % 255) + 1;
	}
}
3408
3409 /*
3410  * PostmasterRandom
3411  */
3412 static long
3413 PostmasterRandom(void)
3414 {
3415         static bool initialized = false;
3416
3417         if (!initialized)
3418         {
3419                 Assert(random_seed != 0);
3420                 srandom(random_seed);
3421                 initialized = true;
3422         }
3423
3424         return random();
3425 }
3426
3427 /*
3428  * Count up number of child processes (regular backends only)
3429  */
3430 static int
3431 CountChildren(void)
3432 {
3433         Dlelem     *curr;
3434         int                     cnt = 0;
3435
3436         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
3437                 cnt++;
3438         return cnt;
3439 }
3440
3441
3442 /*
3443  * StartChildProcess -- start a non-backend child process for the postmaster
3444  *
3445  * xlog determines what kind of child will be started.  All child types
3446  * initially go to BootstrapMain, which will handle common setup.
3447  *
3448  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
3449  * to start subprocess.
3450  */
3451 static pid_t
3452 StartChildProcess(int xlop)
3453 {
3454         pid_t           pid;
3455         char       *av[10];
3456         int                     ac = 0;
3457         char            xlbuf[32];
3458
3459 #ifdef LINUX_PROFILE
3460         struct itimerval prof_itimer;
3461 #endif
3462
3463         /*
3464          * Set up command-line arguments for subprocess
3465          */
3466         av[ac++] = "postgres";
3467
3468 #ifdef EXEC_BACKEND
3469         av[ac++] = "-forkboot";
3470         av[ac++] = NULL;                        /* filled in by postmaster_forkexec */
3471 #endif
3472
3473         snprintf(xlbuf, sizeof(xlbuf), "-x%d", xlop);
3474         av[ac++] = xlbuf;
3475
3476         av[ac++] = "-p";
3477         av[ac++] = "template1";
3478
3479         av[ac] = NULL;
3480         Assert(ac < lengthof(av));
3481
3482         /*
3483          * Flush stdio channels (see comments in BackendStartup)
3484          */
3485         fflush(stdout);
3486         fflush(stderr);
3487
3488 #ifdef EXEC_BACKEND
3489
3490         pid = postmaster_forkexec(ac, av);
3491
3492 #else                                                   /* !EXEC_BACKEND */
3493
3494 #ifdef LINUX_PROFILE
3495         /* see comments in BackendStartup */
3496         getitimer(ITIMER_PROF, &prof_itimer);
3497 #endif
3498
3499 #ifdef __BEOS__
3500         /* Specific beos actions before backend startup */
3501         beos_before_backend_startup();
3502 #endif
3503
3504         pid = fork();
3505
3506         if (pid == 0)                           /* child */
3507         {
3508 #ifdef LINUX_PROFILE
3509                 setitimer(ITIMER_PROF, &prof_itimer, NULL);
3510 #endif
3511
3512 #ifdef __BEOS__
3513                 /* Specific beos actions after backend startup */
3514                 beos_backend_startup();
3515 #endif
3516
3517                 IsUnderPostmaster = true;               /* we are a postmaster subprocess
3518                                                                                  * now */
3519
3520                 /* Close the postmaster's sockets */
3521                 ClosePostmasterPorts(false);
3522
3523                 /* Lose the postmaster's on-exit routines and port connections */
3524                 on_exit_reset();
3525
3526                 /* Release postmaster's working memory context */
3527                 MemoryContextSwitchTo(TopMemoryContext);
3528                 MemoryContextDelete(PostmasterContext);
3529                 PostmasterContext = NULL;
3530
3531                 BootstrapMain(ac, av);
3532                 ExitPostmaster(0);
3533         }
3534 #endif   /* EXEC_BACKEND */
3535
3536         if (pid < 0)
3537         {
3538                 /* in parent, fork failed */
3539                 int                     save_errno = errno;
3540
3541 #ifdef __BEOS__
3542                 /* Specific beos actions before backend startup */
3543                 beos_backend_startup_failed();
3544 #endif
3545                 errno = save_errno;
3546                 switch (xlop)
3547                 {
3548                         case BS_XLOG_STARTUP:
3549                                 ereport(LOG,
3550                                                 (errmsg("could not fork startup process: %m")));
3551                                 break;
3552                         case BS_XLOG_BGWRITER:
3553                                 ereport(LOG,
3554                                 (errmsg("could not fork background writer process: %m")));
3555                                 break;
3556                         default:
3557                                 ereport(LOG,
3558                                                 (errmsg("could not fork process: %m")));
3559                                 break;
3560                 }
3561
3562                 /*
3563                  * fork failure is fatal during startup, but there's no need to
3564                  * choke immediately if starting other child types fails.
3565                  */
3566                 if (xlop == BS_XLOG_STARTUP)
3567                         ExitPostmaster(1);
3568                 return 0;
3569         }
3570
3571         /*
3572          * in parent, successful fork
3573          */
3574         return pid;
3575 }
3576
3577
3578 /*
3579  * Create the opts file
3580  */
3581 static bool
3582 CreateOptsFile(int argc, char *argv[], char *fullprogname)
3583 {
3584         char            filename[MAXPGPATH];
3585         FILE       *fp;
3586         int                     i;
3587
3588         snprintf(filename, sizeof(filename), "%s/postmaster.opts", DataDir);
3589
3590         if ((fp = fopen(filename, "w")) == NULL)
3591         {
3592                 elog(LOG, "could not create file \"%s\": %m", filename);
3593                 return false;
3594         }
3595
3596         fprintf(fp, "%s", fullprogname);
3597         for (i = 1; i < argc; i++)
3598                 fprintf(fp, " %s%s%s", SYSTEMQUOTE, argv[i], SYSTEMQUOTE);
3599         fputs("\n", fp);
3600
3601         if (fclose(fp))
3602         {
3603                 elog(LOG, "could not write file \"%s\": %m", filename);
3604                 return false;
3605         }
3606
3607         return true;
3608 }
3609
3610
3611 #ifdef EXEC_BACKEND
3612
3613 /*
3614  * The following need to be available to the save/restore_backend_variables
3615  * functions
3616  */
3617 extern slock_t *ShmemLock;
3618 extern slock_t *ShmemIndexLock;
3619 extern void *ShmemIndexAlloc;
3620 extern LWLock *LWLockArray;
3621 extern slock_t *ProcStructLock;
3622 extern int      pgStatSock;
3623 extern int pgStatPipe[2];
3624
3625 #ifndef WIN32
3626 #define write_inheritable_socket(dest, src, childpid) (*(dest) = (src))
3627 #define read_inheritable_socket(dest, src) (*(dest) = *(src))
3628 #else
3629 static void write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE child);
3630 static void write_inheritable_socket(InheritableSocket *dest, SOCKET src,
3631                                                                          pid_t childPid);
3632 static void read_inheritable_socket(SOCKET *dest, InheritableSocket *src);
3633 #endif
3634
3635
3636 /* Save critical backend variables into the BackendParameters struct */
3637 #ifndef WIN32
3638 static bool
3639 save_backend_variables(BackendParameters *param, Port *port)
3640 #else
3641 static bool
3642 save_backend_variables(BackendParameters *param, Port *port,
3643                                            HANDLE childProcess, pid_t childPid)
3644 #endif
3645 {
3646         memcpy(&param->port, port, sizeof(Port));
3647         write_inheritable_socket(&param->portsocket, port->sock, childPid);
3648
3649         StrNCpy(param->DataDir, DataDir, MAXPGPATH);
3650
3651         memcpy(&param->ListenSocket, &ListenSocket, sizeof(ListenSocket));
3652
3653         param->MyCancelKey = MyCancelKey;
3654
3655         param->UsedShmemSegID = UsedShmemSegID;
3656         param->UsedShmemSegAddr = UsedShmemSegAddr;
3657
3658         param->ShmemLock = ShmemLock;
3659         param->ShmemIndexLock = ShmemIndexLock;
3660         param->ShmemVariableCache = ShmemVariableCache;
3661         param->ShmemIndexAlloc = ShmemIndexAlloc;
3662         param->ShmemBackendArray = ShmemBackendArray;
3663
3664         param->LWLockArray = LWLockArray;
3665         param->ProcStructLock = ProcStructLock;
3666         write_inheritable_socket(&param->pgStatSock, pgStatSock, childPid);
3667         write_inheritable_socket(&param->pgStatPipe0, pgStatPipe[0], childPid);
3668         write_inheritable_socket(&param->pgStatPipe1, pgStatPipe[1], childPid);
3669
3670         param->PostmasterPid = PostmasterPid;
3671
3672 #ifdef WIN32
3673         param->PostmasterHandle = PostmasterHandle;
3674         write_duplicated_handle(&param->initial_signal_pipe,
3675                                                         pgwin32_create_signal_listener(childPid),
3676                                                         childProcess);
3677 #endif
3678
3679         memcpy(&param->syslogPipe, &syslogPipe, sizeof(syslogPipe));
3680
3681         StrNCpy(param->my_exec_path, my_exec_path, MAXPGPATH);
3682
3683         StrNCpy(param->ExtraOptions, ExtraOptions, MAXPGPATH);
3684
3685         StrNCpy(param->lc_collate, setlocale(LC_COLLATE, NULL), MAXPGPATH);
3686         StrNCpy(param->lc_ctype, setlocale(LC_CTYPE, NULL), MAXPGPATH);
3687
3688         return true;
3689 }
3690
3691
3692 #ifdef WIN32
3693 /*
3694  * Duplicate a handle for usage in a child process, and write the child
3695  * process instance of the handle to the parameter file.
3696  */
3697 static void
3698 write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE childProcess)
3699 {
3700         HANDLE hChild = INVALID_HANDLE_VALUE;
3701
3702         if (!DuplicateHandle(GetCurrentProcess(),
3703                                                  src,
3704                                                  childProcess,
3705                                                  &hChild,
3706                                                  0,
3707                                                  TRUE,
3708                                                  DUPLICATE_CLOSE_SOURCE | DUPLICATE_SAME_ACCESS))
3709                 ereport(ERROR,
3710                                 (errmsg_internal("could not duplicate handle to be written to backend parameter file: error code %d",
3711                                                                  (int) GetLastError())));
3712
3713         *dest = hChild;
3714 }
3715
3716 /*
3717  * Duplicate a socket for usage in a child process, and write the resulting
3718  * structure to the parameter file.
3719  * This is required because a number of LSPs (Layered Service Providers) very
3720  * common on Windows (antivirus, firewalls, download managers etc) break
3721  * straight socket inheritance.
3722  */
3723 static void
3724 write_inheritable_socket(InheritableSocket *dest, SOCKET src, pid_t childpid)
3725 {
3726         dest->origsocket = src;
3727         if (src != 0 && src != -1)
3728         {
3729                 /* Actual socket */
3730                 if (WSADuplicateSocket(src, childpid, &dest->wsainfo) != 0)
3731                         ereport(ERROR,
3732                                         (errmsg("could not duplicate socket %d for use in backend: error code %d",
3733                                                         src, WSAGetLastError())));
3734         }
3735 }
3736
3737 /*
3738  * Read a duplicate socket structure back, and get the socket descriptor.
3739  */
3740 static void
3741 read_inheritable_socket(SOCKET *dest, InheritableSocket *src)
3742 {
3743         SOCKET s;
3744
3745         if (src->origsocket == -1  || src->origsocket == 0)
3746         {
3747                 /* Not a real socket! */
3748                 *dest = src->origsocket;
3749         }
3750         else
3751         {
3752                 /* Actual socket, so create from structure */
3753                 s = WSASocket(FROM_PROTOCOL_INFO,
3754                                           FROM_PROTOCOL_INFO,
3755                                           FROM_PROTOCOL_INFO,
3756                                           &src->wsainfo,
3757                                           0,
3758                                           0);
3759                 if (s == INVALID_SOCKET)
3760                 {
3761                         write_stderr("could not create inherited socket: error code %d\n",
3762                                                  WSAGetLastError());
3763                         exit(1);
3764                 }
3765                 *dest = s;
3766
3767                 /*
3768                  * To make sure we don't get two references to the same socket,
3769                  * close the original one. (This would happen when inheritance
3770                  * actually works..
3771                  */
3772                 closesocket(src->origsocket);
3773         }
3774 }
3775 #endif
3776
3777 static void
3778 read_backend_variables(char *id, Port *port)
3779 {
3780         BackendParameters param;
3781
3782 #ifndef WIN32
3783         /* Non-win32 implementation reads from file */
3784         FILE *fp;
3785
3786         /* Open file */
3787         fp = AllocateFile(id, PG_BINARY_R);
3788         if (!fp)
3789         {
3790                 write_stderr("could not read from backend variables file \"%s\": %s\n",
3791                                          id, strerror(errno));
3792                 exit(1);
3793         }
3794
3795         if (fread(&param, sizeof(param), 1, fp) != 1)
3796         {
3797                 write_stderr("could not read from backend variables file \"%s\": %s\n",
3798                                          id, strerror(errno));
3799                 exit(1);
3800         }
3801
3802         /* Release file */
3803         FreeFile(fp);
3804         if (unlink(id) != 0)
3805         {
3806                 write_stderr("could not remove file \"%s\": %s\n",
3807                                          id, strerror(errno));
3808                 exit(1);
3809         }
3810 #else
3811         /* Win32 version uses mapped file */
3812         HANDLE paramHandle;
3813         BackendParameters *paramp;
3814
3815         paramHandle = (HANDLE)atol(id);
3816         paramp = MapViewOfFile(paramHandle, FILE_MAP_READ, 0, 0, 0);
3817         if (!paramp)
3818         {
3819                 write_stderr("could not map view of backend variables: error code %d\n",
3820                                          (int) GetLastError());
3821                 exit(1);
3822         }
3823
3824         memcpy(&param, paramp, sizeof(BackendParameters));
3825
3826         if (!UnmapViewOfFile(paramp))
3827         {
3828                 write_stderr("could not unmap view of backend variables: error code %d\n",
3829                                          (int) GetLastError());
3830                 exit(1);
3831         }
3832
3833         if (!CloseHandle(paramHandle))
3834         {
3835                 write_stderr("could not close handle to backend parameter variables: error code %d\n",
3836                                          (int) GetLastError());
3837                 exit(1);
3838         }
3839 #endif
3840
3841         restore_backend_variables(&param, port);
3842 }
3843
3844 /* Restore critical backend variables from the BackendParameters struct */
3845 static void
3846 restore_backend_variables(BackendParameters *param, Port *port)
3847 {
3848         memcpy(port, &param->port, sizeof(Port));
3849         read_inheritable_socket(&port->sock, &param->portsocket);
3850
3851         SetDataDir(param->DataDir);
3852
3853         memcpy(&ListenSocket, &param->ListenSocket, sizeof(ListenSocket));
3854
3855         MyCancelKey = param->MyCancelKey;
3856
3857         UsedShmemSegID = param->UsedShmemSegID;
3858         UsedShmemSegAddr = param->UsedShmemSegAddr;
3859
3860         ShmemLock = param->ShmemLock;
3861         ShmemIndexLock = param->ShmemIndexLock;
3862         ShmemVariableCache = param->ShmemVariableCache;
3863         ShmemIndexAlloc = param->ShmemIndexAlloc;
3864         ShmemBackendArray = param->ShmemBackendArray;
3865
3866         LWLockArray = param->LWLockArray;
3867         ProcStructLock = param->ProcStructLock;
3868         read_inheritable_socket(&pgStatSock, &param->pgStatSock);
3869         read_inheritable_socket(&pgStatPipe[0], &param->pgStatPipe0);
3870         read_inheritable_socket(&pgStatPipe[1], &param->pgStatPipe1);
3871
3872         PostmasterPid = param->PostmasterPid;
3873
3874 #ifdef WIN32
3875         PostmasterHandle = param->PostmasterHandle;
3876         pgwin32_initial_signal_pipe = param->initial_signal_pipe;
3877 #endif
3878
3879         memcpy(&syslogPipe, &param->syslogPipe, sizeof(syslogPipe));
3880
3881         StrNCpy(my_exec_path, param->my_exec_path, MAXPGPATH);
3882
3883         StrNCpy(ExtraOptions, param->ExtraOptions, MAXPGPATH);
3884
3885         setlocale(LC_COLLATE, param->lc_collate);
3886         setlocale(LC_CTYPE, param->lc_ctype);
3887 }
3888
3889
3890 size_t
3891 ShmemBackendArraySize(void)
3892 {
3893         return (NUM_BACKENDARRAY_ELEMS * sizeof(Backend));
3894 }
3895
3896 void
3897 ShmemBackendArrayAllocation(void)
3898 {
3899         size_t          size = ShmemBackendArraySize();
3900
3901         ShmemBackendArray = (Backend *) ShmemAlloc(size);
3902         /* Mark all slots as empty */
3903         memset(ShmemBackendArray, 0, size);
3904 }
3905
3906 static void
3907 ShmemBackendArrayAdd(Backend *bn)
3908 {
3909         int                     i;
3910
3911         /* Find an empty slot */
3912         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3913         {
3914                 if (ShmemBackendArray[i].pid == 0)
3915                 {
3916                         ShmemBackendArray[i] = *bn;
3917                         return;
3918                 }
3919         }
3920
3921         ereport(FATAL,
3922                         (errmsg_internal("no free slots in shmem backend array")));
3923 }
3924
3925 static void
3926 ShmemBackendArrayRemove(pid_t pid)
3927 {
3928         int                     i;
3929
3930         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3931         {
3932                 if (ShmemBackendArray[i].pid == pid)
3933                 {
3934                         /* Mark the slot as empty */
3935                         ShmemBackendArray[i].pid = 0;
3936                         return;
3937                 }
3938         }
3939
3940         ereport(WARNING,
3941                         (errmsg_internal("could not find backend entry with pid %d",
3942                                                          (int) pid)));
3943 }
3944
3945 #endif   /* EXEC_BACKEND */
3946
3947
3948 #ifdef WIN32
3949
3950 /*
3951  * Note: The following three functions must not be interrupted (eg. by
3952  * signals).  As the Postgres Win32 signalling architecture (currently)
3953  * requires polling, or APC checking functions which aren't used here, this
3954  * is not an issue.
3955  *
3956  * We keep two separate arrays, instead of a single array of pid/HANDLE
3957  * structs, to avoid having to re-create a handle array for
3958  * WaitForMultipleObjects on each call to win32_waitpid.
3959  */
3960
3961 static void
3962 win32_AddChild(pid_t pid, HANDLE handle)
3963 {
3964         Assert(win32_childPIDArray && win32_childHNDArray);
3965         if (win32_numChildren < NUM_BACKENDARRAY_ELEMS)
3966         {
3967                 win32_childPIDArray[win32_numChildren] = pid;
3968                 win32_childHNDArray[win32_numChildren] = handle;
3969                 ++win32_numChildren;
3970         }
3971         else
3972                 ereport(FATAL,
3973                                 (errmsg_internal("no room for child entry with pid %lu",
3974                                                                  (unsigned long) pid)));
3975 }
3976
3977 static void
3978 win32_RemoveChild(pid_t pid)
3979 {
3980         int                     i;
3981
3982         Assert(win32_childPIDArray && win32_childHNDArray);
3983
3984         for (i = 0; i < win32_numChildren; i++)
3985         {
3986                 if (win32_childPIDArray[i] == pid)
3987                 {
3988                         CloseHandle(win32_childHNDArray[i]);
3989
3990                         /* Swap last entry into the "removed" one */
3991                         --win32_numChildren;
3992                         win32_childPIDArray[i] = win32_childPIDArray[win32_numChildren];
3993                         win32_childHNDArray[i] = win32_childHNDArray[win32_numChildren];
3994                         return;
3995                 }
3996         }
3997
3998         ereport(WARNING,
3999                         (errmsg_internal("could not find child entry with pid %lu",
4000                                                          (unsigned long) pid)));
4001 }
4002
4003 static pid_t
4004 win32_waitpid(int *exitstatus)
4005 {
4006         /*
4007          * Note: Do NOT use WaitForMultipleObjectsEx, as we don't want to run
4008          * queued APCs here.
4009          */
4010         int                     index;
4011         DWORD           exitCode;
4012         DWORD           ret;
4013         unsigned long offset;
4014
4015         Assert(win32_childPIDArray && win32_childHNDArray);
4016         elog(DEBUG3, "waiting on %lu children", win32_numChildren);
4017
4018         for (offset = 0; offset < win32_numChildren; offset += MAXIMUM_WAIT_OBJECTS)
4019         {
4020                 unsigned long num = Min(MAXIMUM_WAIT_OBJECTS, win32_numChildren - offset);
4021
4022                 ret = WaitForMultipleObjects(num, &win32_childHNDArray[offset], FALSE, 0);
4023                 switch (ret)
4024                 {
4025                         case WAIT_FAILED:
4026                                 ereport(LOG,
4027                                                 (errmsg_internal("failed to wait on %lu of %lu children: error code %d",
4028                                                  num, win32_numChildren, (int) GetLastError())));
4029                                 return -1;
4030
4031                         case WAIT_TIMEOUT:
4032                                 /* No children (in this chunk) have finished */
4033                                 break;
4034
4035                         default:
4036
4037                                 /*
4038                                  * Get the exit code, and return the PID of, the
4039                                  * respective process
4040                                  */
4041                                 index = offset + ret - WAIT_OBJECT_0;
4042                                 Assert(index >= 0 && index < win32_numChildren);
4043                                 if (!GetExitCodeProcess(win32_childHNDArray[index], &exitCode))
4044                                 {
4045                                         /*
4046                                          * If we get this far, this should never happen, but,
4047                                          * then again... No choice other than to assume a
4048                                          * catastrophic failure.
4049                                          */
4050                                         ereport(FATAL,
4051                                                         (errmsg_internal("failed to get exit code for child %lu",
4052                                                                                          (unsigned long) win32_childPIDArray[index])));
4053                                 }
4054                                 *exitstatus = (int) exitCode;
4055                                 return win32_childPIDArray[index];
4056                 }
4057         }
4058
4059         /* No children have finished */
4060         return -1;
4061 }
4062
4063 /*
4064  * Note! Code below executes on separate threads, one for
4065  * each child process created
4066  */
4067 static DWORD WINAPI
4068 win32_sigchld_waiter(LPVOID param)
4069 {
4070         HANDLE          procHandle = (HANDLE) param;
4071
4072         DWORD           r = WaitForSingleObject(procHandle, INFINITE);
4073
4074         if (r == WAIT_OBJECT_0)
4075                 pg_queue_signal(SIGCHLD);
4076         else
4077                 write_stderr("could not wait on child process handle: error code %d\n",
4078                                          (int) GetLastError());
4079         CloseHandle(procHandle);
4080         return 0;
4081 }
4082
4083 #endif   /* WIN32 */