]> granicus.if.org Git - postgresql/blob - src/backend/postmaster/postmaster.c
>> It certainly doesn't. There still was a bug with the locale stuff,
[postgresql] / src / backend / postmaster / postmaster.c
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  *        This program acts as a clearing house for requests to the
5  *        POSTGRES system.      Frontend programs send a startup message
6  *        to the Postmaster and the postmaster uses the info in the
7  *        message to setup a backend process.
8  *
9  *        The postmaster also manages system-wide operations such as
10  *        startup and shutdown. The postmaster itself doesn't do those
11  *        operations, mind you --- it just forks off a subprocess to do them
12  *        at the right times.  It also takes care of resetting the system
13  *        if a backend crashes.
14  *
15  *        The postmaster process creates the shared memory and semaphore
16  *        pools during startup, but as a rule does not touch them itself.
17  *        In particular, it is not a member of the PGPROC array of backends
18  *        and so it cannot participate in lock-manager operations.      Keeping
19  *        the postmaster away from shared memory operations makes it simpler
20  *        and more reliable.  The postmaster is almost always able to recover
21  *        from crashes of individual backends by resetting shared memory;
22  *        if it did much with shared memory then it would be prone to crashing
23  *        along with the backends.
24  *
25  *        When a request message is received, we now fork() immediately.
26  *        The child process performs authentication of the request, and
27  *        then becomes a backend if successful.  This allows the auth code
28  *        to be written in a simple single-threaded style (as opposed to the
29  *        crufty "poor man's multitasking" code that used to be needed).
30  *        More importantly, it ensures that blockages in non-multithreaded
31  *        libraries like SSL or PAM cannot cause denial of service to other
32  *        clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  *        $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.403 2004/06/11 03:54:43 momjian Exp $
41  *
42  * NOTES
43  *
44  * Initialization:
45  *              The Postmaster sets up shared memory data structures
46  *              for the backends.
47  *
48  * Synchronization:
49  *              The Postmaster shares memory with the backends but should avoid
50  *              touching shared memory, so as not to become stuck if a crashing
51  *              backend screws up locks or shared memory.  Likewise, the Postmaster
52  *              should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  *              The Postmaster cleans up after backends if they have an emergency
56  *              exit and/or core dump.
57  *
58  *-------------------------------------------------------------------------
59  */
60
61 #include "postgres.h"
62
63 #include <unistd.h>
64 #include <signal.h>
65 #include <time.h>
66 #include <sys/wait.h>
67 #include <ctype.h>
68 #include <sys/stat.h>
69 #include <sys/socket.h>
70 #include <errno.h>
71 #include <fcntl.h>
72 #include <sys/param.h>
73 #include <netinet/in.h>
74 #include <arpa/inet.h>
75 #include <netdb.h>
76 #include <limits.h>
77
78 #ifdef HAVE_SYS_SELECT_H
79 #include <sys/select.h>
80 #endif
81
82 #ifdef HAVE_GETOPT_H
83 #include <getopt.h>
84 #endif
85
86 #ifdef USE_RENDEZVOUS
87 #include <DNSServiceDiscovery/DNSServiceDiscovery.h>
88 #endif
89
90 #include "catalog/pg_database.h"
91 #include "commands/async.h"
92 #include "lib/dllist.h"
93 #include "libpq/auth.h"
94 #include "libpq/crypt.h"
95 #include "libpq/libpq.h"
96 #include "libpq/pqcomm.h"
97 #include "libpq/pqsignal.h"
98 #include "miscadmin.h"
99 #include "nodes/nodes.h"
100 #include "postmaster/postmaster.h"
101 #include "storage/fd.h"
102 #include "storage/ipc.h"
103 #include "storage/pg_shmem.h"
104 #include "storage/pmsignal.h"
105 #include "storage/proc.h"
106 #include "storage/bufmgr.h"
107 #include "access/xlog.h"
108 #include "tcop/tcopprot.h"
109 #include "utils/guc.h"
110 #include "utils/memutils.h"
111 #include "utils/ps_status.h"
112 #include "bootstrap/bootstrap.h"
113 #include "pgstat.h"
114
115
116 /*
117  * List of active backends (or child processes anyway; we don't actually
118  * know whether a given child has become a backend or is still in the
119  * authorization phase).  This is used mainly to keep track of how many
120  * children we have and send them appropriate signals when necessary.
121  *
122  * "Special" children such as the startup and bgwriter tasks are not in
123  * this list.
124  */
125 typedef struct bkend
126 {
127         pid_t           pid;                    /* process id of the child process */
128         long            cancel_key;             /* cancel key for this child; NOTE(review): presumably compared in processCancelRequest() -- confirm */
129 } Backend;
130
131 static Dllist *BackendList;             /* created with DLNewList() in PostmasterMain */
132
133 #ifdef EXEC_BACKEND
134 #define NUM_BACKENDARRAY_ELEMS (2*MaxBackends)  /* also sizes the WIN32 child pid/HANDLE arrays */
135 static Backend *ShmemBackendArray;      /* NOTE(review): presumably maintained by ShmemBackendArrayAdd/Remove -- confirm */
136 #endif
137
138 /* The port number, Unix-socket directory, and addresses we listen for connections on */
139 int                     PostPortNumber;         /* passed to StreamServerPort() as unsigned short */
140 char       *UnixSocketDir;              /* handed to every StreamServerPort() call */
141 char       *ListenAddresses;            /* whitespace-separated list; a "*" entry is passed to StreamServerPort() as a NULL host */
142
143 /*
144  * ReservedBackends is the number of backends reserved for superuser use.
145  * This number is taken out of the pool size given by MaxBackends, so
146  * the number of backend slots available to non-superusers is
147  * (MaxBackends - ReservedBackends).  Note what this really means is
148  * "if there are <= ReservedBackends connections available, only superusers
149  * can make new connections" --- pre-existing superuser connections don't
150  * count against the limit.
151  */
152 int                     ReservedBackends;       /* PostmasterMain rejects values >= MaxBackends */
153
154
155 static const char *progname = NULL;     /* set from get_progname(argv[0]) in PostmasterMain */
156
157 /* The socket(s) we're listening to. */
158 #define MAXLISTEN       10
159 static int      ListenSocket[MAXLISTEN];        /* every slot starts as -1; slot 0 still -1 after setup is FATAL */
160
161 /*
162  * Set by the -o option (each use appends " " and its argument with strcat)
163  */
164 static char ExtraOptions[MAXPGPATH];    /* NOTE(review): no length check against MAXPGPATH at the strcat site */
165
166 /*
167  * These globals control the behavior of the postmaster in case some
168  * backend dumps core.  Normally, it kills all peers of the dead backend
169  * and reinitializes shared memory.  By specifying -s or -n, we can have
170  * the postmaster stop (rather than kill) peers and not reinitialize
171  * shared data structures.
172  */
173 static bool Reinit = true;              /* cleared by -n */
174 static bool SendStop = false;   /* set by -s; a pure flag, so bool like Reinit */
175
176 /* still more option variables */
177 bool            EnableSSL = false;      /* when true (and USE_SSL), PostmasterMain calls secure_initialize() */
178 bool            SilentMode = false; /* silent mode (-S); when true, PostmasterMain calls pmdaemonize() */
179
180 int                     PreAuthDelay = 0;
181 int                     AuthenticationTimeout = 60;
182
183 bool            log_hostname;           /* for ps display and logging */
184 bool            Log_connections = false;
185 bool            Db_user_namespace = false;
186
187 char       *rendezvous_name;            /* if non-NULL and a socket was opened, registered via DNSServiceRegistrationCreate() */
188
189 /* list of library:init-function to be preloaded */
190 char       *preload_libraries_string = NULL;    /* if non-NULL, passed to process_preload_libraries() */
191
192 /* PIDs of special child processes; 0 when not running */
193 static pid_t StartupPID = 0,
194                         BgWriterPID = 0;
195
196 /* Startup/shutdown state: the values that Shutdown (below) may hold */
197 #define                 NoShutdown              0
198 #define                 SmartShutdown   1
199 #define                 FastShutdown    2
200
201 static int      Shutdown = NoShutdown;  /* starts out as NoShutdown */
202
203 static bool FatalError = false; /* true if recovering from backend crash */
204
205 bool            ClientAuthInProgress = false;           /* true during new-client
206                                                                                                  * authentication */
207
208 /*
209  * State for assigning random salts and cancel keys.
210  * Also, the global MyCancelKey passes the cancel key assigned to a given
211  * backend from the postmaster to that backend (via fork).
212  */
213 static unsigned int random_seed = 0;
214
215 static int      debug_flag = 0; /* atoi() of the -d argument */
216
217 extern char *optarg;            /* getopt(3) state, used by the option loop in PostmasterMain */
218 extern int      optind,
219                         opterr;
220
221 #ifdef HAVE_INT_OPTRESET
222 extern int      optreset;       /* set to 1 along with optind once option parsing is done */
223 #endif
224
225 /*
226  * postmaster.c - function prototypes
227  */
228 static void checkDataDir(const char *checkdir); /* issues its own error messages */
229 #ifdef USE_RENDEZVOUS
230 static void reg_reply(DNSServiceRegistrationReplyErrorType errorCode,
231                                           void *context);       /* reply callback given to DNSServiceRegistrationCreate() */
232 #endif
233 static void pmdaemonize(void);  /* called when SilentMode is set */
234 static Port *ConnCreate(int serverFd);
235 static void ConnFree(Port *port);
236 static void reset_shared(unsigned short port);  /* called with PostPortNumber to set up shared memory and semaphores */
237 static void SIGHUP_handler(SIGNAL_ARGS);        /* installed for SIGHUP */
238 static void pmdie(SIGNAL_ARGS);
239 static void reaper(SIGNAL_ARGS);
240 static void sigusr1_handler(SIGNAL_ARGS);
241 static void dummy_handler(SIGNAL_ARGS);
242 static void CleanupProc(int pid, int exitstatus);
243 static void HandleChildCrash(int pid, int exitstatus);
244 static void LogChildExit(int lev, const char *procname,
245                          int pid, int exitstatus);
246 static int      BackendRun(Port *port);
247 static void ExitPostmaster(int status);
248 static void usage(const char *);        /* called with progname for --help / -? */
249 static int      ServerLoop(void);
250 static int      BackendStartup(Port *port);
251 static int      ProcessStartupPacket(Port *port, bool SSLdone);
252 static void processCancelRequest(Port *port, void *pkt);
253 static int      initMasks(fd_set *rmask);
254 static void report_fork_failure_to_client(Port *port, int errnum);
255 static enum CAC_state canAcceptConnections(void);
256 static long PostmasterRandom(void);
257 static void RandomSalt(char *cryptSalt, char *md5Salt);
258 static void SignalChildren(int signal);
259 static int      CountChildren(void);
260 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);        /* false makes PostmasterMain exit with status 1 */
261 static pid_t StartChildProcess(int xlop);       /* wrapped by StartupDataBase() and StartBackgroundWriter() */
262 static void
263 postmaster_error(const char *fmt,...)
264 /* This lets gcc check the format string for consistency. */
265 __attribute__((format(printf, 1, 2)));
266
267 #ifdef EXEC_BACKEND
268
269 #ifdef WIN32
270 static pid_t win32_forkexec(const char *path, char *argv[]);
271 static void win32_AddChild(pid_t pid, HANDLE handle);
272 static void win32_RemoveChild(pid_t pid);
273 static pid_t win32_waitpid(int *exitstatus);
274 static DWORD WINAPI win32_sigchld_waiter(LPVOID param);
275
276 static pid_t *win32_childPIDArray;      /* malloc'ed in PostmasterMain with NUM_BACKENDARRAY_ELEMS entries */
277 static HANDLE *win32_childHNDArray;     /* same size as win32_childPIDArray; NOTE(review): PostmasterMain's #ifdef WIN32 section uses both arrays without also testing EXEC_BACKEND */
278 static unsigned long win32_numChildren = 0;
279
280 HANDLE PostmasterHandle;        /* inheritable duplicate of the postmaster's own process handle, made in PostmasterMain */
281 #endif
282
283 static pid_t backend_forkexec(Port *port);
284 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
285
286 static void read_backend_variables(char *filename, Port *port);
287 static bool write_backend_variables(char *filename, Port *port);
288
289 static void ShmemBackendArrayAdd(Backend *bn);
290 static void ShmemBackendArrayRemove(pid_t pid);
291
292 #endif /* EXEC_BACKEND */
293
294 #define StartupDataBase()               StartChildProcess(BS_XLOG_STARTUP)      /* yields StartChildProcess()'s pid_t result */
295 #define StartBackgroundWriter() StartChildProcess(BS_XLOG_BGWRITER)     /* same call, with BS_XLOG_BGWRITER */
296
297
298 /*
299  * Postmaster main entry point
300  */
301 int
302 PostmasterMain(int argc, char *argv[])
303 {
304         int                     opt;
305         int                     status;
306         char       *potential_DataDir = NULL;
307         int                     i;
308
309         progname = get_progname(argv[0]);
310
311         MyProcPid = PostmasterPid = getpid();
312
313         IsPostmasterEnvironment = true;
314
315         /*
316          * Catch standard options before doing much else.  This even works on
317          * systems without getopt_long.
318          */
319         if (argc > 1)
320         {
321                 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
322                 {
323                         usage(progname);
324                         ExitPostmaster(0);
325                 }
326                 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
327                 {
328                         puts("postmaster (PostgreSQL) " PG_VERSION);
329                         ExitPostmaster(0);
330                 }
331         }
332
333         /*
334          * for security, no dir or file created can be group or other
335          * accessible
336          */
337         umask((mode_t) 0077);
338
339         /*
340          * Fire up essential subsystems: memory management
341          */
342         MemoryContextInit();
343
344         /*
345          * By default, palloc() requests in the postmaster will be allocated
346          * in the PostmasterContext, which is space that can be recycled by
347          * backends.  Allocated data that needs to be available to backends
348          * should be allocated in TopMemoryContext.
349          */
350         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
351                                                                                           "Postmaster",
352                                                                                           ALLOCSET_DEFAULT_MINSIZE,
353                                                                                           ALLOCSET_DEFAULT_INITSIZE,
354                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
355         MemoryContextSwitchTo(PostmasterContext);
356
357         IgnoreSystemIndexes(false);
358
359         if (find_my_exec(argv[0], my_exec_path) < 0)
360                 elog(FATAL, "%s: could not locate my own executable path",
361                          argv[0]);
362
363         get_pkglib_path(my_exec_path, pkglib_path);
364
365         /*
366          * Options setup
367          */
368         InitializeGUCOptions();
369
370         potential_DataDir = getenv("PGDATA");           /* default value */
371
372         opterr = 1;
373
374         while ((opt = getopt(argc, argv, "A:a:B:b:c:D:d:Fh:ik:lm:MN:no:p:Ss-:")) != -1)
375         {
376                 switch (opt)
377                 {
378                         case 'A':
379 #ifdef USE_ASSERT_CHECKING
380                                 SetConfigOption("debug_assertions", optarg, PGC_POSTMASTER, PGC_S_ARGV);
381 #else
382                                 postmaster_error("assert checking is not compiled in");
383 #endif
384                                 break;
385                         case 'a':
386                                 /* Can no longer set authentication method. */
387                                 break;
388                         case 'B':
389                                 SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
390                                 break;
391                         case 'b':
392                                 /* Can no longer set the backend executable file to use. */
393                                 break;
394                         case 'D':
395                                 potential_DataDir = optarg;
396                                 break;
397                         case 'd':
398                                 {
399                                         /* Turn on debugging for the postmaster. */
400                                         char       *debugstr = palloc(strlen("debug") + strlen(optarg) + 1);
401
402                                         sprintf(debugstr, "debug%s", optarg);
403                                         SetConfigOption("log_min_messages", debugstr,
404                                                                         PGC_POSTMASTER, PGC_S_ARGV);
405                                         pfree(debugstr);
406                                         debug_flag = atoi(optarg);
407                                         break;
408                                 }
409                         case 'F':
410                                 SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
411                                 break;
412                         case 'h':
413                                 SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
414                                 break;
415                         case 'i':
416                                 SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
417                                 break;
418                         case 'k':
419                                 SetConfigOption("unix_socket_directory", optarg, PGC_POSTMASTER, PGC_S_ARGV);
420                                 break;
421 #ifdef USE_SSL
422                         case 'l':
423                                 SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
424                                 break;
425 #endif
426                         case 'm':
427                                 /* Multiplexed backends no longer supported. */
428                                 break;
429                         case 'M':
430
431                                 /*
432                                  * ignore this flag.  This may be passed in because the
433                                  * program was run as 'postgres -M' instead of
434                                  * 'postmaster'
435                                  */
436                                 break;
437                         case 'N':
438                                 /* The max number of backends to start. */
439                                 SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
440                                 break;
441                         case 'n':
442                                 /* Don't reinit shared mem after abnormal exit */
443                                 Reinit = false;
444                                 break;
445                         case 'o':
446
447                                 /*
448                                  * Other options to pass to the backend on the command line
449                                  */
450                                 strcat(ExtraOptions, " ");
451                                 strcat(ExtraOptions, optarg);
452                                 break;
453                         case 'p':
454                                 SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
455                                 break;
456                         case 'S':
457
458                                 /*
459                                  * Start in 'S'ilent mode (disassociate from controlling
460                                  * tty). You may also think of this as 'S'ysV mode since
461                                  * it's most badly needed on SysV-derived systems like
462                                  * SVR4 and HP-UX.
463                                  */
464                                 SetConfigOption("silent_mode", "true", PGC_POSTMASTER, PGC_S_ARGV);
465                                 break;
466                         case 's':
467
468                                 /*
469                                  * In the event that some backend dumps core, send
470                                  * SIGSTOP, rather than SIGQUIT, to all its peers.      This
471                                  * lets the wily post_hacker collect core dumps from
472                                  * everyone.
473                                  */
474                                 SendStop = true;
475                                 break;
476                         case 'c':
477                         case '-':
478                                 {
479                                         char       *name,
480                                                            *value;
481
482                                         ParseLongOption(optarg, &name, &value);
483                                         if (!value)
484                                         {
485                                                 if (opt == '-')
486                                                         ereport(ERROR,
487                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
488                                                                          errmsg("--%s requires a value",
489                                                                                         optarg)));
490                                                 else
491                                                         ereport(ERROR,
492                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
493                                                                          errmsg("-c %s requires a value",
494                                                                                         optarg)));
495                                         }
496
497                                         SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
498                                         free(name);
499                                         if (value)
500                                                 free(value);
501                                         break;
502                                 }
503
504                         default:
505                                 fprintf(stderr,
506                                         gettext("Try \"%s --help\" for more information.\n"),
507                                                 progname);
508                                 ExitPostmaster(1);
509                 }
510         }
511
512         /*
513          * Postmaster accepts no non-option switch arguments.
514          */
515         if (optind < argc)
516         {
517                 postmaster_error("invalid argument: \"%s\"", argv[optind]);
518                 fprintf(stderr,
519                                 gettext("Try \"%s --help\" for more information.\n"),
520                                 progname);
521                 ExitPostmaster(1);
522         }
523
524         /*
525          * Now we can set the data directory, and then read postgresql.conf.
526          */
527         checkDataDir(potential_DataDir);        /* issues error messages */
528         SetDataDir(potential_DataDir);
529
530         ProcessConfigFile(PGC_POSTMASTER);
531
532         /* If timezone is not set, determine what the OS uses */
533         pg_timezone_initialize();
534
535 #ifdef EXEC_BACKEND
536         write_nondefault_variables(PGC_POSTMASTER);
537 #endif
538
539         /*
540          * Check for invalid combinations of GUC settings.
541          */
542         if (NBuffers < 2 * MaxBackends || NBuffers < 16)
543         {
544                 /*
545                  * Do not accept -B so small that backends are likely to starve
546                  * for lack of buffers.  The specific choices here are somewhat
547                  * arbitrary.
548                  */
549                 postmaster_error("the number of buffers (-B) must be at least twice the number of allowed connections (-N) and at least 16");
550                 ExitPostmaster(1);
551         }
552
553         if (ReservedBackends >= MaxBackends)
554         {
555                 postmaster_error("superuser_reserved_connections must be less than max_connections");
556                 ExitPostmaster(1);
557         }
558
559         /*
560          * Other one-time internal sanity checks can go here.
561          */
562         if (!CheckDateTokenTables())
563         {
564                 postmaster_error("invalid datetoken tables, please fix");
565                 ExitPostmaster(1);
566         }
567
568         /*
569          * Now that we are done processing the postmaster arguments, reset
570          * getopt(3) library so that it will work correctly in subprocesses.
571          */
572         optind = 1;
573 #ifdef HAVE_INT_OPTRESET
574         optreset = 1;                           /* some systems need this too */
575 #endif
576
577         /* For debugging: display postmaster environment */
578         {
579                 extern char **environ;
580                 char      **p;
581
582                 ereport(DEBUG3,
583                         (errmsg_internal("%s: PostmasterMain: initial environ dump:",
584                                                          progname)));
585                 ereport(DEBUG3,
586                  (errmsg_internal("-----------------------------------------")));
587                 for (p = environ; *p; ++p)
588                         ereport(DEBUG3,
589                                         (errmsg_internal("\t%s", *p)));
590                 ereport(DEBUG3,
591                  (errmsg_internal("-----------------------------------------")));
592         }
593
594 #ifdef EXEC_BACKEND
595         if (find_other_exec(argv[0], "postgres", PG_VERSIONSTR,
596                                                 postgres_exec_path) < 0)
597                 ereport(FATAL,
598                                 (errmsg("%s: could not locate matching postgres executable",
599                                                 progname)));
600 #endif
601
602         /*
603          * Initialize SSL library, if specified.
604          */
605 #ifdef USE_SSL
606         if (EnableSSL)
607                 secure_initialize();
608 #endif
609
610         /*
611          * process any libraries that should be preloaded and optionally
612          * pre-initialized
613          */
614         if (preload_libraries_string)
615                 process_preload_libraries(preload_libraries_string);
616
617         /*
618          * Fork away from controlling terminal, if -S specified.
619          *
620          * Must do this before we grab any interlock files, else the interlocks
621          * will show the wrong PID.
622          */
623         if (SilentMode)
624                 pmdaemonize();
625
626         /*
627          * Create lockfile for data directory.
628          *
629          * We want to do this before we try to grab the input sockets, because
630          * the data directory interlock is more reliable than the socket-file
631          * interlock (thanks to whoever decided to put socket files in /tmp
632          * :-(). For the same reason, it's best to grab the TCP socket(s) before
633          * the Unix socket.
634          */
635         CreateDataDirLockFile(DataDir, true);
636
637         /*
638          * Remove old temporary files.  At this point there can be no other
639          * Postgres processes running in this directory, so this should be
640          * safe.
641          */
642         RemovePgTempFiles();
643
644         /*
645          * Establish input sockets.
646          */
647         for (i = 0; i < MAXLISTEN; i++)
648                 ListenSocket[i] = -1;
649
650         if (ListenAddresses)
651         {
652                 char       *curhost,
653                                    *endptr;
654                 char            c;
655
656                 curhost = ListenAddresses;
657                 for (;;)
658                 {
659                         /* ignore whitespace */
660                         while (isspace((unsigned char) *curhost))
661                                 curhost++;
662                         if (*curhost == '\0')
663                                 break;
664                         endptr = curhost;
665                         while (*endptr != '\0' && !isspace((unsigned char) *endptr))
666                                 endptr++;
667                         c = *endptr;
668                         *endptr = '\0';
669                         if (strcmp(curhost, "*") == 0)
670                                 status = StreamServerPort(AF_UNSPEC, NULL,
671                                                                                   (unsigned short) PostPortNumber,
672                                                                                   UnixSocketDir,
673                                                                                   ListenSocket, MAXLISTEN);
674                         else
675                                 status = StreamServerPort(AF_UNSPEC, curhost,
676                                                                                   (unsigned short) PostPortNumber,
677                                                                                   UnixSocketDir,
678                                                                                   ListenSocket, MAXLISTEN);
679                         if (status != STATUS_OK)
680                                 ereport(WARNING,
681                                          (errmsg("could not create listen socket for \"%s\"",
682                                                          curhost)));
683                         *endptr = c;
684                         if (c != '\0')
685                                 curhost = endptr + 1;
686                         else
687                                 break;
688                 }
689         }
690
691 #ifdef USE_RENDEZVOUS
692         /* Register for Rendezvous only if we opened TCP socket(s) */
693         if (ListenSocket[0] != -1 && rendezvous_name != NULL)
694         {
695                 DNSServiceRegistrationCreate(rendezvous_name,
696                                                                          "_postgresql._tcp.",
697                                                                          "",
698                                                                          htonl(PostPortNumber),
699                                                                          "",
700                                                                  (DNSServiceRegistrationReply) reg_reply,
701                                                                          NULL);
702         }
703 #endif
704
705 #ifdef HAVE_UNIX_SOCKETS
706         status = StreamServerPort(AF_UNIX, NULL,
707                                                           (unsigned short) PostPortNumber,
708                                                           UnixSocketDir,
709                                                           ListenSocket, MAXLISTEN);
710         if (status != STATUS_OK)
711                 ereport(WARNING,
712                                 (errmsg("could not create Unix-domain socket")));
713 #endif
714
715         /*
716          * check that we have some socket to listen on
717          */
718         if (ListenSocket[0] == -1)
719                 ereport(FATAL,
720                                 (errmsg("no socket created for listening")));
721
722         XLOGPathInit();
723
724         /*
725          * Set up shared memory and semaphores.
726          */
727         reset_shared(PostPortNumber);
728
729         /*
730          * Estimate number of openable files.  This must happen after setting
731          * up semaphores, because on some platforms semaphores count as open
732          * files.
733          */
734         set_max_safe_fds();
735
736         /*
737          * Initialize the list of active backends.
738          */
739         BackendList = DLNewList();
740
741 #ifdef WIN32
742         /*
743          * Initialize the child pid/HANDLE arrays for signal handling.
744          */
745         win32_childPIDArray = (pid_t *)
746                 malloc(NUM_BACKENDARRAY_ELEMS * sizeof(pid_t));
747         win32_childHNDArray = (HANDLE *)
748                 malloc(NUM_BACKENDARRAY_ELEMS * sizeof(HANDLE));
749         if (!win32_childPIDArray || !win32_childHNDArray)
750                 ereport(FATAL,
751                                 (errcode(ERRCODE_OUT_OF_MEMORY),
752                                  errmsg("out of memory")));
753
754         /*
755          * Set up a handle that child processes can use to check whether the
756          * postmaster is still running.
757          */
758         if (DuplicateHandle(GetCurrentProcess(),
759                                                 GetCurrentProcess(),
760                                                 GetCurrentProcess(),
761                                                 &PostmasterHandle,
762                                                 0,
763                                                 TRUE,
764                                                 DUPLICATE_SAME_ACCESS) == 0)
765                 ereport(FATAL,
766                                 (errmsg_internal("could not duplicate postmaster handle: %d",
767                                                                  (int) GetLastError())));
768 #endif
769
770         /*
771          * Record postmaster options.  We delay this till now to avoid
772          * recording bogus options (eg, NBuffers too high for available
773          * memory).
774          */
775         if (!CreateOptsFile(argc, argv, my_exec_path))
776                 ExitPostmaster(1);
777
778         /*
779          * Set up signal handlers for the postmaster process.
780          *
781          * CAUTION: when changing this list, check for side-effects on the signal
782          * handling setup of child processes.  See tcop/postgres.c,
783          * bootstrap/bootstrap.c, postmaster/bgwriter.c, and postmaster/pgstat.c.
784          */
785         pqinitmask();
786         PG_SETMASK(&BlockSig);
787
788         pqsignal(SIGHUP, SIGHUP_handler);       /* reread config file and have
789                                                                                  * children do same */
790         pqsignal(SIGINT, pmdie);        /* send SIGTERM and shut down */
791         pqsignal(SIGQUIT, pmdie);       /* send SIGQUIT and die */
792         pqsignal(SIGTERM, pmdie);       /* wait for children and shut down */
793         pqsignal(SIGALRM, SIG_IGN); /* ignored */
794         pqsignal(SIGPIPE, SIG_IGN); /* ignored */
795         pqsignal(SIGUSR1, sigusr1_handler); /* message from child process */
796         pqsignal(SIGUSR2, dummy_handler);       /* unused, reserve for children */
797         pqsignal(SIGCHLD, reaper);      /* handle child termination */
798         pqsignal(SIGTTIN, SIG_IGN); /* ignored */
799         pqsignal(SIGTTOU, SIG_IGN); /* ignored */
800         /* ignore SIGXFSZ, so that ulimit violations work like disk full */
801 #ifdef SIGXFSZ
802         pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
803 #endif
804
805         /*
806          * Reset whereToSendOutput from Debug (its starting state) to None.
807          * This prevents ereport from sending log messages to stderr unless
808          * the syslog/stderr switch permits.  We don't do this until the
809          * postmaster is fully launched, since startup failures may as well be
810          * reported to stderr.
811          */
812         whereToSendOutput = None;
813
814         /*
815          * Initialize and try to startup the statistics collector process
816          */
817         pgstat_init();
818         pgstat_start();
819
820         /*
821          * Load cached files for client authentication.
822          */
823         load_hba();
824         load_ident();
825         load_user();
826         load_group();
827
828         /*
829          * We're ready to rock and roll...
830          */
831         StartupPID = StartupDataBase();
832
833 #ifdef EXEC_BACKEND
834         write_nondefault_variables(PGC_POSTMASTER);
835 #endif
836
837         status = ServerLoop();
838
839         /*
840          * ServerLoop probably shouldn't ever return, but if it does, close
841          * down.
842          */
843         ExitPostmaster(status != STATUS_OK);
844
845         return 0;                                       /* not reached */
846 }
847
848
849 /*
850  * Validate the proposed data directory
851  */
852 static void
853 checkDataDir(const char *checkdir)
854 {
855         char            path[MAXPGPATH];
856         FILE       *fp;
857         struct stat stat_buf;
858
859         if (checkdir == NULL)
860         {
861                 fprintf(stderr,
862                                 gettext("%s does not know where to find the database system data.\n"
863                                                 "You must specify the directory that contains the database system\n"
864                                                 "either by specifying the -D invocation option or by setting the\n"
865                                                 "PGDATA environment variable.\n"),
866                                 progname);
867                 ExitPostmaster(2);
868         }
869
870         if (stat(checkdir, &stat_buf) == -1)
871         {
872                 if (errno == ENOENT)
873                         ereport(FATAL,
874                                         (errcode_for_file_access(),
875                                          errmsg("data directory \"%s\" does not exist",
876                                                         checkdir)));
877                 else
878                         ereport(FATAL,
879                                         (errcode_for_file_access(),
880                          errmsg("could not read permissions of directory \"%s\": %m",
881                                         checkdir)));
882         }
883
884         /*
885          * Check if the directory has group or world access.  If so, reject.
886          *
887          * XXX temporarily suppress check when on Windows, because there may not
888          * be proper support for Unix-y file permissions.  Need to think of a
889          * reasonable check to apply on Windows.
890          */
891 #if !defined(__CYGWIN__) && !defined(WIN32)
892         if (stat_buf.st_mode & (S_IRWXG | S_IRWXO))
893                 ereport(FATAL,
894                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
895                                  errmsg("data directory \"%s\" has group or world access",
896                                                 checkdir),
897                                  errdetail("Permissions should be u=rwx (0700).")));
898 #endif
899
900         /* Look for PG_VERSION before looking for pg_control */
901         ValidatePgVersion(checkdir);
902
903         snprintf(path, sizeof(path), "%s/global/pg_control", checkdir);
904
905         fp = AllocateFile(path, PG_BINARY_R);
906         if (fp == NULL)
907         {
908                 fprintf(stderr,
909                                 gettext("%s: could not find the database system\n"
910                                                 "Expected to find it in the directory \"%s\",\n"
911                                                 "but could not open file \"%s\": %s\n"),
912                                 progname, checkdir, path, strerror(errno));
913                 ExitPostmaster(2);
914         }
915         FreeFile(fp);
916 }
917
918
919 #ifdef USE_RENDEZVOUS
920
921 /*
922  * empty callback function for DNSServiceRegistrationCreate()
923  */
924 static void
925 reg_reply(DNSServiceRegistrationReplyErrorType errorCode, void *context)
926 {
927
928 }
929
930 #endif /* USE_RENDEZVOUS */
931
932
933 /*
934  * Fork away from the controlling terminal (-S option)
935  */
936 static void
937 pmdaemonize(void)
938 {
939 #ifndef WIN32
940         int                     i;
941         pid_t           pid;
942
943 #ifdef LINUX_PROFILE
944         struct itimerval prof_itimer;
945 #endif
946
947 #ifdef LINUX_PROFILE
948         /* see comments in BackendStartup */
949         getitimer(ITIMER_PROF, &prof_itimer);
950 #endif
951
952         pid = fork();
953         if (pid == (pid_t) -1)
954         {
955                 postmaster_error("could not fork background process: %s",
956                                                  strerror(errno));
957                 ExitPostmaster(1);
958         }
959         else if (pid)
960         {                                                       /* parent */
961                 /* Parent should just exit, without doing any atexit cleanup */
962                 _exit(0);
963         }
964
965 #ifdef LINUX_PROFILE
966         setitimer(ITIMER_PROF, &prof_itimer, NULL);
967 #endif
968
969         MyProcPid = PostmasterPid = getpid();   /* reset PID vars to child */
970
971 /* GH: If there's no setsid(), we hopefully don't need silent mode.
972  * Until there's a better solution.
973  */
974 #ifdef HAVE_SETSID
975         if (setsid() < 0)
976         {
977                 postmaster_error("could not dissociate from controlling TTY: %s",
978                                                  strerror(errno));
979                 ExitPostmaster(1);
980         }
981 #endif
982         i = open(NULL_DEV, O_RDWR | PG_BINARY);
983         dup2(i, 0);
984         dup2(i, 1);
985         dup2(i, 2);
986         close(i);
987 #else  /* WIN32 */
988         /* not supported */
989         elog(FATAL, "SilentMode not supported under WIN32");
990 #endif /* WIN32 */
991 }
992
993
/*
 * Write the command-line help text to stdout.
 *
 * progname is substituted into the banner and the usage line.  Each
 * message is passed through gettext() individually; the -A and -l lines
 * appear only when the corresponding feature is compiled in.
 */
static void
usage(const char *progname)
{
	/* banner and synopsis */
	printf(gettext("%s is the PostgreSQL server.\n\n"), progname);
	printf(gettext("Usage:\n  %s [OPTION]...\n\n"), progname);

	/* regular options */
	printf(gettext("Options:\n"));
#ifdef USE_ASSERT_CHECKING
	printf(gettext("  -A 1|0          enable/disable run-time assert checking\n"));
#endif
	printf(gettext("  -B NBUFFERS     number of shared buffers\n"));
	printf(gettext("  -c NAME=VALUE   set run-time parameter\n"));
	printf(gettext("  -d 1-5          debugging level\n"));
	printf(gettext("  -D DATADIR      database directory\n"));
	printf(gettext("  -F              turn fsync off\n"));
	printf(gettext("  -h HOSTNAME     host name or IP address to listen on\n"));
	printf(gettext("  -i              enable TCP/IP connections\n"));
	printf(gettext("  -k DIRECTORY    Unix-domain socket location\n"));
#ifdef USE_SSL
	printf(gettext("  -l              enable SSL connections\n"));
#endif
	printf(gettext("  -N MAX-CONNECT  maximum number of allowed connections\n"));
	printf(gettext("  -o OPTIONS      pass \"OPTIONS\" to each server process\n"));
	printf(gettext("  -p PORT         port number to listen on\n"));
	printf(gettext("  -S              silent mode (start in background without logging output)\n"));
	printf(gettext("  --help          show this help, then exit\n"));
	printf(gettext("  --version       output version information, then exit\n"));

	/* options intended for developers */
	printf(gettext("\nDeveloper options:\n"));
	printf(gettext("  -n              do not reinitialize shared memory after abnormal exit\n"));
	printf(gettext("  -s              send SIGSTOP to all backend servers if one dies\n"));

	/* closing pointer to the documentation and bug address */
	printf(gettext("\nPlease read the documentation for the complete list of run-time\n"
				   "configuration settings and how to set them on the command line or in\n"
				   "the configuration file.\n\n"
				   "Report bugs to <pgsql-bugs@postgresql.org>.\n"));
}
1033
1034
1035 /*
1036  * Main idle loop of postmaster
1037  */
1038 static int
1039 ServerLoop(void)
1040 {
1041         fd_set          readmask;
1042         int                     nSockets;
1043         time_t          now,
1044                                 last_touch_time;
1045         struct timeval earlier,
1046                                 later;
1047         struct timezone tz;
1048
1049         gettimeofday(&earlier, &tz);
1050         last_touch_time = time(NULL);
1051
1052         nSockets = initMasks(&readmask);
1053
1054         for (;;)
1055         {
1056                 Port       *port;
1057                 fd_set          rmask;
1058                 struct timeval timeout;
1059                 int                     selres;
1060                 int                     i;
1061
1062                 /*
1063                  * Wait for something to happen.
1064                  *
1065                  * We wait at most one minute, to ensure that the other background
1066                  * tasks handled below get done even when no requests are arriving.
1067                  */
1068                 memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1069
1070                 timeout.tv_sec = 60;
1071                 timeout.tv_usec = 0;
1072
1073                 PG_SETMASK(&UnBlockSig);
1074
1075                 selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1076
1077                 /*
1078                  * Block all signals until we wait again.  (This makes it safe for
1079                  * our signal handlers to do nontrivial work.)
1080                  */
1081                 PG_SETMASK(&BlockSig);
1082
1083                 if (selres < 0)
1084                 {
1085                         if (errno == EINTR || errno == EWOULDBLOCK)
1086                                 continue;
1087                         ereport(LOG,
1088                                         (errcode_for_socket_access(),
1089                                          errmsg("select() failed in postmaster: %m")));
1090                         return STATUS_ERROR;
1091                 }
1092
1093                 /*
1094                  * New connection pending on any of our sockets? If so, fork a
1095                  * child process to deal with it.
1096                  */
1097                 if (selres > 0)
1098                 {
1099                         /*
1100                          * Select a random seed at the time of first receiving a request.
1101                          */
1102                         while (random_seed == 0)
1103                         {
1104                                 gettimeofday(&later, &tz);
1105
1106                                 /*
1107                                  * We are not sure how much precision is in tv_usec, so we
1108                                  * swap the nibbles of 'later' and XOR them with 'earlier'. On
1109                                  * the off chance that the result is 0, we loop until it isn't.
1110                                  */
1111                                 random_seed = earlier.tv_usec ^
1112                                         ((later.tv_usec << 16) |
1113                                          ((later.tv_usec >> 16) & 0xffff));
1114                         }
1115
1116                         for (i = 0; i < MAXLISTEN; i++)
1117                         {
1118                                 if (ListenSocket[i] == -1)
1119                                         break;
1120                                 if (FD_ISSET(ListenSocket[i], &rmask))
1121                                 {
1122                                         port = ConnCreate(ListenSocket[i]);
1123                                         if (port)
1124                                         {
1125                                                 BackendStartup(port);
1126
1127                                                 /*
1128                                                  * We no longer need the open socket or port structure
1129                                                  * in this process
1130                                                  */
1131                                                 StreamClose(port->sock);
1132                                                 ConnFree(port);
1133                                         }
1134                                 }
1135                         }
1136                 }
1137
1138                 /*
1139                  * If no background writer process is running, and we are not in
1140                  * a state that prevents it, start one.  It doesn't matter if this
1141                  * fails, we'll just try again later.
1142                  */
1143                 if (BgWriterPID == 0 && StartupPID == 0 && !FatalError)
1144                 {
1145                         BgWriterPID = StartBackgroundWriter();
1146                         /* If shutdown is pending, set it going */
1147                         if (Shutdown > NoShutdown && BgWriterPID != 0)
1148                                 kill(BgWriterPID, SIGUSR2);
1149                 }
1150
1151                 /* If we have lost the stats collector, try to start a new one */
1152                 if (!pgstat_is_running)
1153                         pgstat_start();
1154
1155                 /*
1156                  * Touch the socket and lock file at least every ten minutes, to ensure
1157                  * that they are not removed by overzealous /tmp-cleaning tasks.
1158                  */
1159                 now = time(NULL);
1160                 if (now - last_touch_time >= 10 * 60)
1161                 {
1162                         TouchSocketFile();
1163                         TouchSocketLockFile();
1164                         last_touch_time = now;
1165                 }
1166         }
1167 }
1168
1169
1170 /*
1171  * Initialise the masks for select() for the ports we are listening on.
1172  * Return the number of sockets to listen on.
1173  */
1174 static int
1175 initMasks(fd_set *rmask)
1176 {
1177         int                     nsocks = -1;
1178         int                     i;
1179
1180         FD_ZERO(rmask);
1181
1182         for (i = 0; i < MAXLISTEN; i++)
1183         {
1184                 int                     fd = ListenSocket[i];
1185
1186                 if (fd == -1)
1187                         break;
1188                 FD_SET(fd, rmask);
1189                 if (fd > nsocks)
1190                         nsocks = fd;
1191         }
1192
1193         return nsocks + 1;
1194 }
1195
1196
1197 /*
1198  * Read the startup packet and do something according to it.
1199  *
1200  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1201  * not return at all.
1202  *
1203  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1204  * if that's what you want.  Return STATUS_ERROR if you don't want to
1205  * send anything to the client, which would typically be appropriate
1206  * if we detect a communications failure.)
1207  */
1208 static int
1209 ProcessStartupPacket(Port *port, bool SSLdone)
1210 {
1211         int32           len;
1212         void       *buf;
1213         ProtocolVersion proto;
1214         MemoryContext oldcontext;
1215
1216         if (pq_getbytes((char *) &len, 4) == EOF)
1217         {
1218                 /*
1219                  * EOF after SSLdone probably means the client didn't like our
1220                  * response to NEGOTIATE_SSL_CODE.      That's not an error condition,
1221                  * so don't clutter the log with a complaint.
1222                  */
1223                 if (!SSLdone)
1224                         ereport(COMMERROR,
1225                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1226                                          errmsg("incomplete startup packet")));
1227                 return STATUS_ERROR;
1228         }
1229
1230         len = ntohl(len);
1231         len -= 4;
1232
1233         if (len < (int32) sizeof(ProtocolVersion) ||
1234                 len > MAX_STARTUP_PACKET_LENGTH)
1235         {
1236                 ereport(COMMERROR,
1237                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1238                                  errmsg("invalid length of startup packet")));
1239                 return STATUS_ERROR;
1240         }
1241
1242         /*
1243          * Allocate at least the size of an old-style startup packet, plus one
1244          * extra byte, and make sure all are zeroes.  This ensures we will
1245          * have null termination of all strings, in both fixed- and
1246          * variable-length packet layouts.
1247          */
1248         if (len <= (int32) sizeof(StartupPacket))
1249                 buf = palloc0(sizeof(StartupPacket) + 1);
1250         else
1251                 buf = palloc0(len + 1);
1252
1253         if (pq_getbytes(buf, len) == EOF)
1254         {
1255                 ereport(COMMERROR,
1256                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1257                                  errmsg("incomplete startup packet")));
1258                 return STATUS_ERROR;
1259         }
1260
1261         /*
1262          * The first field is either a protocol version number or a special
1263          * request code.
1264          */
1265         port->proto = proto = ntohl(*((ProtocolVersion *) buf));
1266
1267         if (proto == CANCEL_REQUEST_CODE)
1268         {
1269                 processCancelRequest(port, buf);
1270                 return 127;                             /* XXX */
1271         }
1272
1273         if (proto == NEGOTIATE_SSL_CODE && !SSLdone)
1274         {
1275                 char            SSLok;
1276
1277 #ifdef USE_SSL
1278                 /* No SSL when disabled or on Unix sockets */
1279                 if (!EnableSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
1280                         SSLok = 'N';
1281                 else
1282                         SSLok = 'S';            /* Support for SSL */
1283 #else
1284                 SSLok = 'N';                    /* No support for SSL */
1285 #endif
1286                 if (send(port->sock, &SSLok, 1, 0) != 1)
1287                 {
1288                         ereport(COMMERROR,
1289                                         (errcode_for_socket_access(),
1290                                  errmsg("failed to send SSL negotiation response: %m")));
1291                         return STATUS_ERROR;    /* close the connection */
1292                 }
1293
1294 #ifdef USE_SSL
1295                 if (SSLok == 'S' && secure_open_server(port) == -1)
1296                         return STATUS_ERROR;
1297 #endif
1298                 /* regular startup packet, cancel, etc packet should follow... */
1299                 /* but not another SSL negotiation request */
1300                 return ProcessStartupPacket(port, true);
1301         }
1302
1303         /* Could add additional special packet types here */
1304
1305         /*
1306          * Set FrontendProtocol now so that ereport() knows what format to
1307          * send if we fail during startup.
1308          */
1309         FrontendProtocol = proto;
1310
1311         /* Check we can handle the protocol the frontend is using. */
1312
1313         if (PG_PROTOCOL_MAJOR(proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) ||
1314           PG_PROTOCOL_MAJOR(proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) ||
1315         (PG_PROTOCOL_MAJOR(proto) == PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) &&
1316          PG_PROTOCOL_MINOR(proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST)))
1317                 ereport(FATAL,
1318                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1319                                  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
1320                                           PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
1321                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST),
1322                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST),
1323                                                 PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST))));
1324
1325         /*
1326          * Now fetch parameters out of startup packet and save them into the
1327          * Port structure.      All data structures attached to the Port struct
1328          * must be allocated in TopMemoryContext so that they won't disappear
1329          * when we pass them to PostgresMain (see BackendRun).  We need not
1330          * worry about leaking this storage on failure, since we aren't in the
1331          * postmaster process anymore.
1332          */
1333         oldcontext = MemoryContextSwitchTo(TopMemoryContext);
1334
1335         if (PG_PROTOCOL_MAJOR(proto) >= 3)
1336         {
1337                 int32           offset = sizeof(ProtocolVersion);
1338
1339                 /*
1340                  * Scan packet body for name/option pairs.      We can assume any
1341                  * string beginning within the packet body is null-terminated,
1342                  * thanks to zeroing extra byte above.
1343                  */
1344                 port->guc_options = NIL;
1345
1346                 while (offset < len)
1347                 {
1348                         char       *nameptr = ((char *) buf) + offset;
1349                         int32           valoffset;
1350                         char       *valptr;
1351
1352                         if (*nameptr == '\0')
1353                                 break;                  /* found packet terminator */
1354                         valoffset = offset + strlen(nameptr) + 1;
1355                         if (valoffset >= len)
1356                                 break;                  /* missing value, will complain below */
1357                         valptr = ((char *) buf) + valoffset;
1358
1359                         if (strcmp(nameptr, "database") == 0)
1360                                 port->database_name = pstrdup(valptr);
1361                         else if (strcmp(nameptr, "user") == 0)
1362                                 port->user_name = pstrdup(valptr);
1363                         else if (strcmp(nameptr, "options") == 0)
1364                                 port->cmdline_options = pstrdup(valptr);
1365                         else
1366                         {
1367                                 /* Assume it's a generic GUC option */
1368                                 port->guc_options = lappend(port->guc_options,
1369                                                                                         pstrdup(nameptr));
1370                                 port->guc_options = lappend(port->guc_options,
1371                                                                                         pstrdup(valptr));
1372                         }
1373                         offset = valoffset + strlen(valptr) + 1;
1374                 }
1375
1376                 /*
1377                  * If we didn't find a packet terminator exactly at the end of the
1378                  * given packet length, complain.
1379                  */
1380                 if (offset != len - 1)
1381                         ereport(FATAL,
1382                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1383                                          errmsg("invalid startup packet layout: expected terminator as last byte")));
1384         }
1385         else
1386         {
1387                 /*
1388                  * Get the parameters from the old-style, fixed-width-fields
1389                  * startup packet as C strings.  The packet destination was
1390                  * cleared first so a short packet has zeros silently added.  We
1391                  * have to be prepared to truncate the pstrdup result for oversize
1392                  * fields, though.
1393                  */
1394                 StartupPacket *packet = (StartupPacket *) buf;
1395
1396                 port->database_name = pstrdup(packet->database);
1397                 if (strlen(port->database_name) > sizeof(packet->database))
1398                         port->database_name[sizeof(packet->database)] = '\0';
1399                 port->user_name = pstrdup(packet->user);
1400                 if (strlen(port->user_name) > sizeof(packet->user))
1401                         port->user_name[sizeof(packet->user)] = '\0';
1402                 port->cmdline_options = pstrdup(packet->options);
1403                 if (strlen(port->cmdline_options) > sizeof(packet->options))
1404                         port->cmdline_options[sizeof(packet->options)] = '\0';
1405                 port->guc_options = NIL;
1406         }
1407
1408         /* Check a user name was given. */
1409         if (port->user_name == NULL || port->user_name[0] == '\0')
1410                 ereport(FATAL,
1411                                 (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
1412                  errmsg("no PostgreSQL user name specified in startup packet")));
1413
1414         /* The database defaults to the user name. */
1415         if (port->database_name == NULL || port->database_name[0] == '\0')
1416                 port->database_name = pstrdup(port->user_name);
1417
1418         if (Db_user_namespace)
1419         {
1420                 /*
1421                  * If user@, it is a global user, remove '@'. We only want to do
1422                  * this if there is an '@' at the end and no earlier in the user
1423                  * string or they may fake as a local user of another database
1424                  * attaching to this database.
1425                  */
1426                 if (strchr(port->user_name, '@') ==
1427                         port->user_name + strlen(port->user_name) - 1)
1428                         *strchr(port->user_name, '@') = '\0';
1429                 else
1430                 {
1431                         /* Append '@' and dbname */
1432                         char       *db_user;
1433
1434                         db_user = palloc(strlen(port->user_name) +
1435                                                          strlen(port->database_name) + 2);
1436                         sprintf(db_user, "%s@%s", port->user_name, port->database_name);
1437                         port->user_name = db_user;
1438                 }
1439         }
1440
1441         /*
1442          * Truncate given database and user names to length of a Postgres
1443          * name.  This avoids lookup failures when overlength names are given.
1444          */
1445         if (strlen(port->database_name) >= NAMEDATALEN)
1446                 port->database_name[NAMEDATALEN - 1] = '\0';
1447         if (strlen(port->user_name) >= NAMEDATALEN)
1448                 port->user_name[NAMEDATALEN - 1] = '\0';
1449
1450         /*
1451          * Done putting stuff in TopMemoryContext.
1452          */
1453         MemoryContextSwitchTo(oldcontext);
1454
1455         /*
1456          * If we're going to reject the connection due to database state, say
1457          * so now instead of wasting cycles on an authentication exchange.
1458          * (This also allows a pg_ping utility to be written.)
1459          */
1460         switch (port->canAcceptConnections)
1461         {
1462                 case CAC_STARTUP:
1463                         ereport(FATAL,
1464                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1465                                          errmsg("the database system is starting up")));
1466                         break;
1467                 case CAC_SHUTDOWN:
1468                         ereport(FATAL,
1469                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1470                                          errmsg("the database system is shutting down")));
1471                         break;
1472                 case CAC_RECOVERY:
1473                         ereport(FATAL,
1474                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1475                                          errmsg("the database system is in recovery mode")));
1476                         break;
1477                 case CAC_TOOMANY:
1478                         ereport(FATAL,
1479                                         (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
1480                                          errmsg("sorry, too many clients already")));
1481                         break;
1482                 case CAC_OK:
1483                 default:
1484                         break;
1485         }
1486
1487         return STATUS_OK;
1488 }
1489
1490
1491 /*
1492  * The client has sent a cancel request packet, not a normal
1493  * start-a-new-connection packet.  Perform the necessary processing.
1494  * Nothing is sent back to the client.
1495  */
1496 static void
1497 processCancelRequest(Port *port, void *pkt)
1498 {
1499         CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
1500         int                     backendPID;
1501         long            cancelAuthCode;
1502         Backend    *bp;
1503 #ifndef EXEC_BACKEND
1504         Dlelem     *curr;
1505 #else
1506         int                     i;
1507 #endif
1508
1509         backendPID = (int) ntohl(canc->backendPID);
1510         cancelAuthCode = (long) ntohl(canc->cancelAuthCode);
1511
1512         if (backendPID == BgWriterPID)
1513         {
1514                 ereport(DEBUG2,
1515                                 (errmsg_internal("ignoring cancel request for bgwriter process %d",
1516                                                                  backendPID)));
1517                 return;
1518         }
1519
1520         /*
1521          * See if we have a matching backend.  In the EXEC_BACKEND case, we
1522          * can no longer access the postmaster's own backend list, and must
1523          * rely on the duplicate array in shared memory.
1524          */
1525 #ifndef EXEC_BACKEND
1526         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
1527         {
1528                 bp = (Backend *) DLE_VAL(curr);
1529 #else
1530         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
1531         {
1532                 bp = (Backend *) &ShmemBackendArray[i];
1533 #endif
1534                 if (bp->pid == backendPID)
1535                 {
1536                         if (bp->cancel_key == cancelAuthCode)
1537                         {
1538                                 /* Found a match; signal that backend to cancel current op */
1539                                 ereport(DEBUG2,
1540                                                 (errmsg_internal("processing cancel request: sending SIGINT to process %d",
1541                                                                                  backendPID)));
1542                                 kill(bp->pid, SIGINT);
1543                         }
1544                         else
1545                                 /* Right PID, wrong key: no way, Jose */
1546                                 ereport(DEBUG2,
1547                                                 (errmsg_internal("bad key in cancel request for process %d",
1548                                                                                  backendPID)));
1549                         return;
1550                 }
1551         }
1552
1553         /* No matching backend */
1554         ereport(DEBUG2,
1555                         (errmsg_internal("bad pid in cancel request for process %d",
1556                                                          backendPID)));
1557 }
1558
1559 /*
1560  * canAcceptConnections --- check to see if database state allows connections.
1561  */
1562 static enum CAC_state
1563 canAcceptConnections(void)
1564 {
1565         /* Can't start backends when in startup/shutdown/recovery state. */
1566         if (Shutdown > NoShutdown)
1567                 return CAC_SHUTDOWN;
1568         if (StartupPID)
1569                 return CAC_STARTUP;
1570         if (FatalError)
1571                 return CAC_RECOVERY;
1572
1573         /*
1574          * Don't start too many children.
1575          *
1576          * We allow more connections than we can have backends here because some
1577          * might still be authenticating; they might fail auth, or some
1578          * existing backend might exit before the auth cycle is completed. The
1579          * exact MaxBackends limit is enforced when a new backend tries to
1580          * join the shared-inval backend array.
1581          */
1582         if (CountChildren() >= 2 * MaxBackends)
1583                 return CAC_TOOMANY;
1584
1585         return CAC_OK;
1586 }
1587
1588
1589 /*
1590  * ConnCreate -- create a local connection data structure
1591  */
1592 static Port *
1593 ConnCreate(int serverFd)
1594 {
1595         Port       *port;
1596
1597         if (!(port = (Port *) calloc(1, sizeof(Port))))
1598         {
1599                 ereport(LOG,
1600                                 (errcode(ERRCODE_OUT_OF_MEMORY),
1601                                  errmsg("out of memory")));
1602                 ExitPostmaster(1);
1603         }
1604
1605         if (StreamConnection(serverFd, port) != STATUS_OK)
1606         {
1607                 StreamClose(port->sock);
1608                 ConnFree(port);
1609                 port = NULL;
1610         }
1611         else
1612         {
1613                 /*
1614                  * Precompute password salt values to use for this connection.
1615                  * It's slightly annoying to do this long in advance of knowing
1616                  * whether we'll need 'em or not, but we must do the random()
1617                  * calls before we fork, not after.  Else the postmaster's random
1618                  * sequence won't get advanced, and all backends would end up
1619                  * using the same salt...
1620                  */
1621                 RandomSalt(port->cryptSalt, port->md5Salt);
1622         }
1623
1624         return port;
1625 }
1626
1627
1628 /*
1629  * ConnFree -- free a local connection data structure
1630  */
1631 static void
1632 ConnFree(Port *conn)
1633 {
1634 #ifdef USE_SSL
1635         secure_close(conn);
1636 #endif
1637         free(conn);
1638 }
1639
1640
1641 /*
1642  * ClosePostmasterPorts -- close all the postmaster's open sockets
1643  *
1644  * This is called during child process startup to release file descriptors
1645  * that are not needed by that child process.  The postmaster still has
1646  * them open, of course.
1647  */
1648 void
1649 ClosePostmasterPorts(void)
1650 {
1651         int                     i;
1652
1653         /* Close the listen sockets */
1654         for (i = 0; i < MAXLISTEN; i++)
1655         {
1656                 if (ListenSocket[i] != -1)
1657                 {
1658                         StreamClose(ListenSocket[i]);
1659                         ListenSocket[i] = -1;
1660                 }
1661         }
1662 }
1663
1664
1665 /*
1666  * reset_shared -- reset shared memory and semaphores
1667  */
1668 static void
1669 reset_shared(unsigned short port)
1670 {
1671         /*
1672          * Create or re-create shared memory and semaphores.
1673          *
1674          * Note: in each "cycle of life" we will normally assign the same IPC
1675          * keys (if using SysV shmem and/or semas), since the port number is
1676          * used to determine IPC keys.  This helps ensure that we will clean
1677          * up dead IPC objects if the postmaster crashes and is restarted.
1678          */
1679         CreateSharedMemoryAndSemaphores(false, MaxBackends, port);
1680 }
1681
1682
1683 /*
1684  * SIGHUP -- reread config files, and tell children to do same
1685  */
1686 static void
1687 SIGHUP_handler(SIGNAL_ARGS)
1688 {
1689         int                     save_errno = errno;
1690
1691         PG_SETMASK(&BlockSig);
1692
1693         if (Shutdown <= SmartShutdown)
1694         {
1695                 ereport(LOG,
1696                          (errmsg("received SIGHUP, reloading configuration files")));
1697                 ProcessConfigFile(PGC_SIGHUP);
1698                 SignalChildren(SIGHUP);
1699                 if (BgWriterPID != 0)
1700                         kill(BgWriterPID, SIGHUP);
1701                 load_hba();
1702                 load_ident();
1703
1704 #ifdef EXEC_BACKEND
1705                 /* Update the starting-point file for future children */
1706                 write_nondefault_variables(PGC_SIGHUP);
1707 #endif
1708         }
1709
1710         PG_SETMASK(&UnBlockSig);
1711
1712         errno = save_errno;
1713 }
1714
1715
1716 /*
1717  * pmdie -- signal handler for processing various postmaster signals.
      *
      * Three signals are acted upon:
      *        SIGTERM         smart shutdown
      *        SIGINT          fast shutdown
      *        SIGQUIT         immediate shutdown
      * Any other signal number only produces the DEBUG2 message.
      *
      * The handler runs with the BlockSig mask installed, and restores
      * UnBlockSig and the interrupted code's errno before returning.
1718  */
1719 static void
1720 pmdie(SIGNAL_ARGS)
1721 {
1722         int                     save_errno = errno;
1723
1724         PG_SETMASK(&BlockSig);
1725
1726         ereport(DEBUG2,
1727                         (errmsg_internal("postmaster received signal %d",
1728                                                          postgres_signal_arg)));
1729
1730         switch (postgres_signal_arg)
1731         {
1732                 case SIGTERM:
1733                         /*
1734                          * Smart Shutdown:
1735                          *
1736                          * Wait for children to end their work, then shut down.
1737                          */
                             /* Ignore if an equal or stronger shutdown is already pending */
1738                         if (Shutdown >= SmartShutdown)
1739                                 break;
1740                         Shutdown = SmartShutdown;
1741                         ereport(LOG,
1742                                         (errmsg("received smart shutdown request")));
1743
                             /* Backends still exist: nothing more to do right now */
1744                         if (DLGetHead(BackendList))
1745                                 break;                  /* let reaper() handle this */
1746
1747                         /*
1748                          * No children left. Begin shutdown of data base system.
1749                          */
1750                         if (StartupPID != 0 || FatalError)
1751                                 break;                  /* let reaper() handle this */
1752                         /* Start the bgwriter if not running */
1753                         if (BgWriterPID == 0)
1754                                 BgWriterPID = StartBackgroundWriter();
1755                         /* And tell it to shut down */
1756                         if (BgWriterPID != 0)
1757                                 kill(BgWriterPID, SIGUSR2);
1758                         break;
1759
1760                 case SIGINT:
1761                         /*
1762                          * Fast Shutdown:
1763                          *
1764                          * Abort all children with SIGTERM (rollback active transactions
1765                          * and exit) and shut down when they are gone.
1766                          */
                             /* Ignore if an equal or stronger shutdown is already pending */
1767                         if (Shutdown >= FastShutdown)
1768                                 break;
1769                         Shutdown = FastShutdown;
1770                         ereport(LOG,
1771                                         (errmsg("received fast shutdown request")));
1772
1773                         if (DLGetHead(BackendList))
1774                         {
                                     /*
                                      * Backends are only signalled when FatalError is not
                                      * set; either way we stop here while any remain.
                                      */
1775                                 if (!FatalError)
1776                                 {
1777                                         ereport(LOG,
1778                                                         (errmsg("aborting any active transactions")));
1779                                         SignalChildren(SIGTERM);
1780                                         /* reaper() does the rest */
1781                                 }
1782                                 break;
1783                         }
1784
1785                         /*
1786                          * No children left. Begin shutdown of data base system.
1787                          *
1788                          * Note: if we previously got SIGTERM then we may send SIGUSR2
1789                          * to the bgwriter a second time here.  This should be harmless.
1790                          */
1791                         if (StartupPID != 0 || FatalError)
1792                                 break;                  /* let reaper() handle this */
1793                         /* Start the bgwriter if not running */
1794                         if (BgWriterPID == 0)
1795                                 BgWriterPID = StartBackgroundWriter();
1796                         /* And tell it to shut down */
1797                         if (BgWriterPID != 0)
1798                                 kill(BgWriterPID, SIGUSR2);
1799                         break;
1800
1801                 case SIGQUIT:
1802                         /*
1803                          * Immediate Shutdown:
1804                          *
1805                          * abort all children with SIGQUIT and exit without attempt to
1806                          * properly shut down data base system.
1807                          */
1808                         ereport(LOG,
1809                                         (errmsg("received immediate shutdown request")));
                             /* Pass SIGQUIT on to the startup process, bgwriter and backends */
1810                         if (StartupPID != 0)
1811                                 kill(StartupPID, SIGQUIT);
1812                         if (BgWriterPID != 0)
1813                                 kill(BgWriterPID, SIGQUIT);
1814                         if (DLGetHead(BackendList))
1815                                 SignalChildren(SIGQUIT);
                             /*
                              * NOTE(review): ExitPostmaster() presumably does not return,
                              * which would make the break below unreachable -- confirm.
                              */
1816                         ExitPostmaster(0);
1817                         break;
1818         }
1819
1820         PG_SETMASK(&UnBlockSig);
1821
1822         errno = save_errno;
1823 }
1824
1825 /*
1826  * Reaper -- signal handler to cleanup after a backend (child) dies.
      *
      * Collects every pending child-exit report without blocking and dispatches
      * on which child it was: statistics collector, startup process, bgwriter,
      * or (anything else) an ordinary backend.  After the loop it advances
      * crash recovery (FatalError set) or a pending shutdown if the remaining
      * child processes allow it.
      *
      * Runs with the BlockSig mask installed; UnBlockSig and the interrupted
      * code's errno are restored on the way out.
1827  */
1828 static void
1829 reaper(SIGNAL_ARGS)
1830 {
1831         int                     save_errno = errno;
1832
1833 #ifdef HAVE_WAITPID
1834         int                     status;                 /* backend exit status */
1835
1836 #else
1837 #ifndef WIN32
1838         union wait      status;                 /* backend exit status */
1839 #endif
1840 #endif
1841         int                     exitstatus;
1842         int                     pid;                    /* process id of dead backend */
1843
1844         PG_SETMASK(&BlockSig);
1845
1846         ereport(DEBUG4,
1847                         (errmsg_internal("reaping dead processes")));
             /*
              * Exactly one of the three loop headers below is compiled in; each
              * leaves the dead child's PID in pid and its status in exitstatus.
              */
1848 #ifdef HAVE_WAITPID
1849         while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
1850         {
1851                 exitstatus = status;
1852 #else
1853 #ifndef WIN32
1854         while ((pid = wait3(&status, WNOHANG, NULL)) > 0)
1855         {
1856                 exitstatus = status.w_status;
1857 #else
1858         while ((pid = win32_waitpid(&exitstatus)) > 0)
1859         {
1860                 /*
1861                  * We need to do this here, and not in CleanupProc, since this is
1862                  * to be called on all children when we are done with them. Could
1863                  * move to LogChildExit, but that seems like asking for future
1864                  * trouble...
1865                  */
1866                 win32_RemoveChild(pid);
1867 #endif /* WIN32 */
1868 #endif /* HAVE_WAITPID */
1869
1870                 /*
1871                  * Check if this child was the statistics collector. If so, try to
1872                  * start a new one.  (If fail, we'll try again in future cycles of
1873                  * the main loop.)
1874                  */
1875                 if (pgstat_ispgstat(pid))
1876                 {
1877                         LogChildExit(LOG, gettext("statistics collector process"),
1878                                                  pid, exitstatus);
1879                         pgstat_start();
1880                         continue;
1881                 }
1882
1883                 /*
1884                  * Check if this child was a startup process.
1885                  */
1886                 if (StartupPID != 0 && pid == StartupPID)
1887                 {
1888                         StartupPID = 0;
                             /* A failed startup process takes the postmaster down with it */
1889                         if (exitstatus != 0)
1890                         {
1891                                 LogChildExit(LOG, gettext("startup process"),
1892                                                          pid, exitstatus);
1893                                 ereport(LOG,
1894                                                 (errmsg("aborting startup due to startup process failure")));
1895                                 ExitPostmaster(1);
1896                         }
1897
1898                         /*
1899                          * Startup succeeded - we are done with system startup or recovery.
1900                          */
1901                         FatalError = false;
1902
1903                         /*
1904                          * Crank up the background writer.  It doesn't matter if this
1905                          * fails, we'll just try again later.
1906                          */
1907                         Assert(BgWriterPID == 0);
1908                         BgWriterPID = StartBackgroundWriter();
1909
1910                         /*
1911                          * Go to shutdown mode if a shutdown request was pending.
1912                          */
1913                         if (Shutdown > NoShutdown && BgWriterPID != 0)
1914                                 kill(BgWriterPID, SIGUSR2);
1915
1916                         continue;
1917                 }
1918
1919                 /*
1920                  * Was it the bgwriter?
1921                  */
1922                 if (BgWriterPID != 0 && pid == BgWriterPID)
1923                 {
1924                         if (exitstatus == 0 && Shutdown > NoShutdown &&
1925                                 !FatalError && !DLGetHead(BackendList))
1926                         {
1927                                 /*
1928                                  * Normal postmaster exit is here: we've seen normal
1929                                  * exit of the bgwriter after it's been told to shut down.
1930                                  * We expect that it wrote a shutdown checkpoint.  (If
1931                                  * for some reason it didn't, recovery will occur on next
1932                                  * postmaster start.)
1933                                  */
1934                                 ExitPostmaster(0);
1935                         }
1936                         /*
1937                          * Any unexpected exit of the bgwriter is treated as a crash.
                              * BgWriterPID is still set at this point; HandleChildCrash
                              * compares against it and then clears it.
1938                          */
1939                         LogChildExit(DEBUG2, gettext("background writer process"),
1940                                                  pid, exitstatus);
1941                         HandleChildCrash(pid, exitstatus);
1942                         continue;
1943                 }
1944
1945                 /*
1946                  * Else do standard backend child cleanup.
1947                  */
1948                 CleanupProc(pid, exitstatus);
1949         }                                                       /* loop over pending child-death reports */
1950
1951         if (FatalError)
1952         {
1953                 /*
1954                  * Wait for all children exit, then reset shmem and
1955                  * StartupDataBase.
1956                  */
                     /* Some child is still around: try again on a later invocation */
1957                 if (DLGetHead(BackendList) || StartupPID != 0 || BgWriterPID != 0)
1958                         goto reaper_done;
1959                 ereport(LOG,
1960                         (errmsg("all server processes terminated; reinitializing")));
1961
1962                 shmem_exit(0);
1963                 reset_shared(PostPortNumber);
1964
                     /* FatalError stays set until this startup process exits with status 0 */
1965                 StartupPID = StartupDataBase();
1966
1967                 goto reaper_done;
1968         }
1969
1970         if (Shutdown > NoShutdown)
1971         {
                     /* A shutdown is pending: proceed only once backends and startup are gone */
1972                 if (DLGetHead(BackendList) || StartupPID != 0)
1973                         goto reaper_done;
1974                 /* Start the bgwriter if not running */
1975                 if (BgWriterPID == 0)
1976                         BgWriterPID = StartBackgroundWriter();
1977                 /* And tell it to shut down */
1978                 if (BgWriterPID != 0)
1979                         kill(BgWriterPID, SIGUSR2);
1980         }
1981
1982 reaper_done:
1983         PG_SETMASK(&UnBlockSig);
1984
1985         errno = save_errno;
1986 }
1987
1988
1989 /*
1990  * CleanupProc -- cleanup after terminated backend.
1991  *
1992  * Remove all local state associated with backend.
1993  */
1994 static void
1995 CleanupProc(int pid,
1996                         int exitstatus)         /* child's exit status. */
1997 {
1998         Dlelem     *curr;
1999
2000         LogChildExit(DEBUG2, gettext("server process"), pid, exitstatus);
2001
2002         /*
2003          * If a backend dies in an ugly way (i.e. exit status not 0) then we
2004          * must signal all other backends to quickdie.  If exit status is zero
2005          * we assume everything is hunky dory and simply remove the backend
2006          * from the active backend list.
2007          */
2008         if (exitstatus != 0)
2009         {
2010                 HandleChildCrash(pid, exitstatus);
2011                 return;
2012         }
2013
2014         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2015         {
2016                 Backend    *bp = (Backend *) DLE_VAL(curr);
2017
2018                 if (bp->pid == pid)
2019                 {
2020                         DLRemove(curr);
2021                         free(bp);
2022                         DLFreeElem(curr);
2023 #ifdef EXEC_BACKEND
2024                         ShmemBackendArrayRemove(pid);
2025 #endif
2026                         /* Tell the collector about backend termination */
2027                         pgstat_beterm(pid);
2028                         break;
2029                 }
2030         }
2031 }
2032
2033 /*
2034  * HandleChildCrash -- cleanup after failed backend or bgwriter.
2035  *
2036  * The objectives here are to clean up our local state about the child
2037  * process, and to signal all other remaining children to quickdie.
2038  */
2039 static void
2040 HandleChildCrash(int pid,
2041                                  int exitstatus) /* child's exit status. */
2042 {
2043         Dlelem     *curr,
2044                            *next;
2045         Backend    *bp;
2046
2047         /*
2048          * Make log entry unless there was a previous crash (if so, nonzero
2049          * exit status is to be expected in SIGQUIT response; don't clutter log)
2050          */
2051         if (!FatalError)
2052         {
2053                 LogChildExit(LOG,
2054                                          (pid == BgWriterPID) ?
2055                                          gettext("background writer process") :
2056                                          gettext("server process"),
2057                                          pid, exitstatus);
2058                 ereport(LOG,
2059                                 (errmsg("terminating any other active server processes")));
2060         }
2061
2062         /* Process regular backends */
2063         for (curr = DLGetHead(BackendList); curr; curr = next)
2064         {
2065                 next = DLGetSucc(curr);
2066                 bp = (Backend *) DLE_VAL(curr);
2067                 if (bp->pid == pid)
2068                 {
2069                         /*
2070                          * Found entry for freshly-dead backend, so remove it.
2071                          */
2072                         DLRemove(curr);
2073                         free(bp);
2074                         DLFreeElem(curr);
2075 #ifdef EXEC_BACKEND
2076                         ShmemBackendArrayRemove(pid);
2077 #endif
2078                         /* Tell the collector about backend termination */
2079                         pgstat_beterm(pid);
2080                         /* Keep looping so we can signal remaining backends */
2081                 }
2082                 else
2083                 {
2084                         /*
2085                          * This backend is still alive.  Unless we did so already,
2086                          * tell it to commit hara-kiri.
2087                          *
2088                          * SIGQUIT is the special signal that says exit without proc_exit
2089                          * and let the user know what's going on. But if SendStop is
2090                          * set (-s on command line), then we send SIGSTOP instead, so
2091                          * that we can get core dumps from all backends by hand.
2092                          */
2093                         if (!FatalError)
2094                         {
2095                                 ereport(DEBUG2,
2096                                                 (errmsg_internal("sending %s to process %d",
2097                                                                           (SendStop ? "SIGSTOP" : "SIGQUIT"),
2098                                                                                  (int) bp->pid)));
2099                                 kill(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
2100                         }
2101                 }
2102         }
2103
2104         /* Take care of the bgwriter too */
2105         if (pid == BgWriterPID)
2106                 BgWriterPID = 0;
2107         else if (BgWriterPID != 0 && !FatalError)
2108         {
2109                 ereport(DEBUG2,
2110                                 (errmsg_internal("sending %s to process %d",
2111                                                                  (SendStop ? "SIGSTOP" : "SIGQUIT"),
2112                                                                  (int) BgWriterPID)));
2113                 kill(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
2114         }
2115
2116         FatalError = true;
2117 }
2118
/*
 * LogChildExit -- log the death of a child process.
 *
 * lev is the message level handed to ereport(), procname a noun phrase
 * describing the child, pid its process id and exitstatus the status
 * obtained when the child was reaped.  The wording depends on whether the
 * status denotes a normal exit, termination by a signal, or neither.
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
        if (WIFEXITED(exitstatus))
        {
                ereport(lev,

                /*
                 * translator: %s is a noun phrase describing a child process,
                 * such as "server process"
                 */
                                (errmsg("%s (PID %d) exited with exit code %d",
                                                procname, pid, WEXITSTATUS(exitstatus))));
        }
        else if (WIFSIGNALED(exitstatus))
        {
                ereport(lev,

                /*
                 * translator: %s is a noun phrase describing a child process,
                 * such as "server process"
                 */
                                (errmsg("%s (PID %d) was terminated by signal %d",
                                                procname, pid, WTERMSIG(exitstatus))));
        }
        else
        {
                /* Neither a normal exit nor a signal: report the raw status */
                ereport(lev,

                /*
                 * translator: %s is a noun phrase describing a child process,
                 * such as "server process"
                 */
                                (errmsg("%s (PID %d) exited with unexpected status %d",
                                                procname, pid, exitstatus)));
        }
}
2153
2154 /*
2155  * Send a signal to all backend children.
2156  */
2157 static void
2158 SignalChildren(int signal)
2159 {
2160         Dlelem     *curr;
2161
2162         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2163         {
2164                 Backend    *bp = (Backend *) DLE_VAL(curr);
2165
2166                 ereport(DEBUG4,
2167                                 (errmsg_internal("sending signal %d to process %d",
2168                                                                  signal, (int) bp->pid)));
2169                 kill(bp->pid, signal);
2170         }
2171 }
2172
2173 /*
2174  * BackendStartup -- start backend process
2175  *
      * port describes the client connection the new backend will serve.
      *
      * A cancel key is chosen and a Backend bookkeeping entry allocated before
      * the child is created, so that allocation failure can be reported without
      * a child process existing.  On success the parent records the child's
      * PID and cancel key in BackendList (and in the shared-memory array when
      * EXEC_BACKEND is defined).
      *
2176  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
      * (STATUS_ERROR is also returned when the Backend entry cannot be
      * allocated.)
2177  */
2178 static int
2179 BackendStartup(Port *port)
2180 {
2181         Backend    *bn;                         /* for backend cleanup */
2182         pid_t           pid;
2183
2184 #ifdef LINUX_PROFILE
2185         struct itimerval prof_itimer;
2186 #endif
2187
2188         /*
2189          * Compute the cancel key that will be assigned to this backend. The
2190          * backend will have its own copy in the forked-off process' value of
2191          * MyCancelKey, so that it can transmit the key to the frontend.
2192          */
2193         MyCancelKey = PostmasterRandom();
2194
2195         /*
2196          * Make room for backend data structure.  Better before the fork() so
2197          * we can handle failure cleanly.
2198          */
2199         bn = (Backend *) malloc(sizeof(Backend));
2200         if (!bn)
2201         {
2202                 ereport(LOG,
2203                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2204                                  errmsg("out of memory")));
2205                 return STATUS_ERROR;
2206         }
2207
2208         /* Pass down canAcceptConnections state (kluge for EXEC_BACKEND case) */
2209         port->canAcceptConnections = canAcceptConnections();
2210
2211         /*
2212          * Flush stdio channels just before fork, to avoid double-output
2213          * problems. Ideally we'd use fflush(NULL) here, but there are still a
2214          * few non-ANSI stdio libraries out there (like SunOS 4.1.x) that
2215          * coredump if we do. Presently stdout and stderr are the only stdio
2216          * output channels used by the postmaster, so fflush'ing them should
2217          * be sufficient.
2218          */
2219         fflush(stdout);
2220         fflush(stderr);
2221
2222 #ifdef EXEC_BACKEND
2223
             /* The result is checked by the same pid < 0 test used for fork() below */
2224         pid = backend_forkexec(port);
2225
2226 #else /* !EXEC_BACKEND */
2227
2228 #ifdef LINUX_PROFILE
2229
2230         /*
2231          * Linux's fork() resets the profiling timer in the child process. If
2232          * we want to profile child processes then we need to save and restore
2233          * the timer setting.  This is a waste of time if not profiling,
2234          * however, so only do it if commanded by specific -DLINUX_PROFILE
2235          * switch.
2236          */
2237         getitimer(ITIMER_PROF, &prof_itimer);
2238 #endif
2239
2240 #ifdef __BEOS__
2241         /* Specific beos actions before backend startup */
2242         beos_before_backend_startup();
2243 #endif
2244
2245         pid = fork();
2246
2247         if (pid == 0)                           /* child */
2248         {
2249 #ifdef LINUX_PROFILE
                     /* Reinstate the profiling timer saved before the fork */
2250                 setitimer(ITIMER_PROF, &prof_itimer, NULL);
2251 #endif
2252
2253 #ifdef __BEOS__
2254                 /* Specific beos backend startup actions */
2255                 beos_backend_startup();
2256 #endif
                     /* The bookkeeping entry is only filled in and used by the parent */
2257                 free(bn);
2258
                     /*
                      * NOTE(review): proc_exit() presumably never returns, so the
                      * child should not reach the parent-side code below -- confirm.
                      */
2259                 proc_exit(BackendRun(port));
2260         }
2261
2262 #endif /* EXEC_BACKEND */
2263
2264         if (pid < 0)
2265         {
2266                 /* in parent, fork failed */
                     /* Capture errno at once; it is reinstated after the cleanup calls */
2267                 int                     save_errno = errno;
2268
2269 #ifdef __BEOS__
2270                 /* Specific beos backend startup actions */
2271                 beos_backend_startup_failed();
2272 #endif
2273                 free(bn);
                     /* The %m in the message below refers to errno, so put it back first */
2274                 errno = save_errno;
2275                 ereport(LOG,
2276                           (errmsg("could not fork new process for connection: %m")));
2277                 report_fork_failure_to_client(port, save_errno);
2278                 return STATUS_ERROR;
2279         }
2280
2281         /* in parent, successful fork */
2282         ereport(DEBUG2,
2283                         (errmsg_internal("forked new backend, pid=%d socket=%d",
2284                                                          (int) pid, port->sock)));
2285
2286         /*
2287          * Everything's been successful, it's safe to add this backend to our
2288          * list of backends.
2289          */
2290         bn->pid = pid;
2291         bn->cancel_key = MyCancelKey;
2292         DLAddHead(BackendList, DLNewElem(bn));
2293 #ifdef EXEC_BACKEND
2294         ShmemBackendArrayAdd(bn);
2295 #endif
2296
2297         return STATUS_OK;
2298 }
2299
2300 /*
2301  * Try to report backend fork() failure to client before we close the
2302  * connection.  Since we do not care to risk blocking the postmaster on
2303  * this connection, we set the connection to non-blocking and try only once.
2304  *
2305  * This is grungy special-purpose code; we cannot use backend libpq since
2306  * it's not up and running.
2307  */
2308 static void
2309 report_fork_failure_to_client(Port *port, int errnum)
2310 {
2311         char            buffer[1000];
2312
2313         /* Format the error message packet (always V2 protocol) */
2314         snprintf(buffer, sizeof(buffer), "E%s%s\n",
2315                          gettext("could not fork new process for connection: "),
2316                          strerror(errnum));
2317
2318         /* Set port to non-blocking.  Don't do send() if this fails */
2319         if (!set_noblock(port->sock))
2320                 return;
2321
2322         send(port->sock, buffer, strlen(buffer) + 1, 0);
2323 }
2324
2325
/*
 * split_opts -- chop an option string into words and append them to argv
 *
 * Each maximal run of non-whitespace characters in "s" becomes one argv
 * entry, stored at argv[*argcp] with *argcp advanced past it.  The entries
 * point into "s" itself, and the whitespace character following each word
 * is overwritten with '\0' --- so the string is destructively modified!
 * A NULL or all-whitespace string appends nothing.
 *
 * No quoting is understood: no current POSTGRES arguments require quoting
 * characters, so treating every space-delimited word as a separate argv
 * element is good enough.  (We *used* to pass the whole option string as
 * ONE argument to execl(), which was even less intelligent...)
 *
 * The caller must supply an argv array with room for the added entries.
 */
static void
split_opts(char **argv, int *argcp, char *s)
{
	char	   *cp;

	if (s == NULL)
		return;

	cp = s;
	for (;;)
	{
		/* step over whitespace preceding the next word */
		while (isspace((unsigned char) *cp))
			cp++;
		if (*cp == '\0')
			return;

		/* record the start of the word */
		argv[*argcp] = cp;
		*argcp += 1;

		/* find the end of the word */
		while (*cp != '\0' && !isspace((unsigned char) *cp))
			cp++;
		if (*cp == '\0')
			return;

		/* terminate the word in place and continue after it */
		*cp++ = '\0';
	}
}
2354
2355
2356 /*
2357  * BackendRun -- perform authentication, and if successful,
2358  *                              set up the backend's argument list and invoke PostgresMain()
2359  *
2360  * returns:
2361  *              Shouldn't return at all.
2362  *              If PostgresMain() fails, return status.
2363  */
2364 static int
2365 BackendRun(Port *port)
2366 {
2367         int                     status;
2368         struct timeval now;
2369         struct timezone tz;
2370         char            remote_host[NI_MAXHOST];
2371         char            remote_port[NI_MAXSERV];
2372         char            remote_ps_data[NI_MAXHOST];
2373         char      **av;
2374         int                     maxac;
2375         int                     ac;
2376         char            debugbuf[32];
2377         char            protobuf[32];
2378         int                     i;
2379
2380         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
2381
2382         /*
2383          * Let's clean up ourselves as the postmaster child, and close the
2384          * postmaster's listen sockets
2385          */
2386         ClosePostmasterPorts();
2387
2388         /* We don't want the postmaster's proc_exit() handlers */
2389         on_exit_reset();
2390
2391         /*
2392          * Signal handlers setting is moved to tcop/postgres...
2393          */
2394
2395         /* Save port etc. for ps status */
2396         MyProcPort = port;
2397
2398         /* Reset MyProcPid to new backend's pid */
2399         MyProcPid = getpid();
2400
2401         /*
2402          * PreAuthDelay is a debugging aid for investigating problems in the
2403          * authentication cycle: it can be set in postgresql.conf to allow
2404          * time to attach to the newly-forked backend with a debugger. (See
2405          * also the -W backend switch, which we allow clients to pass through
2406          * PGOPTIONS, but it is not honored until after authentication.)
2407          */
2408         if (PreAuthDelay > 0)
2409                 pg_usleep(PreAuthDelay * 1000000L);
2410
2411         ClientAuthInProgress = true;    /* limit visibility of log messages */
2412
2413         /* save start time for end of session reporting */
2414         gettimeofday(&(port->session_start), NULL);
2415
2416         /* set these to empty in case they are needed before we set them up */
2417         port->remote_host = "";
2418         port->remote_port = "";
2419         port->commandTag = "";
2420
2421         /*
2422          * Initialize libpq and enable reporting of ereport errors to the
2423          * client. Must do this now because authentication uses libpq to send
2424          * messages.
2425          */
2426         pq_init();                                      /* initialize libpq to talk to client */
2427         whereToSendOutput = Remote; /* now safe to ereport to client */
2428
2429         /*
2430          * We arrange for a simple exit(0) if we receive SIGTERM or SIGQUIT
2431          * during any client authentication related communication. Otherwise
2432          * the postmaster cannot shutdown the database FAST or IMMED cleanly
2433          * if a buggy client blocks a backend during authentication.
2434          */
2435         pqsignal(SIGTERM, authdie);
2436         pqsignal(SIGQUIT, authdie);
2437         pqsignal(SIGALRM, authdie);
2438         PG_SETMASK(&AuthBlockSig);
2439
2440         /*
2441          * Get the remote host name and port for logging and status display.
2442          */
2443         remote_host[0] = '\0';
2444         remote_port[0] = '\0';
2445         if (getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2446                                                 remote_host, sizeof(remote_host),
2447                                                 remote_port, sizeof(remote_port),
2448                                    (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV))
2449         {
2450                 int                     ret = getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2451                                                                                 remote_host, sizeof(remote_host),
2452                                                                                 remote_port, sizeof(remote_port),
2453                                                                                 NI_NUMERICHOST | NI_NUMERICSERV);
2454
2455                 if (ret)
2456                         ereport(WARNING,
2457                                         (errmsg("getnameinfo_all() failed: %s",
2458                                                         gai_strerror(ret))));
2459         }
2460         snprintf(remote_ps_data, sizeof(remote_ps_data),
2461                          remote_port[0] == '\0' ? "%s" : "%s(%s)",
2462                          remote_host, remote_port);
2463
2464         if (Log_connections)
2465                 ereport(LOG,
2466                                 (errmsg("connection received: host=%s port=%s",
2467                                                 remote_host, remote_port)));
2468
2469         /*
2470          * save remote_host and remote_port in port stucture
2471          */
2472         port->remote_host = strdup(remote_host);
2473         port->remote_port = strdup(remote_port);
2474
2475         /*
2476          * In EXEC_BACKEND case, we didn't inherit the contents of pg_hba.c
2477          * etcetera from the postmaster, and have to load them ourselves.
2478          * Build the PostmasterContext (which didn't exist before, in this
2479          * process) to contain the data.
2480          *
2481          * FIXME: [fork/exec] Ugh.  Is there a way around this overhead?
2482          */
2483 #ifdef EXEC_BACKEND
2484         Assert(PostmasterContext == NULL);
2485         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
2486                                                                                           "Postmaster",
2487                                                                                           ALLOCSET_DEFAULT_MINSIZE,
2488                                                                                           ALLOCSET_DEFAULT_INITSIZE,
2489                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
2490         MemoryContextSwitchTo(PostmasterContext);
2491
2492         load_hba();
2493         load_ident();
2494         load_user();
2495         load_group();
2496 #endif
2497
2498         /*
2499          * Ready to begin client interaction.  We will give up and exit(0)
2500          * after a time delay, so that a broken client can't hog a connection
2501          * indefinitely.  PreAuthDelay doesn't count against the time limit.
2502          */
2503         if (!enable_sig_alarm(AuthenticationTimeout * 1000, false))
2504                 elog(FATAL, "could not set timer for authorization timeout");
2505
2506         /*
2507          * Receive the startup packet (which might turn out to be a cancel
2508          * request packet).
2509          */
2510         status = ProcessStartupPacket(port, false);
2511
2512         if (status != STATUS_OK)
2513                 proc_exit(0);
2514
2515         /*
2516          * Now that we have the user and database name, we can set the process
2517          * title for ps.  It's good to do this as early as possible in
2518          * startup.
2519          */
2520         init_ps_display(port->user_name, port->database_name, remote_ps_data);
2521         set_ps_display("authentication");
2522
2523         /*
2524          * Now perform authentication exchange.
2525          */
2526         ClientAuthentication(port); /* might not return, if failure */
2527
2528         /*
2529          * Done with authentication.  Disable timeout, and prevent
2530          * SIGTERM/SIGQUIT again until backend startup is complete.
2531          */
2532         if (!disable_sig_alarm(false))
2533                 elog(FATAL, "could not disable timer for authorization timeout");
2534         PG_SETMASK(&BlockSig);
2535
2536         if (Log_connections)
2537                 ereport(LOG,
2538                                 (errmsg("connection authorized: user=%s database=%s",
2539                                                 port->user_name, port->database_name)));
2540
2541         /*
2542          * Don't want backend to be able to see the postmaster random number
2543          * generator state.  We have to clobber the static random_seed *and*
2544          * start a new random sequence in the random() library function.
2545          */
2546         random_seed = 0;
2547         gettimeofday(&now, &tz);
2548         srandom((unsigned int) now.tv_usec);
2549
2550
2551         /* ----------------
2552          * Now, build the argv vector that will be given to PostgresMain.
2553          *
2554          * The layout of the command line is
2555          *              postgres [secure switches] -p databasename [insecure switches]
2556          * where the switches after -p come from the client request.
2557          *
2558          * The maximum possible number of commandline arguments that could come
2559          * from ExtraOptions or port->cmdline_options is (strlen + 1) / 2; see
2560          * split_opts().
2561          * ----------------
2562          */
2563         maxac = 10;                                     /* for fixed args supplied below */
2564         maxac += (strlen(ExtraOptions) + 1) / 2;
2565         if (port->cmdline_options)
2566                 maxac += (strlen(port->cmdline_options) + 1) / 2;
2567
2568         av = (char **) MemoryContextAlloc(TopMemoryContext,
2569                                                                           maxac * sizeof(char *));
2570         ac = 0;
2571
2572         av[ac++] = "postgres";
2573
2574         /*
2575          * Pass the requested debugging level along to the backend.
2576          */
2577         if (debug_flag > 0)
2578         {
2579                 snprintf(debugbuf, sizeof(debugbuf), "-d%d", debug_flag);
2580                 av[ac++] = debugbuf;
2581         }
2582
2583         /*
2584          * Pass any backend switches specified with -o in the postmaster's own
2585          * command line.  We assume these are secure.  (It's OK to mangle
2586          * ExtraOptions now, since we're safely inside a subprocess.)
2587          */
2588         split_opts(av, &ac, ExtraOptions);
2589
2590         /* Tell the backend what protocol the frontend is using. */
2591         snprintf(protobuf, sizeof(protobuf), "-v%u", port->proto);
2592         av[ac++] = protobuf;
2593
2594         /*
2595          * Tell the backend it is being called from the postmaster, and which
2596          * database to use.  -p marks the end of secure switches.
2597          */
2598         av[ac++] = "-p";
2599         av[ac++] = port->database_name;
2600
2601         /*
2602          * Pass the (insecure) option switches from the connection request.
2603          * (It's OK to mangle port->cmdline_options now.)
2604          */
2605         if (port->cmdline_options)
2606                 split_opts(av, &ac, port->cmdline_options);
2607
2608         av[ac] = NULL;
2609
2610         Assert(ac < maxac);
2611
2612         /*
2613          * Release postmaster's working memory context so that backend can
2614          * recycle the space.  Note this does not trash *MyProcPort, because
2615          * ConnCreate() allocated that space with malloc() ... else we'd need
2616          * to copy the Port data here.  Also, subsidiary data such as the
2617          * username isn't lost either; see ProcessStartupPacket().
2618          */
2619         MemoryContextSwitchTo(TopMemoryContext);
2620         MemoryContextDelete(PostmasterContext);
2621         PostmasterContext = NULL;
2622
2623         /*
2624          * Debug: print arguments being passed to backend
2625          */
2626         ereport(DEBUG3,
2627                         (errmsg_internal("%s child[%d]: starting with (",
2628                                                          progname, getpid())));
2629         for (i = 0; i < ac; ++i)
2630                 ereport(DEBUG3,
2631                                 (errmsg_internal("\t%s", av[i])));
2632         ereport(DEBUG3,
2633                         (errmsg_internal(")")));
2634
2635         ClientAuthInProgress = false;           /* client_min_messages is active
2636                                                                                  * now */
2637
2638         return (PostgresMain(ac, av, port->user_name));
2639 }
2640
2641
2642 #ifdef EXEC_BACKEND
2643
2644 /*
2645  * postmaster_forkexec -- fork and exec a postmaster subprocess
2646  *
2647  * The caller must have set up the argv array already, except for argv[2]
2648  * which will be filled with the name of the temp variable file.
2649  *
2650  * Returns the child process PID, or -1 on fork failure (a suitable error
2651  * message has been logged on failure).
2652  *
2653  * All uses of this routine will dispatch to SubPostmasterMain in the
2654  * child process.
2655  */
2656 pid_t
2657 postmaster_forkexec(int argc, char *argv[])
2658 {
2659         Port            port;
2660
2661         /* This entry point passes dummy values for the Port variables */
2662         memset(&port, 0, sizeof(port));
2663         return internal_forkexec(argc, argv, &port);
2664 }
2665
2666 /*
2667  * backend_forkexec -- fork/exec off a backend process
2668  *
2669  * returns the pid of the fork/exec'd process, or -1 on failure
2670  */
2671 static pid_t
2672 backend_forkexec(Port *port)
2673 {
2674         char       *av[4];
2675         int                     ac = 0;
2676
2677         av[ac++] = "postgres";
2678         av[ac++] = "-forkbackend";
2679         av[ac++] = NULL;                        /* filled in by internal_forkexec */
2680
2681         av[ac] = NULL;
2682         Assert(ac < lengthof(av));
2683
2684         return internal_forkexec(ac, av, port);
2685 }
2686
2687 static pid_t
2688 internal_forkexec(int argc, char *argv[], Port *port)
2689 {
2690         pid_t           pid;
2691         char            tmpfilename[MAXPGPATH];
2692
2693         if (!write_backend_variables(tmpfilename, port))
2694                 return -1;                              /* log made by write_backend_variables */
2695
2696         /* Make sure caller set up argv properly */
2697         Assert(argc >= 3);
2698         Assert(argv[argc] == NULL);
2699         Assert(strncmp(argv[1], "-fork", 5) == 0);
2700         Assert(argv[2] == NULL);
2701
2702         /* Insert temp file name after -fork argument */
2703         argv[2] = tmpfilename;
2704
2705 #ifdef WIN32
2706         pid = win32_forkexec(postgres_exec_path, argv);
2707 #else
2708         /* Fire off execv in child */
2709         if ((pid = fork()) == 0)
2710         {
2711                 if (execv(postgres_exec_path, argv) < 0)
2712                 {
2713                         ereport(LOG,
2714                                         (errmsg("could not exec backend process \"%s\": %m",
2715                                                         postgres_exec_path)));
2716                         /* We're already in the child process here, can't return */
2717                         exit(1);
2718                 }
2719         }
2720 #endif
2721
2722         return pid;                                     /* Parent returns pid, or -1 on fork failure */
2723 }
2724
2725 /*
2726  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
2727  *                      to what it would be if we'd simply forked on Unix, and then
2728  *                      dispatch to the appropriate place.
2729  *
2730  * The first two command line arguments are expected to be "-forkFOO"
2731  * (where FOO indicates which postmaster child we are to become), and
2732  * the name of a variables file that we can read to load data that would
2733  * have been inherited by fork() on Unix.  Remaining arguments go to the
2734  * subprocess FooMain() routine.
2735  */
2736 int
2737 SubPostmasterMain(int argc, char *argv[])
2738 {
2739         Port            port;
2740
2741         /* Do this sooner rather than later... */
2742         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
2743
2744         MyProcPid = getpid();           /* reset MyProcPid */
2745
2746         /* In EXEC_BACKEND case we will not have inherited these settings */
2747         IsPostmasterEnvironment = true;
2748         whereToSendOutput = None;
2749         pqinitmask();
2750         PG_SETMASK(&BlockSig);
2751
2752         /* Setup essential subsystems */
2753         MemoryContextInit();
2754         InitializeGUCOptions();
2755
2756         /* Check we got appropriate args */
2757         if (argc < 3)
2758                 elog(FATAL, "invalid subpostmaster invocation");
2759
2760         /* Read in file-based context */
2761         memset(&port, 0, sizeof(Port));
2762         read_backend_variables(argv[2], &port);
2763         read_nondefault_variables();
2764
2765         /* Run backend or appropriate child */
2766         if (strcmp(argv[1], "-forkbackend") == 0)
2767         {
2768                 /* BackendRun will close sockets */
2769
2770                 /* Attach process to shared segments */
2771                 CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
2772
2773                 Assert(argc == 3);              /* shouldn't be any more args */
2774                 proc_exit(BackendRun(&port));
2775         }
2776         if (strcmp(argv[1], "-forkboot") == 0)
2777         {
2778                 /* Close the postmaster's sockets */
2779                 ClosePostmasterPorts();
2780
2781                 /* Attach process to shared segments */
2782                 CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
2783
2784                 BootstrapMain(argc - 2, argv + 2);
2785                 proc_exit(0);
2786         }
2787         if (strcmp(argv[1], "-forkbuf") == 0)
2788         {
2789                 /* Close the postmaster's sockets */
2790                 ClosePostmasterPorts();
2791
2792                 /* Do not want to attach to shared memory */
2793
2794                 PgstatBufferMain(argc, argv);
2795                 proc_exit(0);
2796         }
2797         if (strcmp(argv[1], "-forkcol") == 0)
2798         {
2799                 /*
2800                  * Do NOT close postmaster sockets here, because we are forking from
2801                  * pgstat buffer process, which already did it.
2802                  */
2803
2804                 /* Do not want to attach to shared memory */
2805
2806                 PgstatCollectorMain(argc, argv);
2807                 proc_exit(0);
2808         }
2809
2810         return 1;                                       /* shouldn't get here */
2811 }
2812
2813 #endif /* EXEC_BACKEND */
2814
2815
/*
 * ExitPostmaster -- postmaster exit path
 *
 * Do NOT call exit() directly --- always go through here!
 *
 * For now this only forwards "status" to proc_exit().
 */
static void
ExitPostmaster(int status)
{
	/*
	 * This ought to clean up shared memory and kill all backends.
	 *
	 * The semantics are unsettled: when the Postmaster dies, should the
	 * backends all be killed?  Probably not.
	 *
	 * MUST		-- vadim 05-10-1999
	 */

	proc_exit(status);
}
2835
2836 /*
2837  * sigusr1_handler - handle signal conditions from child processes
2838  */
2839 static void
2840 sigusr1_handler(SIGNAL_ARGS)
2841 {
2842         int                     save_errno = errno;
2843
2844         PG_SETMASK(&BlockSig);
2845
2846         if (CheckPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE))
2847         {
2848                 /*
2849                  * Password or group file has changed.
2850                  */
2851                 load_user();
2852                 load_group();
2853         }
2854
2855         if (CheckPostmasterSignal(PMSIGNAL_WAKEN_CHILDREN))
2856         {
2857                 /*
2858                  * Send SIGUSR1 to all children (triggers
2859                  * CatchupInterruptHandler). See storage/ipc/sinval[adt].c for the
2860                  * use of this.
2861                  */
2862                 if (Shutdown <= SmartShutdown)
2863                         SignalChildren(SIGUSR1);
2864         }
2865
2866         PG_SETMASK(&UnBlockSig);
2867
2868         errno = save_errno;
2869 }
2870
2871
/*
 * dummy_handler -- signal handler that does nothing
 *
 * Installed for signals the postmaster itself has no use for but backends
 * do.  Setting such signals to SIG_IGN in the postmaster would let a newly
 * started backend drop a signal that arrives before it has been able to
 * reconfigure its signal processing.  (See notes in tcop/postgres.c.)
 */
static void
dummy_handler(SIGNAL_ARGS)
{
	/* intentionally empty */
}
2885
2886
/*
 * CharRemap: map an integer onto one of the 62 characters A-Z, a-z, 0-9,
 * per crypt(3) conventions.
 *
 * Values 0..61 map directly (0..25 -> 'A'..'Z', 26..51 -> 'a'..'z',
 * 52..61 -> '0'..'9').  Any other value is first folded into that range
 * by taking the absolute value of its remainder modulo 62.
 *
 * The remainder is taken *before* the sign is dropped: negating the raw
 * argument would overflow (undefined behavior) for LONG_MIN, whereas the
 * remainder always lies in -61..61 and is safe to negate.  Because C
 * integer division truncates toward zero, the result is the same as for
 * negate-then-reduce on every input where that was well defined.
 */
static char
CharRemap(long ch)
{
	ch %= 62;
	if (ch < 0)
		ch = -ch;

	if (ch < 26)
		return 'A' + ch;

	ch -= 26;
	if (ch < 26)
		return 'a' + ch;

	ch -= 26;
	return '0' + ch;
}
2908
/*
 * RandomSalt -- fill in fresh salts for crypt and MD5 authentication
 *
 * Stores two characters in cryptSalt[0..1] and four nonzero bytes in
 * md5Salt[0..3], all derived from PostmasterRandom().
 */
static void
RandomSalt(char *cryptSalt, char *md5Salt)
{
	long		rnd = PostmasterRandom();
	int			i;

	cryptSalt[0] = CharRemap(rnd % 62);
	cryptSalt[1] = CharRemap(rnd / 62);

	/*
	 * Reusing the first random value for one MD5 salt byte is harmless,
	 * because only one of the two salts will be sent to the client.  The
	 * remaining bytes each get random bits of their own.
	 *
	 * Reducing with % 255 gives up one possible byte value, but makes all
	 * bits of the random() value take part in the result; adding one then
	 * guarantees that no null byte is ever generated.
	 */
	md5Salt[0] = (rnd % 255) + 1;
	for (i = 1; i < 4; i++)
	{
		rnd = PostmasterRandom();
		md5Salt[i] = (rnd % 255) + 1;
	}
}
2937
2938 /*
2939  * PostmasterRandom
2940  */
2941 static long
2942 PostmasterRandom(void)
2943 {
2944         static bool initialized = false;
2945
2946         if (!initialized)
2947         {
2948                 Assert(random_seed != 0);
2949                 srandom(random_seed);
2950                 initialized = true;
2951         }
2952
2953         return random();
2954 }
2955
2956 /*
2957  * Count up number of child processes.
2958  */
2959 static int
2960 CountChildren(void)
2961 {
2962         Dlelem     *curr;
2963         int                     cnt = 0;
2964
2965         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2966         {
2967                 cnt++;
2968         }
2969         return cnt;
2970 }
2971
2972
2973 /*
2974  * StartChildProcess -- start a non-backend child process for the postmaster
2975  *
2976  * xlog determines what kind of child will be started.  All child types
2977  * initially go to BootstrapMain, which will handle common setup.
2978  *
2979  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
2980  * to start subprocess.
2981  */
2982 static pid_t
2983 StartChildProcess(int xlop)
2984 {
2985         pid_t           pid;
2986         char       *av[10];
2987         int                     ac = 0;
2988         char            xlbuf[32];
2989 #ifdef LINUX_PROFILE
2990         struct itimerval prof_itimer;
2991 #endif
2992
2993         /*
2994          * Set up command-line arguments for subprocess
2995          */
2996         av[ac++] = "postgres";
2997
2998 #ifdef EXEC_BACKEND
2999         av[ac++] = "-forkboot";
3000         av[ac++] = NULL;                        /* filled in by postmaster_forkexec */
3001 #endif
3002
3003         snprintf(xlbuf, sizeof(xlbuf), "-x%d", xlop);
3004         av[ac++] = xlbuf;
3005
3006         av[ac++] = "-p";
3007         av[ac++] = "template1";
3008
3009         av[ac] = NULL;
3010         Assert(ac < lengthof(av));
3011
3012         /*
3013          * Flush stdio channels (see comments in BackendStartup)
3014          */
3015         fflush(stdout);
3016         fflush(stderr);
3017
3018 #ifdef EXEC_BACKEND
3019
3020         pid = postmaster_forkexec(ac, av);
3021
3022 #else /* !EXEC_BACKEND */
3023
3024 #ifdef LINUX_PROFILE
3025         /* see comments in BackendStartup */
3026         getitimer(ITIMER_PROF, &prof_itimer);
3027 #endif
3028
3029 #ifdef __BEOS__
3030         /* Specific beos actions before backend startup */
3031         beos_before_backend_startup();
3032 #endif
3033
3034         pid = fork();
3035
3036         if (pid == 0)                           /* child */
3037         {
3038 #ifdef LINUX_PROFILE
3039                 setitimer(ITIMER_PROF, &prof_itimer, NULL);
3040 #endif
3041
3042 #ifdef __BEOS__
3043                 /* Specific beos actions after backend startup */
3044                 beos_backend_startup();
3045 #endif
3046
3047                 IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
3048
3049                 /* Close the postmaster's sockets */
3050                 ClosePostmasterPorts();
3051
3052                 /* Lose the postmaster's on-exit routines and port connections */
3053                 on_exit_reset();
3054
3055                 BootstrapMain(ac, av);
3056                 ExitPostmaster(0);
3057         }
3058
3059 #endif /* EXEC_BACKEND */
3060
3061         if (pid < 0)
3062         {
3063                 /* in parent, fork failed */
3064                 int                     save_errno = errno;
3065
3066 #ifdef __BEOS__
3067                 /* Specific beos actions before backend startup */
3068                 beos_backend_startup_failed();
3069 #endif
3070                 errno = save_errno;
3071                 switch (xlop)
3072                 {
3073                         case BS_XLOG_STARTUP:
3074                                 ereport(LOG,
3075                                                 (errmsg("could not fork startup process: %m")));
3076                                 break;
3077                         case BS_XLOG_BGWRITER:
3078                                 ereport(LOG,
3079                                                 (errmsg("could not fork background writer process: %m")));
3080                                 break;
3081                         default:
3082                                 ereport(LOG,
3083                                                 (errmsg("could not fork process: %m")));
3084                                 break;
3085                 }
3086
3087                 /*
3088                  * fork failure is fatal during startup, but there's no need
3089                  * to choke immediately if starting other child types fails.
3090                  */
3091                 if (xlop == BS_XLOG_STARTUP)
3092                         ExitPostmaster(1);
3093                 return 0;
3094         }
3095
3096         /*
3097          * in parent, successful fork
3098          */
3099         return pid;
3100 }
3101
3102
3103 /*
3104  * Create the opts file
3105  */
3106 static bool
3107 CreateOptsFile(int argc, char *argv[], char *fullprogname)
3108 {
3109         char            filename[MAXPGPATH];
3110         FILE       *fp;
3111         int                     i;
3112
3113         snprintf(filename, sizeof(filename), "%s/postmaster.opts", DataDir);
3114
3115         if ((fp = fopen(filename, "w")) == NULL)
3116         {
3117                 elog(LOG, "could not create file \"%s\": %m", filename);
3118                 return false;
3119         }
3120
3121         fprintf(fp, "%s", fullprogname);
3122         for (i = 1; i < argc; i++)
3123                 fprintf(fp, " '%s'", argv[i]);
3124         fputs("\n", fp);
3125
3126         if (fclose(fp))
3127         {
3128                 elog(LOG, "could not write file \"%s\": %m", filename);
3129                 return false;
3130         }
3131
3132         return true;
3133 }
3134
3135 /*
3136  * This should be used only for reporting "interactive" errors (essentially,
3137  * bogus arguments on the command line).  Once the postmaster is launched,
3138  * use ereport.  In particular, don't use this for anything that occurs
3139  * after pmdaemonize.
3140  */
3141 static void
3142 postmaster_error(const char *fmt,...)
3143 {
3144         va_list         ap;
3145
3146         fprintf(stderr, "%s: ", progname);
3147         va_start(ap, fmt);
3148         vfprintf(stderr, gettext(fmt), ap);
3149         va_end(ap);
3150         fprintf(stderr, "\n");
3151 }
3152
3153
3154 #ifdef EXEC_BACKEND
3155
3156 /*
3157  * The following need to be available to the read/write_backend_variables
3158  * functions
3159  */
3160 #include "storage/spin.h"
3161
3162 extern slock_t *ShmemLock;
3163 extern slock_t *ShmemIndexLock;
3164 extern void *ShmemIndexAlloc;
3165 typedef struct LWLock LWLock;
3166 extern LWLock *LWLockArray;
3167 extern slock_t *ProcStructLock;
3168 extern int      pgStatSock;
3169
/*
 * Raw-byte (de)serialization helpers.
 *
 * write_var/read_var transfer sizeof(var) bytes starting at the address of
 * "var".  The *_array_var forms use "var" itself as the buffer address, so
 * they must be handed a real array: for a pointer, sizeof(var) would be
 * the size of the pointer, not of what it points to.
 *
 * Each macro expands to a single fwrite/fread call transferring one item,
 * so its value is that call's item count (1 on success).
 */
#define write_var(var,fp) fwrite(&(var), sizeof(var), 1, (fp))
#define read_var(var,fp)  fread(&(var), sizeof(var), 1, (fp))
#define write_array_var(var,fp) fwrite((var), sizeof(var), 1, (fp))
#define read_array_var(var,fp)  fread((var), sizeof(var), 1, (fp))
3174
3175 static bool
3176 write_backend_variables(char *filename, Port *port)
3177 {
3178         static unsigned long tmpBackendFileNum = 0;
3179         FILE       *fp;
3180         char            str_buf[MAXPGPATH];
3181
3182         /* Calculate name for temp file in caller's buffer */
3183         Assert(DataDir);
3184         snprintf(filename, MAXPGPATH, "%s/%s/%s.backend_var.%d.%lu",
3185                          DataDir, PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX,
3186                          MyProcPid, ++tmpBackendFileNum);
3187
3188         /* Open file */
3189         fp = AllocateFile(filename, PG_BINARY_W);
3190         if (!fp)
3191         {
3192                 /* As per OpenTemporaryFile... */
3193                 char            dirname[MAXPGPATH];
3194
3195                 snprintf(dirname, MAXPGPATH, "%s/%s", DataDir, PG_TEMP_FILES_DIR);
3196                 mkdir(dirname, S_IRWXU);
3197
3198                 fp = AllocateFile(filename, PG_BINARY_W);
3199                 if (!fp)
3200                 {
3201                         ereport(LOG,
3202                                         (errcode_for_file_access(),
3203                                          errmsg("could not create file \"%s\": %m",
3204                                                         filename)));
3205                         return false;
3206                 }
3207         }
3208
3209         /* Write vars */
3210         write_var(port->sock, fp);
3211         write_var(port->proto, fp);
3212         write_var(port->laddr, fp);
3213         write_var(port->raddr, fp);
3214         write_var(port->canAcceptConnections, fp);
3215         write_var(port->cryptSalt, fp);
3216         write_var(port->md5Salt, fp);
3217
3218         /*
3219          * XXX FIXME later: writing these strings as MAXPGPATH bytes always is
3220          * probably a waste of resources
3221          */
3222
3223         StrNCpy(str_buf, DataDir, MAXPGPATH);
3224         write_array_var(str_buf, fp);
3225
3226         write_array_var(ListenSocket, fp);
3227
3228         write_var(MyCancelKey, fp);
3229
3230         write_var(UsedShmemSegID, fp);
3231         write_var(UsedShmemSegAddr, fp);
3232
3233         write_var(ShmemLock, fp);
3234         write_var(ShmemIndexLock, fp);
3235         write_var(ShmemVariableCache, fp);
3236         write_var(ShmemIndexAlloc, fp);
3237         write_var(ShmemBackendArray, fp);
3238
3239         write_var(LWLockArray, fp);
3240         write_var(ProcStructLock, fp);
3241         write_var(pgStatSock, fp);
3242
3243         write_var(debug_flag, fp);
3244         write_var(PostmasterPid, fp);
3245 #ifdef WIN32
3246         write_var(PostmasterHandle, fp);
3247 #endif
3248
3249         StrNCpy(str_buf, my_exec_path, MAXPGPATH);
3250         write_array_var(str_buf, fp);
3251
3252         write_array_var(ExtraOptions, fp);
3253
3254         StrNCpy(str_buf, setlocale(LC_COLLATE, NULL), MAXPGPATH);
3255         write_array_var(str_buf, fp);
3256         StrNCpy(str_buf, setlocale(LC_CTYPE, NULL), MAXPGPATH);
3257         write_array_var(str_buf, fp);
3258
3259         /* Release file */
3260         if (FreeFile(fp))
3261         {
3262                 ereport(ERROR,
3263                                 (errcode_for_file_access(),
3264                                  errmsg("could not write to file \"%s\": %m", filename)));
3265                 return false;
3266         }
3267
3268         return true;
3269 }
3270
3271 static void
3272 read_backend_variables(char *filename, Port *port)
3273 {
3274         FILE       *fp;
3275         char            str_buf[MAXPGPATH];
3276
3277         /* Open file */
3278         fp = AllocateFile(filename, PG_BINARY_R);
3279         if (!fp)
3280                 ereport(FATAL,
3281                                 (errcode_for_file_access(),
3282                                  errmsg("could not read from backend variables file \"%s\": %m",
3283                                                 filename)));
3284
3285         /* Read vars */
3286         read_var(port->sock, fp);
3287         read_var(port->proto, fp);
3288         read_var(port->laddr, fp);
3289         read_var(port->raddr, fp);
3290         read_var(port->canAcceptConnections, fp);
3291         read_var(port->cryptSalt, fp);
3292         read_var(port->md5Salt, fp);
3293
3294         read_array_var(str_buf, fp);
3295         SetDataDir(str_buf);
3296
3297         read_array_var(ListenSocket, fp);
3298
3299         read_var(MyCancelKey, fp);
3300
3301         read_var(UsedShmemSegID, fp);
3302         read_var(UsedShmemSegAddr, fp);
3303
3304         read_var(ShmemLock, fp);
3305         read_var(ShmemIndexLock, fp);
3306         read_var(ShmemVariableCache, fp);
3307         read_var(ShmemIndexAlloc, fp);
3308         read_var(ShmemBackendArray, fp);
3309
3310         read_var(LWLockArray, fp);
3311         read_var(ProcStructLock, fp);
3312         read_var(pgStatSock, fp);
3313
3314         read_var(debug_flag, fp);
3315         read_var(PostmasterPid, fp);
3316 #ifdef WIN32
3317         read_var(PostmasterHandle, fp);
3318 #endif
3319
3320         read_array_var(str_buf, fp);
3321         StrNCpy(my_exec_path, str_buf, MAXPGPATH);
3322
3323         read_array_var(ExtraOptions, fp);
3324
3325         read_array_var(str_buf, fp);
3326         setlocale(LC_COLLATE, str_buf);
3327         read_array_var(str_buf, fp);
3328         setlocale(LC_CTYPE, str_buf);
3329
3330         /* Release file */
3331         FreeFile(fp);
3332         if (unlink(filename) != 0)
3333                 ereport(WARNING,
3334                                 (errcode_for_file_access(),
3335                                  errmsg("could not remove file \"%s\": %m", filename)));
3336 }
3337
3338
3339 size_t
3340 ShmemBackendArraySize(void)
3341 {
3342         return (NUM_BACKENDARRAY_ELEMS * sizeof(Backend));
3343 }
3344
3345 void
3346 ShmemBackendArrayAllocation(void)
3347 {
3348         size_t          size = ShmemBackendArraySize();
3349
3350         ShmemBackendArray = (Backend *) ShmemAlloc(size);
3351         /* Mark all slots as empty */
3352         memset(ShmemBackendArray, 0, size);
3353 }
3354
3355 static void
3356 ShmemBackendArrayAdd(Backend *bn)
3357 {
3358         int                     i;
3359
3360         /* Find an empty slot */
3361         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3362         {
3363                 if (ShmemBackendArray[i].pid == 0)
3364                 {
3365                         ShmemBackendArray[i] = *bn;
3366                         return;
3367                 }
3368         }
3369
3370         ereport(FATAL,
3371                         (errmsg_internal("no free slots in shmem backend array")));
3372 }
3373
3374 static void
3375 ShmemBackendArrayRemove(pid_t pid)
3376 {
3377         int                     i;
3378
3379         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3380         {
3381                 if (ShmemBackendArray[i].pid == pid)
3382                 {
3383                         /* Mark the slot as empty */
3384                         ShmemBackendArray[i].pid = 0;
3385                         return;
3386                 }
3387         }
3388
3389         ereport(WARNING,
3390                         (errmsg_internal("could not find backend entry with pid %d",
3391                                                          (int) pid)));
3392 }
3393
3394 #endif /* EXEC_BACKEND */
3395
3396
3397 #ifdef WIN32
3398
3399 static pid_t
3400 win32_forkexec(const char *path, char *argv[])
3401 {
3402         STARTUPINFO si;
3403         PROCESS_INFORMATION pi;
3404         int                     i;
3405         int                     j;
3406         char            cmdLine[MAXPGPATH * 2];
3407         HANDLE          childHandleCopy;
3408         HANDLE          waiterThread;
3409
3410         /* Format the cmd line */
3411         cmdLine[sizeof(cmdLine)-1] = '\0';
3412         cmdLine[sizeof(cmdLine)-2] = '\0';
3413         snprintf(cmdLine, sizeof(cmdLine)-1, "\"%s\"", path);
3414         i = 0;
3415         while (argv[++i] != NULL)
3416         {
3417                 j = strlen(cmdLine);
3418                 snprintf(cmdLine+j, sizeof(cmdLine)-1-j, " \"%s\"", argv[i]);
3419         }
3420         if (cmdLine[sizeof(cmdLine)-2] != '\0')
3421         {
3422                 elog(LOG, "subprocess command line too long");
3423                 return -1;
3424         }
3425
3426         memset(&pi, 0, sizeof(pi));
3427         memset(&si, 0, sizeof(si));
3428         si.cb = sizeof(si);
3429         if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, 0, NULL, NULL, &si, &pi))
3430         {
3431                 elog(LOG, "CreateProcess call failed (%d): %m", (int) GetLastError());
3432                 return -1;
3433         }
3434
3435         if (!IsUnderPostmaster)
3436         {
3437                 /* We are the Postmaster creating a child... */
3438                 win32_AddChild(pi.dwProcessId, pi.hProcess);
3439         }
3440
3441         if (DuplicateHandle(GetCurrentProcess(),
3442                                                 pi.hProcess,
3443                                                 GetCurrentProcess(),
3444                                                 &childHandleCopy,
3445                                                 0,
3446                                                 FALSE,
3447                                                 DUPLICATE_SAME_ACCESS) == 0)
3448                 ereport(FATAL,
3449                                 (errmsg_internal("could not duplicate child handle: %d",
3450                                                                  (int) GetLastError())));
3451
3452         waiterThread = CreateThread(NULL, 64 * 1024, win32_sigchld_waiter,
3453                                                                 (LPVOID) childHandleCopy, 0, NULL);
3454         if (!waiterThread)
3455                 ereport(FATAL,
3456                                 (errmsg_internal("could not create sigchld waiter thread: %d",
3457                                                                  (int) GetLastError())));
3458         CloseHandle(waiterThread);
3459
3460         if (IsUnderPostmaster)
3461                 CloseHandle(pi.hProcess);
3462         CloseHandle(pi.hThread);
3463
3464         return pi.dwProcessId;
3465 }
3466
3467 /*
3468  * Note: The following three functions must not be interrupted (eg. by
3469  * signals).  As the Postgres Win32 signalling architecture (currently)
3470  * requires polling, or APC checking functions which aren't used here, this
3471  * is not an issue.
3472  *
3473  * We keep two separate arrays, instead of a single array of pid/HANDLE
3474  * structs, to avoid having to re-create a handle array for
3475  * WaitForMultipleObjects on each call to win32_waitpid.
3476  */
3477
3478 static void
3479 win32_AddChild(pid_t pid, HANDLE handle)
3480 {
3481         Assert(win32_childPIDArray && win32_childHNDArray);
3482         if (win32_numChildren < NUM_BACKENDARRAY_ELEMS)
3483         {
3484                 win32_childPIDArray[win32_numChildren] = pid;
3485                 win32_childHNDArray[win32_numChildren] = handle;
3486                 ++win32_numChildren;
3487         }
3488         else
3489                 ereport(FATAL,
3490                                 (errmsg_internal("no room for child entry with pid %lu",
3491                                                                  (unsigned long) pid)));
3492 }
3493
3494 static void
3495 win32_RemoveChild(pid_t pid)
3496 {
3497         int                     i;
3498
3499         Assert(win32_childPIDArray && win32_childHNDArray);
3500
3501         for (i = 0; i < win32_numChildren; i++)
3502         {
3503                 if (win32_childPIDArray[i] == pid)
3504                 {
3505                         CloseHandle(win32_childHNDArray[i]);
3506
3507                         /* Swap last entry into the "removed" one */
3508                         --win32_numChildren;
3509                         win32_childPIDArray[i] = win32_childPIDArray[win32_numChildren];
3510                         win32_childHNDArray[i] = win32_childHNDArray[win32_numChildren];
3511                         return;
3512                 }
3513         }
3514
3515         ereport(WARNING,
3516                         (errmsg_internal("could not find child entry with pid %lu",
3517                                                          (unsigned long) pid)));
3518 }
3519
3520 static pid_t
3521 win32_waitpid(int *exitstatus)
3522 {
3523         Assert(win32_childPIDArray && win32_childHNDArray);
3524         elog(DEBUG3, "waiting on %lu children", win32_numChildren);
3525
3526         if (win32_numChildren > 0)
3527         {
3528                 /*
3529                  * Note: Do NOT use WaitForMultipleObjectsEx, as we don't want to
3530                  * run queued APCs here.
3531                  */
3532                 int                     index;
3533                 DWORD           exitCode;
3534                 DWORD           ret;
3535
3536                 ret = WaitForMultipleObjects(win32_numChildren, win32_childHNDArray,
3537                                                                          FALSE, 0);
3538                 switch (ret)
3539                 {
3540                         case WAIT_FAILED:
3541                                 ereport(LOG,
3542                                    (errmsg_internal("failed to wait on %lu children: %d",
3543                                                           win32_numChildren, (int) GetLastError())));
3544                                 return -1;
3545
3546                         case WAIT_TIMEOUT:
3547                                 /* No children have finished */
3548                                 return -1;
3549
3550                         default:
3551
3552                                 /*
3553                                  * Get the exit code, and return the PID of, the
3554                                  * respective process
3555                                  */
3556                                 index = ret - WAIT_OBJECT_0;
3557                                 Assert(index >= 0 && index < win32_numChildren);
3558                                 if (!GetExitCodeProcess(win32_childHNDArray[index], &exitCode))
3559                                 {
3560                                         /*
3561                                          * If we get this far, this should never happen, but,
3562                                          * then again... No choice other than to assume a
3563                                          * catastrophic failure.
3564                                          */
3565                                         ereport(FATAL,
3566                                                         (errmsg_internal("failed to get exit code for child %lu",
3567                                                                                    win32_childPIDArray[index])));
3568                                 }
3569                                 *exitstatus = (int) exitCode;
3570                                 return win32_childPIDArray[index];
3571                 }
3572         }
3573
3574         /* No children */
3575         return -1;
3576 }
3577
3578 /*
3579  * Note! Code below executes on separate threads, one for
3580  * each child process created
3581  */
3582 static DWORD WINAPI
3583 win32_sigchld_waiter(LPVOID param)
3584 {
3585         HANDLE          procHandle = (HANDLE) param;
3586
3587         DWORD           r = WaitForSingleObject(procHandle, INFINITE);
3588
3589         if (r == WAIT_OBJECT_0)
3590                 pg_queue_signal(SIGCHLD);
3591         else
3592                 fprintf(stderr, "ERROR: failed to wait on child process handle: %d\n",
3593                                 (int) GetLastError());
3594         CloseHandle(procHandle);
3595         return 0;
3596 }
3597
3598 #endif /* WIN32 */