]> granicus.if.org Git - postgresql/blob - src/backend/postmaster/postmaster.c
Separate out bgwriter code into a logically separate module, rather
[postgresql] / src / backend / postmaster / postmaster.c
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  *        This program acts as a clearing house for requests to the
5  *        POSTGRES system.      Frontend programs send a startup message
6  *        to the Postmaster and the postmaster uses the info in the
7  *        message to setup a backend process.
8  *
9  *        The postmaster also manages system-wide operations such as
10  *        startup and shutdown. The postmaster itself doesn't do those
11  *        operations, mind you --- it just forks off a subprocess to do them
12  *        at the right times.  It also takes care of resetting the system
13  *        if a backend crashes.
14  *
15  *        The postmaster process creates the shared memory and semaphore
16  *        pools during startup, but as a rule does not touch them itself.
17  *        In particular, it is not a member of the PGPROC array of backends
18  *        and so it cannot participate in lock-manager operations.      Keeping
19  *        the postmaster away from shared memory operations makes it simpler
20  *        and more reliable.  The postmaster is almost always able to recover
21  *        from crashes of individual backends by resetting shared memory;
22  *        if it did much with shared memory then it would be prone to crashing
23  *        along with the backends.
24  *
25  *        When a request message is received, we now fork() immediately.
26  *        The child process performs authentication of the request, and
27  *        then becomes a backend if successful.  This allows the auth code
28  *        to be written in a simple single-threaded style (as opposed to the
29  *        crufty "poor man's multitasking" code that used to be needed).
30  *        More importantly, it ensures that blockages in non-multithreaded
31  *        libraries like SSL or PAM cannot cause denial of service to other
32  *        clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  *        $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.400 2004/05/29 22:48:19 tgl Exp $
41  *
42  * NOTES
43  *
44  * Initialization:
45  *              The Postmaster sets up shared memory data structures
46  *              for the backends.
47  *
48  * Synchronization:
49  *              The Postmaster shares memory with the backends but should avoid
50  *              touching shared memory, so as not to become stuck if a crashing
51  *              backend screws up locks or shared memory.  Likewise, the Postmaster
52  *              should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  *              The Postmaster cleans up after backends if they have an emergency
56  *              exit and/or core dump.
57  *
58  *-------------------------------------------------------------------------
59  */
60
61 #include "postgres.h"
62
63 #include <unistd.h>
64 #include <signal.h>
65 #include <sys/wait.h>
66 #include <ctype.h>
67 #include <sys/stat.h>
68 #include <sys/socket.h>
69 #include <errno.h>
70 #include <fcntl.h>
71 #include <sys/param.h>
72 #include <netinet/in.h>
73 #include <arpa/inet.h>
74 #include <netdb.h>
75 #include <limits.h>
76
77 #ifdef HAVE_SYS_SELECT_H
78 #include <sys/select.h>
79 #endif
80
81 #ifdef HAVE_GETOPT_H
82 #include <getopt.h>
83 #endif
84
85 #ifdef USE_RENDEZVOUS
86 #include <DNSServiceDiscovery/DNSServiceDiscovery.h>
87 #endif
88
89 #include "catalog/pg_database.h"
90 #include "commands/async.h"
91 #include "lib/dllist.h"
92 #include "libpq/auth.h"
93 #include "libpq/crypt.h"
94 #include "libpq/libpq.h"
95 #include "libpq/pqcomm.h"
96 #include "libpq/pqsignal.h"
97 #include "miscadmin.h"
98 #include "nodes/nodes.h"
99 #include "postmaster/postmaster.h"
100 #include "pgtime.h"
101 #include "storage/fd.h"
102 #include "storage/ipc.h"
103 #include "storage/pg_shmem.h"
104 #include "storage/pmsignal.h"
105 #include "storage/proc.h"
106 #include "storage/bufmgr.h"
107 #include "access/xlog.h"
108 #include "tcop/tcopprot.h"
109 #include "utils/guc.h"
110 #include "utils/memutils.h"
111 #include "utils/ps_status.h"
112 #include "bootstrap/bootstrap.h"
113 #include "pgstat.h"
114
115
116 /*
117  * List of active backends (or child processes anyway; we don't actually
118  * know whether a given child has become a backend or is still in the
119  * authorization phase).  This is used mainly to keep track of how many
120  * children we have and send them appropriate signals when necessary.
121  *
122  * "Special" children such as the startup and bgwriter tasks are not in
123  * this list.
124  */
125 typedef struct bkend
126 {
127         pid_t           pid;                    /* process id of the child (backend) process */
128         long            cancel_key;             /* key associated with cancel requests for this backend */
129 } Backend;
130
131 static Dllist *BackendList;         /* created with DLNewList() in PostmasterMain */
132
133 #ifdef EXEC_BACKEND
134 #define NUM_BACKENDARRAY_ELEMS (2*MaxBackends)  /* also sizes the win32 child pid/HANDLE arrays */
135 static Backend *ShmemBackendArray;  /* EXEC_BACKEND only; see ShmemBackendArrayAdd/Remove */
136 #endif
137
138 /* Where we listen for connections: port number, Unix-socket directory, host list */
139 int                     PostPortNumber;         /* port handed to StreamServerPort() */
140 char       *UnixSocketDir;                      /* directory handed to StreamServerPort() */
141 char       *ListenAddresses;            /* whitespace-separated hosts; a "*" entry is passed on as a NULL host */
142
143 /*
144  * ReservedBackends is the number of backends reserved for superuser use.
145  * This number is taken out of the pool size given by MaxBackends so
146  * the number of backend slots available to non-superusers is
147  * (MaxBackends - ReservedBackends).  Note what this really means is
148  * "if there are <= ReservedBackends connections available, only superusers
149  * can make new connections" --- pre-existing superuser connections don't
150  * count against the limit.
151  */
152 int                     ReservedBackends;       /* PostmasterMain refuses to start if this is >= MaxBackends */
153
154
155 static const char *progname = NULL; /* set from get_progname(argv[0]) in PostmasterMain */
156
157 /* The socket(s) we're listening to; a slot holding -1 is unused. */
158 #define MAXLISTEN       10
159 static int      ListenSocket[MAXLISTEN];
160
161 /*
162  * Set by the -o option: PostmasterMain strcat()s a space plus each -o argument onto this buffer.  NOTE(review): no length check against MAXPGPATH is visible there.
163  */
164 static char ExtraOptions[MAXPGPATH];
165
166 /*
167  * These globals control the behavior of the postmaster in case some
168  * backend dumps core.  Normally, it kills all peers of the dead backend
169  * and reinitializes shared memory.  By specifying -s or -n, we can have
170  * the postmaster stop (rather than kill) peers and not reinitialize
171  * shared data structures.
172  */
173 static bool Reinit = true;          /* cleared by -n: don't reinit shared memory after an abnormal exit */
174 static bool SendStop = false;       /* set by -s: send SIGSTOP rather than SIGQUIT to peers of a crashed backend */
175
176 /* still more option variables */
177 bool            EnableSSL = false;
178 bool            SilentMode = false; /* silent mode (-S) */
179
180 int                     PreAuthDelay = 0;
181 int                     AuthenticationTimeout = 60;
182
183 bool            log_hostname;           /* for ps display and logging */
184 bool            Log_connections = false;
185 bool            Db_user_namespace = false;
186
187 char       *rendezvous_name;
188
189 /* list of library:init-function to be preloaded */
190 char       *preload_libraries_string = NULL;
191
192 /* PIDs of special child processes; 0 when not running */
193 static pid_t StartupPID = 0,
194                         BgWriterPID = 0;
195
196 /* Startup/shutdown state */
197 #define                 NoShutdown              0
198 #define                 SmartShutdown   1
199 #define                 FastShutdown    2
200
201 static int      Shutdown = NoShutdown;
202
203 static bool FatalError = false; /* T if recovering from backend crash */
204
205 bool            ClientAuthInProgress = false;           /* T during new-client
206                                                                                                  * authentication */
207
208 /*
209  * State for assigning random salts and cancel keys.
210  * Also, the global MyCancelKey passes the cancel key assigned to a given
211  * backend from the postmaster to that backend (via fork).
212  */
213 static unsigned int random_seed = 0;        /* NOTE(review): presumably the seed behind PostmasterRandom(); confirm */
214
215 static int      debug_flag = 0;         /* atoi() of the -d argument */
216
217 extern char *optarg;                    /* getopt(3) state, used by the option loop in PostmasterMain */
218 extern int      optind,
219                         opterr;
220
221 #ifdef HAVE_INT_OPTRESET
222 extern int      optreset;               /* set to 1 after option parsing, alongside optind = 1 */
223 #endif
224
225 /*
226  * postmaster.c - function prototypes
227  */
228 static void checkDataDir(const char *checkdir);
229 #ifdef USE_RENDEZVOUS
230 static void reg_reply(DNSServiceRegistrationReplyErrorType errorCode,
231                                           void *context);
232 #endif
233 static void pmdaemonize(void);
234 static Port *ConnCreate(int serverFd);
235 static void ConnFree(Port *port);
236 static void reset_shared(unsigned short port);
237 static void SIGHUP_handler(SIGNAL_ARGS);
238 static void pmdie(SIGNAL_ARGS);
239 static void reaper(SIGNAL_ARGS);
240 static void sigusr1_handler(SIGNAL_ARGS);
241 static void dummy_handler(SIGNAL_ARGS);
242 static void CleanupProc(int pid, int exitstatus);
243 static void HandleChildCrash(int pid, int exitstatus);
244 static void LogChildExit(int lev, const char *procname,
245                          int pid, int exitstatus);
246 static int      BackendRun(Port *port);
247 static void ExitPostmaster(int status);
248 static void usage(const char *);
249 static int      ServerLoop(void);
250 static int      BackendStartup(Port *port);
251 static int      ProcessStartupPacket(Port *port, bool SSLdone);
252 static void processCancelRequest(Port *port, void *pkt);
253 static int      initMasks(fd_set *rmask);
254 static void report_fork_failure_to_client(Port *port, int errnum);
255 static enum CAC_state canAcceptConnections(void);
256 static long PostmasterRandom(void);
257 static void RandomSalt(char *cryptSalt, char *md5Salt);
258 static void SignalChildren(int signal);
259 static int      CountChildren(void);
260 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
261 static pid_t StartChildProcess(int xlop);
262 static void
263 postmaster_error(const char *fmt,...)
264 /* This lets gcc check the format string for consistency. */
265 __attribute__((format(printf, 1, 2)));
266
267 #ifdef EXEC_BACKEND
268
269 #ifdef WIN32
270 static pid_t win32_forkexec(const char *path, char *argv[]);
271 static void win32_AddChild(pid_t pid, HANDLE handle);
272 static void win32_RemoveChild(pid_t pid);
273 static pid_t win32_waitpid(int *exitstatus);
274 static DWORD WINAPI win32_sigchld_waiter(LPVOID param);
275
276 static pid_t *win32_childPIDArray;
277 static HANDLE *win32_childHNDArray;
278 static unsigned long win32_numChildren = 0;
279 #endif
280
281 static pid_t backend_forkexec(Port *port);
282 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
283
284 static void read_backend_variables(char *filename, Port *port);
285 static bool write_backend_variables(char *filename, Port *port);
286
287 static void ShmemBackendArrayAdd(Backend *bn);
288 static void ShmemBackendArrayRemove(pid_t pid);
289
290 #endif /* EXEC_BACKEND */
291
292 #define StartupDataBase()               StartChildProcess(BS_XLOG_STARTUP)      /* yields the pid_t returned by StartChildProcess() */
293 #define StartBackgroundWriter() StartChildProcess(BS_XLOG_BGWRITER)     /* same helper, BS_XLOG_BGWRITER mode */
294
295
296 /*
297  * Postmaster main entry point
298  */
299 int
300 PostmasterMain(int argc, char *argv[])
301 {
302         int                     opt;
303         int                     status;
304         char       *potential_DataDir = NULL;
305         int                     i;
306
307         progname = get_progname(argv[0]);
308
309         MyProcPid = PostmasterPid = getpid();
310
311         IsPostmasterEnvironment = true;
312
313         /*
314          * Catch standard options before doing much else.  This even works on
315          * systems without getopt_long.
316          */
317         if (argc > 1)
318         {
319                 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
320                 {
321                         usage(progname);
322                         ExitPostmaster(0);
323                 }
324                 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
325                 {
326                         puts("postmaster (PostgreSQL) " PG_VERSION);
327                         ExitPostmaster(0);
328                 }
329         }
330
331         /*
332          * for security, no dir or file created can be group or other
333          * accessible
334          */
335         umask((mode_t) 0077);
336
337         /*
338          * Fire up essential subsystems: memory management
339          */
340         MemoryContextInit();
341
342         /*
343          * By default, palloc() requests in the postmaster will be allocated
344          * in the PostmasterContext, which is space that can be recycled by
345          * backends.  Allocated data that needs to be available to backends
346          * should be allocated in TopMemoryContext.
347          */
348         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
349                                                                                           "Postmaster",
350                                                                                           ALLOCSET_DEFAULT_MINSIZE,
351                                                                                           ALLOCSET_DEFAULT_INITSIZE,
352                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
353         MemoryContextSwitchTo(PostmasterContext);
354
355         IgnoreSystemIndexes(false);
356
357         if (find_my_exec(argv[0], my_exec_path) < 0)
358                 elog(FATAL, "%s: could not locate my own executable path",
359                          argv[0]);
360
361         get_pkglib_path(my_exec_path, pkglib_path);
362
363         /*
364          * Options setup
365          */
366         InitializeGUCOptions();
367
368         potential_DataDir = getenv("PGDATA");           /* default value */
369
370         opterr = 1;
371
372         while ((opt = getopt(argc, argv, "A:a:B:b:c:D:d:Fh:ik:lm:MN:no:p:Ss-:")) != -1)
373         {
374                 switch (opt)
375                 {
376                         case 'A':
377 #ifdef USE_ASSERT_CHECKING
378                                 SetConfigOption("debug_assertions", optarg, PGC_POSTMASTER, PGC_S_ARGV);
379 #else
380                                 postmaster_error("assert checking is not compiled in");
381 #endif
382                                 break;
383                         case 'a':
384                                 /* Can no longer set authentication method. */
385                                 break;
386                         case 'B':
387                                 SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
388                                 break;
389                         case 'b':
390                                 /* Can no longer set the backend executable file to use. */
391                                 break;
392                         case 'D':
393                                 potential_DataDir = optarg;
394                                 break;
395                         case 'd':
396                                 {
397                                         /* Turn on debugging for the postmaster. */
398                                         char       *debugstr = palloc(strlen("debug") + strlen(optarg) + 1);
399
400                                         sprintf(debugstr, "debug%s", optarg);
401                                         SetConfigOption("log_min_messages", debugstr,
402                                                                         PGC_POSTMASTER, PGC_S_ARGV);
403                                         pfree(debugstr);
404                                         debug_flag = atoi(optarg);
405                                         break;
406                                 }
407                         case 'F':
408                                 SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
409                                 break;
410                         case 'h':
411                                 SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
412                                 break;
413                         case 'i':
414                                 SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
415                                 break;
416                         case 'k':
417                                 SetConfigOption("unix_socket_directory", optarg, PGC_POSTMASTER, PGC_S_ARGV);
418                                 break;
419 #ifdef USE_SSL
420                         case 'l':
421                                 SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
422                                 break;
423 #endif
424                         case 'm':
425                                 /* Multiplexed backends no longer supported. */
426                                 break;
427                         case 'M':
428
429                                 /*
430                                  * ignore this flag.  This may be passed in because the
431                                  * program was run as 'postgres -M' instead of
432                                  * 'postmaster'
433                                  */
434                                 break;
435                         case 'N':
436                                 /* The max number of backends to start. */
437                                 SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
438                                 break;
439                         case 'n':
440                                 /* Don't reinit shared mem after abnormal exit */
441                                 Reinit = false;
442                                 break;
443                         case 'o':
444
445                                 /*
446                                  * Other options to pass to the backend on the command line
447                                  */
448                                 strcat(ExtraOptions, " ");
449                                 strcat(ExtraOptions, optarg);
450                                 break;
451                         case 'p':
452                                 SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
453                                 break;
454                         case 'S':
455
456                                 /*
457                                  * Start in 'S'ilent mode (disassociate from controlling
458                                  * tty). You may also think of this as 'S'ysV mode since
459                                  * it's most badly needed on SysV-derived systems like
460                                  * SVR4 and HP-UX.
461                                  */
462                                 SetConfigOption("silent_mode", "true", PGC_POSTMASTER, PGC_S_ARGV);
463                                 break;
464                         case 's':
465
466                                 /*
467                                  * In the event that some backend dumps core, send
468                                  * SIGSTOP, rather than SIGQUIT, to all its peers.      This
469                                  * lets the wily post_hacker collect core dumps from
470                                  * everyone.
471                                  */
472                                 SendStop = true;
473                                 break;
474                         case 'c':
475                         case '-':
476                                 {
477                                         char       *name,
478                                                            *value;
479
480                                         ParseLongOption(optarg, &name, &value);
481                                         if (!value)
482                                         {
483                                                 if (opt == '-')
484                                                         ereport(ERROR,
485                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
486                                                                          errmsg("--%s requires a value",
487                                                                                         optarg)));
488                                                 else
489                                                         ereport(ERROR,
490                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
491                                                                          errmsg("-c %s requires a value",
492                                                                                         optarg)));
493                                         }
494
495                                         SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
496                                         free(name);
497                                         if (value)
498                                                 free(value);
499                                         break;
500                                 }
501
502                         default:
503                                 fprintf(stderr,
504                                         gettext("Try \"%s --help\" for more information.\n"),
505                                                 progname);
506                                 ExitPostmaster(1);
507                 }
508         }
509
510         /*
511          * Postmaster accepts no non-option switch arguments.
512          */
513         if (optind < argc)
514         {
515                 postmaster_error("invalid argument: \"%s\"", argv[optind]);
516                 fprintf(stderr,
517                                 gettext("Try \"%s --help\" for more information.\n"),
518                                 progname);
519                 ExitPostmaster(1);
520         }
521
522         /*
523          * Now we can set the data directory, and then read postgresql.conf.
524          */
525         checkDataDir(potential_DataDir);        /* issues error messages */
526         SetDataDir(potential_DataDir);
527
528         ProcessConfigFile(PGC_POSTMASTER);
529
530         /* If timezone is not set, determine what the OS uses */
531         pg_timezone_initialize();
532
533 #ifdef EXEC_BACKEND
534         write_nondefault_variables(PGC_POSTMASTER);
535 #endif
536
537         /*
538          * Check for invalid combinations of GUC settings.
539          */
540         if (NBuffers < 2 * MaxBackends || NBuffers < 16)
541         {
542                 /*
543                  * Do not accept -B so small that backends are likely to starve
544                  * for lack of buffers.  The specific choices here are somewhat
545                  * arbitrary.
546                  */
547                 postmaster_error("the number of buffers (-B) must be at least twice the number of allowed connections (-N) and at least 16");
548                 ExitPostmaster(1);
549         }
550
551         if (ReservedBackends >= MaxBackends)
552         {
553                 postmaster_error("superuser_reserved_connections must be less than max_connections");
554                 ExitPostmaster(1);
555         }
556
557         /*
558          * Other one-time internal sanity checks can go here.
559          */
560         if (!CheckDateTokenTables())
561         {
562                 postmaster_error("invalid datetoken tables, please fix");
563                 ExitPostmaster(1);
564         }
565
566         /*
567          * Now that we are done processing the postmaster arguments, reset
568          * getopt(3) library so that it will work correctly in subprocesses.
569          */
570         optind = 1;
571 #ifdef HAVE_INT_OPTRESET
572         optreset = 1;                           /* some systems need this too */
573 #endif
574
575         /* For debugging: display postmaster environment */
576         {
577                 extern char **environ;
578                 char      **p;
579
580                 ereport(DEBUG3,
581                         (errmsg_internal("%s: PostmasterMain: initial environ dump:",
582                                                          progname)));
583                 ereport(DEBUG3,
584                  (errmsg_internal("-----------------------------------------")));
585                 for (p = environ; *p; ++p)
586                         ereport(DEBUG3,
587                                         (errmsg_internal("\t%s", *p)));
588                 ereport(DEBUG3,
589                  (errmsg_internal("-----------------------------------------")));
590         }
591
592 #ifdef EXEC_BACKEND
593         if (find_other_exec(argv[0], "postgres", PG_VERSIONSTR,
594                                                 postgres_exec_path) < 0)
595                 ereport(FATAL,
596                                 (errmsg("%s: could not locate matching postgres executable",
597                                                 progname)));
598 #endif
599
600         /*
601          * Initialize SSL library, if specified.
602          */
603 #ifdef USE_SSL
604         if (EnableSSL)
605                 secure_initialize();
606 #endif
607
608         /*
609          * process any libraries that should be preloaded and optionally
610          * pre-initialized
611          */
612         if (preload_libraries_string)
613                 process_preload_libraries(preload_libraries_string);
614
615         /*
616          * Fork away from controlling terminal, if -S specified.
617          *
618          * Must do this before we grab any interlock files, else the interlocks
619          * will show the wrong PID.
620          */
621         if (SilentMode)
622                 pmdaemonize();
623
624         /*
625          * Create lockfile for data directory.
626          *
627          * We want to do this before we try to grab the input sockets, because
628          * the data directory interlock is more reliable than the socket-file
629          * interlock (thanks to whoever decided to put socket files in /tmp
630          * :-(). For the same reason, it's best to grab the TCP socket(s) before
631          * the Unix socket.
632          */
633         CreateDataDirLockFile(DataDir, true);
634
635         /*
636          * Remove old temporary files.  At this point there can be no other
637          * Postgres processes running in this directory, so this should be
638          * safe.
639          */
640         RemovePgTempFiles();
641
642         /*
643          * Establish input sockets.
644          */
645         for (i = 0; i < MAXLISTEN; i++)
646                 ListenSocket[i] = -1;
647
648         if (ListenAddresses)
649         {
650                 char       *curhost,
651                                    *endptr;
652                 char            c;
653
654                 curhost = ListenAddresses;
655                 for (;;)
656                 {
657                         /* ignore whitespace */
658                         while (isspace((unsigned char) *curhost))
659                                 curhost++;
660                         if (*curhost == '\0')
661                                 break;
662                         endptr = curhost;
663                         while (*endptr != '\0' && !isspace((unsigned char) *endptr))
664                                 endptr++;
665                         c = *endptr;
666                         *endptr = '\0';
667                         if (strcmp(curhost, "*") == 0)
668                                 status = StreamServerPort(AF_UNSPEC, NULL,
669                                                                                   (unsigned short) PostPortNumber,
670                                                                                   UnixSocketDir,
671                                                                                   ListenSocket, MAXLISTEN);
672                         else
673                                 status = StreamServerPort(AF_UNSPEC, curhost,
674                                                                                   (unsigned short) PostPortNumber,
675                                                                                   UnixSocketDir,
676                                                                                   ListenSocket, MAXLISTEN);
677                         if (status != STATUS_OK)
678                                 ereport(WARNING,
679                                          (errmsg("could not create listen socket for \"%s\"",
680                                                          curhost)));
681                         *endptr = c;
682                         if (c != '\0')
683                                 curhost = endptr + 1;
684                         else
685                                 break;
686                 }
687         }
688
689 #ifdef USE_RENDEZVOUS
690         /* Register for Rendezvous only if we opened TCP socket(s) */
691         if (ListenSocket[0] != -1 && rendezvous_name != NULL)
692         {
693                 DNSServiceRegistrationCreate(rendezvous_name,
694                                                                          "_postgresql._tcp.",
695                                                                          "",
696                                                                          htonl(PostPortNumber),
697                                                                          "",
698                                                                  (DNSServiceRegistrationReply) reg_reply,
699                                                                          NULL);
700         }
701 #endif
702
703 #ifdef HAVE_UNIX_SOCKETS
704         status = StreamServerPort(AF_UNIX, NULL,
705                                                           (unsigned short) PostPortNumber,
706                                                           UnixSocketDir,
707                                                           ListenSocket, MAXLISTEN);
708         if (status != STATUS_OK)
709                 ereport(WARNING,
710                                 (errmsg("could not create Unix-domain socket")));
711 #endif
712
713         /*
714          * check that we have some socket to listen on
715          */
716         if (ListenSocket[0] == -1)
717                 ereport(FATAL,
718                                 (errmsg("no socket created for listening")));
719
720         XLOGPathInit();
721
722         /*
723          * Set up shared memory and semaphores.
724          */
725         reset_shared(PostPortNumber);
726
727         /*
728          * Estimate number of openable files.  This must happen after setting
729          * up semaphores, because on some platforms semaphores count as open
730          * files.
731          */
732         set_max_safe_fds();
733
734         /*
735          * Initialize the list of active backends.
736          */
737         BackendList = DLNewList();
738
739 #ifdef WIN32
740         /*
741          * Initialize the child pid/HANDLE arrays for signal handling.
742          */
743         win32_childPIDArray = (pid_t *)
744                 malloc(NUM_BACKENDARRAY_ELEMS * sizeof(pid_t));
745         win32_childHNDArray = (HANDLE *)
746                 malloc(NUM_BACKENDARRAY_ELEMS * sizeof(HANDLE));
747         if (!win32_childPIDArray || !win32_childHNDArray)
748                 ereport(FATAL,
749                                 (errcode(ERRCODE_OUT_OF_MEMORY),
750                                  errmsg("out of memory")));
751 #endif
752
753         /*
754          * Record postmaster options.  We delay this till now to avoid
755          * recording bogus options (eg, NBuffers too high for available
756          * memory).
757          */
758         if (!CreateOptsFile(argc, argv, my_exec_path))
759                 ExitPostmaster(1);
760
761         /*
762          * Set up signal handlers for the postmaster process.
763          *
764          * CAUTION: when changing this list, check for side-effects on the signal
765          * handling setup of child processes.  See tcop/postgres.c,
766          * bootstrap/bootstrap.c, postmaster/bgwriter.c, and postmaster/pgstat.c.
767          */
768         pqinitmask();
769         PG_SETMASK(&BlockSig);
770
771         pqsignal(SIGHUP, SIGHUP_handler);       /* reread config file and have
772                                                                                  * children do same */
773         pqsignal(SIGINT, pmdie);        /* send SIGTERM and shut down */
774         pqsignal(SIGQUIT, pmdie);       /* send SIGQUIT and die */
775         pqsignal(SIGTERM, pmdie);       /* wait for children and shut down */
776         pqsignal(SIGALRM, SIG_IGN); /* ignored */
777         pqsignal(SIGPIPE, SIG_IGN); /* ignored */
778         pqsignal(SIGUSR1, sigusr1_handler); /* message from child process */
779         pqsignal(SIGUSR2, dummy_handler);       /* unused, reserve for children */
780         pqsignal(SIGCHLD, reaper);      /* handle child termination */
781         pqsignal(SIGTTIN, SIG_IGN); /* ignored */
782         pqsignal(SIGTTOU, SIG_IGN); /* ignored */
783         /* ignore SIGXFSZ, so that ulimit violations work like disk full */
784 #ifdef SIGXFSZ
785         pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
786 #endif
787
788         /*
789          * Reset whereToSendOutput from Debug (its starting state) to None.
790          * This prevents ereport from sending log messages to stderr unless
791          * the syslog/stderr switch permits.  We don't do this until the
792          * postmaster is fully launched, since startup failures may as well be
793          * reported to stderr.
794          */
795         whereToSendOutput = None;
796
797         /*
798          * Initialize and try to startup the statistics collector process
799          */
800         pgstat_init();
801         pgstat_start();
802
803         /*
804          * Load cached files for client authentication.
805          */
806         load_hba();
807         load_ident();
808         load_user();
809         load_group();
810
811         /*
812          * We're ready to rock and roll...
813          */
814         StartupPID = StartupDataBase();
815
816         status = ServerLoop();
817
818         /*
819          * ServerLoop probably shouldn't ever return, but if it does, close
820          * down.
821          */
822         ExitPostmaster(status != STATUS_OK);
823
824         return 0;                                       /* not reached */
825 }
826
827
828 /*
829  * Validate the proposed data directory
830  */
831 static void
832 checkDataDir(const char *checkdir)
833 {
834         char            path[MAXPGPATH];
835         FILE       *fp;
836         struct stat stat_buf;
837
838         if (checkdir == NULL)
839         {
840                 fprintf(stderr,
841                                 gettext("%s does not know where to find the database system data.\n"
842                                                 "You must specify the directory that contains the database system\n"
843                                                 "either by specifying the -D invocation option or by setting the\n"
844                                                 "PGDATA environment variable.\n"),
845                                 progname);
846                 ExitPostmaster(2);
847         }
848
849         if (stat(checkdir, &stat_buf) == -1)
850         {
851                 if (errno == ENOENT)
852                         ereport(FATAL,
853                                         (errcode_for_file_access(),
854                                          errmsg("data directory \"%s\" does not exist",
855                                                         checkdir)));
856                 else
857                         ereport(FATAL,
858                                         (errcode_for_file_access(),
859                          errmsg("could not read permissions of directory \"%s\": %m",
860                                         checkdir)));
861         }
862
863         /*
864          * Check if the directory has group or world access.  If so, reject.
865          *
866          * XXX temporarily suppress check when on Windows, because there may not
867          * be proper support for Unix-y file permissions.  Need to think of a
868          * reasonable check to apply on Windows.
869          */
870 #if !defined(__CYGWIN__) && !defined(WIN32)
871         if (stat_buf.st_mode & (S_IRWXG | S_IRWXO))
872                 ereport(FATAL,
873                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
874                                  errmsg("data directory \"%s\" has group or world access",
875                                                 checkdir),
876                                  errdetail("Permissions should be u=rwx (0700).")));
877 #endif
878
879         /* Look for PG_VERSION before looking for pg_control */
880         ValidatePgVersion(checkdir);
881
882         snprintf(path, sizeof(path), "%s/global/pg_control", checkdir);
883
884         fp = AllocateFile(path, PG_BINARY_R);
885         if (fp == NULL)
886         {
887                 fprintf(stderr,
888                                 gettext("%s: could not find the database system\n"
889                                                 "Expected to find it in the directory \"%s\",\n"
890                                                 "but could not open file \"%s\": %s\n"),
891                                 progname, checkdir, path, strerror(errno));
892                 ExitPostmaster(2);
893         }
894         FreeFile(fp);
895 }
896
897
898 #ifdef USE_RENDEZVOUS
899
900 /*
901  * empty callback function for DNSServiceRegistrationCreate()
902  */
903 static void
904 reg_reply(DNSServiceRegistrationReplyErrorType errorCode, void *context)
905 {
906
907 }
908
909 #endif /* USE_RENDEZVOUS */
910
911
912 /*
913  * Fork away from the controlling terminal (-S option)
914  */
915 static void
916 pmdaemonize(void)
917 {
918 #ifndef WIN32
919         int                     i;
920         pid_t           pid;
921
922 #ifdef LINUX_PROFILE
923         struct itimerval prof_itimer;
924 #endif
925
926 #ifdef LINUX_PROFILE
927         /* see comments in BackendStartup */
928         getitimer(ITIMER_PROF, &prof_itimer);
929 #endif
930
931         pid = fork();
932         if (pid == (pid_t) -1)
933         {
934                 postmaster_error("could not fork background process: %s",
935                                                  strerror(errno));
936                 ExitPostmaster(1);
937         }
938         else if (pid)
939         {                                                       /* parent */
940                 /* Parent should just exit, without doing any atexit cleanup */
941                 _exit(0);
942         }
943
944 #ifdef LINUX_PROFILE
945         setitimer(ITIMER_PROF, &prof_itimer, NULL);
946 #endif
947
948         MyProcPid = PostmasterPid = getpid();   /* reset PID vars to child */
949
950 /* GH: If there's no setsid(), we hopefully don't need silent mode.
951  * Until there's a better solution.
952  */
953 #ifdef HAVE_SETSID
954         if (setsid() < 0)
955         {
956                 postmaster_error("could not dissociate from controlling TTY: %s",
957                                                  strerror(errno));
958                 ExitPostmaster(1);
959         }
960 #endif
961         i = open(NULL_DEV, O_RDWR | PG_BINARY);
962         dup2(i, 0);
963         dup2(i, 1);
964         dup2(i, 2);
965         close(i);
966 #else  /* WIN32 */
967         /* not supported */
968         elog(FATAL, "SilentMode not supported under WIN32");
969 #endif /* WIN32 */
970 }
971
972
/*
 * Print out help message
 *
 * Writes the command-line synopsis to stdout.  "progname" is substituted
 * into the first two lines.  Every line goes through gettext() separately,
 * so each string literal is its own translatable message.
 */
static void
usage(const char *progname)
{
	/* banner and synopsis */
	printf(gettext("%s is the PostgreSQL server.\n\n"), progname);
	printf(gettext("Usage:\n  %s [OPTION]...\n\n"), progname);

	/* regular options; -A and -l appear only in builds that support them */
	printf(gettext("Options:\n"));
#ifdef USE_ASSERT_CHECKING
	printf(gettext("  -A 1|0          enable/disable run-time assert checking\n"));
#endif
	printf(gettext("  -B NBUFFERS     number of shared buffers\n"));
	printf(gettext("  -c NAME=VALUE   set run-time parameter\n"));
	printf(gettext("  -d 1-5          debugging level\n"));
	printf(gettext("  -D DATADIR      database directory\n"));
	printf(gettext("  -F              turn fsync off\n"));
	printf(gettext("  -h HOSTNAME     host name or IP address to listen on\n"));
	printf(gettext("  -i              enable TCP/IP connections\n"));
	printf(gettext("  -k DIRECTORY    Unix-domain socket location\n"));
#ifdef USE_SSL
	printf(gettext("  -l              enable SSL connections\n"));
#endif
	printf(gettext("  -N MAX-CONNECT  maximum number of allowed connections\n"));
	printf(gettext("  -o OPTIONS      pass \"OPTIONS\" to each server process\n"));
	printf(gettext("  -p PORT         port number to listen on\n"));
	printf(gettext("  -S              silent mode (start in background without logging output)\n"));
	printf(gettext("  --help          show this help, then exit\n"));
	printf(gettext("  --version       output version information, then exit\n"));

	/* options of interest to developers only */
	printf(gettext("\nDeveloper options:\n"));
	printf(gettext("  -n              do not reinitialize shared memory after abnormal exit\n"));
	printf(gettext("  -s              send SIGSTOP to all backend servers if one dies\n"));

	/* closing pointer to the documentation and the bug address */
	printf(gettext("\nPlease read the documentation for the complete list of run-time\n"
				   "configuration settings and how to set them on the command line or in\n"
				   "the configuration file.\n\n"
				   "Report bugs to <pgsql-bugs@postgresql.org>.\n"));
}
1012
1013
1014 /*
1015  * Main idle loop of postmaster
1016  */
1017 static int
1018 ServerLoop(void)
1019 {
1020         fd_set          readmask;
1021         int                     nSockets;
1022         time_t          now,
1023                                 last_touch_time;
1024         struct timeval earlier,
1025                                 later;
1026         struct timezone tz;
1027
1028         gettimeofday(&earlier, &tz);
1029         last_touch_time = time(NULL);
1030
1031         nSockets = initMasks(&readmask);
1032
1033         for (;;)
1034         {
1035                 Port       *port;
1036                 fd_set          rmask;
1037                 struct timeval timeout;
1038                 int                     selres;
1039                 int                     i;
1040
1041                 /*
1042                  * Wait for something to happen.
1043                  *
1044                  * We wait at most one minute, to ensure that the other background
1045                  * tasks handled below get done even when no requests are arriving.
1046                  */
1047                 memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1048
1049                 timeout.tv_sec = 60;
1050                 timeout.tv_usec = 0;
1051
1052                 PG_SETMASK(&UnBlockSig);
1053
1054                 selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1055
1056                 /*
1057                  * Block all signals until we wait again.  (This makes it safe for
1058                  * our signal handlers to do nontrivial work.)
1059                  */
1060                 PG_SETMASK(&BlockSig);
1061
1062                 if (selres < 0)
1063                 {
1064                         if (errno == EINTR || errno == EWOULDBLOCK)
1065                                 continue;
1066                         ereport(LOG,
1067                                         (errcode_for_socket_access(),
1068                                          errmsg("select() failed in postmaster: %m")));
1069                         return STATUS_ERROR;
1070                 }
1071
1072                 /*
1073                  * New connection pending on any of our sockets? If so, fork a
1074                  * child process to deal with it.
1075                  */
1076                 if (selres > 0)
1077                 {
1078                         /*
1079                          * Select a random seed at the time of first receiving a request.
1080                          */
1081                         while (random_seed == 0)
1082                         {
1083                                 gettimeofday(&later, &tz);
1084
1085                                 /*
1086                                  * We are not sure how much precision is in tv_usec, so we
1087                                  * swap the nibbles of 'later' and XOR them with 'earlier'. On
1088                                  * the off chance that the result is 0, we loop until it isn't.
1089                                  */
1090                                 random_seed = earlier.tv_usec ^
1091                                         ((later.tv_usec << 16) |
1092                                          ((later.tv_usec >> 16) & 0xffff));
1093                         }
1094
1095                         for (i = 0; i < MAXLISTEN; i++)
1096                         {
1097                                 if (ListenSocket[i] == -1)
1098                                         break;
1099                                 if (FD_ISSET(ListenSocket[i], &rmask))
1100                                 {
1101                                         port = ConnCreate(ListenSocket[i]);
1102                                         if (port)
1103                                         {
1104                                                 BackendStartup(port);
1105
1106                                                 /*
1107                                                  * We no longer need the open socket or port structure
1108                                                  * in this process
1109                                                  */
1110                                                 StreamClose(port->sock);
1111                                                 ConnFree(port);
1112                                         }
1113                                 }
1114                         }
1115                 }
1116
1117                 /*
1118                  * If no background writer process is running, and we are not in
1119                  * a state that prevents it, start one.  It doesn't matter if this
1120                  * fails, we'll just try again later.
1121                  */
1122                 if (BgWriterPID == 0 && StartupPID == 0 && !FatalError)
1123                 {
1124                         BgWriterPID = StartBackgroundWriter();
1125                         /* If shutdown is pending, set it going */
1126                         if (Shutdown > NoShutdown && BgWriterPID != 0)
1127                                 kill(BgWriterPID, SIGUSR2);
1128                 }
1129
1130                 /* If we have lost the stats collector, try to start a new one */
1131                 if (!pgstat_is_running)
1132                         pgstat_start();
1133
1134                 /*
1135                  * Touch the socket and lock file at least every ten minutes, to ensure
1136                  * that they are not removed by overzealous /tmp-cleaning tasks.
1137                  */
1138                 now = time(NULL);
1139                 if (now - last_touch_time >= 10 * 60)
1140                 {
1141                         TouchSocketFile();
1142                         TouchSocketLockFile();
1143                         last_touch_time = now;
1144                 }
1145         }
1146 }
1147
1148
1149 /*
1150  * Initialise the masks for select() for the ports we are listening on.
1151  * Return the number of sockets to listen on.
1152  */
1153 static int
1154 initMasks(fd_set *rmask)
1155 {
1156         int                     nsocks = -1;
1157         int                     i;
1158
1159         FD_ZERO(rmask);
1160
1161         for (i = 0; i < MAXLISTEN; i++)
1162         {
1163                 int                     fd = ListenSocket[i];
1164
1165                 if (fd == -1)
1166                         break;
1167                 FD_SET(fd, rmask);
1168                 if (fd > nsocks)
1169                         nsocks = fd;
1170         }
1171
1172         return nsocks + 1;
1173 }
1174
1175
1176 /*
1177  * Read the startup packet and do something according to it.
1178  *
1179  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1180  * not return at all.
1181  *
1182  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1183  * if that's what you want.  Return STATUS_ERROR if you don't want to
1184  * send anything to the client, which would typically be appropriate
1185  * if we detect a communications failure.)
1186  */
1187 static int
1188 ProcessStartupPacket(Port *port, bool SSLdone)
1189 {
1190         int32           len;
1191         void       *buf;
1192         ProtocolVersion proto;
1193         MemoryContext oldcontext;
1194
1195         if (pq_getbytes((char *) &len, 4) == EOF)
1196         {
1197                 /*
1198                  * EOF after SSLdone probably means the client didn't like our
1199                  * response to NEGOTIATE_SSL_CODE.      That's not an error condition,
1200                  * so don't clutter the log with a complaint.
1201                  */
1202                 if (!SSLdone)
1203                         ereport(COMMERROR,
1204                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1205                                          errmsg("incomplete startup packet")));
1206                 return STATUS_ERROR;
1207         }
1208
1209         len = ntohl(len);
1210         len -= 4;
1211
1212         if (len < (int32) sizeof(ProtocolVersion) ||
1213                 len > MAX_STARTUP_PACKET_LENGTH)
1214         {
1215                 ereport(COMMERROR,
1216                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1217                                  errmsg("invalid length of startup packet")));
1218                 return STATUS_ERROR;
1219         }
1220
1221         /*
1222          * Allocate at least the size of an old-style startup packet, plus one
1223          * extra byte, and make sure all are zeroes.  This ensures we will
1224          * have null termination of all strings, in both fixed- and
1225          * variable-length packet layouts.
1226          */
1227         if (len <= (int32) sizeof(StartupPacket))
1228                 buf = palloc0(sizeof(StartupPacket) + 1);
1229         else
1230                 buf = palloc0(len + 1);
1231
1232         if (pq_getbytes(buf, len) == EOF)
1233         {
1234                 ereport(COMMERROR,
1235                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1236                                  errmsg("incomplete startup packet")));
1237                 return STATUS_ERROR;
1238         }
1239
1240         /*
1241          * The first field is either a protocol version number or a special
1242          * request code.
1243          */
1244         port->proto = proto = ntohl(*((ProtocolVersion *) buf));
1245
1246         if (proto == CANCEL_REQUEST_CODE)
1247         {
1248                 processCancelRequest(port, buf);
1249                 return 127;                             /* XXX */
1250         }
1251
1252         if (proto == NEGOTIATE_SSL_CODE && !SSLdone)
1253         {
1254                 char            SSLok;
1255
1256 #ifdef USE_SSL
1257                 /* No SSL when disabled or on Unix sockets */
1258                 if (!EnableSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
1259                         SSLok = 'N';
1260                 else
1261                         SSLok = 'S';            /* Support for SSL */
1262 #else
1263                 SSLok = 'N';                    /* No support for SSL */
1264 #endif
1265                 if (send(port->sock, &SSLok, 1, 0) != 1)
1266                 {
1267                         ereport(COMMERROR,
1268                                         (errcode_for_socket_access(),
1269                                  errmsg("failed to send SSL negotiation response: %m")));
1270                         return STATUS_ERROR;    /* close the connection */
1271                 }
1272
1273 #ifdef USE_SSL
1274                 if (SSLok == 'S' && secure_open_server(port) == -1)
1275                         return STATUS_ERROR;
1276 #endif
1277                 /* regular startup packet, cancel, etc packet should follow... */
1278                 /* but not another SSL negotiation request */
1279                 return ProcessStartupPacket(port, true);
1280         }
1281
1282         /* Could add additional special packet types here */
1283
1284         /*
1285          * Set FrontendProtocol now so that ereport() knows what format to
1286          * send if we fail during startup.
1287          */
1288         FrontendProtocol = proto;
1289
1290         /* Check we can handle the protocol the frontend is using. */
1291
1292         if (PG_PROTOCOL_MAJOR(proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) ||
1293           PG_PROTOCOL_MAJOR(proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) ||
1294         (PG_PROTOCOL_MAJOR(proto) == PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) &&
1295          PG_PROTOCOL_MINOR(proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST)))
1296                 ereport(FATAL,
1297                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1298                                  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
1299                                           PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
1300                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST),
1301                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST),
1302                                                 PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST))));
1303
1304         /*
1305          * Now fetch parameters out of startup packet and save them into the
1306          * Port structure.      All data structures attached to the Port struct
1307          * must be allocated in TopMemoryContext so that they won't disappear
1308          * when we pass them to PostgresMain (see BackendRun).  We need not
1309          * worry about leaking this storage on failure, since we aren't in the
1310          * postmaster process anymore.
1311          */
1312         oldcontext = MemoryContextSwitchTo(TopMemoryContext);
1313
1314         if (PG_PROTOCOL_MAJOR(proto) >= 3)
1315         {
1316                 int32           offset = sizeof(ProtocolVersion);
1317
1318                 /*
1319                  * Scan packet body for name/option pairs.      We can assume any
1320                  * string beginning within the packet body is null-terminated,
1321                  * thanks to zeroing extra byte above.
1322                  */
1323                 port->guc_options = NIL;
1324
1325                 while (offset < len)
1326                 {
1327                         char       *nameptr = ((char *) buf) + offset;
1328                         int32           valoffset;
1329                         char       *valptr;
1330
1331                         if (*nameptr == '\0')
1332                                 break;                  /* found packet terminator */
1333                         valoffset = offset + strlen(nameptr) + 1;
1334                         if (valoffset >= len)
1335                                 break;                  /* missing value, will complain below */
1336                         valptr = ((char *) buf) + valoffset;
1337
1338                         if (strcmp(nameptr, "database") == 0)
1339                                 port->database_name = pstrdup(valptr);
1340                         else if (strcmp(nameptr, "user") == 0)
1341                                 port->user_name = pstrdup(valptr);
1342                         else if (strcmp(nameptr, "options") == 0)
1343                                 port->cmdline_options = pstrdup(valptr);
1344                         else
1345                         {
1346                                 /* Assume it's a generic GUC option */
1347                                 port->guc_options = lappend(port->guc_options,
1348                                                                                         pstrdup(nameptr));
1349                                 port->guc_options = lappend(port->guc_options,
1350                                                                                         pstrdup(valptr));
1351                         }
1352                         offset = valoffset + strlen(valptr) + 1;
1353                 }
1354
1355                 /*
1356                  * If we didn't find a packet terminator exactly at the end of the
1357                  * given packet length, complain.
1358                  */
1359                 if (offset != len - 1)
1360                         ereport(FATAL,
1361                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1362                                          errmsg("invalid startup packet layout: expected terminator as last byte")));
1363         }
1364         else
1365         {
1366                 /*
1367                  * Get the parameters from the old-style, fixed-width-fields
1368                  * startup packet as C strings.  The packet destination was
1369                  * cleared first so a short packet has zeros silently added.  We
1370                  * have to be prepared to truncate the pstrdup result for oversize
1371                  * fields, though.
1372                  */
1373                 StartupPacket *packet = (StartupPacket *) buf;
1374
1375                 port->database_name = pstrdup(packet->database);
1376                 if (strlen(port->database_name) > sizeof(packet->database))
1377                         port->database_name[sizeof(packet->database)] = '\0';
1378                 port->user_name = pstrdup(packet->user);
1379                 if (strlen(port->user_name) > sizeof(packet->user))
1380                         port->user_name[sizeof(packet->user)] = '\0';
1381                 port->cmdline_options = pstrdup(packet->options);
1382                 if (strlen(port->cmdline_options) > sizeof(packet->options))
1383                         port->cmdline_options[sizeof(packet->options)] = '\0';
1384                 port->guc_options = NIL;
1385         }
1386
1387         /* Check a user name was given. */
1388         if (port->user_name == NULL || port->user_name[0] == '\0')
1389                 ereport(FATAL,
1390                                 (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
1391                  errmsg("no PostgreSQL user name specified in startup packet")));
1392
1393         /* The database defaults to the user name. */
1394         if (port->database_name == NULL || port->database_name[0] == '\0')
1395                 port->database_name = pstrdup(port->user_name);
1396
1397         if (Db_user_namespace)
1398         {
1399                 /*
1400                  * If user@, it is a global user, remove '@'. We only want to do
1401                  * this if there is an '@' at the end and no earlier in the user
1402                  * string or they may fake as a local user of another database
1403                  * attaching to this database.
1404                  */
1405                 if (strchr(port->user_name, '@') ==
1406                         port->user_name + strlen(port->user_name) - 1)
1407                         *strchr(port->user_name, '@') = '\0';
1408                 else
1409                 {
1410                         /* Append '@' and dbname */
1411                         char       *db_user;
1412
1413                         db_user = palloc(strlen(port->user_name) +
1414                                                          strlen(port->database_name) + 2);
1415                         sprintf(db_user, "%s@%s", port->user_name, port->database_name);
1416                         port->user_name = db_user;
1417                 }
1418         }
1419
1420         /*
1421          * Truncate given database and user names to length of a Postgres
1422          * name.  This avoids lookup failures when overlength names are given.
1423          */
1424         if (strlen(port->database_name) >= NAMEDATALEN)
1425                 port->database_name[NAMEDATALEN - 1] = '\0';
1426         if (strlen(port->user_name) >= NAMEDATALEN)
1427                 port->user_name[NAMEDATALEN - 1] = '\0';
1428
1429         /*
1430          * Done putting stuff in TopMemoryContext.
1431          */
1432         MemoryContextSwitchTo(oldcontext);
1433
1434         /*
1435          * If we're going to reject the connection due to database state, say
1436          * so now instead of wasting cycles on an authentication exchange.
1437          * (This also allows a pg_ping utility to be written.)
1438          */
1439         switch (port->canAcceptConnections)
1440         {
1441                 case CAC_STARTUP:
1442                         ereport(FATAL,
1443                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1444                                          errmsg("the database system is starting up")));
1445                         break;
1446                 case CAC_SHUTDOWN:
1447                         ereport(FATAL,
1448                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1449                                          errmsg("the database system is shutting down")));
1450                         break;
1451                 case CAC_RECOVERY:
1452                         ereport(FATAL,
1453                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1454                                          errmsg("the database system is in recovery mode")));
1455                         break;
1456                 case CAC_TOOMANY:
1457                         ereport(FATAL,
1458                                         (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
1459                                          errmsg("sorry, too many clients already")));
1460                         break;
1461                 case CAC_OK:
1462                 default:
1463                         break;
1464         }
1465
1466         return STATUS_OK;
1467 }
1468
1469
1470 /*
1471  * The client has sent a cancel request packet, not a normal
1472  * start-a-new-connection packet.  Perform the necessary processing.
1473  * Nothing is sent back to the client.
1474  */
1475 static void
1476 processCancelRequest(Port *port, void *pkt)
1477 {
1478         CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
1479         int                     backendPID;
1480         long            cancelAuthCode;
1481         Backend    *bp;
1482 #ifndef EXEC_BACKEND
1483         Dlelem     *curr;
1484 #else
1485         int                     i;
1486 #endif
1487
1488         backendPID = (int) ntohl(canc->backendPID);
1489         cancelAuthCode = (long) ntohl(canc->cancelAuthCode);
1490
1491         if (backendPID == BgWriterPID)
1492         {
1493                 ereport(DEBUG2,
1494                                 (errmsg_internal("ignoring cancel request for bgwriter process %d",
1495                                                                  backendPID)));
1496                 return;
1497         }
1498
1499         /*
1500          * See if we have a matching backend.  In the EXEC_BACKEND case, we
1501          * can no longer access the postmaster's own backend list, and must
1502          * rely on the duplicate array in shared memory.
1503          */
1504 #ifndef EXEC_BACKEND
1505         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
1506         {
1507                 bp = (Backend *) DLE_VAL(curr);
1508 #else
1509         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
1510         {
1511                 bp = (Backend *) &ShmemBackendArray[i];
1512 #endif
1513                 if (bp->pid == backendPID)
1514                 {
1515                         if (bp->cancel_key == cancelAuthCode)
1516                         {
1517                                 /* Found a match; signal that backend to cancel current op */
1518                                 ereport(DEBUG2,
1519                                                 (errmsg_internal("processing cancel request: sending SIGINT to process %d",
1520                                                                                  backendPID)));
1521                                 kill(bp->pid, SIGINT);
1522                         }
1523                         else
1524                                 /* Right PID, wrong key: no way, Jose */
1525                                 ereport(DEBUG2,
1526                                                 (errmsg_internal("bad key in cancel request for process %d",
1527                                                                                  backendPID)));
1528                         return;
1529                 }
1530         }
1531
1532         /* No matching backend */
1533         ereport(DEBUG2,
1534                         (errmsg_internal("bad pid in cancel request for process %d",
1535                                                          backendPID)));
1536 }
1537
1538 /*
1539  * canAcceptConnections --- check to see if database state allows connections.
1540  */
1541 static enum CAC_state
1542 canAcceptConnections(void)
1543 {
1544         /* Can't start backends when in startup/shutdown/recovery state. */
1545         if (Shutdown > NoShutdown)
1546                 return CAC_SHUTDOWN;
1547         if (StartupPID)
1548                 return CAC_STARTUP;
1549         if (FatalError)
1550                 return CAC_RECOVERY;
1551
1552         /*
1553          * Don't start too many children.
1554          *
1555          * We allow more connections than we can have backends here because some
1556          * might still be authenticating; they might fail auth, or some
1557          * existing backend might exit before the auth cycle is completed. The
1558          * exact MaxBackends limit is enforced when a new backend tries to
1559          * join the shared-inval backend array.
1560          */
1561         if (CountChildren() >= 2 * MaxBackends)
1562                 return CAC_TOOMANY;
1563
1564         return CAC_OK;
1565 }
1566
1567
1568 /*
1569  * ConnCreate -- create a local connection data structure
1570  */
1571 static Port *
1572 ConnCreate(int serverFd)
1573 {
1574         Port       *port;
1575
1576         if (!(port = (Port *) calloc(1, sizeof(Port))))
1577         {
1578                 ereport(LOG,
1579                                 (errcode(ERRCODE_OUT_OF_MEMORY),
1580                                  errmsg("out of memory")));
1581                 ExitPostmaster(1);
1582         }
1583
1584         if (StreamConnection(serverFd, port) != STATUS_OK)
1585         {
1586                 StreamClose(port->sock);
1587                 ConnFree(port);
1588                 port = NULL;
1589         }
1590         else
1591         {
1592                 /*
1593                  * Precompute password salt values to use for this connection.
1594                  * It's slightly annoying to do this long in advance of knowing
1595                  * whether we'll need 'em or not, but we must do the random()
1596                  * calls before we fork, not after.  Else the postmaster's random
1597                  * sequence won't get advanced, and all backends would end up
1598                  * using the same salt...
1599                  */
1600                 RandomSalt(port->cryptSalt, port->md5Salt);
1601         }
1602
1603         return port;
1604 }
1605
1606
1607 /*
1608  * ConnFree -- free a local connection data structure
1609  */
1610 static void
1611 ConnFree(Port *conn)
1612 {
1613 #ifdef USE_SSL
1614         secure_close(conn);
1615 #endif
1616         free(conn);
1617 }
1618
1619
1620 /*
1621  * ClosePostmasterPorts -- close all the postmaster's open sockets
1622  *
1623  * This is called during child process startup to release file descriptors
1624  * that are not needed by that child process.  The postmaster still has
1625  * them open, of course.
1626  */
1627 void
1628 ClosePostmasterPorts(void)
1629 {
1630         int                     i;
1631
1632         /* Close the listen sockets */
1633         for (i = 0; i < MAXLISTEN; i++)
1634         {
1635                 if (ListenSocket[i] != -1)
1636                 {
1637                         StreamClose(ListenSocket[i]);
1638                         ListenSocket[i] = -1;
1639                 }
1640         }
1641 }
1642
1643
1644 /*
1645  * reset_shared -- reset shared memory and semaphores
1646  */
1647 static void
1648 reset_shared(unsigned short port)
1649 {
1650         /*
1651          * Create or re-create shared memory and semaphores.
1652          *
1653          * Note: in each "cycle of life" we will normally assign the same IPC
1654          * keys (if using SysV shmem and/or semas), since the port number is
1655          * used to determine IPC keys.  This helps ensure that we will clean
1656          * up dead IPC objects if the postmaster crashes and is restarted.
1657          */
1658         CreateSharedMemoryAndSemaphores(false, MaxBackends, port);
1659 }
1660
1661
1662 /*
1663  * SIGHUP -- reread config files, and tell children to do same
1664  */
1665 static void
1666 SIGHUP_handler(SIGNAL_ARGS)
1667 {
1668         int                     save_errno = errno;
1669
1670         PG_SETMASK(&BlockSig);
1671
1672         if (Shutdown <= SmartShutdown)
1673         {
1674                 ereport(LOG,
1675                          (errmsg("received SIGHUP, reloading configuration files")));
1676                 ProcessConfigFile(PGC_SIGHUP);
1677                 SignalChildren(SIGHUP);
1678                 if (BgWriterPID != 0)
1679                         kill(BgWriterPID, SIGHUP);
1680                 load_hba();
1681                 load_ident();
1682
1683 #ifdef EXEC_BACKEND
1684                 /* Update the starting-point file for future children */
1685                 write_nondefault_variables(PGC_SIGHUP);
1686 #endif
1687         }
1688
1689         PG_SETMASK(&UnBlockSig);
1690
1691         errno = save_errno;
1692 }
1693
1694
1695 /*
1696  * pmdie -- signal handler for processing various postmaster signals.
1697  */
1698 static void
1699 pmdie(SIGNAL_ARGS)
1700 {
1701         int                     save_errno = errno;
1702
1703         PG_SETMASK(&BlockSig);
1704
1705         ereport(DEBUG2,
1706                         (errmsg_internal("postmaster received signal %d",
1707                                                          postgres_signal_arg)));
1708
1709         switch (postgres_signal_arg)
1710         {
1711                 case SIGTERM:
1712                         /*
1713                          * Smart Shutdown:
1714                          *
1715                          * Wait for children to end their work, then shut down.
1716                          */
1717                         if (Shutdown >= SmartShutdown)
1718                                 break;
1719                         Shutdown = SmartShutdown;
1720                         ereport(LOG,
1721                                         (errmsg("received smart shutdown request")));
1722
1723                         if (DLGetHead(BackendList))
1724                                 break;                  /* let reaper() handle this */
1725
1726                         /*
1727                          * No children left. Begin shutdown of data base system.
1728                          */
1729                         if (StartupPID != 0 || FatalError)
1730                                 break;                  /* let reaper() handle this */
1731                         /* Start the bgwriter if not running */
1732                         if (BgWriterPID == 0)
1733                                 BgWriterPID = StartBackgroundWriter();
1734                         /* And tell it to shut down */
1735                         if (BgWriterPID != 0)
1736                                 kill(BgWriterPID, SIGUSR2);
1737                         break;
1738
1739                 case SIGINT:
1740                         /*
1741                          * Fast Shutdown:
1742                          *
1743                          * Abort all children with SIGTERM (rollback active transactions
1744                          * and exit) and shut down when they are gone.
1745                          */
1746                         if (Shutdown >= FastShutdown)
1747                                 break;
1748                         Shutdown = FastShutdown;
1749                         ereport(LOG,
1750                                         (errmsg("received fast shutdown request")));
1751
1752                         if (DLGetHead(BackendList))
1753                         {
1754                                 if (!FatalError)
1755                                 {
1756                                         ereport(LOG,
1757                                                         (errmsg("aborting any active transactions")));
1758                                         SignalChildren(SIGTERM);
1759                                         /* reaper() does the rest */
1760                                 }
1761                                 break;
1762                         }
1763
1764                         /*
1765                          * No children left. Begin shutdown of data base system.
1766                          *
1767                          * Note: if we previously got SIGTERM then we may send SIGUSR2
1768                          * to the bgwriter a second time here.  This should be harmless.
1769                          */
1770                         if (StartupPID != 0 || FatalError)
1771                                 break;                  /* let reaper() handle this */
1772                         /* Start the bgwriter if not running */
1773                         if (BgWriterPID == 0)
1774                                 BgWriterPID = StartBackgroundWriter();
1775                         /* And tell it to shut down */
1776                         if (BgWriterPID != 0)
1777                                 kill(BgWriterPID, SIGUSR2);
1778                         break;
1779
1780                 case SIGQUIT:
1781                         /*
1782                          * Immediate Shutdown:
1783                          *
1784                          * abort all children with SIGQUIT and exit without attempt to
1785                          * properly shut down data base system.
1786                          */
1787                         ereport(LOG,
1788                                         (errmsg("received immediate shutdown request")));
1789                         if (StartupPID != 0)
1790                                 kill(StartupPID, SIGQUIT);
1791                         if (BgWriterPID != 0)
1792                                 kill(BgWriterPID, SIGQUIT);
1793                         if (DLGetHead(BackendList))
1794                                 SignalChildren(SIGQUIT);
1795                         ExitPostmaster(0);
1796                         break;
1797         }
1798
1799         PG_SETMASK(&UnBlockSig);
1800
1801         errno = save_errno;
1802 }
1803
1804 /*
1805  * Reaper -- signal handler to cleanup after a backend (child) dies.
1806  */
1807 static void
1808 reaper(SIGNAL_ARGS)
1809 {
1810         int                     save_errno = errno;
1811
1812 #ifdef HAVE_WAITPID
1813         int                     status;                 /* backend exit status */
1814
1815 #else
1816 #ifndef WIN32
1817         union wait      status;                 /* backend exit status */
1818 #endif
1819 #endif
1820         int                     exitstatus;
1821         int                     pid;                    /* process id of dead backend */
1822
1823         PG_SETMASK(&BlockSig);
1824
1825         ereport(DEBUG4,
1826                         (errmsg_internal("reaping dead processes")));
1827 #ifdef HAVE_WAITPID
1828         while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
1829         {
1830                 exitstatus = status;
1831 #else
1832 #ifndef WIN32
1833         while ((pid = wait3(&status, WNOHANG, NULL)) > 0)
1834         {
1835                 exitstatus = status.w_status;
1836 #else
1837         while ((pid = win32_waitpid(&exitstatus)) > 0)
1838         {
1839                 /*
1840                  * We need to do this here, and not in CleanupProc, since this is
1841                  * to be called on all children when we are done with them. Could
1842                  * move to LogChildExit, but that seems like asking for future
1843                  * trouble...
1844                  */
1845                 win32_RemoveChild(pid);
1846 #endif /* WIN32 */
1847 #endif /* HAVE_WAITPID */
1848
1849                 /*
1850                  * Check if this child was the statistics collector. If so, try to
1851                  * start a new one.  (If fail, we'll try again in future cycles of
1852                  * the main loop.)
1853                  */
1854                 if (pgstat_ispgstat(pid))
1855                 {
1856                         LogChildExit(LOG, gettext("statistics collector process"),
1857                                                  pid, exitstatus);
1858                         pgstat_start();
1859                         continue;
1860                 }
1861
1862                 /*
1863                  * Check if this child was a startup process.
1864                  */
1865                 if (StartupPID != 0 && pid == StartupPID)
1866                 {
1867                         StartupPID = 0;
1868                         if (exitstatus != 0)
1869                         {
1870                                 LogChildExit(LOG, gettext("startup process"),
1871                                                          pid, exitstatus);
1872                                 ereport(LOG,
1873                                                 (errmsg("aborting startup due to startup process failure")));
1874                                 ExitPostmaster(1);
1875                         }
1876
1877                         /*
1878                          * Startup succeeded - we are done with system startup or recovery.
1879                          */
1880                         FatalError = false;
1881
1882                         /*
1883                          * Crank up the background writer.  It doesn't matter if this
1884                          * fails, we'll just try again later.
1885                          */
1886                         Assert(BgWriterPID == 0);
1887                         BgWriterPID = StartBackgroundWriter();
1888
1889                         /*
1890                          * Go to shutdown mode if a shutdown request was pending.
1891                          */
1892                         if (Shutdown > NoShutdown && BgWriterPID != 0)
1893                                 kill(BgWriterPID, SIGUSR2);
1894
1895                         continue;
1896                 }
1897
1898                 /*
1899                  * Was it the bgwriter?
1900                  */
1901                 if (BgWriterPID != 0 && pid == BgWriterPID)
1902                 {
1903                         if (exitstatus == 0 && Shutdown > NoShutdown &&
1904                                 !FatalError && !DLGetHead(BackendList))
1905                         {
1906                                 /*
1907                                  * Normal postmaster exit is here: we've seen normal
1908                                  * exit of the bgwriter after it's been told to shut down.
1909                                  * We expect that it wrote a shutdown checkpoint.  (If
1910                                  * for some reason it didn't, recovery will occur on next
1911                                  * postmaster start.)
1912                                  */
1913                                 ExitPostmaster(0);
1914                         }
1915                         /*
1916                          * Any unexpected exit of the bgwriter is treated as a crash.
1917                          */
1918                         LogChildExit(DEBUG2, gettext("background writer process"),
1919                                                  pid, exitstatus);
1920                         HandleChildCrash(pid, exitstatus);
1921                         continue;
1922                 }
1923
1924                 /*
1925                  * Else do standard backend child cleanup.
1926                  */
1927                 CleanupProc(pid, exitstatus);
1928         }                                                       /* loop over pending child-death reports */
1929
1930         if (FatalError)
1931         {
1932                 /*
1933                  * Wait for all children exit, then reset shmem and
1934                  * StartupDataBase.
1935                  */
1936                 if (DLGetHead(BackendList) || StartupPID != 0 || BgWriterPID != 0)
1937                         goto reaper_done;
1938                 ereport(LOG,
1939                         (errmsg("all server processes terminated; reinitializing")));
1940
1941                 shmem_exit(0);
1942                 reset_shared(PostPortNumber);
1943
1944                 StartupPID = StartupDataBase();
1945
1946                 goto reaper_done;
1947         }
1948
1949         if (Shutdown > NoShutdown)
1950         {
1951                 if (DLGetHead(BackendList) || StartupPID != 0)
1952                         goto reaper_done;
1953                 /* Start the bgwriter if not running */
1954                 if (BgWriterPID == 0)
1955                         BgWriterPID = StartBackgroundWriter();
1956                 /* And tell it to shut down */
1957                 if (BgWriterPID != 0)
1958                         kill(BgWriterPID, SIGUSR2);
1959         }
1960
1961 reaper_done:
1962         PG_SETMASK(&UnBlockSig);
1963
1964         errno = save_errno;
1965 }
1966
1967
1968 /*
1969  * CleanupProc -- cleanup after terminated backend.
1970  *
1971  * Remove all local state associated with backend.
1972  */
1973 static void
1974 CleanupProc(int pid,
1975                         int exitstatus)         /* child's exit status. */
1976 {
1977         Dlelem     *curr;
1978
1979         LogChildExit(DEBUG2, gettext("server process"), pid, exitstatus);
1980
1981         /*
1982          * If a backend dies in an ugly way (i.e. exit status not 0) then we
1983          * must signal all other backends to quickdie.  If exit status is zero
1984          * we assume everything is hunky dory and simply remove the backend
1985          * from the active backend list.
1986          */
1987         if (exitstatus != 0)
1988         {
1989                 HandleChildCrash(pid, exitstatus);
1990                 return;
1991         }
1992
1993         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
1994         {
1995                 Backend    *bp = (Backend *) DLE_VAL(curr);
1996
1997                 if (bp->pid == pid)
1998                 {
1999                         DLRemove(curr);
2000                         free(bp);
2001                         DLFreeElem(curr);
2002 #ifdef EXEC_BACKEND
2003                         ShmemBackendArrayRemove(pid);
2004 #endif
2005                         /* Tell the collector about backend termination */
2006                         pgstat_beterm(pid);
2007                         break;
2008                 }
2009         }
2010 }
2011
2012 /*
2013  * HandleChildCrash -- cleanup after failed backend or bgwriter.
2014  *
2015  * The objectives here are to clean up our local state about the child
2016  * process, and to signal all other remaining children to quickdie.
2017  */
2018 static void
2019 HandleChildCrash(int pid,
2020                                  int exitstatus) /* child's exit status. */
2021 {
2022         Dlelem     *curr,
2023                            *next;
2024         Backend    *bp;
2025
2026         /*
2027          * Make log entry unless there was a previous crash (if so, nonzero
2028          * exit status is to be expected in SIGQUIT response; don't clutter log)
2029          */
2030         if (!FatalError)
2031         {
2032                 LogChildExit(LOG,
2033                                          (pid == BgWriterPID) ?
2034                                          gettext("background writer process") :
2035                                          gettext("server process"),
2036                                          pid, exitstatus);
2037                 ereport(LOG,
2038                                 (errmsg("terminating any other active server processes")));
2039         }
2040
2041         /* Process regular backends */
2042         for (curr = DLGetHead(BackendList); curr; curr = next)
2043         {
2044                 next = DLGetSucc(curr);
2045                 bp = (Backend *) DLE_VAL(curr);
2046                 if (bp->pid == pid)
2047                 {
2048                         /*
2049                          * Found entry for freshly-dead backend, so remove it.
2050                          */
2051                         DLRemove(curr);
2052                         free(bp);
2053                         DLFreeElem(curr);
2054 #ifdef EXEC_BACKEND
2055                         ShmemBackendArrayRemove(pid);
2056 #endif
2057                         /* Tell the collector about backend termination */
2058                         pgstat_beterm(pid);
2059                         /* Keep looping so we can signal remaining backends */
2060                 }
2061                 else
2062                 {
2063                         /*
2064                          * This backend is still alive.  Unless we did so already,
2065                          * tell it to commit hara-kiri.
2066                          *
2067                          * SIGQUIT is the special signal that says exit without proc_exit
2068                          * and let the user know what's going on. But if SendStop is
2069                          * set (-s on command line), then we send SIGSTOP instead, so
2070                          * that we can get core dumps from all backends by hand.
2071                          */
2072                         if (!FatalError)
2073                         {
2074                                 ereport(DEBUG2,
2075                                                 (errmsg_internal("sending %s to process %d",
2076                                                                           (SendStop ? "SIGSTOP" : "SIGQUIT"),
2077                                                                                  (int) bp->pid)));
2078                                 kill(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
2079                         }
2080                 }
2081         }
2082
2083         /* Take care of the bgwriter too */
2084         if (pid == BgWriterPID)
2085                 BgWriterPID = 0;
2086         else if (BgWriterPID != 0 && !FatalError)
2087         {
2088                 ereport(DEBUG2,
2089                                 (errmsg_internal("sending %s to process %d",
2090                                                                  (SendStop ? "SIGSTOP" : "SIGQUIT"),
2091                                                                  (int) BgWriterPID)));
2092                 kill(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
2093         }
2094
2095         FatalError = true;
2096 }
2097
/*
 * LogChildExit -- write a log entry describing how a child process ended.
 *
 *	lev:		message level to report at
 *	procname:	noun phrase naming the kind of child (already translated)
 *	pid:		process id of the child
 *	exitstatus:	exit status as collected by the reaper
 *
 * Exactly one message is emitted, chosen by whether the child exited
 * normally, was killed by a signal, or ended with an unrecognized status.
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
	if (WIFEXITED(exitstatus))
	{
		ereport(lev,

		/*
		 * translator: %s is a noun phrase describing a child process,
		 * such as "server process"
		 */
				(errmsg("%s (PID %d) exited with exit code %d",
						procname, pid, WEXITSTATUS(exitstatus))));
		return;
	}

	if (WIFSIGNALED(exitstatus))
	{
		ereport(lev,

		/*
		 * translator: %s is a noun phrase describing a child process,
		 * such as "server process"
		 */
				(errmsg("%s (PID %d) was terminated by signal %d",
						procname, pid, WTERMSIG(exitstatus))));
		return;
	}

	ereport(lev,

	/*
	 * translator: %s is a noun phrase describing a child process,
	 * such as "server process"
	 */
			(errmsg("%s (PID %d) exited with unexpected status %d",
					procname, pid, exitstatus)));
}
2132
2133 /*
2134  * Send a signal to all backend children.
2135  */
2136 static void
2137 SignalChildren(int signal)
2138 {
2139         Dlelem     *curr;
2140
2141         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2142         {
2143                 Backend    *bp = (Backend *) DLE_VAL(curr);
2144
2145                 ereport(DEBUG4,
2146                                 (errmsg_internal("sending signal %d to process %d",
2147                                                                  signal, (int) bp->pid)));
2148                 kill(bp->pid, signal);
2149         }
2150 }
2151
2152 /*
2153  * BackendStartup -- start backend process
2154  *
2155  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
2156  */
2157 static int
2158 BackendStartup(Port *port)
2159 {
2160         Backend    *bn;                         /* for backend cleanup */
2161         pid_t           pid;
2162
2163 #ifdef LINUX_PROFILE
2164         struct itimerval prof_itimer;
2165 #endif
2166
2167         /*
2168          * Compute the cancel key that will be assigned to this backend. The
2169          * backend will have its own copy in the forked-off process' value of
2170          * MyCancelKey, so that it can transmit the key to the frontend.
2171          */
2172         MyCancelKey = PostmasterRandom();
2173
2174         /*
2175          * Make room for backend data structure.  Better before the fork() so
2176          * we can handle failure cleanly.
2177          */
2178         bn = (Backend *) malloc(sizeof(Backend));
2179         if (!bn)
2180         {
2181                 ereport(LOG,
2182                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2183                                  errmsg("out of memory")));
2184                 return STATUS_ERROR;
2185         }
2186
2187         /* Pass down canAcceptConnections state (kluge for EXEC_BACKEND case) */
2188         port->canAcceptConnections = canAcceptConnections();
2189
2190         /*
2191          * Flush stdio channels just before fork, to avoid double-output
2192          * problems. Ideally we'd use fflush(NULL) here, but there are still a
2193          * few non-ANSI stdio libraries out there (like SunOS 4.1.x) that
2194          * coredump if we do. Presently stdout and stderr are the only stdio
2195          * output channels used by the postmaster, so fflush'ing them should
2196          * be sufficient.
2197          */
2198         fflush(stdout);
2199         fflush(stderr);
2200
2201 #ifdef EXEC_BACKEND
2202
2203         pid = backend_forkexec(port);
2204
2205 #else /* !EXEC_BACKEND */
2206
2207 #ifdef LINUX_PROFILE
2208
2209         /*
2210          * Linux's fork() resets the profiling timer in the child process. If
2211          * we want to profile child processes then we need to save and restore
2212          * the timer setting.  This is a waste of time if not profiling,
2213          * however, so only do it if commanded by specific -DLINUX_PROFILE
2214          * switch.
2215          */
2216         getitimer(ITIMER_PROF, &prof_itimer);
2217 #endif
2218
2219 #ifdef __BEOS__
2220         /* Specific beos actions before backend startup */
2221         beos_before_backend_startup();
2222 #endif
2223
2224         pid = fork();
2225
2226         if (pid == 0)                           /* child */
2227         {
2228 #ifdef LINUX_PROFILE
2229                 setitimer(ITIMER_PROF, &prof_itimer, NULL);
2230 #endif
2231
2232 #ifdef __BEOS__
2233                 /* Specific beos backend startup actions */
2234                 beos_backend_startup();
2235 #endif
2236                 free(bn);
2237
2238                 proc_exit(BackendRun(port));
2239         }
2240
2241 #endif /* EXEC_BACKEND */
2242
2243         if (pid < 0)
2244         {
2245                 /* in parent, fork failed */
2246                 int                     save_errno = errno;
2247
2248 #ifdef __BEOS__
2249                 /* Specific beos backend startup actions */
2250                 beos_backend_startup_failed();
2251 #endif
2252                 free(bn);
2253                 errno = save_errno;
2254                 ereport(LOG,
2255                           (errmsg("could not fork new process for connection: %m")));
2256                 report_fork_failure_to_client(port, save_errno);
2257                 return STATUS_ERROR;
2258         }
2259
2260         /* in parent, successful fork */
2261         ereport(DEBUG2,
2262                         (errmsg_internal("forked new backend, pid=%d socket=%d",
2263                                                          (int) pid, port->sock)));
2264
2265         /*
2266          * Everything's been successful, it's safe to add this backend to our
2267          * list of backends.
2268          */
2269         bn->pid = pid;
2270         bn->cancel_key = MyCancelKey;
2271         DLAddHead(BackendList, DLNewElem(bn));
2272 #ifdef EXEC_BACKEND
2273         ShmemBackendArrayAdd(bn);
2274 #endif
2275
2276         return STATUS_OK;
2277 }
2278
2279 /*
2280  * Try to report backend fork() failure to client before we close the
2281  * connection.  Since we do not care to risk blocking the postmaster on
2282  * this connection, we set the connection to non-blocking and try only once.
2283  *
2284  * This is grungy special-purpose code; we cannot use backend libpq since
2285  * it's not up and running.
2286  */
2287 static void
2288 report_fork_failure_to_client(Port *port, int errnum)
2289 {
2290         char            buffer[1000];
2291
2292         /* Format the error message packet (always V2 protocol) */
2293         snprintf(buffer, sizeof(buffer), "E%s%s\n",
2294                          gettext("could not fork new process for connection: "),
2295                          strerror(errnum));
2296
2297         /* Set port to non-blocking.  Don't do send() if this fails */
2298         if (!set_noblock(port->sock))
2299                 return;
2300
2301         send(port->sock, buffer, strlen(buffer) + 1, 0);
2302 }
2303
2304
/*
 * split_opts -- chop an option string into words and append them to argv
 *
 * argv:	array receiving pointers to the individual words
 * argcp:	in/out count of used argv entries; advanced once per word
 * s:		option string, may be NULL (in which case nothing happens)
 *
 * NB: the string is destructively modified!  Each word is NUL-terminated
 * in place and argv ends up pointing into the caller's buffer.
 *
 * No quoting is understood: since no current POSTGRES arguments require any
 * quoting characters, every run of non-whitespace characters is simply
 * treated as one argv element.  The caller must supply an argv array that
 * is large enough; no bound is checked here.
 */
static void
split_opts(char **argv, int *argcp, char *s)
{
	char	   *cursor = s;

	if (cursor == NULL)
		return;

	for (;;)
	{
		char	   *word;

		/* step over any whitespace in front of the next word */
		while (isspace((unsigned char) *cursor))
			cursor++;
		if (*cursor == '\0')
			return;

		/* remember where the word starts, then find where it ends */
		word = cursor;
		while (*cursor != '\0' && !isspace((unsigned char) *cursor))
			cursor++;

		argv[*argcp] = word;
		*argcp += 1;

		/* end of string also ends the last word */
		if (*cursor == '\0')
			return;

		/* terminate the word in place and continue behind it */
		*cursor++ = '\0';
	}
}
2333
2334
2335 /*
2336  * BackendRun -- perform authentication, and if successful,
2337  *                              set up the backend's argument list and invoke PostgresMain()
2338  *
2339  * returns:
2340  *              Shouldn't return at all.
2341  *              If PostgresMain() fails, return status.
2342  */
2343 static int
2344 BackendRun(Port *port)
2345 {
2346         int                     status;
2347         struct timeval now;
2348         struct timezone tz;
2349         char            remote_host[NI_MAXHOST];
2350         char            remote_port[NI_MAXSERV];
2351         char            remote_ps_data[NI_MAXHOST];
2352         char      **av;
2353         int                     maxac;
2354         int                     ac;
2355         char            debugbuf[32];
2356         char            protobuf[32];
2357         int                     i;
2358
2359         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
2360
2361         /*
2362          * Let's clean up ourselves as the postmaster child, and close the
2363          * postmaster's listen sockets
2364          */
2365         ClosePostmasterPorts();
2366
2367         /* We don't want the postmaster's proc_exit() handlers */
2368         on_exit_reset();
2369
2370         /*
2371          * Signal handlers setting is moved to tcop/postgres...
2372          */
2373
2374         /* Save port etc. for ps status */
2375         MyProcPort = port;
2376
2377         /* Reset MyProcPid to new backend's pid */
2378         MyProcPid = getpid();
2379
2380         /*
2381          * PreAuthDelay is a debugging aid for investigating problems in the
2382          * authentication cycle: it can be set in postgresql.conf to allow
2383          * time to attach to the newly-forked backend with a debugger. (See
2384          * also the -W backend switch, which we allow clients to pass through
2385          * PGOPTIONS, but it is not honored until after authentication.)
2386          */
2387         if (PreAuthDelay > 0)
2388                 pg_usleep(PreAuthDelay * 1000000L);
2389
2390         ClientAuthInProgress = true;    /* limit visibility of log messages */
2391
2392         /* save start time for end of session reporting */
2393         gettimeofday(&(port->session_start), NULL);
2394
2395         /* set these to empty in case they are needed before we set them up */
2396         port->remote_host = "";
2397         port->remote_port = "";
2398         port->commandTag = "";
2399
2400         /*
2401          * Initialize libpq and enable reporting of ereport errors to the
2402          * client. Must do this now because authentication uses libpq to send
2403          * messages.
2404          */
2405         pq_init();                                      /* initialize libpq to talk to client */
2406         whereToSendOutput = Remote; /* now safe to ereport to client */
2407
2408         /*
2409          * We arrange for a simple exit(0) if we receive SIGTERM or SIGQUIT
2410          * during any client authentication related communication. Otherwise
2411          * the postmaster cannot shutdown the database FAST or IMMED cleanly
2412          * if a buggy client blocks a backend during authentication.
2413          */
2414         pqsignal(SIGTERM, authdie);
2415         pqsignal(SIGQUIT, authdie);
2416         pqsignal(SIGALRM, authdie);
2417         PG_SETMASK(&AuthBlockSig);
2418
2419         /*
2420          * Get the remote host name and port for logging and status display.
2421          */
2422         remote_host[0] = '\0';
2423         remote_port[0] = '\0';
2424         if (getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2425                                                 remote_host, sizeof(remote_host),
2426                                                 remote_port, sizeof(remote_port),
2427                                    (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV))
2428         {
2429                 int                     ret = getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2430                                                                                 remote_host, sizeof(remote_host),
2431                                                                                 remote_port, sizeof(remote_port),
2432                                                                                 NI_NUMERICHOST | NI_NUMERICSERV);
2433
2434                 if (ret)
2435                         ereport(WARNING,
2436                                         (errmsg("getnameinfo_all() failed: %s",
2437                                                         gai_strerror(ret))));
2438         }
2439         snprintf(remote_ps_data, sizeof(remote_ps_data),
2440                          remote_port[0] == '\0' ? "%s" : "%s(%s)",
2441                          remote_host, remote_port);
2442
2443         if (Log_connections)
2444                 ereport(LOG,
2445                                 (errmsg("connection received: host=%s port=%s",
2446                                                 remote_host, remote_port)));
2447
2448         /*
2449          * save remote_host and remote_port in port stucture
2450          */
2451         port->remote_host = strdup(remote_host);
2452         port->remote_port = strdup(remote_port);
2453
2454         /*
2455          * In EXEC_BACKEND case, we didn't inherit the contents of pg_hba.c
2456          * etcetera from the postmaster, and have to load them ourselves.
2457          * Build the PostmasterContext (which didn't exist before, in this
2458          * process) to contain the data.
2459          *
2460          * FIXME: [fork/exec] Ugh.  Is there a way around this overhead?
2461          */
2462 #ifdef EXEC_BACKEND
2463         Assert(PostmasterContext == NULL);
2464         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
2465                                                                                           "Postmaster",
2466                                                                                           ALLOCSET_DEFAULT_MINSIZE,
2467                                                                                           ALLOCSET_DEFAULT_INITSIZE,
2468                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
2469         MemoryContextSwitchTo(PostmasterContext);
2470
2471         load_hba();
2472         load_ident();
2473         load_user();
2474         load_group();
2475 #endif
2476
2477         /*
2478          * Ready to begin client interaction.  We will give up and exit(0)
2479          * after a time delay, so that a broken client can't hog a connection
2480          * indefinitely.  PreAuthDelay doesn't count against the time limit.
2481          */
2482         if (!enable_sig_alarm(AuthenticationTimeout * 1000, false))
2483                 elog(FATAL, "could not set timer for authorization timeout");
2484
2485         /*
2486          * Receive the startup packet (which might turn out to be a cancel
2487          * request packet).
2488          */
2489         status = ProcessStartupPacket(port, false);
2490
2491         if (status != STATUS_OK)
2492                 proc_exit(0);
2493
2494         /*
2495          * Now that we have the user and database name, we can set the process
2496          * title for ps.  It's good to do this as early as possible in
2497          * startup.
2498          */
2499         init_ps_display(port->user_name, port->database_name, remote_ps_data);
2500         set_ps_display("authentication");
2501
2502         /*
2503          * Now perform authentication exchange.
2504          */
2505         ClientAuthentication(port); /* might not return, if failure */
2506
2507         /*
2508          * Done with authentication.  Disable timeout, and prevent
2509          * SIGTERM/SIGQUIT again until backend startup is complete.
2510          */
2511         if (!disable_sig_alarm(false))
2512                 elog(FATAL, "could not disable timer for authorization timeout");
2513         PG_SETMASK(&BlockSig);
2514
2515         if (Log_connections)
2516                 ereport(LOG,
2517                                 (errmsg("connection authorized: user=%s database=%s",
2518                                                 port->user_name, port->database_name)));
2519
2520         /*
2521          * Don't want backend to be able to see the postmaster random number
2522          * generator state.  We have to clobber the static random_seed *and*
2523          * start a new random sequence in the random() library function.
2524          */
2525         random_seed = 0;
2526         gettimeofday(&now, &tz);
2527         srandom((unsigned int) now.tv_usec);
2528
2529
2530         /* ----------------
2531          * Now, build the argv vector that will be given to PostgresMain.
2532          *
2533          * The layout of the command line is
2534          *              postgres [secure switches] -p databasename [insecure switches]
2535          * where the switches after -p come from the client request.
2536          *
2537          * The maximum possible number of commandline arguments that could come
2538          * from ExtraOptions or port->cmdline_options is (strlen + 1) / 2; see
2539          * split_opts().
2540          * ----------------
2541          */
2542         maxac = 10;                                     /* for fixed args supplied below */
2543         maxac += (strlen(ExtraOptions) + 1) / 2;
2544         if (port->cmdline_options)
2545                 maxac += (strlen(port->cmdline_options) + 1) / 2;
2546
2547         av = (char **) MemoryContextAlloc(TopMemoryContext,
2548                                                                           maxac * sizeof(char *));
2549         ac = 0;
2550
2551         av[ac++] = "postgres";
2552
2553         /*
2554          * Pass the requested debugging level along to the backend.
2555          */
2556         if (debug_flag > 0)
2557         {
2558                 snprintf(debugbuf, sizeof(debugbuf), "-d%d", debug_flag);
2559                 av[ac++] = debugbuf;
2560         }
2561
2562         /*
2563          * Pass any backend switches specified with -o in the postmaster's own
2564          * command line.  We assume these are secure.  (It's OK to mangle
2565          * ExtraOptions now, since we're safely inside a subprocess.)
2566          */
2567         split_opts(av, &ac, ExtraOptions);
2568
2569         /* Tell the backend what protocol the frontend is using. */
2570         snprintf(protobuf, sizeof(protobuf), "-v%u", port->proto);
2571         av[ac++] = protobuf;
2572
2573         /*
2574          * Tell the backend it is being called from the postmaster, and which
2575          * database to use.  -p marks the end of secure switches.
2576          */
2577         av[ac++] = "-p";
2578         av[ac++] = port->database_name;
2579
2580         /*
2581          * Pass the (insecure) option switches from the connection request.
2582          * (It's OK to mangle port->cmdline_options now.)
2583          */
2584         if (port->cmdline_options)
2585                 split_opts(av, &ac, port->cmdline_options);
2586
2587         av[ac] = NULL;
2588
2589         Assert(ac < maxac);
2590
2591         /*
2592          * Release postmaster's working memory context so that backend can
2593          * recycle the space.  Note this does not trash *MyProcPort, because
2594          * ConnCreate() allocated that space with malloc() ... else we'd need
2595          * to copy the Port data here.  Also, subsidiary data such as the
2596          * username isn't lost either; see ProcessStartupPacket().
2597          */
2598         MemoryContextSwitchTo(TopMemoryContext);
2599         MemoryContextDelete(PostmasterContext);
2600         PostmasterContext = NULL;
2601
2602         /*
2603          * Debug: print arguments being passed to backend
2604          */
2605         ereport(DEBUG3,
2606                         (errmsg_internal("%s child[%d]: starting with (",
2607                                                          progname, getpid())));
2608         for (i = 0; i < ac; ++i)
2609                 ereport(DEBUG3,
2610                                 (errmsg_internal("\t%s", av[i])));
2611         ereport(DEBUG3,
2612                         (errmsg_internal(")")));
2613
2614         ClientAuthInProgress = false;           /* client_min_messages is active
2615                                                                                  * now */
2616
2617         return (PostgresMain(ac, av, port->user_name));
2618 }
2619
2620
2621 #ifdef EXEC_BACKEND
2622
2623 /*
2624  * postmaster_forkexec -- fork and exec a postmaster subprocess
2625  *
2626  * The caller must have set up the argv array already, except for argv[2]
2627  * which will be filled with the name of the temp variable file.
2628  *
2629  * Returns the child process PID, or -1 on fork failure (a suitable error
2630  * message has been logged on failure).
2631  *
2632  * All uses of this routine will dispatch to SubPostmasterMain in the
2633  * child process.
2634  */
2635 pid_t
2636 postmaster_forkexec(int argc, char *argv[])
2637 {
2638         Port            port;
2639
2640         /* This entry point passes dummy values for the Port variables */
2641         memset(&port, 0, sizeof(port));
2642         return internal_forkexec(argc, argv, &port);
2643 }
2644
2645 /*
2646  * backend_forkexec -- fork/exec off a backend process
2647  *
2648  * returns the pid of the fork/exec'd process, or -1 on failure
2649  */
2650 static pid_t
2651 backend_forkexec(Port *port)
2652 {
2653         char       *av[4];
2654         int                     ac = 0;
2655
2656         av[ac++] = "postgres";
2657         av[ac++] = "-forkbackend";
2658         av[ac++] = NULL;                        /* filled in by internal_forkexec */
2659
2660         av[ac] = NULL;
2661         Assert(ac < lengthof(av));
2662
2663         return internal_forkexec(ac, av, port);
2664 }
2665
2666 static pid_t
2667 internal_forkexec(int argc, char *argv[], Port *port)
2668 {
2669         pid_t           pid;
2670         char            tmpfilename[MAXPGPATH];
2671
2672         if (!write_backend_variables(tmpfilename, port))
2673                 return -1;                              /* log made by write_backend_variables */
2674
2675         /* Make sure caller set up argv properly */
2676         Assert(argc >= 3);
2677         Assert(argv[argc] == NULL);
2678         Assert(strncmp(argv[1], "-fork", 5) == 0);
2679         Assert(argv[2] == NULL);
2680
2681         /* Insert temp file name after -fork argument */
2682         argv[2] = tmpfilename;
2683
2684 #ifdef WIN32
2685         pid = win32_forkexec(postgres_exec_path, argv);
2686 #else
2687         /* Fire off execv in child */
2688         if ((pid = fork()) == 0)
2689         {
2690                 if (execv(postgres_exec_path, argv) < 0)
2691                 {
2692                         ereport(LOG,
2693                                         (errmsg("could not exec backend process \"%s\": %m",
2694                                                         postgres_exec_path)));
2695                         /* We're already in the child process here, can't return */
2696                         exit(1);
2697                 }
2698         }
2699 #endif
2700
2701         return pid;                                     /* Parent returns pid, or -1 on fork failure */
2702 }
2703
2704 /*
2705  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
2706  *                      to what it would be if we'd simply forked on Unix, and then
2707  *                      dispatch to the appropriate place.
2708  *
2709  * The first two command line arguments are expected to be "-forkFOO"
2710  * (where FOO indicates which postmaster child we are to become), and
2711  * the name of a variables file that we can read to load data that would
2712  * have been inherited by fork() on Unix.  Remaining arguments go to the
2713  * subprocess FooMain() routine.
2714  */
2715 int
2716 SubPostmasterMain(int argc, char *argv[])
2717 {
2718         Port            port;
2719
2720         /* Do this sooner rather than later... */
2721         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
2722
2723         MyProcPid = getpid();           /* reset MyProcPid */
2724
2725         /* In EXEC_BACKEND case we will not have inherited these settings */
2726         IsPostmasterEnvironment = true;
2727         whereToSendOutput = None;
2728         pqinitmask();
2729         PG_SETMASK(&BlockSig);
2730
2731         /* Setup essential subsystems */
2732         MemoryContextInit();
2733         InitializeGUCOptions();
2734
2735         /* Check we got appropriate args */
2736         if (argc < 3)
2737                 elog(FATAL, "invalid subpostmaster invocation");
2738
2739         /* Read in file-based context */
2740         memset(&port, 0, sizeof(Port));
2741         read_backend_variables(argv[2], &port);
2742         read_nondefault_variables();
2743
2744         /* Run backend or appropriate child */
2745         if (strcmp(argv[1], "-forkbackend") == 0)
2746         {
2747                 /* BackendRun will close sockets */
2748
2749                 /* Attach process to shared segments */
2750                 CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
2751
2752                 Assert(argc == 3);              /* shouldn't be any more args */
2753                 proc_exit(BackendRun(&port));
2754         }
2755         if (strcmp(argv[1], "-forkboot") == 0)
2756         {
2757                 /* Close the postmaster's sockets */
2758                 ClosePostmasterPorts();
2759
2760                 /* Attach process to shared segments */
2761                 CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
2762
2763                 BootstrapMain(argc - 2, argv + 2);
2764                 proc_exit(0);
2765         }
2766         if (strcmp(argv[1], "-forkbuf") == 0)
2767         {
2768                 /* Close the postmaster's sockets */
2769                 ClosePostmasterPorts();
2770
2771                 /* Do not want to attach to shared memory */
2772
2773                 PgstatBufferMain(argc, argv);
2774                 proc_exit(0);
2775         }
2776         if (strcmp(argv[1], "-forkcol") == 0)
2777         {
2778                 /*
2779                  * Do NOT close postmaster sockets here, because we are forking from
2780                  * pgstat buffer process, which already did it.
2781                  */
2782
2783                 /* Do not want to attach to shared memory */
2784
2785                 PgstatCollectorMain(argc, argv);
2786                 proc_exit(0);
2787         }
2788
2789         return 1;                                       /* shouldn't get here */
2790 }
2791
2792 #endif /* EXEC_BACKEND */
2793
2794
/*
 * ExitPostmaster -- cleanup
 *
 * Single exit point of the postmaster: hands the given status to
 * proc_exit().
 *
 * Do NOT call exit() directly --- always go through here!
 */
static void
ExitPostmaster(int status)
{
	/*
	 * Historical note: this should cleanup shared memory and kill all
	 * backends.  It was once unclear whether the backends should all be
	 * killed when the Postmaster dies; the recorded verdict is that they
	 * MUST be (vadim 05-10-1999).  Nothing of that is done here, though.
	 */

	proc_exit(status);
}
2814
2815 /*
2816  * sigusr1_handler - handle signal conditions from child processes
2817  */
2818 static void
2819 sigusr1_handler(SIGNAL_ARGS)
2820 {
2821         int                     save_errno = errno;
2822
2823         PG_SETMASK(&BlockSig);
2824
2825         if (CheckPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE))
2826         {
2827                 /*
2828                  * Password or group file has changed.
2829                  */
2830                 load_user();
2831                 load_group();
2832         }
2833
2834         if (CheckPostmasterSignal(PMSIGNAL_WAKEN_CHILDREN))
2835         {
2836                 /*
2837                  * Send SIGUSR1 to all children (triggers
2838                  * CatchupInterruptHandler). See storage/ipc/sinval[adt].c for the
2839                  * use of this.
2840                  */
2841                 if (Shutdown <= SmartShutdown)
2842                         SignalChildren(SIGUSR1);
2843         }
2844
2845         PG_SETMASK(&UnBlockSig);
2846
2847         errno = save_errno;
2848 }
2849
2850
/*
 * dummy_handler -- signal handler that does nothing
 *
 * Installed for signals that the postmaster itself has no use for but that
 * backends do use.  If we were to SIG_IGN such signals in the postmaster,
 * then a newly started backend might drop a signal that arrives before it's
 * able to reconfigure its signal processing.  (See notes in
 * tcop/postgres.c.)
 */
static void
dummy_handler(SIGNAL_ARGS)
{
	/* intentionally empty */
}
2864
2865
/*
 * CharRemap: given an int in range 0..61, produce textual encoding of it
 * per crypt(3) conventions.
 *
 * 0..25 map to 'A'..'Z', 26..51 to 'a'..'z' and 52..61 to '0'..'9'.
 * Values outside 0..61 are folded into that range by taking the magnitude
 * modulo 62.
 */
static char
CharRemap(long ch)
{
	/*
	 * Reduce first and fix the sign afterwards: negating the raw input
	 * would overflow for the most negative long, whereas the remainder is
	 * always small enough to negate safely.
	 */
	ch = ch % 62;
	if (ch < 0)
		ch = -ch;

	if (ch < 26)
		return 'A' + ch;

	ch -= 26;
	if (ch < 26)
		return 'a' + ch;

	ch -= 26;
	return '0' + ch;
}
2887
/*
 * RandomSalt -- fill in fresh salt values
 *
 * cryptSalt: receives two characters produced by CharRemap()
 * md5Salt:	  receives four bytes, each in the range 1..255
 */
static void
RandomSalt(char *cryptSalt, char *md5Salt)
{
	long		bits = PostmasterRandom();
	int			i;

	cryptSalt[0] = CharRemap(bits % 62);
	cryptSalt[1] = CharRemap(bits / 62);

	/*
	 * It's okay to reuse the first random value for one of the MD5 salt
	 * bytes, since only one of the two salts will be sent to the client.
	 * After that we need to compute more random bits.
	 *
	 * We use % 255, sacrificing one possible byte value, so as to ensure
	 * that all bits of the random() value participate in the result.
	 * While at it, add one to avoid generating any null bytes.
	 */
	md5Salt[0] = (bits % 255) + 1;
	for (i = 1; i < 4; i++)
	{
		bits = PostmasterRandom();
		md5Salt[i] = (bits % 255) + 1;
	}
}
2916
2917 /*
2918  * PostmasterRandom
2919  */
2920 static long
2921 PostmasterRandom(void)
2922 {
2923         static bool initialized = false;
2924
2925         if (!initialized)
2926         {
2927                 Assert(random_seed != 0);
2928                 srandom(random_seed);
2929                 initialized = true;
2930         }
2931
2932         return random();
2933 }
2934
2935 /*
2936  * Count up number of child processes.
2937  */
2938 static int
2939 CountChildren(void)
2940 {
2941         Dlelem     *curr;
2942         int                     cnt = 0;
2943
2944         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2945         {
2946                 cnt++;
2947         }
2948         return cnt;
2949 }
2950
2951
2952 /*
2953  * StartChildProcess -- start a non-backend child process for the postmaster
2954  *
2955  * xlog determines what kind of child will be started.  All child types
2956  * initially go to BootstrapMain, which will handle common setup.
2957  *
2958  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
2959  * to start subprocess.
2960  */
2961 static pid_t
2962 StartChildProcess(int xlop)
2963 {
2964         pid_t           pid;
2965         char       *av[10];
2966         int                     ac = 0;
2967         char            xlbuf[32];
2968 #ifdef LINUX_PROFILE
2969         struct itimerval prof_itimer;
2970 #endif
2971
2972         /*
2973          * Set up command-line arguments for subprocess
2974          */
2975         av[ac++] = "postgres";
2976
2977 #ifdef EXEC_BACKEND
2978         av[ac++] = "-forkboot";
2979         av[ac++] = NULL;                        /* filled in by postmaster_forkexec */
2980 #endif
2981
2982         snprintf(xlbuf, sizeof(xlbuf), "-x%d", xlop);
2983         av[ac++] = xlbuf;
2984
2985         av[ac++] = "-p";
2986         av[ac++] = "template1";
2987
2988         av[ac] = NULL;
2989         Assert(ac < lengthof(av));
2990
2991         /*
2992          * Flush stdio channels (see comments in BackendStartup)
2993          */
2994         fflush(stdout);
2995         fflush(stderr);
2996
2997 #ifdef EXEC_BACKEND
2998
2999         pid = postmaster_forkexec(ac, av);
3000
3001 #else /* !EXEC_BACKEND */
3002
3003 #ifdef LINUX_PROFILE
3004         /* see comments in BackendStartup */
3005         getitimer(ITIMER_PROF, &prof_itimer);
3006 #endif
3007
3008 #ifdef __BEOS__
3009         /* Specific beos actions before backend startup */
3010         beos_before_backend_startup();
3011 #endif
3012
3013         pid = fork();
3014
3015         if (pid == 0)                           /* child */
3016         {
3017 #ifdef LINUX_PROFILE
3018                 setitimer(ITIMER_PROF, &prof_itimer, NULL);
3019 #endif
3020
3021 #ifdef __BEOS__
3022                 /* Specific beos actions after backend startup */
3023                 beos_backend_startup();
3024 #endif
3025
3026                 IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
3027
3028                 /* Close the postmaster's sockets */
3029                 ClosePostmasterPorts();
3030
3031                 /* Lose the postmaster's on-exit routines and port connections */
3032                 on_exit_reset();
3033
3034                 BootstrapMain(ac, av);
3035                 ExitPostmaster(0);
3036         }
3037
3038 #endif /* EXEC_BACKEND */
3039
3040         if (pid < 0)
3041         {
3042                 /* in parent, fork failed */
3043                 int                     save_errno = errno;
3044
3045 #ifdef __BEOS__
3046                 /* Specific beos actions before backend startup */
3047                 beos_backend_startup_failed();
3048 #endif
3049                 errno = save_errno;
3050                 switch (xlop)
3051                 {
3052                         case BS_XLOG_STARTUP:
3053                                 ereport(LOG,
3054                                                 (errmsg("could not fork startup process: %m")));
3055                                 break;
3056                         case BS_XLOG_BGWRITER:
3057                                 ereport(LOG,
3058                                                 (errmsg("could not fork background writer process: %m")));
3059                                 break;
3060                         default:
3061                                 ereport(LOG,
3062                                                 (errmsg("could not fork process: %m")));
3063                                 break;
3064                 }
3065
3066                 /*
3067                  * fork failure is fatal during startup, but there's no need
3068                  * to choke immediately if starting other child types fails.
3069                  */
3070                 if (xlop == BS_XLOG_STARTUP)
3071                         ExitPostmaster(1);
3072                 return 0;
3073         }
3074
3075         /*
3076          * in parent, successful fork
3077          */
3078         return pid;
3079 }
3080
3081
3082 /*
3083  * Create the opts file
3084  */
3085 static bool
3086 CreateOptsFile(int argc, char *argv[], char *fullprogname)
3087 {
3088         char            filename[MAXPGPATH];
3089         FILE       *fp;
3090         int                     i;
3091
3092         snprintf(filename, sizeof(filename), "%s/postmaster.opts", DataDir);
3093
3094         if ((fp = fopen(filename, "w")) == NULL)
3095         {
3096                 elog(LOG, "could not create file \"%s\": %m", filename);
3097                 return false;
3098         }
3099
3100         fprintf(fp, "%s", fullprogname);
3101         for (i = 1; i < argc; i++)
3102                 fprintf(fp, " '%s'", argv[i]);
3103         fputs("\n", fp);
3104
3105         if (fclose(fp))
3106         {
3107                 elog(LOG, "could not write file \"%s\": %m", filename);
3108                 return false;
3109         }
3110
3111         return true;
3112 }
3113
3114 /*
3115  * This should be used only for reporting "interactive" errors (essentially,
3116  * bogus arguments on the command line).  Once the postmaster is launched,
3117  * use ereport.  In particular, don't use this for anything that occurs
3118  * after pmdaemonize.
3119  */
3120 static void
3121 postmaster_error(const char *fmt,...)
3122 {
3123         va_list         ap;
3124
3125         fprintf(stderr, "%s: ", progname);
3126         va_start(ap, fmt);
3127         vfprintf(stderr, gettext(fmt), ap);
3128         va_end(ap);
3129         fprintf(stderr, "\n");
3130 }
3131
3132
3133 #ifdef EXEC_BACKEND
3134
3135 /*
3136  * The following need to be available to the read/write_backend_variables
3137  * functions
3138  */
3139 #include "storage/spin.h"
3140
3141 extern slock_t *ShmemLock;
3142 extern slock_t *ShmemIndexLock;
3143 extern void *ShmemIndexAlloc;
3144 typedef struct LWLock LWLock;
3145 extern LWLock *LWLockArray;
3146 extern slock_t *ProcStructLock;
3147 extern int      pgStatSock;
3148
/*
 * Helpers for dumping variables to, and loading them from, a stdio stream
 * as raw bytes.  The plain forms take the address of the named object; the
 * array forms take an array, whose name already designates the data.  In
 * both cases sizeof(var) bytes are transferred as a single item, so each
 * macro yields the fwrite()/fread() result (1 on success).
 */
#define write_var(var, fp) \
	fwrite((void *) &(var), sizeof(var), 1, (fp))
#define read_var(var, fp) \
	fread((void *) &(var), sizeof(var), 1, (fp))
#define write_array_var(var, fp) \
	fwrite((void *) (var), sizeof(var), 1, (fp))
#define read_array_var(var, fp) \
	fread((void *) (var), sizeof(var), 1, (fp))
3153
3154 static bool
3155 write_backend_variables(char *filename, Port *port)
3156 {
3157         static unsigned long tmpBackendFileNum = 0;
3158         FILE       *fp;
3159         char            str_buf[MAXPGPATH];
3160
3161         /* Calculate name for temp file in caller's buffer */
3162         Assert(DataDir);
3163         snprintf(filename, MAXPGPATH, "%s/%s/%s.backend_var.%d.%lu",
3164                          DataDir, PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX,
3165                          MyProcPid, ++tmpBackendFileNum);
3166
3167         /* Open file */
3168         fp = AllocateFile(filename, PG_BINARY_W);
3169         if (!fp)
3170         {
3171                 /* As per OpenTemporaryFile... */
3172                 char            dirname[MAXPGPATH];
3173
3174                 snprintf(dirname, MAXPGPATH, "%s/%s", DataDir, PG_TEMP_FILES_DIR);
3175                 mkdir(dirname, S_IRWXU);
3176
3177                 fp = AllocateFile(filename, PG_BINARY_W);
3178                 if (!fp)
3179                 {
3180                         ereport(LOG,
3181                                         (errcode_for_file_access(),
3182                                          errmsg("could not create file \"%s\": %m",
3183                                                         filename)));
3184                         return false;
3185                 }
3186         }
3187
3188         /* Write vars */
3189         write_var(port->sock, fp);
3190         write_var(port->proto, fp);
3191         write_var(port->laddr, fp);
3192         write_var(port->raddr, fp);
3193         write_var(port->canAcceptConnections, fp);
3194         write_var(port->cryptSalt, fp);
3195         write_var(port->md5Salt, fp);
3196
3197         /*
3198          * XXX FIXME later: writing these strings as MAXPGPATH bytes always is
3199          * probably a waste of resources
3200          */
3201
3202         StrNCpy(str_buf, DataDir, MAXPGPATH);
3203         write_array_var(str_buf, fp);
3204
3205         write_array_var(ListenSocket, fp);
3206
3207         write_var(MyCancelKey, fp);
3208
3209         write_var(UsedShmemSegID, fp);
3210         write_var(UsedShmemSegAddr, fp);
3211
3212         write_var(ShmemLock, fp);
3213         write_var(ShmemIndexLock, fp);
3214         write_var(ShmemVariableCache, fp);
3215         write_var(ShmemIndexAlloc, fp);
3216         write_var(ShmemBackendArray, fp);
3217
3218         write_var(LWLockArray, fp);
3219         write_var(ProcStructLock, fp);
3220         write_var(pgStatSock, fp);
3221
3222         write_var(debug_flag, fp);
3223         write_var(PostmasterPid, fp);
3224
3225         StrNCpy(str_buf, my_exec_path, MAXPGPATH);
3226         write_array_var(str_buf, fp);
3227
3228         write_array_var(ExtraOptions, fp);
3229
3230         StrNCpy(str_buf, setlocale(LC_COLLATE, NULL), MAXPGPATH);
3231         write_array_var(str_buf, fp);
3232         StrNCpy(str_buf, setlocale(LC_CTYPE, NULL), MAXPGPATH);
3233         write_array_var(str_buf, fp);
3234
3235         /* Release file */
3236         if (FreeFile(fp))
3237         {
3238                 ereport(ERROR,
3239                                 (errcode_for_file_access(),
3240                                  errmsg("could not write to file \"%s\": %m", filename)));
3241                 return false;
3242         }
3243
3244         return true;
3245 }
3246
3247 static void
3248 read_backend_variables(char *filename, Port *port)
3249 {
3250         FILE       *fp;
3251         char            str_buf[MAXPGPATH];
3252
3253         /* Open file */
3254         fp = AllocateFile(filename, PG_BINARY_R);
3255         if (!fp)
3256                 ereport(FATAL,
3257                                 (errcode_for_file_access(),
3258                                  errmsg("could not read from backend variables file \"%s\": %m",
3259                                                 filename)));
3260
3261         /* Read vars */
3262         read_var(port->sock, fp);
3263         read_var(port->proto, fp);
3264         read_var(port->laddr, fp);
3265         read_var(port->raddr, fp);
3266         read_var(port->canAcceptConnections, fp);
3267         read_var(port->cryptSalt, fp);
3268         read_var(port->md5Salt, fp);
3269
3270         read_array_var(str_buf, fp);
3271         SetDataDir(str_buf);
3272
3273         read_array_var(ListenSocket, fp);
3274
3275         read_var(MyCancelKey, fp);
3276
3277         read_var(UsedShmemSegID, fp);
3278         read_var(UsedShmemSegAddr, fp);
3279
3280         read_var(ShmemLock, fp);
3281         read_var(ShmemIndexLock, fp);
3282         read_var(ShmemVariableCache, fp);
3283         read_var(ShmemIndexAlloc, fp);
3284         read_var(ShmemBackendArray, fp);
3285
3286         read_var(LWLockArray, fp);
3287         read_var(ProcStructLock, fp);
3288         read_var(pgStatSock, fp);
3289
3290         read_var(debug_flag, fp);
3291         read_var(PostmasterPid, fp);
3292
3293         read_array_var(str_buf, fp);
3294         StrNCpy(my_exec_path, str_buf, MAXPGPATH);
3295
3296         read_array_var(ExtraOptions, fp);
3297
3298         read_array_var(str_buf, fp);
3299         setlocale(LC_COLLATE, str_buf);
3300         read_array_var(str_buf, fp);
3301         setlocale(LC_CTYPE, str_buf);
3302
3303         /* Release file */
3304         FreeFile(fp);
3305         if (unlink(filename) != 0)
3306                 ereport(WARNING,
3307                                 (errcode_for_file_access(),
3308                                  errmsg("could not remove file \"%s\": %m", filename)));
3309 }
3310
3311
3312 size_t
3313 ShmemBackendArraySize(void)
3314 {
3315         return (NUM_BACKENDARRAY_ELEMS * sizeof(Backend));
3316 }
3317
3318 void
3319 ShmemBackendArrayAllocation(void)
3320 {
3321         size_t          size = ShmemBackendArraySize();
3322
3323         ShmemBackendArray = (Backend *) ShmemAlloc(size);
3324         /* Mark all slots as empty */
3325         memset(ShmemBackendArray, 0, size);
3326 }
3327
3328 static void
3329 ShmemBackendArrayAdd(Backend *bn)
3330 {
3331         int                     i;
3332
3333         /* Find an empty slot */
3334         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3335         {
3336                 if (ShmemBackendArray[i].pid == 0)
3337                 {
3338                         ShmemBackendArray[i] = *bn;
3339                         return;
3340                 }
3341         }
3342
3343         ereport(FATAL,
3344                         (errmsg_internal("no free slots in shmem backend array")));
3345 }
3346
3347 static void
3348 ShmemBackendArrayRemove(pid_t pid)
3349 {
3350         int                     i;
3351
3352         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3353         {
3354                 if (ShmemBackendArray[i].pid == pid)
3355                 {
3356                         /* Mark the slot as empty */
3357                         ShmemBackendArray[i].pid = 0;
3358                         return;
3359                 }
3360         }
3361
3362         ereport(WARNING,
3363                         (errmsg_internal("unable to find backend entry with pid %d",
3364                                                          (int) pid)));
3365 }
3366
3367 #endif /* EXEC_BACKEND */
3368
3369
3370 #ifdef WIN32
3371
3372 static pid_t
3373 win32_forkexec(const char *path, char *argv[])
3374 {
3375         STARTUPINFO si;
3376         PROCESS_INFORMATION pi;
3377         int                     i;
3378         int                     j;
3379         char            cmdLine[MAXPGPATH * 2];
3380         HANDLE          childHandleCopy;
3381         HANDLE          waiterThread;
3382
3383         /* Format the cmd line */
3384         cmdLine[sizeof(cmdLine)-1] = '\0';
3385         cmdLine[sizeof(cmdLine)-2] = '\0';
3386         snprintf(cmdLine, sizeof(cmdLine)-1, "\"%s\"", path);
3387         i = 0;
3388         while (argv[++i] != NULL)
3389         {
3390                 j = strlen(cmdLine);
3391                 snprintf(cmdLine+j, sizeof(cmdLine)-1-j, " \"%s\"", argv[i]);
3392         }
3393         if (cmdLine[sizeof(cmdLine)-2] != '\0')
3394         {
3395                 elog(LOG, "subprocess command line too long");
3396                 return -1;
3397         }
3398
3399         memset(&pi, 0, sizeof(pi));
3400         memset(&si, 0, sizeof(si));
3401         si.cb = sizeof(si);
3402         if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, 0, NULL, NULL, &si, &pi))
3403         {
3404                 elog(LOG, "CreateProcess call failed (%d): %m", (int) GetLastError());
3405                 return -1;
3406         }
3407
3408         if (!IsUnderPostmaster)
3409         {
3410                 /* We are the Postmaster creating a child... */
3411                 win32_AddChild(pi.dwProcessId, pi.hProcess);
3412         }
3413
3414         if (!DuplicateHandle(GetCurrentProcess(),
3415                                                  pi.hProcess,
3416                                                  GetCurrentProcess(),
3417                                                  &childHandleCopy,
3418                                                  0,
3419                                                  FALSE,
3420                                                  DUPLICATE_SAME_ACCESS))
3421                 ereport(FATAL,
3422                                 (errmsg_internal("failed to duplicate child handle: %d",
3423                                                                  (int) GetLastError())));
3424
3425         waiterThread = CreateThread(NULL, 64 * 1024, win32_sigchld_waiter,
3426                                                                 (LPVOID) childHandleCopy, 0, NULL);
3427         if (!waiterThread)
3428                 ereport(FATAL,
3429                                 (errmsg_internal("failed to create sigchld waiter thread: %d",
3430                                                                  (int) GetLastError())));
3431         CloseHandle(waiterThread);
3432
3433         if (IsUnderPostmaster)
3434                 CloseHandle(pi.hProcess);
3435         CloseHandle(pi.hThread);
3436
3437         return pi.dwProcessId;
3438 }
3439
3440 /*
3441  * Note: The following three functions must not be interrupted (eg. by
3442  * signals).  As the Postgres Win32 signalling architecture (currently)
3443  * requires polling, or APC checking functions which aren't used here, this
3444  * is not an issue.
3445  *
3446  * We keep two separate arrays, instead of a single array of pid/HANDLE
3447  * structs, to avoid having to re-create a handle array for
3448  * WaitForMultipleObjects on each call to win32_waitpid.
3449  */
3450
3451 static void
3452 win32_AddChild(pid_t pid, HANDLE handle)
3453 {
3454         Assert(win32_childPIDArray && win32_childHNDArray);
3455         if (win32_numChildren < NUM_BACKENDARRAY_ELEMS)
3456         {
3457                 win32_childPIDArray[win32_numChildren] = pid;
3458                 win32_childHNDArray[win32_numChildren] = handle;
3459                 ++win32_numChildren;
3460         }
3461         else
3462                 ereport(FATAL,
3463                                 (errmsg_internal("unable to add child entry with pid %lu",
3464                                                                  (unsigned long) pid)));
3465 }
3466
3467 static void
3468 win32_RemoveChild(pid_t pid)
3469 {
3470         int                     i;
3471
3472         Assert(win32_childPIDArray && win32_childHNDArray);
3473
3474         for (i = 0; i < win32_numChildren; i++)
3475         {
3476                 if (win32_childPIDArray[i] == pid)
3477                 {
3478                         CloseHandle(win32_childHNDArray[i]);
3479
3480                         /* Swap last entry into the "removed" one */
3481                         --win32_numChildren;
3482                         win32_childPIDArray[i] = win32_childPIDArray[win32_numChildren];
3483                         win32_childHNDArray[i] = win32_childHNDArray[win32_numChildren];
3484                         return;
3485                 }
3486         }
3487
3488         ereport(WARNING,
3489                         (errmsg_internal("unable to find child entry with pid %lu",
3490                                                          (unsigned long) pid)));
3491 }
3492
3493 static pid_t
3494 win32_waitpid(int *exitstatus)
3495 {
3496         Assert(win32_childPIDArray && win32_childHNDArray);
3497         elog(DEBUG3, "waiting on %lu children", win32_numChildren);
3498
3499         if (win32_numChildren > 0)
3500         {
3501                 /*
3502                  * Note: Do NOT use WaitForMultipleObjectsEx, as we don't want to
3503                  * run queued APCs here.
3504                  */
3505                 int                     index;
3506                 DWORD           exitCode;
3507                 DWORD           ret;
3508
3509                 ret = WaitForMultipleObjects(win32_numChildren, win32_childHNDArray,
3510                                                                          FALSE, 0);
3511                 switch (ret)
3512                 {
3513                         case WAIT_FAILED:
3514                                 ereport(LOG,
3515                                    (errmsg_internal("failed to wait on %lu children: %d",
3516                                                           win32_numChildren, (int) GetLastError())));
3517                                 return -1;
3518
3519                         case WAIT_TIMEOUT:
3520                                 /* No children have finished */
3521                                 return -1;
3522
3523                         default:
3524
3525                                 /*
3526                                  * Get the exit code, and return the PID of, the
3527                                  * respective process
3528                                  */
3529                                 index = ret - WAIT_OBJECT_0;
3530                                 Assert(index >= 0 && index < win32_numChildren);
3531                                 if (!GetExitCodeProcess(win32_childHNDArray[index], &exitCode))
3532                                 {
3533                                         /*
3534                                          * If we get this far, this should never happen, but,
3535                                          * then again... No choice other than to assume a
3536                                          * catastrophic failure.
3537                                          */
3538                                         ereport(FATAL,
3539                                                         (errmsg_internal("failed to get exit code for child %lu",
3540                                                                                    win32_childPIDArray[index])));
3541                                 }
3542                                 *exitstatus = (int) exitCode;
3543                                 return win32_childPIDArray[index];
3544                 }
3545         }
3546
3547         /* No children */
3548         return -1;
3549 }
3550
3551 /*
3552  * Note! Code below executes on separate threads, one for
3553  * each child process created
3554  */
3555 static DWORD WINAPI
3556 win32_sigchld_waiter(LPVOID param)
3557 {
3558         HANDLE          procHandle = (HANDLE) param;
3559
3560         DWORD           r = WaitForSingleObject(procHandle, INFINITE);
3561
3562         if (r == WAIT_OBJECT_0)
3563                 pg_queue_signal(SIGCHLD);
3564         else
3565                 fprintf(stderr, "ERROR: Failed to wait on child process handle: %i\n",
3566                                 (int) GetLastError());
3567         CloseHandle(procHandle);
3568         return 0;
3569 }
3570
3571 #endif /* WIN32 */