]> granicus.if.org Git - postgresql/blob - src/backend/postmaster/postmaster.c
Divide the lock manager's shared state into 'partitions', so as to
[postgresql] / src / backend / postmaster / postmaster.c
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  *        This program acts as a clearing house for requests to the
5  *        POSTGRES system.      Frontend programs send a startup message
6  *        to the Postmaster and the postmaster uses the info in the
7  *        message to set up a backend process.
8  *
9  *        The postmaster also manages system-wide operations such as
10  *        startup and shutdown. The postmaster itself doesn't do those
11  *        operations, mind you --- it just forks off a subprocess to do them
12  *        at the right times.  It also takes care of resetting the system
13  *        if a backend crashes.
14  *
15  *        The postmaster process creates the shared memory and semaphore
16  *        pools during startup, but as a rule does not touch them itself.
17  *        In particular, it is not a member of the PGPROC array of backends
18  *        and so it cannot participate in lock-manager operations.      Keeping
19  *        the postmaster away from shared memory operations makes it simpler
20  *        and more reliable.  The postmaster is almost always able to recover
21  *        from crashes of individual backends by resetting shared memory;
22  *        if it did much with shared memory then it would be prone to crashing
23  *        along with the backends.
24  *
25  *        When a request message is received, we now fork() immediately.
26  *        The child process performs authentication of the request, and
27  *        then becomes a backend if successful.  This allows the auth code
28  *        to be written in a simple single-threaded style (as opposed to the
29  *        crufty "poor man's multitasking" code that used to be needed).
30  *        More importantly, it ensures that blockages in non-multithreaded
31  *        libraries like SSL or PAM cannot cause denial of service to other
32  *        clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  *        $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.476 2005/11/22 18:17:18 momjian Exp $
41  *
42  * NOTES
43  *
44  * Initialization:
45  *              The Postmaster sets up shared memory data structures
46  *              for the backends.
47  *
48  * Synchronization:
49  *              The Postmaster shares memory with the backends but should avoid
50  *              touching shared memory, so as not to become stuck if a crashing
51  *              backend screws up locks or shared memory.  Likewise, the Postmaster
52  *              should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  *              The Postmaster cleans up after backends if they have an emergency
56  *              exit and/or core dump.
57  *
58  * Error Reporting:
59  *              Use write_stderr() only for reporting "interactive" errors
60  *              (essentially, bogus arguments on the command line).  Once the
61  *              postmaster is launched, use ereport().  In particular, don't use
62  *              write_stderr() for anything that occurs after pmdaemonize.
63  *
64  *-------------------------------------------------------------------------
65  */
66
67 #include "postgres.h"
68
69 #include <unistd.h>
70 #include <signal.h>
71 #include <time.h>
72 #include <sys/wait.h>
73 #include <ctype.h>
74 #include <sys/stat.h>
75 #include <sys/socket.h>
76 #include <fcntl.h>
77 #include <sys/param.h>
78 #include <netinet/in.h>
79 #include <arpa/inet.h>
80 #include <netdb.h>
81 #include <limits.h>
82
83 #ifdef HAVE_SYS_SELECT_H
84 #include <sys/select.h>
85 #endif
86
87 #ifdef HAVE_GETOPT_H
88 #include <getopt.h>
89 #endif
90
91 #ifdef USE_BONJOUR
92 #include <DNSServiceDiscovery/DNSServiceDiscovery.h>
93 #endif
94
95 #include "access/xlog.h"
96 #include "bootstrap/bootstrap.h"
97 #include "catalog/pg_control.h"
98 #include "catalog/pg_database.h"
99 #include "commands/async.h"
100 #include "lib/dllist.h"
101 #include "libpq/auth.h"
102 #include "libpq/crypt.h"
103 #include "libpq/libpq.h"
104 #include "libpq/pqcomm.h"
105 #include "libpq/pqsignal.h"
106 #include "miscadmin.h"
107 #include "nodes/nodes.h"
108 #include "pgstat.h"
109 #include "postmaster/autovacuum.h"
110 #include "postmaster/fork_process.h"
111 #include "postmaster/pgarch.h"
112 #include "postmaster/postmaster.h"
113 #include "postmaster/syslogger.h"
114 #include "storage/bufmgr.h"
115 #include "storage/fd.h"
116 #include "storage/ipc.h"
117 #include "storage/pg_shmem.h"
118 #include "storage/pmsignal.h"
119 #include "storage/proc.h"
120 #include "tcop/tcopprot.h"
121 #include "utils/builtins.h"
122 #include "utils/datetime.h"
123 #include "utils/guc.h"
124 #include "utils/memutils.h"
125 #include "utils/ps_status.h"
126
127 #ifdef EXEC_BACKEND
128 #include "storage/spin.h"
129 #endif
130
131
132 /*
133  * List of active backends (or child processes anyway; we don't actually
134  * know whether a given child has become a backend or is still in the
135  * authorization phase).  This is used mainly to keep track of how many
136  * children we have and send them appropriate signals when necessary.
137  *
138  * "Special" children such as the startup and bgwriter tasks are not in
139  * this list.
140  */
141 typedef struct bkend
142 {
143         pid_t           pid;                    /* process ID of the child process */
144         long            cancel_key;             /* cancel key assigned to this backend */
145 } Backend;
146
147 static Dllist *BackendList;
148
149 #ifdef EXEC_BACKEND
150 #define NUM_BACKENDARRAY_ELEMS (2*MaxBackends)
151 static Backend *ShmemBackendArray;
152 #endif
153
154 /* The socket number we are listening for connections on */
155 int                     PostPortNumber;
156 char       *UnixSocketDir;
157 char       *ListenAddresses;
158
159 /*
160  * ReservedBackends is the number of backends reserved for superuser use.
161  * This number is taken out of the pool size given by MaxBackends so
162  * the number of backend slots available to non-superusers is
163  * (MaxBackends - ReservedBackends).  Note what this really means is
164  * "if there are <= ReservedBackends connections available, only superusers
165  * can make new connections" --- pre-existing superuser connections don't
166  * count against the limit.
167  */
168 int                     ReservedBackends;
169
170
171 static const char *progname = NULL;
172
173 /* The socket(s) we're listening to. */
174 #define MAXLISTEN       64
175 static int      ListenSocket[MAXLISTEN];
176
177 /*
178  * Set by the -o option
179  */
180 static char ExtraOptions[MAXPGPATH];
181
182 /*
183  * These globals control the behavior of the postmaster in case some
184  * backend dumps core.  Normally, it kills all peers of the dead backend
185  * and reinitializes shared memory.  By specifying -s or -n, we can have
186  * the postmaster stop (rather than kill) peers and not reinitialize
187  * shared data structures.
188  */
189 static bool Reinit = true;
190 static int      SendStop = false;
191
192 /* still more option variables */
193 bool            EnableSSL = false;
194 bool            SilentMode = false; /* silent mode (-S) */
195
196 int                     PreAuthDelay = 0;
197 int                     AuthenticationTimeout = 60;
198
199 bool            log_hostname;           /* for ps display and logging */
200 bool            Log_connections = false;
201 bool            Db_user_namespace = false;
202
203 char       *bonjour_name;
204
205 /* list of library:init-function to be preloaded */
206 char       *preload_libraries_string = NULL;
207
208 /* PIDs of special child processes; 0 when not running */
209 static pid_t StartupPID = 0,
210                         BgWriterPID = 0,
211                         AutoVacPID = 0,
212                         PgArchPID = 0,
213                         PgStatPID = 0,
214                         SysLoggerPID = 0;
215
216 /* Startup/shutdown state */
217 #define                 NoShutdown              0
218 #define                 SmartShutdown   1
219 #define                 FastShutdown    2
220
221 static int      Shutdown = NoShutdown;
222
223 static bool FatalError = false; /* T if recovering from backend crash */
224
225 bool            ClientAuthInProgress = false;           /* T during new-client
226                                                                                                  * authentication */
227
228 /*
229  * State for assigning random salts and cancel keys.
230  * Also, the global MyCancelKey passes the cancel key assigned to a given
231  * backend from the postmaster to that backend (via fork).
232  */
233 static unsigned int random_seed = 0;
234
235 extern char *optarg;
236 extern int      optind,
237                         opterr;
238
239 #ifdef HAVE_INT_OPTRESET
240 extern int      optreset;
241 #endif
242
243 /*
244  * postmaster.c - function prototypes
245  */
246 static void checkDataDir(void);
247
248 #ifdef USE_BONJOUR
249 static void reg_reply(DNSServiceRegistrationReplyErrorType errorCode,
250                   void *context);
251 #endif
252 static void pmdaemonize(void);
253 static Port *ConnCreate(int serverFd);
254 static void ConnFree(Port *port);
255 static void reset_shared(int port);
256 static void SIGHUP_handler(SIGNAL_ARGS);
257 static void pmdie(SIGNAL_ARGS);
258 static void reaper(SIGNAL_ARGS);
259 static void sigusr1_handler(SIGNAL_ARGS);
260 static void dummy_handler(SIGNAL_ARGS);
261 static void CleanupBackend(int pid, int exitstatus);
262 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
263 static void LogChildExit(int lev, const char *procname,
264                          int pid, int exitstatus);
265 static int      BackendRun(Port *port);
266 static void ExitPostmaster(int status);
267 static void usage(const char *);
268 static int      ServerLoop(void);
269 static int      BackendStartup(Port *port);
270 static int      ProcessStartupPacket(Port *port, bool SSLdone);
271 static void processCancelRequest(Port *port, void *pkt);
272 static int      initMasks(fd_set *rmask);
273 static void report_fork_failure_to_client(Port *port, int errnum);
274 static enum CAC_state canAcceptConnections(void);
275 static long PostmasterRandom(void);
276 static void RandomSalt(char *cryptSalt, char *md5Salt);
277 static void SignalChildren(int signal);
278 static int      CountChildren(void);
279 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
280 static pid_t StartChildProcess(int xlop);
281
282 #ifdef EXEC_BACKEND
283
284 #ifdef WIN32
285 static void win32_AddChild(pid_t pid, HANDLE handle);
286 static void win32_RemoveChild(pid_t pid);
287 static pid_t win32_waitpid(int *exitstatus);
288 static DWORD WINAPI win32_sigchld_waiter(LPVOID param);
289
290 static pid_t *win32_childPIDArray;
291 static HANDLE *win32_childHNDArray;
292 static unsigned long win32_numChildren = 0;
293
294 HANDLE          PostmasterHandle;
295 #endif
296
297 static pid_t backend_forkexec(Port *port);
298 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
299
300 /* Type for a socket that can be inherited by a child process: a struct on WIN32, a plain int elsewhere */
301 #ifdef WIN32
302 typedef struct
303 {
304         SOCKET          origsocket;             /* Original socket value, or -1 if not a
305                                                                  * socket */
306         WSAPROTOCOL_INFO wsainfo;               /* NOTE(review): presumably what the child uses to re-create the socket -- confirm */
307 }       InheritableSocket;
308 #else                                           /* !WIN32 */
309 typedef int InheritableSocket;
310 #endif   /* WIN32 */
311
312 typedef struct LWLock LWLock;   /* ugly kluge */
313
314 /*
315  * Structure containing all variables passed to exec'ed backends.
316  */
317 typedef struct
318 {
319         Port            port;
320         InheritableSocket portsocket;   /* NOTE(review): presumably port's socket in inheritable form -- confirm */
321         char            DataDir[MAXPGPATH];
322         int                     ListenSocket[MAXLISTEN];        /* same element count as the file-level ListenSocket[] */
323         long            MyCancelKey;    /* cancel key the postmaster assigned to this backend */
324         unsigned long UsedShmemSegID;
325         void       *UsedShmemSegAddr;
326         slock_t    *ShmemLock;
327         slock_t    *ShmemIndexLock;
328         VariableCache ShmemVariableCache;
329         void       *ShmemIndexAlloc;
330         Backend    *ShmemBackendArray;  /* same type as the file-level ShmemBackendArray pointer */
331         LWLock     *LWLockArray;
332         slock_t    *ProcStructLock;
333         InheritableSocket pgStatSock;
334         InheritableSocket pgStatPipe0;
335         InheritableSocket pgStatPipe1;
336         pid_t           PostmasterPid;  /* cf. PostmasterPid, which PostmasterMain sets to getpid() */
337         TimestampTz PgStartTime;
338 #ifdef WIN32
339         HANDLE          PostmasterHandle;       /* cf. the WIN32-only global PostmasterHandle */
340         HANDLE          initial_signal_pipe;
341         HANDLE          syslogPipe[2];  /* pipe ends are HANDLEs on WIN32 */
342 #else                                           /* !WIN32 */
343         int                     syslogPipe[2];  /* pipe ends are plain ints elsewhere */
344 #endif   /* WIN32 */
345         char            my_exec_path[MAXPGPATH];        /* cf. my_exec_path, filled by find_my_exec() in PostmasterMain */
346         char            pkglib_path[MAXPGPATH]; /* cf. pkglib_path, derived from my_exec_path by get_pkglib_path() */
347         char            ExtraOptions[MAXPGPATH];        /* cf. file-level ExtraOptions, set by the -o option */
348         char            lc_collate[LOCALE_NAME_BUFLEN];
349         char            lc_ctype[LOCALE_NAME_BUFLEN];
350 }       BackendParameters;
351
352 static void read_backend_variables(char *id, Port *port);
353 static void restore_backend_variables(BackendParameters * param, Port *port);
354
355 #ifndef WIN32
356 static bool save_backend_variables(BackendParameters * param, Port *port);
357 #else
358 static bool save_backend_variables(BackendParameters * param, Port *port,
359                                            HANDLE childProcess, pid_t childPid);
360 #endif
361
362 static void ShmemBackendArrayAdd(Backend *bn);
363 static void ShmemBackendArrayRemove(pid_t pid);
364 #endif   /* EXEC_BACKEND */
365
366 #define StartupDataBase()               StartChildProcess(BS_XLOG_STARTUP)      /* StartChildProcess() for the startup task */
367 #define StartBackgroundWriter() StartChildProcess(BS_XLOG_BGWRITER)     /* StartChildProcess() for the bgwriter task */
368
369
370 /*
371  * Postmaster main entry point
372  */
373 int
374 PostmasterMain(int argc, char *argv[])
375 {
376         int                     opt;
377         int                     status;
378         char       *userDoption = NULL;
379         int                     i;
380
381         /* This will call exit() if strdup() fails. */
382         progname = get_progname(argv[0]);
383
384         MyProcPid = PostmasterPid = getpid();
385
386         IsPostmasterEnvironment = true;
387
388         /*
389          * Catch standard options before doing much else.  This even works on
390          * systems without getopt_long.
391          */
392         if (argc > 1)
393         {
394                 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
395                 {
396                         usage(progname);
397                         ExitPostmaster(0);
398                 }
399                 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
400                 {
401                         puts("postmaster (PostgreSQL) " PG_VERSION);
402                         ExitPostmaster(0);
403                 }
404         }
405
406 #ifdef WIN32
407         /* Start our win32 signal implementation */
408         pgwin32_signal_initialize();
409 #endif
410
411         /*
412          * for security, no dir or file created can be group or other accessible
413          */
414         umask((mode_t) 0077);
415
416         /*
417          * Fire up essential subsystems: memory management
418          */
419         MemoryContextInit();
420
421         /*
422          * By default, palloc() requests in the postmaster will be allocated in
423          * the PostmasterContext, which is space that can be recycled by backends.
424          * Allocated data that needs to be available to backends should be
425          * allocated in TopMemoryContext.
426          */
427         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
428                                                                                           "Postmaster",
429                                                                                           ALLOCSET_DEFAULT_MINSIZE,
430                                                                                           ALLOCSET_DEFAULT_INITSIZE,
431                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
432         MemoryContextSwitchTo(PostmasterContext);
433
434         IgnoreSystemIndexes(false);
435
436         if (find_my_exec(argv[0], my_exec_path) < 0)
437                 elog(FATAL, "%s: could not locate my own executable path",
438                          argv[0]);
439
440         get_pkglib_path(my_exec_path, pkglib_path);
441
442         /*
443          * Options setup
444          */
445         InitializeGUCOptions();
446
447         opterr = 1;
448
449         while ((opt = getopt(argc, argv, "A:a:B:b:c:D:d:Fh:ik:lm:MN:no:p:Ss-:")) != -1)
450         {
451                 switch (opt)
452                 {
453                         case 'A':
454 #ifdef USE_ASSERT_CHECKING
455                                 SetConfigOption("debug_assertions", optarg, PGC_POSTMASTER, PGC_S_ARGV);
456 #else
457                                 write_stderr("%s: assert checking is not compiled in\n", progname);
458 #endif
459                                 break;
460                         case 'a':
461                                 /* Can no longer set authentication method. */
462                                 break;
463                         case 'B':
464                                 SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
465                                 break;
466                         case 'b':
467                                 /* Can no longer set the backend executable file to use. */
468                                 break;
469                         case 'D':
470                                 userDoption = optarg;
471                                 break;
472                         case 'd':
473                                 set_debug_options(atoi(optarg), PGC_POSTMASTER, PGC_S_ARGV);
474                                 break;
475                         case 'F':
476                                 SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
477                                 break;
478                         case 'h':
479                                 SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
480                                 break;
481                         case 'i':
482                                 SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
483                                 break;
484                         case 'k':
485                                 SetConfigOption("unix_socket_directory", optarg, PGC_POSTMASTER, PGC_S_ARGV);
486                                 break;
487 #ifdef USE_SSL
488                         case 'l':
489                                 SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
490                                 break;
491 #endif
492                         case 'm':
493                                 /* Multiplexed backends no longer supported. */
494                                 break;
495                         case 'M':
496
497                                 /*
498                                  * ignore this flag.  This may be passed in because the
499                                  * program was run as 'postgres -M' instead of 'postmaster'
500                                  */
501                                 break;
502                         case 'N':
503                                 /* The max number of backends to start. */
504                                 SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
505                                 break;
506                         case 'n':
507                                 /* Don't reinit shared mem after abnormal exit */
508                                 Reinit = false;
509                                 break;
510                         case 'o':
511
512                                 /*
513                                  * Other options to pass to the backend on the command line
514                                  */
515                                 snprintf(ExtraOptions + strlen(ExtraOptions),
516                                                  sizeof(ExtraOptions) - strlen(ExtraOptions),
517                                                  " %s", optarg);
518                                 break;
519                         case 'p':
520                                 SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
521                                 break;
522                         case 'S':
523
524                                 /*
525                                  * Start in 'S'ilent mode (disassociate from controlling tty).
526                                  * You may also think of this as 'S'ysV mode since it's most
527                                  * badly needed on SysV-derived systems like SVR4 and HP-UX.
528                                  */
529                                 SetConfigOption("silent_mode", "true", PGC_POSTMASTER, PGC_S_ARGV);
530                                 break;
531                         case 's':
532
533                                 /*
534                                  * In the event that some backend dumps core, send SIGSTOP,
535                                  * rather than SIGQUIT, to all its peers.  This lets the wily
536                                  * post_hacker collect core dumps from everyone.
537                                  */
538                                 SendStop = true;
539                                 break;
540                         case 'c':
541                         case '-':
542                                 {
543                                         char       *name,
544                                                            *value;
545
546                                         ParseLongOption(optarg, &name, &value);
547                                         if (!value)
548                                         {
549                                                 if (opt == '-')
550                                                         ereport(ERROR,
551                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
552                                                                          errmsg("--%s requires a value",
553                                                                                         optarg)));
554                                                 else
555                                                         ereport(ERROR,
556                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
557                                                                          errmsg("-c %s requires a value",
558                                                                                         optarg)));
559                                         }
560
561                                         SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
562                                         free(name);
563                                         if (value)
564                                                 free(value);
565                                         break;
566                                 }
567
568                         default:
569                                 write_stderr("Try \"%s --help\" for more information.\n",
570                                                          progname);
571                                 ExitPostmaster(1);
572                 }
573         }
574
575         /*
576          * Postmaster accepts no non-option switch arguments.
577          */
578         if (optind < argc)
579         {
580                 write_stderr("%s: invalid argument: \"%s\"\n",
581                                          progname, argv[optind]);
582                 write_stderr("Try \"%s --help\" for more information.\n",
583                                          progname);
584                 ExitPostmaster(1);
585         }
586
587 #ifdef EXEC_BACKEND
588         /* Locate executable backend before we change working directory */
589         if (find_other_exec(argv[0], "postgres", PG_VERSIONSTR,
590                                                 postgres_exec_path) < 0)
591                 ereport(FATAL,
592                                 (errmsg("%s: could not locate matching postgres executable",
593                                                 progname)));
594 #endif
595
596         /*
597          * Locate the proper configuration files and data directory, and read
598          * postgresql.conf for the first time.
599          */
600         if (!SelectConfigFiles(userDoption, progname))
601                 ExitPostmaster(2);
602
603         /* Verify that DataDir looks reasonable */
604         checkDataDir();
605
606         /* And switch working directory into it */
607         ChangeToDataDir();
608
609         /*
610          * Check for invalid combinations of GUC settings.
611          */
612         if (NBuffers < 2 * MaxBackends || NBuffers < 16)
613         {
614                 /*
615                  * Do not accept -B so small that backends are likely to starve for
616                  * lack of buffers.  The specific choices here are somewhat arbitrary.
617                  */
618                 write_stderr("%s: the number of buffers (-B) must be at least twice the number of allowed connections (-N) and at least 16\n", progname);
619                 ExitPostmaster(1);
620         }
621
622         if (ReservedBackends >= MaxBackends)
623         {
624                 write_stderr("%s: superuser_reserved_connections must be less than max_connections\n", progname);
625                 ExitPostmaster(1);
626         }
627
628         /*
629          * Other one-time internal sanity checks can go here, if they are fast.
630          * (Put any slow processing further down, after postmaster.pid creation.)
631          */
632         if (!CheckDateTokenTables())
633         {
634                 write_stderr("%s: invalid datetoken tables, please fix\n", progname);
635                 ExitPostmaster(1);
636         }
637
638         /*
639          * Now that we are done processing the postmaster arguments, reset
640          * getopt(3) library so that it will work correctly in subprocesses.
641          */
642         optind = 1;
643 #ifdef HAVE_INT_OPTRESET
644         optreset = 1;                           /* some systems need this too */
645 #endif
646
647         /* For debugging: display postmaster environment */
648         {
649                 extern char **environ;
650                 char      **p;
651
652                 ereport(DEBUG3,
653                                 (errmsg_internal("%s: PostmasterMain: initial environ dump:",
654                                                                  progname)));
655                 ereport(DEBUG3,
656                          (errmsg_internal("-----------------------------------------")));
657                 for (p = environ; *p; ++p)
658                         ereport(DEBUG3,
659                                         (errmsg_internal("\t%s", *p)));
660                 ereport(DEBUG3,
661                          (errmsg_internal("-----------------------------------------")));
662         }
663
664         /*
665          * Fork away from controlling terminal, if -S specified.
666          *
667          * Must do this before we grab any interlock files, else the interlocks
668          * will show the wrong PID.
669          */
670         if (SilentMode)
671                 pmdaemonize();
672
673         /*
674          * Create lockfile for data directory.
675          *
676          * We want to do this before we try to grab the input sockets, because the
677          * data directory interlock is more reliable than the socket-file
678          * interlock (thanks to whoever decided to put socket files in /tmp :-().
679          * For the same reason, it's best to grab the TCP socket(s) before the
680          * Unix socket.
681          */
682         CreateDataDirLockFile(true);
683
684         /*
685          * If timezone is not set, determine what the OS uses.  (In theory this
686          * should be done during GUC initialization, but because it can take as
687          * much as several seconds, we delay it until after we've created the
688          * postmaster.pid file.  This prevents problems with boot scripts that
689          * expect the pidfile to appear quickly.)
690          */
691         pg_timezone_initialize();
692
693         /*
694          * Initialize SSL library, if specified.
695          */
696 #ifdef USE_SSL
697         if (EnableSSL)
698                 secure_initialize();
699 #endif
700
701         /*
702          * process any libraries that should be preloaded and optionally
703          * pre-initialized
704          */
705         if (preload_libraries_string)
706                 process_preload_libraries(preload_libraries_string);
707
708         /*
709          * Remove old temporary files.  At this point there can be no other
710          * Postgres processes running in this directory, so this should be safe.
711          */
712         RemovePgTempFiles();
713
714         /*
715          * Establish input sockets.
716          */
717         for (i = 0; i < MAXLISTEN; i++)
718                 ListenSocket[i] = -1;
719
720         if (ListenAddresses)
721         {
722                 char       *rawstring;
723                 List       *elemlist;
724                 ListCell   *l;
725                 int                     success = 0;
726
727                 /* Need a modifiable copy of ListenAddresses */
728                 rawstring = pstrdup(ListenAddresses);
729
730                 /* Parse string into list of identifiers */
731                 if (!SplitIdentifierString(rawstring, ',', &elemlist))
732                 {
733                         /* syntax error in list */
734                         ereport(FATAL,
735                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
736                                          errmsg("invalid list syntax for \"listen_addresses\"")));
737                 }
738
739                 foreach(l, elemlist)
740                 {
741                         char       *curhost = (char *) lfirst(l);
742
743                         if (strcmp(curhost, "*") == 0)
744                                 status = StreamServerPort(AF_UNSPEC, NULL,
745                                                                                   (unsigned short) PostPortNumber,
746                                                                                   UnixSocketDir,
747                                                                                   ListenSocket, MAXLISTEN);
748                         else
749                                 status = StreamServerPort(AF_UNSPEC, curhost,
750                                                                                   (unsigned short) PostPortNumber,
751                                                                                   UnixSocketDir,
752                                                                                   ListenSocket, MAXLISTEN);
753                         if (status == STATUS_OK)
754                                 success++;
755                         else
756                                 ereport(WARNING,
757                                                 (errmsg("could not create listen socket for \"%s\"",
758                                                                 curhost)));
759                 }
760
761                 if (!success && list_length(elemlist))
762                         ereport(FATAL,
763                                         (errmsg("could not create any TCP/IP sockets")));
764
765                 list_free(elemlist);
766                 pfree(rawstring);
767         }
768
769 #ifdef USE_BONJOUR
770         /* Register for Bonjour only if we opened TCP socket(s) */
771         if (ListenSocket[0] != -1 && bonjour_name != NULL)
772         {
773                 DNSServiceRegistrationCreate(bonjour_name,
774                                                                          "_postgresql._tcp.",
775                                                                          "",
776                                                                          htonl(PostPortNumber),
777                                                                          "",
778                                                                          (DNSServiceRegistrationReply) reg_reply,
779                                                                          NULL);
780         }
781 #endif
782
783 #ifdef HAVE_UNIX_SOCKETS
784         status = StreamServerPort(AF_UNIX, NULL,
785                                                           (unsigned short) PostPortNumber,
786                                                           UnixSocketDir,
787                                                           ListenSocket, MAXLISTEN);
788         if (status != STATUS_OK)
789                 ereport(WARNING,
790                                 (errmsg("could not create Unix-domain socket")));
791 #endif
792
793         /*
794          * check that we have some socket to listen on
795          */
796         if (ListenSocket[0] == -1)
797                 ereport(FATAL,
798                                 (errmsg("no socket created for listening")));
799
800         /*
801          * Set up shared memory and semaphores.
802          */
803         reset_shared(PostPortNumber);
804
805         /*
806          * Estimate number of openable files.  This must happen after setting up
807          * semaphores, because on some platforms semaphores count as open files.
808          */
809         set_max_safe_fds();
810
811         /*
812          * Initialize the list of active backends.
813          */
814         BackendList = DLNewList();
815
816 #ifdef WIN32
817
818         /*
819          * Initialize the child pid/HANDLE arrays for signal handling.
820          */
821         win32_childPIDArray = (pid_t *)
822                 malloc(mul_size(NUM_BACKENDARRAY_ELEMS, sizeof(pid_t)));
823         win32_childHNDArray = (HANDLE *)
824                 malloc(mul_size(NUM_BACKENDARRAY_ELEMS, sizeof(HANDLE)));
825         if (!win32_childPIDArray || !win32_childHNDArray)
826                 ereport(FATAL,
827                                 (errcode(ERRCODE_OUT_OF_MEMORY),
828                                  errmsg("out of memory")));
829
830         /*
831          * Set up a handle that child processes can use to check whether the
832          * postmaster is still running.
833          */
834         if (DuplicateHandle(GetCurrentProcess(),
835                                                 GetCurrentProcess(),
836                                                 GetCurrentProcess(),
837                                                 &PostmasterHandle,
838                                                 0,
839                                                 TRUE,
840                                                 DUPLICATE_SAME_ACCESS) == 0)
841                 ereport(FATAL,
842                                 (errmsg_internal("could not duplicate postmaster handle: error code %d",
843                                                                  (int) GetLastError())));
844 #endif
845
846         /*
847          * Record postmaster options.  We delay this till now to avoid recording
848          * bogus options (eg, NBuffers too high for available memory).
849          */
850         if (!CreateOptsFile(argc, argv, my_exec_path))
851                 ExitPostmaster(1);
852
853 #ifdef EXEC_BACKEND
854         write_nondefault_variables(PGC_POSTMASTER);
855 #endif
856
857         /*
858          * Write the external PID file if requested
859          */
860         if (external_pid_file)
861         {
862                 FILE       *fpidfile = fopen(external_pid_file, "w");
863
864                 if (fpidfile)
865                 {
866                         fprintf(fpidfile, "%d\n", MyProcPid);
867                         fclose(fpidfile);
868                         /* Should we remove the pid file on postmaster exit? */
869                 }
870                 else
871                         write_stderr("%s: could not write external PID file \"%s\": %s\n",
872                                                  progname, external_pid_file, strerror(errno));
873         }
874
875         /*
876          * Set up signal handlers for the postmaster process.
877          *
878          * CAUTION: when changing this list, check for side-effects on the signal
879          * handling setup of child processes.  See tcop/postgres.c,
880          * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/autovacuum.c,
881          * postmaster/pgarch.c, postmaster/pgstat.c, and postmaster/syslogger.c.
882          */
883         pqinitmask();
884         PG_SETMASK(&BlockSig);
885
886         pqsignal(SIGHUP, SIGHUP_handler);       /* reread config file and have
887                                                                                  * children do same */
888         pqsignal(SIGINT, pmdie);        /* send SIGTERM and shut down */
889         pqsignal(SIGQUIT, pmdie);       /* send SIGQUIT and die */
890         pqsignal(SIGTERM, pmdie);       /* wait for children and shut down */
891         pqsignal(SIGALRM, SIG_IGN); /* ignored */
892         pqsignal(SIGPIPE, SIG_IGN); /* ignored */
893         pqsignal(SIGUSR1, sigusr1_handler); /* message from child process */
894         pqsignal(SIGUSR2, dummy_handler);       /* unused, reserve for children */
895         pqsignal(SIGCHLD, reaper);      /* handle child termination */
896         pqsignal(SIGTTIN, SIG_IGN); /* ignored */
897         pqsignal(SIGTTOU, SIG_IGN); /* ignored */
898         /* ignore SIGXFSZ, so that ulimit violations work like disk full */
899 #ifdef SIGXFSZ
900         pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
901 #endif
902
903         /*
904          * If enabled, start up syslogger collection subprocess
905          */
906         SysLoggerPID = SysLogger_Start();
907
908         /*
909          * Reset whereToSendOutput from DestDebug (its starting state) to
910          * DestNone. This stops ereport from sending log messages to stderr unless
911          * Log_destination permits.  We don't do this until the postmaster is
912          * fully launched, since startup failures may as well be reported to
913          * stderr.
914          */
915         whereToSendOutput = DestNone;
916
917         /*
918          * Initialize the statistics collector stuff
919          */
920         pgstat_init();
921
922         /*
923          * Load configuration files for client authentication.
924          */
925         load_hba();
926         load_ident();
927
928         /*
929          * We're ready to rock and roll...
930          */
931         StartupPID = StartupDataBase();
932
933         /*
934          * Remember postmaster startup time
935          */
936         PgStartTime = GetCurrentTimestamp();
937
938         /*
939          * Initialize the autovacuum daemon
940          */
941         autovac_init();
942
943         status = ServerLoop();
944
945         /*
946          * ServerLoop probably shouldn't ever return, but if it does, close down.
947          */
948         ExitPostmaster(status != STATUS_OK);
949
950         return 0;                                       /* not reached */
951 }
952
953
954 /*
955  * Validate the proposed data directory
956  */
957 static void
958 checkDataDir(void)
959 {
960         char            path[MAXPGPATH];
961         FILE       *fp;
962         struct stat stat_buf;
963
964         Assert(DataDir);
965
966         if (stat(DataDir, &stat_buf) != 0)
967         {
968                 if (errno == ENOENT)
969                         ereport(FATAL,
970                                         (errcode_for_file_access(),
971                                          errmsg("data directory \"%s\" does not exist",
972                                                         DataDir)));
973                 else
974                         ereport(FATAL,
975                                         (errcode_for_file_access(),
976                                  errmsg("could not read permissions of directory \"%s\": %m",
977                                                 DataDir)));
978         }
979
980         /*
981          * Check that the directory belongs to my userid; if not, reject.
982          *
983          * This check is an essential part of the interlock that prevents two
984          * postmasters from starting in the same directory (see CreateLockFile()).
985          * Do not remove or weaken it.
986          *
987          * XXX can we safely enable this check on Windows?
988          */
989 #if !defined(WIN32) && !defined(__CYGWIN__)
990         if (stat_buf.st_uid != geteuid())
991                 ereport(FATAL,
992                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
993                                  errmsg("data directory \"%s\" has wrong ownership",
994                                                 DataDir),
995                                  errhint("The server must be started by the user that owns the data directory.")));
996 #endif
997
998         /*
999          * Check if the directory has group or world access.  If so, reject.
1000          *
1001          * It would be possible to allow weaker constraints (for example, allow
1002          * group access) but we cannot make a general assumption that that is
1003          * okay; for example there are platforms where nearly all users
1004          * customarily belong to the same group.  Perhaps this test should be
1005          * configurable.
1006          *
1007          * XXX temporarily suppress check when on Windows, because there may not
1008          * be proper support for Unix-y file permissions.  Need to think of a
1009          * reasonable check to apply on Windows.
1010          */
1011 #if !defined(WIN32) && !defined(__CYGWIN__)
1012         if (stat_buf.st_mode & (S_IRWXG | S_IRWXO))
1013                 ereport(FATAL,
1014                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1015                                  errmsg("data directory \"%s\" has group or world access",
1016                                                 DataDir),
1017                                  errdetail("Permissions should be u=rwx (0700).")));
1018 #endif
1019
1020         /* Look for PG_VERSION before looking for pg_control */
1021         ValidatePgVersion(DataDir);
1022
1023         snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1024
1025         fp = AllocateFile(path, PG_BINARY_R);
1026         if (fp == NULL)
1027         {
1028                 write_stderr("%s: could not find the database system\n"
1029                                          "Expected to find it in the directory \"%s\",\n"
1030                                          "but could not open file \"%s\": %s\n",
1031                                          progname, DataDir, path, strerror(errno));
1032                 ExitPostmaster(2);
1033         }
1034         FreeFile(fp);
1035 }
1036
1037
1038 #ifdef USE_BONJOUR
1039
1040 /*
1041  * empty callback function for DNSServiceRegistrationCreate()
1042  */
1043 static void
1044 reg_reply(DNSServiceRegistrationReplyErrorType errorCode, void *context)
1045 {
1046
1047 }
1048 #endif   /* USE_BONJOUR */
1049
1050
1051 /*
1052  * Fork away from the controlling terminal (-S option)
1053  */
1054 static void
1055 pmdaemonize(void)
1056 {
1057 #ifndef WIN32
1058         int                     i;
1059         pid_t           pid;
1060
1061         pid = fork_process();
1062         if (pid == (pid_t) -1)
1063         {
1064                 write_stderr("%s: could not fork background process: %s\n",
1065                                          progname, strerror(errno));
1066                 ExitPostmaster(1);
1067         }
1068         else if (pid)
1069         {                                                       /* parent */
1070                 /* Parent should just exit, without doing any atexit cleanup */
1071                 _exit(0);
1072         }
1073
1074         MyProcPid = PostmasterPid = getpid();           /* reset PID vars to child */
1075
1076 /* GH: If there's no setsid(), we hopefully don't need silent mode.
1077  * Until there's a better solution.
1078  */
1079 #ifdef HAVE_SETSID
1080         if (setsid() < 0)
1081         {
1082                 write_stderr("%s: could not dissociate from controlling TTY: %s\n",
1083                                          progname, strerror(errno));
1084                 ExitPostmaster(1);
1085         }
1086 #endif
1087         i = open(NULL_DEV, O_RDWR);
1088         dup2(i, 0);
1089         dup2(i, 1);
1090         dup2(i, 2);
1091         close(i);
1092 #else                                                   /* WIN32 */
1093         /* not supported */
1094         elog(FATAL, "SilentMode not supported under WIN32");
1095 #endif   /* WIN32 */
1096 }
1097
1098
/*
 * Write the command-line help text to stdout.
 *
 * progname is substituted into the banner and the usage line.  Each message
 * goes through _() before being printed.
 */
static void
usage(const char *progname)
{
	/* Banner and synopsis */
	printf(_("%s is the PostgreSQL server.\n\n"), progname);
	printf(_("Usage:\n  %s [OPTION]...\n\n"), progname);

	/* General options; -A and -l are listed only when compiled in */
	printf(_("Options:\n"));
#ifdef USE_ASSERT_CHECKING
	printf(_("  -A 1|0          enable/disable run-time assert checking\n"));
#endif
	printf(_("  -B NBUFFERS     number of shared buffers\n"));
	printf(_("  -c NAME=VALUE   set run-time parameter\n"));
	printf(_("  -d 1-5          debugging level\n"));
	printf(_("  -D DATADIR      database directory\n"));
	printf(_("  -F              turn fsync off\n"));
	printf(_("  -h HOSTNAME     host name or IP address to listen on\n"));
	printf(_("  -i              enable TCP/IP connections\n"));
	printf(_("  -k DIRECTORY    Unix-domain socket location\n"));
#ifdef USE_SSL
	printf(_("  -l              enable SSL connections\n"));
#endif
	printf(_("  -N MAX-CONNECT  maximum number of allowed connections\n"));
	printf(_("  -o OPTIONS      pass \"OPTIONS\" to each server process\n"));
	printf(_("  -p PORT         port number to listen on\n"));
	printf(_("  -S              silent mode (start in background without logging output)\n"));
	printf(_("  --help          show this help, then exit\n"));
	printf(_("  --version       output version information, then exit\n"));

	/* Options of interest to developers only */
	printf(_("\nDeveloper options:\n"));
	printf(_("  -n              do not reinitialize shared memory after abnormal exit\n"));
	printf(_("  -s              send SIGSTOP to all backend servers if one dies\n"));

	/* Trailer: pointer to the documentation and the bug-report address */
	printf(_("\nPlease read the documentation for the complete list of run-time\n"
			 "configuration settings and how to set them on the command line or in\n"
			 "the configuration file.\n\n"
			 "Report bugs to <pgsql-bugs@postgresql.org>.\n"));
}
1138
1139
1140 /*
1141  * Main idle loop of postmaster
1142  */
1143 static int
1144 ServerLoop(void)
1145 {
1146         fd_set          readmask;
1147         int                     nSockets;
1148         time_t          now,
1149                                 last_touch_time;
1150         struct timeval earlier,
1151                                 later;
1152
1153         gettimeofday(&earlier, NULL);
1154         last_touch_time = time(NULL);
1155
1156         nSockets = initMasks(&readmask);
1157
1158         for (;;)
1159         {
1160                 Port       *port;
1161                 fd_set          rmask;
1162                 struct timeval timeout;
1163                 int                     selres;
1164                 int                     i;
1165
1166                 /*
1167                  * Wait for something to happen.
1168                  *
1169                  * We wait at most one minute, or the minimum autovacuum delay, to
1170                  * ensure that the other background tasks handled below get done even
1171                  * when no requests are arriving.
1172                  */
1173                 memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1174
1175                 timeout.tv_sec = Min(60, autovacuum_naptime);
1176                 timeout.tv_usec = 0;
1177
1178                 PG_SETMASK(&UnBlockSig);
1179
1180                 selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1181
1182                 /*
1183                  * Block all signals until we wait again.  (This makes it safe for our
1184                  * signal handlers to do nontrivial work.)
1185                  */
1186                 PG_SETMASK(&BlockSig);
1187
1188                 if (selres < 0)
1189                 {
1190                         if (errno != EINTR && errno != EWOULDBLOCK)
1191                         {
1192                                 ereport(LOG,
1193                                                 (errcode_for_socket_access(),
1194                                                  errmsg("select() failed in postmaster: %m")));
1195                                 return STATUS_ERROR;
1196                         }
1197                 }
1198
1199                 /*
1200                  * New connection pending on any of our sockets? If so, fork a child
1201                  * process to deal with it.
1202                  */
1203                 if (selres > 0)
1204                 {
1205                         /*
1206                          * Select a random seed at the time of first receiving a request.
1207                          */
1208                         while (random_seed == 0)
1209                         {
1210                                 gettimeofday(&later, NULL);
1211
1212                                 /*
1213                                  * We are not sure how much precision is in tv_usec, so we
1214                                  * swap the high and low 16 bits of 'later' and XOR them with
1215                                  * 'earlier'. On the off chance that the result is 0, we loop
1216                                  * until it isn't.
1217                                  */
1218                                 random_seed = earlier.tv_usec ^
1219                                         ((later.tv_usec << 16) |
1220                                          ((later.tv_usec >> 16) & 0xffff));
1221                         }
1222
1223                         for (i = 0; i < MAXLISTEN; i++)
1224                         {
1225                                 if (ListenSocket[i] == -1)
1226                                         break;
1227                                 if (FD_ISSET(ListenSocket[i], &rmask))
1228                                 {
1229                                         port = ConnCreate(ListenSocket[i]);
1230                                         if (port)
1231                                         {
1232                                                 BackendStartup(port);
1233
1234                                                 /*
1235                                                  * We no longer need the open socket or port structure
1236                                                  * in this process
1237                                                  */
1238                                                 StreamClose(port->sock);
1239                                                 ConnFree(port);
1240                                         }
1241                                 }
1242                         }
1243                 }
1244
1245                 /* If we have lost the system logger, try to start a new one */
1246                 if (SysLoggerPID == 0 && Redirect_stderr)
1247                         SysLoggerPID = SysLogger_Start();
1248
1249                 /*
1250                  * If no background writer process is running, and we are not in a
1251                  * state that prevents it, start one.  It doesn't matter if this
1252                  * fails, we'll just try again later.
1253                  */
1254                 if (BgWriterPID == 0 && StartupPID == 0 && !FatalError)
1255                 {
1256                         BgWriterPID = StartBackgroundWriter();
1257                         /* If shutdown is pending, set it going */
1258                         if (Shutdown > NoShutdown && BgWriterPID != 0)
1259                                 kill(BgWriterPID, SIGUSR2);
1260                 }
1261
1262                 /*
1263                  * Start a new autovacuum process, if there isn't one running already.
1264                  * (It'll die relatively quickly.)  We check that it's not started too
1265                  * frequently in autovac_start.
1266                  */
1267                 if (AutoVacuumingActive() && AutoVacPID == 0 &&
1268                         StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
1269                         AutoVacPID = autovac_start();
1270
1271                 /* If we have lost the archiver, try to start a new one */
1272                 if (XLogArchivingActive() && PgArchPID == 0 &&
1273                         StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
1274                         PgArchPID = pgarch_start();
1275
1276                 /* If we have lost the stats collector, try to start a new one */
1277                 if (PgStatPID == 0 &&
1278                         StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
1279                         PgStatPID = pgstat_start();
1280
1281                 /*
1282                  * Touch the socket and lock file every 58 minutes, to ensure that
1283                  * they are not removed by overzealous /tmp-cleaning tasks.  We assume
1284                  * no one runs cleaners with cutoff times of less than an hour ...
1285                  */
1286                 now = time(NULL);
1287                 if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1288                 {
1289                         TouchSocketFile();
1290                         TouchSocketLockFile();
1291                         last_touch_time = now;
1292                 }
1293         }
1294 }
1295
1296
1297 /*
1298  * Initialise the masks for select() for the ports we are listening on.
1299  * Return the number of sockets to listen on.
1300  */
1301 static int
1302 initMasks(fd_set *rmask)
1303 {
1304         int                     nsocks = -1;
1305         int                     i;
1306
1307         FD_ZERO(rmask);
1308
1309         for (i = 0; i < MAXLISTEN; i++)
1310         {
1311                 int                     fd = ListenSocket[i];
1312
1313                 if (fd == -1)
1314                         break;
1315                 FD_SET(fd, rmask);
1316                 if (fd > nsocks)
1317                         nsocks = fd;
1318         }
1319
1320         return nsocks + 1;
1321 }
1322
1323
1324 /*
1325  * Read the startup packet and do something according to it.
1326  *
1327  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1328  * not return at all.
1329  *
1330  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1331  * if that's what you want.  Return STATUS_ERROR if you don't want to
1332  * send anything to the client, which would typically be appropriate
1333  * if we detect a communications failure.)
1334  */
1335 static int
1336 ProcessStartupPacket(Port *port, bool SSLdone)
1337 {
1338         int32           len;
1339         void       *buf;
1340         ProtocolVersion proto;
1341         MemoryContext oldcontext;
1342
1343         if (pq_getbytes((char *) &len, 4) == EOF)
1344         {
1345                 /*
1346                  * EOF after SSLdone probably means the client didn't like our
1347                  * response to NEGOTIATE_SSL_CODE.      That's not an error condition, so
1348                  * don't clutter the log with a complaint.
1349                  */
1350                 if (!SSLdone)
1351                         ereport(COMMERROR,
1352                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1353                                          errmsg("incomplete startup packet")));
1354                 return STATUS_ERROR;
1355         }
1356
1357         len = ntohl(len);
1358         len -= 4;
1359
1360         if (len < (int32) sizeof(ProtocolVersion) ||
1361                 len > MAX_STARTUP_PACKET_LENGTH)
1362         {
1363                 ereport(COMMERROR,
1364                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1365                                  errmsg("invalid length of startup packet")));
1366                 return STATUS_ERROR;
1367         }
1368
1369         /*
1370          * Allocate at least the size of an old-style startup packet, plus one
1371          * extra byte, and make sure all are zeroes.  This ensures we will have
1372          * null termination of all strings, in both fixed- and variable-length
1373          * packet layouts.
1374          */
1375         if (len <= (int32) sizeof(StartupPacket))
1376                 buf = palloc0(sizeof(StartupPacket) + 1);
1377         else
1378                 buf = palloc0(len + 1);
1379
1380         if (pq_getbytes(buf, len) == EOF)
1381         {
1382                 ereport(COMMERROR,
1383                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1384                                  errmsg("incomplete startup packet")));
1385                 return STATUS_ERROR;
1386         }
1387
1388         /*
1389          * The first field is either a protocol version number or a special
1390          * request code.
1391          */
1392         port->proto = proto = ntohl(*((ProtocolVersion *) buf));
1393
1394         if (proto == CANCEL_REQUEST_CODE)
1395         {
1396                 processCancelRequest(port, buf);
1397                 return 127;                             /* XXX */
1398         }
1399
1400         if (proto == NEGOTIATE_SSL_CODE && !SSLdone)
1401         {
1402                 char            SSLok;
1403
1404 #ifdef USE_SSL
1405                 /* No SSL when disabled or on Unix sockets */
1406                 if (!EnableSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
1407                         SSLok = 'N';
1408                 else
1409                         SSLok = 'S';            /* Support for SSL */
1410 #else
1411                 SSLok = 'N';                    /* No support for SSL */
1412 #endif
1413                 if (send(port->sock, &SSLok, 1, 0) != 1)
1414                 {
1415                         ereport(COMMERROR,
1416                                         (errcode_for_socket_access(),
1417                                          errmsg("failed to send SSL negotiation response: %m")));
1418                         return STATUS_ERROR;    /* close the connection */
1419                 }
1420
1421 #ifdef USE_SSL
1422                 if (SSLok == 'S' && secure_open_server(port) == -1)
1423                         return STATUS_ERROR;
1424 #endif
1425                 /* regular startup packet, cancel, etc packet should follow... */
1426                 /* but not another SSL negotiation request */
1427                 return ProcessStartupPacket(port, true);
1428         }
1429
1430         /* Could add additional special packet types here */
1431
1432         /*
1433          * Set FrontendProtocol now so that ereport() knows what format to send if
1434          * we fail during startup.
1435          */
1436         FrontendProtocol = proto;
1437
1438         /* Check we can handle the protocol the frontend is using. */
1439
1440         if (PG_PROTOCOL_MAJOR(proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) ||
1441                 PG_PROTOCOL_MAJOR(proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) ||
1442                 (PG_PROTOCOL_MAJOR(proto) == PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) &&
1443                  PG_PROTOCOL_MINOR(proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST)))
1444                 ereport(FATAL,
1445                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1446                                  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
1447                                                 PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
1448                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST),
1449                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST),
1450                                                 PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST))));
1451
1452         /*
1453          * Now fetch parameters out of startup packet and save them into the Port
1454          * structure.  All data structures attached to the Port struct must be
1455          * allocated in TopMemoryContext so that they won't disappear when we pass
1456          * them to PostgresMain (see BackendRun).  We need not worry about leaking
1457          * this storage on failure, since we aren't in the postmaster process
1458          * anymore.
1459          */
1460         oldcontext = MemoryContextSwitchTo(TopMemoryContext);
1461
1462         if (PG_PROTOCOL_MAJOR(proto) >= 3)
1463         {
1464                 int32           offset = sizeof(ProtocolVersion);
1465
1466                 /*
1467                  * Scan packet body for name/option pairs.      We can assume any string
1468                  * beginning within the packet body is null-terminated, thanks to
1469                  * zeroing extra byte above.
1470                  */
1471                 port->guc_options = NIL;
1472
1473                 while (offset < len)
1474                 {
1475                         char       *nameptr = ((char *) buf) + offset;
1476                         int32           valoffset;
1477                         char       *valptr;
1478
1479                         if (*nameptr == '\0')
1480                                 break;                  /* found packet terminator */
1481                         valoffset = offset + strlen(nameptr) + 1;
1482                         if (valoffset >= len)
1483                                 break;                  /* missing value, will complain below */
1484                         valptr = ((char *) buf) + valoffset;
1485
1486                         if (strcmp(nameptr, "database") == 0)
1487                                 port->database_name = pstrdup(valptr);
1488                         else if (strcmp(nameptr, "user") == 0)
1489                                 port->user_name = pstrdup(valptr);
1490                         else if (strcmp(nameptr, "options") == 0)
1491                                 port->cmdline_options = pstrdup(valptr);
1492                         else
1493                         {
1494                                 /* Assume it's a generic GUC option */
1495                                 port->guc_options = lappend(port->guc_options,
1496                                                                                         pstrdup(nameptr));
1497                                 port->guc_options = lappend(port->guc_options,
1498                                                                                         pstrdup(valptr));
1499                         }
1500                         offset = valoffset + strlen(valptr) + 1;
1501                 }
1502
1503                 /*
1504                  * If we didn't find a packet terminator exactly at the end of the
1505                  * given packet length, complain.
1506                  */
1507                 if (offset != len - 1)
1508                         ereport(FATAL,
1509                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1510                                          errmsg("invalid startup packet layout: expected terminator as last byte")));
1511         }
1512         else
1513         {
1514                 /*
1515                  * Get the parameters from the old-style, fixed-width-fields startup
1516                  * packet as C strings.  The packet destination was cleared first so a
1517                  * short packet has zeros silently added.  We have to be prepared to
1518                  * truncate the pstrdup result for oversize fields, though.
1519                  */
1520                 StartupPacket *packet = (StartupPacket *) buf;
1521
1522                 port->database_name = pstrdup(packet->database);
1523                 if (strlen(port->database_name) > sizeof(packet->database))
1524                         port->database_name[sizeof(packet->database)] = '\0';
1525                 port->user_name = pstrdup(packet->user);
1526                 if (strlen(port->user_name) > sizeof(packet->user))
1527                         port->user_name[sizeof(packet->user)] = '\0';
1528                 port->cmdline_options = pstrdup(packet->options);
1529                 if (strlen(port->cmdline_options) > sizeof(packet->options))
1530                         port->cmdline_options[sizeof(packet->options)] = '\0';
1531                 port->guc_options = NIL;
1532         }
1533
1534         /* Check a user name was given. */
1535         if (port->user_name == NULL || port->user_name[0] == '\0')
1536                 ereport(FATAL,
1537                                 (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
1538                          errmsg("no PostgreSQL user name specified in startup packet")));
1539
1540         /* The database defaults to the user name. */
1541         if (port->database_name == NULL || port->database_name[0] == '\0')
1542                 port->database_name = pstrdup(port->user_name);
1543
1544         if (Db_user_namespace)
1545         {
1546                 /*
1547                  * If user@, it is a global user, remove '@'. We only want to do this
1548                  * if there is an '@' at the end and no earlier in the user string or
1549                  * they may fake as a local user of another database attaching to this
1550                  * database.
1551                  */
1552                 if (strchr(port->user_name, '@') ==
1553                         port->user_name + strlen(port->user_name) - 1)
1554                         *strchr(port->user_name, '@') = '\0';
1555                 else
1556                 {
1557                         /* Append '@' and dbname */
1558                         char       *db_user;
1559
1560                         db_user = palloc(strlen(port->user_name) +
1561                                                          strlen(port->database_name) + 2);
1562                         sprintf(db_user, "%s@%s", port->user_name, port->database_name);
1563                         port->user_name = db_user;
1564                 }
1565         }
1566
1567         /*
1568          * Truncate given database and user names to length of a Postgres name.
1569          * This avoids lookup failures when overlength names are given.
1570          */
1571         if (strlen(port->database_name) >= NAMEDATALEN)
1572                 port->database_name[NAMEDATALEN - 1] = '\0';
1573         if (strlen(port->user_name) >= NAMEDATALEN)
1574                 port->user_name[NAMEDATALEN - 1] = '\0';
1575
1576         /*
1577          * Done putting stuff in TopMemoryContext.
1578          */
1579         MemoryContextSwitchTo(oldcontext);
1580
1581         /*
1582          * If we're going to reject the connection due to database state, say so
1583          * now instead of wasting cycles on an authentication exchange. (This also
1584          * allows a pg_ping utility to be written.)
1585          */
1586         switch (port->canAcceptConnections)
1587         {
1588                 case CAC_STARTUP:
1589                         ereport(FATAL,
1590                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1591                                          errmsg("the database system is starting up")));
1592                         break;
1593                 case CAC_SHUTDOWN:
1594                         ereport(FATAL,
1595                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1596                                          errmsg("the database system is shutting down")));
1597                         break;
1598                 case CAC_RECOVERY:
1599                         ereport(FATAL,
1600                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1601                                          errmsg("the database system is in recovery mode")));
1602                         break;
1603                 case CAC_TOOMANY:
1604                         ereport(FATAL,
1605                                         (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
1606                                          errmsg("sorry, too many clients already")));
1607                         break;
1608                 case CAC_OK:
1609                 default:
1610                         break;
1611         }
1612
1613         return STATUS_OK;
1614 }
1615
1616
1617 /*
1618  * The client has sent a cancel request packet, not a normal
1619  * start-a-new-connection packet.  Perform the necessary processing.
1620  * Nothing is sent back to the client.
1621  */
1622 static void
1623 processCancelRequest(Port *port, void *pkt)
1624 {
1625         CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
1626         int                     backendPID;
1627         long            cancelAuthCode;
1628         Backend    *bp;
1629
1630 #ifndef EXEC_BACKEND
1631         Dlelem     *curr;
1632 #else
1633         int                     i;
1634 #endif
1635
1636         backendPID = (int) ntohl(canc->backendPID);
1637         cancelAuthCode = (long) ntohl(canc->cancelAuthCode);
1638
1639         /*
1640          * See if we have a matching backend.  In the EXEC_BACKEND case, we can no
1641          * longer access the postmaster's own backend list, and must rely on the
1642          * duplicate array in shared memory.
1643          */
1644 #ifndef EXEC_BACKEND
1645         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
1646         {
1647                 bp = (Backend *) DLE_VAL(curr);
1648 #else
1649         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
1650         {
1651                 bp = (Backend *) &ShmemBackendArray[i];
1652 #endif
1653                 if (bp->pid == backendPID)
1654                 {
1655                         if (bp->cancel_key == cancelAuthCode)
1656                         {
1657                                 /* Found a match; signal that backend to cancel current op */
1658                                 ereport(DEBUG2,
1659                                                 (errmsg_internal("processing cancel request: sending SIGINT to process %d",
1660                                                                                  backendPID)));
1661                                 kill(bp->pid, SIGINT);
1662                         }
1663                         else
1664                                 /* Right PID, wrong key: no way, Jose */
1665                                 ereport(DEBUG2,
1666                                  (errmsg_internal("bad key in cancel request for process %d",
1667                                                                   backendPID)));
1668                         return;
1669                 }
1670         }
1671
1672         /* No matching backend */
1673         ereport(DEBUG2,
1674                         (errmsg_internal("bad pid in cancel request for process %d",
1675                                                          backendPID)));
1676 }
1677
1678 /*
1679  * canAcceptConnections --- check to see if database state allows connections.
1680  */
1681 static enum CAC_state
1682 canAcceptConnections(void)
1683 {
1684         /* Can't start backends when in startup/shutdown/recovery state. */
1685         if (Shutdown > NoShutdown)
1686                 return CAC_SHUTDOWN;
1687         if (StartupPID)
1688                 return CAC_STARTUP;
1689         if (FatalError)
1690                 return CAC_RECOVERY;
1691
1692         /*
1693          * Don't start too many children.
1694          *
1695          * We allow more connections than we can have backends here because some
1696          * might still be authenticating; they might fail auth, or some existing
1697          * backend might exit before the auth cycle is completed. The exact
1698          * MaxBackends limit is enforced when a new backend tries to join the
1699          * shared-inval backend array.
1700          */
1701         if (CountChildren() >= 2 * MaxBackends)
1702                 return CAC_TOOMANY;
1703
1704         return CAC_OK;
1705 }
1706
1707
1708 /*
1709  * ConnCreate -- create a local connection data structure
1710  */
1711 static Port *
1712 ConnCreate(int serverFd)
1713 {
1714         Port       *port;
1715
1716         if (!(port = (Port *) calloc(1, sizeof(Port))))
1717         {
1718                 ereport(LOG,
1719                                 (errcode(ERRCODE_OUT_OF_MEMORY),
1720                                  errmsg("out of memory")));
1721                 ExitPostmaster(1);
1722         }
1723
1724         if (StreamConnection(serverFd, port) != STATUS_OK)
1725         {
1726                 StreamClose(port->sock);
1727                 ConnFree(port);
1728                 port = NULL;
1729         }
1730         else
1731         {
1732                 /*
1733                  * Precompute password salt values to use for this connection. It's
1734                  * slightly annoying to do this long in advance of knowing whether
1735                  * we'll need 'em or not, but we must do the random() calls before we
1736                  * fork, not after.  Else the postmaster's random sequence won't get
1737                  * advanced, and all backends would end up using the same salt...
1738                  */
1739                 RandomSalt(port->cryptSalt, port->md5Salt);
1740         }
1741
1742         return port;
1743 }
1744
1745
1746 /*
1747  * ConnFree -- free a local connection data structure
1748  */
1749 static void
1750 ConnFree(Port *conn)
1751 {
1752 #ifdef USE_SSL
1753         secure_close(conn);
1754 #endif
1755         free(conn);
1756 }
1757
1758
1759 /*
1760  * ClosePostmasterPorts -- close all the postmaster's open sockets
1761  *
1762  * This is called during child process startup to release file descriptors
1763  * that are not needed by that child process.  The postmaster still has
1764  * them open, of course.
1765  *
1766  * Note: we pass am_syslogger as a boolean because we don't want to set
1767  * the global variable yet when this is called.
1768  */
1769 void
1770 ClosePostmasterPorts(bool am_syslogger)
1771 {
1772         int                     i;
1773
1774         /* Close the listen sockets */
1775         for (i = 0; i < MAXLISTEN; i++)
1776         {
1777                 if (ListenSocket[i] != -1)
1778                 {
1779                         StreamClose(ListenSocket[i]);
1780                         ListenSocket[i] = -1;
1781                 }
1782         }
1783
1784         /* If using syslogger, close the read side of the pipe */
1785         if (!am_syslogger)
1786         {
1787 #ifndef WIN32
1788                 if (syslogPipe[0] >= 0)
1789                         close(syslogPipe[0]);
1790                 syslogPipe[0] = -1;
1791 #else
1792                 if (syslogPipe[0])
1793                         CloseHandle(syslogPipe[0]);
1794                 syslogPipe[0] = 0;
1795 #endif
1796         }
1797 }
1798
1799
1800 /*
1801  * reset_shared -- reset shared memory and semaphores
1802  */
1803 static void
1804 reset_shared(int port)
1805 {
1806         /*
1807          * Create or re-create shared memory and semaphores.
1808          *
1809          * Note: in each "cycle of life" we will normally assign the same IPC keys
1810          * (if using SysV shmem and/or semas), since the port number is used to
1811          * determine IPC keys.  This helps ensure that we will clean up dead IPC
1812          * objects if the postmaster crashes and is restarted.
1813          */
1814         CreateSharedMemoryAndSemaphores(false, port);
1815 }
1816
1817
1818 /*
1819  * SIGHUP -- reread config files, and tell children to do same
1820  */
1821 static void
1822 SIGHUP_handler(SIGNAL_ARGS)
1823 {
1824         int                     save_errno = errno;
1825
1826         PG_SETMASK(&BlockSig);
1827
1828         if (Shutdown <= SmartShutdown)
1829         {
1830                 ereport(LOG,
1831                                 (errmsg("received SIGHUP, reloading configuration files")));
1832                 ProcessConfigFile(PGC_SIGHUP);
1833                 SignalChildren(SIGHUP);
1834                 if (BgWriterPID != 0)
1835                         kill(BgWriterPID, SIGHUP);
1836                 if (AutoVacPID != 0)
1837                         kill(AutoVacPID, SIGHUP);
1838                 if (PgArchPID != 0)
1839                         kill(PgArchPID, SIGHUP);
1840                 if (SysLoggerPID != 0)
1841                         kill(SysLoggerPID, SIGHUP);
1842                 /* PgStatPID does not currently need SIGHUP */
1843
1844                 /* Reload authentication config files too */
1845                 load_hba();
1846                 load_ident();
1847
1848 #ifdef EXEC_BACKEND
1849                 /* Update the starting-point file for future children */
1850                 write_nondefault_variables(PGC_SIGHUP);
1851 #endif
1852         }
1853
1854         PG_SETMASK(&UnBlockSig);
1855
1856         errno = save_errno;
1857 }
1858
1859
1860 /*
1861  * pmdie -- signal handler for processing various postmaster signals.
      *
      * Dispatches on postgres_signal_arg:
      *      SIGTERM - smart shutdown: wait for children to finish, then shut down
      *      SIGINT  - fast shutdown: SIGTERM the children, shut down when they exit
      *      SIGQUIT - immediate shutdown: SIGQUIT every child and exit right away
      *
      * For smart and fast shutdown the actual shutdown sequence is begun here
      * only if there are no backends and no autovacuum process left, no startup
      * process is running and FatalError is not set; in every other case the
      * remaining work is left to reaper().
      *
      * errno is saved on entry and restored on exit.  The BlockSig mask is
      * installed while the handler runs and UnBlockSig before it returns.
1862  */
1863 static void
1864 pmdie(SIGNAL_ARGS)
1865 {
1866         int                     save_errno = errno;
1867
1868         PG_SETMASK(&BlockSig);
1869
1870         ereport(DEBUG2,
1871                         (errmsg_internal("postmaster received signal %d",
1872                                                          postgres_signal_arg)));
1873
             /* Signals other than the three cases below pass through untouched */
1874         switch (postgres_signal_arg)
1875         {
1876                 case SIGTERM:
1877
1878                         /*
1879                          * Smart Shutdown:
1880                          *
1881                          * Wait for children to end their work, then shut down.
1882                          */
                             /* Ignore if a smart (or stronger) shutdown is already pending */
1883                         if (Shutdown >= SmartShutdown)
1884                                 break;
1885                         Shutdown = SmartShutdown;
1886                         ereport(LOG,
1887                                         (errmsg("received smart shutdown request")));
1888
1889                         /*
1890                          * We won't wait out an autovacuum iteration ...
1891                          */
1892                         if (AutoVacPID != 0)
1893                         {
1894                                 /* Use statement cancel to shut it down */
1895                                 kill(AutoVacPID, SIGINT);
1896                                 break;                  /* let reaper() handle this */
1897                         }
1898
1899                         if (DLGetHead(BackendList))
1900                                 break;                  /* let reaper() handle this */
1901
1902                         /*
1903                          * No children left. Begin shutdown of data base system.
1904                          */
1905                         if (StartupPID != 0 || FatalError)
1906                                 break;                  /* let reaper() handle this */
1907                         /* Start the bgwriter if not running */
1908                         if (BgWriterPID == 0)
1909                                 BgWriterPID = StartBackgroundWriter();
1910                         /* And tell it to shut down */
1911                         if (BgWriterPID != 0)
1912                                 kill(BgWriterPID, SIGUSR2);
1913                         /* Tell pgarch to shut down too; nothing left for it to do */
1914                         if (PgArchPID != 0)
1915                                 kill(PgArchPID, SIGQUIT);
1916                         /* Tell pgstat to shut down too; nothing left for it to do */
1917                         if (PgStatPID != 0)
1918                                 kill(PgStatPID, SIGQUIT);
1919                         break;
1920
1921                 case SIGINT:
1922
1923                         /*
1924                          * Fast Shutdown:
1925                          *
1926                          * Abort all children with SIGTERM (rollback active transactions
1927                          * and exit) and shut down when they are gone.
1928                          */
                             /* Ignore if a fast (or stronger) shutdown is already pending */
1929                         if (Shutdown >= FastShutdown)
1930                                 break;
1931                         Shutdown = FastShutdown;
1932                         ereport(LOG,
1933                                         (errmsg("received fast shutdown request")));
1934
1935                         if (DLGetHead(BackendList) || AutoVacPID != 0)
1936                         {
                                     /* With FatalError set nothing is signalled here; we just wait */
1937                                 if (!FatalError)
1938                                 {
1939                                         ereport(LOG,
1940                                                         (errmsg("aborting any active transactions")));
1941                                         SignalChildren(SIGTERM);
1942                                         if (AutoVacPID != 0)
1943                                                 kill(AutoVacPID, SIGTERM);
1944                                         /* reaper() does the rest */
1945                                 }
1946                                 break;
1947                         }
1948
1949                         /*
1950                          * No children left. Begin shutdown of data base system.
1951                          *
1952                          * Note: if we previously got SIGTERM then we may send SIGUSR2 to
1953                          * the bgwriter a second time here.  This should be harmless.
1954                          */
1955                         if (StartupPID != 0 || FatalError)
1956                                 break;                  /* let reaper() handle this */
1957                         /* Start the bgwriter if not running */
1958                         if (BgWriterPID == 0)
1959                                 BgWriterPID = StartBackgroundWriter();
1960                         /* And tell it to shut down */
1961                         if (BgWriterPID != 0)
1962                                 kill(BgWriterPID, SIGUSR2);
1963                         /* Tell pgarch to shut down too; nothing left for it to do */
1964                         if (PgArchPID != 0)
1965                                 kill(PgArchPID, SIGQUIT);
1966                         /* Tell pgstat to shut down too; nothing left for it to do */
1967                         if (PgStatPID != 0)
1968                                 kill(PgStatPID, SIGQUIT);
1969                         break;
1970
1971                 case SIGQUIT:
1972
1973                         /*
1974                          * Immediate Shutdown:
1975                          *
1976                          * abort all children with SIGQUIT and exit without attempt to
1977                          * properly shut down data base system.
1978                          */
                             /* Unlike the cases above, the current Shutdown state is not checked */
1979                         ereport(LOG,
1980                                         (errmsg("received immediate shutdown request")));
1981                         if (StartupPID != 0)
1982                                 kill(StartupPID, SIGQUIT);
1983                         if (BgWriterPID != 0)
1984                                 kill(BgWriterPID, SIGQUIT);
1985                         if (AutoVacPID != 0)
1986                                 kill(AutoVacPID, SIGQUIT);
1987                         if (PgArchPID != 0)
1988                                 kill(PgArchPID, SIGQUIT);
1989                         if (PgStatPID != 0)
1990                                 kill(PgStatPID, SIGQUIT);
1991                         if (DLGetHead(BackendList))
1992                                 SignalChildren(SIGQUIT);
1993                         ExitPostmaster(0);
1994                         break;
1995         }
1996
             /* Common exit: reinstate the unblocked signal mask and caller's errno */
1997         PG_SETMASK(&UnBlockSig);
1998
1999         errno = save_errno;
2000 }
2001
2002 /*
2003  * Reaper -- signal handler to cleanup after a backend (child) dies.
      *
      * Collects the exit status of every child that has one pending, using a
      * non-blocking wait (WNOHANG) where a wait call is used, and dispatches on
      * which child it was: the startup process, the bgwriter, autovacuum, the
      * archiver, the statistics collector or the system logger.  Any other PID
      * is handed to CleanupBackend().
      *
      * After the loop:
      *      - if FatalError is set and no backends, startup process, bgwriter or
      *        autovacuum remain, shared memory is reset and a new startup process
      *        is launched;
      *      - otherwise, if a shutdown is pending and no backends, startup process
      *        or autovacuum remain, the bgwriter is (started if needed and) told
      *        to shut down, and the archiver and stats collector are sent SIGQUIT.
      *
      * errno is saved on entry and restored on exit.  The BlockSig mask is
      * installed while the handler runs and UnBlockSig before it returns.
2004  */
2005 static void
2006 reaper(SIGNAL_ARGS)
2007 {
2008         int                     save_errno = errno;
2009
2010 #ifdef HAVE_WAITPID
2011         int                     status;                 /* backend exit status */
2012 #else
2013 #ifndef WIN32
2014         union wait      status;                 /* backend exit status */
2015 #endif
2016 #endif
             /* exit status as a plain int, whichever wait variant supplied it */
2017         int                     exitstatus;
2018         int                     pid;                    /* process id of dead backend */
2019
2020         PG_SETMASK(&BlockSig);
2021
2022         ereport(DEBUG4,
2023                         (errmsg_internal("reaping dead processes")));
             /*
              * Exactly one of the three loop headers below is compiled in.  All of
              * them share the single loop body that follows the preprocessor
              * conditionals, and each leaves the child's status in exitstatus.
              */
2024 #ifdef HAVE_WAITPID
2025         while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
2026         {
2027                 exitstatus = status;
2028 #else
2029 #ifndef WIN32
2030         while ((pid = wait3(&status, WNOHANG, NULL)) > 0)
2031         {
2032                 exitstatus = status.w_status;
2033 #else
2034         while ((pid = win32_waitpid(&exitstatus)) > 0)
2035         {
2036                 /*
2037                  * We need to do this here, and not in CleanupBackend, since this is
2038                  * to be called on all children when we are done with them. Could move
2039                  * to LogChildExit, but that seems like asking for future trouble...
2040                  */
2041                 win32_RemoveChild(pid);
2042 #endif   /* WIN32 */
2043 #endif   /* HAVE_WAITPID */
2044
2045                 /*
2046                  * Check if this child was a startup process.
2047                  */
2048                 if (StartupPID != 0 && pid == StartupPID)
2049                 {
2050                         StartupPID = 0;
                             /* A failed startup process is fatal for the postmaster itself */
2051                         if (exitstatus != 0)
2052                         {
2053                                 LogChildExit(LOG, _("startup process"),
2054                                                          pid, exitstatus);
2055                                 ereport(LOG,
2056                                 (errmsg("aborting startup due to startup process failure")));
2057                                 ExitPostmaster(1);
2058                         }
2059
2060                         /*
2061                          * Startup succeeded - we are done with system startup or
2062                          * recovery.
2063                          */
2064                         FatalError = false;
2065
2066                         /*
2067                          * Load the flat authorization file into postmaster's cache. The
2068                          * startup process has recomputed this from the database contents,
2069                          * so we wait till it finishes before loading it.
2070                          */
2071                         load_role();
2072
2073                         /*
2074                          * Crank up the background writer.      It doesn't matter if this
2075                          * fails, we'll just try again later.
2076                          */
2077                         Assert(BgWriterPID == 0);
2078                         BgWriterPID = StartBackgroundWriter();
2079
2080                         /*
2081                          * Go to shutdown mode if a shutdown request was pending.
2082                          * Otherwise, try to start the archiver and stats collector too.
2083                          * (We could, but don't, try to start autovacuum here.)
2084                          */
2085                         if (Shutdown > NoShutdown && BgWriterPID != 0)
2086                                 kill(BgWriterPID, SIGUSR2);
2087                         else if (Shutdown == NoShutdown)
2088                         {
2089                                 if (XLogArchivingActive() && PgArchPID == 0)
2090                                         PgArchPID = pgarch_start();
2091                                 if (PgStatPID == 0)
2092                                         PgStatPID = pgstat_start();
2093                         }
2094
2095                         continue;
2096                 }
2097
2098                 /*
2099                  * Was it the bgwriter?
2100                  */
2101                 if (BgWriterPID != 0 && pid == BgWriterPID)
2102                 {
2103                         BgWriterPID = 0;
2104                         if (exitstatus == 0 && Shutdown > NoShutdown && !FatalError &&
2105                                 !DLGetHead(BackendList) && AutoVacPID == 0)
2106                         {
2107                                 /*
2108                                  * Normal postmaster exit is here: we've seen normal exit of
2109                                  * the bgwriter after it's been told to shut down. We expect
2110                                  * that it wrote a shutdown checkpoint.  (If for some reason
2111                                  * it didn't, recovery will occur on next postmaster start.)
2112                                  *
2113                                  * Note: we do not wait around for exit of the archiver or
2114                                  * stats processes.  They've been sent SIGQUIT by this point,
2115                                  * and in any case contain logic to commit hara-kiri if they
2116                                  * notice the postmaster is gone.
2117                                  */
2118                                 ExitPostmaster(0);
2119                         }
2120
2121                         /*
2122                          * Any unexpected exit of the bgwriter is treated as a crash.
2123                          */
2124                         HandleChildCrash(pid, exitstatus,
2125                                                          _("background writer process"));
2126                         continue;
2127                 }
2128
2129                 /*
2130                  * Was it the autovacuum process?  Normal exit can be ignored; we'll
2131                  * start a new one at the next iteration of the postmaster's main
2132                  * loop, if necessary.
2133                  *
2134                  * An unexpected exit must crash the system.
2135                  */
2136                 if (AutoVacPID != 0 && pid == AutoVacPID)
2137                 {
2138                         AutoVacPID = 0;
2139                         autovac_stopped();
2140                         if (exitstatus != 0)
2141                                 HandleChildCrash(pid, exitstatus,
2142                                                                  _("autovacuum process"));
2143                         continue;
2144                 }
2145
2146                 /*
2147                  * Was it the archiver?  If so, just try to start a new one; no need
2148                  * to force reset of the rest of the system.  (If fail, we'll try
2149                  * again in future cycles of the main loop.)
2150                  */
2151                 if (PgArchPID != 0 && pid == PgArchPID)
2152                 {
2153                         PgArchPID = 0;
2154                         if (exitstatus != 0)
2155                                 LogChildExit(LOG, _("archiver process"),
2156                                                          pid, exitstatus);
                             /* Restart only in normal running state, with archiving enabled */
2157                         if (XLogArchivingActive() &&
2158                                 StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
2159                                 PgArchPID = pgarch_start();
2160                         continue;
2161                 }
2162
2163                 /*
2164                  * Was it the statistics collector?  If so, just try to start a new
2165                  * one; no need to force reset of the rest of the system.  (If fail,
2166                  * we'll try again in future cycles of the main loop.)
2167                  */
2168                 if (PgStatPID != 0 && pid == PgStatPID)
2169                 {
2170                         PgStatPID = 0;
2171                         if (exitstatus != 0)
2172                                 LogChildExit(LOG, _("statistics collector process"),
2173                                                          pid, exitstatus);
                             /* Restart only in normal running state */
2174                         if (StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
2175                                 PgStatPID = pgstat_start();
2176                         continue;
2177                 }
2178
2179                 /* Was it the system logger? try to start a new one */
2180                 if (SysLoggerPID != 0 && pid == SysLoggerPID)
2181                 {
2182                         SysLoggerPID = 0;
2183                         /* for safety's sake, launch new logger *first* */
2184                         SysLoggerPID = SysLogger_Start();
2185                         if (exitstatus != 0)
2186                                 LogChildExit(LOG, _("system logger process"),
2187                                                          pid, exitstatus);
2188                         continue;
2189                 }
2190
2191                 /*
2192                  * Else do standard backend child cleanup.
2193                  */
2194                 CleanupBackend(pid, exitstatus);
2195         }                                                       /* loop over pending child-death reports */
2196
2197         if (FatalError)
2198         {
2199                 /*
2200                  * Wait for all important children to exit, then reset shmem and
2201                  * StartupDataBase.  (We can ignore the archiver and stats processes
2202                  * here since they are not connected to shmem.)
2203                  */
2204                 if (DLGetHead(BackendList) || StartupPID != 0 || BgWriterPID != 0 ||
2205                         AutoVacPID != 0)
2206                         goto reaper_done;
2207                 ereport(LOG,
2208                                 (errmsg("all server processes terminated; reinitializing")));
2209
2210                 shmem_exit(0);
2211                 reset_shared(PostPortNumber);
2212
2213                 StartupPID = StartupDataBase();
2214
2215                 goto reaper_done;
2216         }
2217
             /*
              * A shutdown is pending and FatalError is not set.  Once no backends,
              * startup process or autovacuum remain, run the same sequence pmdie()
              * uses when it finds no children left.
              */
2218         if (Shutdown > NoShutdown)
2219         {
2220                 if (DLGetHead(BackendList) || StartupPID != 0 || AutoVacPID != 0)
2221                         goto reaper_done;
2222                 /* Start the bgwriter if not running */
2223                 if (BgWriterPID == 0)
2224                         BgWriterPID = StartBackgroundWriter();
2225                 /* And tell it to shut down */
2226                 if (BgWriterPID != 0)
2227                         kill(BgWriterPID, SIGUSR2);
2228                 /* Tell pgarch to shut down too; nothing left for it to do */
2229                 if (PgArchPID != 0)
2230                         kill(PgArchPID, SIGQUIT);
2231                 /* Tell pgstat to shut down too; nothing left for it to do */
2232                 if (PgStatPID != 0)
2233                         kill(PgStatPID, SIGQUIT);
2234         }
2235
             /* Common exit: reinstate the unblocked signal mask and caller's errno */
2236 reaper_done:
2237         PG_SETMASK(&UnBlockSig);
2238
2239         errno = save_errno;
2240 }
2241
2242
2243 /*
2244  * CleanupBackend -- cleanup after terminated backend.
2245  *
2246  * Remove all local state associated with backend.
2247  */
2248 static void
2249 CleanupBackend(int pid,
2250                            int exitstatus)      /* child's exit status. */
2251 {
2252         Dlelem     *curr;
2253
2254         LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
2255
2256         /*
2257          * If a backend dies in an ugly way (i.e. exit status not 0) then we must
2258          * signal all other backends to quickdie.  If exit status is zero we
2259          * assume everything is hunky dory and simply remove the backend from the
2260          * active backend list.
2261          */
2262         if (exitstatus != 0)
2263         {
2264                 HandleChildCrash(pid, exitstatus, _("server process"));
2265                 return;
2266         }
2267
2268         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2269         {
2270                 Backend    *bp = (Backend *) DLE_VAL(curr);
2271
2272                 if (bp->pid == pid)
2273                 {
2274                         DLRemove(curr);
2275                         free(bp);
2276                         DLFreeElem(curr);
2277 #ifdef EXEC_BACKEND
2278                         ShmemBackendArrayRemove(pid);
2279 #endif
2280                         /* Tell the collector about backend termination */
2281                         pgstat_beterm(pid);
2282                         break;
2283                 }
2284         }
2285 }
2286
2287 /*
2288  * HandleChildCrash -- cleanup after failed backend, bgwriter, or autovacuum.
2289  *
2290  * The objectives here are to clean up our local state about the child
2291  * process, and to signal all other remaining children to quickdie.
2292  */
2293 static void
2294 HandleChildCrash(int pid, int exitstatus, const char *procname)
2295 {
2296         Dlelem     *curr,
2297                            *next;
2298         Backend    *bp;
2299
2300         /*
2301          * Make log entry unless there was a previous crash (if so, nonzero exit
2302          * status is to be expected in SIGQUIT response; don't clutter log)
2303          */
2304         if (!FatalError)
2305         {
2306                 LogChildExit(LOG, procname, pid, exitstatus);
2307                 ereport(LOG,
2308                                 (errmsg("terminating any other active server processes")));
2309         }
2310
2311         /* Process regular backends */
2312         for (curr = DLGetHead(BackendList); curr; curr = next)
2313         {
2314                 next = DLGetSucc(curr);
2315                 bp = (Backend *) DLE_VAL(curr);
2316                 if (bp->pid == pid)
2317                 {
2318                         /*
2319                          * Found entry for freshly-dead backend, so remove it.
2320                          */
2321                         DLRemove(curr);
2322                         free(bp);
2323                         DLFreeElem(curr);
2324 #ifdef EXEC_BACKEND
2325                         ShmemBackendArrayRemove(pid);
2326 #endif
2327                         /* Tell the collector about backend termination */
2328                         pgstat_beterm(pid);
2329                         /* Keep looping so we can signal remaining backends */
2330                 }
2331                 else
2332                 {
2333                         /*
2334                          * This backend is still alive.  Unless we did so already, tell it
2335                          * to commit hara-kiri.
2336                          *
2337                          * SIGQUIT is the special signal that says exit without proc_exit
2338                          * and let the user know what's going on. But if SendStop is set
2339                          * (-s on command line), then we send SIGSTOP instead, so that we
2340                          * can get core dumps from all backends by hand.
2341                          */
2342                         if (!FatalError)
2343                         {
2344                                 ereport(DEBUG2,
2345                                                 (errmsg_internal("sending %s to process %d",
2346                                                                                  (SendStop ? "SIGSTOP" : "SIGQUIT"),
2347                                                                                  (int) bp->pid)));
2348                                 kill(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
2349                         }
2350                 }
2351         }
2352
2353         /* Take care of the bgwriter too */
2354         if (pid == BgWriterPID)
2355                 BgWriterPID = 0;
2356         else if (BgWriterPID != 0 && !FatalError)
2357         {
2358                 ereport(DEBUG2,
2359                                 (errmsg_internal("sending %s to process %d",
2360                                                                  (SendStop ? "SIGSTOP" : "SIGQUIT"),
2361                                                                  (int) BgWriterPID)));
2362                 kill(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
2363         }
2364
2365         /* Take care of the autovacuum daemon too */
2366         if (pid == AutoVacPID)
2367                 AutoVacPID = 0;
2368         else if (AutoVacPID != 0 && !FatalError)
2369         {
2370                 ereport(DEBUG2,
2371                                 (errmsg_internal("sending %s to process %d",
2372                                                                  (SendStop ? "SIGSTOP" : "SIGQUIT"),
2373                                                                  (int) AutoVacPID)));
2374                 kill(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT));
2375         }
2376
2377         /* Force a power-cycle of the pgarch process too */
2378         /* (Shouldn't be necessary, but just for luck) */
2379         if (PgArchPID != 0 && !FatalError)
2380         {
2381                 ereport(DEBUG2,
2382                                 (errmsg_internal("sending %s to process %d",
2383                                                                  "SIGQUIT",
2384                                                                  (int) PgArchPID)));
2385                 kill(PgArchPID, SIGQUIT);
2386         }
2387
2388         /* Force a power-cycle of the pgstat processes too */
2389         /* (Shouldn't be necessary, but just for luck) */
2390         if (PgStatPID != 0 && !FatalError)
2391         {
2392                 ereport(DEBUG2,
2393                                 (errmsg_internal("sending %s to process %d",
2394                                                                  "SIGQUIT",
2395                                                                  (int) PgStatPID)));
2396                 kill(PgStatPID, SIGQUIT);
2397         }
2398
2399         /* We do NOT restart the syslogger */
2400
2401         FatalError = true;
2402 }
2403
/*
 * LogChildExit -- report how a child process ended.
 *
 * lev is the ereport level to use, procname a noun phrase naming the kind of
 * child, pid its process ID and exitstatus its raw exit status.  Separate
 * messages are used for a normal exit, for termination by a signal, and for
 * any status that is neither.
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
    if (WIFEXITED(exitstatus))
    {
        ereport(lev,

        /*
         * translator: %s is a noun phrase describing a child process, such as
         * "server process"
         */
                (errmsg("%s (PID %d) exited with exit code %d",
                        procname, pid, WEXITSTATUS(exitstatus))));
        return;
    }

    if (WIFSIGNALED(exitstatus))
    {
        ereport(lev,

        /*
         * translator: %s is a noun phrase describing a child process, such as
         * "server process"
         */
                (errmsg("%s (PID %d) was terminated by signal %d",
                        procname, pid, WTERMSIG(exitstatus))));
        return;
    }

    ereport(lev,

    /*
     * translator: %s is a noun phrase describing a child process, such as
     * "server process"
     */
            (errmsg("%s (PID %d) exited with unexpected status %d",
                    procname, pid, exitstatus)));
}
2438
2439 /*
2440  * Send a signal to all backend children (but NOT special children)
2441  */
2442 static void
2443 SignalChildren(int signal)
2444 {
2445         Dlelem     *curr;
2446
2447         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2448         {
2449                 Backend    *bp = (Backend *) DLE_VAL(curr);
2450
2451                 ereport(DEBUG4,
2452                                 (errmsg_internal("sending signal %d to process %d",
2453                                                                  signal, (int) bp->pid)));
2454                 kill(bp->pid, signal);
2455         }
2456 }
2457
2458 /*
2459  * BackendStartup -- start backend process
2460  *
2461  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
2462  */
2463 static int
2464 BackendStartup(Port *port)
2465 {
2466         Backend    *bn;                         /* for backend cleanup */
2467         pid_t           pid;
2468
2469         /*
2470          * Compute the cancel key that will be assigned to this backend. The
2471          * backend will have its own copy in the forked-off process' value of
2472          * MyCancelKey, so that it can transmit the key to the frontend.
2473          */
2474         MyCancelKey = PostmasterRandom();
2475
2476         /*
2477          * Make room for backend data structure.  Better before the fork() so we
2478          * can handle failure cleanly.
2479          */
2480         bn = (Backend *) malloc(sizeof(Backend));
2481         if (!bn)
2482         {
2483                 ereport(LOG,
2484                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2485                                  errmsg("out of memory")));
2486                 return STATUS_ERROR;
2487         }
2488
2489         /* Pass down canAcceptConnections state (kluge for EXEC_BACKEND case) */
2490         port->canAcceptConnections = canAcceptConnections();
2491
2492 #ifdef EXEC_BACKEND
2493         pid = backend_forkexec(port);
2494 #else                                                   /* !EXEC_BACKEND */
2495         pid = fork_process();
2496         if (pid == 0)                           /* child */
2497         {
2498                 free(bn);
2499                 proc_exit(BackendRun(port));
2500         }
2501 #endif   /* EXEC_BACKEND */
2502
2503         if (pid < 0)
2504         {
2505                 /* in parent, fork failed */
2506                 int                     save_errno = errno;
2507
2508                 free(bn);
2509                 errno = save_errno;
2510                 ereport(LOG,
2511                                 (errmsg("could not fork new process for connection: %m")));
2512                 report_fork_failure_to_client(port, save_errno);
2513                 return STATUS_ERROR;
2514         }
2515
2516         /* in parent, successful fork */
2517         ereport(DEBUG2,
2518                         (errmsg_internal("forked new backend, pid=%d socket=%d",
2519                                                          (int) pid, port->sock)));
2520
2521         /*
2522          * Everything's been successful, it's safe to add this backend to our list
2523          * of backends.
2524          */
2525         bn->pid = pid;
2526         bn->cancel_key = MyCancelKey;
2527         DLAddHead(BackendList, DLNewElem(bn));
2528 #ifdef EXEC_BACKEND
2529         ShmemBackendArrayAdd(bn);
2530 #endif
2531
2532         return STATUS_OK;
2533 }
2534
2535 /*
2536  * Try to report backend fork() failure to client before we close the
2537  * connection.  Since we do not care to risk blocking the postmaster on
2538  * this connection, we set the connection to non-blocking and try only once.
2539  *
2540  * This is grungy special-purpose code; we cannot use backend libpq since
2541  * it's not up and running.
2542  */
2543 static void
2544 report_fork_failure_to_client(Port *port, int errnum)
2545 {
2546         char            buffer[1000];
2547
2548         /* Format the error message packet (always V2 protocol) */
2549         snprintf(buffer, sizeof(buffer), "E%s%s\n",
2550                          _("could not fork new process for connection: "),
2551                          strerror(errnum));
2552
2553         /* Set port to non-blocking.  Don't do send() if this fails */
2554         if (!pg_set_noblock(port->sock))
2555                 return;
2556
2557         send(port->sock, buffer, strlen(buffer) + 1, 0);
2558 }
2559
2560
/*
 * split_opts -- break an option string into words and append them to argv
 *
 * argv is the array being filled, *argcp the index of its next free slot
 * (advanced once per word stored), and s the option string, which may be
 * NULL.  The caller must make sure argv has room for every word.
 *
 * NB: the string is destructively modified!  Each word is terminated in
 * place, and the argv entries point into the string itself.
 *
 * Since no current POSTGRES arguments require any quoting characters,
 * every run of non-whitespace characters is simply taken to be one argv
 * element.
 *
 * If you don't like that, well, we *used* to pass the whole option string
 * as ONE argument to execl(), which was even less intelligent...
 */
static void
split_opts(char **argv, int *argcp, char *s)
{
    char       *cursor = s;

    if (cursor == NULL)
        return;

    for (;;)
    {
        /* step over any whitespace in front of the next word */
        while (isspace((unsigned char) *cursor))
            cursor++;
        if (*cursor == '\0')
            return;

        /* a word starts here; remember it */
        argv[*argcp] = cursor;
        *argcp += 1;

        /* advance to the end of the word */
        while (*cursor != '\0' && !isspace((unsigned char) *cursor))
            cursor++;
        if (*cursor == '\0')
            return;

        /* cut the word off and move on to what follows */
        *cursor++ = '\0';
    }
}
2589
2590
2591 /*
2592  * BackendRun -- perform authentication, and if successful,
2593  *                              set up the backend's argument list and invoke PostgresMain()
2594  *
2595  * returns:
2596  *              Shouldn't return at all.
2597  *              If PostgresMain() fails, return status.
2598  */
2599 static int
2600 BackendRun(Port *port)
2601 {
2602         int                     status;
2603         char            remote_host[NI_MAXHOST];
2604         char            remote_port[NI_MAXSERV];
2605         char            remote_ps_data[NI_MAXHOST];
2606         char      **av;
2607         int                     maxac;
2608         int                     ac;
2609         char            protobuf[32];
2610         int                     i;
2611
2612         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
2613
2614         /*
2615          * Let's clean up ourselves as the postmaster child, and close the
2616          * postmaster's listen sockets
2617          */
2618         ClosePostmasterPorts(false);
2619
2620         /* We don't want the postmaster's proc_exit() handlers */
2621         on_exit_reset();
2622
2623         /*
2624          * Signal handlers setting is moved to tcop/postgres...
2625          */
2626
2627         /* Save port etc. for ps status */
2628         MyProcPort = port;
2629
2630         /* Reset MyProcPid to new backend's pid */
2631         MyProcPid = getpid();
2632
2633         /*
2634          * PreAuthDelay is a debugging aid for investigating problems in the
2635          * authentication cycle: it can be set in postgresql.conf to allow time to
2636          * attach to the newly-forked backend with a debugger. (See also the -W
2637          * backend switch, which we allow clients to pass through PGOPTIONS, but
2638          * it is not honored until after authentication.)
2639          */
2640         if (PreAuthDelay > 0)
2641                 pg_usleep(PreAuthDelay * 1000000L);
2642
2643         ClientAuthInProgress = true;    /* limit visibility of log messages */
2644
2645         /* save start time for end of session reporting */
2646         gettimeofday(&(port->session_start), NULL);
2647
2648         /* set these to empty in case they are needed before we set them up */
2649         port->remote_host = "";
2650         port->remote_port = "";
2651
2652         /*
2653          * Initialize libpq and enable reporting of ereport errors to the client.
2654          * Must do this now because authentication uses libpq to send messages.
2655          */
2656         pq_init();                                      /* initialize libpq to talk to client */
2657         whereToSendOutput = DestRemote;         /* now safe to ereport to client */
2658
2659         /*
2660          * We arrange for a simple exit(0) if we receive SIGTERM or SIGQUIT during
2661          * any client authentication related communication. Otherwise the
2662          * postmaster cannot shutdown the database FAST or IMMED cleanly if a
2663          * buggy client blocks a backend during authentication.
2664          */
2665         pqsignal(SIGTERM, authdie);
2666         pqsignal(SIGQUIT, authdie);
2667         pqsignal(SIGALRM, authdie);
2668         PG_SETMASK(&AuthBlockSig);
2669
2670         /*
2671          * Get the remote host name and port for logging and status display.
2672          */
2673         remote_host[0] = '\0';
2674         remote_port[0] = '\0';
2675         if (pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2676                                                    remote_host, sizeof(remote_host),
2677                                                    remote_port, sizeof(remote_port),
2678                                            (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV))
2679         {
2680                 int                     ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2681                                                                                          remote_host, sizeof(remote_host),
2682                                                                                          remote_port, sizeof(remote_port),
2683                                                                                          NI_NUMERICHOST | NI_NUMERICSERV);
2684
2685                 if (ret)
2686                         ereport(WARNING,
2687                                         (errmsg_internal("pg_getnameinfo_all() failed: %s",
2688                                                                          gai_strerror(ret))));
2689         }
2690         snprintf(remote_ps_data, sizeof(remote_ps_data),
2691                          remote_port[0] == '\0' ? "%s" : "%s(%s)",
2692                          remote_host, remote_port);
2693
2694         if (Log_connections)
2695                 ereport(LOG,
2696                                 (errmsg("connection received: host=%s%s%s",
2697                                                 remote_host, remote_port[0] ? " port=" : "",
2698                                                 remote_port)));
2699
2700         /*
2701          * save remote_host and remote_port in port stucture
2702          */
2703         port->remote_host = strdup(remote_host);
2704         port->remote_port = strdup(remote_port);
2705
2706         /*
2707          * In EXEC_BACKEND case, we didn't inherit the contents of pg_hba.conf
2708          * etcetera from the postmaster, and have to load them ourselves. Build
2709          * the PostmasterContext (which didn't exist before, in this process) to
2710          * contain the data.
2711          *
2712          * FIXME: [fork/exec] Ugh.      Is there a way around this overhead?
2713          */
2714 #ifdef EXEC_BACKEND
2715         Assert(PostmasterContext == NULL);
2716         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
2717                                                                                           "Postmaster",
2718                                                                                           ALLOCSET_DEFAULT_MINSIZE,
2719                                                                                           ALLOCSET_DEFAULT_INITSIZE,
2720                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
2721         MemoryContextSwitchTo(PostmasterContext);
2722
2723         load_hba();
2724         load_ident();
2725         load_role();
2726 #endif
2727
2728         /*
2729          * Ready to begin client interaction.  We will give up and exit(0) after a
2730          * time delay, so that a broken client can't hog a connection
2731          * indefinitely.  PreAuthDelay doesn't count against the time limit.
2732          */
2733         if (!enable_sig_alarm(AuthenticationTimeout * 1000, false))
2734                 elog(FATAL, "could not set timer for authorization timeout");
2735
2736         /*
2737          * Receive the startup packet (which might turn out to be a cancel request
2738          * packet).
2739          */
2740         status = ProcessStartupPacket(port, false);
2741
2742         if (status != STATUS_OK)
2743                 proc_exit(0);
2744
2745         /*
2746          * Now that we have the user and database name, we can set the process
2747          * title for ps.  It's good to do this as early as possible in startup.
2748          */
2749         init_ps_display(port->user_name, port->database_name, remote_ps_data);
2750         set_ps_display("authentication");
2751
2752         /*
2753          * Now perform authentication exchange.
2754          */
2755         ClientAuthentication(port); /* might not return, if failure */
2756
2757         /*
2758          * Done with authentication.  Disable timeout, and prevent SIGTERM/SIGQUIT
2759          * again until backend startup is complete.
2760          */
2761         if (!disable_sig_alarm(false))
2762                 elog(FATAL, "could not disable timer for authorization timeout");
2763         PG_SETMASK(&BlockSig);
2764
2765         if (Log_connections)
2766                 ereport(LOG,
2767                                 (errmsg("connection authorized: user=%s database=%s",
2768                                                 port->user_name, port->database_name)));
2769
2770         /*
2771          * Don't want backend to be able to see the postmaster random number
2772          * generator state.  We have to clobber the static random_seed *and* start
2773          * a new random sequence in the random() library function.
2774          */
2775         random_seed = 0;
2776         srandom((unsigned int) (MyProcPid ^ port->session_start.tv_usec));
2777
2778         /* ----------------
2779          * Now, build the argv vector that will be given to PostgresMain.
2780          *
2781          * The layout of the command line is
2782          *              postgres [secure switches] -p databasename [insecure switches]
2783          * where the switches after -p come from the client request.
2784          *
2785          * The maximum possible number of commandline arguments that could come
2786          * from ExtraOptions or port->cmdline_options is (strlen + 1) / 2; see
2787          * split_opts().
2788          * ----------------
2789          */
2790         maxac = 10;                                     /* for fixed args supplied below */
2791         maxac += (strlen(ExtraOptions) + 1) / 2;
2792         if (port->cmdline_options)
2793                 maxac += (strlen(port->cmdline_options) + 1) / 2;
2794
2795         av = (char **) MemoryContextAlloc(TopMemoryContext,
2796                                                                           maxac * sizeof(char *));
2797         ac = 0;
2798
2799         av[ac++] = "postgres";
2800
2801         /*
2802          * Pass any backend switches specified with -o in the postmaster's own
2803          * command line.  We assume these are secure.  (It's OK to mangle
2804          * ExtraOptions now, since we're safely inside a subprocess.)
2805          */
2806         split_opts(av, &ac, ExtraOptions);
2807
2808         /* Tell the backend what protocol the frontend is using. */
2809         snprintf(protobuf, sizeof(protobuf), "-v%u", port->proto);
2810         av[ac++] = protobuf;
2811
2812         /*
2813          * Tell the backend it is being called from the postmaster, and which
2814          * database to use.  -p marks the end of secure switches.
2815          */
2816         av[ac++] = "-p";
2817         av[ac++] = port->database_name;
2818
2819         /*
2820          * Pass the (insecure) option switches from the connection request. (It's
2821          * OK to mangle port->cmdline_options now.)
2822          */
2823         if (port->cmdline_options)
2824                 split_opts(av, &ac, port->cmdline_options);
2825
2826         av[ac] = NULL;
2827
2828         Assert(ac < maxac);
2829
2830         /*
2831          * Release postmaster's working memory context so that backend can recycle
2832          * the space.  Note this does not trash *MyProcPort, because ConnCreate()
2833          * allocated that space with malloc() ... else we'd need to copy the Port
2834          * data here.  Also, subsidiary data such as the username isn't lost
2835          * either; see ProcessStartupPacket().
2836          */
2837         MemoryContextSwitchTo(TopMemoryContext);
2838         MemoryContextDelete(PostmasterContext);
2839         PostmasterContext = NULL;
2840
2841         /*
2842          * Debug: print arguments being passed to backend
2843          */
2844         ereport(DEBUG3,
2845                         (errmsg_internal("%s child[%d]: starting with (",
2846                                                          progname, (int) getpid())));
2847         for (i = 0; i < ac; ++i)
2848                 ereport(DEBUG3,
2849                                 (errmsg_internal("\t%s", av[i])));
2850         ereport(DEBUG3,
2851                         (errmsg_internal(")")));
2852
2853         ClientAuthInProgress = false;           /* client_min_messages is active now */
2854
2855         return (PostgresMain(ac, av, port->user_name));
2856 }
2857
2858
2859 #ifdef EXEC_BACKEND
2860
2861 /*
2862  * postmaster_forkexec -- fork and exec a postmaster subprocess
2863  *
2864  * The caller must have set up the argv array already, except for argv[2]
2865  * which will be filled with the name of the temp variable file.
2866  *
2867  * Returns the child process PID, or -1 on fork failure (a suitable error
2868  * message has been logged on failure).
2869  *
2870  * All uses of this routine will dispatch to SubPostmasterMain in the
2871  * child process.
2872  */
2873 pid_t
2874 postmaster_forkexec(int argc, char *argv[])
2875 {
2876         Port            port;
2877
2878         /* This entry point passes dummy values for the Port variables */
2879         memset(&port, 0, sizeof(port));
2880         return internal_forkexec(argc, argv, &port);
2881 }
2882
2883 /*
2884  * backend_forkexec -- fork/exec off a backend process
2885  *
2886  * returns the pid of the fork/exec'd process, or -1 on failure
2887  */
2888 static pid_t
2889 backend_forkexec(Port *port)
2890 {
2891         char       *av[4];
2892         int                     ac = 0;
2893
2894         av[ac++] = "postgres";
2895         av[ac++] = "-forkbackend";
2896         av[ac++] = NULL;                        /* filled in by internal_forkexec */
2897
2898         av[ac] = NULL;
2899         Assert(ac < lengthof(av));
2900
2901         return internal_forkexec(ac, av, port);
2902 }
2903
2904 #ifndef WIN32
2905
2906 /*
2907  * internal_forkexec non-win32 implementation
2908  *
2909  * - writes out backend variables to the parameter file
2910  * - fork():s, and then exec():s the child process
2911  */
2912 static pid_t
2913 internal_forkexec(int argc, char *argv[], Port *port)
2914 {
2915         static unsigned long tmpBackendFileNum = 0;
2916         pid_t           pid;
2917         char            tmpfilename[MAXPGPATH];
2918         BackendParameters param;
2919         FILE       *fp;
2920
2921         if (!save_backend_variables(&param, port))
2922                 return -1;                              /* log made by save_backend_variables */
2923
2924         /* Calculate name for temp file */
2925         snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
2926                          PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX,
2927                          MyProcPid, ++tmpBackendFileNum);
2928
2929         /* Open file */
2930         fp = AllocateFile(tmpfilename, PG_BINARY_W);
2931         if (!fp)
2932         {
2933                 /* As in OpenTemporaryFile, try to make the temp-file directory */
2934                 mkdir(PG_TEMP_FILES_DIR, S_IRWXU);
2935
2936                 fp = AllocateFile(tmpfilename, PG_BINARY_W);
2937                 if (!fp)
2938                 {
2939                         ereport(LOG,
2940                                         (errcode_for_file_access(),
2941                                          errmsg("could not create file \"%s\": %m",
2942                                                         tmpfilename)));
2943                         return -1;
2944                 }
2945         }
2946
2947         if (fwrite(&param, sizeof(param), 1, fp) != 1)
2948         {
2949                 ereport(LOG,
2950                                 (errcode_for_file_access(),
2951                                  errmsg("could not write to file \"%s\": %m", tmpfilename)));
2952                 FreeFile(fp);
2953                 return -1;
2954         }
2955
2956         /* Release file */
2957         if (FreeFile(fp))
2958         {
2959                 ereport(LOG,
2960                                 (errcode_for_file_access(),
2961                                  errmsg("could not write to file \"%s\": %m", tmpfilename)));
2962                 return -1;
2963         }
2964
2965         /* Make sure caller set up argv properly */
2966         Assert(argc >= 3);
2967         Assert(argv[argc] == NULL);
2968         Assert(strncmp(argv[1], "-fork", 5) == 0);
2969         Assert(argv[2] == NULL);
2970
2971         /* Insert temp file name after -fork argument */
2972         argv[2] = tmpfilename;
2973
2974         /* Fire off execv in child */
2975         if ((pid = fork_process()) == 0)
2976         {
2977                 if (execv(postgres_exec_path, argv) < 0)
2978                 {
2979                         ereport(LOG,
2980                                         (errmsg("could not execute server process \"%s\": %m",
2981                                                         postgres_exec_path)));
2982                         /* We're already in the child process here, can't return */
2983                         exit(1);
2984                 }
2985         }
2986
2987         return pid;                                     /* Parent returns pid, or -1 on fork failure */
2988 }
2989 #else                                                   /* WIN32 */
2990
2991 /*
2992  * internal_forkexec win32 implementation
2993  *
2994  * - starts backend using CreateProcess(), in suspended state
2995  * - writes out backend variables to the parameter file
2996  *      - during this, duplicates handles and sockets required for
2997  *        inheritance into the new process
2998  * - resumes execution of the new process once the backend parameter
2999  *       file is complete.
3000  */
3001 static pid_t
3002 internal_forkexec(int argc, char *argv[], Port *port)
3003 {
3004         STARTUPINFO si;
3005         PROCESS_INFORMATION pi;
3006         int                     i;
3007         int                     j;
3008         char            cmdLine[MAXPGPATH * 2];
3009         HANDLE          childHandleCopy;
3010         HANDLE          waiterThread;
3011         HANDLE          paramHandle;
3012         BackendParameters *param;
3013         SECURITY_ATTRIBUTES sa;
3014         char            paramHandleStr[32];
3015
3016         /* Make sure caller set up argv properly */
3017         Assert(argc >= 3);
3018         Assert(argv[argc] == NULL);
3019         Assert(strncmp(argv[1], "-fork", 5) == 0);
3020         Assert(argv[2] == NULL);
3021
3022         /* Set up shared memory for parameter passing */
3023         ZeroMemory(&sa, sizeof(sa));
3024         sa.nLength = sizeof(sa);
3025         sa.bInheritHandle = TRUE;
3026         paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
3027                                                                         &sa,
3028                                                                         PAGE_READWRITE,
3029                                                                         0,
3030                                                                         sizeof(BackendParameters),
3031                                                                         NULL);
3032         if (paramHandle == INVALID_HANDLE_VALUE)
3033         {
3034                 elog(LOG, "could not create backend parameter file mapping: error code %d",
3035                          (int) GetLastError());
3036                 return -1;
3037         }
3038
3039         param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
3040         if (!param)
3041         {
3042                 elog(LOG, "could not map backend parameter memory: error code %d",
3043                          (int) GetLastError());
3044                 CloseHandle(paramHandle);
3045                 return -1;
3046         }
3047
3048         /* Insert temp file name after -fork argument */
3049         sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
3050         argv[2] = paramHandleStr;
3051
3052         /* Format the cmd line */
3053         cmdLine[sizeof(cmdLine) - 1] = '\0';
3054         cmdLine[sizeof(cmdLine) - 2] = '\0';
3055         snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
3056         i = 0;
3057         while (argv[++i] != NULL)
3058         {
3059                 j = strlen(cmdLine);
3060                 snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
3061         }
3062         if (cmdLine[sizeof(cmdLine) - 2] != '\0')
3063         {
3064                 elog(LOG, "subprocess command line too long");
3065                 return -1;
3066         }
3067
3068         memset(&pi, 0, sizeof(pi));
3069         memset(&si, 0, sizeof(si));
3070         si.cb = sizeof(si);
3071
3072         /*
3073          * Create the subprocess in a suspended state. This will be resumed later,
3074          * once we have written out the parameter file.
3075          */
3076         if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
3077                                            NULL, NULL, &si, &pi))
3078         {
3079                 elog(LOG, "CreateProcess call failed: %m (error code %d)",
3080                          (int) GetLastError());
3081                 return -1;
3082         }
3083
3084         if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
3085         {
3086                 /*
3087                  * log made by save_backend_variables, but we have to clean up the
3088                  * mess with the half-started process
3089                  */
3090                 if (!TerminateProcess(pi.hProcess, 255))
3091                         ereport(ERROR,
3092                                         (errmsg_internal("could not terminate unstarted process: error code %d",
3093                                                                          (int) GetLastError())));
3094                 CloseHandle(pi.hProcess);
3095                 CloseHandle(pi.hThread);
3096                 return -1;                              /* log made by save_backend_variables */
3097         }
3098
3099         /* Drop the shared memory that is now inherited to the backend */
3100         if (!UnmapViewOfFile(param))
3101                 elog(LOG, "could not unmap view of backend parameter file: error code %d",
3102                          (int) GetLastError());
3103         if (!CloseHandle(paramHandle))
3104                 elog(LOG, "could not close handle to backend parameter file: error code %d",
3105                          (int) GetLastError());
3106
3107         /*
3108          * Now that the backend variables are written out, we start the child
3109          * thread so it can start initializing while we set up the rest of the
3110          * parent state.
3111          */
3112         if (ResumeThread(pi.hThread) == -1)
3113         {
3114                 if (!TerminateProcess(pi.hProcess, 255))
3115                 {
3116                         ereport(ERROR,
3117                                         (errmsg_internal("could not terminate unstartable process: error code %d",
3118                                                                          (int) GetLastError())));
3119                         CloseHandle(pi.hProcess);
3120                         CloseHandle(pi.hThread);
3121                         return -1;
3122                 }
3123                 CloseHandle(pi.hProcess);
3124                 CloseHandle(pi.hThread);
3125                 ereport(ERROR,
3126                                 (errmsg_internal("could not resume thread of unstarted process: error code %d",
3127                                                                  (int) GetLastError())));
3128                 return -1;
3129         }
3130
3131         if (!IsUnderPostmaster)
3132         {
3133                 /* We are the Postmaster creating a child... */
3134                 win32_AddChild(pi.dwProcessId, pi.hProcess);
3135         }
3136
3137         /* Set up the thread to handle the SIGCHLD for this process */
3138         if (DuplicateHandle(GetCurrentProcess(),
3139                                                 pi.hProcess,
3140                                                 GetCurrentProcess(),
3141                                                 &childHandleCopy,
3142                                                 0,
3143                                                 FALSE,
3144                                                 DUPLICATE_SAME_ACCESS) == 0)
3145                 ereport(FATAL,
3146                   (errmsg_internal("could not duplicate child handle: error code %d",
3147                                                    (int) GetLastError())));
3148
3149         waiterThread = CreateThread(NULL, 64 * 1024, win32_sigchld_waiter,
3150                                                                 (LPVOID) childHandleCopy, 0, NULL);
3151         if (!waiterThread)
3152                 ereport(FATAL,
3153                                 (errmsg_internal("could not create sigchld waiter thread: error code %d",
3154                                                                  (int) GetLastError())));
3155         CloseHandle(waiterThread);
3156
3157         if (IsUnderPostmaster)
3158                 CloseHandle(pi.hProcess);
3159         CloseHandle(pi.hThread);
3160
3161         return pi.dwProcessId;
3162 }
3163 #endif   /* WIN32 */
3164
3165
3166 /*
3167  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
3168  *                      to what it would be if we'd simply forked on Unix, and then
3169  *                      dispatch to the appropriate place.
3170  *
3171  * The first two command line arguments are expected to be "-forkFOO"
3172  * (where FOO indicates which postmaster child we are to become), and
3173  * the name of a variables file that we can read to load data that would
3174  * have been inherited by fork() on Unix.  Remaining arguments go to the
3175  * subprocess FooMain() routine.
3176  */
3177 int
3178 SubPostmasterMain(int argc, char *argv[])
3179 {
3180         Port            port;
3181
3182         /* Do this sooner rather than later... */
3183         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
3184
3185         MyProcPid = getpid();           /* reset MyProcPid */
3186
3187         /* In EXEC_BACKEND case we will not have inherited these settings */
3188         IsPostmasterEnvironment = true;
3189         whereToSendOutput = DestNone;
3190
3191         /* Setup essential subsystems (to ensure elog() behaves sanely) */
3192         MemoryContextInit();
3193         InitializeGUCOptions();
3194
3195         /* Read in the variables file */
3196         memset(&port, 0, sizeof(Port));
3197         read_backend_variables(argv[2], &port);
3198
3199         /* Check we got appropriate args */
3200         if (argc < 3)
3201                 elog(FATAL, "invalid subpostmaster invocation");
3202
3203         /*
3204          * If appropriate, physically re-attach to shared memory segment. We want
3205          * to do this before going any further to ensure that we can attach at the
3206          * same address the postmaster used.
3207          */
3208         if (strcmp(argv[1], "-forkbackend") == 0 ||
3209                 strcmp(argv[1], "-forkautovac") == 0 ||
3210                 strcmp(argv[1], "-forkboot") == 0)
3211                 PGSharedMemoryReAttach();
3212
3213         /*
3214          * Start our win32 signal implementation. This has to be done after we
3215          * read the backend variables, because we need to pick up the signal pipe
3216          * from the parent process.
3217          */
3218 #ifdef WIN32
3219         pgwin32_signal_initialize();
3220 #endif
3221
3222         /* In EXEC_BACKEND case we will not have inherited these settings */
3223         pqinitmask();
3224         PG_SETMASK(&BlockSig);
3225
3226         /* Read in remaining GUC variables */
3227         read_nondefault_variables();
3228
3229         /* Run backend or appropriate child */
3230         if (strcmp(argv[1], "-forkbackend") == 0)
3231         {
3232                 /* BackendRun will close sockets */
3233
3234                 /* Attach process to shared data structures */
3235                 CreateSharedMemoryAndSemaphores(false, 0);
3236
3237 #ifdef USE_SSL
3238
3239                 /*
3240                  * Need to reinitialize the SSL library in the backend, since the
3241                  * context structures contain function pointers and cannot be passed
3242                  * through the parameter file.
3243                  */
3244                 if (EnableSSL)
3245                         secure_initialize();
3246 #endif
3247
3248                 Assert(argc == 3);              /* shouldn't be any more args */
3249                 proc_exit(BackendRun(&port));
3250         }
3251         if (strcmp(argv[1], "-forkboot") == 0)
3252         {
3253                 /* Close the postmaster's sockets */
3254                 ClosePostmasterPorts(false);
3255
3256                 /* Attach process to shared data structures */
3257                 CreateSharedMemoryAndSemaphores(false, 0);
3258
3259                 BootstrapMain(argc - 2, argv + 2);
3260                 proc_exit(0);
3261         }
3262         if (strcmp(argv[1], "-forkautovac") == 0)
3263         {
3264                 /* Close the postmaster's sockets */
3265                 ClosePostmasterPorts(false);
3266
3267                 /* Attach process to shared data structures */
3268                 CreateSharedMemoryAndSemaphores(false, 0);
3269
3270                 AutoVacMain(argc - 2, argv + 2);
3271                 proc_exit(0);
3272         }
3273         if (strcmp(argv[1], "-forkarch") == 0)
3274         {
3275                 /* Close the postmaster's sockets */
3276                 ClosePostmasterPorts(false);
3277
3278                 /* Do not want to attach to shared memory */
3279
3280                 PgArchiverMain(argc, argv);
3281                 proc_exit(0);
3282         }
3283         if (strcmp(argv[1], "-forkbuf") == 0)
3284         {
3285                 /* Close the postmaster's sockets */
3286                 ClosePostmasterPorts(false);
3287
3288                 /* Do not want to attach to shared memory */
3289
3290                 PgstatBufferMain(argc, argv);
3291                 proc_exit(0);
3292         }
3293         if (strcmp(argv[1], "-forkcol") == 0)
3294         {
3295                 /*
3296                  * Do NOT close postmaster sockets here, because we are forking from
3297                  * pgstat buffer process, which already did it.
3298                  */
3299
3300                 /* Do not want to attach to shared memory */
3301
3302                 PgstatCollectorMain(argc, argv);
3303                 proc_exit(0);
3304         }
3305         if (strcmp(argv[1], "-forklog") == 0)
3306         {
3307                 /* Close the postmaster's sockets */
3308                 ClosePostmasterPorts(true);
3309
3310                 /* Do not want to attach to shared memory */
3311
3312                 SysLoggerMain(argc, argv);
3313                 proc_exit(0);
3314         }
3315
3316         return 1;                                       /* shouldn't get here */
3317 }
3318 #endif   /* EXEC_BACKEND */
3319
3320
/*
 * ExitPostmaster -- the postmaster's single exit path
 *
 * Never call exit() directly from postmaster code; route every exit
 * through this function instead.  At present it simply forwards the
 * given status to proc_exit().
 */
static void
ExitPostmaster(int status)
{
	/*
	 * Open question carried over from the original code: this is where
	 * shared memory should be cleaned up and all backends killed.  The
	 * original author was unsure whether the backends ought to die along
	 * with the postmaster ("probably not"); a later annotation answers
	 * "MUST" (vadim 05-10-1999).  No such cleanup is performed in this
	 * function itself.
	 */
	proc_exit(status);
}
3340
3341 /*
3342  * sigusr1_handler - handle signal conditions from child processes
3343  */
3344 static void
3345 sigusr1_handler(SIGNAL_ARGS)
3346 {
3347         int                     save_errno = errno;
3348
3349         PG_SETMASK(&BlockSig);
3350
3351         if (CheckPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE))
3352         {
3353                 /*
3354                  * Authorization file has changed.
3355                  */
3356                 load_role();
3357         }
3358
3359         if (CheckPostmasterSignal(PMSIGNAL_WAKEN_CHILDREN))
3360         {
3361                 /*
3362                  * Send SIGUSR1 to all children (triggers CatchupInterruptHandler).
3363                  * See storage/ipc/sinval[adt].c for the use of this.
3364                  */
3365                 if (Shutdown <= SmartShutdown)
3366                 {
3367                         SignalChildren(SIGUSR1);
3368                         if (AutoVacPID != 0)
3369                                 kill(AutoVacPID, SIGUSR1);
3370                 }
3371         }
3372
3373         if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER) &&
3374                 PgArchPID != 0 && Shutdown == NoShutdown)
3375         {
3376                 /*
3377                  * Send SIGUSR1 to archiver process, to wake it up and begin archiving
3378                  * next transaction log file.
3379                  */
3380                 kill(PgArchPID, SIGUSR1);
3381         }
3382
3383         if (CheckPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE) &&
3384                 SysLoggerPID != 0)
3385         {
3386                 /* Tell syslogger to rotate logfile */
3387                 kill(SysLoggerPID, SIGUSR1);
3388         }
3389
3390         PG_SETMASK(&UnBlockSig);
3391
3392         errno = save_errno;
3393 }
3394
3395
/*
 * dummy_handler -- signal handler that does nothing
 *
 * Installed for signals the postmaster itself has no use for but backends
 * do.  Setting such signals to SIG_IGN in the postmaster would let a newly
 * started backend lose a signal that arrives before it has reconfigured
 * its own signal processing.  (See notes in tcop/postgres.c.)
 */
static void
dummy_handler(SIGNAL_ARGS)
{
	/* deliberately empty */
}
3409
3410
/*
 * CharRemap: given an int in range 0..61, produce textual encoding of it
 * per crypt(3) conventions.
 *
 * The mapping is 0..25 -> 'A'..'Z', 26..51 -> 'a'..'z', 52..61 -> '0'..'9'.
 * Inputs outside 0..61 are folded into range by taking the absolute value
 * of the remainder modulo 62.
 */
static char
CharRemap(long ch)
{
	/*
	 * Reduce before negating: the remainder lies in -61..61, so the negation
	 * below can never overflow, which negating the raw input could (for
	 * LONG_MIN).  For every other input the result is the same as taking the
	 * absolute value first.
	 */
	ch %= 62;
	if (ch < 0)
		ch = -ch;

	if (ch < 26)
		return 'A' + ch;

	ch -= 26;
	if (ch < 26)
		return 'a' + ch;

	ch -= 26;
	return '0' + ch;
}
3432
/*
 * RandomSalt -- fill in a 2-character crypt salt and a 4-byte MD5 salt
 *
 * cryptSalt receives two characters produced by CharRemap(); md5Salt
 * receives four bytes, each in the range 1..255.  Four values are drawn
 * from PostmasterRandom() in total.
 */
static void
RandomSalt(char *cryptSalt, char *md5Salt)
{
	long		draw = PostmasterRandom();
	int			k;

	cryptSalt[0] = CharRemap(draw % 62);
	cryptSalt[1] = CharRemap(draw / 62);

	/*
	 * The first draw may be shared between the crypt salt and the first MD5
	 * salt byte, since only one of the two salts will be sent to the client.
	 * The remaining MD5 bytes each get a fresh draw.
	 *
	 * Taking the value modulo 255 gives up one possible byte value but lets
	 * all bits of the random() value participate in the result; adding one
	 * afterwards avoids generating any null bytes.
	 */
	md5Salt[0] = (draw % 255) + 1;
	for (k = 1; k < 4; k++)
	{
		draw = PostmasterRandom();
		md5Salt[k] = (draw % 255) + 1;
	}
}
3461
3462 /*
3463  * PostmasterRandom
3464  */
3465 static long
3466 PostmasterRandom(void)
3467 {
3468         static bool initialized = false;
3469
3470         if (!initialized)
3471         {
3472                 Assert(random_seed != 0);
3473                 srandom(random_seed);
3474                 initialized = true;
3475         }
3476
3477         return random();
3478 }
3479
3480 /*
3481  * Count up number of child processes (regular backends only)
3482  */
3483 static int
3484 CountChildren(void)
3485 {
3486         Dlelem     *curr;
3487         int                     cnt = 0;
3488
3489         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
3490                 cnt++;
3491         return cnt;
3492 }
3493
3494
3495 /*
3496  * StartChildProcess -- start a non-backend child process for the postmaster
3497  *
3498  * xlop determines what kind of child will be started.  All child types
3499  * initially go to BootstrapMain, which will handle common setup.
3500  *
3501  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
3502  * to start subprocess.
3503  */
3504 static pid_t
3505 StartChildProcess(int xlop)
3506 {
3507         pid_t           pid;
3508         char       *av[10];
3509         int                     ac = 0;
3510         char            xlbuf[32];
3511
3512         /*
3513          * Set up command-line arguments for subprocess
3514          */
3515         av[ac++] = "postgres";
3516
3517 #ifdef EXEC_BACKEND
3518         av[ac++] = "-forkboot";
3519         av[ac++] = NULL;                        /* filled in by postmaster_forkexec */
3520 #endif
3521
3522         snprintf(xlbuf, sizeof(xlbuf), "-x%d", xlop);
3523         av[ac++] = xlbuf;
3524
3525         av[ac++] = "-p";
3526         av[ac++] = "template1";
3527
3528         av[ac] = NULL;
3529         Assert(ac < lengthof(av));
3530
3531 #ifdef EXEC_BACKEND
3532         pid = postmaster_forkexec(ac, av);
3533 #else                                                   /* !EXEC_BACKEND */
3534         pid = fork_process();
3535
3536         if (pid == 0)                           /* child */
3537         {
3538                 IsUnderPostmaster = true;               /* we are a postmaster subprocess now */
3539
3540                 /* Close the postmaster's sockets */
3541                 ClosePostmasterPorts(false);
3542
3543                 /* Lose the postmaster's on-exit routines and port connections */
3544                 on_exit_reset();
3545
3546                 /* Release postmaster's working memory context */
3547                 MemoryContextSwitchTo(TopMemoryContext);
3548                 MemoryContextDelete(PostmasterContext);
3549                 PostmasterContext = NULL;
3550
3551                 BootstrapMain(ac, av);
3552                 ExitPostmaster(0);
3553         }
3554 #endif   /* EXEC_BACKEND */
3555
3556         if (pid < 0)
3557         {
3558                 /* in parent, fork failed */
3559                 int                     save_errno = errno;
3560
3561                 errno = save_errno;
3562                 switch (xlop)
3563                 {
3564                         case BS_XLOG_STARTUP:
3565                                 ereport(LOG,
3566                                                 (errmsg("could not fork startup process: %m")));
3567                                 break;
3568                         case BS_XLOG_BGWRITER:
3569                                 ereport(LOG,
3570                                    (errmsg("could not fork background writer process: %m")));
3571                                 break;
3572                         default:
3573                                 ereport(LOG,
3574                                                 (errmsg("could not fork process: %m")));
3575                                 break;
3576                 }
3577
3578                 /*
3579                  * fork failure is fatal during startup, but there's no need to choke
3580                  * immediately if starting other child types fails.
3581                  */
3582                 if (xlop == BS_XLOG_STARTUP)
3583                         ExitPostmaster(1);
3584                 return 0;
3585         }
3586
3587         /*
3588          * in parent, successful fork
3589          */
3590         return pid;
3591 }
3592
3593
3594 /*
3595  * Create the opts file
3596  */
3597 static bool
3598 CreateOptsFile(int argc, char *argv[], char *fullprogname)
3599 {
3600         FILE       *fp;
3601         int                     i;
3602
3603 #define OPTS_FILE       "postmaster.opts"
3604
3605         if ((fp = fopen(OPTS_FILE, "w")) == NULL)
3606         {
3607                 elog(LOG, "could not create file \"%s\": %m", OPTS_FILE);
3608                 return false;
3609         }
3610
3611         fprintf(fp, "%s", fullprogname);
3612         for (i = 1; i < argc; i++)
3613                 fprintf(fp, " %s%s%s", SYSTEMQUOTE, argv[i], SYSTEMQUOTE);
3614         fputs("\n", fp);
3615
3616         if (fclose(fp))
3617         {
3618                 elog(LOG, "could not write file \"%s\": %m", OPTS_FILE);
3619                 return false;
3620         }
3621
3622         return true;
3623 }
3624
3625
3626 #ifdef EXEC_BACKEND
3627
3628 /*
3629  * The following need to be available to the save/restore_backend_variables
3630  * functions
3631  */
3632 extern slock_t *ShmemLock;
3633 extern slock_t *ShmemIndexLock;
3634 extern void *ShmemIndexAlloc;
3635 extern LWLock *LWLockArray;
3636 extern slock_t *ProcStructLock;
3637 extern int      pgStatSock;
3638 extern int      pgStatPipe[2];
3639
3640 #ifndef WIN32
3641 #define write_inheritable_socket(dest, src, childpid) (*(dest) = (src))
3642 #define read_inheritable_socket(dest, src) (*(dest) = *(src))
3643 #else
3644 static void write_duplicated_handle(HANDLE * dest, HANDLE src, HANDLE child);
3645 static void write_inheritable_socket(InheritableSocket * dest, SOCKET src,
3646                                                  pid_t childPid);
3647 static void read_inheritable_socket(SOCKET * dest, InheritableSocket * src);
3648 #endif
3649
3650
3651 /* Save critical backend variables into the BackendParameters struct */
3652 #ifndef WIN32
3653 static bool
3654 save_backend_variables(BackendParameters * param, Port *port)
3655 #else
3656 static bool
3657 save_backend_variables(BackendParameters * param, Port *port,
3658                                            HANDLE childProcess, pid_t childPid)
3659 #endif
3660 {
3661         memcpy(&param->port, port, sizeof(Port));
3662         write_inheritable_socket(&param->portsocket, port->sock, childPid);
3663
3664         StrNCpy(param->DataDir, DataDir, MAXPGPATH);
3665
3666         memcpy(&param->ListenSocket, &ListenSocket, sizeof(ListenSocket));
3667
3668         param->MyCancelKey = MyCancelKey;
3669
3670         param->UsedShmemSegID = UsedShmemSegID;
3671         param->UsedShmemSegAddr = UsedShmemSegAddr;
3672
3673         param->ShmemLock = ShmemLock;
3674         param->ShmemIndexLock = ShmemIndexLock;
3675         param->ShmemVariableCache = ShmemVariableCache;
3676         param->ShmemIndexAlloc = ShmemIndexAlloc;
3677         param->ShmemBackendArray = ShmemBackendArray;
3678
3679         param->LWLockArray = LWLockArray;
3680         param->ProcStructLock = ProcStructLock;
3681         write_inheritable_socket(&param->pgStatSock, pgStatSock, childPid);
3682         write_inheritable_socket(&param->pgStatPipe0, pgStatPipe[0], childPid);
3683         write_inheritable_socket(&param->pgStatPipe1, pgStatPipe[1], childPid);
3684
3685         param->PostmasterPid = PostmasterPid;
3686         param->PgStartTime = PgStartTime;
3687
3688 #ifdef WIN32
3689         param->PostmasterHandle = PostmasterHandle;
3690         write_duplicated_handle(&param->initial_signal_pipe,
3691                                                         pgwin32_create_signal_listener(childPid),
3692                                                         childProcess);
3693 #endif
3694
3695         memcpy(&param->syslogPipe, &syslogPipe, sizeof(syslogPipe));
3696
3697         StrNCpy(param->my_exec_path, my_exec_path, MAXPGPATH);
3698
3699         StrNCpy(param->pkglib_path, pkglib_path, MAXPGPATH);
3700
3701         StrNCpy(param->ExtraOptions, ExtraOptions, MAXPGPATH);
3702
3703         StrNCpy(param->lc_collate, setlocale(LC_COLLATE, NULL), LOCALE_NAME_BUFLEN);
3704         StrNCpy(param->lc_ctype, setlocale(LC_CTYPE, NULL), LOCALE_NAME_BUFLEN);
3705
3706         return true;
3707 }
3708
3709
3710 #ifdef WIN32
3711 /*
3712  * Duplicate a handle for usage in a child process, and write the child
3713  * process instance of the handle to the parameter file.
3714  */
3715 static void
3716 write_duplicated_handle(HANDLE * dest, HANDLE src, HANDLE childProcess)
3717 {
3718         HANDLE          hChild = INVALID_HANDLE_VALUE;
3719
3720         if (!DuplicateHandle(GetCurrentProcess(),
3721                                                  src,
3722                                                  childProcess,
3723                                                  &hChild,
3724                                                  0,
3725                                                  TRUE,
3726                                                  DUPLICATE_CLOSE_SOURCE | DUPLICATE_SAME_ACCESS))
3727                 ereport(ERROR,
3728                                 (errmsg_internal("could not duplicate handle to be written to backend parameter file: error code %d",
3729                                                                  (int) GetLastError())));
3730
3731         *dest = hChild;
3732 }
3733
3734 /*
3735  * Duplicate a socket for usage in a child process, and write the resulting
3736  * structure to the parameter file.
3737  * This is required because a number of LSPs (Layered Service Providers) very
3738  * common on Windows (antivirus, firewalls, download managers etc) break
3739  * straight socket inheritance.
3740  */
3741 static void
3742 write_inheritable_socket(InheritableSocket * dest, SOCKET src, pid_t childpid)
3743 {
3744         dest->origsocket = src;
3745         if (src != 0 && src != -1)
3746         {
3747                 /* Actual socket */
3748                 if (WSADuplicateSocket(src, childpid, &dest->wsainfo) != 0)
3749                         ereport(ERROR,
3750                                         (errmsg("could not duplicate socket %d for use in backend: error code %d",
3751                                                         src, WSAGetLastError())));
3752         }
3753 }
3754
3755 /*
3756  * Read a duplicate socket structure back, and get the socket descriptor.
3757  */
3758 static void
3759 read_inheritable_socket(SOCKET * dest, InheritableSocket * src)
3760 {
3761         SOCKET          s;
3762
3763         if (src->origsocket == -1 || src->origsocket == 0)
3764         {
3765                 /* Not a real socket! */
3766                 *dest = src->origsocket;
3767         }
3768         else
3769         {
3770                 /* Actual socket, so create from structure */
3771                 s = WSASocket(FROM_PROTOCOL_INFO,
3772                                           FROM_PROTOCOL_INFO,
3773                                           FROM_PROTOCOL_INFO,
3774                                           &src->wsainfo,
3775                                           0,
3776                                           0);
3777                 if (s == INVALID_SOCKET)
3778                 {
3779                         write_stderr("could not create inherited socket: error code %d\n",
3780                                                  WSAGetLastError());
3781                         exit(1);
3782                 }
3783                 *dest = s;
3784
3785                 /*
3786                  * To make sure we don't get two references to the same socket, close
3787                  * the original one. (This would happen when inheritance actually
3788                  * works..
3789                  */
3790                 closesocket(src->origsocket);
3791         }
3792 }
3793 #endif
3794
3795 static void
3796 read_backend_variables(char *id, Port *port)
3797 {
3798         BackendParameters param;
3799
3800 #ifndef WIN32
3801         /* Non-win32 implementation reads from file */
3802         FILE       *fp;
3803
3804         /* Open file */
3805         fp = AllocateFile(id, PG_BINARY_R);
3806         if (!fp)
3807         {
3808                 write_stderr("could not read from backend variables file \"%s\": %s\n",
3809                                          id, strerror(errno));
3810                 exit(1);
3811         }
3812
3813         if (fread(&param, sizeof(param), 1, fp) != 1)
3814         {
3815                 write_stderr("could not read from backend variables file \"%s\": %s\n",
3816                                          id, strerror(errno));
3817                 exit(1);
3818         }
3819
3820         /* Release file */
3821         FreeFile(fp);
3822         if (unlink(id) != 0)
3823         {
3824                 write_stderr("could not remove file \"%s\": %s\n",
3825                                          id, strerror(errno));
3826                 exit(1);
3827         }
3828 #else
3829         /* Win32 version uses mapped file */
3830         HANDLE          paramHandle;
3831         BackendParameters *paramp;
3832
3833         paramHandle = (HANDLE) atol(id);
3834         paramp = MapViewOfFile(paramHandle, FILE_MAP_READ, 0, 0, 0);
3835         if (!paramp)
3836         {
3837                 write_stderr("could not map view of backend variables: error code %d\n",
3838                                          (int) GetLastError());
3839                 exit(1);
3840         }
3841
3842         memcpy(&param, paramp, sizeof(BackendParameters));
3843
3844         if (!UnmapViewOfFile(paramp))
3845         {
3846                 write_stderr("could not unmap view of backend variables: error code %d\n",
3847                                          (int) GetLastError());
3848                 exit(1);
3849         }
3850
3851         if (!CloseHandle(paramHandle))
3852         {
3853                 write_stderr("could not close handle to backend parameter variables: error code %d\n",
3854                                          (int) GetLastError());
3855                 exit(1);
3856         }
3857 #endif
3858
3859         restore_backend_variables(&param, port);
3860 }
3861
3862 /* Restore critical backend variables from the BackendParameters struct */
3863 static void
3864 restore_backend_variables(BackendParameters * param, Port *port)
3865 {
3866         memcpy(port, &param->port, sizeof(Port));
3867         read_inheritable_socket(&port->sock, &param->portsocket);
3868
3869         SetDataDir(param->DataDir);
3870
3871         memcpy(&ListenSocket, &param->ListenSocket, sizeof(ListenSocket));
3872
3873         MyCancelKey = param->MyCancelKey;
3874
3875         UsedShmemSegID = param->UsedShmemSegID;
3876         UsedShmemSegAddr = param->UsedShmemSegAddr;
3877
3878         ShmemLock = param->ShmemLock;
3879         ShmemIndexLock = param->ShmemIndexLock;
3880         ShmemVariableCache = param->ShmemVariableCache;
3881         ShmemIndexAlloc = param->ShmemIndexAlloc;
3882         ShmemBackendArray = param->ShmemBackendArray;
3883
3884         LWLockArray = param->LWLockArray;
3885         ProcStructLock = param->ProcStructLock;
3886         read_inheritable_socket(&pgStatSock, &param->pgStatSock);
3887         read_inheritable_socket(&pgStatPipe[0], &param->pgStatPipe0);
3888         read_inheritable_socket(&pgStatPipe[1], &param->pgStatPipe1);
3889
3890         PostmasterPid = param->PostmasterPid;
3891         PgStartTime = param->PgStartTime;
3892
3893 #ifdef WIN32
3894         PostmasterHandle = param->PostmasterHandle;
3895         pgwin32_initial_signal_pipe = param->initial_signal_pipe;
3896 #endif
3897
3898         memcpy(&syslogPipe, &param->syslogPipe, sizeof(syslogPipe));
3899
3900         StrNCpy(my_exec_path, param->my_exec_path, MAXPGPATH);
3901
3902         StrNCpy(pkglib_path, param->pkglib_path, MAXPGPATH);
3903
3904         StrNCpy(ExtraOptions, param->ExtraOptions, MAXPGPATH);
3905
3906         setlocale(LC_COLLATE, param->lc_collate);
3907         setlocale(LC_CTYPE, param->lc_ctype);
3908 }
3909
3910
3911 Size
3912 ShmemBackendArraySize(void)
3913 {
3914         return mul_size(NUM_BACKENDARRAY_ELEMS, sizeof(Backend));
3915 }
3916
3917 void
3918 ShmemBackendArrayAllocation(void)
3919 {
3920         Size            size = ShmemBackendArraySize();
3921
3922         ShmemBackendArray = (Backend *) ShmemAlloc(size);
3923         /* Mark all slots as empty */
3924         memset(ShmemBackendArray, 0, size);
3925 }
3926
3927 static void
3928 ShmemBackendArrayAdd(Backend *bn)
3929 {
3930         int                     i;
3931
3932         /* Find an empty slot */
3933         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3934         {
3935                 if (ShmemBackendArray[i].pid == 0)
3936                 {
3937                         ShmemBackendArray[i] = *bn;
3938                         return;
3939                 }
3940         }
3941
3942         ereport(FATAL,
3943                         (errmsg_internal("no free slots in shmem backend array")));
3944 }
3945
3946 static void
3947 ShmemBackendArrayRemove(pid_t pid)
3948 {
3949         int                     i;
3950
3951         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3952         {
3953                 if (ShmemBackendArray[i].pid == pid)
3954                 {
3955                         /* Mark the slot as empty */
3956                         ShmemBackendArray[i].pid = 0;
3957                         return;
3958                 }
3959         }
3960
3961         ereport(WARNING,
3962                         (errmsg_internal("could not find backend entry with pid %d",
3963                                                          (int) pid)));
3964 }
3965 #endif   /* EXEC_BACKEND */
3966
3967
3968 #ifdef WIN32
3969
/*
 * Note: The following three functions must not be interrupted (e.g. by
 * signals).  As the Postgres Win32 signalling architecture (currently)
 * requires polling, or APC checking functions which aren't used here, this
 * is not an issue.
 *
 * We keep two separate arrays, instead of a single array of pid/HANDLE
 * structs, to avoid having to re-create a handle array for
 * WaitForMultipleObjects on each call to win32_waitpid.
 */
3980
3981 static void
3982 win32_AddChild(pid_t pid, HANDLE handle)
3983 {
3984         Assert(win32_childPIDArray && win32_childHNDArray);
3985         if (win32_numChildren < NUM_BACKENDARRAY_ELEMS)
3986         {
3987                 win32_childPIDArray[win32_numChildren] = pid;
3988                 win32_childHNDArray[win32_numChildren] = handle;
3989                 ++win32_numChildren;
3990         }
3991         else
3992                 ereport(FATAL,
3993                                 (errmsg_internal("no room for child entry with pid %lu",
3994                                                                  (unsigned long) pid)));
3995 }
3996
3997 static void
3998 win32_RemoveChild(pid_t pid)
3999 {
4000         int                     i;
4001
4002         Assert(win32_childPIDArray && win32_childHNDArray);
4003
4004         for (i = 0; i < win32_numChildren; i++)
4005         {
4006                 if (win32_childPIDArray[i] == pid)
4007                 {
4008                         CloseHandle(win32_childHNDArray[i]);
4009
4010                         /* Swap last entry into the "removed" one */
4011                         --win32_numChildren;
4012                         win32_childPIDArray[i] = win32_childPIDArray[win32_numChildren];
4013                         win32_childHNDArray[i] = win32_childHNDArray[win32_numChildren];
4014                         return;
4015                 }
4016         }
4017
4018         ereport(WARNING,
4019                         (errmsg_internal("could not find child entry with pid %lu",
4020                                                          (unsigned long) pid)));
4021 }
4022
4023 static pid_t
4024 win32_waitpid(int *exitstatus)
4025 {
4026         /*
4027          * Note: Do NOT use WaitForMultipleObjectsEx, as we don't want to run
4028          * queued APCs here.
4029          */
4030         int                     index;
4031         DWORD           exitCode;
4032         DWORD           ret;
4033         unsigned long offset;
4034
4035         Assert(win32_childPIDArray && win32_childHNDArray);
4036         elog(DEBUG3, "waiting on %lu children", win32_numChildren);
4037
4038         for (offset = 0; offset < win32_numChildren; offset += MAXIMUM_WAIT_OBJECTS)
4039         {
4040                 unsigned long num = Min(MAXIMUM_WAIT_OBJECTS, win32_numChildren - offset);
4041
4042                 ret = WaitForMultipleObjects(num, &win32_childHNDArray[offset], FALSE, 0);
4043                 switch (ret)
4044                 {
4045                         case WAIT_FAILED:
4046                                 ereport(LOG,
4047                                                 (errmsg_internal("failed to wait on %lu of %lu children: error code %d",
4048                                                          num, win32_numChildren, (int) GetLastError())));
4049                                 return -1;
4050
4051                         case WAIT_TIMEOUT:
4052                                 /* No children (in this chunk) have finished */
4053                                 break;
4054
4055                         default:
4056
4057                                 /*
4058                                  * Get the exit code, and return the PID of, the respective
4059                                  * process
4060                                  */
4061                                 index = offset + ret - WAIT_OBJECT_0;
4062                                 Assert(index >= 0 && index < win32_numChildren);
4063                                 if (!GetExitCodeProcess(win32_childHNDArray[index], &exitCode))
4064                                 {
4065                                         /*
4066                                          * If we get this far, this should never happen, but, then
4067                                          * again... No choice other than to assume a catastrophic
4068                                          * failure.
4069                                          */
4070                                         ereport(FATAL,
4071                                         (errmsg_internal("failed to get exit code for child %lu",
4072                                                            (unsigned long) win32_childPIDArray[index])));
4073                                 }
4074                                 *exitstatus = (int) exitCode;
4075                                 return win32_childPIDArray[index];
4076                 }
4077         }
4078
4079         /* No children have finished */
4080         return -1;
4081 }
4082
4083 /*
4084  * Note! Code below executes on separate threads, one for
4085  * each child process created
4086  */
4087 static DWORD WINAPI
4088 win32_sigchld_waiter(LPVOID param)
4089 {
4090         HANDLE          procHandle = (HANDLE) param;
4091
4092         DWORD           r = WaitForSingleObject(procHandle, INFINITE);
4093
4094         if (r == WAIT_OBJECT_0)
4095                 pg_queue_signal(SIGCHLD);
4096         else
4097                 write_stderr("could not wait on child process handle: error code %d\n",
4098                                          (int) GetLastError());
4099         CloseHandle(procHandle);
4100         return 0;
4101 }
4102
4103 #endif   /* WIN32 */