]> granicus.if.org Git - postgresql/blob - src/backend/postmaster/postmaster.c
Remove GUC USERLIMIT variable category, making the affected variables
[postgresql] / src / backend / postmaster / postmaster.c
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  *        This program acts as a clearing house for requests to the
5  *        POSTGRES system.      Frontend programs send a startup message
6  *        to the Postmaster and the postmaster uses the info in the
7  *        message to set up a backend process.
8  *
9  *        The postmaster also manages system-wide operations such as
10  *        startup and shutdown. The postmaster itself doesn't do those
11  *        operations, mind you --- it just forks off a subprocess to do them
12  *        at the right times.  It also takes care of resetting the system
13  *        if a backend crashes.
14  *
15  *        The postmaster process creates the shared memory and semaphore
16  *        pools during startup, but as a rule does not touch them itself.
17  *        In particular, it is not a member of the PGPROC array of backends
18  *        and so it cannot participate in lock-manager operations.      Keeping
19  *        the postmaster away from shared memory operations makes it simpler
20  *        and more reliable.  The postmaster is almost always able to recover
21  *        from crashes of individual backends by resetting shared memory;
22  *        if it did much with shared memory then it would be prone to crashing
23  *        along with the backends.
24  *
25  *        When a request message is received, we now fork() immediately.
26  *        The child process performs authentication of the request, and
27  *        then becomes a backend if successful.  This allows the auth code
28  *        to be written in a simple single-threaded style (as opposed to the
29  *        crufty "poor man's multitasking" code that used to be needed).
30  *        More importantly, it ensures that blockages in non-multithreaded
31  *        libraries like SSL or PAM cannot cause denial of service to other
32  *        clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  *        $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.438 2004/11/14 19:35:30 tgl Exp $
41  *
42  * NOTES
43  *
44  * Initialization:
45  *              The Postmaster sets up shared memory data structures
46  *              for the backends.
47  *
48  * Synchronization:
49  *              The Postmaster shares memory with the backends but should avoid
50  *              touching shared memory, so as not to become stuck if a crashing
51  *              backend screws up locks or shared memory.  Likewise, the Postmaster
52  *              should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  *              The Postmaster cleans up after backends if they have an emergency
56  *              exit and/or core dump.
57  *
58  * Error Reporting:
59  *              Use write_stderr() only for reporting "interactive" errors
60  *              (essentially, bogus arguments on the command line).  Once the
61  *              postmaster is launched, use ereport().  In particular, don't use
62  *              write_stderr() for anything that occurs after pmdaemonize.
63  *
64  *-------------------------------------------------------------------------
65  */
66
67 #include "postgres.h"
68
69 #include <unistd.h>
70 #include <signal.h>
71 #include <time.h>
72 #include <sys/wait.h>
73 #include <ctype.h>
74 #include <sys/stat.h>
75 #include <sys/socket.h>
76 #include <fcntl.h>
77 #include <sys/param.h>
78 #include <netinet/in.h>
79 #include <arpa/inet.h>
80 #include <netdb.h>
81 #include <limits.h>
82
83 #ifdef HAVE_SYS_SELECT_H
84 #include <sys/select.h>
85 #endif
86
87 #ifdef HAVE_GETOPT_H
88 #include <getopt.h>
89 #endif
90
91 #ifdef USE_RENDEZVOUS
92 #include <DNSServiceDiscovery/DNSServiceDiscovery.h>
93 #endif
94
95 #include "catalog/pg_database.h"
96 #include "commands/async.h"
97 #include "lib/dllist.h"
98 #include "libpq/auth.h"
99 #include "libpq/crypt.h"
100 #include "libpq/libpq.h"
101 #include "libpq/pqcomm.h"
102 #include "libpq/pqsignal.h"
103 #include "miscadmin.h"
104 #include "nodes/nodes.h"
105 #include "postmaster/postmaster.h"
106 #include "postmaster/pgarch.h"
107 #include "postmaster/syslogger.h"
108 #include "storage/fd.h"
109 #include "storage/ipc.h"
110 #include "storage/pg_shmem.h"
111 #include "storage/pmsignal.h"
112 #include "storage/proc.h"
113 #include "storage/bufmgr.h"
114 #include "access/xlog.h"
115 #include "tcop/tcopprot.h"
116 #include "utils/builtins.h"
117 #include "utils/guc.h"
118 #include "utils/memutils.h"
119 #include "utils/ps_status.h"
120 #include "bootstrap/bootstrap.h"
121 #include "pgstat.h"
122
123
124 /*
125  * List of active backends (or child processes anyway; we don't actually
126  * know whether a given child has become a backend or is still in the
127  * authorization phase).  This is used mainly to keep track of how many
128  * children we have and send them appropriate signals when necessary.
129  *
130  * "Special" children such as the startup and bgwriter tasks are not in
131  * this list.
132  */
133 typedef struct bkend
134 {
135         pid_t           pid;                    /* process id of the child (a backend, or still in the authorization phase) */
136         long            cancel_key;             /* cancel key for cancel requests aimed at this child; assigned by the postmaster */
137 } Backend;
138
139 static Dllist *BackendList;
140
141 #ifdef EXEC_BACKEND
142 #define NUM_BACKENDARRAY_ELEMS (2*MaxBackends) /* element count: twice MaxBackends; also sizes the win32 child pid/HANDLE arrays */
143 static Backend *ShmemBackendArray; /* Backend entries maintained via ShmemBackendArrayAdd/Remove; presumably lives in shared memory, per the name -- TODO confirm */
144 #endif
145
146 /* The socket number we are listening for connections on */
147 int                     PostPortNumber;
148 char       *UnixSocketDir;
149 char       *ListenAddresses;
150
151 /*
152  * ReservedBackends is the number of backends reserved for superuser use.
153  * This number is taken out of the pool size given by MaxBackends so
154  * the number of backend slots available to non-superusers is
155  * (MaxBackends - ReservedBackends).  Note what this really means is
156  * "if there are <= ReservedBackends connections available, only superusers
157  * can make new connections" --- pre-existing superuser connections don't
158  * count against the limit.
159  */
160 int                     ReservedBackends;
161
162
163 static const char *progname = NULL;
164
165 /* The socket(s) we're listening to. */
166 #define MAXLISTEN       10
167 static int      ListenSocket[MAXLISTEN];
168
169 /*
170  * Set by the -o option
171  */
172 static char ExtraOptions[MAXPGPATH];
173
174 /*
175  * These globals control the behavior of the postmaster in case some
176  * backend dumps core.  Normally, it kills all peers of the dead backend
177  * and reinitializes shared memory.  By specifying -s or -n, we can have
178  * the postmaster stop (rather than kill) peers and not reinitialize
179  * shared data structures.
180  */
181 static bool Reinit = true; /* cleared by -n: don't reinit shared memory after an abnormal exit */
182 static int      SendStop = false; /* set by -s: send SIGSTOP rather than SIGQUIT to peers of a backend that dumped core */
183
184 /* still more option variables */
185 bool            EnableSSL = false;
186 bool            SilentMode = false; /* silent mode (-S) */
187
188 int                     PreAuthDelay = 0;
189 int                     AuthenticationTimeout = 60;
190
191 bool            log_hostname;           /* for ps display and logging */
192 bool            Log_connections = false;
193 bool            Db_user_namespace = false;
194
195 char       *rendezvous_name;
196
197 /* list of library:init-function to be preloaded */
198 char       *preload_libraries_string = NULL;
199
200 /* PIDs of special child processes; 0 when not running */
201 static pid_t StartupPID = 0,
202                         BgWriterPID = 0,
203                         PgArchPID = 0,
204                         PgStatPID = 0,
205                         SysLoggerPID = 0;
206
207 /* Startup/shutdown state */
208 #define                 NoShutdown              0
209 #define                 SmartShutdown   1
210 #define                 FastShutdown    2
211
212 static int      Shutdown = NoShutdown;
213
214 static bool FatalError = false; /* T if recovering from backend crash */
215
216 bool            ClientAuthInProgress = false;           /* T during new-client
217                                                                                                  * authentication */
218
219 /*
220  * State for assigning random salts and cancel keys.
221  * Also, the global MyCancelKey passes the cancel key assigned to a given
222  * backend from the postmaster to that backend (via fork).
223  */
224 static unsigned int random_seed = 0;
225
226 extern char *optarg;
227 extern int      optind,
228                         opterr;
229
230 #ifdef HAVE_INT_OPTRESET
231 extern int      optreset;
232 #endif
233
234 /*
235  * postmaster.c - function prototypes
236  */
237 static void checkDataDir(void);
238
239 #ifdef USE_RENDEZVOUS
240 static void reg_reply(DNSServiceRegistrationReplyErrorType errorCode,
241                   void *context);
242 #endif
243 static void pmdaemonize(void);
244 static Port *ConnCreate(int serverFd);
245 static void ConnFree(Port *port);
246 static void reset_shared(unsigned short port);
247 static void SIGHUP_handler(SIGNAL_ARGS);
248 static void pmdie(SIGNAL_ARGS);
249 static void reaper(SIGNAL_ARGS);
250 static void sigusr1_handler(SIGNAL_ARGS);
251 static void dummy_handler(SIGNAL_ARGS);
252 static void CleanupBackend(int pid, int exitstatus);
253 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
254 static void LogChildExit(int lev, const char *procname,
255                          int pid, int exitstatus);
256 static int      BackendRun(Port *port);
257 static void ExitPostmaster(int status);
258 static void usage(const char *);
259 static int      ServerLoop(void);
260 static int      BackendStartup(Port *port);
261 static int      ProcessStartupPacket(Port *port, bool SSLdone);
262 static void processCancelRequest(Port *port, void *pkt);
263 static int      initMasks(fd_set *rmask);
264 static void report_fork_failure_to_client(Port *port, int errnum);
265 static enum CAC_state canAcceptConnections(void);
266 static long PostmasterRandom(void);
267 static void RandomSalt(char *cryptSalt, char *md5Salt);
268 static void SignalChildren(int signal);
269 static int      CountChildren(void);
270 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
271 static pid_t StartChildProcess(int xlop);
272
273 #ifdef EXEC_BACKEND
274
275 #ifdef WIN32
276 static pid_t win32_forkexec(const char *path, char *argv[]);
277 static void win32_AddChild(pid_t pid, HANDLE handle);
278 static void win32_RemoveChild(pid_t pid);
279 static pid_t win32_waitpid(int *exitstatus);
280 static DWORD WINAPI win32_sigchld_waiter(LPVOID param);
281
282 static pid_t *win32_childPIDArray;
283 static HANDLE *win32_childHNDArray;
284 static unsigned long win32_numChildren = 0;
285
286 HANDLE          PostmasterHandle;
287 #endif
288
289 static pid_t backend_forkexec(Port *port);
290 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
291
292 static void read_backend_variables(char *filename, Port *port);
293 static bool write_backend_variables(char *filename, Port *port);
294
295 static void ShmemBackendArrayAdd(Backend *bn);
296 static void ShmemBackendArrayRemove(pid_t pid);
297 #endif   /* EXEC_BACKEND */
298
299 #define StartupDataBase()               StartChildProcess(BS_XLOG_STARTUP) /* launch the startup child; yields the pid_t returned by StartChildProcess() */
300 #define StartBackgroundWriter() StartChildProcess(BS_XLOG_BGWRITER) /* launch the bgwriter child; yields the pid_t returned by StartChildProcess() */
301
302
303 /*
304  * Postmaster main entry point
305  */
306 int
307 PostmasterMain(int argc, char *argv[])
308 {
309         int                     opt;
310         int                     status;
311         char       *userDoption = NULL;
312         int                     i;
313
314         /* This will call exit() if strdup() fails. */
315         progname = get_progname(argv[0]);       
316
317         MyProcPid = PostmasterPid = getpid();
318
319         IsPostmasterEnvironment = true;
320
321         /*
322          * Catch standard options before doing much else.  This even works on
323          * systems without getopt_long.
324          */
325         if (argc > 1)
326         {
327                 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
328                 {
329                         usage(progname);
330                         ExitPostmaster(0);
331                 }
332                 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
333                 {
334                         puts("postmaster (PostgreSQL) " PG_VERSION);
335                         ExitPostmaster(0);
336                 }
337         }
338
339         /*
340          * for security, no dir or file created can be group or other
341          * accessible
342          */
343         umask((mode_t) 0077);
344
345         /*
346          * Fire up essential subsystems: memory management
347          */
348         MemoryContextInit();
349
350         /*
351          * By default, palloc() requests in the postmaster will be allocated
352          * in the PostmasterContext, which is space that can be recycled by
353          * backends.  Allocated data that needs to be available to backends
354          * should be allocated in TopMemoryContext.
355          */
356         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
357                                                                                           "Postmaster",
358                                                                                           ALLOCSET_DEFAULT_MINSIZE,
359                                                                                           ALLOCSET_DEFAULT_INITSIZE,
360                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
361         MemoryContextSwitchTo(PostmasterContext);
362
363         IgnoreSystemIndexes(false);
364
365         if (find_my_exec(argv[0], my_exec_path) < 0)
366                 elog(FATAL, "%s: could not locate my own executable path",
367                          argv[0]);
368
369         get_pkglib_path(my_exec_path, pkglib_path);
370
371         /*
372          * Options setup
373          */
374         InitializeGUCOptions();
375
376         opterr = 1;
377
378         while ((opt = getopt(argc, argv, "A:a:B:b:c:D:d:Fh:ik:lm:MN:no:p:Ss-:")) != -1)
379         {
380                 switch (opt)
381                 {
382                         case 'A':
383 #ifdef USE_ASSERT_CHECKING
384                                 SetConfigOption("debug_assertions", optarg, PGC_POSTMASTER, PGC_S_ARGV);
385 #else
386                                 write_stderr("%s: assert checking is not compiled in\n", progname);
387 #endif
388                                 break;
389                         case 'a':
390                                 /* Can no longer set authentication method. */
391                                 break;
392                         case 'B':
393                                 SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
394                                 break;
395                         case 'b':
396                                 /* Can no longer set the backend executable file to use. */
397                                 break;
398                         case 'D':
399                                 userDoption = optarg;
400                                 break;
401                         case 'd':
402                                 set_debug_options(atoi(optarg), PGC_POSTMASTER, PGC_S_ARGV);
403                                 break;
404                         case 'F':
405                                 SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
406                                 break;
407                         case 'h':
408                                 SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
409                                 break;
410                         case 'i':
411                                 SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
412                                 break;
413                         case 'k':
414                                 SetConfigOption("unix_socket_directory", optarg, PGC_POSTMASTER, PGC_S_ARGV);
415                                 break;
416 #ifdef USE_SSL
417                         case 'l':
418                                 SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
419                                 break;
420 #endif
421                         case 'm':
422                                 /* Multiplexed backends no longer supported. */
423                                 break;
424                         case 'M':
425
426                                 /*
427                                  * ignore this flag.  This may be passed in because the
428                                  * program was run as 'postgres -M' instead of
429                                  * 'postmaster'
430                                  */
431                                 break;
432                         case 'N':
433                                 /* The max number of backends to start. */
434                                 SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
435                                 break;
436                         case 'n':
437                                 /* Don't reinit shared mem after abnormal exit */
438                                 Reinit = false;
439                                 break;
440                         case 'o':
441
442                                 /*
443                                  * Other options to pass to the backend on the command
444                                  * line
445                                  */
446                                 snprintf(ExtraOptions + strlen(ExtraOptions),
447                                                  sizeof(ExtraOptions) - strlen(ExtraOptions),
448                                                  " %s", optarg);
449                                 break;
450                         case 'p':
451                                 SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
452                                 break;
453                         case 'S':
454
455                                 /*
456                                  * Start in 'S'ilent mode (disassociate from controlling
457                                  * tty). You may also think of this as 'S'ysV mode since
458                                  * it's most badly needed on SysV-derived systems like
459                                  * SVR4 and HP-UX.
460                                  */
461                                 SetConfigOption("silent_mode", "true", PGC_POSTMASTER, PGC_S_ARGV);
462                                 break;
463                         case 's':
464
465                                 /*
466                                  * In the event that some backend dumps core, send
467                                  * SIGSTOP, rather than SIGQUIT, to all its peers.      This
468                                  * lets the wily post_hacker collect core dumps from
469                                  * everyone.
470                                  */
471                                 SendStop = true;
472                                 break;
473                         case 'c':
474                         case '-':
475                                 {
476                                         char       *name,
477                                                            *value;
478
479                                         ParseLongOption(optarg, &name, &value);
480                                         if (!value)
481                                         {
482                                                 if (opt == '-')
483                                                         ereport(ERROR,
484                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
485                                                                          errmsg("--%s requires a value",
486                                                                                         optarg)));
487                                                 else
488                                                         ereport(ERROR,
489                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
490                                                                          errmsg("-c %s requires a value",
491                                                                                         optarg)));
492                                         }
493
494                                         SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
495                                         free(name);
496                                         if (value)
497                                                 free(value);
498                                         break;
499                                 }
500
501                         default:
502                                 write_stderr("Try \"%s --help\" for more information.\n",
503                                                          progname);
504                                 ExitPostmaster(1);
505                 }
506         }
507
508         /*
509          * Postmaster accepts no non-option switch arguments.
510          */
511         if (optind < argc)
512         {
513                 write_stderr("%s: invalid argument: \"%s\"\n",
514                                          progname, argv[optind]);
515                 write_stderr("Try \"%s --help\" for more information.\n",
516                                          progname);
517                 ExitPostmaster(1);
518         }
519
520         /*
521          * Locate the proper configuration files and data directory, and
522          * read postgresql.conf for the first time.
523          */
524         if (!SelectConfigFiles(userDoption, progname))
525                 ExitPostmaster(2);
526
527         /* Verify that DataDir looks reasonable */
528         checkDataDir();
529
530         /*
531          * Check for invalid combinations of GUC settings.
532          */
533         if (NBuffers < 2 * MaxBackends || NBuffers < 16)
534         {
535                 /*
536                  * Do not accept -B so small that backends are likely to starve
537                  * for lack of buffers.  The specific choices here are somewhat
538                  * arbitrary.
539                  */
540                 write_stderr("%s: the number of buffers (-B) must be at least twice the number of allowed connections (-N) and at least 16\n", progname);
541                 ExitPostmaster(1);
542         }
543
544         if (ReservedBackends >= MaxBackends)
545         {
546                 write_stderr("%s: superuser_reserved_connections must be less than max_connections\n", progname);
547                 ExitPostmaster(1);
548         }
549
550         /*
551          * Other one-time internal sanity checks can go here.
552          */
553         if (!CheckDateTokenTables())
554         {
555                 write_stderr("%s: invalid datetoken tables, please fix\n", progname);
556                 ExitPostmaster(1);
557         }
558
559         /*
560          * Now that we are done processing the postmaster arguments, reset
561          * getopt(3) library so that it will work correctly in subprocesses.
562          */
563         optind = 1;
564 #ifdef HAVE_INT_OPTRESET
565         optreset = 1;                           /* some systems need this too */
566 #endif
567
568         /* For debugging: display postmaster environment */
569         {
570                 extern char **environ;
571                 char      **p;
572
573                 ereport(DEBUG3,
574                         (errmsg_internal("%s: PostmasterMain: initial environ dump:",
575                                                          progname)));
576                 ereport(DEBUG3,
577                  (errmsg_internal("-----------------------------------------")));
578                 for (p = environ; *p; ++p)
579                         ereport(DEBUG3,
580                                         (errmsg_internal("\t%s", *p)));
581                 ereport(DEBUG3,
582                  (errmsg_internal("-----------------------------------------")));
583         }
584
585 #ifdef EXEC_BACKEND
586         if (find_other_exec(argv[0], "postgres", PG_VERSIONSTR,
587                                                 postgres_exec_path) < 0)
588                 ereport(FATAL,
589                          (errmsg("%s: could not locate matching postgres executable",
590                                          progname)));
591 #endif
592
593         /*
594          * Initialize SSL library, if specified.
595          */
596 #ifdef USE_SSL
597         if (EnableSSL)
598                 secure_initialize();
599 #endif
600
601         /*
602          * process any libraries that should be preloaded and optionally
603          * pre-initialized
604          */
605         if (preload_libraries_string)
606                 process_preload_libraries(preload_libraries_string);
607
608         /*
609          * Fork away from controlling terminal, if -S specified.
610          *
611          * Must do this before we grab any interlock files, else the interlocks
612          * will show the wrong PID.
613          */
614         if (SilentMode)
615                 pmdaemonize();
616
617         /*
618          * Create lockfile for data directory.
619          *
620          * We want to do this before we try to grab the input sockets, because
621          * the data directory interlock is more reliable than the socket-file
622          * interlock (thanks to whoever decided to put socket files in /tmp
623          * :-(). For the same reason, it's best to grab the TCP socket(s)
624          * before the Unix socket.
625          */
626         CreateDataDirLockFile(DataDir, true);
627
628         /*
629          * Remove old temporary files.  At this point there can be no other
630          * Postgres processes running in this directory, so this should be
631          * safe.
632          */
633         RemovePgTempFiles();
634
635         /*
636          * Establish input sockets.
637          */
638         for (i = 0; i < MAXLISTEN; i++)
639                 ListenSocket[i] = -1;
640
641         if (ListenAddresses)
642         {
643                 char       *rawstring;
644                 List       *elemlist;
645                 ListCell   *l;
646
647                 /* Need a modifiable copy of ListenAddresses */
648                 rawstring = pstrdup(ListenAddresses);
649
650                 /* Parse string into list of identifiers */
651                 if (!SplitIdentifierString(rawstring, ',', &elemlist))
652                 {
653                         /* syntax error in list */
654                         ereport(FATAL,
655                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
656                                 errmsg("invalid list syntax for \"listen_addresses\"")));
657                 }
658
659                 foreach(l, elemlist)
660                 {
661                         char       *curhost = (char *) lfirst(l);
662
663                         if (strcmp(curhost, "*") == 0)
664                                 status = StreamServerPort(AF_UNSPEC, NULL,
665                                                                                   (unsigned short) PostPortNumber,
666                                                                                   UnixSocketDir,
667                                                                                   ListenSocket, MAXLISTEN);
668                         else
669                                 status = StreamServerPort(AF_UNSPEC, curhost,
670                                                                                   (unsigned short) PostPortNumber,
671                                                                                   UnixSocketDir,
672                                                                                   ListenSocket, MAXLISTEN);
673                         if (status != STATUS_OK)
674                                 ereport(WARNING,
675                                          (errmsg("could not create listen socket for \"%s\"",
676                                                          curhost)));
677                 }
678
679                 list_free(elemlist);
680                 pfree(rawstring);
681         }
682
683 #ifdef USE_RENDEZVOUS
684         /* Register for Rendezvous only if we opened TCP socket(s) */
685         if (ListenSocket[0] != -1 && rendezvous_name != NULL)
686         {
687                 DNSServiceRegistrationCreate(rendezvous_name,
688                                                                          "_postgresql._tcp.",
689                                                                          "",
690                                                                          htonl(PostPortNumber),
691                                                                          "",
692                                                                  (DNSServiceRegistrationReply) reg_reply,
693                                                                          NULL);
694         }
695 #endif
696
697 #ifdef HAVE_UNIX_SOCKETS
698         status = StreamServerPort(AF_UNIX, NULL,
699                                                           (unsigned short) PostPortNumber,
700                                                           UnixSocketDir,
701                                                           ListenSocket, MAXLISTEN);
702         if (status != STATUS_OK)
703                 ereport(WARNING,
704                                 (errmsg("could not create Unix-domain socket")));
705 #endif
706
707         /*
708          * check that we have some socket to listen on
709          */
710         if (ListenSocket[0] == -1)
711                 ereport(FATAL,
712                                 (errmsg("no socket created for listening")));
713
714         XLOGPathInit();
715
716         /*
717          * Set up shared memory and semaphores.
718          */
719         reset_shared(PostPortNumber);
720
721         /*
722          * Estimate number of openable files.  This must happen after setting
723          * up semaphores, because on some platforms semaphores count as open
724          * files.
725          */
726         set_max_safe_fds();
727
728         /*
729          * Initialize the list of active backends.
730          */
731         BackendList = DLNewList();
732
733 #ifdef WIN32
734
735         /*
736          * Initialize the child pid/HANDLE arrays for signal handling.
737          */
738         win32_childPIDArray = (pid_t *)
739                 malloc(NUM_BACKENDARRAY_ELEMS * sizeof(pid_t));
740         win32_childHNDArray = (HANDLE *)
741                 malloc(NUM_BACKENDARRAY_ELEMS * sizeof(HANDLE));
742         if (!win32_childPIDArray || !win32_childHNDArray)
743                 ereport(FATAL,
744                                 (errcode(ERRCODE_OUT_OF_MEMORY),
745                                  errmsg("out of memory")));
746
747         /*
748          * Set up a handle that child processes can use to check whether the
749          * postmaster is still running.
750          */
751         if (DuplicateHandle(GetCurrentProcess(),
752                                                 GetCurrentProcess(),
753                                                 GetCurrentProcess(),
754                                                 &PostmasterHandle,
755                                                 0,
756                                                 TRUE,
757                                                 DUPLICATE_SAME_ACCESS) == 0)
758                 ereport(FATAL,
759                         (errmsg_internal("could not duplicate postmaster handle: %d",
760                                                          (int) GetLastError())));
761 #endif
762
763         /*
764          * Record postmaster options.  We delay this till now to avoid
765          * recording bogus options (eg, NBuffers too high for available
766          * memory).
767          */
768         if (!CreateOptsFile(argc, argv, my_exec_path))
769                 ExitPostmaster(1);
770
771 #ifdef EXEC_BACKEND
772         write_nondefault_variables(PGC_POSTMASTER);
773 #endif
774
775         /*
776          * Write the external PID file if requested
777          */
778         if (external_pid_file)
779         {
780                 FILE       *fpidfile = fopen(external_pid_file, "w");
781
782                 if (fpidfile)
783                 {
784                         fprintf(fpidfile, "%d\n", MyProcPid);
785                         fclose(fpidfile);
786                         /* Should we remove the pid file on postmaster exit? */
787                 }
788                 else
789                         write_stderr("%s: could not write external PID file \"%s\": %s\n",
790                                                  progname, external_pid_file, strerror(errno));
791         }
792
793         /*
794          * Set up signal handlers for the postmaster process.
795          *
796          * CAUTION: when changing this list, check for side-effects on the signal
797          * handling setup of child processes.  See tcop/postgres.c,
798          * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/pgarch.c,
799          * postmaster/pgstat.c, and postmaster/syslogger.c.
800          */
801         pqinitmask();
802         PG_SETMASK(&BlockSig);
803
804         pqsignal(SIGHUP, SIGHUP_handler);       /* reread config file and have
805                                                                                  * children do same */
806         pqsignal(SIGINT, pmdie);        /* send SIGTERM and shut down */
807         pqsignal(SIGQUIT, pmdie);       /* send SIGQUIT and die */
808         pqsignal(SIGTERM, pmdie);       /* wait for children and shut down */
809         pqsignal(SIGALRM, SIG_IGN); /* ignored */
810         pqsignal(SIGPIPE, SIG_IGN); /* ignored */
811         pqsignal(SIGUSR1, sigusr1_handler); /* message from child process */
812         pqsignal(SIGUSR2, dummy_handler);       /* unused, reserve for children */
813         pqsignal(SIGCHLD, reaper);      /* handle child termination */
814         pqsignal(SIGTTIN, SIG_IGN); /* ignored */
815         pqsignal(SIGTTOU, SIG_IGN); /* ignored */
816         /* ignore SIGXFSZ, so that ulimit violations work like disk full */
817 #ifdef SIGXFSZ
818         pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
819 #endif
820
821         /*
822          * If enabled, start up syslogger collection subprocess
823          */
824         SysLoggerPID = SysLogger_Start();
825
826         /*
827          * Reset whereToSendOutput from Debug (its starting state) to None.
828          * This stops ereport from sending log messages to stderr unless
829          * Log_destination permits.  We don't do this until the postmaster is
830          * fully launched, since startup failures may as well be reported to
831          * stderr.
832          */
833         whereToSendOutput = None;
834
835         /*
836          * Initialize the statistics collector stuff
837          */
838         pgstat_init();
839
840         /*
841          * Load cached files for client authentication.
842          */
843         load_hba();
844         load_ident();
845         load_user();
846         load_group();
847
848         /*
849          * We're ready to rock and roll...
850          */
851         StartupPID = StartupDataBase();
852
853         status = ServerLoop();
854
855         /*
856          * ServerLoop probably shouldn't ever return, but if it does, close
857          * down.
858          */
859         ExitPostmaster(status != STATUS_OK);
860
861         return 0;                                       /* not reached */
862 }
863
864
865 /*
866  * Validate the proposed data directory
867  */
868 static void
869 checkDataDir(void)
870 {
871         char            path[MAXPGPATH];
872         FILE       *fp;
873         struct stat stat_buf;
874
875         Assert(DataDir);
876
877         if (stat(DataDir, &stat_buf) != 0)
878         {
879                 if (errno == ENOENT)
880                         ereport(FATAL,
881                                         (errcode_for_file_access(),
882                                          errmsg("data directory \"%s\" does not exist",
883                                                         DataDir)));
884                 else
885                         ereport(FATAL,
886                                         (errcode_for_file_access(),
887                          errmsg("could not read permissions of directory \"%s\": %m",
888                                         DataDir)));
889         }
890
891         /*
892          * Check if the directory has group or world access.  If so, reject.
893          *
894          * XXX temporarily suppress check when on Windows, because there may not
895          * be proper support for Unix-y file permissions.  Need to think of a
896          * reasonable check to apply on Windows.
897          */
898 #if !defined(WIN32) && !defined(__CYGWIN__)
899         if (stat_buf.st_mode & (S_IRWXG | S_IRWXO))
900                 ereport(FATAL,
901                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
902                                  errmsg("data directory \"%s\" has group or world access",
903                                                 DataDir),
904                                  errdetail("Permissions should be u=rwx (0700).")));
905 #endif
906
907         /* Look for PG_VERSION before looking for pg_control */
908         ValidatePgVersion(DataDir);
909
910         snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
911
912         fp = AllocateFile(path, PG_BINARY_R);
913         if (fp == NULL)
914         {
915                 write_stderr("%s: could not find the database system\n"
916                                          "Expected to find it in the directory \"%s\",\n"
917                                          "but could not open file \"%s\": %s\n",
918                                          progname, DataDir, path, strerror(errno));
919                 ExitPostmaster(2);
920         }
921         FreeFile(fp);
922 }
923
924
#ifdef USE_RENDEZVOUS

/*
 * reg_reply
 *
 * Do-nothing callback handed to DNSServiceRegistrationCreate().  Both
 * arguments are deliberately ignored.
 */
static void
reg_reply(DNSServiceRegistrationReplyErrorType errorCode, void *context)
{
	/* intentionally empty */
	(void) errorCode;
	(void) context;
}

#endif   /* USE_RENDEZVOUS */
936
937
938 /*
939  * Fork away from the controlling terminal (-S option)
940  */
941 static void
942 pmdaemonize(void)
943 {
944 #ifndef WIN32
945         int                     i;
946         pid_t           pid;
947
948 #ifdef LINUX_PROFILE
949         struct itimerval prof_itimer;
950 #endif
951
952 #ifdef LINUX_PROFILE
953         /* see comments in BackendStartup */
954         getitimer(ITIMER_PROF, &prof_itimer);
955 #endif
956
957         pid = fork();
958         if (pid == (pid_t) -1)
959         {
960                 write_stderr("%s: could not fork background process: %s\n",
961                                          progname, strerror(errno));
962                 ExitPostmaster(1);
963         }
964         else if (pid)
965         {                                                       /* parent */
966                 /* Parent should just exit, without doing any atexit cleanup */
967                 _exit(0);
968         }
969
970 #ifdef LINUX_PROFILE
971         setitimer(ITIMER_PROF, &prof_itimer, NULL);
972 #endif
973
974         MyProcPid = PostmasterPid = getpid();           /* reset PID vars to child */
975
976 /* GH: If there's no setsid(), we hopefully don't need silent mode.
977  * Until there's a better solution.
978  */
979 #ifdef HAVE_SETSID
980         if (setsid() < 0)
981         {
982                 write_stderr("%s: could not dissociate from controlling TTY: %s\n",
983                                          progname, strerror(errno));
984                 ExitPostmaster(1);
985         }
986 #endif
987         i = open(NULL_DEV, O_RDWR);
988         dup2(i, 0);
989         dup2(i, 1);
990         dup2(i, 2);
991         close(i);
992 #else                                                   /* WIN32 */
993         /* not supported */
994         elog(FATAL, "SilentMode not supported under WIN32");
995 #endif   /* WIN32 */
996 }
997
998
/*
 * usage
 *
 * Write the command-line help text to stdout.  Every message is passed
 * through gettext() so that it can be translated.
 *
 *	progname	name to show for the program in the banner and usage line
 */
static void
usage(const char *progname)
{
	/* Banner and synopsis */
	printf(gettext("%s is the PostgreSQL server.\n\n"), progname);
	printf(gettext("Usage:\n  %s [OPTION]...\n\n"), progname);

	/* Regular options; some lines exist only in certain builds */
	printf(gettext("Options:\n"));
#ifdef USE_ASSERT_CHECKING
	printf(gettext("  -A 1|0          enable/disable run-time assert checking\n"));
#endif
	printf(gettext("  -B NBUFFERS     number of shared buffers\n"));
	printf(gettext("  -c NAME=VALUE   set run-time parameter\n"));
	printf(gettext("  -d 1-5          debugging level\n"));
	printf(gettext("  -D DATADIR      database directory\n"));
	printf(gettext("  -F              turn fsync off\n"));
	printf(gettext("  -h HOSTNAME     host name or IP address to listen on\n"));
	printf(gettext("  -i              enable TCP/IP connections\n"));
	printf(gettext("  -k DIRECTORY    Unix-domain socket location\n"));
#ifdef USE_SSL
	printf(gettext("  -l              enable SSL connections\n"));
#endif
	printf(gettext("  -N MAX-CONNECT  maximum number of allowed connections\n"));
	printf(gettext("  -o OPTIONS      pass \"OPTIONS\" to each server process\n"));
	printf(gettext("  -p PORT         port number to listen on\n"));
	printf(gettext("  -S              silent mode (start in background without logging output)\n"));
	printf(gettext("  --help          show this help, then exit\n"));
	printf(gettext("  --version       output version information, then exit\n"));

	/* Options meant for developers */
	printf(gettext("\nDeveloper options:\n"));
	printf(gettext("  -n              do not reinitialize shared memory after abnormal exit\n"));
	printf(gettext("  -s              send SIGSTOP to all backend servers if one dies\n"));

	/* Closing pointer to the documentation and the bug report address */
	printf(gettext("\nPlease read the documentation for the complete list of run-time\n"
				   "configuration settings and how to set them on the command line or in\n"
				   "the configuration file.\n\n"
				   "Report bugs to <pgsql-bugs@postgresql.org>.\n"));
}
1038
1039
1040 /*
1041  * Main idle loop of postmaster
1042  */
1043 static int
1044 ServerLoop(void)
1045 {
1046         fd_set          readmask;
1047         int                     nSockets;
1048         time_t          now,
1049                                 last_touch_time;
1050         struct timeval earlier,
1051                                 later;
1052         struct timezone tz;
1053
1054         gettimeofday(&earlier, &tz);
1055         last_touch_time = time(NULL);
1056
1057         nSockets = initMasks(&readmask);
1058
1059         for (;;)
1060         {
1061                 Port       *port;
1062                 fd_set          rmask;
1063                 struct timeval timeout;
1064                 int                     selres;
1065                 int                     i;
1066
1067                 /*
1068                  * Wait for something to happen.
1069                  *
1070                  * We wait at most one minute, to ensure that the other background
1071                  * tasks handled below get done even when no requests are
1072                  * arriving.
1073                  */
1074                 memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1075
1076                 timeout.tv_sec = 60;
1077                 timeout.tv_usec = 0;
1078
1079                 PG_SETMASK(&UnBlockSig);
1080
1081                 selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1082
1083                 /*
1084                  * Block all signals until we wait again.  (This makes it safe for
1085                  * our signal handlers to do nontrivial work.)
1086                  */
1087                 PG_SETMASK(&BlockSig);
1088
1089                 if (selres < 0)
1090                 {
1091                         if (errno != EINTR && errno != EWOULDBLOCK)
1092                         {
1093                                 ereport(LOG,
1094                                                 (errcode_for_socket_access(),
1095                                                  errmsg("select() failed in postmaster: %m")));
1096                                 return STATUS_ERROR;
1097                         }
1098                 }
1099
1100                 /*
1101                  * New connection pending on any of our sockets? If so, fork a
1102                  * child process to deal with it.
1103                  */
1104                 if (selres > 0)
1105                 {
1106                         /*
1107                          * Select a random seed at the time of first receiving a
1108                          * request.
1109                          */
1110                         while (random_seed == 0)
1111                         {
1112                                 gettimeofday(&later, &tz);
1113
1114                                 /*
1115                                  * We are not sure how much precision is in tv_usec, so we
1116                                  * swap the high and low 16 bits of 'later' and XOR them with
1117                                  * 'earlier'. On the off chance that the result is 0, we
1118                                  * loop until it isn't.
1119                                  */
1120                                 random_seed = earlier.tv_usec ^
1121                                         ((later.tv_usec << 16) |
1122                                          ((later.tv_usec >> 16) & 0xffff));
1123                         }
1124
1125                         for (i = 0; i < MAXLISTEN; i++)
1126                         {
1127                                 if (ListenSocket[i] == -1)
1128                                         break;
1129                                 if (FD_ISSET(ListenSocket[i], &rmask))
1130                                 {
1131                                         port = ConnCreate(ListenSocket[i]);
1132                                         if (port)
1133                                         {
1134                                                 BackendStartup(port);
1135
1136                                                 /*
1137                                                  * We no longer need the open socket or port
1138                                                  * structure in this process
1139                                                  */
1140                                                 StreamClose(port->sock);
1141                                                 ConnFree(port);
1142                                         }
1143                                 }
1144                         }
1145                 }
1146
1147                 /* If we have lost the system logger, try to start a new one */
1148                 if (SysLoggerPID == 0 && Redirect_stderr)
1149                         SysLoggerPID = SysLogger_Start();
1150
1151                 /*
1152                  * If no background writer process is running, and we are not in a
1153                  * state that prevents it, start one.  It doesn't matter if this
1154                  * fails, we'll just try again later.
1155                  */
1156                 if (BgWriterPID == 0 && StartupPID == 0 && !FatalError)
1157                 {
1158                         BgWriterPID = StartBackgroundWriter();
1159                         /* If shutdown is pending, set it going */
1160                         if (Shutdown > NoShutdown && BgWriterPID != 0)
1161                                 kill(BgWriterPID, SIGUSR2);
1162                 }
1163
1164                 /* If we have lost the archiver, try to start a new one */
1165                 if (XLogArchivingActive() && PgArchPID == 0 &&
1166                         StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
1167                         PgArchPID = pgarch_start();
1168
1169                 /* If we have lost the stats collector, try to start a new one */
1170                 if (PgStatPID == 0 &&
1171                         StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
1172                         PgStatPID = pgstat_start();
1173
1174                 /*
1175                  * Touch the socket and lock file at least every ten minutes, to
1176                  * ensure that they are not removed by overzealous /tmp-cleaning
1177                  * tasks.
1178                  */
1179                 now = time(NULL);
1180                 if (now - last_touch_time >= 10 * 60)
1181                 {
1182                         TouchSocketFile();
1183                         TouchSocketLockFile();
1184                         last_touch_time = now;
1185                 }
1186         }
1187 }
1188
1189
1190 /*
1191  * Initialise the masks for select() for the ports we are listening on.
1192  * Return the number of sockets to listen on.
1193  */
1194 static int
1195 initMasks(fd_set *rmask)
1196 {
1197         int                     nsocks = -1;
1198         int                     i;
1199
1200         FD_ZERO(rmask);
1201
1202         for (i = 0; i < MAXLISTEN; i++)
1203         {
1204                 int                     fd = ListenSocket[i];
1205
1206                 if (fd == -1)
1207                         break;
1208                 FD_SET(fd, rmask);
1209                 if (fd > nsocks)
1210                         nsocks = fd;
1211         }
1212
1213         return nsocks + 1;
1214 }
1215
1216
1217 /*
1218  * Read the startup packet and do something according to it.
1219  *
1220  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1221  * not return at all.
1222  *
1223  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1224  * if that's what you want.  Return STATUS_ERROR if you don't want to
1225  * send anything to the client, which would typically be appropriate
1226  * if we detect a communications failure.)
1227  */
1228 static int
1229 ProcessStartupPacket(Port *port, bool SSLdone)
1230 {
1231         int32           len;
1232         void       *buf;
1233         ProtocolVersion proto;
1234         MemoryContext oldcontext;
1235
1236         if (pq_getbytes((char *) &len, 4) == EOF)
1237         {
1238                 /*
1239                  * EOF after SSLdone probably means the client didn't like our
1240                  * response to NEGOTIATE_SSL_CODE.      That's not an error condition,
1241                  * so don't clutter the log with a complaint.
1242                  */
1243                 if (!SSLdone)
1244                         ereport(COMMERROR,
1245                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1246                                          errmsg("incomplete startup packet")));
1247                 return STATUS_ERROR;
1248         }
1249
1250         len = ntohl(len);
1251         len -= 4;
1252
1253         if (len < (int32) sizeof(ProtocolVersion) ||
1254                 len > MAX_STARTUP_PACKET_LENGTH)
1255         {
1256                 ereport(COMMERROR,
1257                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1258                                  errmsg("invalid length of startup packet")));
1259                 return STATUS_ERROR;
1260         }
1261
1262         /*
1263          * Allocate at least the size of an old-style startup packet, plus one
1264          * extra byte, and make sure all are zeroes.  This ensures we will
1265          * have null termination of all strings, in both fixed- and
1266          * variable-length packet layouts.
1267          */
1268         if (len <= (int32) sizeof(StartupPacket))
1269                 buf = palloc0(sizeof(StartupPacket) + 1);
1270         else
1271                 buf = palloc0(len + 1);
1272
1273         if (pq_getbytes(buf, len) == EOF)
1274         {
1275                 ereport(COMMERROR,
1276                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1277                                  errmsg("incomplete startup packet")));
1278                 return STATUS_ERROR;
1279         }
1280
1281         /*
1282          * The first field is either a protocol version number or a special
1283          * request code.
1284          */
1285         port->proto = proto = ntohl(*((ProtocolVersion *) buf));
1286
1287         if (proto == CANCEL_REQUEST_CODE)
1288         {
1289                 processCancelRequest(port, buf);
1290                 return 127;                             /* XXX */
1291         }
1292
1293         if (proto == NEGOTIATE_SSL_CODE && !SSLdone)
1294         {
1295                 char            SSLok;
1296
1297 #ifdef USE_SSL
1298                 /* No SSL when disabled or on Unix sockets */
1299                 if (!EnableSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
1300                         SSLok = 'N';
1301                 else
1302                         SSLok = 'S';            /* Support for SSL */
1303 #else
1304                 SSLok = 'N';                    /* No support for SSL */
1305 #endif
1306                 if (send(port->sock, &SSLok, 1, 0) != 1)
1307                 {
1308                         ereport(COMMERROR,
1309                                         (errcode_for_socket_access(),
1310                                  errmsg("failed to send SSL negotiation response: %m")));
1311                         return STATUS_ERROR;    /* close the connection */
1312                 }
1313
1314 #ifdef USE_SSL
1315                 if (SSLok == 'S' && secure_open_server(port) == -1)
1316                         return STATUS_ERROR;
1317 #endif
1318                 /* regular startup packet, cancel, etc packet should follow... */
1319                 /* but not another SSL negotiation request */
1320                 return ProcessStartupPacket(port, true);
1321         }
1322
1323         /* Could add additional special packet types here */
1324
1325         /*
1326          * Set FrontendProtocol now so that ereport() knows what format to
1327          * send if we fail during startup.
1328          */
1329         FrontendProtocol = proto;
1330
1331         /* Check we can handle the protocol the frontend is using. */
1332
1333         if (PG_PROTOCOL_MAJOR(proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) ||
1334           PG_PROTOCOL_MAJOR(proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) ||
1335         (PG_PROTOCOL_MAJOR(proto) == PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) &&
1336          PG_PROTOCOL_MINOR(proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST)))
1337                 ereport(FATAL,
1338                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1339                                  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
1340                                           PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
1341                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST),
1342                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST),
1343                                                 PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST))));
1344
1345         /*
1346          * Now fetch parameters out of startup packet and save them into the
1347          * Port structure.      All data structures attached to the Port struct
1348          * must be allocated in TopMemoryContext so that they won't disappear
1349          * when we pass them to PostgresMain (see BackendRun).  We need not
1350          * worry about leaking this storage on failure, since we aren't in the
1351          * postmaster process anymore.
1352          */
1353         oldcontext = MemoryContextSwitchTo(TopMemoryContext);
1354
1355         if (PG_PROTOCOL_MAJOR(proto) >= 3)
1356         {
1357                 int32           offset = sizeof(ProtocolVersion);
1358
1359                 /*
1360                  * Scan packet body for name/option pairs.      We can assume any
1361                  * string beginning within the packet body is null-terminated,
1362                  * thanks to zeroing extra byte above.
1363                  */
1364                 port->guc_options = NIL;
1365
1366                 while (offset < len)
1367                 {
1368                         char       *nameptr = ((char *) buf) + offset;
1369                         int32           valoffset;
1370                         char       *valptr;
1371
1372                         if (*nameptr == '\0')
1373                                 break;                  /* found packet terminator */
1374                         valoffset = offset + strlen(nameptr) + 1;
1375                         if (valoffset >= len)
1376                                 break;                  /* missing value, will complain below */
1377                         valptr = ((char *) buf) + valoffset;
1378
1379                         if (strcmp(nameptr, "database") == 0)
1380                                 port->database_name = pstrdup(valptr);
1381                         else if (strcmp(nameptr, "user") == 0)
1382                                 port->user_name = pstrdup(valptr);
1383                         else if (strcmp(nameptr, "options") == 0)
1384                                 port->cmdline_options = pstrdup(valptr);
1385                         else
1386                         {
1387                                 /* Assume it's a generic GUC option */
1388                                 port->guc_options = lappend(port->guc_options,
1389                                                                                         pstrdup(nameptr));
1390                                 port->guc_options = lappend(port->guc_options,
1391                                                                                         pstrdup(valptr));
1392                         }
1393                         offset = valoffset + strlen(valptr) + 1;
1394                 }
1395
1396                 /*
1397                  * If we didn't find a packet terminator exactly at the end of the
1398                  * given packet length, complain.
1399                  */
1400                 if (offset != len - 1)
1401                         ereport(FATAL,
1402                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1403                                          errmsg("invalid startup packet layout: expected terminator as last byte")));
1404         }
1405         else
1406         {
1407                 /*
1408                  * Get the parameters from the old-style, fixed-width-fields
1409                  * startup packet as C strings.  The packet destination was
1410                  * cleared first so a short packet has zeros silently added.  We
1411                  * have to be prepared to truncate the pstrdup result for oversize
1412                  * fields, though.
1413                  */
1414                 StartupPacket *packet = (StartupPacket *) buf;
1415
1416                 port->database_name = pstrdup(packet->database);
1417                 if (strlen(port->database_name) > sizeof(packet->database))
1418                         port->database_name[sizeof(packet->database)] = '\0';
1419                 port->user_name = pstrdup(packet->user);
1420                 if (strlen(port->user_name) > sizeof(packet->user))
1421                         port->user_name[sizeof(packet->user)] = '\0';
1422                 port->cmdline_options = pstrdup(packet->options);
1423                 if (strlen(port->cmdline_options) > sizeof(packet->options))
1424                         port->cmdline_options[sizeof(packet->options)] = '\0';
1425                 port->guc_options = NIL;
1426         }
1427
1428         /* Check a user name was given. */
1429         if (port->user_name == NULL || port->user_name[0] == '\0')
1430                 ereport(FATAL,
1431                                 (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
1432                  errmsg("no PostgreSQL user name specified in startup packet")));
1433
1434         /* The database defaults to the user name. */
1435         if (port->database_name == NULL || port->database_name[0] == '\0')
1436                 port->database_name = pstrdup(port->user_name);
1437
1438         if (Db_user_namespace)
1439         {
1440                 /*
1441                  * If user@, it is a global user, remove '@'. We only want to do
1442                  * this if there is an '@' at the end and no earlier in the user
1443                  * string or they may fake as a local user of another database
1444                  * attaching to this database.
1445                  */
1446                 if (strchr(port->user_name, '@') ==
1447                         port->user_name + strlen(port->user_name) - 1)
1448                         *strchr(port->user_name, '@') = '\0';
1449                 else
1450                 {
1451                         /* Append '@' and dbname */
1452                         char       *db_user;
1453
1454                         db_user = palloc(strlen(port->user_name) +
1455                                                          strlen(port->database_name) + 2);
1456                         sprintf(db_user, "%s@%s", port->user_name, port->database_name);
1457                         port->user_name = db_user;
1458                 }
1459         }
1460
1461         /*
1462          * Truncate given database and user names to length of a Postgres
1463          * name.  This avoids lookup failures when overlength names are given.
1464          */
1465         if (strlen(port->database_name) >= NAMEDATALEN)
1466                 port->database_name[NAMEDATALEN - 1] = '\0';
1467         if (strlen(port->user_name) >= NAMEDATALEN)
1468                 port->user_name[NAMEDATALEN - 1] = '\0';
1469
1470         /*
1471          * Done putting stuff in TopMemoryContext.
1472          */
1473         MemoryContextSwitchTo(oldcontext);
1474
1475         /*
1476          * If we're going to reject the connection due to database state, say
1477          * so now instead of wasting cycles on an authentication exchange.
1478          * (This also allows a pg_ping utility to be written.)
1479          */
1480         switch (port->canAcceptConnections)
1481         {
1482                 case CAC_STARTUP:
1483                         ereport(FATAL,
1484                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1485                                          errmsg("the database system is starting up")));
1486                         break;
1487                 case CAC_SHUTDOWN:
1488                         ereport(FATAL,
1489                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1490                                          errmsg("the database system is shutting down")));
1491                         break;
1492                 case CAC_RECOVERY:
1493                         ereport(FATAL,
1494                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1495                                          errmsg("the database system is in recovery mode")));
1496                         break;
1497                 case CAC_TOOMANY:
1498                         ereport(FATAL,
1499                                         (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
1500                                          errmsg("sorry, too many clients already")));
1501                         break;
1502                 case CAC_OK:
1503                 default:
1504                         break;
1505         }
1506
1507         return STATUS_OK;
1508 }
1509
1510
1511 /*
1512  * The client has sent a cancel request packet, not a normal
1513  * start-a-new-connection packet.  Perform the necessary processing.
1514  * Nothing is sent back to the client.
1515  */
1516 static void
1517 processCancelRequest(Port *port, void *pkt)
1518 {
1519         CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
1520         int                     backendPID;
1521         long            cancelAuthCode;
1522         Backend    *bp;
1523
1524 #ifndef EXEC_BACKEND
1525         Dlelem     *curr;
1526
1527 #else
1528         int                     i;
1529 #endif
1530
1531         backendPID = (int) ntohl(canc->backendPID);
1532         cancelAuthCode = (long) ntohl(canc->cancelAuthCode);
1533
1534         /*
1535          * See if we have a matching backend.  In the EXEC_BACKEND case, we
1536          * can no longer access the postmaster's own backend list, and must
1537          * rely on the duplicate array in shared memory.
1538          */
1539 #ifndef EXEC_BACKEND
1540         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
1541         {
1542                 bp = (Backend *) DLE_VAL(curr);
1543 #else
1544         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
1545         {
1546                 bp = (Backend *) &ShmemBackendArray[i];
1547 #endif
1548                 if (bp->pid == backendPID)
1549                 {
1550                         if (bp->cancel_key == cancelAuthCode)
1551                         {
1552                                 /* Found a match; signal that backend to cancel current op */
1553                                 ereport(DEBUG2,
1554                                                 (errmsg_internal("processing cancel request: sending SIGINT to process %d",
1555                                                                                  backendPID)));
1556                                 kill(bp->pid, SIGINT);
1557                         }
1558                         else
1559                                 /* Right PID, wrong key: no way, Jose */
1560                                 ereport(DEBUG2,
1561                                                 (errmsg_internal("bad key in cancel request for process %d",
1562                                                                                  backendPID)));
1563                         return;
1564                 }
1565         }
1566
1567         /* No matching backend */
1568         ereport(DEBUG2,
1569                         (errmsg_internal("bad pid in cancel request for process %d",
1570                                                          backendPID)));
1571 }
1572
1573 /*
1574  * canAcceptConnections --- check to see if database state allows connections.
1575  */
1576 static enum CAC_state
1577 canAcceptConnections(void)
1578 {
1579         /* Can't start backends when in startup/shutdown/recovery state. */
1580         if (Shutdown > NoShutdown)
1581                 return CAC_SHUTDOWN;
1582         if (StartupPID)
1583                 return CAC_STARTUP;
1584         if (FatalError)
1585                 return CAC_RECOVERY;
1586
1587         /*
1588          * Don't start too many children.
1589          *
1590          * We allow more connections than we can have backends here because some
1591          * might still be authenticating; they might fail auth, or some
1592          * existing backend might exit before the auth cycle is completed. The
1593          * exact MaxBackends limit is enforced when a new backend tries to
1594          * join the shared-inval backend array.
1595          */
1596         if (CountChildren() >= 2 * MaxBackends)
1597                 return CAC_TOOMANY;
1598
1599         return CAC_OK;
1600 }
1601
1602
1603 /*
1604  * ConnCreate -- create a local connection data structure
1605  */
1606 static Port *
1607 ConnCreate(int serverFd)
1608 {
1609         Port       *port;
1610
1611         if (!(port = (Port *) calloc(1, sizeof(Port))))
1612         {
1613                 ereport(LOG,
1614                                 (errcode(ERRCODE_OUT_OF_MEMORY),
1615                                  errmsg("out of memory")));
1616                 ExitPostmaster(1);
1617         }
1618
1619         if (StreamConnection(serverFd, port) != STATUS_OK)
1620         {
1621                 StreamClose(port->sock);
1622                 ConnFree(port);
1623                 port = NULL;
1624         }
1625         else
1626         {
1627                 /*
1628                  * Precompute password salt values to use for this connection.
1629                  * It's slightly annoying to do this long in advance of knowing
1630                  * whether we'll need 'em or not, but we must do the random()
1631                  * calls before we fork, not after.  Else the postmaster's random
1632                  * sequence won't get advanced, and all backends would end up
1633                  * using the same salt...
1634                  */
1635                 RandomSalt(port->cryptSalt, port->md5Salt);
1636         }
1637
1638         return port;
1639 }
1640
1641
1642 /*
1643  * ConnFree -- free a local connection data structure
1644  */
1645 static void
1646 ConnFree(Port *conn)
1647 {
1648 #ifdef USE_SSL
1649         secure_close(conn);
1650 #endif
1651         free(conn);
1652 }
1653
1654
1655 /*
1656  * ClosePostmasterPorts -- close all the postmaster's open sockets
1657  *
1658  * This is called during child process startup to release file descriptors
1659  * that are not needed by that child process.  The postmaster still has
1660  * them open, of course.
1661  *
1662  * Note: we pass am_syslogger as a boolean because we don't want to set
1663  * the global variable yet when this is called.
1664  */
1665 void
1666 ClosePostmasterPorts(bool am_syslogger)
1667 {
1668         int                     i;
1669
1670         /* Close the listen sockets */
1671         for (i = 0; i < MAXLISTEN; i++)
1672         {
1673                 if (ListenSocket[i] != -1)
1674                 {
1675                         StreamClose(ListenSocket[i]);
1676                         ListenSocket[i] = -1;
1677                 }
1678         }
1679
1680         /* If using syslogger, close the read side of the pipe */
1681         if (!am_syslogger)
1682         {
1683 #ifndef WIN32
1684                 if (syslogPipe[0] >= 0)
1685                         close(syslogPipe[0]);
1686                 syslogPipe[0] = -1;
1687 #else
1688                 if (syslogPipe[0])
1689                         CloseHandle(syslogPipe[0]);
1690                 syslogPipe[0] = 0;
1691 #endif
1692         }
1693 }
1694
1695
1696 /*
1697  * reset_shared -- reset shared memory and semaphores
1698  */
1699 static void
1700 reset_shared(unsigned short port)
1701 {
1702         /*
1703          * Create or re-create shared memory and semaphores.
1704          *
1705          * Note: in each "cycle of life" we will normally assign the same IPC
1706          * keys (if using SysV shmem and/or semas), since the port number is
1707          * used to determine IPC keys.  This helps ensure that we will clean
1708          * up dead IPC objects if the postmaster crashes and is restarted.
1709          */
1710         CreateSharedMemoryAndSemaphores(false, MaxBackends, port);
1711 }
1712
1713
1714 /*
1715  * SIGHUP -- reread config files, and tell children to do same
1716  */
1717 static void
1718 SIGHUP_handler(SIGNAL_ARGS)
1719 {
1720         int                     save_errno = errno;
1721
1722         PG_SETMASK(&BlockSig);
1723
1724         if (Shutdown <= SmartShutdown)
1725         {
1726                 ereport(LOG,
1727                          (errmsg("received SIGHUP, reloading configuration files")));
1728                 ProcessConfigFile(PGC_SIGHUP);
1729                 SignalChildren(SIGHUP);
1730                 if (BgWriterPID != 0)
1731                         kill(BgWriterPID, SIGHUP);
1732                 if (PgArchPID != 0)
1733                         kill(PgArchPID, SIGHUP);
1734                 if (SysLoggerPID != 0)
1735                         kill(SysLoggerPID, SIGHUP);
1736                 /* PgStatPID does not currently need SIGHUP */
1737                 load_hba();
1738                 load_ident();
1739
1740 #ifdef EXEC_BACKEND
1741                 /* Update the starting-point file for future children */
1742                 write_nondefault_variables(PGC_SIGHUP);
1743 #endif
1744         }
1745
1746         PG_SETMASK(&UnBlockSig);
1747
1748         errno = save_errno;
1749 }
1750
1751
1752 /*
1753  * pmdie -- signal handler for processing various postmaster signals.
1754  */
1755 static void
1756 pmdie(SIGNAL_ARGS)
1757 {
1758         int                     save_errno = errno;
1759
1760         PG_SETMASK(&BlockSig);
1761
1762         ereport(DEBUG2,
1763                         (errmsg_internal("postmaster received signal %d",
1764                                                          postgres_signal_arg)));
1765
1766         switch (postgres_signal_arg)
1767         {
1768                 case SIGTERM:
1769
1770                         /*
1771                          * Smart Shutdown:
1772                          *
1773                          * Wait for children to end their work, then shut down.
1774                          */
1775                         if (Shutdown >= SmartShutdown)
1776                                 break;
1777                         Shutdown = SmartShutdown;
1778                         ereport(LOG,
1779                                         (errmsg("received smart shutdown request")));
1780
1781                         if (DLGetHead(BackendList))
1782                                 break;                  /* let reaper() handle this */
1783
1784                         /*
1785                          * No children left. Begin shutdown of data base system.
1786                          */
1787                         if (StartupPID != 0 || FatalError)
1788                                 break;                  /* let reaper() handle this */
1789                         /* Start the bgwriter if not running */
1790                         if (BgWriterPID == 0)
1791                                 BgWriterPID = StartBackgroundWriter();
1792                         /* And tell it to shut down */
1793                         if (BgWriterPID != 0)
1794                                 kill(BgWriterPID, SIGUSR2);
1795                         /* Tell pgarch to shut down too; nothing left for it to do */
1796                         if (PgArchPID != 0)
1797                                 kill(PgArchPID, SIGQUIT);
1798                         /* Tell pgstat to shut down too; nothing left for it to do */
1799                         if (PgStatPID != 0)
1800                                 kill(PgStatPID, SIGQUIT);
1801                         break;
1802
1803                 case SIGINT:
1804
1805                         /*
1806                          * Fast Shutdown:
1807                          *
1808                          * Abort all children with SIGTERM (rollback active transactions
1809                          * and exit) and shut down when they are gone.
1810                          */
1811                         if (Shutdown >= FastShutdown)
1812                                 break;
1813                         Shutdown = FastShutdown;
1814                         ereport(LOG,
1815                                         (errmsg("received fast shutdown request")));
1816
1817                         if (DLGetHead(BackendList))
1818                         {
1819                                 if (!FatalError)
1820                                 {
1821                                         ereport(LOG,
1822                                                         (errmsg("aborting any active transactions")));
1823                                         SignalChildren(SIGTERM);
1824                                         /* reaper() does the rest */
1825                                 }
1826                                 break;
1827                         }
1828
1829                         /*
1830                          * No children left. Begin shutdown of data base system.
1831                          *
1832                          * Note: if we previously got SIGTERM then we may send SIGUSR2 to
1833                          * the bgwriter a second time here.  This should be harmless.
1834                          */
1835                         if (StartupPID != 0 || FatalError)
1836                                 break;                  /* let reaper() handle this */
1837                         /* Start the bgwriter if not running */
1838                         if (BgWriterPID == 0)
1839                                 BgWriterPID = StartBackgroundWriter();
1840                         /* And tell it to shut down */
1841                         if (BgWriterPID != 0)
1842                                 kill(BgWriterPID, SIGUSR2);
1843                         /* Tell pgarch to shut down too; nothing left for it to do */
1844                         if (PgArchPID != 0)
1845                                 kill(PgArchPID, SIGQUIT);
1846                         /* Tell pgstat to shut down too; nothing left for it to do */
1847                         if (PgStatPID != 0)
1848                                 kill(PgStatPID, SIGQUIT);
1849                         break;
1850
1851                 case SIGQUIT:
1852
1853                         /*
1854                          * Immediate Shutdown:
1855                          *
1856                          * abort all children with SIGQUIT and exit without attempt to
1857                          * properly shut down data base system.
1858                          */
1859                         ereport(LOG,
1860                                         (errmsg("received immediate shutdown request")));
1861                         if (StartupPID != 0)
1862                                 kill(StartupPID, SIGQUIT);
1863                         if (BgWriterPID != 0)
1864                                 kill(BgWriterPID, SIGQUIT);
1865                         if (PgArchPID != 0)
1866                                 kill(PgArchPID, SIGQUIT);
1867                         if (PgStatPID != 0)
1868                                 kill(PgStatPID, SIGQUIT);
1869                         if (DLGetHead(BackendList))
1870                                 SignalChildren(SIGQUIT);
1871                         ExitPostmaster(0);
1872                         break;
1873         }
1874
1875         PG_SETMASK(&UnBlockSig);
1876
1877         errno = save_errno;
1878 }
1879
1880 /*
1881  * Reaper -- signal handler to cleanup after a backend (child) dies.
1882  */
1883 static void
1884 reaper(SIGNAL_ARGS)
1885 {
1886         int                     save_errno = errno;
1887
1888 #ifdef HAVE_WAITPID
1889         int                     status;                 /* backend exit status */
1890
1891 #else
1892 #ifndef WIN32
1893         union wait      status;                 /* backend exit status */
1894 #endif
1895 #endif
1896         int                     exitstatus;
1897         int                     pid;                    /* process id of dead backend */
1898
1899         PG_SETMASK(&BlockSig);
1900
1901         ereport(DEBUG4,
1902                         (errmsg_internal("reaping dead processes")));
1903 #ifdef HAVE_WAITPID
1904         while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
1905         {
1906                 exitstatus = status;
1907 #else
1908 #ifndef WIN32
1909         while ((pid = wait3(&status, WNOHANG, NULL)) > 0)
1910         {
1911                 exitstatus = status.w_status;
1912 #else
1913         while ((pid = win32_waitpid(&exitstatus)) > 0)
1914         {
1915                 /*
1916                  * We need to do this here, and not in CleanupBackend, since this
1917                  * is to be called on all children when we are done with them.
1918                  * Could move to LogChildExit, but that seems like asking for
1919                  * future trouble...
1920                  */
1921                 win32_RemoveChild(pid);
1922 #endif   /* WIN32 */
1923 #endif   /* HAVE_WAITPID */
1924
1925                 /*
1926                  * Check if this child was a startup process.
1927                  */
1928                 if (StartupPID != 0 && pid == StartupPID)
1929                 {
1930                         StartupPID = 0;
1931                         if (exitstatus != 0)
1932                         {
1933                                 LogChildExit(LOG, gettext("startup process"),
1934                                                          pid, exitstatus);
1935                                 ereport(LOG,
1936                                                 (errmsg("aborting startup due to startup process failure")));
1937                                 ExitPostmaster(1);
1938                         }
1939
1940                         /*
1941                          * Startup succeeded - we are done with system startup or
1942                          * recovery.
1943                          */
1944                         FatalError = false;
1945
1946                         /*
1947                          * Crank up the background writer.      It doesn't matter if this
1948                          * fails, we'll just try again later.
1949                          */
1950                         Assert(BgWriterPID == 0);
1951                         BgWriterPID = StartBackgroundWriter();
1952
1953                         /*
1954                          * Go to shutdown mode if a shutdown request was pending.
1955                          * Otherwise, try to start the archiver and stats collector
1956                          * too.
1957                          */
1958                         if (Shutdown > NoShutdown && BgWriterPID != 0)
1959                                 kill(BgWriterPID, SIGUSR2);
1960                         else if (Shutdown == NoShutdown)
1961                         {
1962                                 if (XLogArchivingActive() && PgArchPID == 0)
1963                                         PgArchPID = pgarch_start();
1964                                 if (PgStatPID == 0)
1965                                         PgStatPID = pgstat_start();
1966                         }
1967
1968                         continue;
1969                 }
1970
1971                 /*
1972                  * Was it the bgwriter?
1973                  */
1974                 if (BgWriterPID != 0 && pid == BgWriterPID)
1975                 {
1976                         BgWriterPID = 0;
1977                         if (exitstatus == 0 && Shutdown > NoShutdown &&
1978                                 !FatalError && !DLGetHead(BackendList))
1979                         {
1980                                 /*
1981                                  * Normal postmaster exit is here: we've seen normal exit
1982                                  * of the bgwriter after it's been told to shut down. We
1983                                  * expect that it wrote a shutdown checkpoint.  (If for
1984                                  * some reason it didn't, recovery will occur on next
1985                                  * postmaster start.)
1986                                  *
1987                                  * Note: we do not wait around for exit of the archiver or
1988                                  * stats processes.  They've been sent SIGQUIT by this
1989                                  * point, and in any case contain logic to commit
1990                                  * hara-kiri if they notice the postmaster is gone.
1991                                  */
1992                                 ExitPostmaster(0);
1993                         }
1994
1995                         /*
1996                          * Any unexpected exit of the bgwriter is treated as a crash.
1997                          */
1998                         HandleChildCrash(pid, exitstatus,
1999                                                          gettext("background writer process"));
2000                         continue;
2001                 }
2002
2003                 /*
2004                  * Was it the archiver?  If so, just try to start a new one; no
2005                  * need to force reset of the rest of the system.  (If fail, we'll
2006                  * try again in future cycles of the main loop.)
2007                  */
2008                 if (PgArchPID != 0 && pid == PgArchPID)
2009                 {
2010                         PgArchPID = 0;
2011                         if (exitstatus != 0)
2012                                 LogChildExit(LOG, gettext("archiver process"),
2013                                                          pid, exitstatus);
2014                         if (XLogArchivingActive() &&
2015                                 StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
2016                                 PgArchPID = pgarch_start();
2017                         continue;
2018                 }
2019
2020                 /*
2021                  * Was it the statistics collector?  If so, just try to start a
2022                  * new one; no need to force reset of the rest of the system.  (If
2023                  * fail, we'll try again in future cycles of the main loop.)
2024                  */
2025                 if (PgStatPID != 0 && pid == PgStatPID)
2026                 {
2027                         PgStatPID = 0;
2028                         if (exitstatus != 0)
2029                                 LogChildExit(LOG, gettext("statistics collector process"),
2030                                                          pid, exitstatus);
2031                         if (StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
2032                                 PgStatPID = pgstat_start();
2033                         continue;
2034                 }
2035
2036                 /* Was it the system logger? try to start a new one */
2037                 if (SysLoggerPID != 0 && pid == SysLoggerPID)
2038                 {
2039                         SysLoggerPID = 0;
2040                         /* for safety's sake, launch new logger *first* */
2041                         SysLoggerPID = SysLogger_Start();
2042                         if (exitstatus != 0)
2043                                 LogChildExit(LOG, gettext("system logger process"),
2044                                                          pid, exitstatus);
2045                         continue;
2046                 }
2047
2048                 /*
2049                  * Else do standard backend child cleanup.
2050                  */
2051                 CleanupBackend(pid, exitstatus);
2052         }                                                       /* loop over pending child-death reports */
2053
2054         if (FatalError)
2055         {
2056                 /*
2057                  * Wait for all important children to exit, then reset shmem and
2058                  * StartupDataBase.  (We can ignore the archiver and stats
2059                  * processes here since they are not connected to shmem.)
2060                  */
2061                 if (DLGetHead(BackendList) || StartupPID != 0 || BgWriterPID != 0)
2062                         goto reaper_done;
2063                 ereport(LOG,
2064                         (errmsg("all server processes terminated; reinitializing")));
2065
2066                 shmem_exit(0);
2067                 reset_shared(PostPortNumber);
2068
2069                 StartupPID = StartupDataBase();
2070
2071                 goto reaper_done;
2072         }
2073
2074         if (Shutdown > NoShutdown)
2075         {
2076                 if (DLGetHead(BackendList) || StartupPID != 0)
2077                         goto reaper_done;
2078                 /* Start the bgwriter if not running */
2079                 if (BgWriterPID == 0)
2080                         BgWriterPID = StartBackgroundWriter();
2081                 /* And tell it to shut down */
2082                 if (BgWriterPID != 0)
2083                         kill(BgWriterPID, SIGUSR2);
2084                 /* Tell pgarch to shut down too; nothing left for it to do */
2085                 if (PgArchPID != 0)
2086                         kill(PgArchPID, SIGQUIT);
2087                 /* Tell pgstat to shut down too; nothing left for it to do */
2088                 if (PgStatPID != 0)
2089                         kill(PgStatPID, SIGQUIT);
2090         }
2091
2092 reaper_done:
2093         PG_SETMASK(&UnBlockSig);
2094
2095         errno = save_errno;
2096 }
2097
2098
2099 /*
2100  * CleanupBackend -- cleanup after terminated backend.
2101  *
2102  * Remove all local state associated with backend.
2103  */
2104 static void
2105 CleanupBackend(int pid,
2106                            int exitstatus)      /* child's exit status. */
2107 {
2108         Dlelem     *curr;
2109
2110         LogChildExit(DEBUG2, gettext("server process"), pid, exitstatus);
2111
2112         /*
2113          * If a backend dies in an ugly way (i.e. exit status not 0) then we
2114          * must signal all other backends to quickdie.  If exit status is zero
2115          * we assume everything is hunky dory and simply remove the backend
2116          * from the active backend list.
2117          */
2118         if (exitstatus != 0)
2119         {
2120                 HandleChildCrash(pid, exitstatus, gettext("server process"));
2121                 return;
2122         }
2123
2124         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2125         {
2126                 Backend    *bp = (Backend *) DLE_VAL(curr);
2127
2128                 if (bp->pid == pid)
2129                 {
2130                         DLRemove(curr);
2131                         free(bp);
2132                         DLFreeElem(curr);
2133 #ifdef EXEC_BACKEND
2134                         ShmemBackendArrayRemove(pid);
2135 #endif
2136                         /* Tell the collector about backend termination */
2137                         pgstat_beterm(pid);
2138                         break;
2139                 }
2140         }
2141 }
2142
2143 /*
2144  * HandleChildCrash -- cleanup after failed backend or bgwriter.
2145  *
2146  * The objectives here are to clean up our local state about the child
2147  * process, and to signal all other remaining children to quickdie.
2148  */
2149 static void
2150 HandleChildCrash(int pid, int exitstatus, const char *procname)
2151 {
2152         Dlelem     *curr,
2153                            *next;
2154         Backend    *bp;
2155
2156         /*
2157          * Make log entry unless there was a previous crash (if so, nonzero
2158          * exit status is to be expected in SIGQUIT response; don't clutter
2159          * log)
2160          */
2161         if (!FatalError)
2162         {
2163                 LogChildExit(LOG, procname, pid, exitstatus);
2164                 ereport(LOG,
2165                           (errmsg("terminating any other active server processes")));
2166         }
2167
2168         /* Process regular backends */
2169         for (curr = DLGetHead(BackendList); curr; curr = next)
2170         {
2171                 next = DLGetSucc(curr);
2172                 bp = (Backend *) DLE_VAL(curr);
2173                 if (bp->pid == pid)
2174                 {
2175                         /*
2176                          * Found entry for freshly-dead backend, so remove it.
2177                          */
2178                         DLRemove(curr);
2179                         free(bp);
2180                         DLFreeElem(curr);
2181 #ifdef EXEC_BACKEND
2182                         ShmemBackendArrayRemove(pid);
2183 #endif
2184                         /* Tell the collector about backend termination */
2185                         pgstat_beterm(pid);
2186                         /* Keep looping so we can signal remaining backends */
2187                 }
2188                 else
2189                 {
2190                         /*
2191                          * This backend is still alive.  Unless we did so already,
2192                          * tell it to commit hara-kiri.
2193                          *
2194                          * SIGQUIT is the special signal that says exit without proc_exit
2195                          * and let the user know what's going on. But if SendStop is
2196                          * set (-s on command line), then we send SIGSTOP instead, so
2197                          * that we can get core dumps from all backends by hand.
2198                          */
2199                         if (!FatalError)
2200                         {
2201                                 ereport(DEBUG2,
2202                                                 (errmsg_internal("sending %s to process %d",
2203                                                                           (SendStop ? "SIGSTOP" : "SIGQUIT"),
2204                                                                                  (int) bp->pid)));
2205                                 kill(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
2206                         }
2207                 }
2208         }
2209
2210         /* Take care of the bgwriter too */
2211         if (pid == BgWriterPID)
2212                 BgWriterPID = 0;
2213         else if (BgWriterPID != 0 && !FatalError)
2214         {
2215                 ereport(DEBUG2,
2216                                 (errmsg_internal("sending %s to process %d",
2217                                                                  (SendStop ? "SIGSTOP" : "SIGQUIT"),
2218                                                                  (int) BgWriterPID)));
2219                 kill(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
2220         }
2221
2222         /* Force a power-cycle of the pgarch process too */
2223         /* (Shouldn't be necessary, but just for luck) */
2224         if (PgArchPID != 0 && !FatalError)
2225         {
2226                 ereport(DEBUG2,
2227                                 (errmsg_internal("sending %s to process %d",
2228                                                                  "SIGQUIT",
2229                                                                  (int) PgArchPID)));
2230                 kill(PgArchPID, SIGQUIT);
2231         }
2232
2233         /* Force a power-cycle of the pgstat processes too */
2234         /* (Shouldn't be necessary, but just for luck) */
2235         if (PgStatPID != 0 && !FatalError)
2236         {
2237                 ereport(DEBUG2,
2238                                 (errmsg_internal("sending %s to process %d",
2239                                                                  "SIGQUIT",
2240                                                                  (int) PgStatPID)));
2241                 kill(PgStatPID, SIGQUIT);
2242         }
2243
2244         /* We do NOT restart the syslogger */
2245
2246         FatalError = true;
2247 }
2248
/*
 * LogChildExit -- report how a child process came to an end.
 *
 * lev is the ereport() severity to use, procname is a noun phrase naming
 * the kind of child, pid is the child's process ID and exitstatus is the
 * status word that is decoded here with WIFEXITED/WIFSIGNALED.
 *
 * Exactly one message is emitted: normal exit (with exit code), death by
 * signal (with signal number), or an unrecognized status (printed raw).
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
	if (WIFEXITED(exitstatus))
	{
		ereport(lev,

		/*
		 * translator: %s is a noun phrase describing a child process,
		 * such as "server process"
		 */
				(errmsg("%s (PID %d) exited with exit code %d",
						procname, pid, WEXITSTATUS(exitstatus))));
		return;
	}

	if (WIFSIGNALED(exitstatus))
	{
		ereport(lev,

		/*
		 * translator: %s is a noun phrase describing a child process,
		 * such as "server process"
		 */
				(errmsg("%s (PID %d) was terminated by signal %d",
						procname, pid, WTERMSIG(exitstatus))));
		return;
	}

	/* Neither a normal exit nor a signal: just show the raw status word */
	ereport(lev,

	/*
	 * translator: %s is a noun phrase describing a child process,
	 * such as "server process"
	 */
			(errmsg("%s (PID %d) exited with unexpected status %d",
					procname, pid, exitstatus)));
}
2283
2284 /*
2285  * Send a signal to all backend children (but NOT special children)
2286  */
2287 static void
2288 SignalChildren(int signal)
2289 {
2290         Dlelem     *curr;
2291
2292         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2293         {
2294                 Backend    *bp = (Backend *) DLE_VAL(curr);
2295
2296                 ereport(DEBUG4,
2297                                 (errmsg_internal("sending signal %d to process %d",
2298                                                                  signal, (int) bp->pid)));
2299                 kill(bp->pid, signal);
2300         }
2301 }
2302
2303 /*
2304  * BackendStartup -- start backend process
2305  *
2306  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
2307  */
2308 static int
2309 BackendStartup(Port *port)
2310 {
2311         Backend    *bn;                         /* for backend cleanup */
2312         pid_t           pid;
2313
2314 #ifdef LINUX_PROFILE
2315         struct itimerval prof_itimer;
2316 #endif
2317
2318         /*
2319          * Compute the cancel key that will be assigned to this backend. The
2320          * backend will have its own copy in the forked-off process' value of
2321          * MyCancelKey, so that it can transmit the key to the frontend.
2322          */
2323         MyCancelKey = PostmasterRandom();
2324
2325         /*
2326          * Make room for backend data structure.  Better before the fork() so
2327          * we can handle failure cleanly.
2328          */
2329         bn = (Backend *) malloc(sizeof(Backend));
2330         if (!bn)
2331         {
2332                 ereport(LOG,
2333                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2334                                  errmsg("out of memory")));
2335                 return STATUS_ERROR;
2336         }
2337
2338         /* Pass down canAcceptConnections state (kluge for EXEC_BACKEND case) */
2339         port->canAcceptConnections = canAcceptConnections();
2340
2341         /*
2342          * Flush stdio channels just before fork, to avoid double-output
2343          * problems. Ideally we'd use fflush(NULL) here, but there are still a
2344          * few non-ANSI stdio libraries out there (like SunOS 4.1.x) that
2345          * coredump if we do. Presently stdout and stderr are the only stdio
2346          * output channels used by the postmaster, so fflush'ing them should
2347          * be sufficient.
2348          */
2349         fflush(stdout);
2350         fflush(stderr);
2351
2352 #ifdef EXEC_BACKEND
2353
2354         pid = backend_forkexec(port);
2355
2356 #else                                                   /* !EXEC_BACKEND */
2357
2358 #ifdef LINUX_PROFILE
2359
2360         /*
2361          * Linux's fork() resets the profiling timer in the child process. If
2362          * we want to profile child processes then we need to save and restore
2363          * the timer setting.  This is a waste of time if not profiling,
2364          * however, so only do it if commanded by specific -DLINUX_PROFILE
2365          * switch.
2366          */
2367         getitimer(ITIMER_PROF, &prof_itimer);
2368 #endif
2369
2370 #ifdef __BEOS__
2371         /* Specific beos actions before backend startup */
2372         beos_before_backend_startup();
2373 #endif
2374
2375         pid = fork();
2376
2377         if (pid == 0)                           /* child */
2378         {
2379 #ifdef LINUX_PROFILE
2380                 setitimer(ITIMER_PROF, &prof_itimer, NULL);
2381 #endif
2382
2383 #ifdef __BEOS__
2384                 /* Specific beos backend startup actions */
2385                 beos_backend_startup();
2386 #endif
2387                 free(bn);
2388
2389                 proc_exit(BackendRun(port));
2390         }
2391 #endif   /* EXEC_BACKEND */
2392
2393         if (pid < 0)
2394         {
2395                 /* in parent, fork failed */
2396                 int                     save_errno = errno;
2397
2398 #ifdef __BEOS__
2399                 /* Specific beos backend startup actions */
2400                 beos_backend_startup_failed();
2401 #endif
2402                 free(bn);
2403                 errno = save_errno;
2404                 ereport(LOG,
2405                           (errmsg("could not fork new process for connection: %m")));
2406                 report_fork_failure_to_client(port, save_errno);
2407                 return STATUS_ERROR;
2408         }
2409
2410         /* in parent, successful fork */
2411         ereport(DEBUG2,
2412                         (errmsg_internal("forked new backend, pid=%d socket=%d",
2413                                                          (int) pid, port->sock)));
2414
2415         /*
2416          * Everything's been successful, it's safe to add this backend to our
2417          * list of backends.
2418          */
2419         bn->pid = pid;
2420         bn->cancel_key = MyCancelKey;
2421         DLAddHead(BackendList, DLNewElem(bn));
2422 #ifdef EXEC_BACKEND
2423         ShmemBackendArrayAdd(bn);
2424 #endif
2425
2426         return STATUS_OK;
2427 }
2428
2429 /*
2430  * Try to report backend fork() failure to client before we close the
2431  * connection.  Since we do not care to risk blocking the postmaster on
2432  * this connection, we set the connection to non-blocking and try only once.
2433  *
2434  * This is grungy special-purpose code; we cannot use backend libpq since
2435  * it's not up and running.
2436  */
2437 static void
2438 report_fork_failure_to_client(Port *port, int errnum)
2439 {
2440         char            buffer[1000];
2441
2442         /* Format the error message packet (always V2 protocol) */
2443         snprintf(buffer, sizeof(buffer), "E%s%s\n",
2444                          gettext("could not fork new process for connection: "),
2445                          strerror(errnum));
2446
2447         /* Set port to non-blocking.  Don't do send() if this fails */
2448         if (!set_noblock(port->sock))
2449                 return;
2450
2451         send(port->sock, buffer, strlen(buffer) + 1, 0);
2452 }
2453
2454
/*
 * split_opts -- break an option string into words and append them to argv
 *
 * argv is the array being filled, *argcp is the index of its next free
 * slot (advanced once per word stored), and s is the option string, which
 * may be NULL (in which case nothing happens).
 *
 * NB: the string is destructively modified!  Each stored argv entry points
 * into s, and the whitespace character following a word is overwritten
 * with '\0'.  No bounds checking is done on argv; the caller must have
 * made it large enough.
 *
 * No current POSTGRES argument needs quoting characters, so the
 * simple-minded rule is used that every run of non-whitespace characters
 * is one argv element.
 *
 * If you don't like that, well, we *used* to pass the whole option string
 * as ONE argument to execl(), which was even less intelligent...
 */
static void
split_opts(char **argv, int *argcp, char *s)
{
	char	   *cursor = s;

	if (cursor == NULL)
		return;

	for (;;)
	{
		/* step over any whitespace in front of the next word */
		while (isspace((unsigned char) *cursor))
			cursor++;
		if (*cursor == '\0')
			return;				/* nothing but whitespace was left */

		/* a word starts here: record it */
		argv[*argcp] = cursor;
		*argcp += 1;

		/* find the end of the word */
		while (*cursor != '\0' && !isspace((unsigned char) *cursor))
			cursor++;
		if (*cursor == '\0')
			return;				/* word ran up to the end of the string */

		/* terminate the word in place and carry on after it */
		*cursor++ = '\0';
	}
}
2483
2484
2485 /*
2486  * BackendRun -- perform authentication, and if successful,
2487  *                              set up the backend's argument list and invoke PostgresMain()
2488  *
2489  * returns:
2490  *              Shouldn't return at all.
2491  *              If PostgresMain() fails, return status.
2492  */
2493 static int
2494 BackendRun(Port *port)
2495 {
2496         int                     status;
2497         char            remote_host[NI_MAXHOST];
2498         char            remote_port[NI_MAXSERV];
2499         char            remote_ps_data[NI_MAXHOST];
2500         char      **av;
2501         int                     maxac;
2502         int                     ac;
2503         char            protobuf[32];
2504         int                     i;
2505
2506         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
2507
2508         /*
2509          * Let's clean up ourselves as the postmaster child, and close the
2510          * postmaster's listen sockets
2511          */
2512         ClosePostmasterPorts(false);
2513
2514         /* We don't want the postmaster's proc_exit() handlers */
2515         on_exit_reset();
2516
2517         /*
2518          * Signal handlers setting is moved to tcop/postgres...
2519          */
2520
2521         /* Save port etc. for ps status */
2522         MyProcPort = port;
2523
2524         /* Reset MyProcPid to new backend's pid */
2525         MyProcPid = getpid();
2526
2527         /*
2528          * PreAuthDelay is a debugging aid for investigating problems in the
2529          * authentication cycle: it can be set in postgresql.conf to allow
2530          * time to attach to the newly-forked backend with a debugger. (See
2531          * also the -W backend switch, which we allow clients to pass through
2532          * PGOPTIONS, but it is not honored until after authentication.)
2533          */
2534         if (PreAuthDelay > 0)
2535                 pg_usleep(PreAuthDelay * 1000000L);
2536
2537         ClientAuthInProgress = true;    /* limit visibility of log messages */
2538
2539         /* save start time for end of session reporting */
2540         gettimeofday(&(port->session_start), NULL);
2541
2542         /* set these to empty in case they are needed before we set them up */
2543         port->remote_host = "";
2544         port->remote_port = "";
2545         port->commandTag = "";
2546
2547         /*
2548          * Initialize libpq and enable reporting of ereport errors to the
2549          * client. Must do this now because authentication uses libpq to send
2550          * messages.
2551          */
2552         pq_init();                                      /* initialize libpq to talk to client */
2553         whereToSendOutput = Remote; /* now safe to ereport to client */
2554
2555         /*
2556          * We arrange for a simple exit(0) if we receive SIGTERM or SIGQUIT
2557          * during any client authentication related communication. Otherwise
2558          * the postmaster cannot shutdown the database FAST or IMMED cleanly
2559          * if a buggy client blocks a backend during authentication.
2560          */
2561         pqsignal(SIGTERM, authdie);
2562         pqsignal(SIGQUIT, authdie);
2563         pqsignal(SIGALRM, authdie);
2564         PG_SETMASK(&AuthBlockSig);
2565
2566         /*
2567          * Get the remote host name and port for logging and status display.
2568          */
2569         remote_host[0] = '\0';
2570         remote_port[0] = '\0';
2571         if (getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2572                                                 remote_host, sizeof(remote_host),
2573                                                 remote_port, sizeof(remote_port),
2574                                    (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV))
2575         {
2576                 int                     ret = getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2577                                                                                 remote_host, sizeof(remote_host),
2578                                                                                 remote_port, sizeof(remote_port),
2579                                                                                 NI_NUMERICHOST | NI_NUMERICSERV);
2580
2581                 if (ret)
2582                         ereport(WARNING,
2583                                         (errmsg("getnameinfo_all() failed: %s",
2584                                                         gai_strerror(ret))));
2585         }
2586         snprintf(remote_ps_data, sizeof(remote_ps_data),
2587                          remote_port[0] == '\0' ? "%s" : "%s(%s)",
2588                          remote_host, remote_port);
2589
2590         if (Log_connections)
2591                 ereport(LOG,
2592                                 (errmsg("connection received: host=%s port=%s",
2593                                                 remote_host, remote_port)));
2594
2595         /*
2596          * save remote_host and remote_port in port stucture
2597          */
2598         port->remote_host = strdup(remote_host);
2599         port->remote_port = strdup(remote_port);
2600
2601         /*
2602          * In EXEC_BACKEND case, we didn't inherit the contents of pg_hba.c
2603          * etcetera from the postmaster, and have to load them ourselves.
2604          * Build the PostmasterContext (which didn't exist before, in this
2605          * process) to contain the data.
2606          *
2607          * FIXME: [fork/exec] Ugh.      Is there a way around this overhead?
2608          */
2609 #ifdef EXEC_BACKEND
2610         Assert(PostmasterContext == NULL);
2611         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
2612                                                                                           "Postmaster",
2613                                                                                           ALLOCSET_DEFAULT_MINSIZE,
2614                                                                                           ALLOCSET_DEFAULT_INITSIZE,
2615                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
2616         MemoryContextSwitchTo(PostmasterContext);
2617
2618         load_hba();
2619         load_ident();
2620         load_user();
2621         load_group();
2622 #endif
2623
2624         /*
2625          * Ready to begin client interaction.  We will give up and exit(0)
2626          * after a time delay, so that a broken client can't hog a connection
2627          * indefinitely.  PreAuthDelay doesn't count against the time limit.
2628          */
2629         if (!enable_sig_alarm(AuthenticationTimeout * 1000, false))
2630                 elog(FATAL, "could not set timer for authorization timeout");
2631
2632         /*
2633          * Receive the startup packet (which might turn out to be a cancel
2634          * request packet).
2635          */
2636         status = ProcessStartupPacket(port, false);
2637
2638         if (status != STATUS_OK)
2639                 proc_exit(0);
2640
2641         /*
2642          * Now that we have the user and database name, we can set the process
2643          * title for ps.  It's good to do this as early as possible in
2644          * startup.
2645          */
2646         init_ps_display(port->user_name, port->database_name, remote_ps_data);
2647         set_ps_display("authentication");
2648
2649         /*
2650          * Now perform authentication exchange.
2651          */
2652         ClientAuthentication(port); /* might not return, if failure */
2653
2654         /*
2655          * Done with authentication.  Disable timeout, and prevent
2656          * SIGTERM/SIGQUIT again until backend startup is complete.
2657          */
2658         if (!disable_sig_alarm(false))
2659                 elog(FATAL, "could not disable timer for authorization timeout");
2660         PG_SETMASK(&BlockSig);
2661
2662         if (Log_connections)
2663                 ereport(LOG,
2664                                 (errmsg("connection authorized: user=%s database=%s",
2665                                                 port->user_name, port->database_name)));
2666
2667         /*
2668          * Don't want backend to be able to see the postmaster random number
2669          * generator state.  We have to clobber the static random_seed *and*
2670          * start a new random sequence in the random() library function.
2671          */
2672         random_seed = 0;
2673         srandom((unsigned int) (MyProcPid ^ port->session_start.tv_usec));
2674
2675         /* ----------------
2676          * Now, build the argv vector that will be given to PostgresMain.
2677          *
2678          * The layout of the command line is
2679          *              postgres [secure switches] -p databasename [insecure switches]
2680          * where the switches after -p come from the client request.
2681          *
2682          * The maximum possible number of commandline arguments that could come
2683          * from ExtraOptions or port->cmdline_options is (strlen + 1) / 2; see
2684          * split_opts().
2685          * ----------------
2686          */
2687         maxac = 10;                                     /* for fixed args supplied below */
2688         maxac += (strlen(ExtraOptions) + 1) / 2;
2689         if (port->cmdline_options)
2690                 maxac += (strlen(port->cmdline_options) + 1) / 2;
2691
2692         av = (char **) MemoryContextAlloc(TopMemoryContext,
2693                                                                           maxac * sizeof(char *));
2694         ac = 0;
2695
2696         av[ac++] = "postgres";
2697
2698         /*
2699          * Pass any backend switches specified with -o in the postmaster's own
2700          * command line.  We assume these are secure.  (It's OK to mangle
2701          * ExtraOptions now, since we're safely inside a subprocess.)
2702          */
2703         split_opts(av, &ac, ExtraOptions);
2704
2705         /* Tell the backend what protocol the frontend is using. */
2706         snprintf(protobuf, sizeof(protobuf), "-v%u", port->proto);
2707         av[ac++] = protobuf;
2708
2709         /*
2710          * Tell the backend it is being called from the postmaster, and which
2711          * database to use.  -p marks the end of secure switches.
2712          */
2713         av[ac++] = "-p";
2714         av[ac++] = port->database_name;
2715
2716         /*
2717          * Pass the (insecure) option switches from the connection request.
2718          * (It's OK to mangle port->cmdline_options now.)
2719          */
2720         if (port->cmdline_options)
2721                 split_opts(av, &ac, port->cmdline_options);
2722
2723         av[ac] = NULL;
2724
2725         Assert(ac < maxac);
2726
2727         /*
2728          * Release postmaster's working memory context so that backend can
2729          * recycle the space.  Note this does not trash *MyProcPort, because
2730          * ConnCreate() allocated that space with malloc() ... else we'd need
2731          * to copy the Port data here.  Also, subsidiary data such as the
2732          * username isn't lost either; see ProcessStartupPacket().
2733          */
2734         MemoryContextSwitchTo(TopMemoryContext);
2735         MemoryContextDelete(PostmasterContext);
2736         PostmasterContext = NULL;
2737
2738         /*
2739          * Debug: print arguments being passed to backend
2740          */
2741         ereport(DEBUG3,
2742                         (errmsg_internal("%s child[%d]: starting with (",
2743                                                          progname, (int)getpid())));
2744         for (i = 0; i < ac; ++i)
2745                 ereport(DEBUG3,
2746                                 (errmsg_internal("\t%s", av[i])));
2747         ereport(DEBUG3,
2748                         (errmsg_internal(")")));
2749
2750         ClientAuthInProgress = false;           /* client_min_messages is active
2751                                                                                  * now */
2752
2753         return (PostgresMain(ac, av, port->user_name));
2754 }
2755
2756
2757 #ifdef EXEC_BACKEND
2758
2759 /*
2760  * postmaster_forkexec -- fork and exec a postmaster subprocess
2761  *
2762  * The caller must have set up the argv array already, except for argv[2]
2763  * which will be filled with the name of the temp variable file.
2764  *
2765  * Returns the child process PID, or -1 on fork failure (a suitable error
2766  * message has been logged on failure).
2767  *
2768  * All uses of this routine will dispatch to SubPostmasterMain in the
2769  * child process.
2770  */
2771 pid_t
2772 postmaster_forkexec(int argc, char *argv[])
2773 {
2774         Port            port;
2775
2776         /* This entry point passes dummy values for the Port variables */
2777         memset(&port, 0, sizeof(port));
2778         return internal_forkexec(argc, argv, &port);
2779 }
2780
2781 /*
2782  * backend_forkexec -- fork/exec off a backend process
2783  *
2784  * returns the pid of the fork/exec'd process, or -1 on failure
2785  */
2786 static pid_t
2787 backend_forkexec(Port *port)
2788 {
2789         char       *av[4];
2790         int                     ac = 0;
2791
2792         av[ac++] = "postgres";
2793         av[ac++] = "-forkbackend";
2794         av[ac++] = NULL;                        /* filled in by internal_forkexec */
2795
2796         av[ac] = NULL;
2797         Assert(ac < lengthof(av));
2798
2799         return internal_forkexec(ac, av, port);
2800 }
2801
2802 static pid_t
2803 internal_forkexec(int argc, char *argv[], Port *port)
2804 {
2805         pid_t           pid;
2806         char            tmpfilename[MAXPGPATH];
2807
2808         if (!write_backend_variables(tmpfilename, port))
2809                 return -1;                              /* log made by write_backend_variables */
2810
2811         /* Make sure caller set up argv properly */
2812         Assert(argc >= 3);
2813         Assert(argv[argc] == NULL);
2814         Assert(strncmp(argv[1], "-fork", 5) == 0);
2815         Assert(argv[2] == NULL);
2816
2817         /* Insert temp file name after -fork argument */
2818         argv[2] = tmpfilename;
2819
2820 #ifdef WIN32
2821         pid = win32_forkexec(postgres_exec_path, argv);
2822 #else
2823         /* Fire off execv in child */
2824         if ((pid = fork()) == 0)
2825         {
2826                 if (execv(postgres_exec_path, argv) < 0)
2827                 {
2828                         ereport(LOG,
2829                                         (errmsg("could not execute server process \"%s\": %m",
2830                                                         postgres_exec_path)));
2831                         /* We're already in the child process here, can't return */
2832                         exit(1);
2833                 }
2834         }
2835 #endif
2836
2837         return pid;                                     /* Parent returns pid, or -1 on fork
2838                                                                  * failure */
2839 }
2840
2841 /*
2842  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
2843  *                      to what it would be if we'd simply forked on Unix, and then
2844  *                      dispatch to the appropriate place.
2845  *
2846  * The first two command line arguments are expected to be "-forkFOO"
2847  * (where FOO indicates which postmaster child we are to become), and
2848  * the name of a variables file that we can read to load data that would
2849  * have been inherited by fork() on Unix.  Remaining arguments go to the
2850  * subprocess FooMain() routine.
2851  */
2852 int
2853 SubPostmasterMain(int argc, char *argv[])
2854 {
2855         Port            port;
2856
2857         /* Do this sooner rather than later... */
2858         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
2859
2860         MyProcPid = getpid();           /* reset MyProcPid */
2861
2862         /* In EXEC_BACKEND case we will not have inherited these settings */
2863         IsPostmasterEnvironment = true;
2864         whereToSendOutput = None;
2865         pqinitmask();
2866         PG_SETMASK(&BlockSig);
2867
2868         /* Setup essential subsystems */
2869         MemoryContextInit();
2870         InitializeGUCOptions();
2871
2872         /* Check we got appropriate args */
2873         if (argc < 3)
2874                 elog(FATAL, "invalid subpostmaster invocation");
2875
2876         /* Read in file-based context */
2877         memset(&port, 0, sizeof(Port));
2878         read_backend_variables(argv[2], &port);
2879         read_nondefault_variables();
2880
2881         /* Run backend or appropriate child */
2882         if (strcmp(argv[1], "-forkbackend") == 0)
2883         {
2884                 /* BackendRun will close sockets */
2885
2886                 /* Attach process to shared segments */
2887                 CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
2888
2889 #ifdef USE_SSL
2890                 /*
2891                  *      Need to reinitialize the SSL library in the backend,
2892                  *      since the context structures contain function pointers
2893                  *      and cannot be passed through the parameter file.
2894                  */
2895                 if (EnableSSL)
2896                         secure_initialize();
2897 #endif
2898
2899                 Assert(argc == 3);              /* shouldn't be any more args */
2900                 proc_exit(BackendRun(&port));
2901         }
2902         if (strcmp(argv[1], "-forkboot") == 0)
2903         {
2904                 /* Close the postmaster's sockets */
2905                 ClosePostmasterPorts(false);
2906
2907                 /* Attach process to shared segments */
2908                 CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
2909
2910                 BootstrapMain(argc - 2, argv + 2);
2911                 proc_exit(0);
2912         }
2913         if (strcmp(argv[1], "-forkarch") == 0)
2914         {
2915                 /* Close the postmaster's sockets */
2916                 ClosePostmasterPorts(false);
2917
2918                 /* Do not want to attach to shared memory */
2919
2920                 PgArchiverMain(argc, argv);
2921                 proc_exit(0);
2922         }
2923         if (strcmp(argv[1], "-forkbuf") == 0)
2924         {
2925                 /* Close the postmaster's sockets */
2926                 ClosePostmasterPorts(false);
2927
2928                 /* Do not want to attach to shared memory */
2929
2930                 PgstatBufferMain(argc, argv);
2931                 proc_exit(0);
2932         }
2933         if (strcmp(argv[1], "-forkcol") == 0)
2934         {
2935                 /*
2936                  * Do NOT close postmaster sockets here, because we are forking
2937                  * from pgstat buffer process, which already did it.
2938                  */
2939
2940                 /* Do not want to attach to shared memory */
2941
2942                 PgstatCollectorMain(argc, argv);
2943                 proc_exit(0);
2944         }
2945         if (strcmp(argv[1], "-forklog") == 0)
2946         {
2947                 /* Close the postmaster's sockets */
2948                 ClosePostmasterPorts(true);
2949
2950                 /* Do not want to attach to shared memory */
2951
2952                 SysLoggerMain(argc, argv);
2953                 proc_exit(0);
2954         }
2955
2956         return 1;                                       /* shouldn't get here */
2957 }
2958 #endif   /* EXEC_BACKEND */
2959
2960
/*
 * ExitPostmaster -- the postmaster's one and only exit path
 *
 * Do NOT call exit() directly --- always go through here!
 *
 * status is handed to proc_exit() unchanged.
 */
static void
ExitPostmaster(int status)
{
	/*
	 * In principle this is the place to clean up shared memory and kill
	 * all backends, but nothing of the sort is done here explicitly.
	 *
	 * Whether the backends should all be killed when the postmaster dies
	 * was once left as an open question ("probably not"); it was later
	 * answered with: MUST  -- vadim 05-10-1999
	 */

	proc_exit(status);
}
2980
2981 /*
2982  * sigusr1_handler - handle signal conditions from child processes
2983  */
2984 static void
2985 sigusr1_handler(SIGNAL_ARGS)
2986 {
2987         int                     save_errno = errno;
2988
2989         PG_SETMASK(&BlockSig);
2990
2991         if (CheckPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE))
2992         {
2993                 /*
2994                  * Password or group file has changed.
2995                  */
2996                 load_user();
2997                 load_group();
2998         }
2999
3000         if (CheckPostmasterSignal(PMSIGNAL_WAKEN_CHILDREN))
3001         {
3002                 /*
3003                  * Send SIGUSR1 to all children (triggers
3004                  * CatchupInterruptHandler). See storage/ipc/sinval[adt].c for the
3005                  * use of this.
3006                  */
3007                 if (Shutdown <= SmartShutdown)
3008                         SignalChildren(SIGUSR1);
3009         }
3010
3011         if (PgArchPID != 0 && Shutdown == NoShutdown)
3012         {
3013                 if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER))
3014                 {
3015                         /*
3016                          * Send SIGUSR1 to archiver process, to wake it up and begin
3017                          * archiving next transaction log file.
3018                          */
3019                         kill(PgArchPID, SIGUSR1);
3020                 }
3021         }
3022
3023         PG_SETMASK(&UnBlockSig);
3024
3025         errno = save_errno;
3026 }
3027
3028
/*
 * dummy_handler -- signal handler that does nothing
 *
 * Installed for signals that mean nothing to the postmaster itself but do
 * matter to backends.  Setting such signals to SIG_IGN in the postmaster
 * would risk a newly started backend dropping a signal that arrives
 * before it has been able to reconfigure its signal processing.  (See
 * notes in tcop/postgres.c.)
 */
static void
dummy_handler(SIGNAL_ARGS)
{
	/* intentionally empty */
}
3042
3043
/*
 * CharRemap: given an int in range 0..61, produce textual encoding of it
 * per crypt(3) conventions.
 *
 * 0..25 map to 'A'..'Z', 26..51 to 'a'..'z' and 52..61 to '0'..'9'.
 * Values outside 0..61 are accepted as well: the magnitude of ch is
 * reduced modulo 62 first.
 */
static char
CharRemap(long ch)
{
	/*
	 * Reduce before taking the absolute value.  The remainder lies in
	 * -61..61, so negating it cannot overflow, whereas negating ch itself
	 * would be undefined for LONG_MIN.  For every other input the result
	 * is the same as negating first.
	 */
	ch = ch % 62;
	if (ch < 0)
		ch = -ch;

	if (ch < 26)
		return 'A' + ch;

	ch -= 26;
	if (ch < 26)
		return 'a' + ch;

	ch -= 26;
	return '0' + ch;
}
3065
/*
 * RandomSalt -- generate fresh salts for password authentication
 *
 * Stores two CharRemap()-encoded characters into cryptSalt[0..1] and four
 * bytes into md5Salt[0..3].  Nothing is NUL-terminated here.
 */
static void
RandomSalt(char *cryptSalt, char *md5Salt)
{
        long            bits = PostmasterRandom();
        int             k;

        cryptSalt[0] = CharRemap(bits % 62);
        cryptSalt[1] = CharRemap(bits / 62);

        /*
         * The first random value can safely be shared with one MD5 salt byte,
         * because only one of the two salts is ever sent to the client.  The
         * remaining bytes each need fresh random bits.
         *
         * Reducing modulo 255 gives up one possible byte value but lets all
         * bits of the random() value influence the result; adding one then
         * keeps null bytes out of the salt.
         */
        md5Salt[0] = (bits % 255) + 1;
        for (k = 1; k <= 3; k++)
        {
                bits = PostmasterRandom();
                md5Salt[k] = (bits % 255) + 1;
        }
}
3094
3095 /*
3096  * PostmasterRandom
3097  */
3098 static long
3099 PostmasterRandom(void)
3100 {
3101         static bool initialized = false;
3102
3103         if (!initialized)
3104         {
3105                 Assert(random_seed != 0);
3106                 srandom(random_seed);
3107                 initialized = true;
3108         }
3109
3110         return random();
3111 }
3112
3113 /*
3114  * Count up number of child processes (regular backends only)
3115  */
3116 static int
3117 CountChildren(void)
3118 {
3119         Dlelem     *curr;
3120         int                     cnt = 0;
3121
3122         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
3123                 cnt++;
3124         return cnt;
3125 }
3126
3127
3128 /*
3129  * StartChildProcess -- start a non-backend child process for the postmaster
3130  *
3131  * xlog determines what kind of child will be started.  All child types
3132  * initially go to BootstrapMain, which will handle common setup.
3133  *
3134  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
3135  * to start subprocess.
3136  */
3137 static pid_t
3138 StartChildProcess(int xlop)
3139 {
3140         pid_t           pid;
3141         char       *av[10];
3142         int                     ac = 0;
3143         char            xlbuf[32];
3144
3145 #ifdef LINUX_PROFILE
3146         struct itimerval prof_itimer;
3147 #endif
3148
3149         /*
3150          * Set up command-line arguments for subprocess
3151          */
3152         av[ac++] = "postgres";
3153
3154 #ifdef EXEC_BACKEND
3155         av[ac++] = "-forkboot";
3156         av[ac++] = NULL;                        /* filled in by postmaster_forkexec */
3157 #endif
3158
3159         snprintf(xlbuf, sizeof(xlbuf), "-x%d", xlop);
3160         av[ac++] = xlbuf;
3161
3162         av[ac++] = "-p";
3163         av[ac++] = "template1";
3164
3165         av[ac] = NULL;
3166         Assert(ac < lengthof(av));
3167
3168         /*
3169          * Flush stdio channels (see comments in BackendStartup)
3170          */
3171         fflush(stdout);
3172         fflush(stderr);
3173
3174 #ifdef EXEC_BACKEND
3175
3176         pid = postmaster_forkexec(ac, av);
3177
3178 #else                                                   /* !EXEC_BACKEND */
3179
3180 #ifdef LINUX_PROFILE
3181         /* see comments in BackendStartup */
3182         getitimer(ITIMER_PROF, &prof_itimer);
3183 #endif
3184
3185 #ifdef __BEOS__
3186         /* Specific beos actions before backend startup */
3187         beos_before_backend_startup();
3188 #endif
3189
3190         pid = fork();
3191
3192         if (pid == 0)                           /* child */
3193         {
3194 #ifdef LINUX_PROFILE
3195                 setitimer(ITIMER_PROF, &prof_itimer, NULL);
3196 #endif
3197
3198 #ifdef __BEOS__
3199                 /* Specific beos actions after backend startup */
3200                 beos_backend_startup();
3201 #endif
3202
3203                 IsUnderPostmaster = true;               /* we are a postmaster subprocess
3204                                                                                  * now */
3205
3206                 /* Close the postmaster's sockets */
3207                 ClosePostmasterPorts(false);
3208
3209                 /* Lose the postmaster's on-exit routines and port connections */
3210                 on_exit_reset();
3211
3212                 /* Release postmaster's working memory context */
3213                 MemoryContextSwitchTo(TopMemoryContext);
3214                 MemoryContextDelete(PostmasterContext);
3215                 PostmasterContext = NULL;
3216
3217                 BootstrapMain(ac, av);
3218                 ExitPostmaster(0);
3219         }
3220 #endif   /* EXEC_BACKEND */
3221
3222         if (pid < 0)
3223         {
3224                 /* in parent, fork failed */
3225                 int                     save_errno = errno;
3226
3227 #ifdef __BEOS__
3228                 /* Specific beos actions before backend startup */
3229                 beos_backend_startup_failed();
3230 #endif
3231                 errno = save_errno;
3232                 switch (xlop)
3233                 {
3234                         case BS_XLOG_STARTUP:
3235                                 ereport(LOG,
3236                                                 (errmsg("could not fork startup process: %m")));
3237                                 break;
3238                         case BS_XLOG_BGWRITER:
3239                                 ereport(LOG,
3240                                 (errmsg("could not fork background writer process: %m")));
3241                                 break;
3242                         default:
3243                                 ereport(LOG,
3244                                                 (errmsg("could not fork process: %m")));
3245                                 break;
3246                 }
3247
3248                 /*
3249                  * fork failure is fatal during startup, but there's no need to
3250                  * choke immediately if starting other child types fails.
3251                  */
3252                 if (xlop == BS_XLOG_STARTUP)
3253                         ExitPostmaster(1);
3254                 return 0;
3255         }
3256
3257         /*
3258          * in parent, successful fork
3259          */
3260         return pid;
3261 }
3262
3263
3264 /*
3265  * Create the opts file
3266  */
3267 static bool
3268 CreateOptsFile(int argc, char *argv[], char *fullprogname)
3269 {
3270         char            filename[MAXPGPATH];
3271         FILE       *fp;
3272         int                     i;
3273
3274         snprintf(filename, sizeof(filename), "%s/postmaster.opts", DataDir);
3275
3276         if ((fp = fopen(filename, "w")) == NULL)
3277         {
3278                 elog(LOG, "could not create file \"%s\": %m", filename);
3279                 return false;
3280         }
3281
3282         fprintf(fp, "%s", fullprogname);
3283         for (i = 1; i < argc; i++)
3284                 fprintf(fp, " %s%s%s", SYSTEMQUOTE, argv[i], SYSTEMQUOTE);
3285         fputs("\n", fp);
3286
3287         if (fclose(fp))
3288         {
3289                 elog(LOG, "could not write file \"%s\": %m", filename);
3290                 return false;
3291         }
3292
3293         return true;
3294 }
3295
3296
3297 #ifdef EXEC_BACKEND
3298
3299 /*
3300  * The following need to be available to the read/write_backend_variables
3301  * functions
3302  */
3303 #include "storage/spin.h"
3304
3305 extern slock_t *ShmemLock;
3306 extern slock_t *ShmemIndexLock;
3307 extern void *ShmemIndexAlloc;
3308 typedef struct LWLock LWLock;
3309 extern LWLock *LWLockArray;
3310 extern slock_t *ProcStructLock;
3311 extern int      pgStatSock;
3312
/*
 * Helpers for saving/restoring variables as raw bytes.
 *
 * write_var/read_var take the address of "var" and transfer sizeof(var)
 * bytes as a single fwrite/fread item.  The *_array_var forms pass "var"
 * itself as the buffer, so they are for true arrays, where sizeof(var) is
 * the size of the whole array.  Each macro evaluates to the fwrite/fread
 * return value.
 */
#define write_var(var,fp) fwrite((void*)&(var),sizeof(var),1,fp)
#define read_var(var,fp)  fread((void*)&(var),sizeof(var),1,fp)
#define write_array_var(var,fp) fwrite((void*)(var),sizeof(var),1,fp)
#define read_array_var(var,fp)  fread((void*)(var),sizeof(var),1,fp)
3317
3318 static bool
3319 write_backend_variables(char *filename, Port *port)
3320 {
3321         static unsigned long tmpBackendFileNum = 0;
3322         FILE       *fp;
3323         char            str_buf[MAXPGPATH];
3324
3325         /* Calculate name for temp file in caller's buffer */
3326         Assert(DataDir);
3327         snprintf(filename, MAXPGPATH, "%s/%s/%s.backend_var.%d.%lu",
3328                          DataDir, PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX,
3329                          MyProcPid, ++tmpBackendFileNum);
3330
3331         /* Open file */
3332         fp = AllocateFile(filename, PG_BINARY_W);
3333         if (!fp)
3334         {
3335                 /* As per OpenTemporaryFile... */
3336                 char            dirname[MAXPGPATH];
3337
3338                 snprintf(dirname, MAXPGPATH, "%s/%s", DataDir, PG_TEMP_FILES_DIR);
3339                 mkdir(dirname, S_IRWXU);
3340
3341                 fp = AllocateFile(filename, PG_BINARY_W);
3342                 if (!fp)
3343                 {
3344                         ereport(LOG,
3345                                         (errcode_for_file_access(),
3346                                          errmsg("could not create file \"%s\": %m",
3347                                                         filename)));
3348                         return false;
3349                 }
3350         }
3351
3352         /* Write vars */
3353         write_var(port->sock, fp);
3354         write_var(port->proto, fp);
3355         write_var(port->laddr, fp);
3356         write_var(port->raddr, fp);
3357         write_var(port->canAcceptConnections, fp);
3358         write_var(port->cryptSalt, fp);
3359         write_var(port->md5Salt, fp);
3360
3361         /*
3362          * XXX FIXME later: writing these strings as MAXPGPATH bytes always is
3363          * probably a waste of resources
3364          */
3365
3366         StrNCpy(str_buf, DataDir, MAXPGPATH);
3367         write_array_var(str_buf, fp);
3368
3369         write_array_var(ListenSocket, fp);
3370
3371         write_var(MyCancelKey, fp);
3372
3373         write_var(UsedShmemSegID, fp);
3374         write_var(UsedShmemSegAddr, fp);
3375
3376         write_var(ShmemLock, fp);
3377         write_var(ShmemIndexLock, fp);
3378         write_var(ShmemVariableCache, fp);
3379         write_var(ShmemIndexAlloc, fp);
3380         write_var(ShmemBackendArray, fp);
3381
3382         write_var(LWLockArray, fp);
3383         write_var(ProcStructLock, fp);
3384         write_var(pgStatSock, fp);
3385
3386         write_var(PostmasterPid, fp);
3387 #ifdef WIN32
3388         write_var(PostmasterHandle, fp);
3389 #endif
3390
3391         write_var(syslogPipe[0], fp);
3392         write_var(syslogPipe[1], fp);
3393
3394         StrNCpy(str_buf, my_exec_path, MAXPGPATH);
3395         write_array_var(str_buf, fp);
3396
3397         write_array_var(ExtraOptions, fp);
3398
3399         StrNCpy(str_buf, setlocale(LC_COLLATE, NULL), MAXPGPATH);
3400         write_array_var(str_buf, fp);
3401         StrNCpy(str_buf, setlocale(LC_CTYPE, NULL), MAXPGPATH);
3402         write_array_var(str_buf, fp);
3403
3404         /* Release file */
3405         if (FreeFile(fp))
3406         {
3407                 ereport(ERROR,
3408                                 (errcode_for_file_access(),
3409                                  errmsg("could not write to file \"%s\": %m", filename)));
3410                 return false;
3411         }
3412
3413         return true;
3414 }
3415
/*
 * read_backend_variables -- restore state saved by write_backend_variables
 *
 * Opens "filename", reads each variable back as raw bytes in exactly the
 * order write_backend_variables wrote them, applies the ones that need more
 * than a plain copy (data directory via SetDataDir, LC_COLLATE/LC_CTYPE via
 * setlocale), and finally removes the file.
 *
 * Failure to open the file is reported at FATAL level; failure to remove it
 * afterwards only produces a WARNING.  The results of the individual reads
 * are not checked.
 */
static void
read_backend_variables(char *filename, Port *port)
{
        FILE       *fp;
        char            str_buf[MAXPGPATH];

        /* Open file */
        fp = AllocateFile(filename, PG_BINARY_R);
        if (!fp)
                ereport(FATAL,
                                (errcode_for_file_access(),
                  errmsg("could not read from backend variables file \"%s\": %m",
                                 filename)));

        /* Read vars (order must match write_backend_variables) */
        read_var(port->sock, fp);
        read_var(port->proto, fp);
        read_var(port->laddr, fp);
        read_var(port->raddr, fp);
        read_var(port->canAcceptConnections, fp);
        read_var(port->cryptSalt, fp);
        read_var(port->md5Salt, fp);

        /* Data directory path was written as a fixed MAXPGPATH-byte buffer */
        read_array_var(str_buf, fp);
        SetDataDir(str_buf);

        read_array_var(ListenSocket, fp);

        read_var(MyCancelKey, fp);

        read_var(UsedShmemSegID, fp);
        read_var(UsedShmemSegAddr, fp);

        read_var(ShmemLock, fp);
        read_var(ShmemIndexLock, fp);
        read_var(ShmemVariableCache, fp);
        read_var(ShmemIndexAlloc, fp);
        read_var(ShmemBackendArray, fp);

        read_var(LWLockArray, fp);
        read_var(ProcStructLock, fp);
        read_var(pgStatSock, fp);

        read_var(PostmasterPid, fp);
#ifdef WIN32
        read_var(PostmasterHandle, fp);
#endif

        read_var(syslogPipe[0], fp);
        read_var(syslogPipe[1], fp);

        /* Executable path, also stored as a fixed-size buffer */
        read_array_var(str_buf, fp);
        StrNCpy(my_exec_path, str_buf, MAXPGPATH);

        read_array_var(ExtraOptions, fp);

        /* Locale names: LC_COLLATE first, then LC_CTYPE */
        read_array_var(str_buf, fp);
        setlocale(LC_COLLATE, str_buf);
        read_array_var(str_buf, fp);
        setlocale(LC_CTYPE, str_buf);

        /* Release file */
        FreeFile(fp);
        /* The file has served its purpose once read; remove it */
        if (unlink(filename) != 0)
                ereport(WARNING,
                                (errcode_for_file_access(),
                                 errmsg("could not remove file \"%s\": %m", filename)));
}
3484
3485
3486 size_t
3487 ShmemBackendArraySize(void)
3488 {
3489         return (NUM_BACKENDARRAY_ELEMS * sizeof(Backend));
3490 }
3491
3492 void
3493 ShmemBackendArrayAllocation(void)
3494 {
3495         size_t          size = ShmemBackendArraySize();
3496
3497         ShmemBackendArray = (Backend *) ShmemAlloc(size);
3498         /* Mark all slots as empty */
3499         memset(ShmemBackendArray, 0, size);
3500 }
3501
3502 static void
3503 ShmemBackendArrayAdd(Backend *bn)
3504 {
3505         int                     i;
3506
3507         /* Find an empty slot */
3508         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3509         {
3510                 if (ShmemBackendArray[i].pid == 0)
3511                 {
3512                         ShmemBackendArray[i] = *bn;
3513                         return;
3514                 }
3515         }
3516
3517         ereport(FATAL,
3518                         (errmsg_internal("no free slots in shmem backend array")));
3519 }
3520
3521 static void
3522 ShmemBackendArrayRemove(pid_t pid)
3523 {
3524         int                     i;
3525
3526         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3527         {
3528                 if (ShmemBackendArray[i].pid == pid)
3529                 {
3530                         /* Mark the slot as empty */
3531                         ShmemBackendArray[i].pid = 0;
3532                         return;
3533                 }
3534         }
3535
3536         ereport(WARNING,
3537                         (errmsg_internal("could not find backend entry with pid %d",
3538                                                          (int) pid)));
3539 }
3540 #endif   /* EXEC_BACKEND */
3541
3542
3543 #ifdef WIN32
3544
3545 static pid_t
3546 win32_forkexec(const char *path, char *argv[])
3547 {
3548         STARTUPINFO si;
3549         PROCESS_INFORMATION pi;
3550         int                     i;
3551         int                     j;
3552         char            cmdLine[MAXPGPATH * 2];
3553         HANDLE          childHandleCopy;
3554         HANDLE          waiterThread;
3555
3556         /* Format the cmd line */
3557         cmdLine[sizeof(cmdLine) - 1] = '\0';
3558         cmdLine[sizeof(cmdLine) - 2] = '\0';
3559         snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", path);
3560         i = 0;
3561         while (argv[++i] != NULL)
3562         {
3563                 j = strlen(cmdLine);
3564                 snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
3565         }
3566         if (cmdLine[sizeof(cmdLine) - 2] != '\0')
3567         {
3568                 elog(LOG, "subprocess command line too long");
3569                 return -1;
3570         }
3571
3572         memset(&pi, 0, sizeof(pi));
3573         memset(&si, 0, sizeof(si));
3574         si.cb = sizeof(si);
3575         if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, 0, NULL, NULL, &si, &pi))
3576         {
3577                 elog(LOG, "CreateProcess call failed (%d): %m", (int) GetLastError());
3578                 return -1;
3579         }
3580
3581         if (!IsUnderPostmaster)
3582         {
3583                 /* We are the Postmaster creating a child... */
3584                 win32_AddChild(pi.dwProcessId, pi.hProcess);
3585         }
3586
3587         if (DuplicateHandle(GetCurrentProcess(),
3588                                                 pi.hProcess,
3589                                                 GetCurrentProcess(),
3590                                                 &childHandleCopy,
3591                                                 0,
3592                                                 FALSE,
3593                                                 DUPLICATE_SAME_ACCESS) == 0)
3594                 ereport(FATAL,
3595                                 (errmsg_internal("could not duplicate child handle: %d",
3596                                                                  (int) GetLastError())));
3597
3598         waiterThread = CreateThread(NULL, 64 * 1024, win32_sigchld_waiter,
3599                                                                 (LPVOID) childHandleCopy, 0, NULL);
3600         if (!waiterThread)
3601                 ereport(FATAL,
3602                    (errmsg_internal("could not create sigchld waiter thread: %d",
3603                                                         (int) GetLastError())));
3604         CloseHandle(waiterThread);
3605
3606         if (IsUnderPostmaster)
3607                 CloseHandle(pi.hProcess);
3608         CloseHandle(pi.hThread);
3609
3610         return pi.dwProcessId;
3611 }
3612
3613 /*
3614  * Note: The following three functions must not be interrupted (eg. by
3615  * signals).  As the Postgres Win32 signalling architecture (currently)
3616  * requires polling, or APC checking functions which aren't used here, this
3617  * is not an issue.
3618  *
3619  * We keep two separate arrays, instead of a single array of pid/HANDLE
3620  * structs, to avoid having to re-create a handle array for
3621  * WaitForMultipleObjects on each call to win32_waitpid.
3622  */
3623
3624 static void
3625 win32_AddChild(pid_t pid, HANDLE handle)
3626 {
3627         Assert(win32_childPIDArray && win32_childHNDArray);
3628         if (win32_numChildren < NUM_BACKENDARRAY_ELEMS)
3629         {
3630                 win32_childPIDArray[win32_numChildren] = pid;
3631                 win32_childHNDArray[win32_numChildren] = handle;
3632                 ++win32_numChildren;
3633         }
3634         else
3635                 ereport(FATAL,
3636                                 (errmsg_internal("no room for child entry with pid %lu",
3637                                                                  (unsigned long) pid)));
3638 }
3639
3640 static void
3641 win32_RemoveChild(pid_t pid)
3642 {
3643         int                     i;
3644
3645         Assert(win32_childPIDArray && win32_childHNDArray);
3646
3647         for (i = 0; i < win32_numChildren; i++)
3648         {
3649                 if (win32_childPIDArray[i] == pid)
3650                 {
3651                         CloseHandle(win32_childHNDArray[i]);
3652
3653                         /* Swap last entry into the "removed" one */
3654                         --win32_numChildren;
3655                         win32_childPIDArray[i] = win32_childPIDArray[win32_numChildren];
3656                         win32_childHNDArray[i] = win32_childHNDArray[win32_numChildren];
3657                         return;
3658                 }
3659         }
3660
3661         ereport(WARNING,
3662                         (errmsg_internal("could not find child entry with pid %lu",
3663                                                          (unsigned long) pid)));
3664 }
3665
3666 static pid_t
3667 win32_waitpid(int *exitstatus)
3668 {
3669         /*
3670          * Note: Do NOT use WaitForMultipleObjectsEx, as we don't want to run
3671          * queued APCs here.
3672          */
3673         int                     index;
3674         DWORD           exitCode;
3675         DWORD           ret;
3676         unsigned long offset;
3677
3678         Assert(win32_childPIDArray && win32_childHNDArray);
3679         elog(DEBUG3, "waiting on %lu children", win32_numChildren);
3680
3681         for (offset = 0; offset < win32_numChildren; offset += MAXIMUM_WAIT_OBJECTS)
3682         {
3683                 unsigned long num = Min(MAXIMUM_WAIT_OBJECTS, win32_numChildren - offset);
3684
3685                 ret = WaitForMultipleObjects(num, &win32_childHNDArray[offset], FALSE, 0);
3686                 switch (ret)
3687                 {
3688                         case WAIT_FAILED:
3689                                 ereport(LOG,
3690                                                 (errmsg_internal("failed to wait on %lu of %lu children: %d",
3691                                                  num, win32_numChildren, (int) GetLastError())));
3692                                 return -1;
3693
3694                         case WAIT_TIMEOUT:
3695                                 /* No children (in this chunk) have finished */
3696                                 break;
3697
3698                         default:
3699
3700                                 /*
3701                                  * Get the exit code, and return the PID of, the
3702                                  * respective process
3703                                  */
3704                                 index = offset + ret - WAIT_OBJECT_0;
3705                                 Assert(index >= 0 && index < win32_numChildren);
3706                                 if (!GetExitCodeProcess(win32_childHNDArray[index], &exitCode))
3707                                 {
3708                                         /*
3709                                          * If we get this far, this should never happen, but,
3710                                          * then again... No choice other than to assume a
3711                                          * catastrophic failure.
3712                                          */
3713                                         ereport(FATAL,
3714                                                         (errmsg_internal("failed to get exit code for child %lu",
3715                                                                                    win32_childPIDArray[index])));
3716                                 }
3717                                 *exitstatus = (int) exitCode;
3718                                 return win32_childPIDArray[index];
3719                 }
3720         }
3721
3722         /* No children have finished */
3723         return -1;
3724 }
3725
3726 /*
3727  * Note! Code below executes on separate threads, one for
3728  * each child process created
3729  */
3730 static DWORD WINAPI
3731 win32_sigchld_waiter(LPVOID param)
3732 {
3733         HANDLE          procHandle = (HANDLE) param;
3734
3735         DWORD           r = WaitForSingleObject(procHandle, INFINITE);
3736
3737         if (r == WAIT_OBJECT_0)
3738                 pg_queue_signal(SIGCHLD);
3739         else
3740                 write_stderr("could not wait on child process handle: error code %d\n",
3741                                          (int) GetLastError());
3742         CloseHandle(procHandle);
3743         return 0;
3744 }
3745
3746 #endif   /* WIN32 */