]> granicus.if.org Git - postgresql/blob - src/backend/postmaster/postmaster.c
Here's a patch implementing the "thread method" to work around the bug
[postgresql] / src / backend / postmaster / postmaster.c
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  *        This program acts as a clearing house for requests to the
5  *        POSTGRES system.      Frontend programs send a startup message
6  *        to the Postmaster and the postmaster uses the info in the
7  *        message to set up a backend process.
8  *
9  *        The postmaster also manages system-wide operations such as
10  *        startup, shutdown, and periodic checkpoints.  The postmaster
11  *        itself doesn't do those operations, mind you --- it just forks
12  *        off a subprocess to do them at the right times.  It also takes
13  *        care of resetting the system if a backend crashes.
14  *
15  *        The postmaster process creates the shared memory and semaphore
16  *        pools during startup, but as a rule does not touch them itself.
17  *        In particular, it is not a member of the PGPROC array of backends
18  *        and so it cannot participate in lock-manager operations.      Keeping
19  *        the postmaster away from shared memory operations makes it simpler
20  *        and more reliable.  The postmaster is almost always able to recover
21  *        from crashes of individual backends by resetting shared memory;
22  *        if it did much with shared memory then it would be prone to crashing
23  *        along with the backends.
24  *
25  *        When a request message is received, we now fork() immediately.
26  *        The child process performs authentication of the request, and
27  *        then becomes a backend if successful.  This allows the auth code
28  *        to be written in a simple single-threaded style (as opposed to the
29  *        crufty "poor man's multitasking" code that used to be needed).
30  *        More importantly, it ensures that blockages in non-multithreaded
31  *        libraries like SSL or PAM cannot cause denial of service to other
32  *        clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  *        $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.377 2004/03/24 04:04:51 momjian Exp $
41  *
42  * NOTES
43  *
44  * Initialization:
45  *              The Postmaster sets up a few shared memory data structures
46  *              for the backends.  It should at the very least initialize the
47  *              lock manager.
48  *
49  * Synchronization:
50  *              The Postmaster shares memory with the backends but should avoid
51  *              touching shared memory, so as not to become stuck if a crashing
52  *              backend screws up locks or shared memory.  Likewise, the Postmaster
53  *              should never block on messages from frontend clients.
54  *
55  * Garbage Collection:
56  *              The Postmaster cleans up after backends if they have an emergency
57  *              exit and/or core dump.
58  *
59  *-------------------------------------------------------------------------
60  */
61
62 #include "postgres.h"
63
64 #include <unistd.h>
65 #include <signal.h>
66 #include <sys/wait.h>
67 #include <ctype.h>
68 #include <sys/stat.h>
69 #include <sys/time.h>
70 #include <sys/socket.h>
71 #include <errno.h>
72 #include <fcntl.h>
73 #include <time.h>
74 #include <sys/param.h>
75 #include <netinet/in.h>
76 #include <arpa/inet.h>
77 #include <netdb.h>
78 #include <limits.h>
79
80 #ifdef HAVE_SYS_SELECT_H
81 #include <sys/select.h>
82 #endif
83
84 #ifdef HAVE_GETOPT_H
85 #include <getopt.h>
86 #endif
87
88 #ifdef USE_RENDEZVOUS
89 #include <DNSServiceDiscovery/DNSServiceDiscovery.h>
90 #endif
91
92 #include "catalog/pg_database.h"
93 #include "commands/async.h"
94 #include "lib/dllist.h"
95 #include "libpq/auth.h"
96 #include "libpq/crypt.h"
97 #include "libpq/libpq.h"
98 #include "libpq/pqcomm.h"
99 #include "libpq/pqsignal.h"
100 #include "miscadmin.h"
101 #include "nodes/nodes.h"
102 #include "storage/fd.h"
103 #include "storage/ipc.h"
104 #include "storage/pg_shmem.h"
105 #include "storage/pmsignal.h"
106 #include "storage/proc.h"
107 #include "storage/bufmgr.h"
108 #include "access/xlog.h"
109 #include "tcop/tcopprot.h"
110 #include "utils/guc.h"
111 #include "utils/memutils.h"
112 #include "utils/ps_status.h"
113 #include "bootstrap/bootstrap.h"
114 #include "pgstat.h"
115
116
117 #define INVALID_SOCK    (-1)
118
119 #ifdef HAVE_SIGPROCMASK
120 sigset_t        UnBlockSig,
121                         BlockSig,
122                         AuthBlockSig;
123
124 #else
125 int                     UnBlockSig,
126                         BlockSig,
127                         AuthBlockSig;
128 #endif
129
130 /*
131  * List of active backends (or child processes anyway; we don't actually
132  * know whether a given child has become a backend or is still in the
133  * authorization phase).  This is used mainly to keep track of how many
134  * children we have and send them appropriate signals when necessary.
135  */
struct bkend
{
	pid_t		pid;			/* process id of this child */
	long		cancel_key;		/* cancel key used for cancel requests
								 * aimed at this backend */
};

typedef struct bkend Backend;
141
142 static Dllist *BackendList;
143
144 #ifdef EXEC_BACKEND
145 #define NUM_BACKENDARRAY_ELEMS (2*MaxBackends)
146 static Backend *ShmemBackendArray;
147 #endif
148
149 /* The socket number we are listening for connections on */
150 int                     PostPortNumber;
151 char       *UnixSocketDir;
152 char       *ListenAddresses;
153
154 /*
155  * MaxBackends is the limit on the number of backends we can start.
156  * Note that a larger MaxBackends value will increase the size of the
157  * shared memory area as well as cause the postmaster to grab more
158  * kernel semaphores, even if you never actually use that many
159  * backends.
160  */
161 int                     MaxBackends;
162
163 /*
164  * ReservedBackends is the number of backends reserved for superuser use.
165  * This number is taken out of the pool size given by MaxBackends so
166  * the number of backend slots available to non-superusers is
167  * (MaxBackends - ReservedBackends).  Note what this really means is
168  * "if there are <= ReservedBackends connections available, only superusers
169  * can make new connections" --- pre-existing superuser connections don't
170  * count against the limit.
171  */
172 int                     ReservedBackends;
173
174
175 static char *progname = NULL;
176
177 /* The socket(s) we're listening to. */
178 #define MAXLISTEN       10
179 static int      ListenSocket[MAXLISTEN];
180
181 /* Used to reduce the number of #ifdef EXEC_BACKEND macro tests */
182 #ifdef EXEC_BACKEND
183 const bool      ExecBackend = true;
184
185 #else
186 const bool      ExecBackend = false;
187 #endif
188
189 /*
190  * Set by the -o option
191  */
192 static char ExtraOptions[MAXPGPATH];
193
194 /*
195  * These globals control the behavior of the postmaster in case some
196  * backend dumps core.  Normally, it kills all peers of the dead backend
197  * and reinitializes shared memory.  By specifying -s or -n, we can have
198  * the postmaster stop (rather than kill) peers and not reinitialize
199  * shared data structures.
200  */
201 static bool Reinit = true;
202 static int      SendStop = false;
203
204 /* still more option variables */
205 bool            EnableSSL = false;
206 bool            SilentMode = false; /* silent mode (-S) */
207
208 int                     PreAuthDelay = 0;
209 int                     AuthenticationTimeout = 60;
210 int                     CheckPointTimeout = 300;
211 int                     CheckPointWarning = 30;
212 time_t          LastSignalledCheckpoint = 0;
213
214 bool            log_hostname;           /* for ps display and logging */
215 bool            Log_connections = false;
216 bool            Db_user_namespace = false;
217
218 char       *rendezvous_name;
219
220 /* list of library:init-function to be preloaded */
221 char       *preload_libraries_string = NULL;
222
223 /* Startup/shutdown state */
224 static pid_t StartupPID = 0,
225                         ShutdownPID = 0,
226                         CheckPointPID = 0,
227                         BgWriterPID = 0;
228 static time_t checkpointed = 0;
229
230 #define                 NoShutdown              0
231 #define                 SmartShutdown   1
232 #define                 FastShutdown    2
233
234 static int      Shutdown = NoShutdown;
235
236 static bool FatalError = false; /* T if recovering from backend crash */
237
238 bool            ClientAuthInProgress = false;           /* T during new-client
239                                                                                                  * authentication */
240
241 /*
242  * State for assigning random salts and cancel keys.
243  * Also, the global MyCancelKey passes the cancel key assigned to a given
244  * backend from the postmaster to that backend (via fork).
245  */
246
247 static unsigned int random_seed = 0;
248
249 static int      debug_flag = 0;
250
251 extern char *optarg;
252 extern int      optind,
253                         opterr;
254
255 #ifdef HAVE_INT_OPTRESET
256 extern int      optreset;
257 #endif
258
259 /*
260  * postmaster.c - function prototypes
261  */
262 static void pmdaemonize(int argc, char *argv[]);
263 static Port *ConnCreate(int serverFd);
264 static void ConnFree(Port *port);
265 static void reset_shared(unsigned short port);
266 static void SIGHUP_handler(SIGNAL_ARGS);
267 static void pmdie(SIGNAL_ARGS);
268 static void reaper(SIGNAL_ARGS);
269 static void sigusr1_handler(SIGNAL_ARGS);
270 static void dummy_handler(SIGNAL_ARGS);
271 static void CleanupProc(int pid, int exitstatus);
272 static void LogChildExit(int lev, const char *procname,
273                          int pid, int exitstatus);
274 static void BackendInit(Port *port);
275 static int  BackendRun(Port *port);
276 static void ExitPostmaster(int status);
277 static void usage(const char *);
278 static int      ServerLoop(void);
279 static int      BackendStartup(Port *port);
280 static int      ProcessStartupPacket(Port *port, bool SSLdone);
281 static void processCancelRequest(Port *port, void *pkt);
282 static int      initMasks(fd_set *rmask);
283 static void report_fork_failure_to_client(Port *port, int errnum);
284 static enum CAC_state canAcceptConnections(void);
285 static long PostmasterRandom(void);
286 static void RandomSalt(char *cryptSalt, char *md5Salt);
287 static void SignalChildren(int signal);
288 static int      CountChildren(void);
289 static bool CreateOptsFile(int argc, char *argv[]);
290 NON_EXEC_STATIC void SSDataBaseInit(int xlop);
291 static pid_t SSDataBase(int xlop);
292 static void
293 postmaster_error(const char *fmt,...)
294 /* This lets gcc check the format string for consistency. */
295 __attribute__((format(printf, 1, 2)));
296
297 #ifdef EXEC_BACKEND
298 #ifdef WIN32
299 pid_t win32_forkexec(const char* path, char *argv[]);
300
301 static void  win32_AddChild(pid_t pid, HANDLE handle);
302 static void  win32_RemoveChild(pid_t pid);
303 static pid_t win32_waitpid(int *exitstatus);
304 static DWORD WINAPI win32_sigchld_waiter(LPVOID param);
305
306 static pid_t  *win32_childPIDArray;
307 static HANDLE *win32_childHNDArray;
308 static unsigned long win32_numChildren = 0;
309 #endif
310
311 static pid_t Backend_forkexec(Port *port);
312
313 static unsigned long tmpBackendFileNum = 0;
314 void read_backend_variables(unsigned long id, Port *port);
315 static bool write_backend_variables(Port *port);
316
317 size_t          ShmemBackendArraySize(void);
318 void            ShmemBackendArrayAllocation(void);
319 static void     ShmemBackendArrayAdd(Backend *bn);
320 static void ShmemBackendArrayRemove(pid_t pid);
321 #endif
322
323 #define StartupDataBase()               SSDataBase(BS_XLOG_STARTUP)
324 #define CheckPointDataBase()    SSDataBase(BS_XLOG_CHECKPOINT)
325 #define StartBackgroundWriter() SSDataBase(BS_XLOG_BGWRITER)
326 #define ShutdownDataBase()              SSDataBase(BS_XLOG_SHUTDOWN)
327
328
329 static void
330 checkDataDir(const char *checkdir)
331 {
332         char            path[MAXPGPATH];
333         FILE       *fp;
334         struct stat stat_buf;
335
336         if (checkdir == NULL)
337         {
338                 fprintf(stderr,
339                                 gettext("%s does not know where to find the database system data.\n"
340                                                 "You must specify the directory that contains the database system\n"
341                                                 "either by specifying the -D invocation option or by setting the\n"
342                                                 "PGDATA environment variable.\n"),
343                                 progname);
344                 ExitPostmaster(2);
345         }
346
347         if (stat(checkdir, &stat_buf) == -1)
348         {
349                 if (errno == ENOENT)
350                         ereport(FATAL,
351                                         (errcode_for_file_access(),
352                                          errmsg("data directory \"%s\" does not exist",
353                                                         checkdir)));
354                 else
355                         ereport(FATAL,
356                                         (errcode_for_file_access(),
357                          errmsg("could not read permissions of directory \"%s\": %m",
358                                         checkdir)));
359         }
360
361         /*
362          * Check if the directory has group or world access.  If so, reject.
363          *
364          * XXX temporarily suppress check when on Windows, because there may not
365          * be proper support for Unix-y file permissions.  Need to think of a
366          * reasonable check to apply on Windows.
367          */
368 #if !defined(__CYGWIN__) && !defined(WIN32)
369         if (stat_buf.st_mode & (S_IRWXG | S_IRWXO))
370                 ereport(FATAL,
371                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
372                                  errmsg("data directory \"%s\" has group or world access",
373                                                 checkdir),
374                                  errdetail("Permissions should be u=rwx (0700).")));
375 #endif
376
377         /* Look for PG_VERSION before looking for pg_control */
378         ValidatePgVersion(checkdir);
379
380         snprintf(path, sizeof(path), "%s/global/pg_control", checkdir);
381
382         fp = AllocateFile(path, PG_BINARY_R);
383         if (fp == NULL)
384         {
385                 fprintf(stderr,
386                                 gettext("%s: could not find the database system\n"
387                                                 "Expected to find it in the directory \"%s\",\n"
388                                                 "but could not open file \"%s\": %s\n"),
389                                 progname, checkdir, path, strerror(errno));
390                 ExitPostmaster(2);
391         }
392         FreeFile(fp);
393 }
394
395
396 #ifdef USE_RENDEZVOUS
397
398 /* reg_reply -- empty callback function for DNSServiceRegistrationCreate() */
399 static void
400 reg_reply(DNSServiceRegistrationReplyErrorType errorCode, void *context)
401 {
402
403 }
404 #endif
405
406 int
407 PostmasterMain(int argc, char *argv[])
408 {
409         int                     opt;
410         int                     status;
411         char            original_extraoptions[MAXPGPATH];
412         char       *potential_DataDir = NULL;
413         int                     i;
414
415         *original_extraoptions = '\0';
416
417         progname = argv[0];
418
419         IsPostmasterEnvironment = true;
420
421         /*
422          * Catch standard options before doing much else.  This even works on
423          * systems without getopt_long.
424          */
425         if (argc > 1)
426         {
427                 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
428                 {
429                         usage(progname);
430                         ExitPostmaster(0);
431                 }
432                 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
433                 {
434                         puts("postmaster (PostgreSQL) " PG_VERSION);
435                         ExitPostmaster(0);
436                 }
437         }
438
439         /*
440          * for security, no dir or file created can be group or other
441          * accessible
442          */
443         umask((mode_t) 0077);
444
445         MyProcPid = PostmasterPid = getpid();
446
447         /*
448          * Fire up essential subsystems: memory management
449          */
450         MemoryContextInit();
451
452         /*
453          * By default, palloc() requests in the postmaster will be allocated
454          * in the PostmasterContext, which is space that can be recycled by
455          * backends.  Allocated data that needs to be available to backends
456          * should be allocated in TopMemoryContext.
457          */
458         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
459                                                                                           "Postmaster",
460                                                                                           ALLOCSET_DEFAULT_MINSIZE,
461                                                                                           ALLOCSET_DEFAULT_INITSIZE,
462                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
463         MemoryContextSwitchTo(PostmasterContext);
464
465         IgnoreSystemIndexes(false);
466
467         /*
468          * Options setup
469          */
470         InitializeGUCOptions();
471
472         potential_DataDir = getenv("PGDATA");           /* default value */
473
474         opterr = 1;
475
476         while ((opt = getopt(argc, argv, "A:a:B:b:c:D:d:Fh:ik:lm:MN:no:p:Ss-:")) != -1)
477         {
478                 switch (opt)
479                 {
480                         case 'A':
481 #ifdef USE_ASSERT_CHECKING
482                                 SetConfigOption("debug_assertions", optarg, PGC_POSTMASTER, PGC_S_ARGV);
483 #else
484                                 postmaster_error("assert checking is not compiled in");
485 #endif
486                                 break;
487                         case 'a':
488                                 /* Can no longer set authentication method. */
489                                 break;
490                         case 'B':
491                                 SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
492                                 break;
493                         case 'b':
494                                 /* Can no longer set the backend executable file to use. */
495                                 break;
496                         case 'D':
497                                 potential_DataDir = optarg;
498                                 break;
499                         case 'd':
500                                 {
501                                         /* Turn on debugging for the postmaster. */
502                                         char       *debugstr = palloc(strlen("debug") + strlen(optarg) + 1);
503
504                                         sprintf(debugstr, "debug%s", optarg);
505                                         SetConfigOption("log_min_messages", debugstr,
506                                                                         PGC_POSTMASTER, PGC_S_ARGV);
507                                         pfree(debugstr);
508                                         debug_flag = atoi(optarg);
509                                         break;
510                                 }
511                         case 'F':
512                                 SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
513                                 break;
514                         case 'h':
515                                 SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
516                                 break;
517                         case 'i':
518                                 SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
519                                 break;
520                         case 'k':
521                                 SetConfigOption("unix_socket_directory", optarg, PGC_POSTMASTER, PGC_S_ARGV);
522                                 break;
523 #ifdef USE_SSL
524                         case 'l':
525                                 SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
526                                 break;
527 #endif
528                         case 'm':
529                                 /* Multiplexed backends no longer supported. */
530                                 break;
531                         case 'M':
532
533                                 /*
534                                  * ignore this flag.  This may be passed in because the
535                                  * program was run as 'postgres -M' instead of
536                                  * 'postmaster'
537                                  */
538                                 break;
539                         case 'N':
540                                 /* The max number of backends to start. */
541                                 SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
542                                 break;
543                         case 'n':
544                                 /* Don't reinit shared mem after abnormal exit */
545                                 Reinit = false;
546                                 break;
547                         case 'o':
548
549                                 /*
550                                  * Other options to pass to the backend on the command
551                                  * line -- useful only for debugging.
552                                  */
553                                 strcat(ExtraOptions, " ");
554                                 strcat(ExtraOptions, optarg);
555                                 strcpy(original_extraoptions, optarg);
556                                 break;
557                         case 'p':
558                                 SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
559                                 break;
560                         case 'S':
561
562                                 /*
563                                  * Start in 'S'ilent mode (disassociate from controlling
564                                  * tty). You may also think of this as 'S'ysV mode since
565                                  * it's most badly needed on SysV-derived systems like
566                                  * SVR4 and HP-UX.
567                                  */
568                                 SetConfigOption("silent_mode", "true", PGC_POSTMASTER, PGC_S_ARGV);
569                                 break;
570                         case 's':
571
572                                 /*
573                                  * In the event that some backend dumps core, send
574                                  * SIGSTOP, rather than SIGQUIT, to all its peers.      This
575                                  * lets the wily post_hacker collect core dumps from
576                                  * everyone.
577                                  */
578                                 SendStop = true;
579                                 break;
580                         case 'c':
581                         case '-':
582                                 {
583                                         char       *name,
584                                                            *value;
585
586                                         ParseLongOption(optarg, &name, &value);
587                                         if (!value)
588                                         {
589                                                 if (opt == '-')
590                                                         ereport(ERROR,
591                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
592                                                                          errmsg("--%s requires a value",
593                                                                                         optarg)));
594                                                 else
595                                                         ereport(ERROR,
596                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
597                                                                          errmsg("-c %s requires a value",
598                                                                                         optarg)));
599                                         }
600
601                                         SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
602                                         free(name);
603                                         if (value)
604                                                 free(value);
605                                         break;
606                                 }
607
608                         default:
609                                 fprintf(stderr,
610                                           gettext("Try \"%s --help\" for more information.\n"),
611                                                 progname);
612                                 ExitPostmaster(1);
613                 }
614         }
615
616         /*
617          * Postmaster accepts no non-option switch arguments.
618          */
619         if (optind < argc)
620         {
621                 postmaster_error("invalid argument: \"%s\"", argv[optind]);
622                 fprintf(stderr,
623                                 gettext("Try \"%s --help\" for more information.\n"),
624                                 progname);
625                 ExitPostmaster(1);
626         }
627
628         /*
629          * Now we can set the data directory, and then read postgresql.conf.
630          */
631         checkDataDir(potential_DataDir);        /* issues error messages */
632         SetDataDir(potential_DataDir);
633
634         ProcessConfigFile(PGC_POSTMASTER);
635 #ifdef EXEC_BACKEND
636         write_nondefault_variables(PGC_POSTMASTER);
637 #endif
638
639         /*
640          * Check for invalid combinations of GUC settings.
641          */
642         if (NBuffers < 2 * MaxBackends || NBuffers < 16)
643         {
644                 /*
645                  * Do not accept -B so small that backends are likely to starve
646                  * for lack of buffers.  The specific choices here are somewhat
647                  * arbitrary.
648                  */
649                 postmaster_error("the number of buffers (-B) must be at least twice the number of allowed connections (-N) and at least 16");
650                 ExitPostmaster(1);
651         }
652
653         if (ReservedBackends >= MaxBackends)
654         {
655                 postmaster_error("superuser_reserved_connections must be less than max_connections");
656                 ExitPostmaster(1);
657         }
658
659         /*
660          * Other one-time internal sanity checks can go here.
661          */
662         if (!CheckDateTokenTables())
663         {
664                 postmaster_error("invalid datetoken tables, please fix");
665                 ExitPostmaster(1);
666         }
667
668         /*
669          * Now that we are done processing the postmaster arguments, reset
670          * getopt(3) library so that it will work correctly in subprocesses.
671          */
672         optind = 1;
673 #ifdef HAVE_INT_OPTRESET
674         optreset = 1;                           /* some systems need this too */
675 #endif
676
677         /* For debugging: display postmaster environment */
678         {
679                 extern char **environ;
680                 char      **p;
681
682                 ereport(DEBUG3,
683                                 (errmsg_internal("%s: PostmasterMain: initial environ dump:",
684                                                                  progname)));
685                 ereport(DEBUG3,
686                                 (errmsg_internal("-----------------------------------------")));
687                 for (p = environ; *p; ++p)
688                         ereport(DEBUG3,
689                                         (errmsg_internal("\t%s", *p)));
690                 ereport(DEBUG3,
691                                 (errmsg_internal("-----------------------------------------")));
692         }
693
694         /*
695          * On some systems our dynloader code needs the executable's pathname.
696          */
697         if (FindExec(pg_pathname, progname, "postgres") < 0)
698                 ereport(FATAL,
699                                 (errmsg("%s: could not locate postgres executable",
700                                                 progname)));
701
702         /*
703          * Initialize SSL library, if specified.
704          */
705 #ifdef USE_SSL
706         if (EnableSSL)
707                 secure_initialize();
708 #endif
709
710         /*
711          * process any libraries that should be preloaded and optionally
712          * pre-initialized
713          */
714         if (preload_libraries_string)
715                 process_preload_libraries(preload_libraries_string);
716
717         /*
718          * Fork away from controlling terminal, if -S specified.
719          *
720          * Must do this before we grab any interlock files, else the interlocks
721          * will show the wrong PID.
722          */
723         if (SilentMode)
724                 pmdaemonize(argc, argv);
725
726         /*
727          * Create lockfile for data directory.
728          *
729          * We want to do this before we try to grab the input sockets, because
730          * the data directory interlock is more reliable than the socket-file
731          * interlock (thanks to whoever decided to put socket files in /tmp
732          * :-(). For the same reason, it's best to grab the TCP socket before
733          * the Unix socket.
734          */
735         CreateDataDirLockFile(DataDir, true);
736
737         /*
738          * Remove old temporary files.  At this point there can be no other
739          * Postgres processes running in this directory, so this should be
740          * safe.
741          */
742         RemovePgTempFiles();
743
744         /*
745          * Establish input sockets.
746          */
747         for (i = 0; i < MAXLISTEN; i++)
748                 ListenSocket[i] = -1;
749
750         if (ListenAddresses)
751         {
752                 char       *curhost,
753                                    *endptr;
754                 char            c;
755
756                 curhost = ListenAddresses;
757                 for (;;)
758                 {
759                         /* ignore whitespace */
760                         while (isspace((unsigned char) *curhost))
761                                 curhost++;
762                         if (*curhost == '\0')
763                                 break;
764                         endptr = curhost;
765                         while (*endptr != '\0' && !isspace((unsigned char) *endptr))
766                                 endptr++;
767                         c = *endptr;
768                         *endptr = '\0';
769                         if (strcmp(curhost,"*") == 0)
770                                 status = StreamServerPort(AF_UNSPEC, NULL,
771                                                                                   (unsigned short) PostPortNumber,
772                                                                                   UnixSocketDir,
773                                                                                   ListenSocket, MAXLISTEN);
774                         else
775                                 status = StreamServerPort(AF_UNSPEC, curhost,
776                                                                                   (unsigned short) PostPortNumber,
777                                                                                   UnixSocketDir,
778                                                                                   ListenSocket, MAXLISTEN);
779                         if (status != STATUS_OK)
780                                 ereport(WARNING,
781                                                 (errmsg("could not create listen socket for \"%s\"",
782                                                                 curhost)));
783                         *endptr = c;
784                         if (c != '\0')
785                                 curhost = endptr+1;
786                         else
787                                 break;
788                 }
789         }
790
791 #ifdef USE_RENDEZVOUS
792         /* Register for Rendezvous only if we opened TCP socket(s) */
793         if (ListenSocket[0] != -1 && rendezvous_name != NULL)
794         {
795                 DNSServiceRegistrationCreate(rendezvous_name,
796                                                                          "_postgresql._tcp.",
797                                                                          "",
798                                                                          htonl(PostPortNumber),
799                                                                          "",
800                                                                          (DNSServiceRegistrationReply) reg_reply,
801                                                                          NULL);
802         }
803 #endif
804
805 #ifdef HAVE_UNIX_SOCKETS
806         status = StreamServerPort(AF_UNIX, NULL,
807                                                           (unsigned short) PostPortNumber,
808                                                           UnixSocketDir,
809                                                           ListenSocket, MAXLISTEN);
810         if (status != STATUS_OK)
811                 ereport(WARNING,
812                                 (errmsg("could not create Unix-domain socket")));
813 #endif
814
815         /*
816          * check that we have some socket to listen on
817          */
818         if (ListenSocket[0] == -1)
819                 ereport(FATAL,
820                                 (errmsg("no socket configured to listen on")));
821
822         XLOGPathInit();
823
824         /*
825          * Set up shared memory and semaphores.
826          */
827         reset_shared(PostPortNumber);
828
829         /*
830          * Estimate number of openable files.  This must happen after setting up
831          * semaphores, because on some platforms semaphores count as open files.
832          */
833         set_max_safe_fds();
834
835         /*
836          * Initialize the list of active backends.
837          */
838         BackendList = DLNewList();
839
840 #ifdef WIN32
841         /*
842          * Initialize the child pid/HANDLE arrays
843          */
844         win32_childPIDArray = (pid_t*)malloc(NUM_BACKENDARRAY_ELEMS*sizeof(pid_t));
845         win32_childHNDArray = (HANDLE*)malloc(NUM_BACKENDARRAY_ELEMS*sizeof(HANDLE));
846         if (!win32_childPIDArray || !win32_childHNDArray)
847                 ereport(FATAL,
848                                 (errcode(ERRCODE_OUT_OF_MEMORY),
849                                  errmsg("out of memory")));
850 #endif
851
852         /*
853          * Record postmaster options.  We delay this till now to avoid
854          * recording bogus options (eg, NBuffers too high for available
855          * memory).
856          */
857         if (!CreateOptsFile(argc, argv))
858                 ExitPostmaster(1);
859
860         /*
861          * Set up signal handlers for the postmaster process.
862          *
863          * CAUTION: when changing this list, check for side-effects on the signal
864          * handling setup of child processes.  See tcop/postgres.c,
865          * bootstrap/bootstrap.c, and postmaster/pgstat.c.
866          */
867         pqinitmask();
868         PG_SETMASK(&BlockSig);
869
870         pqsignal(SIGHUP, SIGHUP_handler);       /* reread config file and have
871                                                                                  * children do same */
872         pqsignal(SIGINT, pmdie);        /* send SIGTERM and ShutdownDataBase */
873         pqsignal(SIGQUIT, pmdie);       /* send SIGQUIT and die */
874         pqsignal(SIGTERM, pmdie);       /* wait for children and ShutdownDataBase */
875         pqsignal(SIGALRM, SIG_IGN); /* ignored */
876         pqsignal(SIGPIPE, SIG_IGN); /* ignored */
877         pqsignal(SIGUSR1, sigusr1_handler); /* message from child process */
878         pqsignal(SIGUSR2, dummy_handler);       /* unused, reserve for children */
879         pqsignal(SIGCHLD, reaper);      /* handle child termination */
880         pqsignal(SIGTTIN, SIG_IGN); /* ignored */
881         pqsignal(SIGTTOU, SIG_IGN); /* ignored */
882         /* ignore SIGXFSZ, so that ulimit violations work like disk full */
883 #ifdef SIGXFSZ
884         pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
885 #endif
886
887         /*
888          * Reset whereToSendOutput from Debug (its starting state) to None.
889          * This prevents ereport from sending log messages to stderr unless
890          * the syslog/stderr switch permits.  We don't do this until the
891          * postmaster is fully launched, since startup failures may as well be
892          * reported to stderr.
893          */
894         whereToSendOutput = None;
895
896         /*
897          * On many platforms, the first call of localtime() incurs significant
898          * overhead to load timezone info from the system configuration files.
899          * By doing it once in the postmaster, we avoid having to do it in
900          * every started child process.  The savings are not huge, but they
901          * add up...
902          */
903         {
904                 time_t          now = time(NULL);
905
906                 (void) localtime(&now);
907         }
908
909         /*
910          * Initialize and try to startup the statistics collector process
911          */
912         pgstat_init();
913         pgstat_start();
914
915         /*
916          * Load cached files for client authentication.
917          */
918         load_hba();
919         load_ident();
920         load_user();
921         load_group();
922
923         /*
924          * We're ready to rock and roll...
925          */
926         StartupPID = StartupDataBase();
927
928         status = ServerLoop();
929
930         /*
931          * ServerLoop probably shouldn't ever return, but if it does, close
932          * down.
933          */
934         ExitPostmaster(status != STATUS_OK);
935
936         return 0;                                       /* not reached */
937 }
938
939 static void
940 pmdaemonize(int argc, char *argv[])
941 {
942 #ifdef WIN32
943         /* not supported */
944         elog(FATAL,"SilentMode not supported under WIN32");
945 #else
946         int                     i;
947         pid_t           pid;
948
949 #ifdef LINUX_PROFILE
950         struct itimerval prof_itimer;
951 #endif
952
953 #ifdef LINUX_PROFILE
954         /* see comments in BackendRun */
955         getitimer(ITIMER_PROF, &prof_itimer);
956 #endif
957
958         pid = fork();
959         if (pid == (pid_t) -1)
960         {
961                 postmaster_error("could not fork background process: %s",
962                                                  strerror(errno));
963                 ExitPostmaster(1);
964         }
965         else if (pid)
966         {                                                       /* parent */
967                 /* Parent should just exit, without doing any atexit cleanup */
968                 _exit(0);
969         }
970
971 #ifdef LINUX_PROFILE
972         setitimer(ITIMER_PROF, &prof_itimer, NULL);
973 #endif
974
975         MyProcPid = getpid();           /* reset MyProcPid to child */
976
977 /* GH: If there's no setsid(), we hopefully don't need silent mode.
978  * Until there's a better solution.
979  */
980 #ifdef HAVE_SETSID
981         if (setsid() < 0)
982         {
983                 postmaster_error("could not dissociate from controlling TTY: %s",
984                                                  strerror(errno));
985                 ExitPostmaster(1);
986         }
987 #endif
988         i = open(NULL_DEV, O_RDWR | PG_BINARY);
989         dup2(i, 0);
990         dup2(i, 1);
991         dup2(i, 2);
992         close(i);
993 #endif
994 }
995
996
997
/*
 * usage -- write the postmaster's command-line help to stdout.
 *
 * progname is substituted into the banner and the "Usage:" line.  Every
 * message is passed through gettext() separately so that each one can be
 * translated on its own.  The -A and -l entries appear only in builds
 * with USE_ASSERT_CHECKING and USE_SSL respectively.
 */
static void
usage(const char *progname)
{
	/* banner and synopsis */
	printf(gettext("%s is the PostgreSQL server.\n\n"), progname);
	printf(gettext("Usage:\n  %s [OPTION]...\n\n"), progname);

	/* regular options */
	printf(gettext("Options:\n"));
#ifdef USE_ASSERT_CHECKING
	printf(gettext("  -A 1|0          enable/disable run-time assert checking\n"));
#endif
	printf(gettext("  -B NBUFFERS     number of shared buffers\n"));
	printf(gettext("  -c NAME=VALUE   set run-time parameter\n"));
	printf(gettext("  -d 1-5          debugging level\n"));
	printf(gettext("  -D DATADIR      database directory\n"));
	printf(gettext("  -F              turn fsync off\n"));
	printf(gettext("  -h HOSTNAME     host name or IP address to listen on\n"));
	printf(gettext("  -i              enable TCP/IP connections\n"));
	printf(gettext("  -k DIRECTORY    Unix-domain socket location\n"));
#ifdef USE_SSL
	printf(gettext("  -l              enable SSL connections\n"));
#endif
	printf(gettext("  -N MAX-CONNECT  maximum number of allowed connections\n"));
	printf(gettext("  -o OPTIONS      pass \"OPTIONS\" to each server process\n"));
	printf(gettext("  -p PORT         port number to listen on\n"));
	printf(gettext("  -S              silent mode (start in background without logging output)\n"));
	printf(gettext("  --help          show this help, then exit\n"));
	printf(gettext("  --version       output version information, then exit\n"));

	/* options intended for developers */
	printf(gettext("\nDeveloper options:\n"));
	printf(gettext("  -n              do not reinitialize shared memory after abnormal exit\n"));
	printf(gettext("  -s              send SIGSTOP to all backend servers if one dies\n"));

	/* closing pointer to the documentation and the bug-report address */
	printf(gettext("\nPlease read the documentation for the complete list of run-time\n"
				   "configuration settings and how to set them on the command line or in\n"
				   "the configuration file.\n\n"
				   "Report bugs to <pgsql-bugs@postgresql.org>.\n"));
}
1037
1038 static int
1039 ServerLoop(void)
1040 {
1041         fd_set          readmask;
1042         int                     nSockets;
1043         struct timeval now,
1044                                 later;
1045         struct timezone tz;
1046         int                     i;
1047
1048         gettimeofday(&now, &tz);
1049
1050         nSockets = initMasks(&readmask);
1051
1052         for (;;)
1053         {
1054                 Port       *port;
1055                 fd_set          rmask;
1056                 struct timeval timeout;
1057
1058                 /*
1059                  * The timeout for the select() below is normally set on the basis
1060                  * of the time to the next checkpoint.  However, if for some
1061                  * reason we don't have a next-checkpoint time, time out after 60
1062                  * seconds. This keeps checkpoint scheduling from locking up when
1063                  * we get new connection requests infrequently (since we are
1064                  * likely to detect checkpoint completion just after enabling
1065                  * signals below, after we've already made the decision about how
1066                  * long to wait this time).
1067                  */
1068                 timeout.tv_sec = 60;
1069                 timeout.tv_usec = 0;
1070
1071                 if (CheckPointPID == 0 && checkpointed &&
1072                         StartupPID == 0 && Shutdown == NoShutdown &&
1073                         !FatalError && random_seed != 0)
1074                 {
1075                         time_t          now = time(NULL);
1076
1077                         if (CheckPointTimeout + checkpointed > now)
1078                         {
1079                                 /*
1080                                  * Not time for checkpoint yet, so set select timeout
1081                                  */
1082                                 timeout.tv_sec = CheckPointTimeout + checkpointed - now;
1083                         }
1084                         else
1085                         {
1086                                 /* Time to make the checkpoint... */
1087                                 CheckPointPID = CheckPointDataBase();
1088
1089                                 /*
1090                                  * if fork failed, schedule another try at 0.1 normal
1091                                  * delay
1092                                  */
1093                                 if (CheckPointPID == 0)
1094                                 {
1095                                         timeout.tv_sec = CheckPointTimeout / 10;
1096                                         checkpointed = now + timeout.tv_sec - CheckPointTimeout;
1097                                 }
1098                         }
1099                 }
1100
1101                 /*
1102                  * If no background writer process is running and we should
1103                  * do background writing, start one. It doesn't matter if
1104                  * this fails, we'll just try again later.
1105                  */
1106                 if (BgWriterPID == 0 && BgWriterPercent > 0 &&
1107                         StartupPID == 0 && Shutdown == NoShutdown &&
1108                         !FatalError && random_seed != 0)
1109                 {
1110                         BgWriterPID = StartBackgroundWriter();
1111                 }
1112
1113                 /*
1114                  * Wait for something to happen.
1115                  */
1116                 memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1117
1118                 PG_SETMASK(&UnBlockSig);
1119
1120                 if (select(nSockets, &rmask, NULL, NULL, &timeout) < 0)
1121                 {
1122                         PG_SETMASK(&BlockSig);
1123                         if (errno == EINTR || errno == EWOULDBLOCK)
1124                                 continue;
1125                         ereport(LOG,
1126                                         (errcode_for_socket_access(),
1127                                          errmsg("select() failed in postmaster: %m")));
1128                         return STATUS_ERROR;
1129                 }
1130
1131                 /*
1132                  * Block all signals until we wait again.  (This makes it safe for
1133                  * our signal handlers to do nontrivial work.)
1134                  */
1135                 PG_SETMASK(&BlockSig);
1136
1137                 /*
1138                  * Select a random seed at the time of first receiving a request.
1139                  */
1140                 while (random_seed == 0)
1141                 {
1142                         gettimeofday(&later, &tz);
1143
1144                         /*
1145                          * We are not sure how much precision is in tv_usec, so we
1146                          * swap the nibbles of 'later' and XOR them with 'now'. On the
1147                          * off chance that the result is 0, we loop until it isn't.
1148                          */
1149                         random_seed = now.tv_usec ^
1150                                 ((later.tv_usec << 16) |
1151                                  ((later.tv_usec >> 16) & 0xffff));
1152                 }
1153
1154                 /*
1155                  * New connection pending on any of our sockets? If so, fork a
1156                  * child process to deal with it.
1157                  */
1158                 for (i = 0; i < MAXLISTEN; i++)
1159                 {
1160                         if (ListenSocket[i] == -1)
1161                                 break;
1162                         if (FD_ISSET(ListenSocket[i], &rmask))
1163                         {
1164                                 port = ConnCreate(ListenSocket[i]);
1165                                 if (port)
1166                                 {
1167                                         BackendStartup(port);
1168
1169                                         /*
1170                                          * We no longer need the open socket or port structure
1171                                          * in this process
1172                                          */
1173                                         StreamClose(port->sock);
1174                                         ConnFree(port);
1175                                 }
1176                         }
1177                 }
1178
1179                 /* If we have lost the stats collector, try to start a new one */
1180                 if (!pgstat_is_running)
1181                         pgstat_start();
1182         }
1183 }
1184
1185
1186 /*
1187  * Initialise the masks for select() for the ports
1188  * we are listening on.  Return the number of sockets to listen on.
1189  */
1190
1191 static int
1192 initMasks(fd_set *rmask)
1193 {
1194         int                     nsocks = -1;
1195         int                     i;
1196
1197         FD_ZERO(rmask);
1198
1199         for (i = 0; i < MAXLISTEN; i++)
1200         {
1201                 int                     fd = ListenSocket[i];
1202
1203                 if (fd == -1)
1204                         break;
1205                 FD_SET(fd, rmask);
1206                 if (fd > nsocks)
1207                         nsocks = fd;
1208         }
1209
1210         return nsocks + 1;
1211 }
1212
1213
1214 /*
1215  * Read the startup packet and do something according to it.
1216  *
1217  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1218  * not return at all.
1219  *
1220  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1221  * if that's what you want.  Return STATUS_ERROR if you don't want to
1222  * send anything to the client, which would typically be appropriate
1223  * if we detect a communications failure.)
1224  */
1225 static int
1226 ProcessStartupPacket(Port *port, bool SSLdone)
1227 {
1228         int32           len;
1229         void       *buf;
1230         ProtocolVersion proto;
1231         MemoryContext oldcontext;
1232
1233         if (pq_getbytes((char *) &len, 4) == EOF)
1234         {
1235                 /*
1236                  * EOF after SSLdone probably means the client didn't like our
1237                  * response to NEGOTIATE_SSL_CODE.      That's not an error condition,
1238                  * so don't clutter the log with a complaint.
1239                  */
1240                 if (!SSLdone)
1241                         ereport(COMMERROR,
1242                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1243                                          errmsg("incomplete startup packet")));
1244                 return STATUS_ERROR;
1245         }
1246
1247         len = ntohl(len);
1248         len -= 4;
1249
1250         if (len < (int32) sizeof(ProtocolVersion) ||
1251                 len > MAX_STARTUP_PACKET_LENGTH)
1252         {
1253                 ereport(COMMERROR,
1254                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1255                                  errmsg("invalid length of startup packet")));
1256                 return STATUS_ERROR;
1257         }
1258
1259         /*
1260          * Allocate at least the size of an old-style startup packet, plus one
1261          * extra byte, and make sure all are zeroes.  This ensures we will
1262          * have null termination of all strings, in both fixed- and
1263          * variable-length packet layouts.
1264          */
1265         if (len <= (int32) sizeof(StartupPacket))
1266                 buf = palloc0(sizeof(StartupPacket) + 1);
1267         else
1268                 buf = palloc0(len + 1);
1269
1270         if (pq_getbytes(buf, len) == EOF)
1271         {
1272                 ereport(COMMERROR,
1273                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1274                                  errmsg("incomplete startup packet")));
1275                 return STATUS_ERROR;
1276         }
1277
1278         /*
1279          * The first field is either a protocol version number or a special
1280          * request code.
1281          */
1282         port->proto = proto = ntohl(*((ProtocolVersion *) buf));
1283
1284         if (proto == CANCEL_REQUEST_CODE)
1285         {
1286                 processCancelRequest(port, buf);
1287                 return 127;                             /* XXX */
1288         }
1289
1290         if (proto == NEGOTIATE_SSL_CODE && !SSLdone)
1291         {
1292                 char            SSLok;
1293
1294 #ifdef USE_SSL
1295                 /* No SSL when disabled or on Unix sockets */
1296                 if (!EnableSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
1297                         SSLok = 'N';
1298                 else
1299                         SSLok = 'S';            /* Support for SSL */
1300 #else
1301                 SSLok = 'N';                    /* No support for SSL */
1302 #endif
1303                 if (send(port->sock, &SSLok, 1, 0) != 1)
1304                 {
1305                         ereport(COMMERROR,
1306                                         (errcode_for_socket_access(),
1307                                  errmsg("failed to send SSL negotiation response: %m")));
1308                         return STATUS_ERROR;    /* close the connection */
1309                 }
1310
1311 #ifdef USE_SSL
1312                 if (SSLok == 'S' && secure_open_server(port) == -1)
1313                         return STATUS_ERROR;
1314 #endif
1315                 /* regular startup packet, cancel, etc packet should follow... */
1316                 /* but not another SSL negotiation request */
1317                 return ProcessStartupPacket(port, true);
1318         }
1319
1320         /* Could add additional special packet types here */
1321
1322         /*
1323          * Set FrontendProtocol now so that ereport() knows what format to
1324          * send if we fail during startup.
1325          */
1326         FrontendProtocol = proto;
1327
1328         /* Check we can handle the protocol the frontend is using. */
1329
1330         if (PG_PROTOCOL_MAJOR(proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) ||
1331           PG_PROTOCOL_MAJOR(proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) ||
1332         (PG_PROTOCOL_MAJOR(proto) == PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) &&
1333          PG_PROTOCOL_MINOR(proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST)))
1334                 ereport(FATAL,
1335                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1336                                  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
1337                                           PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
1338                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST),
1339                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST),
1340                                                 PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST))));
1341
1342         /*
1343          * Now fetch parameters out of startup packet and save them into the
1344          * Port structure.      All data structures attached to the Port struct
1345          * must be allocated in TopMemoryContext so that they won't disappear
1346          * when we pass them to PostgresMain (see BackendRun).  We need not
1347          * worry about leaking this storage on failure, since we aren't in the
1348          * postmaster process anymore.
1349          */
1350         oldcontext = MemoryContextSwitchTo(TopMemoryContext);
1351
1352         if (PG_PROTOCOL_MAJOR(proto) >= 3)
1353         {
1354                 int32           offset = sizeof(ProtocolVersion);
1355
1356                 /*
1357                  * Scan packet body for name/option pairs.      We can assume any
1358                  * string beginning within the packet body is null-terminated,
1359                  * thanks to zeroing extra byte above.
1360                  */
1361                 port->guc_options = NIL;
1362
1363                 while (offset < len)
1364                 {
1365                         char       *nameptr = ((char *) buf) + offset;
1366                         int32           valoffset;
1367                         char       *valptr;
1368
1369                         if (*nameptr == '\0')
1370                                 break;                  /* found packet terminator */
1371                         valoffset = offset + strlen(nameptr) + 1;
1372                         if (valoffset >= len)
1373                                 break;                  /* missing value, will complain below */
1374                         valptr = ((char *) buf) + valoffset;
1375
1376                         if (strcmp(nameptr, "database") == 0)
1377                                 port->database_name = pstrdup(valptr);
1378                         else if (strcmp(nameptr, "user") == 0)
1379                                 port->user_name = pstrdup(valptr);
1380                         else if (strcmp(nameptr, "options") == 0)
1381                                 port->cmdline_options = pstrdup(valptr);
1382                         else
1383                         {
1384                                 /* Assume it's a generic GUC option */
1385                                 port->guc_options = lappend(port->guc_options,
1386                                                                                         pstrdup(nameptr));
1387                                 port->guc_options = lappend(port->guc_options,
1388                                                                                         pstrdup(valptr));
1389                         }
1390                         offset = valoffset + strlen(valptr) + 1;
1391                 }
1392
1393                 /*
1394                  * If we didn't find a packet terminator exactly at the end of the
1395                  * given packet length, complain.
1396                  */
1397                 if (offset != len - 1)
1398                         ereport(FATAL,
1399                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1400                                          errmsg("invalid startup packet layout: expected terminator as last byte")));
1401         }
1402         else
1403         {
1404                 /*
1405                  * Get the parameters from the old-style, fixed-width-fields
1406                  * startup packet as C strings.  The packet destination was
1407                  * cleared first so a short packet has zeros silently added.  We
1408                  * have to be prepared to truncate the pstrdup result for oversize
1409                  * fields, though.
1410                  */
1411                 StartupPacket *packet = (StartupPacket *) buf;
1412
1413                 port->database_name = pstrdup(packet->database);
1414                 if (strlen(port->database_name) > sizeof(packet->database))
1415                         port->database_name[sizeof(packet->database)] = '\0';
1416                 port->user_name = pstrdup(packet->user);
1417                 if (strlen(port->user_name) > sizeof(packet->user))
1418                         port->user_name[sizeof(packet->user)] = '\0';
1419                 port->cmdline_options = pstrdup(packet->options);
1420                 if (strlen(port->cmdline_options) > sizeof(packet->options))
1421                         port->cmdline_options[sizeof(packet->options)] = '\0';
1422                 port->guc_options = NIL;
1423         }
1424
1425         /* Check a user name was given. */
1426         if (port->user_name == NULL || port->user_name[0] == '\0')
1427                 ereport(FATAL,
1428                                 (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
1429                  errmsg("no PostgreSQL user name specified in startup packet")));
1430
1431         /* The database defaults to the user name. */
1432         if (port->database_name == NULL || port->database_name[0] == '\0')
1433                 port->database_name = pstrdup(port->user_name);
1434
1435         if (Db_user_namespace)
1436         {
1437                 /*
1438                  * If user@, it is a global user, remove '@'. We only want to do
1439                  * this if there is an '@' at the end and no earlier in the user
1440                  * string or they may fake as a local user of another database
1441                  * attaching to this database.
1442                  */
1443                 if (strchr(port->user_name, '@') ==
1444                         port->user_name + strlen(port->user_name) - 1)
1445                         *strchr(port->user_name, '@') = '\0';
1446                 else
1447                 {
1448                         /* Append '@' and dbname */
1449                         char       *db_user;
1450
1451                         db_user = palloc(strlen(port->user_name) +
1452                                                          strlen(port->database_name) + 2);
1453                         sprintf(db_user, "%s@%s", port->user_name, port->database_name);
1454                         port->user_name = db_user;
1455                 }
1456         }
1457
1458         /*
1459          * Truncate given database and user names to length of a Postgres
1460          * name.  This avoids lookup failures when overlength names are given.
1461          */
1462         if (strlen(port->database_name) >= NAMEDATALEN)
1463                 port->database_name[NAMEDATALEN - 1] = '\0';
1464         if (strlen(port->user_name) >= NAMEDATALEN)
1465                 port->user_name[NAMEDATALEN - 1] = '\0';
1466
1467         /*
1468          * Done putting stuff in TopMemoryContext.
1469          */
1470         MemoryContextSwitchTo(oldcontext);
1471
1472         /*
1473          * If we're going to reject the connection due to database state, say
1474          * so now instead of wasting cycles on an authentication exchange.
1475          * (This also allows a pg_ping utility to be written.)
1476          */
1477         switch (port->canAcceptConnections)
1478         {
1479                 case CAC_STARTUP:
1480                         ereport(FATAL,
1481                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1482                                          errmsg("the database system is starting up")));
1483                         break;
1484                 case CAC_SHUTDOWN:
1485                         ereport(FATAL,
1486                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1487                                          errmsg("the database system is shutting down")));
1488                         break;
1489                 case CAC_RECOVERY:
1490                         ereport(FATAL,
1491                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1492                                          errmsg("the database system is in recovery mode")));
1493                         break;
1494                 case CAC_TOOMANY:
1495                         ereport(FATAL,
1496                                         (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
1497                                          errmsg("sorry, too many clients already")));
1498                         break;
1499                 case CAC_OK:
1500                 default:
1501                         break;
1502         }
1503
1504         return STATUS_OK;
1505 }
1506
1507
1508 /*
1509  * The client has sent a cancel request packet, not a normal
1510  * start-a-new-connection packet.  Perform the necessary processing.
1511  * Nothing is sent back to the client.
1512  */
1513 static void
1514 processCancelRequest(Port *port, void *pkt)
1515 {
1516         CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
1517         int                     backendPID;
1518         long            cancelAuthCode;
1519         Backend    *bp;
1520 #ifndef EXEC_BACKEND
1521         Dlelem     *curr;
1522 #else
1523         int i;
1524 #endif
1525
1526         backendPID = (int) ntohl(canc->backendPID);
1527         cancelAuthCode = (long) ntohl(canc->cancelAuthCode);
1528
1529         if (backendPID == CheckPointPID)
1530         {
1531                 ereport(DEBUG2,
1532                                 (errmsg_internal("ignoring cancel request for checkpoint process %d",
1533                                                                  backendPID)));
1534                 return;
1535         }
1536         else if (backendPID == BgWriterPID)
1537         {
1538                 ereport(DEBUG2,
1539                                 (errmsg_internal("ignoring cancel request for bgwriter process %d",
1540                                                                  backendPID)));
1541                 return;
1542         }
1543
1544         /* See if we have a matching backend */
1545 #ifndef EXEC_BACKEND
1546         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
1547         {
1548                 bp = (Backend *) DLE_VAL(curr);
1549 #else
1550         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
1551         {
1552                 bp = (Backend*) &ShmemBackendArray[i];
1553 #endif
1554                 if (bp->pid == backendPID)
1555                 {
1556                         if (bp->cancel_key == cancelAuthCode)
1557                         {
1558                                 /* Found a match; signal that backend to cancel current op */
1559                                 ereport(DEBUG2,
1560                                                 (errmsg_internal("processing cancel request: sending SIGINT to process %d",
1561                                                                                  backendPID)));
1562                                 kill(bp->pid, SIGINT);
1563                         }
1564                         else
1565                                 /* Right PID, wrong key: no way, Jose */
1566                                 ereport(DEBUG2,
1567                                                 (errmsg_internal("bad key in cancel request for process %d",
1568                                                                                  backendPID)));
1569                         return;
1570                 }
1571         }
1572
1573         /* No matching backend */
1574         ereport(DEBUG2,
1575                         (errmsg_internal("bad pid in cancel request for process %d",
1576                                                          backendPID)));
1577 }
1578
1579 /*
1580  * canAcceptConnections --- check to see if database state allows connections.
1581  */
1582 static enum CAC_state
1583 canAcceptConnections(void)
1584 {
1585         /* Can't start backends when in startup/shutdown/recovery state. */
1586         if (Shutdown > NoShutdown)
1587                 return CAC_SHUTDOWN;
1588         if (StartupPID)
1589                 return CAC_STARTUP;
1590         if (FatalError)
1591                 return CAC_RECOVERY;
1592
1593         /*
1594          * Don't start too many children.
1595          *
1596          * We allow more connections than we can have backends here because some
1597          * might still be authenticating; they might fail auth, or some
1598          * existing backend might exit before the auth cycle is completed. The
1599          * exact MaxBackends limit is enforced when a new backend tries to
1600          * join the shared-inval backend array.
1601          */
1602         if (CountChildren() >= 2 * MaxBackends)
1603                 return CAC_TOOMANY;
1604
1605         return CAC_OK;
1606 }
1607
1608
1609 /*
1610  * ConnCreate -- create a local connection data structure
1611  */
1612 static Port *
1613 ConnCreate(int serverFd)
1614 {
1615         Port       *port;
1616
1617         if (!(port = (Port *) calloc(1, sizeof(Port))))
1618         {
1619                 ereport(LOG,
1620                                 (errcode(ERRCODE_OUT_OF_MEMORY),
1621                                  errmsg("out of memory")));
1622                 ExitPostmaster(1);
1623         }
1624
1625         if (StreamConnection(serverFd, port) != STATUS_OK)
1626         {
1627                 StreamClose(port->sock);
1628                 ConnFree(port);
1629                 port = NULL;
1630         }
1631         else
1632         {
1633                 /*
1634                  * Precompute password salt values to use for this connection.
1635                  * It's slightly annoying to do this long in advance of knowing
1636                  * whether we'll need 'em or not, but we must do the random()
1637                  * calls before we fork, not after.  Else the postmaster's random
1638                  * sequence won't get advanced, and all backends would end up
1639                  * using the same salt...
1640                  */
1641                 RandomSalt(port->cryptSalt, port->md5Salt);
1642         }
1643
1644         return port;
1645 }
1646
1647
1648 /*
1649  * ConnFree -- free a local connection data structure
1650  */
1651 static void
1652 ConnFree(Port *conn)
1653 {
1654 #ifdef USE_SSL
1655         secure_close(conn);
1656 #endif
1657         free(conn);
1658 }
1659
1660
1661 /*
1662  * ClosePostmasterPorts -- close all the postmaster's open sockets
1663  *
1664  * This is called during child process startup to release file descriptors
1665  * that are not needed by that child process.  The postmaster still has
1666  * them open, of course.
1667  */
1668 void
1669 ClosePostmasterPorts(bool pgstat_too)
1670 {
1671         int                     i;
1672
1673         /* Close the listen sockets */
1674         for (i = 0; i < MAXLISTEN; i++)
1675         {
1676                 if (ListenSocket[i] != -1)
1677                 {
1678                         StreamClose(ListenSocket[i]);
1679                         ListenSocket[i] = -1;
1680                 }
1681         }
1682
1683         /* Close pgstat control sockets, unless we're starting pgstat itself */
1684         if (pgstat_too)
1685                 pgstat_close_sockets();
1686 }
1687
1688
1689 /*
1690  * reset_shared -- reset shared memory and semaphores
1691  */
1692 static void
1693 reset_shared(unsigned short port)
1694 {
1695         /*
1696          * Create or re-create shared memory and semaphores.
1697          *
1698          * Note: in each "cycle of life" we will normally assign the same IPC
1699          * keys (if using SysV shmem and/or semas), since the port number is
1700          * used to determine IPC keys.  This helps ensure that we will clean
1701          * up dead IPC objects if the postmaster crashes and is restarted.
1702          */
1703         CreateSharedMemoryAndSemaphores(false, MaxBackends, port);
1704 }
1705
1706
1707 /*
1708  * SIGHUP -- reread config files, and tell children to do same
1709  */
1710 static void
1711 SIGHUP_handler(SIGNAL_ARGS)
1712 {
1713         int                     save_errno = errno;
1714
1715         PG_SETMASK(&BlockSig);
1716
1717         if (Shutdown <= SmartShutdown)
1718         {
1719                 ereport(LOG,
1720                          (errmsg("received SIGHUP, reloading configuration files")));
1721                 ProcessConfigFile(PGC_SIGHUP);
1722 #ifdef EXEC_BACKEND
1723                 write_nondefault_variables(PGC_SIGHUP);
1724 #endif
1725                 SignalChildren(SIGHUP);
1726                 load_hba();
1727                 load_ident();
1728
1729                 /*
1730                  * Tell the background writer to terminate so that we
1731                  * will start a new one with a possibly changed config
1732                  */
1733                 if (BgWriterPID != 0)
1734                         kill(BgWriterPID, SIGTERM);
1735         }
1736
1737         PG_SETMASK(&UnBlockSig);
1738
1739         errno = save_errno;
1740 }
1741
1742
1743
1744 /*
1745  * pmdie -- signal handler for processing various postmaster signals.
1746  */
1747 static void
1748 pmdie(SIGNAL_ARGS)
1749 {
1750         int                     save_errno = errno;
1751
1752         PG_SETMASK(&BlockSig);
1753
1754         ereport(DEBUG2,
1755                         (errmsg_internal("postmaster received signal %d",
1756                                                          postgres_signal_arg)));
1757
1758         switch (postgres_signal_arg)
1759         {
1760                 case SIGTERM:
1761
1762                         /*
1763                          * Smart Shutdown:
1764                          *
1765                          * Wait for children to end their work and ShutdownDataBase.
1766                          */
1767                         if (Shutdown >= SmartShutdown)
1768                                 break;
1769                         Shutdown = SmartShutdown;
1770                         ereport(LOG,
1771                                         (errmsg("received smart shutdown request")));
1772
1773                         /* Must tell bgwriter to quit, or it never will... */
1774                         if (BgWriterPID != 0)
1775                                 kill(BgWriterPID, SIGTERM);
1776
1777                         if (DLGetHead(BackendList)) /* let reaper() handle this */
1778                                 break;
1779
1780                         /*
1781                          * No children left. Shutdown data base system.
1782                          */
1783                         if (StartupPID > 0 || FatalError)       /* let reaper() handle
1784                                                                                                  * this */
1785                                 break;
1786                         if (ShutdownPID > 0)
1787                         {
1788                                 elog(PANIC, "shutdown process %d already running",
1789                                          (int) ShutdownPID);
1790                                 abort();
1791                         }
1792
1793                         ShutdownPID = ShutdownDataBase();
1794                         break;
1795
1796                 case SIGINT:
1797
1798                         /*
1799                          * Fast Shutdown:
1800                          *
1801                          * Abort all children with SIGTERM (rollback active transactions
1802                          * and exit) and ShutdownDataBase when they are gone.
1803                          */
1804                         if (Shutdown >= FastShutdown)
1805                                 break;
1806                         Shutdown = FastShutdown;
1807                         ereport(LOG,
1808                                         (errmsg("received fast shutdown request")));
1809
1810                         if (DLGetHead(BackendList))
1811                         {
1812                                 if (!FatalError)
1813                                 {
1814                                         ereport(LOG,
1815                                                         (errmsg("aborting any active transactions")));
1816                                         SignalChildren(SIGTERM);
1817                                         /* reaper() does the rest */
1818                                 }
1819                                 break;
1820                         }
1821
1822                         /*
1823                          * No children left. Shutdown data base system.
1824                          *
1825                          * Unlike the previous case, it is not an error for the shutdown
1826                          * process to be running already (we could get SIGTERM followed
1827                          * shortly later by SIGINT).
1828                          */
1829                         if (StartupPID > 0 || FatalError)       /* let reaper() handle
1830                                                                                                  * this */
1831                                 break;
1832                         if (ShutdownPID == 0)
1833                                 ShutdownPID = ShutdownDataBase();
1834                         break;
1835
1836                 case SIGQUIT:
1837
1838                         /*
1839                          * Immediate Shutdown:
1840                          *
1841                          * abort all children with SIGQUIT and exit without attempt to
1842                          * properly shutdown data base system.
1843                          */
1844                         ereport(LOG,
1845                                         (errmsg("received immediate shutdown request")));
1846                         if (ShutdownPID > 0)
1847                                 kill(ShutdownPID, SIGQUIT);
1848                         if (StartupPID > 0)
1849                                 kill(StartupPID, SIGQUIT);
1850                         if (DLGetHead(BackendList))
1851                                 SignalChildren(SIGQUIT);
1852                         ExitPostmaster(0);
1853                         break;
1854         }
1855
1856         PG_SETMASK(&UnBlockSig);
1857
1858         errno = save_errno;
1859 }
1860
1861 /*
1862  * Reaper -- signal handler to cleanup after a backend (child) dies.
1863  */
1864 static void
1865 reaper(SIGNAL_ARGS)
1866 {
1867         int                     save_errno = errno;
1868
1869 #ifdef HAVE_WAITPID
1870         int                     status;                 /* backend exit status */
1871 #else
1872 #ifndef WIN32
1873         union wait      status;                 /* backend exit status */
1874 #endif
1875 #endif
1876         int                     exitstatus;
1877         int                     pid;                    /* process id of dead backend */
1878
1879         PG_SETMASK(&BlockSig);
1880
1881         ereport(DEBUG4,
1882                         (errmsg_internal("reaping dead processes")));
1883 #ifdef HAVE_WAITPID
1884         while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
1885         {
1886                 exitstatus = status;
1887 #else
1888 #ifndef WIN32
1889         while ((pid = wait3(&status, WNOHANG, NULL)) > 0)
1890         {
1891                 exitstatus = status.w_status;
1892 #else
1893         while ((pid = win32_waitpid(&exitstatus)) > 0)
1894         {
1895                 /*
1896                  * We need to do this here, and not in CleanupProc, since this
1897                  * is to be called on all children when we are done with them.
1898                  * Could move to LogChildExit, but that seems like asking for
1899                  * future trouble...
1900                  */
1901                 win32_RemoveChild(pid);
1902 #endif
1903 #endif
1904
1905                 /*
1906                  * Check if this child was the statistics collector. If so, try to
1907                  * start a new one.  (If fail, we'll try again in future cycles of
1908                  * the main loop.)
1909                  */
1910                 if (pgstat_ispgstat(pid))
1911                 {
1912                         LogChildExit(LOG, gettext("statistics collector process"),
1913                                                  pid, exitstatus);
1914                         pgstat_start();
1915                         continue;
1916                 }
1917
1918                 /*
1919                  * Check if this child was a shutdown or startup process.
1920                  */
1921                 if (ShutdownPID > 0 && pid == ShutdownPID)
1922                 {
1923                         if (exitstatus != 0)
1924                         {
1925                                 LogChildExit(LOG, gettext("shutdown process"),
1926                                                          pid, exitstatus);
1927                                 ExitPostmaster(1);
1928                         }
1929                         /* Normal postmaster exit is here */
1930                         ExitPostmaster(0);
1931                 }
1932
1933                 if (StartupPID > 0 && pid == StartupPID)
1934                 {
1935                         if (exitstatus != 0)
1936                         {
1937                                 LogChildExit(LOG, gettext("startup process"),
1938                                                          pid, exitstatus);
1939                                 ereport(LOG,
1940                                                 (errmsg("aborting startup due to startup process failure")));
1941                                 ExitPostmaster(1);
1942                         }
1943                         StartupPID = 0;
1944
1945                         /*
1946                          * Startup succeeded - remember its ID and RedoRecPtr.
1947                          *
1948                          * NB: this MUST happen before we fork a checkpoint or shutdown
1949                          * subprocess, else they will have wrong local ThisStartUpId.
1950                          */
1951                         SetThisStartUpID();
1952
1953                         FatalError = false; /* done with recovery */
1954
1955                         /*
1956                          * Arrange for first checkpoint to occur after standard delay.
1957                          */
1958                         CheckPointPID = 0;
1959                         checkpointed = time(NULL);
1960
1961                         /*
1962                          * Go to shutdown mode if a shutdown request was pending.
1963                          */
1964                         if (Shutdown > NoShutdown)
1965                         {
1966                                 if (ShutdownPID > 0)
1967                                 {
1968                                         elog(PANIC, "startup process %d died while shutdown process %d already running",
1969                                                  pid, (int) ShutdownPID);
1970                                         abort();
1971                                 }
1972                                 ShutdownPID = ShutdownDataBase();
1973                         }
1974
1975                         goto reaper_done;
1976                 }
1977
1978                 /*
1979                  * Else do standard child cleanup.
1980                  */
1981                 CleanupProc(pid, exitstatus);
1982
1983         }                                                       /* loop over pending child-death reports */
1984
1985         if (FatalError)
1986         {
1987                 /*
1988                  * Wait for all children exit, then reset shmem and
1989                  * StartupDataBase.
1990                  */
1991                 if (DLGetHead(BackendList) || StartupPID > 0 || ShutdownPID > 0)
1992                         goto reaper_done;
1993                 ereport(LOG,
1994                         (errmsg("all server processes terminated; reinitializing")));
1995
1996                 shmem_exit(0);
1997                 reset_shared(PostPortNumber);
1998
1999                 StartupPID = StartupDataBase();
2000
2001                 goto reaper_done;
2002         }
2003
2004         if (Shutdown > NoShutdown)
2005         {
2006                 if (DLGetHead(BackendList))
2007                         goto reaper_done;
2008                 if (StartupPID > 0 || ShutdownPID > 0)
2009                         goto reaper_done;
2010                 ShutdownPID = ShutdownDataBase();
2011         }
2012
2013 reaper_done:
2014         PG_SETMASK(&UnBlockSig);
2015
2016         errno = save_errno;
2017 }
2018
2019
#ifdef WIN32
/*
 * On WIN32, socket functions cannot be used inside an APC (signal handler).
 * If they are, select() returns incorrect values and the postmaster ends up
 * in a blocking accept().  The workaround is to run pgstat_beterm() on a
 * separate thread.  The calling thread still blocks until that thread has
 * finished, so no data is scribbled on from the wrong thread (the pgstat
 * functions are not thread safe).
 */

/*
 * Thread entry point: param carries the backend PID, cast to a pointer by
 * win32_pgstat_beterm().
 */
static DWORD WINAPI
win32_pgstat_beterm_thread(LPVOID param)
{
        pgstat_beterm((int) param);
        return 0;
}

/*
 * Run pgstat_beterm(pid) on a helper thread (64kB stack size requested) and
 * wait for it to complete.  Failure to create or to wait for the thread is
 * reported at FATAL level.
 */
static void
win32_pgstat_beterm(int pid)
{
        HANDLE      worker;

        worker = CreateThread(NULL, 64 * 1024, win32_pgstat_beterm_thread,
                                                  (LPVOID) pid, 0, NULL);
        if (worker == NULL)
                ereport(FATAL,
                                (errmsg_internal("failed to create beterm sender thread: %i", (int) GetLastError())));

        if (WaitForSingleObject(worker, INFINITE) != WAIT_OBJECT_0)
                ereport(FATAL,
                                (errmsg_internal("failed to wait for beterm sender thread: %i", (int) GetLastError())));

        CloseHandle(worker);
}
#endif   /* WIN32 */
2048
2049 /*
2050  * CleanupProc -- cleanup after terminated backend.
2051  *
2052  * Remove all local state associated with backend.
2053  */
2054 static void
2055 CleanupProc(int pid,
2056                         int exitstatus)         /* child's exit status. */
2057 {
2058         Dlelem     *curr,
2059                            *next;
2060         Backend    *bp;
2061
2062         LogChildExit(DEBUG2, gettext("child process"), pid, exitstatus);
2063
2064         /*
2065          * If a backend dies in an ugly way (i.e. exit status not 0) then we
2066          * must signal all other backends to quickdie.  If exit status is zero
2067          * we assume everything is hunky dory and simply remove the backend
2068          * from the active backend list.
2069          */
2070         if (exitstatus == 0)
2071         {
2072                 curr = DLGetHead(BackendList);
2073                 while (curr)
2074                 {
2075                         bp = (Backend *) DLE_VAL(curr);
2076                         if (bp->pid == pid)
2077                         {
2078 #ifdef EXEC_BACKEND
2079                                 ShmemBackendArrayRemove(bp->pid);
2080 #endif
2081                                 DLRemove(curr);
2082                                 free(bp);
2083                                 DLFreeElem(curr);
2084                                 break;
2085                         }
2086                         curr = DLGetSucc(curr);
2087                 }
2088
2089                 if (pid == CheckPointPID)
2090                 {
2091                         CheckPointPID = 0;
2092                         if (!FatalError)
2093                         {
2094                                 checkpointed = time(NULL);
2095                                 /* Update RedoRecPtr for future child backends */
2096                                 GetSavedRedoRecPtr();
2097                         }
2098                 }
2099                 else if (pid == BgWriterPID)
2100                         BgWriterPID = 0;
2101                 else
2102 #ifndef WIN32
2103                         pgstat_beterm(pid);
2104 #else
2105                     win32_pgstat_beterm(pid);
2106 #endif
2107
2108                 return;
2109         }
2110
2111         /* below here we're dealing with a non-normal exit */
2112
2113         /* Make log entry unless we did so already */
2114         if (!FatalError)
2115         {
2116                 LogChildExit(LOG,
2117                                          (pid == CheckPointPID) ? gettext("checkpoint process") :
2118                                          (pid == BgWriterPID) ? gettext("bgwriter process") :
2119                                          gettext("server process"),
2120                                          pid, exitstatus);
2121                 ereport(LOG,
2122                           (errmsg("terminating any other active server processes")));
2123         }
2124
2125         curr = DLGetHead(BackendList);
2126         while (curr)
2127         {
2128                 next = DLGetSucc(curr);
2129                 bp = (Backend *) DLE_VAL(curr);
2130                 if (bp->pid != pid)
2131                 {
2132                         /*
2133                          * This backend is still alive.  Unless we did so already,
2134                          * tell it to commit hara-kiri.
2135                          *
2136                          * SIGQUIT is the special signal that says exit without proc_exit
2137                          * and let the user know what's going on. But if SendStop is
2138                          * set (-s on command line), then we send SIGSTOP instead, so
2139                          * that we can get core dumps from all backends by hand.
2140                          */
2141                         if (!FatalError)
2142                         {
2143                                 ereport(DEBUG2,
2144                                                 (errmsg_internal("sending %s to process %d",
2145                                                                                  (SendStop ? "SIGSTOP" : "SIGQUIT"),
2146                                                                                  (int) bp->pid)));
2147                                 kill(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
2148                         }
2149                 }
2150                 else
2151                 {
2152                         /*
2153                          * Found entry for freshly-dead backend, so remove it.
2154                          */
2155 #ifdef EXEC_BACKEND
2156                         ShmemBackendArrayRemove(bp->pid);
2157 #endif
2158                         DLRemove(curr);
2159                         free(bp);
2160                         DLFreeElem(curr);
2161                 }
2162                 curr = next;
2163         }
2164
2165         if (pid == CheckPointPID)
2166         {
2167                 CheckPointPID = 0;
2168                 checkpointed = 0;
2169         }
2170         else if (pid == BgWriterPID)
2171         {
2172                 BgWriterPID = 0;
2173         }
2174         else
2175         {
2176                 /*
2177                  * Tell the collector about backend termination
2178                  */
2179                 pgstat_beterm(pid);
2180         }
2181
2182         FatalError = true;
2183 }
2184
/*
 * LogChildExit -- report how a child process ended.
 *
 * lev:        message level passed to ereport()
 * procname:   noun phrase naming the kind of child, already translated
 * pid:        the child's process ID
 * exitstatus: wait status, decoded with the WIF* macros
 *
 * Exactly one message is emitted: normal exit with its exit code,
 * termination by a signal with the signal number, or otherwise the raw
 * status value.
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
        if (WIFEXITED(exitstatus))
        {
                ereport(lev,

                /*
                 * translator: %s is a noun phrase describing a child process,
                 * such as "server process"
                 */
                                (errmsg("%s (PID %d) exited with exit code %d",
                                                procname, pid, WEXITSTATUS(exitstatus))));
                return;
        }

        if (WIFSIGNALED(exitstatus))
        {
                ereport(lev,

                /*
                 * translator: %s is a noun phrase describing a child process,
                 * such as "server process"
                 */
                                (errmsg("%s (PID %d) was terminated by signal %d",
                                                procname, pid, WTERMSIG(exitstatus))));
                return;
        }

        /* Neither a normal exit nor a signal: show the raw status */
        ereport(lev,

        /*
         * translator: %s is a noun phrase describing a child process,
         * such as "server process"
         */
                        (errmsg("%s (PID %d) exited with unexpected status %d",
                                        procname, pid, exitstatus)));
}
2219
2220 /*
2221  * Send a signal to all backend children.
2222  */
2223 static void
2224 SignalChildren(int signal)
2225 {
2226         Dlelem     *curr,
2227                            *next;
2228         Backend    *bp;
2229
2230         curr = DLGetHead(BackendList);
2231         while (curr)
2232         {
2233                 next = DLGetSucc(curr);
2234                 bp = (Backend *) DLE_VAL(curr);
2235
2236                 if (bp->pid != MyProcPid)
2237                 {
2238                         ereport(DEBUG2,
2239                                         (errmsg_internal("sending signal %d to process %d",
2240                                                                          signal,
2241                                                                          (int) bp->pid)));
2242                         kill(bp->pid, signal);
2243                 }
2244
2245                 curr = next;
2246         }
2247 }
2248
2249 /*
2250  * BackendStartup -- start backend process
2251  *
2252  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
2253  */
2254 static int
2255 BackendStartup(Port *port)
2256 {
2257         Backend    *bn;                         /* for backend cleanup */
2258         pid_t           pid;
2259
2260 #ifdef LINUX_PROFILE
2261         struct itimerval prof_itimer;
2262 #endif
2263
2264         /*
2265          * Compute the cancel key that will be assigned to this backend. The
2266          * backend will have its own copy in the forked-off process' value of
2267          * MyCancelKey, so that it can transmit the key to the frontend.
2268          */
2269         MyCancelKey = PostmasterRandom();
2270
2271         /*
2272          * Make room for backend data structure.  Better before the fork() so
2273          * we can handle failure cleanly.
2274          */
2275         bn = (Backend *) malloc(sizeof(Backend));
2276         if (!bn)
2277         {
2278                 ereport(LOG,
2279                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2280                                  errmsg("out of memory")));
2281                 return STATUS_ERROR;
2282         }
2283
2284         /*
2285          * Flush stdio channels just before fork, to avoid double-output
2286          * problems. Ideally we'd use fflush(NULL) here, but there are still a
2287          * few non-ANSI stdio libraries out there (like SunOS 4.1.x) that
2288          * coredump if we do. Presently stdout and stderr are the only stdio
2289          * output channels used by the postmaster, so fflush'ing them should
2290          * be sufficient.
2291          */
2292         fflush(stdout);
2293         fflush(stderr);
2294
2295 #ifdef LINUX_PROFILE
2296
2297         /*
2298          * Linux's fork() resets the profiling timer in the child process. If
2299          * we want to profile child processes then we need to save and restore
2300          * the timer setting.  This is a waste of time if not profiling,
2301          * however, so only do it if commanded by specific -DLINUX_PROFILE
2302          * switch.
2303          */
2304         getitimer(ITIMER_PROF, &prof_itimer);
2305 #endif
2306
2307 #ifdef __BEOS__
2308         /* Specific beos actions before backend startup */
2309         beos_before_backend_startup();
2310 #endif
2311
2312         port->canAcceptConnections = canAcceptConnections();
2313 #ifdef EXEC_BACKEND
2314         pid = Backend_forkexec(port);
2315 #else
2316         pid = fork();
2317
2318         if (pid == 0)                           /* child */
2319         {
2320 #ifdef LINUX_PROFILE
2321                 setitimer(ITIMER_PROF, &prof_itimer, NULL);
2322 #endif
2323
2324 #ifdef __BEOS__
2325                 /* Specific beos backend startup actions */
2326                 beos_backend_startup();
2327 #endif
2328                 free(bn);
2329
2330                 proc_exit(BackendRun(port));
2331         }
2332 #endif
2333
2334         /* in parent, error */
2335         if (pid < 0)
2336         {
2337                 int                     save_errno = errno;
2338
2339 #ifdef __BEOS__
2340                 /* Specific beos backend startup actions */
2341                 beos_backend_startup_failed();
2342 #endif
2343                 free(bn);
2344                 errno = save_errno;
2345                 ereport(LOG,
2346                           (errmsg("could not fork new process for connection: %m")));
2347                 report_fork_failure_to_client(port, save_errno);
2348                 return STATUS_ERROR;
2349         }
2350
2351         /* in parent, normal */
2352         ereport(DEBUG2,
2353                         (errmsg_internal("forked new backend, pid=%d socket=%d",
2354                                                          (int) pid, port->sock)));
2355
2356         /*
2357          * Everything's been successful, it's safe to add this backend to our
2358          * list of backends.
2359          */
2360         bn->pid = pid;
2361         bn->cancel_key = MyCancelKey;
2362 #ifdef EXEC_BACKEND
2363         ShmemBackendArrayAdd(bn);
2364 #endif
2365         DLAddHead(BackendList, DLNewElem(bn));
2366
2367         return STATUS_OK;
2368 }
2369
2370 /*
2371  * Try to report backend fork() failure to client before we close the
2372  * connection.  Since we do not care to risk blocking the postmaster on
2373  * this connection, we set the connection to non-blocking and try only once.
2374  *
2375  * This is grungy special-purpose code; we cannot use backend libpq since
2376  * it's not up and running.
2377  */
2378 static void
2379 report_fork_failure_to_client(Port *port, int errnum)
2380 {
2381         char            buffer[1000];
2382
2383         /* Format the error message packet (always V2 protocol) */
2384         snprintf(buffer, sizeof(buffer), "E%s%s\n",
2385                          gettext("could not fork new process for connection: "),
2386                          strerror(errnum));
2387
2388         /* Set port to non-blocking.  Don't do send() if this fails */
2389         if (!set_noblock(port->sock))
2390                 return;
2391
2392         send(port->sock, buffer, strlen(buffer) + 1, 0);
2393 }
2394
2395
/*
 * split_opts -- break an option string into words and append them to argv
 *
 * Each run of non-whitespace characters in s becomes one argv entry,
 * stored starting at index *argcp; *argcp is advanced past the entries
 * added.  A NULL or empty s adds nothing.
 *
 * NB: s is modified in place: the whitespace character ending each word
 * is overwritten with '\0', and the argv entries point into s.
 *
 * No quoting is understood.  That is adequate because no current POSTGRES
 * argument needs quoting characters (and it is still better than the old
 * habit of passing the whole option string as ONE argument to execl()).
 */
static void
split_opts(char **argv, int *argcp, char *s)
{
	char	   *cursor = s;

	if (cursor == NULL)
		return;

	for (;;)
	{
		/* step over whitespace in front of the next word */
		while (isspace((unsigned char) *cursor))
			cursor++;
		if (*cursor == '\0')
			return;

		/* record the start of the word */
		argv[*argcp] = cursor;
		*argcp += 1;

		/* find the end of the word */
		while (*cursor != '\0' && !isspace((unsigned char) *cursor))
			cursor++;
		if (*cursor == '\0')
			return;

		/* terminate the word and move on */
		*cursor++ = '\0';
	}
}
2424
2425
2426 /*
2427  * BackendInit/Run -- perform authentication [BackendInit], and if successful,
2428  *              set up the backend's argument list [BackendRun] and invoke
2429  *              backend main()
2430  *
2431  * returns:
2432  *              Shouldn't return at all.
2433  *              If PostgresMain() fails, return status.
2434  */
2435 static void
2436 BackendInit(Port *port)
2437 {
2438         int                     status;
2439         struct timeval now;
2440         struct timezone tz;
2441         char            remote_host[NI_MAXHOST];
2442         char            remote_port[NI_MAXSERV];
2443         char            remote_ps_data[NI_MAXHOST];
2444
2445         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
2446
2447         ClientAuthInProgress = true;    /* limit visibility of log messages */
2448
2449         /* We don't want the postmaster's proc_exit() handlers */
2450         on_exit_reset();
2451
2452         /*
2453          * Signal handlers setting is moved to tcop/postgres...
2454          */
2455
2456         /* save start time for end of session reporting */
2457         gettimeofday(&(port->session_start),NULL);
2458
2459         /* set these to empty in case they are needed before we set them up */
2460         port->remote_host = "";
2461         port->remote_port = "";
2462         port->commandTag = "";
2463
2464         /* Save port etc. for ps status */
2465         MyProcPort = port;
2466
2467         /* Reset MyProcPid to new backend's pid */
2468         MyProcPid = getpid();
2469
2470         /*
2471          * Initialize libpq and enable reporting of ereport errors to the
2472          * client. Must do this now because authentication uses libpq to send
2473          * messages.
2474          */
2475         pq_init();                                      /* initialize libpq to talk to client */
2476         whereToSendOutput = Remote; /* now safe to ereport to client */
2477
2478         /*
2479          * We arrange for a simple exit(0) if we receive SIGTERM or SIGQUIT
2480          * during any client authentication related communication. Otherwise
2481          * the postmaster cannot shutdown the database FAST or IMMED cleanly
2482          * if a buggy client blocks a backend during authentication.
2483          */
2484         pqsignal(SIGTERM, authdie);
2485         pqsignal(SIGQUIT, authdie);
2486         pqsignal(SIGALRM, authdie);
2487         PG_SETMASK(&AuthBlockSig);
2488
2489         /*
2490          * Get the remote host name and port for logging and status display.
2491          */
2492         remote_host[0] = '\0';
2493         remote_port[0] = '\0';
2494         if (getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2495                                                 remote_host, sizeof(remote_host),
2496                                                 remote_port, sizeof(remote_port),
2497                                    (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV))
2498         {
2499                 getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2500                                                 remote_host, sizeof(remote_host),
2501                                                 remote_port, sizeof(remote_port),
2502                                                 NI_NUMERICHOST | NI_NUMERICSERV);
2503         }
2504         snprintf(remote_ps_data, sizeof(remote_ps_data),
2505                          remote_port[0] == '\0' ? "%s" : "%s(%s)",
2506                          remote_host, remote_port);
2507
2508         if (Log_connections)
2509                 ereport(LOG,
2510                                 (errmsg("connection received: host=%s port=%s",
2511                                                 remote_host, remote_port)));
2512
2513         /*
2514          * save remote_host and remote_port in port stucture
2515          */
2516         port->remote_host = strdup(remote_host);
2517         port->remote_port = strdup(remote_port);
2518
2519         /*
2520          * Ready to begin client interaction.  We will give up and exit(0)
2521          * after a time delay, so that a broken client can't hog a connection
2522          * indefinitely.  PreAuthDelay doesn't count against the time limit.
2523          */
2524         if (!enable_sig_alarm(AuthenticationTimeout * 1000, false))
2525                 elog(FATAL, "could not set timer for authorization timeout");
2526
2527         /*
2528          * Receive the startup packet (which might turn out to be a cancel
2529          * request packet).
2530          */
2531         status = ProcessStartupPacket(port, false);
2532
2533         if (status != STATUS_OK)
2534                 proc_exit(0);
2535
2536         /*
2537          * Now that we have the user and database name, we can set the process
2538          * title for ps.  It's good to do this as early as possible in
2539          * startup.
2540          */
2541         init_ps_display(port->user_name, port->database_name, remote_ps_data);
2542         set_ps_display("authentication");
2543
2544         /*
2545          * Now perform authentication exchange.
2546          */
2547         ClientAuthentication(port); /* might not return, if failure */
2548
2549         /*
2550          * Done with authentication.  Disable timeout, and prevent
2551          * SIGTERM/SIGQUIT again until backend startup is complete.
2552          */
2553         if (!disable_sig_alarm(false))
2554                 elog(FATAL, "could not disable timer for authorization timeout");
2555         PG_SETMASK(&BlockSig);
2556
2557         if (Log_connections)
2558                 ereport(LOG,
2559                                 (errmsg("connection authorized: user=%s database=%s",
2560                                                 port->user_name, port->database_name)));
2561
2562         /*
2563          * Don't want backend to be able to see the postmaster random number
2564          * generator state.  We have to clobber the static random_seed *and*
2565          * start a new random sequence in the random() library function.
2566          */
2567         random_seed = 0;
2568         gettimeofday(&now, &tz);
2569         srandom((unsigned int) now.tv_usec);
2570 }
2571
2572
2573 static int
2574 BackendRun(Port *port)
2575 {
2576         char      **av;
2577         int                     maxac;
2578         int                     ac;
2579         char            debugbuf[32];
2580         char            protobuf[32];
2581         int                     i;
2582
2583         /*
2584          * Let's clean up ourselves as the postmaster child, and
2585          * close the postmaster's other sockets
2586          */
2587         ClosePostmasterPorts(true);
2588
2589         /*
2590          * PreAuthDelay is a debugging aid for investigating problems in the
2591          * authentication cycle: it can be set in postgresql.conf to allow
2592          * time to attach to the newly-forked backend with a debugger. (See
2593          * also the -W backend switch, which we allow clients to pass through
2594          * PGOPTIONS, but it is not honored until after authentication.)
2595          */
2596         if (PreAuthDelay > 0)
2597                 sleep(PreAuthDelay);
2598
2599         /* Will exit on failure */
2600         BackendInit(port);
2601
2602
2603         /* ----------------
2604          * Now, build the argv vector that will be given to PostgresMain.
2605          *
2606          * The layout of the command line is
2607          *              postgres [secure switches] -p databasename [insecure switches]
2608          * where the switches after -p come from the client request.
2609          *
2610          * The maximum possible number of commandline arguments that could come
2611          * from ExtraOptions or port->cmdline_options is (strlen + 1) / 2; see
2612          * split_opts().
2613          * ----------------
2614          */
2615         maxac = 10;                                     /* for fixed args supplied below */
2616         maxac += (strlen(ExtraOptions) + 1) / 2;
2617         if (port->cmdline_options)
2618                 maxac += (strlen(port->cmdline_options) + 1) / 2;
2619
2620         av = (char **) MemoryContextAlloc(TopMemoryContext,
2621                                                                           maxac * sizeof(char *));
2622         ac = 0;
2623
2624         av[ac++] = "postgres";
2625
2626         /*
2627          * Pass the requested debugging level along to the backend.
2628          */
2629         if (debug_flag > 0)
2630         {
2631                 snprintf(debugbuf, sizeof(debugbuf), "-d%d", debug_flag);
2632                 av[ac++] = debugbuf;
2633         }
2634
2635         /*
2636          * Pass any backend switches specified with -o in the postmaster's own
2637          * command line.  We assume these are secure.
2638          */
2639         split_opts(av, &ac, ExtraOptions);
2640
2641         /* Tell the backend what protocol the frontend is using. */
2642         snprintf(protobuf, sizeof(protobuf), "-v%u", port->proto);
2643         av[ac++] = protobuf;
2644
2645 #ifdef EXEC_BACKEND
2646         /* pass data dir before end of secure switches (-p) */
2647         av[ac++] = "-D";
2648         av[ac++] = DataDir;
2649 #endif
2650
2651         /*
2652          * Tell the backend it is being called from the postmaster, and which
2653          * database to use.  -p marks the end of secure switches.
2654          */
2655         av[ac++] = "-p";
2656         av[ac++] = port->database_name;
2657
2658         /*
2659          * Pass the (insecure) option switches from the connection request.
2660          * (It's OK to mangle port->cmdline_options now.)
2661          */
2662         if (port->cmdline_options)
2663                 split_opts(av, &ac, port->cmdline_options);
2664
2665         av[ac] = NULL;
2666
2667         Assert(ac < maxac);
2668
2669         /*
2670          * Release postmaster's working memory context so that backend can
2671          * recycle the space.  Note this does not trash *MyProcPort, because
2672          * ConnCreate() allocated that space with malloc() ... else we'd need
2673          * to copy the Port data here.  Also, subsidiary data such as the
2674          * username isn't lost either; see ProcessStartupPacket().
2675          */
2676         MemoryContextSwitchTo(TopMemoryContext);
2677 #ifndef EXEC_BACKEND
2678         MemoryContextDelete(PostmasterContext);
2679 #endif
2680         PostmasterContext = NULL;
2681
2682         /*
2683          * Debug: print arguments being passed to backend
2684          */
2685         ereport(DEBUG3,
2686                         (errmsg_internal("%s child[%d]: starting with (",
2687                                                          progname, getpid())));
2688         for (i = 0; i < ac; ++i)
2689                 ereport(DEBUG3,
2690                                 (errmsg_internal("\t%s", av[i])));
2691         ereport(DEBUG3,
2692                         (errmsg_internal(")")));
2693
2694         ClientAuthInProgress = false;           /* client_min_messages is active
2695                                                                                  * now */
2696
2697         return (PostgresMain(ac, av, port->user_name));
2698 }
2699
2700
2701 #ifdef EXEC_BACKEND
2702
2703
2704 /*
2705  * SubPostmasterMain -- prepare the fork/exec'd process to be in an equivalent
2706  *                      state (for calling BackendRun) as a forked process.
2707  *
2708  * returns:
2709  *              Shouldn't return at all.
2710  */
2711 void
2712 SubPostmasterMain(int argc, char* argv[])
2713 {
2714         unsigned long   backendID;
2715         Port                    port;
2716
2717         memset((void*)&port, 0, sizeof(Port));
2718         Assert(argc == 2);
2719
2720         /* Do this sooner rather than later... */
2721         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
2722
2723         /* In EXEC case we will not have inherited these settings */
2724         IsPostmasterEnvironment = true;
2725         whereToSendOutput = None;
2726
2727         /* Setup global context */
2728         MemoryContextInit();
2729         InitializeGUCOptions();
2730
2731         /* Parse passed-in context */
2732         argc = 0;
2733         backendID               = (unsigned long)atol(argv[argc++]);
2734         DataDir                 = strdup(argv[argc++]);
2735
2736         /* Read in file-based context */
2737         read_nondefault_variables();
2738         read_backend_variables(backendID,&port);
2739
2740         /* Remaining initialization */
2741         pgstat_init_forkexec_backend();
2742
2743         /* FIXME: [fork/exec] Ugh */
2744         load_hba();
2745         load_ident();
2746         load_user();
2747         load_group();
2748
2749         /* Attach process to shared segments */
2750         CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
2751
2752         /* Run backend */
2753         proc_exit(BackendRun(&port));
2754 }
2755
2756
2757 /*
2758  * Backend_forkexec -- fork/exec off a backend process
2759  *
2760  * returns:
2761  *              the pid of the fork/exec'd process
2762  */
2763 static pid_t
2764 Backend_forkexec(Port *port)
2765 {
2766         pid_t pid;
2767         char *av[5];
2768         int ac = 0, bufc = 0, i;
2769         char buf[2][MAXPGPATH];
2770
2771         if (!write_backend_variables(port))
2772                 return -1; /* log made by write_backend_variables */
2773
2774         av[ac++] = "postgres";
2775         av[ac++] = "-forkexec";
2776
2777         /* Format up context to pass to exec'd process */
2778         snprintf(buf[bufc++],MAXPGPATH,"%lu",tmpBackendFileNum);
2779         /* FIXME: [fork/exec] whitespaces in directories? */
2780         snprintf(buf[bufc++],MAXPGPATH,"%s",DataDir);
2781
2782         /* Add to the arg list */
2783         Assert(bufc <= lengthof(buf));
2784         for (i = 0; i < bufc; i++)
2785                 av[ac++] = buf[i];
2786
2787         /* FIXME: [fork/exec] ExtraOptions? */
2788
2789         av[ac++] = NULL;
2790         Assert(ac <= lengthof(av));
2791
2792 #ifdef WIN32
2793         pid = win32_forkexec(pg_pathname,av); /* logs on error */
2794 #else
2795         /* Fire off execv in child */
2796         if ((pid = fork()) == 0 && (execv(pg_pathname,av) == -1))
2797                 /*
2798                  * FIXME: [fork/exec] suggestions for what to do here?
2799                  *  Probably OK to issue error (unlike pgstat case)
2800                  */
2801                 abort();
2802 #endif
2803         return pid; /* Parent returns pid */
2804 }
2805
2806 #endif
2807
2808
2809 /*
2810  * ExitPostmaster -- cleanup
2811  *
2812  * Do NOT call exit() directly --- always go through here!
2813  */
2814 static void
2815 ExitPostmaster(int status)
2816 {
2817         /* should cleanup shared memory and kill all backends */
2818
2819         /*
2820          * Not sure of the semantics here.      When the Postmaster dies, should
2821          * the backends all be killed? probably not.
2822          *
2823          * MUST         -- vadim 05-10-1999
2824          */
2825         /* Should I use true instead? */
2826         ClosePostmasterPorts(false);
2827
2828         proc_exit(status);
2829 }
2830
2831 /*
2832  * sigusr1_handler - handle signal conditions from child processes
2833  */
2834 static void
2835 sigusr1_handler(SIGNAL_ARGS)
2836 {
2837         int                     save_errno = errno;
2838
2839         PG_SETMASK(&BlockSig);
2840
2841         if (CheckPostmasterSignal(PMSIGNAL_DO_CHECKPOINT))
2842         {
2843                 if (CheckPointWarning != 0)
2844                 {
2845                         /*
2846                          * This only times checkpoints forced by running out of
2847                          * segment files.  Other checkpoints could reduce the
2848                          * frequency of forced checkpoints.
2849                          */
2850                         time_t          now = time(NULL);
2851
2852                         if (LastSignalledCheckpoint != 0)
2853                         {
2854                                 int                     elapsed_secs = now - LastSignalledCheckpoint;
2855
2856                                 if (elapsed_secs < CheckPointWarning)
2857                                         ereport(LOG,
2858                                                         (errmsg("checkpoints are occurring too frequently (%d seconds apart)",
2859                                                                         elapsed_secs),
2860                                         errhint("Consider increasing the configuration parameter \"checkpoint_segments\".")));
2861                         }
2862                         LastSignalledCheckpoint = now;
2863                 }
2864
2865                 /*
2866                  * Request to schedule a checkpoint
2867                  *
2868                  * Ignore request if checkpoint is already running or checkpointing
2869                  * is currently disabled
2870                  */
2871                 if (CheckPointPID == 0 && checkpointed &&
2872                         StartupPID == 0 && Shutdown == NoShutdown &&
2873                         !FatalError && random_seed != 0)
2874                 {
2875                         CheckPointPID = CheckPointDataBase();
2876                         /* note: if fork fails, CheckPointPID stays 0; nothing happens */
2877                 }
2878         }
2879
2880         if (CheckPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE))
2881         {
2882                 /*
2883                  * Password or group file has changed.
2884                  */
2885                 load_user();
2886                 load_group();
2887         }
2888
2889         if (CheckPostmasterSignal(PMSIGNAL_WAKEN_CHILDREN))
2890         {
2891                 /*
2892                  * Send SIGUSR2 to all children (triggers AsyncNotifyHandler). See
2893                  * storage/ipc/sinvaladt.c for the use of this.
2894                  */
2895                 if (Shutdown == NoShutdown)
2896                         SignalChildren(SIGUSR2);
2897         }
2898
2899         PG_SETMASK(&UnBlockSig);
2900
2901         errno = save_errno;
2902 }
2903
2904
/*
 * dummy_handler -- signal handler that deliberately does nothing
 *
 * Installed for signals the postmaster itself has no use for but that
 * backends do use.  Setting such signals to SIG_IGN in the postmaster
 * could make a newly started backend drop a signal that arrives before
 * it has reconfigured its own signal processing.  (See notes in
 * postgres.c.)
 */
static void
dummy_handler(SIGNAL_ARGS)
{
	/* intentionally empty */
}
2917
2918
/*
 * CharRemap: map an integer to one character of the 62-character set
 * A-Z, a-z, 0-9 (the textual encoding used per crypt(3) conventions).
 *
 * Any long value is accepted: it is reduced modulo 62 and its sign is
 * dropped, giving an index in 0..61.  Indexes 0..25 map to 'A'..'Z',
 * 26..51 to 'a'..'z' and 52..61 to '0'..'9'.
 */
static char
CharRemap(long ch)
{
	/*
	 * Reduce first and negate afterwards.  Negating the raw argument would
	 * overflow for LONG_MIN, whereas the remainder always lies in -61..61
	 * and can be negated safely.
	 */
	long		idx = ch % 62;

	if (idx < 0)
		idx = -idx;

	if (idx < 26)
		return 'A' + idx;

	idx -= 26;
	if (idx < 26)
		return 'a' + idx;

	idx -= 26;
	return '0' + idx;
}
2940
/*
 * RandomSalt -- fill in a 2-character crypt salt and a 4-byte MD5 salt
 *
 * cryptSalt receives two characters produced by CharRemap(); md5Salt
 * receives four bytes.  Four values are drawn from PostmasterRandom().
 */
static void
RandomSalt(char *cryptSalt, char *md5Salt)
{
	long		bits = PostmasterRandom();
	int			k;

	cryptSalt[0] = CharRemap(bits % 62);
	cryptSalt[1] = CharRemap(bits / 62);

	/*
	 * The first random value may be reused for one MD5 salt byte, because
	 * only one of the two salts is ever sent to the client.  Every further
	 * byte gets fresh random bits.
	 *
	 * Taking the value % 255 gives up one possible byte value but lets all
	 * bits of the random() result influence the outcome; adding one then
	 * avoids generating any null bytes.
	 */
	md5Salt[0] = (bits % 255) + 1;
	for (k = 1; k < 4; k++)
	{
		bits = PostmasterRandom();
		md5Salt[k] = (bits % 255) + 1;
	}
}
2969
2970 /*
2971  * PostmasterRandom
2972  */
2973 static long
2974 PostmasterRandom(void)
2975 {
2976         static bool initialized = false;
2977
2978         if (!initialized)
2979         {
2980                 Assert(random_seed != 0);
2981                 srandom(random_seed);
2982                 initialized = true;
2983         }
2984
2985         return random();
2986 }
2987
2988 /*
2989  * Count up number of child processes.
2990  */
2991 static int
2992 CountChildren(void)
2993 {
2994         Dlelem     *curr;
2995         Backend    *bp;
2996         int                     cnt = 0;
2997
2998         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2999         {
3000                 bp = (Backend *) DLE_VAL(curr);
3001                 if (bp->pid != MyProcPid)
3002                         cnt++;
3003         }
3004         /* Checkpoint and bgwriter will be in the list, discount them */
3005         if (CheckPointPID != 0)
3006                 cnt--;
3007         if (BgWriterPID != 0)
3008                 cnt--;
3009         return cnt;
3010 }
3011
3012 /*
3013  * Fire off a subprocess for startup/shutdown/checkpoint/bgwriter.
3014  *
3015  * Return value of SSDataBase is subprocess' PID, or 0 if failed to start subprocess
3016  * (0 is returned only for checkpoint/bgwriter cases).
3017  *
3018  * note: in the EXEC_BACKEND case, we delay the fork until argument list has been
3019  *      established
3020  */
3021 NON_EXEC_STATIC void
3022 SSDataBaseInit(int xlop)
3023 {
3024         const char *statmsg;
3025
3026         IsUnderPostmaster = true;               /* we are a postmaster subprocess
3027                                                                          * now */
3028
3029 #ifdef EXEC_BACKEND
3030         /* In EXEC case we will not have inherited these settings */
3031         IsPostmasterEnvironment = true;
3032         whereToSendOutput = None;
3033 #endif
3034
3035         MyProcPid = getpid();           /* reset MyProcPid */
3036
3037         /* Lose the postmaster's on-exit routines and port connections */
3038         on_exit_reset();
3039
3040         /*
3041          * Identify myself via ps
3042          */
3043         switch (xlop)
3044         {
3045                 case BS_XLOG_STARTUP:
3046                         statmsg = "startup subprocess";
3047                         break;
3048                 case BS_XLOG_CHECKPOINT:
3049                         statmsg = "checkpoint subprocess";
3050                         break;
3051                 case BS_XLOG_BGWRITER:
3052                         statmsg = "bgwriter subprocess";
3053                         break;
3054                 case BS_XLOG_SHUTDOWN:
3055                         statmsg = "shutdown subprocess";
3056                         break;
3057                 default:
3058                         statmsg = "??? subprocess";
3059                         break;
3060         }
3061         init_ps_display(statmsg, "", "");
3062         set_ps_display("");
3063 }
3064
3065
3066 static pid_t
3067 SSDataBase(int xlop)
3068 {
3069         pid_t           pid;
3070         Backend    *bn;
3071 #ifndef EXEC_BACKEND
3072 #ifdef LINUX_PROFILE
3073         struct itimerval prof_itimer;
3074 #endif
3075 #else
3076         char            idbuf[32];
3077 #endif
3078
3079         fflush(stdout);
3080         fflush(stderr);
3081
3082 #ifndef EXEC_BACKEND
3083 #ifdef LINUX_PROFILE
3084         /* see comments in BackendRun */
3085         getitimer(ITIMER_PROF, &prof_itimer);
3086 #endif
3087
3088 #ifdef __BEOS__
3089         /* Specific beos actions before backend startup */
3090         beos_before_backend_startup();
3091 #endif
3092
3093         /* Non EXEC_BACKEND case; fork here */
3094         if ((pid = fork()) == 0)        /* child */
3095 #endif
3096         {
3097                 char       *av[10];
3098                 int                     ac = 0;
3099                 char            nbbuf[32];
3100                 char            xlbuf[32];
3101
3102 #ifndef EXEC_BACKEND
3103 #ifdef LINUX_PROFILE
3104                 setitimer(ITIMER_PROF, &prof_itimer, NULL);
3105 #endif
3106
3107 #ifdef __BEOS__
3108                 /* Specific beos actions after backend startup */
3109                 beos_backend_startup();
3110 #endif
3111
3112                 /* Close the postmaster's sockets */
3113                 ClosePostmasterPorts(true);
3114
3115                 SSDataBaseInit(xlop);
3116 #else
3117                 if (!write_backend_variables(NULL))
3118                         return -1; /* log issued by write_backend_variables */
3119 #endif
3120
3121                 /* Set up command-line arguments for subprocess */
3122                 av[ac++] = "postgres";
3123
3124 #ifdef EXEC_BACKEND
3125                 av[ac++] = "-boot";
3126 #endif
3127                 snprintf(nbbuf, sizeof(nbbuf), "-B%d", NBuffers);
3128                 av[ac++] = nbbuf;
3129
3130                 snprintf(xlbuf, sizeof(xlbuf), "-x%d", xlop);
3131                 av[ac++] = xlbuf;
3132
3133 #ifdef EXEC_BACKEND
3134                 /* pass data dir before end of secure switches (-p) */
3135                 av[ac++] = "-D";
3136                 av[ac++] = DataDir;
3137
3138                 /* and the backend identifier + dbname */
3139                 snprintf(idbuf, sizeof(idbuf), "-p%lu,template1", tmpBackendFileNum);
3140                 av[ac++] = idbuf;
3141 #else
3142                 av[ac++] = "-p";
3143                 av[ac++] = "template1";
3144 #endif
3145
3146                 av[ac] = NULL;
3147
3148                 Assert(ac < lengthof(av));
3149
3150 #ifdef EXEC_BACKEND
3151                 /* EXEC_BACKEND case; fork/exec here */
3152 #ifdef WIN32
3153                 pid = win32_forkexec(pg_pathname,av); /* logs on error */
3154 #else
3155                 if ((pid = fork()) == 0 && (execv(pg_pathname,av) == -1))
3156                 {
3157                         /* in child */
3158                         elog(ERROR,"unable to execv in SSDataBase: %m");
3159                         exit(0);
3160                 }
3161 #endif
3162 #else
3163                 BootstrapMain(ac, av);
3164                 ExitPostmaster(0);
3165 #endif
3166         }
3167
3168         /* in parent */
3169         if (pid < 0)
3170         {
3171 #ifndef EXEC_BACKEND
3172 #ifdef __BEOS__
3173                 /* Specific beos actions before backend startup */
3174                 beos_backend_startup_failed();
3175 #endif
3176 #endif
3177                 switch (xlop)
3178                 {
3179                         case BS_XLOG_STARTUP:
3180                                 ereport(LOG,
3181                                                 (errmsg("could not fork startup process: %m")));
3182                                 break;
3183                         case BS_XLOG_CHECKPOINT:
3184                                 ereport(LOG,
3185                                           (errmsg("could not fork checkpoint process: %m")));
3186                                 break;
3187                         case BS_XLOG_BGWRITER:
3188                                 ereport(LOG,
3189                                           (errmsg("could not fork bgwriter process: %m")));
3190                                 break;
3191                         case BS_XLOG_SHUTDOWN:
3192                                 ereport(LOG,
3193                                                 (errmsg("could not fork shutdown process: %m")));
3194                                 break;
3195                         default:
3196                                 ereport(LOG,
3197                                                 (errmsg("could not fork process: %m")));
3198                                 break;
3199                 }
3200
3201                 /*
3202                  * fork failure is fatal during startup/shutdown, but there's no
3203                  * need to choke if a routine checkpoint or starting a background
3204                  * writer fails.
3205                  */
3206                 if (xlop == BS_XLOG_CHECKPOINT)
3207                         return 0;
3208                 if (xlop == BS_XLOG_BGWRITER)
3209                         return 0;
3210                 ExitPostmaster(1);
3211         }
3212
3213         /*
3214          * The startup and shutdown processes are not considered normal
3215          * backends, but the checkpoint and bgwriter processes are.
3216          * They must be added to the list of backends.
3217          */
3218         if (xlop == BS_XLOG_CHECKPOINT || xlop == BS_XLOG_BGWRITER)
3219         {
3220                 if (!(bn = (Backend *) malloc(sizeof(Backend))))
3221                 {
3222                         ereport(LOG,
3223                                         (errcode(ERRCODE_OUT_OF_MEMORY),
3224                                          errmsg("out of memory")));
3225                         ExitPostmaster(1);
3226                 }
3227
3228                 bn->pid = pid;
3229                 bn->cancel_key = PostmasterRandom();
3230 #ifdef EXEC_BACKEND
3231                 ShmemBackendArrayAdd(bn);
3232 #endif
3233                 DLAddHead(BackendList, DLNewElem(bn));
3234
3235                 /*
3236                  * Since this code is executed periodically, it's a fine place to
3237                  * do other actions that should happen every now and then on no
3238                  * particular schedule.  Such as...
3239                  */
3240                 TouchSocketFile();
3241                 TouchSocketLockFile();
3242         }
3243
3244         return pid;
3245 }
3246
3247
3248 /*
3249  * Create the opts file
3250  */
3251 static bool
3252 CreateOptsFile(int argc, char *argv[])
3253 {
3254         char            fullprogname[MAXPGPATH];
3255         char            filename[MAXPGPATH];
3256         FILE       *fp;
3257         int                     i;
3258
3259         if (FindExec(fullprogname, argv[0], "postmaster") < 0)
3260         {
3261                 elog(LOG, "could not locate postmaster");
3262                 return false;
3263         }
3264
3265         snprintf(filename, sizeof(filename), "%s/postmaster.opts", DataDir);
3266
3267         if ((fp = fopen(filename, "w")) == NULL)
3268         {
3269                 elog(LOG, "could not create file \"%s\": %m", filename);
3270                 return false;
3271         }
3272
3273         fprintf(fp, "%s", fullprogname);
3274         for (i = 1; i < argc; i++)
3275                 fprintf(fp, " '%s'", argv[i]);
3276         fputs("\n", fp);
3277
3278         if (fclose(fp))
3279         {
3280                 elog(LOG, "could not write file \"%s\": %m", filename);
3281                 return false;
3282         }
3283
3284         return true;
3285 }
3286
3287 /*
3288  * This should be used only for reporting "interactive" errors (essentially,
3289  * bogus arguments on the command line).  Once the postmaster is launched,
3290  * use ereport.  In particular, don't use this for anything that occurs
3291  * after pmdaemonize.
3292  */
3293 static void
3294 postmaster_error(const char *fmt,...)
3295 {
3296         va_list         ap;
3297
3298         fprintf(stderr, "%s: ", progname);
3299         va_start(ap, fmt);
3300         vfprintf(stderr, gettext(fmt), ap);
3301         va_end(ap);
3302         fprintf(stderr, "\n");
3303 }
3304
3305
3306 #ifdef EXEC_BACKEND
3307
3308 /*
3309  * The following need to be available to the read/write_backend_variables
3310  * functions
3311  */
3312 #include "storage/spin.h"
3313 extern XLogRecPtr RedoRecPtr;
3314 extern XLogwrtResult LogwrtResult;
3315 extern slock_t *ShmemLock;
3316 extern slock_t *ShmemIndexLock;
3317 extern void *ShmemIndexAlloc;
3318 typedef struct LWLock LWLock;
3319 extern LWLock *LWLockArray;
3320 extern slock_t  *ProcStructLock;
3321 extern int      pgStatSock;
3322
/*
 * write_var / read_var
 *		Copy the raw bytes of variable "var" to / from stdio stream "fp".
 *		The expression yields the fwrite()/fread() item count, i.e. 1 when
 *		the whole variable was transferred.
 */
#define write_var(var,fp) fwrite((void*)&(var),sizeof(var),1,fp)
#define read_var(var,fp)  fread((void*)&(var),sizeof(var),1,fp)

/*
 * get_tmp_backend_file_name
 *		Build "<DataDir>/<PG_TEMP_FILES_DIR>/<PG_TEMP_FILE_PREFIX>.backend_var.<id>"
 *		in "buf".
 *
 * "buf" must provide at least MAXPGPATH bytes (both callers in this file
 * pass a char[MAXPGPATH]).  The output is bounded by snprintf so an overly
 * long DataDir cannot overrun the buffer.  "id" is printed with %lu.
 */
#define get_tmp_backend_file_name(buf,id)	\
		do {								\
			Assert(DataDir);				\
			snprintf((buf), MAXPGPATH,		\
				"%s/%s/%s.backend_var.%lu",	\
				DataDir,					\
				PG_TEMP_FILES_DIR,			\
				PG_TEMP_FILE_PREFIX,		\
				(id));						\
		} while (0)
3335
3336 static bool
3337 write_backend_variables(Port *port)
3338 {
3339         char    filename[MAXPGPATH];
3340         FILE    *fp;
3341         get_tmp_backend_file_name(filename,++tmpBackendFileNum);
3342
3343         /* Open file */
3344         fp = AllocateFile(filename, PG_BINARY_W);
3345         if (!fp)
3346         {
3347                 /* As per OpenTemporaryFile... */
3348                 char dirname[MAXPGPATH];
3349                 sprintf(dirname,"%s/%s",DataDir,PG_TEMP_FILES_DIR);
3350                 mkdir(dirname, S_IRWXU);
3351
3352                 fp = AllocateFile(filename, PG_BINARY_W);
3353                 if (!fp)
3354                 {
3355                         ereport(ERROR,
3356                                 (errcode_for_file_access(),
3357                                 errmsg("could not write to file \"%s\": %m", filename)));
3358                         return false;
3359                 }
3360         }
3361
3362         /* Write vars */
3363         if (port)
3364         {
3365                 write_var(port->sock,fp);
3366                 write_var(port->proto,fp);
3367                 write_var(port->laddr,fp);
3368                 write_var(port->raddr,fp);
3369                 write_var(port->canAcceptConnections,fp);
3370                 write_var(port->cryptSalt,fp);
3371                 write_var(port->md5Salt,fp);
3372         }
3373         write_var(MyCancelKey,fp);
3374
3375         write_var(RedoRecPtr,fp);
3376         write_var(LogwrtResult,fp);
3377
3378         write_var(UsedShmemSegID,fp);
3379         write_var(UsedShmemSegAddr,fp);
3380
3381         write_var(ShmemLock,fp);
3382         write_var(ShmemIndexLock,fp);
3383         write_var(ShmemVariableCache,fp);
3384         write_var(ShmemIndexAlloc,fp);
3385         write_var(ShmemBackendArray,fp);
3386
3387         write_var(LWLockArray,fp);
3388         write_var(ProcStructLock,fp);
3389         write_var(pgStatSock,fp);
3390
3391         write_var(PreAuthDelay,fp);
3392         write_var(debug_flag,fp);
3393         write_var(PostmasterPid,fp);
3394
3395         /* Release file */
3396         if (FreeFile(fp))
3397         {
3398                 ereport(ERROR,
3399                                 (errcode_for_file_access(),
3400                                  errmsg("could not write to file \"%s\": %m", filename)));
3401                 return false;
3402         }
3403
3404         return true;
3405 }
3406
3407 void
3408 read_backend_variables(unsigned long id, Port *port)
3409 {
3410         char    filename[MAXPGPATH];
3411         FILE    *fp;
3412         get_tmp_backend_file_name(filename,id);
3413
3414         /* Open file */
3415         fp = AllocateFile(filename, PG_BINARY_R);
3416         if (!fp)
3417         {
3418                 ereport(ERROR,
3419                         (errcode_for_file_access(),
3420                         errmsg("could not read from backend_variables file \"%s\": %m", filename)));
3421                 return;
3422         }
3423
3424         /* Read vars */
3425         if (port)
3426         {
3427                 read_var(port->sock,fp);
3428                 read_var(port->proto,fp);
3429                 read_var(port->laddr,fp);
3430                 read_var(port->raddr,fp);
3431                 read_var(port->canAcceptConnections,fp);
3432                 read_var(port->cryptSalt,fp);
3433                 read_var(port->md5Salt,fp);
3434         }
3435         read_var(MyCancelKey,fp);
3436
3437         read_var(RedoRecPtr,fp);
3438         read_var(LogwrtResult,fp);
3439
3440         read_var(UsedShmemSegID,fp);
3441         read_var(UsedShmemSegAddr,fp);
3442
3443         read_var(ShmemLock,fp);
3444         read_var(ShmemIndexLock,fp);
3445         read_var(ShmemVariableCache,fp);
3446         read_var(ShmemIndexAlloc,fp);
3447         read_var(ShmemBackendArray,fp);
3448
3449         read_var(LWLockArray,fp);
3450         read_var(ProcStructLock,fp);
3451         read_var(pgStatSock,fp);
3452
3453         read_var(PreAuthDelay,fp);
3454         read_var(debug_flag,fp);
3455         read_var(PostmasterPid,fp);
3456
3457         /* Release file */
3458         FreeFile(fp);
3459         if (unlink(filename) != 0)
3460                 ereport(WARNING,
3461                                 (errcode_for_file_access(),
3462                                  errmsg("could not remove file \"%s\": %m", filename)));
3463 }
3464
3465
3466 size_t ShmemBackendArraySize(void)
3467 {
3468         return (NUM_BACKENDARRAY_ELEMS*sizeof(Backend));
3469 }
3470
3471 void ShmemBackendArrayAllocation(void)
3472 {
3473         size_t size = ShmemBackendArraySize();
3474         ShmemBackendArray = (Backend*)ShmemAlloc(size);
3475         memset(ShmemBackendArray, 0, size);
3476 }
3477
3478 static void ShmemBackendArrayAdd(Backend *bn)
3479 {
3480         int i;
3481         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3482         {
3483                 /* Find an empty slot */
3484                 if (ShmemBackendArray[i].pid == 0)
3485                 {
3486                         ShmemBackendArray[i] = *bn;
3487                         return;
3488                 }
3489         }
3490
3491         /* FIXME: [fork/exec] some sort of error */
3492         abort();
3493 }
3494
3495 static void ShmemBackendArrayRemove(pid_t pid)
3496 {
3497         int i;
3498         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3499         {
3500                 if (ShmemBackendArray[i].pid == pid)
3501                 {
3502                         /* Mark the slot as empty */
3503                         ShmemBackendArray[i].pid = 0;
3504                         return;
3505                 }
3506         }
3507
3508         /* Something stronger than WARNING here? */
3509         ereport(WARNING,
3510                         (errmsg_internal("unable to find backend entry with pid %d",
3511                                                          pid)));
3512 }
3513
3514 #endif
3515
3516 #ifdef WIN32
3517
3518 pid_t win32_forkexec(const char* path, char *argv[])
3519 {
3520         STARTUPINFO si;
3521         PROCESS_INFORMATION pi;
3522         char *p;
3523         int i;
3524         char cmdLine[MAXPGPATH];
3525         HANDLE childHandleCopy;
3526         HANDLE waiterThread;
3527
3528         /* Format the cmd line */
3529         snprintf(cmdLine,sizeof(cmdLine),"%s",path);
3530         i = 0;
3531         while (argv[++i] != NULL)
3532         {
3533                 /* FIXME: [fork/exec] some strlen checks might be prudent here */
3534                 strcat(cmdLine," ");
3535                 strcat(cmdLine,argv[i]);
3536         }
3537
3538         /*
3539          * The following snippet can disappear when we consistently
3540          * use forward slashes.
3541          */
3542         p = cmdLine;
3543         while (*(p++) != '\0')
3544                 if (*p == '/') *p = '\\';
3545
3546         memset(&pi,0,sizeof(pi));
3547         memset(&si,0,sizeof(si));
3548         si.cb = sizeof(si);
3549         if (!CreateProcess(NULL,cmdLine,NULL,NULL,TRUE,0,NULL,NULL,&si,&pi))
3550         {
3551                 elog(ERROR,"CreateProcess call failed (%i): %m",(int)GetLastError());
3552                 return -1;
3553         }
3554
3555         if (!IsUnderPostmaster)
3556                 /* We are the Postmaster creating a child... */
3557                 win32_AddChild(pi.dwProcessId,pi.hProcess);
3558
3559         if (!DuplicateHandle(GetCurrentProcess(),
3560                                                  pi.hProcess,
3561                                                  GetCurrentProcess(),
3562                                                  &childHandleCopy,
3563                                                  0,
3564                                                  FALSE,
3565                                                  DUPLICATE_SAME_ACCESS))
3566                 ereport(FATAL,
3567                                 (errmsg_internal("failed to duplicate child handle: %i",(int)GetLastError())));
3568         waiterThread = CreateThread(NULL, 64*1024, win32_sigchld_waiter, (LPVOID)childHandleCopy, 0, NULL);
3569         if (!waiterThread)
3570                 ereport(FATAL,
3571                                 (errmsg_internal("failed to create sigchld waiter thread: %i",(int)GetLastError())));
3572         CloseHandle(waiterThread);
3573
3574         if (IsUnderPostmaster)
3575                 CloseHandle(pi.hProcess);
3576         CloseHandle(pi.hThread);
3577
3578         return pi.dwProcessId;
3579 }
3580
3581 /*
3582  * Note: The following three functions must not be interrupted (eg. by signals).
3583  *  As the Postgres Win32 signalling architecture (currently) requires polling,
3584  *  or APC checking functions which aren't used here, this is not an issue.
3585  *
3586  *  We keep two separate arrays, instead of a single array of pid/HANDLE structs,
3587  *  to avoid having to re-create a handle array for WaitForMultipleObjects on
3588  *  each call to win32_waitpid.
3589  */
3590
3591 static void win32_AddChild(pid_t pid, HANDLE handle)
3592 {
3593         Assert(win32_childPIDArray && win32_childHNDArray);
3594         if (win32_numChildren < NUM_BACKENDARRAY_ELEMS)
3595         {
3596                 win32_childPIDArray[win32_numChildren] = pid;
3597                 win32_childHNDArray[win32_numChildren] = handle;
3598                 ++win32_numChildren;
3599         }
3600         else
3601                 /* FIXME: [fork/exec] some sort of error */
3602                 abort();
3603 }
3604
3605 static void win32_RemoveChild(pid_t pid)
3606 {
3607         int i;
3608         Assert(win32_childPIDArray && win32_childHNDArray);
3609
3610         for (i = 0; i < win32_numChildren; i++)
3611         {
3612                 if (win32_childPIDArray[i] == pid)
3613                 {
3614                         CloseHandle(win32_childHNDArray[i]);
3615
3616                         /* Swap last entry into the "removed" one */
3617                         --win32_numChildren;
3618                         win32_childPIDArray[i] = win32_childPIDArray[win32_numChildren];
3619                         win32_childHNDArray[i] = win32_childHNDArray[win32_numChildren];
3620                         return;
3621                 }
3622         }
3623
3624         /* Something stronger than WARNING here? */
3625         ereport(WARNING,
3626                         (errmsg_internal("unable to find child entry with pid %lu",
3627                                                          pid)));
3628 }
3629
3630 static pid_t win32_waitpid(int *exitstatus)
3631 {
3632         Assert(win32_childPIDArray && win32_childHNDArray);
3633         elog(DEBUG3,"waiting on %lu children",win32_numChildren);
3634
3635         if (win32_numChildren > 0)
3636         {
3637                 /*
3638                  * Note: Do NOT use WaitForMultipleObjectsEx, as we don't
3639                  * want to run queued APCs here.
3640                  */
3641                 int index;
3642                 DWORD exitCode;
3643                 DWORD ret = WaitForMultipleObjects(win32_numChildren,win32_childHNDArray,FALSE,0);
3644
3645                 switch (ret)
3646                 {
3647                         case WAIT_FAILED:
3648                                 ereport(ERROR,
3649                                                 (errmsg_internal("failed to wait on %lu children: %i",
3650                                                                                  win32_numChildren,(int)GetLastError())));
3651                                 /* Fall through to WAIT_TIMEOUTs return */
3652
3653                         case WAIT_TIMEOUT:
3654                                 /* No children have finished */
3655                                 return -1;
3656
3657                         default:
3658                                 /* Get the exit code, and return the PID of, the respective process */
3659                                 index = ret-WAIT_OBJECT_0;
3660                                 Assert(index >= 0 && index < win32_numChildren);
3661                                 if (!GetExitCodeProcess(win32_childHNDArray[index],&exitCode))
3662                                         /*
3663                                          * If we get this far, this should never happen, but, then again...
3664                                          * No choice other than to assume a catastrophic failure.
3665                                          */
3666                                         ereport(FATAL,
3667                                                         (errmsg_internal("failed to get exit code for child %lu",
3668                                                                                          win32_childPIDArray[index])));
3669                                 *exitstatus = (int)exitCode;
3670                                 return win32_childPIDArray[index];
3671                 }
3672         }
3673
3674         /* No children */
3675         return -1;
3676 }
3677
3678 /* Note! Code belows executes on separate threads, one for
3679    each child process created */
3680 static DWORD WINAPI win32_sigchld_waiter(LPVOID param) {
3681         HANDLE procHandle = (HANDLE)param;
3682
3683         DWORD r = WaitForSingleObject(procHandle, INFINITE);
3684         if (r == WAIT_OBJECT_0)
3685                 pg_queue_signal(SIGCHLD);
3686         else
3687                 fprintf(stderr,"ERROR: Failed to wait on child process handle: %i\n",(int)GetLastError());
3688         CloseHandle(procHandle);
3689         return 0;
3690 }
3691
3692 #endif