]> granicus.if.org Git - postgresql/blob - src/backend/postmaster/postmaster.c
Clarify some error messages
[postgresql] / src / backend / postmaster / postmaster.c
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  *        This program acts as a clearing house for requests to the
5  *        POSTGRES system.  Frontend programs send a startup message
6  *        to the Postmaster and the postmaster uses the info in the
7  *        message to set up a backend process.
8  *
9  *        The postmaster also manages system-wide operations such as
10  *        startup and shutdown. The postmaster itself doesn't do those
11  *        operations, mind you --- it just forks off a subprocess to do them
12  *        at the right times.  It also takes care of resetting the system
13  *        if a backend crashes.
14  *
15  *        The postmaster process creates the shared memory and semaphore
16  *        pools during startup, but as a rule does not touch them itself.
17  *        In particular, it is not a member of the PGPROC array of backends
18  *        and so it cannot participate in lock-manager operations.      Keeping
19  *        the postmaster away from shared memory operations makes it simpler
20  *        and more reliable.  The postmaster is almost always able to recover
21  *        from crashes of individual backends by resetting shared memory;
22  *        if it did much with shared memory then it would be prone to crashing
23  *        along with the backends.
24  *
25  *        When a request message is received, we now fork() immediately.
26  *        The child process performs authentication of the request, and
27  *        then becomes a backend if successful.  This allows the auth code
28  *        to be written in a simple single-threaded style (as opposed to the
29  *        crufty "poor man's multitasking" code that used to be needed).
30  *        More importantly, it ensures that blockages in non-multithreaded
31  *        libraries like SSL or PAM cannot cause denial of service to other
32  *        clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  *        $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.437 2004/11/09 13:01:26 petere Exp $
41  *
42  * NOTES
43  *
44  * Initialization:
45  *              The Postmaster sets up shared memory data structures
46  *              for the backends.
47  *
48  * Synchronization:
49  *              The Postmaster shares memory with the backends but should avoid
50  *              touching shared memory, so as not to become stuck if a crashing
51  *              backend screws up locks or shared memory.  Likewise, the Postmaster
52  *              should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  *              The Postmaster cleans up after backends if they have an emergency
56  *              exit and/or core dump.
57  *
58  * Error Reporting:
59  *              Use write_stderr() only for reporting "interactive" errors
60  *              (essentially, bogus arguments on the command line).  Once the
61  *              postmaster is launched, use ereport().  In particular, don't use
62  *              write_stderr() for anything that occurs after pmdaemonize.
63  *
64  *-------------------------------------------------------------------------
65  */
66
67 #include "postgres.h"
68
69 #include <unistd.h>
70 #include <signal.h>
71 #include <time.h>
72 #include <sys/wait.h>
73 #include <ctype.h>
74 #include <sys/stat.h>
75 #include <sys/socket.h>
76 #include <fcntl.h>
77 #include <sys/param.h>
78 #include <netinet/in.h>
79 #include <arpa/inet.h>
80 #include <netdb.h>
81 #include <limits.h>
82
83 #ifdef HAVE_SYS_SELECT_H
84 #include <sys/select.h>
85 #endif
86
87 #ifdef HAVE_GETOPT_H
88 #include <getopt.h>
89 #endif
90
91 #ifdef USE_RENDEZVOUS
92 #include <DNSServiceDiscovery/DNSServiceDiscovery.h>
93 #endif
94
95 #include "catalog/pg_database.h"
96 #include "commands/async.h"
97 #include "lib/dllist.h"
98 #include "libpq/auth.h"
99 #include "libpq/crypt.h"
100 #include "libpq/libpq.h"
101 #include "libpq/pqcomm.h"
102 #include "libpq/pqsignal.h"
103 #include "miscadmin.h"
104 #include "nodes/nodes.h"
105 #include "postmaster/postmaster.h"
106 #include "postmaster/pgarch.h"
107 #include "postmaster/syslogger.h"
108 #include "storage/fd.h"
109 #include "storage/ipc.h"
110 #include "storage/pg_shmem.h"
111 #include "storage/pmsignal.h"
112 #include "storage/proc.h"
113 #include "storage/bufmgr.h"
114 #include "access/xlog.h"
115 #include "tcop/tcopprot.h"
116 #include "utils/builtins.h"
117 #include "utils/guc.h"
118 #include "utils/memutils.h"
119 #include "utils/ps_status.h"
120 #include "bootstrap/bootstrap.h"
121 #include "pgstat.h"
122
123
124 /*
125  * List of active backends (or child processes anyway; we don't actually
126  * know whether a given child has become a backend or is still in the
127  * authentication phase).  This is used mainly to keep track of how many
128  * children we have and to send them appropriate signals when necessary.
129  *
130  * "Special" children such as the startup and bgwriter tasks are not in
131  * this list.
132  */
133 typedef struct bkend
134 {
135         pid_t           pid;                    /* process id of the child process */
136         long            cancel_key;             /* cancel key assigned to this child */
137 } Backend;
138
139 static Dllist *BackendList;             /* created with DLNewList() in PostmasterMain */
140
141 #ifdef EXEC_BACKEND
142 #define NUM_BACKENDARRAY_ELEMS (2*MaxBackends)  /* also sizes the WIN32 child pid/handle arrays */
143 static Backend *ShmemBackendArray;      /* presumably kept in shared memory, per its name -- TODO confirm */
144 #endif
145
146 /* Where we listen: port number, Unix-socket directory, and listen addresses */
147 int                     PostPortNumber;         /* passed as the port to every StreamServerPort() call */
148 char       *UnixSocketDir;                      /* passed through to StreamServerPort() */
149 char       *ListenAddresses;                    /* comma-separated list; an entry of "*" is passed on as a NULL host */
150
151 /*
152  * ReservedBackends is the number of backends reserved for superuser use.
153  * This number is taken out of the pool size given by MaxBackends, so the
154  * number of backend slots available to non-superusers is
155  * (MaxBackends - ReservedBackends).  Note what this really means is
156  * "if there are <= ReservedBackends connections available, only superusers
157  * can make new connections" --- pre-existing superuser connections don't
158  * count against the limit.
159  */
160 int                     ReservedBackends;       /* PostmasterMain refuses to start unless this is < MaxBackends */
161
162
163 static const char *progname = NULL;     /* set from argv[0] via get_progname() in PostmasterMain */
164
165 /* The socket(s) we're listening to; every slot is set to -1 before any is opened. */
166 #define MAXLISTEN       10
167 static int      ListenSocket[MAXLISTEN];
168
169 /*
170  * Set by the -o option: each -o argument is appended after a space
171  */
172 static char ExtraOptions[MAXPGPATH];    /* snprintf() truncates whatever does not fit */
173
174 /*
175  * These globals control the behavior of the postmaster in case some
176  * backend dumps core.  Normally, it kills all peers of the dead backend
177  * and reinitializes shared memory.  With -s the postmaster stops
178  * (rather than kills) the peers; with -n it does not reinitialize
179  * shared data structures.
180  */
181 static bool Reinit = true;              /* cleared by -n */
182 static int      SendStop = false;       /* set by -s; NOTE(review): boolean flag declared as int, unlike Reinit */
183
184 /* still more option variables */
185 bool            EnableSSL = false;      /* if set, secure_initialize() runs at startup (USE_SSL builds only) */
186 bool            SilentMode = false; /* silent mode (-S): PostmasterMain calls pmdaemonize() */
187
188 int                     PreAuthDelay = 0;       /* units not visible here; presumably seconds -- TODO confirm */
189 int                     AuthenticationTimeout = 60;     /* presumably seconds -- TODO confirm */
190
191 bool            log_hostname;           /* for ps display and logging */
192 bool            Log_connections = false;
193 bool            Db_user_namespace = false;
194
195 char       *rendezvous_name;    /* name handed to DNSServiceRegistrationCreate(); NULL skips registration */
196
197 /* list of library:init-function to be preloaded; given to process_preload_libraries() when non-NULL */
198 char       *preload_libraries_string = NULL;
199
200 /* PIDs of special child processes; 0 when not running */
201 static pid_t StartupPID = 0,
202                         BgWriterPID = 0,
203                         PgArchPID = 0,
204                         PgStatPID = 0,
205                         SysLoggerPID = 0;
206
207 /* Shutdown state: the values the Shutdown variable below can take */
208 #define                 NoShutdown              0
209 #define                 SmartShutdown   1
210 #define                 FastShutdown    2
211
212 static int      Shutdown = NoShutdown;  /* one of the three codes above */
213
214 static bool FatalError = false; /* true if recovering from backend crash */
215
216 bool            ClientAuthInProgress = false;           /* true during new-client
217                                                                                                  * authentication */
218
219 /*
220  * State for assigning random salts and cancel keys.
221  * Also, the global MyCancelKey passes the cancel key assigned to a given
222  * backend from the postmaster to that backend (via fork).
223  */
224 static unsigned int random_seed = 0;
225
226 static int      debug_flag = 0;         /* atoi() of the -d argument */
227
228 extern char *optarg;                    /* getopt(3) interface */
229 extern int      optind,
230                         opterr;
231
232 #ifdef HAVE_INT_OPTRESET
233 extern int      optreset;               /* set to 1 along with optind = 1 once option parsing is done */
234 #endif
235
236 /*
237  * Forward declarations of functions private to postmaster.c
238  */
239 static void checkDataDir(void);         /* run right after SelectConfigFiles() succeeds */
240
241 #ifdef USE_RENDEZVOUS
242 static void reg_reply(DNSServiceRegistrationReplyErrorType errorCode,
243                   void *context);       /* callback given to DNSServiceRegistrationCreate() */
244 #endif
245 static void pmdaemonize(void);          /* invoked from PostmasterMain when SilentMode is set */
246 static Port *ConnCreate(int serverFd);
247 static void ConnFree(Port *port);
248 static void reset_shared(unsigned short port); /* PostmasterMain passes PostPortNumber */
249 static void SIGHUP_handler(SIGNAL_ARGS);
250 static void pmdie(SIGNAL_ARGS);
251 static void reaper(SIGNAL_ARGS);
252 static void sigusr1_handler(SIGNAL_ARGS);
253 static void dummy_handler(SIGNAL_ARGS);
254 static void CleanupBackend(int pid, int exitstatus);
255 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
256 static void LogChildExit(int lev, const char *procname,
257                          int pid, int exitstatus);
258 static int      BackendRun(Port *port);
259 static void ExitPostmaster(int status);
260 static void usage(const char *);        /* argument is the program name */
261 static int      ServerLoop(void);
262 static int      BackendStartup(Port *port);
263 static int      ProcessStartupPacket(Port *port, bool SSLdone);
264 static void processCancelRequest(Port *port, void *pkt);
265 static int      initMasks(fd_set *rmask);
266 static void report_fork_failure_to_client(Port *port, int errnum);
267 static enum CAC_state canAcceptConnections(void);
268 static long PostmasterRandom(void);
269 static void RandomSalt(char *cryptSalt, char *md5Salt);
270 static void SignalChildren(int signal);
271 static int      CountChildren(void);
272 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname); /* false makes PostmasterMain exit with status 1 */
273 static pid_t StartChildProcess(int xlop);       /* xlop is a BS_XLOG_* code */
274
275 #ifdef EXEC_BACKEND
276
277 #ifdef WIN32
278 static pid_t win32_forkexec(const char *path, char *argv[]);
279 static void win32_AddChild(pid_t pid, HANDLE handle);
280 static void win32_RemoveChild(pid_t pid);
281 static pid_t win32_waitpid(int *exitstatus);
282 static DWORD WINAPI win32_sigchld_waiter(LPVOID param);
283
284 static pid_t *win32_childPIDArray;      /* allocated in PostmasterMain, NUM_BACKENDARRAY_ELEMS entries */
285 static HANDLE *win32_childHNDArray;     /* allocated alongside the pid array, same length */
286 static unsigned long win32_numChildren = 0;
287
288 HANDLE          PostmasterHandle;       /* inheritable duplicate of the postmaster's own process handle */
289 #endif   /* WIN32; NOTE(review): PostmasterMain fills these in under plain "#ifdef WIN32", so WIN32 builds apparently must define EXEC_BACKEND */
290
291 static pid_t backend_forkexec(Port *port);
292 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
293
294 static void read_backend_variables(char *filename, Port *port);
295 static bool write_backend_variables(char *filename, Port *port);
296
297 static void ShmemBackendArrayAdd(Backend *bn);
298 static void ShmemBackendArrayRemove(pid_t pid);
299 #endif   /* EXEC_BACKEND */
300
301 #define StartupDataBase()               StartChildProcess(BS_XLOG_STARTUP)      /* yields the pid_t returned by StartChildProcess() */
302 #define StartBackgroundWriter() StartChildProcess(BS_XLOG_BGWRITER)     /* same helper, background-writer code */
303
304
305 /*
306  * Postmaster main entry point
307  */
308 int
309 PostmasterMain(int argc, char *argv[])
310 {
311         int                     opt;
312         int                     status;
313         char       *userDoption = NULL;
314         int                     i;
315
316         /* This will call exit() if strdup() fails. */
317         progname = get_progname(argv[0]);       
318
319         MyProcPid = PostmasterPid = getpid();
320
321         IsPostmasterEnvironment = true;
322
323         /*
324          * Catch standard options before doing much else.  This even works on
325          * systems without getopt_long.
326          */
327         if (argc > 1)
328         {
329                 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
330                 {
331                         usage(progname);
332                         ExitPostmaster(0);
333                 }
334                 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
335                 {
336                         puts("postmaster (PostgreSQL) " PG_VERSION);
337                         ExitPostmaster(0);
338                 }
339         }
340
341         /*
342          * for security, no dir or file created can be group or other
343          * accessible
344          */
345         umask((mode_t) 0077);
346
347         /*
348          * Fire up essential subsystems: memory management
349          */
350         MemoryContextInit();
351
352         /*
353          * By default, palloc() requests in the postmaster will be allocated
354          * in the PostmasterContext, which is space that can be recycled by
355          * backends.  Allocated data that needs to be available to backends
356          * should be allocated in TopMemoryContext.
357          */
358         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
359                                                                                           "Postmaster",
360                                                                                           ALLOCSET_DEFAULT_MINSIZE,
361                                                                                           ALLOCSET_DEFAULT_INITSIZE,
362                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
363         MemoryContextSwitchTo(PostmasterContext);
364
365         IgnoreSystemIndexes(false);
366
367         if (find_my_exec(argv[0], my_exec_path) < 0)
368                 elog(FATAL, "%s: could not locate my own executable path",
369                          argv[0]);
370
371         get_pkglib_path(my_exec_path, pkglib_path);
372
373         /*
374          * Options setup
375          */
376         InitializeGUCOptions();
377
378         opterr = 1;
379
380         while ((opt = getopt(argc, argv, "A:a:B:b:c:D:d:Fh:ik:lm:MN:no:p:Ss-:")) != -1)
381         {
382                 switch (opt)
383                 {
384                         case 'A':
385 #ifdef USE_ASSERT_CHECKING
386                                 SetConfigOption("debug_assertions", optarg, PGC_POSTMASTER, PGC_S_ARGV);
387 #else
388                                 write_stderr("%s: assert checking is not compiled in\n", progname);
389 #endif
390                                 break;
391                         case 'a':
392                                 /* Can no longer set authentication method. */
393                                 break;
394                         case 'B':
395                                 SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
396                                 break;
397                         case 'b':
398                                 /* Can no longer set the backend executable file to use. */
399                                 break;
400                         case 'D':
401                                 userDoption = optarg;
402                                 break;
403                         case 'd':
404                                 {
405                                         /* Turn on debugging for the postmaster. */
406                                         char       *debugstr = palloc(strlen("debug") + strlen(optarg) + 1);
407
408                                         sprintf(debugstr, "debug%s", optarg);
409                                         SetConfigOption("log_min_messages", debugstr,
410                                                                         PGC_POSTMASTER, PGC_S_ARGV);
411                                         pfree(debugstr);
412                                         debug_flag = atoi(optarg);
413                                         break;
414                                 }
415                         case 'F':
416                                 SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
417                                 break;
418                         case 'h':
419                                 SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
420                                 break;
421                         case 'i':
422                                 SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
423                                 break;
424                         case 'k':
425                                 SetConfigOption("unix_socket_directory", optarg, PGC_POSTMASTER, PGC_S_ARGV);
426                                 break;
427 #ifdef USE_SSL
428                         case 'l':
429                                 SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
430                                 break;
431 #endif
432                         case 'm':
433                                 /* Multiplexed backends no longer supported. */
434                                 break;
435                         case 'M':
436
437                                 /*
438                                  * ignore this flag.  This may be passed in because the
439                                  * program was run as 'postgres -M' instead of
440                                  * 'postmaster'
441                                  */
442                                 break;
443                         case 'N':
444                                 /* The max number of backends to start. */
445                                 SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
446                                 break;
447                         case 'n':
448                                 /* Don't reinit shared mem after abnormal exit */
449                                 Reinit = false;
450                                 break;
451                         case 'o':
452
453                                 /*
454                                  * Other options to pass to the backend on the command
455                                  * line
456                                  */
457                                 snprintf(ExtraOptions + strlen(ExtraOptions),
458                                                  sizeof(ExtraOptions) - strlen(ExtraOptions),
459                                                  " %s", optarg);
460                                 break;
461                         case 'p':
462                                 SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
463                                 break;
464                         case 'S':
465
466                                 /*
467                                  * Start in 'S'ilent mode (disassociate from controlling
468                                  * tty). You may also think of this as 'S'ysV mode since
469                                  * it's most badly needed on SysV-derived systems like
470                                  * SVR4 and HP-UX.
471                                  */
472                                 SetConfigOption("silent_mode", "true", PGC_POSTMASTER, PGC_S_ARGV);
473                                 break;
474                         case 's':
475
476                                 /*
477                                  * In the event that some backend dumps core, send
478                                  * SIGSTOP, rather than SIGQUIT, to all its peers.      This
479                                  * lets the wily post_hacker collect core dumps from
480                                  * everyone.
481                                  */
482                                 SendStop = true;
483                                 break;
484                         case 'c':
485                         case '-':
486                                 {
487                                         char       *name,
488                                                            *value;
489
490                                         ParseLongOption(optarg, &name, &value);
491                                         if (!value)
492                                         {
493                                                 if (opt == '-')
494                                                         ereport(ERROR,
495                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
496                                                                          errmsg("--%s requires a value",
497                                                                                         optarg)));
498                                                 else
499                                                         ereport(ERROR,
500                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
501                                                                          errmsg("-c %s requires a value",
502                                                                                         optarg)));
503                                         }
504
505                                         SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
506                                         free(name);
507                                         if (value)
508                                                 free(value);
509                                         break;
510                                 }
511
512                         default:
513                                 write_stderr("Try \"%s --help\" for more information.\n",
514                                                          progname);
515                                 ExitPostmaster(1);
516                 }
517         }
518
519         /*
520          * Postmaster accepts no non-option switch arguments.
521          */
522         if (optind < argc)
523         {
524                 write_stderr("%s: invalid argument: \"%s\"\n",
525                                          progname, argv[optind]);
526                 write_stderr("Try \"%s --help\" for more information.\n",
527                                          progname);
528                 ExitPostmaster(1);
529         }
530
531         /*
532          * Locate the proper configuration files and data directory, and
533          * read postgresql.conf for the first time.
534          */
535         if (!SelectConfigFiles(userDoption, progname))
536                 ExitPostmaster(2);
537
538         /* Verify that DataDir looks reasonable */
539         checkDataDir();
540
541         /*
542          * Check for invalid combinations of GUC settings.
543          */
544         if (NBuffers < 2 * MaxBackends || NBuffers < 16)
545         {
546                 /*
547                  * Do not accept -B so small that backends are likely to starve
548                  * for lack of buffers.  The specific choices here are somewhat
549                  * arbitrary.
550                  */
551                 write_stderr("%s: the number of buffers (-B) must be at least twice the number of allowed connections (-N) and at least 16\n", progname);
552                 ExitPostmaster(1);
553         }
554
555         if (ReservedBackends >= MaxBackends)
556         {
557                 write_stderr("%s: superuser_reserved_connections must be less than max_connections\n", progname);
558                 ExitPostmaster(1);
559         }
560
561         /*
562          * Other one-time internal sanity checks can go here.
563          */
564         if (!CheckDateTokenTables())
565         {
566                 write_stderr("%s: invalid datetoken tables, please fix\n", progname);
567                 ExitPostmaster(1);
568         }
569
570         /*
571          * Now that we are done processing the postmaster arguments, reset
572          * getopt(3) library so that it will work correctly in subprocesses.
573          */
574         optind = 1;
575 #ifdef HAVE_INT_OPTRESET
576         optreset = 1;                           /* some systems need this too */
577 #endif
578
579         /* For debugging: display postmaster environment */
580         {
581                 extern char **environ;
582                 char      **p;
583
584                 ereport(DEBUG3,
585                         (errmsg_internal("%s: PostmasterMain: initial environ dump:",
586                                                          progname)));
587                 ereport(DEBUG3,
588                  (errmsg_internal("-----------------------------------------")));
589                 for (p = environ; *p; ++p)
590                         ereport(DEBUG3,
591                                         (errmsg_internal("\t%s", *p)));
592                 ereport(DEBUG3,
593                  (errmsg_internal("-----------------------------------------")));
594         }
595
596 #ifdef EXEC_BACKEND
597         if (find_other_exec(argv[0], "postgres", PG_VERSIONSTR,
598                                                 postgres_exec_path) < 0)
599                 ereport(FATAL,
600                          (errmsg("%s: could not locate matching postgres executable",
601                                          progname)));
602 #endif
603
604         /*
605          * Initialize SSL library, if specified.
606          */
607 #ifdef USE_SSL
608         if (EnableSSL)
609                 secure_initialize();
610 #endif
611
612         /*
613          * process any libraries that should be preloaded and optionally
614          * pre-initialized
615          */
616         if (preload_libraries_string)
617                 process_preload_libraries(preload_libraries_string);
618
619         /*
620          * Fork away from controlling terminal, if -S specified.
621          *
622          * Must do this before we grab any interlock files, else the interlocks
623          * will show the wrong PID.
624          */
625         if (SilentMode)
626                 pmdaemonize();
627
628         /*
629          * Create lockfile for data directory.
630          *
631          * We want to do this before we try to grab the input sockets, because
632          * the data directory interlock is more reliable than the socket-file
633          * interlock (thanks to whoever decided to put socket files in /tmp
634          * :-(). For the same reason, it's best to grab the TCP socket(s)
635          * before the Unix socket.
636          */
637         CreateDataDirLockFile(DataDir, true);
638
639         /*
640          * Remove old temporary files.  At this point there can be no other
641          * Postgres processes running in this directory, so this should be
642          * safe.
643          */
644         RemovePgTempFiles();
645
646         /*
647          * Establish input sockets.
648          */
649         for (i = 0; i < MAXLISTEN; i++)
650                 ListenSocket[i] = -1;
651
652         if (ListenAddresses)
653         {
654                 char       *rawstring;
655                 List       *elemlist;
656                 ListCell   *l;
657
658                 /* Need a modifiable copy of ListenAddresses */
659                 rawstring = pstrdup(ListenAddresses);
660
661                 /* Parse string into list of identifiers */
662                 if (!SplitIdentifierString(rawstring, ',', &elemlist))
663                 {
664                         /* syntax error in list */
665                         ereport(FATAL,
666                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
667                                 errmsg("invalid list syntax for \"listen_addresses\"")));
668                 }
669
670                 foreach(l, elemlist)
671                 {
672                         char       *curhost = (char *) lfirst(l);
673
674                         if (strcmp(curhost, "*") == 0)
675                                 status = StreamServerPort(AF_UNSPEC, NULL,
676                                                                                   (unsigned short) PostPortNumber,
677                                                                                   UnixSocketDir,
678                                                                                   ListenSocket, MAXLISTEN);
679                         else
680                                 status = StreamServerPort(AF_UNSPEC, curhost,
681                                                                                   (unsigned short) PostPortNumber,
682                                                                                   UnixSocketDir,
683                                                                                   ListenSocket, MAXLISTEN);
684                         if (status != STATUS_OK)
685                                 ereport(WARNING,
686                                          (errmsg("could not create listen socket for \"%s\"",
687                                                          curhost)));
688                 }
689
690                 list_free(elemlist);
691                 pfree(rawstring);
692         }
693
694 #ifdef USE_RENDEZVOUS
695         /* Register for Rendezvous only if we opened TCP socket(s) */
696         if (ListenSocket[0] != -1 && rendezvous_name != NULL)
697         {
698                 DNSServiceRegistrationCreate(rendezvous_name,
699                                                                          "_postgresql._tcp.",
700                                                                          "",
701                                                                          htonl(PostPortNumber),
702                                                                          "",
703                                                                  (DNSServiceRegistrationReply) reg_reply,
704                                                                          NULL);
705         }
706 #endif
707
708 #ifdef HAVE_UNIX_SOCKETS
709         status = StreamServerPort(AF_UNIX, NULL,
710                                                           (unsigned short) PostPortNumber,
711                                                           UnixSocketDir,
712                                                           ListenSocket, MAXLISTEN);
713         if (status != STATUS_OK)
714                 ereport(WARNING,
715                                 (errmsg("could not create Unix-domain socket")));
716 #endif
717
718         /*
719          * check that we have some socket to listen on
720          */
721         if (ListenSocket[0] == -1)
722                 ereport(FATAL,
723                                 (errmsg("no socket created for listening")));
724
725         XLOGPathInit();
726
727         /*
728          * Set up shared memory and semaphores.
729          */
730         reset_shared(PostPortNumber);
731
732         /*
733          * Estimate number of openable files.  This must happen after setting
734          * up semaphores, because on some platforms semaphores count as open
735          * files.
736          */
737         set_max_safe_fds();
738
739         /*
740          * Initialize the list of active backends.
741          */
742         BackendList = DLNewList();
743
744 #ifdef WIN32
745
746         /*
747          * Initialize the child pid/HANDLE arrays for signal handling.
748          */
749         win32_childPIDArray = (pid_t *)
750                 malloc(NUM_BACKENDARRAY_ELEMS * sizeof(pid_t));
751         win32_childHNDArray = (HANDLE *)
752                 malloc(NUM_BACKENDARRAY_ELEMS * sizeof(HANDLE));
753         if (!win32_childPIDArray || !win32_childHNDArray)
754                 ereport(FATAL,
755                                 (errcode(ERRCODE_OUT_OF_MEMORY),
756                                  errmsg("out of memory")));
757
758         /*
759          * Set up a handle that child processes can use to check whether the
760          * postmaster is still running.
761          */
762         if (DuplicateHandle(GetCurrentProcess(),
763                                                 GetCurrentProcess(),
764                                                 GetCurrentProcess(),
765                                                 &PostmasterHandle,
766                                                 0,
767                                                 TRUE,
768                                                 DUPLICATE_SAME_ACCESS) == 0)
769                 ereport(FATAL,
770                         (errmsg_internal("could not duplicate postmaster handle: %d",
771                                                          (int) GetLastError())));
772 #endif
773
774         /*
775          * Record postmaster options.  We delay this till now to avoid
776          * recording bogus options (eg, NBuffers too high for available
777          * memory).
778          */
779         if (!CreateOptsFile(argc, argv, my_exec_path))
780                 ExitPostmaster(1);
781
782 #ifdef EXEC_BACKEND
783         write_nondefault_variables(PGC_POSTMASTER);
784 #endif
785
786         /*
787          * Write the external PID file if requested
788          */
789         if (external_pid_file)
790         {
791                 FILE       *fpidfile = fopen(external_pid_file, "w");
792
793                 if (fpidfile)
794                 {
795                         fprintf(fpidfile, "%d\n", MyProcPid);
796                         fclose(fpidfile);
797                         /* Should we remove the pid file on postmaster exit? */
798                 }
799                 else
800                         write_stderr("%s: could not write external PID file \"%s\": %s\n",
801                                                  progname, external_pid_file, strerror(errno));
802         }
803
804         /*
805          * Set up signal handlers for the postmaster process.
806          *
807          * CAUTION: when changing this list, check for side-effects on the signal
808          * handling setup of child processes.  See tcop/postgres.c,
809          * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/pgarch.c,
810          * postmaster/pgstat.c, and postmaster/syslogger.c.
811          */
812         pqinitmask();
813         PG_SETMASK(&BlockSig);
814
815         pqsignal(SIGHUP, SIGHUP_handler);       /* reread config file and have
816                                                                                  * children do same */
817         pqsignal(SIGINT, pmdie);        /* send SIGTERM and shut down */
818         pqsignal(SIGQUIT, pmdie);       /* send SIGQUIT and die */
819         pqsignal(SIGTERM, pmdie);       /* wait for children and shut down */
820         pqsignal(SIGALRM, SIG_IGN); /* ignored */
821         pqsignal(SIGPIPE, SIG_IGN); /* ignored */
822         pqsignal(SIGUSR1, sigusr1_handler); /* message from child process */
823         pqsignal(SIGUSR2, dummy_handler);       /* unused, reserve for children */
824         pqsignal(SIGCHLD, reaper);      /* handle child termination */
825         pqsignal(SIGTTIN, SIG_IGN); /* ignored */
826         pqsignal(SIGTTOU, SIG_IGN); /* ignored */
827         /* ignore SIGXFSZ, so that ulimit violations work like disk full */
828 #ifdef SIGXFSZ
829         pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
830 #endif
831
832         /*
833          * If enabled, start up syslogger collection subprocess
834          */
835         SysLoggerPID = SysLogger_Start();
836
837         /*
838          * Reset whereToSendOutput from Debug (its starting state) to None.
839          * This stops ereport from sending log messages to stderr unless
840          * Log_destination permits.  We don't do this until the postmaster is
841          * fully launched, since startup failures may as well be reported to
842          * stderr.
843          */
844         whereToSendOutput = None;
845
846         /*
847          * Initialize the statistics collector stuff
848          */
849         pgstat_init();
850
851         /*
852          * Load cached files for client authentication.
853          */
854         load_hba();
855         load_ident();
856         load_user();
857         load_group();
858
859         /*
860          * We're ready to rock and roll...
861          */
862         StartupPID = StartupDataBase();
863
864         status = ServerLoop();
865
866         /*
867          * ServerLoop probably shouldn't ever return, but if it does, close
868          * down.
869          */
870         ExitPostmaster(status != STATUS_OK);
871
872         return 0;                                       /* not reached */
873 }
874
875
876 /*
877  * Validate the proposed data directory
878  */
879 static void
880 checkDataDir(void)
881 {
882         char            path[MAXPGPATH];
883         FILE       *fp;
884         struct stat stat_buf;
885
886         Assert(DataDir);
887
888         if (stat(DataDir, &stat_buf) != 0)
889         {
890                 if (errno == ENOENT)
891                         ereport(FATAL,
892                                         (errcode_for_file_access(),
893                                          errmsg("data directory \"%s\" does not exist",
894                                                         DataDir)));
895                 else
896                         ereport(FATAL,
897                                         (errcode_for_file_access(),
898                          errmsg("could not read permissions of directory \"%s\": %m",
899                                         DataDir)));
900         }
901
902         /*
903          * Check if the directory has group or world access.  If so, reject.
904          *
905          * XXX temporarily suppress check when on Windows, because there may not
906          * be proper support for Unix-y file permissions.  Need to think of a
907          * reasonable check to apply on Windows.
908          */
909 #if !defined(WIN32) && !defined(__CYGWIN__)
910         if (stat_buf.st_mode & (S_IRWXG | S_IRWXO))
911                 ereport(FATAL,
912                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
913                                  errmsg("data directory \"%s\" has group or world access",
914                                                 DataDir),
915                                  errdetail("Permissions should be u=rwx (0700).")));
916 #endif
917
918         /* Look for PG_VERSION before looking for pg_control */
919         ValidatePgVersion(DataDir);
920
921         snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
922
923         fp = AllocateFile(path, PG_BINARY_R);
924         if (fp == NULL)
925         {
926                 write_stderr("%s: could not find the database system\n"
927                                          "Expected to find it in the directory \"%s\",\n"
928                                          "but could not open file \"%s\": %s\n",
929                                          progname, DataDir, path, strerror(errno));
930                 ExitPostmaster(2);
931         }
932         FreeFile(fp);
933 }
934
935
936 #ifdef USE_RENDEZVOUS
937
938 /*
939  * empty callback function for DNSServiceRegistrationCreate()
940  */
941 static void
942 reg_reply(DNSServiceRegistrationReplyErrorType errorCode, void *context)
943 {
944
945 }
946 #endif   /* USE_RENDEZVOUS */
947
948
949 /*
950  * Fork away from the controlling terminal (-S option)
951  */
952 static void
953 pmdaemonize(void)
954 {
955 #ifndef WIN32
956         int                     i;
957         pid_t           pid;
958
959 #ifdef LINUX_PROFILE
960         struct itimerval prof_itimer;
961 #endif
962
963 #ifdef LINUX_PROFILE
964         /* see comments in BackendStartup */
965         getitimer(ITIMER_PROF, &prof_itimer);
966 #endif
967
968         pid = fork();
969         if (pid == (pid_t) -1)
970         {
971                 write_stderr("%s: could not fork background process: %s\n",
972                                          progname, strerror(errno));
973                 ExitPostmaster(1);
974         }
975         else if (pid)
976         {                                                       /* parent */
977                 /* Parent should just exit, without doing any atexit cleanup */
978                 _exit(0);
979         }
980
981 #ifdef LINUX_PROFILE
982         setitimer(ITIMER_PROF, &prof_itimer, NULL);
983 #endif
984
985         MyProcPid = PostmasterPid = getpid();           /* reset PID vars to child */
986
987 /* GH: If there's no setsid(), we hopefully don't need silent mode.
988  * Until there's a better solution.
989  */
990 #ifdef HAVE_SETSID
991         if (setsid() < 0)
992         {
993                 write_stderr("%s: could not dissociate from controlling TTY: %s\n",
994                                          progname, strerror(errno));
995                 ExitPostmaster(1);
996         }
997 #endif
998         i = open(NULL_DEV, O_RDWR);
999         dup2(i, 0);
1000         dup2(i, 1);
1001         dup2(i, 2);
1002         close(i);
1003 #else                                                   /* WIN32 */
1004         /* not supported */
1005         elog(FATAL, "SilentMode not supported under WIN32");
1006 #endif   /* WIN32 */
1007 }
1008
1009
/*
 * usage -- print the command-line help text on stdout.
 *
 * progname is substituted into the first two lines.  The -A and -l entries
 * appear only when USE_ASSERT_CHECKING and USE_SSL, respectively, are
 * defined.  Every message is passed through gettext().
 */
static void
usage(const char *progname)
{
    /* The only lines that take an argument go through printf(). */
    printf(gettext("%s is the PostgreSQL server.\n\n"), progname);
    printf(gettext("Usage:\n  %s [OPTION]...\n\n"), progname);

    /* General options */
    fputs(gettext("Options:\n"), stdout);
#ifdef USE_ASSERT_CHECKING
    fputs(gettext("  -A 1|0          enable/disable run-time assert checking\n"), stdout);
#endif
    fputs(gettext("  -B NBUFFERS     number of shared buffers\n"), stdout);
    fputs(gettext("  -c NAME=VALUE   set run-time parameter\n"), stdout);
    fputs(gettext("  -d 1-5          debugging level\n"), stdout);
    fputs(gettext("  -D DATADIR      database directory\n"), stdout);
    fputs(gettext("  -F              turn fsync off\n"), stdout);
    fputs(gettext("  -h HOSTNAME     host name or IP address to listen on\n"), stdout);
    fputs(gettext("  -i              enable TCP/IP connections\n"), stdout);
    fputs(gettext("  -k DIRECTORY    Unix-domain socket location\n"), stdout);
#ifdef USE_SSL
    fputs(gettext("  -l              enable SSL connections\n"), stdout);
#endif
    fputs(gettext("  -N MAX-CONNECT  maximum number of allowed connections\n"), stdout);
    fputs(gettext("  -o OPTIONS      pass \"OPTIONS\" to each server process\n"), stdout);
    fputs(gettext("  -p PORT         port number to listen on\n"), stdout);
    fputs(gettext("  -S              silent mode (start in background without logging output)\n"), stdout);
    fputs(gettext("  --help          show this help, then exit\n"), stdout);
    fputs(gettext("  --version       output version information, then exit\n"), stdout);

    /* Options of interest to developers only */
    fputs(gettext("\nDeveloper options:\n"), stdout);
    fputs(gettext("  -n              do not reinitialize shared memory after abnormal exit\n"), stdout);
    fputs(gettext("  -s              send SIGSTOP to all backend servers if one dies\n"), stdout);

    /* Closing pointer to the documentation and the bug-report address */
    fputs(gettext("\nPlease read the documentation for the complete list of run-time\n"
                  "configuration settings and how to set them on the command line or in\n"
                  "the configuration file.\n\n"
                  "Report bugs to <pgsql-bugs@postgresql.org>.\n"), stdout);
}
1049
1050
1051 /*
1052  * Main idle loop of postmaster
1053  */
1054 static int
1055 ServerLoop(void)
1056 {
1057         fd_set          readmask;
1058         int                     nSockets;
1059         time_t          now,
1060                                 last_touch_time;
1061         struct timeval earlier,
1062                                 later;
1063         struct timezone tz;
1064
1065         gettimeofday(&earlier, &tz);
1066         last_touch_time = time(NULL);
1067
1068         nSockets = initMasks(&readmask);
1069
1070         for (;;)
1071         {
1072                 Port       *port;
1073                 fd_set          rmask;
1074                 struct timeval timeout;
1075                 int                     selres;
1076                 int                     i;
1077
1078                 /*
1079                  * Wait for something to happen.
1080                  *
1081                  * We wait at most one minute, to ensure that the other background
1082                  * tasks handled below get done even when no requests are
1083                  * arriving.
1084                  */
1085                 memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1086
1087                 timeout.tv_sec = 60;
1088                 timeout.tv_usec = 0;
1089
1090                 PG_SETMASK(&UnBlockSig);
1091
1092                 selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1093
1094                 /*
1095                  * Block all signals until we wait again.  (This makes it safe for
1096                  * our signal handlers to do nontrivial work.)
1097                  */
1098                 PG_SETMASK(&BlockSig);
1099
1100                 if (selres < 0)
1101                 {
1102                         if (errno != EINTR && errno != EWOULDBLOCK)
1103                         {
1104                                 ereport(LOG,
1105                                                 (errcode_for_socket_access(),
1106                                                  errmsg("select() failed in postmaster: %m")));
1107                                 return STATUS_ERROR;
1108                         }
1109                 }
1110
1111                 /*
1112                  * New connection pending on any of our sockets? If so, fork a
1113                  * child process to deal with it.
1114                  */
1115                 if (selres > 0)
1116                 {
1117                         /*
1118                          * Select a random seed at the time of first receiving a
1119                          * request.
1120                          */
1121                         while (random_seed == 0)
1122                         {
1123                                 gettimeofday(&later, &tz);
1124
1125                                 /*
1126                                  * We are not sure how much precision is in tv_usec, so we
1127                                  * swap the high and low 16 bits of 'later' and XOR them with
1128                                  * 'earlier'. On the off chance that the result is 0, we
1129                                  * loop until it isn't.
1130                                  */
1131                                 random_seed = earlier.tv_usec ^
1132                                         ((later.tv_usec << 16) |
1133                                          ((later.tv_usec >> 16) & 0xffff));
1134                         }
1135
1136                         for (i = 0; i < MAXLISTEN; i++)
1137                         {
1138                                 if (ListenSocket[i] == -1)
1139                                         break;
1140                                 if (FD_ISSET(ListenSocket[i], &rmask))
1141                                 {
1142                                         port = ConnCreate(ListenSocket[i]);
1143                                         if (port)
1144                                         {
1145                                                 BackendStartup(port);
1146
1147                                                 /*
1148                                                  * We no longer need the open socket or port
1149                                                  * structure in this process
1150                                                  */
1151                                                 StreamClose(port->sock);
1152                                                 ConnFree(port);
1153                                         }
1154                                 }
1155                         }
1156                 }
1157
1158                 /* If we have lost the system logger, try to start a new one */
1159                 if (SysLoggerPID == 0 && Redirect_stderr)
1160                         SysLoggerPID = SysLogger_Start();
1161
1162                 /*
1163                  * If no background writer process is running, and we are not in a
1164                  * state that prevents it, start one.  It doesn't matter if this
1165                  * fails, we'll just try again later.
1166                  */
1167                 if (BgWriterPID == 0 && StartupPID == 0 && !FatalError)
1168                 {
1169                         BgWriterPID = StartBackgroundWriter();
1170                         /* If shutdown is pending, set it going */
1171                         if (Shutdown > NoShutdown && BgWriterPID != 0)
1172                                 kill(BgWriterPID, SIGUSR2);
1173                 }
1174
1175                 /* If we have lost the archiver, try to start a new one */
1176                 if (XLogArchivingActive() && PgArchPID == 0 &&
1177                         StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
1178                         PgArchPID = pgarch_start();
1179
1180                 /* If we have lost the stats collector, try to start a new one */
1181                 if (PgStatPID == 0 &&
1182                         StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
1183                         PgStatPID = pgstat_start();
1184
1185                 /*
1186                  * Touch the socket and lock file at least every ten minutes, to
1187                  * ensure that they are not removed by overzealous /tmp-cleaning
1188                  * tasks.
1189                  */
1190                 now = time(NULL);
1191                 if (now - last_touch_time >= 10 * 60)
1192                 {
1193                         TouchSocketFile();
1194                         TouchSocketLockFile();
1195                         last_touch_time = now;
1196                 }
1197         }
1198 }
1199
1200
1201 /*
1202  * Initialise the masks for select() for the ports we are listening on.
1203  * Return the number of sockets to listen on.
1204  */
1205 static int
1206 initMasks(fd_set *rmask)
1207 {
1208         int                     nsocks = -1;
1209         int                     i;
1210
1211         FD_ZERO(rmask);
1212
1213         for (i = 0; i < MAXLISTEN; i++)
1214         {
1215                 int                     fd = ListenSocket[i];
1216
1217                 if (fd == -1)
1218                         break;
1219                 FD_SET(fd, rmask);
1220                 if (fd > nsocks)
1221                         nsocks = fd;
1222         }
1223
1224         return nsocks + 1;
1225 }
1226
1227
1228 /*
1229  * Read the startup packet and do something according to it.
1230  *
1231  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1232  * not return at all.
1233  *
1234  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1235  * if that's what you want.  Return STATUS_ERROR if you don't want to
1236  * send anything to the client, which would typically be appropriate
1237  * if we detect a communications failure.)
1238  */
1239 static int
1240 ProcessStartupPacket(Port *port, bool SSLdone)
1241 {
1242         int32           len;
1243         void       *buf;
1244         ProtocolVersion proto;
1245         MemoryContext oldcontext;
1246
1247         if (pq_getbytes((char *) &len, 4) == EOF)
1248         {
1249                 /*
1250                  * EOF after SSLdone probably means the client didn't like our
1251                  * response to NEGOTIATE_SSL_CODE.      That's not an error condition,
1252                  * so don't clutter the log with a complaint.
1253                  */
1254                 if (!SSLdone)
1255                         ereport(COMMERROR,
1256                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1257                                          errmsg("incomplete startup packet")));
1258                 return STATUS_ERROR;
1259         }
1260
1261         len = ntohl(len);
1262         len -= 4;
1263
1264         if (len < (int32) sizeof(ProtocolVersion) ||
1265                 len > MAX_STARTUP_PACKET_LENGTH)
1266         {
1267                 ereport(COMMERROR,
1268                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1269                                  errmsg("invalid length of startup packet")));
1270                 return STATUS_ERROR;
1271         }
1272
1273         /*
1274          * Allocate at least the size of an old-style startup packet, plus one
1275          * extra byte, and make sure all are zeroes.  This ensures we will
1276          * have null termination of all strings, in both fixed- and
1277          * variable-length packet layouts.
1278          */
1279         if (len <= (int32) sizeof(StartupPacket))
1280                 buf = palloc0(sizeof(StartupPacket) + 1);
1281         else
1282                 buf = palloc0(len + 1);
1283
1284         if (pq_getbytes(buf, len) == EOF)
1285         {
1286                 ereport(COMMERROR,
1287                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1288                                  errmsg("incomplete startup packet")));
1289                 return STATUS_ERROR;
1290         }
1291
1292         /*
1293          * The first field is either a protocol version number or a special
1294          * request code.
1295          */
1296         port->proto = proto = ntohl(*((ProtocolVersion *) buf));
1297
1298         if (proto == CANCEL_REQUEST_CODE)
1299         {
1300                 processCancelRequest(port, buf);
1301                 return 127;                             /* XXX */
1302         }
1303
1304         if (proto == NEGOTIATE_SSL_CODE && !SSLdone)
1305         {
1306                 char            SSLok;
1307
1308 #ifdef USE_SSL
1309                 /* No SSL when disabled or on Unix sockets */
1310                 if (!EnableSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
1311                         SSLok = 'N';
1312                 else
1313                         SSLok = 'S';            /* Support for SSL */
1314 #else
1315                 SSLok = 'N';                    /* No support for SSL */
1316 #endif
1317                 if (send(port->sock, &SSLok, 1, 0) != 1)
1318                 {
1319                         ereport(COMMERROR,
1320                                         (errcode_for_socket_access(),
1321                                  errmsg("failed to send SSL negotiation response: %m")));
1322                         return STATUS_ERROR;    /* close the connection */
1323                 }
1324
1325 #ifdef USE_SSL
1326                 if (SSLok == 'S' && secure_open_server(port) == -1)
1327                         return STATUS_ERROR;
1328 #endif
1329                 /* regular startup packet, cancel, etc packet should follow... */
1330                 /* but not another SSL negotiation request */
1331                 return ProcessStartupPacket(port, true);
1332         }
1333
1334         /* Could add additional special packet types here */
1335
1336         /*
1337          * Set FrontendProtocol now so that ereport() knows what format to
1338          * send if we fail during startup.
1339          */
1340         FrontendProtocol = proto;
1341
1342         /* Check we can handle the protocol the frontend is using. */
1343
1344         if (PG_PROTOCOL_MAJOR(proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) ||
1345           PG_PROTOCOL_MAJOR(proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) ||
1346         (PG_PROTOCOL_MAJOR(proto) == PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) &&
1347          PG_PROTOCOL_MINOR(proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST)))
1348                 ereport(FATAL,
1349                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1350                                  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
1351                                           PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
1352                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST),
1353                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST),
1354                                                 PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST))));
1355
1356         /*
1357          * Now fetch parameters out of startup packet and save them into the
1358          * Port structure.      All data structures attached to the Port struct
1359          * must be allocated in TopMemoryContext so that they won't disappear
1360          * when we pass them to PostgresMain (see BackendRun).  We need not
1361          * worry about leaking this storage on failure, since we aren't in the
1362          * postmaster process anymore.
1363          */
1364         oldcontext = MemoryContextSwitchTo(TopMemoryContext);
1365
1366         if (PG_PROTOCOL_MAJOR(proto) >= 3)
1367         {
1368                 int32           offset = sizeof(ProtocolVersion);
1369
1370                 /*
1371                  * Scan packet body for name/option pairs.      We can assume any
1372                  * string beginning within the packet body is null-terminated,
1373                  * thanks to zeroing extra byte above.
1374                  */
1375                 port->guc_options = NIL;
1376
1377                 while (offset < len)
1378                 {
1379                         char       *nameptr = ((char *) buf) + offset;
1380                         int32           valoffset;
1381                         char       *valptr;
1382
1383                         if (*nameptr == '\0')
1384                                 break;                  /* found packet terminator */
1385                         valoffset = offset + strlen(nameptr) + 1;
1386                         if (valoffset >= len)
1387                                 break;                  /* missing value, will complain below */
1388                         valptr = ((char *) buf) + valoffset;
1389
1390                         if (strcmp(nameptr, "database") == 0)
1391                                 port->database_name = pstrdup(valptr);
1392                         else if (strcmp(nameptr, "user") == 0)
1393                                 port->user_name = pstrdup(valptr);
1394                         else if (strcmp(nameptr, "options") == 0)
1395                                 port->cmdline_options = pstrdup(valptr);
1396                         else
1397                         {
1398                                 /* Assume it's a generic GUC option */
1399                                 port->guc_options = lappend(port->guc_options,
1400                                                                                         pstrdup(nameptr));
1401                                 port->guc_options = lappend(port->guc_options,
1402                                                                                         pstrdup(valptr));
1403                         }
1404                         offset = valoffset + strlen(valptr) + 1;
1405                 }
1406
1407                 /*
1408                  * If we didn't find a packet terminator exactly at the end of the
1409                  * given packet length, complain.
1410                  */
1411                 if (offset != len - 1)
1412                         ereport(FATAL,
1413                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1414                                          errmsg("invalid startup packet layout: expected terminator as last byte")));
1415         }
1416         else
1417         {
1418                 /*
1419                  * Get the parameters from the old-style, fixed-width-fields
1420                  * startup packet as C strings.  The packet destination was
1421                  * cleared first so a short packet has zeros silently added.  We
1422                  * have to be prepared to truncate the pstrdup result for oversize
1423                  * fields, though.
1424                  */
1425                 StartupPacket *packet = (StartupPacket *) buf;
1426
1427                 port->database_name = pstrdup(packet->database);
1428                 if (strlen(port->database_name) > sizeof(packet->database))
1429                         port->database_name[sizeof(packet->database)] = '\0';
1430                 port->user_name = pstrdup(packet->user);
1431                 if (strlen(port->user_name) > sizeof(packet->user))
1432                         port->user_name[sizeof(packet->user)] = '\0';
1433                 port->cmdline_options = pstrdup(packet->options);
1434                 if (strlen(port->cmdline_options) > sizeof(packet->options))
1435                         port->cmdline_options[sizeof(packet->options)] = '\0';
1436                 port->guc_options = NIL;
1437         }
1438
1439         /* Check a user name was given. */
1440         if (port->user_name == NULL || port->user_name[0] == '\0')
1441                 ereport(FATAL,
1442                                 (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
1443                  errmsg("no PostgreSQL user name specified in startup packet")));
1444
1445         /* The database defaults to the user name. */
1446         if (port->database_name == NULL || port->database_name[0] == '\0')
1447                 port->database_name = pstrdup(port->user_name);
1448
1449         if (Db_user_namespace)
1450         {
1451                 /*
1452                  * If user@, it is a global user, remove '@'. We only want to do
1453                  * this if there is an '@' at the end and no earlier in the user
1454                  * string or they may fake as a local user of another database
1455                  * attaching to this database.
1456                  */
1457                 if (strchr(port->user_name, '@') ==
1458                         port->user_name + strlen(port->user_name) - 1)
1459                         *strchr(port->user_name, '@') = '\0';
1460                 else
1461                 {
1462                         /* Append '@' and dbname */
1463                         char       *db_user;
1464
1465                         db_user = palloc(strlen(port->user_name) +
1466                                                          strlen(port->database_name) + 2);
1467                         sprintf(db_user, "%s@%s", port->user_name, port->database_name);
1468                         port->user_name = db_user;
1469                 }
1470         }
1471
1472         /*
1473          * Truncate given database and user names to length of a Postgres
1474          * name.  This avoids lookup failures when overlength names are given.
1475          */
1476         if (strlen(port->database_name) >= NAMEDATALEN)
1477                 port->database_name[NAMEDATALEN - 1] = '\0';
1478         if (strlen(port->user_name) >= NAMEDATALEN)
1479                 port->user_name[NAMEDATALEN - 1] = '\0';
1480
1481         /*
1482          * Done putting stuff in TopMemoryContext.
1483          */
1484         MemoryContextSwitchTo(oldcontext);
1485
1486         /*
1487          * If we're going to reject the connection due to database state, say
1488          * so now instead of wasting cycles on an authentication exchange.
1489          * (This also allows a pg_ping utility to be written.)
1490          */
1491         switch (port->canAcceptConnections)
1492         {
1493                 case CAC_STARTUP:
1494                         ereport(FATAL,
1495                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1496                                          errmsg("the database system is starting up")));
1497                         break;
1498                 case CAC_SHUTDOWN:
1499                         ereport(FATAL,
1500                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1501                                          errmsg("the database system is shutting down")));
1502                         break;
1503                 case CAC_RECOVERY:
1504                         ereport(FATAL,
1505                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1506                                          errmsg("the database system is in recovery mode")));
1507                         break;
1508                 case CAC_TOOMANY:
1509                         ereport(FATAL,
1510                                         (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
1511                                          errmsg("sorry, too many clients already")));
1512                         break;
1513                 case CAC_OK:
1514                 default:
1515                         break;
1516         }
1517
1518         return STATUS_OK;
1519 }
1520
1521
1522 /*
1523  * The client has sent a cancel request packet, not a normal
1524  * start-a-new-connection packet.  Perform the necessary processing.
1525  * Nothing is sent back to the client.
1526  */
1527 static void
1528 processCancelRequest(Port *port, void *pkt)
1529 {
1530         CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
1531         int                     backendPID;
1532         long            cancelAuthCode;
1533         Backend    *bp;
1534
1535 #ifndef EXEC_BACKEND
1536         Dlelem     *curr;
1537
1538 #else
1539         int                     i;
1540 #endif
1541
1542         backendPID = (int) ntohl(canc->backendPID);
1543         cancelAuthCode = (long) ntohl(canc->cancelAuthCode);
1544
1545         /*
1546          * See if we have a matching backend.  In the EXEC_BACKEND case, we
1547          * can no longer access the postmaster's own backend list, and must
1548          * rely on the duplicate array in shared memory.
1549          */
1550 #ifndef EXEC_BACKEND
1551         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
1552         {
1553                 bp = (Backend *) DLE_VAL(curr);
1554 #else
1555         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
1556         {
1557                 bp = (Backend *) &ShmemBackendArray[i];
1558 #endif
1559                 if (bp->pid == backendPID)
1560                 {
1561                         if (bp->cancel_key == cancelAuthCode)
1562                         {
1563                                 /* Found a match; signal that backend to cancel current op */
1564                                 ereport(DEBUG2,
1565                                                 (errmsg_internal("processing cancel request: sending SIGINT to process %d",
1566                                                                                  backendPID)));
1567                                 kill(bp->pid, SIGINT);
1568                         }
1569                         else
1570                                 /* Right PID, wrong key: no way, Jose */
1571                                 ereport(DEBUG2,
1572                                                 (errmsg_internal("bad key in cancel request for process %d",
1573                                                                                  backendPID)));
1574                         return;
1575                 }
1576         }
1577
1578         /* No matching backend */
1579         ereport(DEBUG2,
1580                         (errmsg_internal("bad pid in cancel request for process %d",
1581                                                          backendPID)));
1582 }
1583
1584 /*
1585  * canAcceptConnections --- check to see if database state allows connections.
1586  */
1587 static enum CAC_state
1588 canAcceptConnections(void)
1589 {
1590         /* Can't start backends when in startup/shutdown/recovery state. */
1591         if (Shutdown > NoShutdown)
1592                 return CAC_SHUTDOWN;
1593         if (StartupPID)
1594                 return CAC_STARTUP;
1595         if (FatalError)
1596                 return CAC_RECOVERY;
1597
1598         /*
1599          * Don't start too many children.
1600          *
1601          * We allow more connections than we can have backends here because some
1602          * might still be authenticating; they might fail auth, or some
1603          * existing backend might exit before the auth cycle is completed. The
1604          * exact MaxBackends limit is enforced when a new backend tries to
1605          * join the shared-inval backend array.
1606          */
1607         if (CountChildren() >= 2 * MaxBackends)
1608                 return CAC_TOOMANY;
1609
1610         return CAC_OK;
1611 }
1612
1613
1614 /*
1615  * ConnCreate -- create a local connection data structure
1616  */
1617 static Port *
1618 ConnCreate(int serverFd)
1619 {
1620         Port       *port;
1621
1622         if (!(port = (Port *) calloc(1, sizeof(Port))))
1623         {
1624                 ereport(LOG,
1625                                 (errcode(ERRCODE_OUT_OF_MEMORY),
1626                                  errmsg("out of memory")));
1627                 ExitPostmaster(1);
1628         }
1629
1630         if (StreamConnection(serverFd, port) != STATUS_OK)
1631         {
1632                 StreamClose(port->sock);
1633                 ConnFree(port);
1634                 port = NULL;
1635         }
1636         else
1637         {
1638                 /*
1639                  * Precompute password salt values to use for this connection.
1640                  * It's slightly annoying to do this long in advance of knowing
1641                  * whether we'll need 'em or not, but we must do the random()
1642                  * calls before we fork, not after.  Else the postmaster's random
1643                  * sequence won't get advanced, and all backends would end up
1644                  * using the same salt...
1645                  */
1646                 RandomSalt(port->cryptSalt, port->md5Salt);
1647         }
1648
1649         return port;
1650 }
1651
1652
1653 /*
1654  * ConnFree -- free a local connection data structure
1655  */
1656 static void
1657 ConnFree(Port *conn)
1658 {
1659 #ifdef USE_SSL
1660         secure_close(conn);
1661 #endif
1662         free(conn);
1663 }
1664
1665
1666 /*
1667  * ClosePostmasterPorts -- close all the postmaster's open sockets
1668  *
1669  * This is called during child process startup to release file descriptors
1670  * that are not needed by that child process.  The postmaster still has
1671  * them open, of course.
1672  *
1673  * Note: we pass am_syslogger as a boolean because we don't want to set
1674  * the global variable yet when this is called.
1675  */
1676 void
1677 ClosePostmasterPorts(bool am_syslogger)
1678 {
1679         int                     i;
1680
1681         /* Close the listen sockets */
1682         for (i = 0; i < MAXLISTEN; i++)
1683         {
1684                 if (ListenSocket[i] != -1)
1685                 {
1686                         StreamClose(ListenSocket[i]);
1687                         ListenSocket[i] = -1;
1688                 }
1689         }
1690
1691         /* If using syslogger, close the read side of the pipe */
1692         if (!am_syslogger)
1693         {
1694 #ifndef WIN32
1695                 if (syslogPipe[0] >= 0)
1696                         close(syslogPipe[0]);
1697                 syslogPipe[0] = -1;
1698 #else
1699                 if (syslogPipe[0])
1700                         CloseHandle(syslogPipe[0]);
1701                 syslogPipe[0] = 0;
1702 #endif
1703         }
1704 }
1705
1706
1707 /*
1708  * reset_shared -- reset shared memory and semaphores
1709  */
1710 static void
1711 reset_shared(unsigned short port)
1712 {
1713         /*
1714          * Create or re-create shared memory and semaphores.
1715          *
1716          * Note: in each "cycle of life" we will normally assign the same IPC
1717          * keys (if using SysV shmem and/or semas), since the port number is
1718          * used to determine IPC keys.  This helps ensure that we will clean
1719          * up dead IPC objects if the postmaster crashes and is restarted.
1720          */
1721         CreateSharedMemoryAndSemaphores(false, MaxBackends, port);
1722 }
1723
1724
1725 /*
1726  * SIGHUP -- reread config files, and tell children to do same
1727  */
1728 static void
1729 SIGHUP_handler(SIGNAL_ARGS)
1730 {
1731         int                     save_errno = errno;
1732
1733         PG_SETMASK(&BlockSig);
1734
1735         if (Shutdown <= SmartShutdown)
1736         {
1737                 ereport(LOG,
1738                          (errmsg("received SIGHUP, reloading configuration files")));
1739                 ProcessConfigFile(PGC_SIGHUP);
1740                 SignalChildren(SIGHUP);
1741                 if (BgWriterPID != 0)
1742                         kill(BgWriterPID, SIGHUP);
1743                 if (PgArchPID != 0)
1744                         kill(PgArchPID, SIGHUP);
1745                 if (SysLoggerPID != 0)
1746                         kill(SysLoggerPID, SIGHUP);
1747                 /* PgStatPID does not currently need SIGHUP */
1748                 load_hba();
1749                 load_ident();
1750
1751 #ifdef EXEC_BACKEND
1752                 /* Update the starting-point file for future children */
1753                 write_nondefault_variables(PGC_SIGHUP);
1754 #endif
1755         }
1756
1757         PG_SETMASK(&UnBlockSig);
1758
1759         errno = save_errno;
1760 }
1761
1762
1763 /*
1764  * pmdie -- signal handler for processing various postmaster signals.
1765  */
1766 static void
1767 pmdie(SIGNAL_ARGS)
1768 {
1769         int                     save_errno = errno;
1770
1771         PG_SETMASK(&BlockSig);
1772
1773         ereport(DEBUG2,
1774                         (errmsg_internal("postmaster received signal %d",
1775                                                          postgres_signal_arg)));
1776
1777         switch (postgres_signal_arg)
1778         {
1779                 case SIGTERM:
1780
1781                         /*
1782                          * Smart Shutdown:
1783                          *
1784                          * Wait for children to end their work, then shut down.
1785                          */
1786                         if (Shutdown >= SmartShutdown)
1787                                 break;
1788                         Shutdown = SmartShutdown;
1789                         ereport(LOG,
1790                                         (errmsg("received smart shutdown request")));
1791
1792                         if (DLGetHead(BackendList))
1793                                 break;                  /* let reaper() handle this */
1794
1795                         /*
1796                          * No children left. Begin shutdown of data base system.
1797                          */
1798                         if (StartupPID != 0 || FatalError)
1799                                 break;                  /* let reaper() handle this */
1800                         /* Start the bgwriter if not running */
1801                         if (BgWriterPID == 0)
1802                                 BgWriterPID = StartBackgroundWriter();
1803                         /* And tell it to shut down */
1804                         if (BgWriterPID != 0)
1805                                 kill(BgWriterPID, SIGUSR2);
1806                         /* Tell pgarch to shut down too; nothing left for it to do */
1807                         if (PgArchPID != 0)
1808                                 kill(PgArchPID, SIGQUIT);
1809                         /* Tell pgstat to shut down too; nothing left for it to do */
1810                         if (PgStatPID != 0)
1811                                 kill(PgStatPID, SIGQUIT);
1812                         break;
1813
1814                 case SIGINT:
1815
1816                         /*
1817                          * Fast Shutdown:
1818                          *
1819                          * Abort all children with SIGTERM (rollback active transactions
1820                          * and exit) and shut down when they are gone.
1821                          */
1822                         if (Shutdown >= FastShutdown)
1823                                 break;
1824                         Shutdown = FastShutdown;
1825                         ereport(LOG,
1826                                         (errmsg("received fast shutdown request")));
1827
1828                         if (DLGetHead(BackendList))
1829                         {
1830                                 if (!FatalError)
1831                                 {
1832                                         ereport(LOG,
1833                                                         (errmsg("aborting any active transactions")));
1834                                         SignalChildren(SIGTERM);
1835                                         /* reaper() does the rest */
1836                                 }
1837                                 break;
1838                         }
1839
1840                         /*
1841                          * No children left. Begin shutdown of data base system.
1842                          *
1843                          * Note: if we previously got SIGTERM then we may send SIGUSR2 to
1844                          * the bgwriter a second time here.  This should be harmless.
1845                          */
1846                         if (StartupPID != 0 || FatalError)
1847                                 break;                  /* let reaper() handle this */
1848                         /* Start the bgwriter if not running */
1849                         if (BgWriterPID == 0)
1850                                 BgWriterPID = StartBackgroundWriter();
1851                         /* And tell it to shut down */
1852                         if (BgWriterPID != 0)
1853                                 kill(BgWriterPID, SIGUSR2);
1854                         /* Tell pgarch to shut down too; nothing left for it to do */
1855                         if (PgArchPID != 0)
1856                                 kill(PgArchPID, SIGQUIT);
1857                         /* Tell pgstat to shut down too; nothing left for it to do */
1858                         if (PgStatPID != 0)
1859                                 kill(PgStatPID, SIGQUIT);
1860                         break;
1861
1862                 case SIGQUIT:
1863
1864                         /*
1865                          * Immediate Shutdown:
1866                          *
1867                          * abort all children with SIGQUIT and exit without attempt to
1868                          * properly shut down data base system.
1869                          */
1870                         ereport(LOG,
1871                                         (errmsg("received immediate shutdown request")));
1872                         if (StartupPID != 0)
1873                                 kill(StartupPID, SIGQUIT);
1874                         if (BgWriterPID != 0)
1875                                 kill(BgWriterPID, SIGQUIT);
1876                         if (PgArchPID != 0)
1877                                 kill(PgArchPID, SIGQUIT);
1878                         if (PgStatPID != 0)
1879                                 kill(PgStatPID, SIGQUIT);
1880                         if (DLGetHead(BackendList))
1881                                 SignalChildren(SIGQUIT);
1882                         ExitPostmaster(0);
1883                         break;
1884         }
1885
1886         PG_SETMASK(&UnBlockSig);
1887
1888         errno = save_errno;
1889 }
1890
1891 /*
1892  * Reaper -- signal handler to cleanup after a backend (child) dies.
1893  */
1894 static void
1895 reaper(SIGNAL_ARGS)
1896 {
1897         int                     save_errno = errno;
1898
1899 #ifdef HAVE_WAITPID
1900         int                     status;                 /* backend exit status */
1901
1902 #else
1903 #ifndef WIN32
1904         union wait      status;                 /* backend exit status */
1905 #endif
1906 #endif
1907         int                     exitstatus;
1908         int                     pid;                    /* process id of dead backend */
1909
1910         PG_SETMASK(&BlockSig);
1911
1912         ereport(DEBUG4,
1913                         (errmsg_internal("reaping dead processes")));
1914 #ifdef HAVE_WAITPID
1915         while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
1916         {
1917                 exitstatus = status;
1918 #else
1919 #ifndef WIN32
1920         while ((pid = wait3(&status, WNOHANG, NULL)) > 0)
1921         {
1922                 exitstatus = status.w_status;
1923 #else
1924         while ((pid = win32_waitpid(&exitstatus)) > 0)
1925         {
1926                 /*
1927                  * We need to do this here, and not in CleanupBackend, since this
1928                  * is to be called on all children when we are done with them.
1929                  * Could move to LogChildExit, but that seems like asking for
1930                  * future trouble...
1931                  */
1932                 win32_RemoveChild(pid);
1933 #endif   /* WIN32 */
1934 #endif   /* HAVE_WAITPID */
1935
1936                 /*
1937                  * Check if this child was a startup process.
1938                  */
1939                 if (StartupPID != 0 && pid == StartupPID)
1940                 {
1941                         StartupPID = 0;
1942                         if (exitstatus != 0)
1943                         {
1944                                 LogChildExit(LOG, gettext("startup process"),
1945                                                          pid, exitstatus);
1946                                 ereport(LOG,
1947                                                 (errmsg("aborting startup due to startup process failure")));
1948                                 ExitPostmaster(1);
1949                         }
1950
1951                         /*
1952                          * Startup succeeded - we are done with system startup or
1953                          * recovery.
1954                          */
1955                         FatalError = false;
1956
1957                         /*
1958                          * Crank up the background writer.      It doesn't matter if this
1959                          * fails, we'll just try again later.
1960                          */
1961                         Assert(BgWriterPID == 0);
1962                         BgWriterPID = StartBackgroundWriter();
1963
1964                         /*
1965                          * Go to shutdown mode if a shutdown request was pending.
1966                          * Otherwise, try to start the archiver and stats collector
1967                          * too.
1968                          */
1969                         if (Shutdown > NoShutdown && BgWriterPID != 0)
1970                                 kill(BgWriterPID, SIGUSR2);
1971                         else if (Shutdown == NoShutdown)
1972                         {
1973                                 if (XLogArchivingActive() && PgArchPID == 0)
1974                                         PgArchPID = pgarch_start();
1975                                 if (PgStatPID == 0)
1976                                         PgStatPID = pgstat_start();
1977                         }
1978
1979                         continue;
1980                 }
1981
1982                 /*
1983                  * Was it the bgwriter?
1984                  */
1985                 if (BgWriterPID != 0 && pid == BgWriterPID)
1986                 {
1987                         BgWriterPID = 0;
1988                         if (exitstatus == 0 && Shutdown > NoShutdown &&
1989                                 !FatalError && !DLGetHead(BackendList))
1990                         {
1991                                 /*
1992                                  * Normal postmaster exit is here: we've seen normal exit
1993                                  * of the bgwriter after it's been told to shut down. We
1994                                  * expect that it wrote a shutdown checkpoint.  (If for
1995                                  * some reason it didn't, recovery will occur on next
1996                                  * postmaster start.)
1997                                  *
1998                                  * Note: we do not wait around for exit of the archiver or
1999                                  * stats processes.  They've been sent SIGQUIT by this
2000                                  * point, and in any case contain logic to commit
2001                                  * hara-kiri if they notice the postmaster is gone.
2002                                  */
2003                                 ExitPostmaster(0);
2004                         }
2005
2006                         /*
2007                          * Any unexpected exit of the bgwriter is treated as a crash.
2008                          */
2009                         HandleChildCrash(pid, exitstatus,
2010                                                          gettext("background writer process"));
2011                         continue;
2012                 }
2013
2014                 /*
2015                  * Was it the archiver?  If so, just try to start a new one; no
2016                  * need to force reset of the rest of the system.  (If fail, we'll
2017                  * try again in future cycles of the main loop.)
2018                  */
2019                 if (PgArchPID != 0 && pid == PgArchPID)
2020                 {
2021                         PgArchPID = 0;
2022                         if (exitstatus != 0)
2023                                 LogChildExit(LOG, gettext("archiver process"),
2024                                                          pid, exitstatus);
2025                         if (XLogArchivingActive() &&
2026                                 StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
2027                                 PgArchPID = pgarch_start();
2028                         continue;
2029                 }
2030
2031                 /*
2032                  * Was it the statistics collector?  If so, just try to start a
2033                  * new one; no need to force reset of the rest of the system.  (If
2034                  * fail, we'll try again in future cycles of the main loop.)
2035                  */
2036                 if (PgStatPID != 0 && pid == PgStatPID)
2037                 {
2038                         PgStatPID = 0;
2039                         if (exitstatus != 0)
2040                                 LogChildExit(LOG, gettext("statistics collector process"),
2041                                                          pid, exitstatus);
2042                         if (StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
2043                                 PgStatPID = pgstat_start();
2044                         continue;
2045                 }
2046
2047                 /* Was it the system logger? try to start a new one */
2048                 if (SysLoggerPID != 0 && pid == SysLoggerPID)
2049                 {
2050                         SysLoggerPID = 0;
2051                         /* for safety's sake, launch new logger *first* */
2052                         SysLoggerPID = SysLogger_Start();
2053                         if (exitstatus != 0)
2054                                 LogChildExit(LOG, gettext("system logger process"),
2055                                                          pid, exitstatus);
2056                         continue;
2057                 }
2058
2059                 /*
2060                  * Else do standard backend child cleanup.
2061                  */
2062                 CleanupBackend(pid, exitstatus);
2063         }                                                       /* loop over pending child-death reports */
2064
2065         if (FatalError)
2066         {
2067                 /*
2068                  * Wait for all important children to exit, then reset shmem and
2069                  * StartupDataBase.  (We can ignore the archiver and stats
2070                  * processes here since they are not connected to shmem.)
2071                  */
2072                 if (DLGetHead(BackendList) || StartupPID != 0 || BgWriterPID != 0)
2073                         goto reaper_done;
2074                 ereport(LOG,
2075                         (errmsg("all server processes terminated; reinitializing")));
2076
2077                 shmem_exit(0);
2078                 reset_shared(PostPortNumber);
2079
2080                 StartupPID = StartupDataBase();
2081
2082                 goto reaper_done;
2083         }
2084
2085         if (Shutdown > NoShutdown)
2086         {
2087                 if (DLGetHead(BackendList) || StartupPID != 0)
2088                         goto reaper_done;
2089                 /* Start the bgwriter if not running */
2090                 if (BgWriterPID == 0)
2091                         BgWriterPID = StartBackgroundWriter();
2092                 /* And tell it to shut down */
2093                 if (BgWriterPID != 0)
2094                         kill(BgWriterPID, SIGUSR2);
2095                 /* Tell pgarch to shut down too; nothing left for it to do */
2096                 if (PgArchPID != 0)
2097                         kill(PgArchPID, SIGQUIT);
2098                 /* Tell pgstat to shut down too; nothing left for it to do */
2099                 if (PgStatPID != 0)
2100                         kill(PgStatPID, SIGQUIT);
2101         }
2102
2103 reaper_done:
2104         PG_SETMASK(&UnBlockSig);
2105
2106         errno = save_errno;
2107 }
2108
2109
2110 /*
2111  * CleanupBackend -- cleanup after terminated backend.
2112  *
2113  * Remove all local state associated with backend.
2114  */
2115 static void
2116 CleanupBackend(int pid,
2117                            int exitstatus)      /* child's exit status. */
2118 {
2119         Dlelem     *curr;
2120
2121         LogChildExit(DEBUG2, gettext("server process"), pid, exitstatus);
2122
2123         /*
2124          * If a backend dies in an ugly way (i.e. exit status not 0) then we
2125          * must signal all other backends to quickdie.  If exit status is zero
2126          * we assume everything is hunky dory and simply remove the backend
2127          * from the active backend list.
2128          */
2129         if (exitstatus != 0)
2130         {
2131                 HandleChildCrash(pid, exitstatus, gettext("server process"));
2132                 return;
2133         }
2134
2135         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2136         {
2137                 Backend    *bp = (Backend *) DLE_VAL(curr);
2138
2139                 if (bp->pid == pid)
2140                 {
2141                         DLRemove(curr);
2142                         free(bp);
2143                         DLFreeElem(curr);
2144 #ifdef EXEC_BACKEND
2145                         ShmemBackendArrayRemove(pid);
2146 #endif
2147                         /* Tell the collector about backend termination */
2148                         pgstat_beterm(pid);
2149                         break;
2150                 }
2151         }
2152 }
2153
2154 /*
2155  * HandleChildCrash -- cleanup after failed backend or bgwriter.
2156  *
2157  * The objectives here are to clean up our local state about the child
2158  * process, and to signal all other remaining children to quickdie.
2159  */
2160 static void
2161 HandleChildCrash(int pid, int exitstatus, const char *procname)
2162 {
2163         Dlelem     *curr,
2164                            *next;
2165         Backend    *bp;
2166
2167         /*
2168          * Make log entry unless there was a previous crash (if so, nonzero
2169          * exit status is to be expected in SIGQUIT response; don't clutter
2170          * log)
2171          */
2172         if (!FatalError)
2173         {
2174                 LogChildExit(LOG, procname, pid, exitstatus);
2175                 ereport(LOG,
2176                           (errmsg("terminating any other active server processes")));
2177         }
2178
2179         /* Process regular backends */
2180         for (curr = DLGetHead(BackendList); curr; curr = next)
2181         {
2182                 next = DLGetSucc(curr);
2183                 bp = (Backend *) DLE_VAL(curr);
2184                 if (bp->pid == pid)
2185                 {
2186                         /*
2187                          * Found entry for freshly-dead backend, so remove it.
2188                          */
2189                         DLRemove(curr);
2190                         free(bp);
2191                         DLFreeElem(curr);
2192 #ifdef EXEC_BACKEND
2193                         ShmemBackendArrayRemove(pid);
2194 #endif
2195                         /* Tell the collector about backend termination */
2196                         pgstat_beterm(pid);
2197                         /* Keep looping so we can signal remaining backends */
2198                 }
2199                 else
2200                 {
2201                         /*
2202                          * This backend is still alive.  Unless we did so already,
2203                          * tell it to commit hara-kiri.
2204                          *
2205                          * SIGQUIT is the special signal that says exit without proc_exit
2206                          * and let the user know what's going on. But if SendStop is
2207                          * set (-s on command line), then we send SIGSTOP instead, so
2208                          * that we can get core dumps from all backends by hand.
2209                          */
2210                         if (!FatalError)
2211                         {
2212                                 ereport(DEBUG2,
2213                                                 (errmsg_internal("sending %s to process %d",
2214                                                                           (SendStop ? "SIGSTOP" : "SIGQUIT"),
2215                                                                                  (int) bp->pid)));
2216                                 kill(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
2217                         }
2218                 }
2219         }
2220
2221         /* Take care of the bgwriter too */
2222         if (pid == BgWriterPID)
2223                 BgWriterPID = 0;
2224         else if (BgWriterPID != 0 && !FatalError)
2225         {
2226                 ereport(DEBUG2,
2227                                 (errmsg_internal("sending %s to process %d",
2228                                                                  (SendStop ? "SIGSTOP" : "SIGQUIT"),
2229                                                                  (int) BgWriterPID)));
2230                 kill(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
2231         }
2232
2233         /* Force a power-cycle of the pgarch process too */
2234         /* (Shouldn't be necessary, but just for luck) */
2235         if (PgArchPID != 0 && !FatalError)
2236         {
2237                 ereport(DEBUG2,
2238                                 (errmsg_internal("sending %s to process %d",
2239                                                                  "SIGQUIT",
2240                                                                  (int) PgArchPID)));
2241                 kill(PgArchPID, SIGQUIT);
2242         }
2243
2244         /* Force a power-cycle of the pgstat processes too */
2245         /* (Shouldn't be necessary, but just for luck) */
2246         if (PgStatPID != 0 && !FatalError)
2247         {
2248                 ereport(DEBUG2,
2249                                 (errmsg_internal("sending %s to process %d",
2250                                                                  "SIGQUIT",
2251                                                                  (int) PgStatPID)));
2252                 kill(PgStatPID, SIGQUIT);
2253         }
2254
2255         /* We do NOT restart the syslogger */
2256
2257         FatalError = true;
2258 }
2259
/*
 * LogChildExit -- report how a child process ended.
 *
 * lev is the ereport severity to use, procname is a noun phrase naming the
 * child (for example "server process"), pid is the child's process ID and
 * exitstatus is its wait status.  One of three messages is emitted,
 * depending on whether the status decodes as a normal exit, as termination
 * by a signal, or as neither of those.
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
	/* Normal exit: report the exit code */
	if (WIFEXITED(exitstatus))
	{
		ereport(lev,

		/*
		 * translator: %s is a noun phrase describing a child process,
		 * such as "server process"
		 */
				(errmsg("%s (PID %d) exited with exit code %d",
						procname, pid, WEXITSTATUS(exitstatus))));
		return;
	}

	/* Killed by a signal: report the signal number */
	if (WIFSIGNALED(exitstatus))
	{
		ereport(lev,

		/*
		 * translator: %s is a noun phrase describing a child process,
		 * such as "server process"
		 */
				(errmsg("%s (PID %d) was terminated by signal %d",
						procname, pid, WTERMSIG(exitstatus))));
		return;
	}

	/* Anything else: just print the undecoded status word */
	ereport(lev,

	/*
	 * translator: %s is a noun phrase describing a child process,
	 * such as "server process"
	 */
			(errmsg("%s (PID %d) exited with unexpected status %d",
					procname, pid, exitstatus)));
}
2294
2295 /*
2296  * Send a signal to all backend children (but NOT special children)
2297  */
2298 static void
2299 SignalChildren(int signal)
2300 {
2301         Dlelem     *curr;
2302
2303         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2304         {
2305                 Backend    *bp = (Backend *) DLE_VAL(curr);
2306
2307                 ereport(DEBUG4,
2308                                 (errmsg_internal("sending signal %d to process %d",
2309                                                                  signal, (int) bp->pid)));
2310                 kill(bp->pid, signal);
2311         }
2312 }
2313
2314 /*
2315  * BackendStartup -- start backend process
2316  *
2317  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
2318  */
2319 static int
2320 BackendStartup(Port *port)
2321 {
2322         Backend    *bn;                         /* for backend cleanup */
2323         pid_t           pid;
2324
2325 #ifdef LINUX_PROFILE
2326         struct itimerval prof_itimer;
2327 #endif
2328
2329         /*
2330          * Compute the cancel key that will be assigned to this backend. The
2331          * backend will have its own copy in the forked-off process' value of
2332          * MyCancelKey, so that it can transmit the key to the frontend.
2333          */
2334         MyCancelKey = PostmasterRandom();
2335
2336         /*
2337          * Make room for backend data structure.  Better before the fork() so
2338          * we can handle failure cleanly.
2339          */
2340         bn = (Backend *) malloc(sizeof(Backend));
2341         if (!bn)
2342         {
2343                 ereport(LOG,
2344                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2345                                  errmsg("out of memory")));
2346                 return STATUS_ERROR;
2347         }
2348
2349         /* Pass down canAcceptConnections state (kluge for EXEC_BACKEND case) */
2350         port->canAcceptConnections = canAcceptConnections();
2351
2352         /*
2353          * Flush stdio channels just before fork, to avoid double-output
2354          * problems. Ideally we'd use fflush(NULL) here, but there are still a
2355          * few non-ANSI stdio libraries out there (like SunOS 4.1.x) that
2356          * coredump if we do. Presently stdout and stderr are the only stdio
2357          * output channels used by the postmaster, so fflush'ing them should
2358          * be sufficient.
2359          */
2360         fflush(stdout);
2361         fflush(stderr);
2362
2363 #ifdef EXEC_BACKEND
2364
2365         pid = backend_forkexec(port);
2366
2367 #else                                                   /* !EXEC_BACKEND */
2368
2369 #ifdef LINUX_PROFILE
2370
2371         /*
2372          * Linux's fork() resets the profiling timer in the child process. If
2373          * we want to profile child processes then we need to save and restore
2374          * the timer setting.  This is a waste of time if not profiling,
2375          * however, so only do it if commanded by specific -DLINUX_PROFILE
2376          * switch.
2377          */
2378         getitimer(ITIMER_PROF, &prof_itimer);
2379 #endif
2380
2381 #ifdef __BEOS__
2382         /* Specific beos actions before backend startup */
2383         beos_before_backend_startup();
2384 #endif
2385
2386         pid = fork();
2387
2388         if (pid == 0)                           /* child */
2389         {
2390 #ifdef LINUX_PROFILE
2391                 setitimer(ITIMER_PROF, &prof_itimer, NULL);
2392 #endif
2393
2394 #ifdef __BEOS__
2395                 /* Specific beos backend startup actions */
2396                 beos_backend_startup();
2397 #endif
2398                 free(bn);
2399
2400                 proc_exit(BackendRun(port));
2401         }
2402 #endif   /* EXEC_BACKEND */
2403
2404         if (pid < 0)
2405         {
2406                 /* in parent, fork failed */
2407                 int                     save_errno = errno;
2408
2409 #ifdef __BEOS__
2410                 /* Specific beos backend startup actions */
2411                 beos_backend_startup_failed();
2412 #endif
2413                 free(bn);
2414                 errno = save_errno;
2415                 ereport(LOG,
2416                           (errmsg("could not fork new process for connection: %m")));
2417                 report_fork_failure_to_client(port, save_errno);
2418                 return STATUS_ERROR;
2419         }
2420
2421         /* in parent, successful fork */
2422         ereport(DEBUG2,
2423                         (errmsg_internal("forked new backend, pid=%d socket=%d",
2424                                                          (int) pid, port->sock)));
2425
2426         /*
2427          * Everything's been successful, it's safe to add this backend to our
2428          * list of backends.
2429          */
2430         bn->pid = pid;
2431         bn->cancel_key = MyCancelKey;
2432         DLAddHead(BackendList, DLNewElem(bn));
2433 #ifdef EXEC_BACKEND
2434         ShmemBackendArrayAdd(bn);
2435 #endif
2436
2437         return STATUS_OK;
2438 }
2439
2440 /*
2441  * Try to report backend fork() failure to client before we close the
2442  * connection.  Since we do not care to risk blocking the postmaster on
2443  * this connection, we set the connection to non-blocking and try only once.
2444  *
2445  * This is grungy special-purpose code; we cannot use backend libpq since
2446  * it's not up and running.
2447  */
2448 static void
2449 report_fork_failure_to_client(Port *port, int errnum)
2450 {
2451         char            buffer[1000];
2452
2453         /* Format the error message packet (always V2 protocol) */
2454         snprintf(buffer, sizeof(buffer), "E%s%s\n",
2455                          gettext("could not fork new process for connection: "),
2456                          strerror(errnum));
2457
2458         /* Set port to non-blocking.  Don't do send() if this fails */
2459         if (!set_noblock(port->sock))
2460                 return;
2461
2462         send(port->sock, buffer, strlen(buffer) + 1, 0);
2463 }
2464
2465
/*
 * split_opts -- break an option string into words and append them to argv
 *
 * Each whitespace-delimited word of s is stored as a pointer into s at
 * argv[*argcp], and *argcp is advanced past it.  A NULL or empty s adds
 * nothing.  The caller must provide an argv array with enough room.
 *
 * NB: the string is destructively modified!  Every word is terminated by
 * overwriting the whitespace character that follows it with '\0'.
 *
 * No quoting is understood: since no current POSTGRES arguments require any
 * quoting characters, treating each run of non-whitespace characters as one
 * argv element is good enough.
 */
static void
split_opts(char **argv, int *argcp, char *s)
{
	char	   *cursor = s;

	if (cursor == NULL)
		return;

	for (;;)
	{
		/* step over whitespace in front of the next word */
		while (isspace((unsigned char) *cursor))
			cursor++;

		/* nothing but whitespace was left */
		if (*cursor == '\0')
			return;

		/* the word begins here */
		argv[(*argcp)++] = cursor;

		/* find where the word stops */
		while (*cursor != '\0' && !isspace((unsigned char) *cursor))
			cursor++;

		/* a word running up to the end of the string needs no terminator */
		if (*cursor == '\0')
			return;

		/* cut the word off and carry on behind it */
		*cursor++ = '\0';
	}
}
2494
2495
2496 /*
2497  * BackendRun -- perform authentication, and if successful,
2498  *                              set up the backend's argument list and invoke PostgresMain()
2499  *
2500  * returns:
2501  *              Shouldn't return at all.
2502  *              If PostgresMain() fails, return status.
2503  */
2504 static int
2505 BackendRun(Port *port)
2506 {
2507         int                     status;
2508         char            remote_host[NI_MAXHOST];
2509         char            remote_port[NI_MAXSERV];
2510         char            remote_ps_data[NI_MAXHOST];
2511         char      **av;
2512         int                     maxac;
2513         int                     ac;
2514         char            debugbuf[32];
2515         char            protobuf[32];
2516         int                     i;
2517
2518         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
2519
2520         /*
2521          * Let's clean up ourselves as the postmaster child, and close the
2522          * postmaster's listen sockets
2523          */
2524         ClosePostmasterPorts(false);
2525
2526         /* We don't want the postmaster's proc_exit() handlers */
2527         on_exit_reset();
2528
2529         /*
2530          * Signal handlers setting is moved to tcop/postgres...
2531          */
2532
2533         /* Save port etc. for ps status */
2534         MyProcPort = port;
2535
2536         /* Reset MyProcPid to new backend's pid */
2537         MyProcPid = getpid();
2538
2539         /*
2540          * PreAuthDelay is a debugging aid for investigating problems in the
2541          * authentication cycle: it can be set in postgresql.conf to allow
2542          * time to attach to the newly-forked backend with a debugger. (See
2543          * also the -W backend switch, which we allow clients to pass through
2544          * PGOPTIONS, but it is not honored until after authentication.)
2545          */
2546         if (PreAuthDelay > 0)
2547                 pg_usleep(PreAuthDelay * 1000000L);
2548
2549         ClientAuthInProgress = true;    /* limit visibility of log messages */
2550
2551         /* save start time for end of session reporting */
2552         gettimeofday(&(port->session_start), NULL);
2553
2554         /* set these to empty in case they are needed before we set them up */
2555         port->remote_host = "";
2556         port->remote_port = "";
2557         port->commandTag = "";
2558
2559         /*
2560          * Initialize libpq and enable reporting of ereport errors to the
2561          * client. Must do this now because authentication uses libpq to send
2562          * messages.
2563          */
2564         pq_init();                                      /* initialize libpq to talk to client */
2565         whereToSendOutput = Remote; /* now safe to ereport to client */
2566
2567         /*
2568          * We arrange for a simple exit(0) if we receive SIGTERM or SIGQUIT
2569          * during any client authentication related communication. Otherwise
2570          * the postmaster cannot shutdown the database FAST or IMMED cleanly
2571          * if a buggy client blocks a backend during authentication.
2572          */
2573         pqsignal(SIGTERM, authdie);
2574         pqsignal(SIGQUIT, authdie);
2575         pqsignal(SIGALRM, authdie);
2576         PG_SETMASK(&AuthBlockSig);
2577
2578         /*
2579          * Get the remote host name and port for logging and status display.
2580          */
2581         remote_host[0] = '\0';
2582         remote_port[0] = '\0';
2583         if (getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2584                                                 remote_host, sizeof(remote_host),
2585                                                 remote_port, sizeof(remote_port),
2586                                    (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV))
2587         {
2588                 int                     ret = getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2589                                                                                 remote_host, sizeof(remote_host),
2590                                                                                 remote_port, sizeof(remote_port),
2591                                                                                 NI_NUMERICHOST | NI_NUMERICSERV);
2592
2593                 if (ret)
2594                         ereport(WARNING,
2595                                         (errmsg("getnameinfo_all() failed: %s",
2596                                                         gai_strerror(ret))));
2597         }
2598         snprintf(remote_ps_data, sizeof(remote_ps_data),
2599                          remote_port[0] == '\0' ? "%s" : "%s(%s)",
2600                          remote_host, remote_port);
2601
2602         if (Log_connections)
2603                 ereport(LOG,
2604                                 (errmsg("connection received: host=%s port=%s",
2605                                                 remote_host, remote_port)));
2606
2607         /*
2608          * save remote_host and remote_port in port stucture
2609          */
2610         port->remote_host = strdup(remote_host);
2611         port->remote_port = strdup(remote_port);
2612
2613         /*
2614          * In EXEC_BACKEND case, we didn't inherit the contents of pg_hba.c
2615          * etcetera from the postmaster, and have to load them ourselves.
2616          * Build the PostmasterContext (which didn't exist before, in this
2617          * process) to contain the data.
2618          *
2619          * FIXME: [fork/exec] Ugh.      Is there a way around this overhead?
2620          */
2621 #ifdef EXEC_BACKEND
2622         Assert(PostmasterContext == NULL);
2623         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
2624                                                                                           "Postmaster",
2625                                                                                           ALLOCSET_DEFAULT_MINSIZE,
2626                                                                                           ALLOCSET_DEFAULT_INITSIZE,
2627                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
2628         MemoryContextSwitchTo(PostmasterContext);
2629
2630         load_hba();
2631         load_ident();
2632         load_user();
2633         load_group();
2634 #endif
2635
2636         /*
2637          * Ready to begin client interaction.  We will give up and exit(0)
2638          * after a time delay, so that a broken client can't hog a connection
2639          * indefinitely.  PreAuthDelay doesn't count against the time limit.
2640          */
2641         if (!enable_sig_alarm(AuthenticationTimeout * 1000, false))
2642                 elog(FATAL, "could not set timer for authorization timeout");
2643
2644         /*
2645          * Receive the startup packet (which might turn out to be a cancel
2646          * request packet).
2647          */
2648         status = ProcessStartupPacket(port, false);
2649
2650         if (status != STATUS_OK)
2651                 proc_exit(0);
2652
2653         /*
2654          * Now that we have the user and database name, we can set the process
2655          * title for ps.  It's good to do this as early as possible in
2656          * startup.
2657          */
2658         init_ps_display(port->user_name, port->database_name, remote_ps_data);
2659         set_ps_display("authentication");
2660
2661         /*
2662          * Now perform authentication exchange.
2663          */
2664         ClientAuthentication(port); /* might not return, if failure */
2665
2666         /*
2667          * Done with authentication.  Disable timeout, and prevent
2668          * SIGTERM/SIGQUIT again until backend startup is complete.
2669          */
2670         if (!disable_sig_alarm(false))
2671                 elog(FATAL, "could not disable timer for authorization timeout");
2672         PG_SETMASK(&BlockSig);
2673
2674         if (Log_connections)
2675                 ereport(LOG,
2676                                 (errmsg("connection authorized: user=%s database=%s",
2677                                                 port->user_name, port->database_name)));
2678
2679         /*
2680          * Don't want backend to be able to see the postmaster random number
2681          * generator state.  We have to clobber the static random_seed *and*
2682          * start a new random sequence in the random() library function.
2683          */
2684         random_seed = 0;
2685         srandom((unsigned int) (MyProcPid ^ port->session_start.tv_usec));
2686
2687         /* ----------------
2688          * Now, build the argv vector that will be given to PostgresMain.
2689          *
2690          * The layout of the command line is
2691          *              postgres [secure switches] -p databasename [insecure switches]
2692          * where the switches after -p come from the client request.
2693          *
2694          * The maximum possible number of commandline arguments that could come
2695          * from ExtraOptions or port->cmdline_options is (strlen + 1) / 2; see
2696          * split_opts().
2697          * ----------------
2698          */
2699         maxac = 10;                                     /* for fixed args supplied below */
2700         maxac += (strlen(ExtraOptions) + 1) / 2;
2701         if (port->cmdline_options)
2702                 maxac += (strlen(port->cmdline_options) + 1) / 2;
2703
2704         av = (char **) MemoryContextAlloc(TopMemoryContext,
2705                                                                           maxac * sizeof(char *));
2706         ac = 0;
2707
2708         av[ac++] = "postgres";
2709
2710         /*
2711          * Pass the requested debugging level along to the backend.
2712          */
2713         if (debug_flag > 0)
2714         {
2715                 snprintf(debugbuf, sizeof(debugbuf), "-d%d", debug_flag);
2716                 av[ac++] = debugbuf;
2717         }
2718
2719         /*
2720          * Pass any backend switches specified with -o in the postmaster's own
2721          * command line.  We assume these are secure.  (It's OK to mangle
2722          * ExtraOptions now, since we're safely inside a subprocess.)
2723          */
2724         split_opts(av, &ac, ExtraOptions);
2725
2726         /* Tell the backend what protocol the frontend is using. */
2727         snprintf(protobuf, sizeof(protobuf), "-v%u", port->proto);
2728         av[ac++] = protobuf;
2729
2730         /*
2731          * Tell the backend it is being called from the postmaster, and which
2732          * database to use.  -p marks the end of secure switches.
2733          */
2734         av[ac++] = "-p";
2735         av[ac++] = port->database_name;
2736
2737         /*
2738          * Pass the (insecure) option switches from the connection request.
2739          * (It's OK to mangle port->cmdline_options now.)
2740          */
2741         if (port->cmdline_options)
2742                 split_opts(av, &ac, port->cmdline_options);
2743
2744         av[ac] = NULL;
2745
2746         Assert(ac < maxac);
2747
2748         /*
2749          * Release postmaster's working memory context so that backend can
2750          * recycle the space.  Note this does not trash *MyProcPort, because
2751          * ConnCreate() allocated that space with malloc() ... else we'd need
2752          * to copy the Port data here.  Also, subsidiary data such as the
2753          * username isn't lost either; see ProcessStartupPacket().
2754          */
2755         MemoryContextSwitchTo(TopMemoryContext);
2756         MemoryContextDelete(PostmasterContext);
2757         PostmasterContext = NULL;
2758
2759         /*
2760          * Debug: print arguments being passed to backend
2761          */
2762         ereport(DEBUG3,
2763                         (errmsg_internal("%s child[%d]: starting with (",
2764                                                          progname, (int)getpid())));
2765         for (i = 0; i < ac; ++i)
2766                 ereport(DEBUG3,
2767                                 (errmsg_internal("\t%s", av[i])));
2768         ereport(DEBUG3,
2769                         (errmsg_internal(")")));
2770
2771         ClientAuthInProgress = false;           /* client_min_messages is active
2772                                                                                  * now */
2773
2774         return (PostgresMain(ac, av, port->user_name));
2775 }
2776
2777
2778 #ifdef EXEC_BACKEND
2779
2780 /*
2781  * postmaster_forkexec -- fork and exec a postmaster subprocess
2782  *
2783  * The caller must have set up the argv array already, except for argv[2]
2784  * which will be filled with the name of the temp variable file.
2785  *
2786  * Returns the child process PID, or -1 on fork failure (a suitable error
2787  * message has been logged on failure).
2788  *
2789  * All uses of this routine will dispatch to SubPostmasterMain in the
2790  * child process.
2791  */
2792 pid_t
2793 postmaster_forkexec(int argc, char *argv[])
2794 {
2795         Port            port;
2796
2797         /* This entry point passes dummy values for the Port variables */
2798         memset(&port, 0, sizeof(port));
2799         return internal_forkexec(argc, argv, &port);
2800 }
2801
2802 /*
2803  * backend_forkexec -- fork/exec off a backend process
2804  *
2805  * returns the pid of the fork/exec'd process, or -1 on failure
2806  */
2807 static pid_t
2808 backend_forkexec(Port *port)
2809 {
2810         char       *av[4];
2811         int                     ac = 0;
2812
2813         av[ac++] = "postgres";
2814         av[ac++] = "-forkbackend";
2815         av[ac++] = NULL;                        /* filled in by internal_forkexec */
2816
2817         av[ac] = NULL;
2818         Assert(ac < lengthof(av));
2819
2820         return internal_forkexec(ac, av, port);
2821 }
2822
2823 static pid_t
2824 internal_forkexec(int argc, char *argv[], Port *port)
2825 {
2826         pid_t           pid;
2827         char            tmpfilename[MAXPGPATH];
2828
2829         if (!write_backend_variables(tmpfilename, port))
2830                 return -1;                              /* log made by write_backend_variables */
2831
2832         /* Make sure caller set up argv properly */
2833         Assert(argc >= 3);
2834         Assert(argv[argc] == NULL);
2835         Assert(strncmp(argv[1], "-fork", 5) == 0);
2836         Assert(argv[2] == NULL);
2837
2838         /* Insert temp file name after -fork argument */
2839         argv[2] = tmpfilename;
2840
2841 #ifdef WIN32
2842         pid = win32_forkexec(postgres_exec_path, argv);
2843 #else
2844         /* Fire off execv in child */
2845         if ((pid = fork()) == 0)
2846         {
2847                 if (execv(postgres_exec_path, argv) < 0)
2848                 {
2849                         ereport(LOG,
2850                                         (errmsg("could not execute server process \"%s\": %m",
2851                                                         postgres_exec_path)));
2852                         /* We're already in the child process here, can't return */
2853                         exit(1);
2854                 }
2855         }
2856 #endif
2857
2858         return pid;                                     /* Parent returns pid, or -1 on fork
2859                                                                  * failure */
2860 }
2861
2862 /*
2863  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
2864  *                      to what it would be if we'd simply forked on Unix, and then
2865  *                      dispatch to the appropriate place.
2866  *
2867  * The first two command line arguments are expected to be "-forkFOO"
2868  * (where FOO indicates which postmaster child we are to become), and
2869  * the name of a variables file that we can read to load data that would
2870  * have been inherited by fork() on Unix.  Remaining arguments go to the
2871  * subprocess FooMain() routine.
2872  */
2873 int
2874 SubPostmasterMain(int argc, char *argv[])
2875 {
2876         Port            port;
2877
2878         /* Do this sooner rather than later... */
2879         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
2880
2881         MyProcPid = getpid();           /* reset MyProcPid */
2882
2883         /* In EXEC_BACKEND case we will not have inherited these settings */
2884         IsPostmasterEnvironment = true;
2885         whereToSendOutput = None;
2886         pqinitmask();
2887         PG_SETMASK(&BlockSig);
2888
2889         /* Setup essential subsystems */
2890         MemoryContextInit();
2891         InitializeGUCOptions();
2892
2893         /* Check we got appropriate args */
2894         if (argc < 3)
2895                 elog(FATAL, "invalid subpostmaster invocation");
2896
2897         /* Read in file-based context */
2898         memset(&port, 0, sizeof(Port));
2899         read_backend_variables(argv[2], &port);
2900         read_nondefault_variables();
2901
2902         /* Run backend or appropriate child */
2903         if (strcmp(argv[1], "-forkbackend") == 0)
2904         {
2905                 /* BackendRun will close sockets */
2906
2907                 /* Attach process to shared segments */
2908                 CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
2909
2910 #ifdef USE_SSL
2911                 /*
2912                  *      Need to reinitialize the SSL library in the backend,
2913                  *      since the context structures contain function pointers
2914                  *      and cannot be passed through the parameter file.
2915                  */
2916                 if (EnableSSL)
2917                         secure_initialize();
2918 #endif
2919
2920                 Assert(argc == 3);              /* shouldn't be any more args */
2921                 proc_exit(BackendRun(&port));
2922         }
2923         if (strcmp(argv[1], "-forkboot") == 0)
2924         {
2925                 /* Close the postmaster's sockets */
2926                 ClosePostmasterPorts(false);
2927
2928                 /* Attach process to shared segments */
2929                 CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
2930
2931                 BootstrapMain(argc - 2, argv + 2);
2932                 proc_exit(0);
2933         }
2934         if (strcmp(argv[1], "-forkarch") == 0)
2935         {
2936                 /* Close the postmaster's sockets */
2937                 ClosePostmasterPorts(false);
2938
2939                 /* Do not want to attach to shared memory */
2940
2941                 PgArchiverMain(argc, argv);
2942                 proc_exit(0);
2943         }
2944         if (strcmp(argv[1], "-forkbuf") == 0)
2945         {
2946                 /* Close the postmaster's sockets */
2947                 ClosePostmasterPorts(false);
2948
2949                 /* Do not want to attach to shared memory */
2950
2951                 PgstatBufferMain(argc, argv);
2952                 proc_exit(0);
2953         }
2954         if (strcmp(argv[1], "-forkcol") == 0)
2955         {
2956                 /*
2957                  * Do NOT close postmaster sockets here, because we are forking
2958                  * from pgstat buffer process, which already did it.
2959                  */
2960
2961                 /* Do not want to attach to shared memory */
2962
2963                 PgstatCollectorMain(argc, argv);
2964                 proc_exit(0);
2965         }
2966         if (strcmp(argv[1], "-forklog") == 0)
2967         {
2968                 /* Close the postmaster's sockets */
2969                 ClosePostmasterPorts(true);
2970
2971                 /* Do not want to attach to shared memory */
2972
2973                 SysLoggerMain(argc, argv);
2974                 proc_exit(0);
2975         }
2976
2977         return 1;                                       /* shouldn't get here */
2978 }
2979 #endif   /* EXEC_BACKEND */
2980
2981
/*
 * ExitPostmaster -- the postmaster's one and only way out
 *
 * Do NOT call exit() directly --- always go through here!
 *
 * At present the status is simply passed on to proc_exit().
 */
static void
ExitPostmaster(int status)
{
	/*
	 * Historical notes: this routine is supposed to clean up shared memory
	 * and kill all backends.  Whether the backends should all be killed
	 * when the Postmaster dies was once considered doubtful ("probably
	 * not"); the reply left here was:
	 *
	 * MUST		-- vadim 05-10-1999
	 */

	proc_exit(status);
}
3001
3002 /*
3003  * sigusr1_handler - handle signal conditions from child processes
3004  */
3005 static void
3006 sigusr1_handler(SIGNAL_ARGS)
3007 {
3008         int                     save_errno = errno;
3009
3010         PG_SETMASK(&BlockSig);
3011
3012         if (CheckPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE))
3013         {
3014                 /*
3015                  * Password or group file has changed.
3016                  */
3017                 load_user();
3018                 load_group();
3019         }
3020
3021         if (CheckPostmasterSignal(PMSIGNAL_WAKEN_CHILDREN))
3022         {
3023                 /*
3024                  * Send SIGUSR1 to all children (triggers
3025                  * CatchupInterruptHandler). See storage/ipc/sinval[adt].c for the
3026                  * use of this.
3027                  */
3028                 if (Shutdown <= SmartShutdown)
3029                         SignalChildren(SIGUSR1);
3030         }
3031
3032         if (PgArchPID != 0 && Shutdown == NoShutdown)
3033         {
3034                 if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER))
3035                 {
3036                         /*
3037                          * Send SIGUSR1 to archiver process, to wake it up and begin
3038                          * archiving next transaction log file.
3039                          */
3040                         kill(PgArchPID, SIGUSR1);
3041                 }
3042         }
3043
3044         PG_SETMASK(&UnBlockSig);
3045
3046         errno = save_errno;
3047 }
3048
3049
/*
 * Dummy signal handler
 *
 * We use this for signals that we don't actually use in the postmaster,
 * but we do use in backends.  If we were to SIG_IGN such signals in the
 * postmaster, then a newly started backend might drop a signal that arrives
 * before it's able to reconfigure its signal processing.  (See notes in
 * tcop/postgres.c.)
 */
static void
dummy_handler(SIGNAL_ARGS)
{
        /* intentionally empty: the signal is accepted and nothing is done */
}
3063
3064
/*
 * CharRemap: map an integer onto one of the 62 characters [A-Za-z0-9],
 * per crypt(3) salt conventions.
 *
 * 0..25 yield 'A'..'Z', 26..51 yield 'a'..'z' and 52..61 yield '0'..'9'.
 * Values outside 0..61 are accepted as well: the absolute value is taken
 * and reduced modulo 62 before the lookup.
 *
 * The absolute value is computed in unsigned arithmetic so that LONG_MIN
 * cannot overflow on negation, and the character comes from a table so the
 * result does not depend on letters being contiguous in the execution
 * character set.
 */
static char
CharRemap(long ch)
{
        static const char salt_chars[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789";
        unsigned long magnitude;

        if (ch < 0)
                magnitude = 0UL - (unsigned long) ch;
        else
                magnitude = (unsigned long) ch;

        return salt_chars[magnitude % 62];
}
3086
/*
 * RandomSalt
 *
 * Fills cryptSalt[0..1] with two characters produced by CharRemap() and
 * md5Salt[0..3] with four bytes in the range 1..255.
 */
static void
RandomSalt(char *cryptSalt, char *md5Salt)
{
        long            value = PostmasterRandom();
        int                     i;

        cryptSalt[0] = CharRemap(value % 62);
        cryptSalt[1] = CharRemap(value / 62);

        /*
         * The first random value may be reused for md5Salt[0], because only
         * one of the two salts will be sent to the client.  Every further
         * byte gets a fresh random value.
         *
         * Taking the value % 255 gives up one possible byte value but lets
         * all bits of the random() result participate; adding one then
         * guarantees that no null byte is generated.
         */
        md5Salt[0] = (value % 255) + 1;
        for (i = 1; i < 4; i++)
        {
                value = PostmasterRandom();
                md5Salt[i] = (value % 255) + 1;
        }
}
3115
3116 /*
3117  * PostmasterRandom
3118  */
3119 static long
3120 PostmasterRandom(void)
3121 {
3122         static bool initialized = false;
3123
3124         if (!initialized)
3125         {
3126                 Assert(random_seed != 0);
3127                 srandom(random_seed);
3128                 initialized = true;
3129         }
3130
3131         return random();
3132 }
3133
3134 /*
3135  * Count up number of child processes (regular backends only)
3136  */
3137 static int
3138 CountChildren(void)
3139 {
3140         Dlelem     *curr;
3141         int                     cnt = 0;
3142
3143         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
3144                 cnt++;
3145         return cnt;
3146 }
3147
3148
/*
 * StartChildProcess -- start a non-backend child process for the postmaster
 *
 * xlop determines what kind of child will be started.  All child types
 * initially go to BootstrapMain, which will handle common setup.
 *
 * The child is given the argument vector
 *              postgres [-forkboot <filled in later>] -x<xlop> -p template1
 * where the bracketed part is present only in EXEC_BACKEND builds.
 *
 * Return value of StartChildProcess is subprocess' PID, or 0 if failed
 * to start subprocess.  A failure to start the BS_XLOG_STARTUP child does
 * not return at all: it ends in ExitPostmaster(1).
 */
static pid_t
StartChildProcess(int xlop)
{
        pid_t           pid;
        char       *av[10];
        int                     ac = 0;
        char            xlbuf[32];

#ifdef LINUX_PROFILE
        struct itimerval prof_itimer;
#endif

        /*
         * Set up command-line arguments for subprocess
         */
        av[ac++] = "postgres";

#ifdef EXEC_BACKEND
        av[ac++] = "-forkboot";
        av[ac++] = NULL;                        /* filled in by postmaster_forkexec */
#endif

        snprintf(xlbuf, sizeof(xlbuf), "-x%d", xlop);
        av[ac++] = xlbuf;

        av[ac++] = "-p";
        av[ac++] = "template1";

        /* terminate the vector; ac itself does not count the terminator */
        av[ac] = NULL;
        Assert(ac < lengthof(av));

        /*
         * Flush stdio channels (see comments in BackendStartup)
         */
        fflush(stdout);
        fflush(stderr);

#ifdef EXEC_BACKEND

        pid = postmaster_forkexec(ac, av);

#else                                                   /* !EXEC_BACKEND */

#ifdef LINUX_PROFILE
        /* see comments in BackendStartup */
        getitimer(ITIMER_PROF, &prof_itimer);
#endif

#ifdef __BEOS__
        /* Specific beos actions before backend startup */
        beos_before_backend_startup();
#endif

        pid = fork();

        if (pid == 0)                           /* child */
        {
#ifdef LINUX_PROFILE
                setitimer(ITIMER_PROF, &prof_itimer, NULL);
#endif

#ifdef __BEOS__
                /* Specific beos actions after backend startup */
                beos_backend_startup();
#endif

                IsUnderPostmaster = true;               /* we are a postmaster subprocess
                                                                                 * now */

                /* Close the postmaster's sockets */
                ClosePostmasterPorts(false);

                /* Lose the postmaster's on-exit routines and port connections */
                on_exit_reset();

                /* Release postmaster's working memory context */
                MemoryContextSwitchTo(TopMemoryContext);
                MemoryContextDelete(PostmasterContext);
                PostmasterContext = NULL;

                /* Run the child; if BootstrapMain ever returns, exit with status 0 */
                BootstrapMain(ac, av);
                ExitPostmaster(0);
        }
#endif   /* EXEC_BACKEND */

        if (pid < 0)
        {
                /* in parent, fork failed */
                int                     save_errno = errno;

#ifdef __BEOS__
                /* Specific beos actions after a failed fork */
                beos_backend_startup_failed();
#endif
                /* restore errno so that %m below reports the fork failure */
                errno = save_errno;
                switch (xlop)
                {
                        case BS_XLOG_STARTUP:
                                ereport(LOG,
                                                (errmsg("could not fork startup process: %m")));
                                break;
                        case BS_XLOG_BGWRITER:
                                ereport(LOG,
                                (errmsg("could not fork background writer process: %m")));
                                break;
                        default:
                                ereport(LOG,
                                                (errmsg("could not fork process: %m")));
                                break;
                }

                /*
                 * fork failure is fatal during startup, but there's no need to
                 * choke immediately if starting other child types fails.
                 */
                if (xlop == BS_XLOG_STARTUP)
                        ExitPostmaster(1);
                return 0;
        }

        /*
         * in parent, successful fork
         */
        return pid;
}
3283
3284
3285 /*
3286  * Create the opts file
3287  */
3288 static bool
3289 CreateOptsFile(int argc, char *argv[], char *fullprogname)
3290 {
3291         char            filename[MAXPGPATH];
3292         FILE       *fp;
3293         int                     i;
3294
3295         snprintf(filename, sizeof(filename), "%s/postmaster.opts", DataDir);
3296
3297         if ((fp = fopen(filename, "w")) == NULL)
3298         {
3299                 elog(LOG, "could not create file \"%s\": %m", filename);
3300                 return false;
3301         }
3302
3303         fprintf(fp, "%s", fullprogname);
3304         for (i = 1; i < argc; i++)
3305                 fprintf(fp, " %s%s%s", SYSTEMQUOTE, argv[i], SYSTEMQUOTE);
3306         fputs("\n", fp);
3307
3308         if (fclose(fp))
3309         {
3310                 elog(LOG, "could not write file \"%s\": %m", filename);
3311                 return false;
3312         }
3313
3314         return true;
3315 }
3316
3317
3318 #ifdef EXEC_BACKEND
3319
3320 /*
3321  * The following need to be available to the read/write_backend_variables
3322  * functions
3323  */
3324 #include "storage/spin.h"
3325
3326 extern slock_t *ShmemLock;
3327 extern slock_t *ShmemIndexLock;
3328 extern void *ShmemIndexAlloc;
3329 typedef struct LWLock LWLock;
3330 extern LWLock *LWLockArray;
3331 extern slock_t *ProcStructLock;
3332 extern int      pgStatSock;
3333
/*
 * Helpers for dumping/restoring variables as raw bytes.
 *
 * write_var/read_var transfer sizeof(var) bytes starting at &var.  The
 * *_array_var forms pass "var" itself as the buffer, so "var" must be a
 * real array for sizeof(var) to cover all of it.  Each macro evaluates to
 * the fwrite()/fread() result, i.e. the number of items transferred
 * (1 on success).
 */
#define write_var(var,fp) fwrite((void*)&(var),sizeof(var),1,fp)
#define read_var(var,fp)  fread((void*)&(var),sizeof(var),1,fp)
#define write_array_var(var,fp) fwrite((void*)(var),sizeof(var),1,fp)
#define read_array_var(var,fp)  fread((void*)(var),sizeof(var),1,fp)
3338
3339 static bool
3340 write_backend_variables(char *filename, Port *port)
3341 {
3342         static unsigned long tmpBackendFileNum = 0;
3343         FILE       *fp;
3344         char            str_buf[MAXPGPATH];
3345
3346         /* Calculate name for temp file in caller's buffer */
3347         Assert(DataDir);
3348         snprintf(filename, MAXPGPATH, "%s/%s/%s.backend_var.%d.%lu",
3349                          DataDir, PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX,
3350                          MyProcPid, ++tmpBackendFileNum);
3351
3352         /* Open file */
3353         fp = AllocateFile(filename, PG_BINARY_W);
3354         if (!fp)
3355         {
3356                 /* As per OpenTemporaryFile... */
3357                 char            dirname[MAXPGPATH];
3358
3359                 snprintf(dirname, MAXPGPATH, "%s/%s", DataDir, PG_TEMP_FILES_DIR);
3360                 mkdir(dirname, S_IRWXU);
3361
3362                 fp = AllocateFile(filename, PG_BINARY_W);
3363                 if (!fp)
3364                 {
3365                         ereport(LOG,
3366                                         (errcode_for_file_access(),
3367                                          errmsg("could not create file \"%s\": %m",
3368                                                         filename)));
3369                         return false;
3370                 }
3371         }
3372
3373         /* Write vars */
3374         write_var(port->sock, fp);
3375         write_var(port->proto, fp);
3376         write_var(port->laddr, fp);
3377         write_var(port->raddr, fp);
3378         write_var(port->canAcceptConnections, fp);
3379         write_var(port->cryptSalt, fp);
3380         write_var(port->md5Salt, fp);
3381
3382         /*
3383          * XXX FIXME later: writing these strings as MAXPGPATH bytes always is
3384          * probably a waste of resources
3385          */
3386
3387         StrNCpy(str_buf, DataDir, MAXPGPATH);
3388         write_array_var(str_buf, fp);
3389
3390         write_array_var(ListenSocket, fp);
3391
3392         write_var(MyCancelKey, fp);
3393
3394         write_var(UsedShmemSegID, fp);
3395         write_var(UsedShmemSegAddr, fp);
3396
3397         write_var(ShmemLock, fp);
3398         write_var(ShmemIndexLock, fp);
3399         write_var(ShmemVariableCache, fp);
3400         write_var(ShmemIndexAlloc, fp);
3401         write_var(ShmemBackendArray, fp);
3402
3403         write_var(LWLockArray, fp);
3404         write_var(ProcStructLock, fp);
3405         write_var(pgStatSock, fp);
3406
3407         write_var(debug_flag, fp);
3408         write_var(PostmasterPid, fp);
3409 #ifdef WIN32
3410         write_var(PostmasterHandle, fp);
3411 #endif
3412
3413         write_var(syslogPipe[0], fp);
3414         write_var(syslogPipe[1], fp);
3415
3416         StrNCpy(str_buf, my_exec_path, MAXPGPATH);
3417         write_array_var(str_buf, fp);
3418
3419         write_array_var(ExtraOptions, fp);
3420
3421         StrNCpy(str_buf, setlocale(LC_COLLATE, NULL), MAXPGPATH);
3422         write_array_var(str_buf, fp);
3423         StrNCpy(str_buf, setlocale(LC_CTYPE, NULL), MAXPGPATH);
3424         write_array_var(str_buf, fp);
3425
3426         /* Release file */
3427         if (FreeFile(fp))
3428         {
3429                 ereport(ERROR,
3430                                 (errcode_for_file_access(),
3431                                  errmsg("could not write to file \"%s\": %m", filename)));
3432                 return false;
3433         }
3434
3435         return true;
3436 }
3437
/*
 * read_backend_variables -- restore state saved by write_backend_variables
 *
 * filename: file to read; it is unlinked once it has been read.
 * port: receives the saved connection fields.
 *
 * The reads below must stay in exactly the same order, and use the same
 * sizes, as the writes in write_backend_variables().
 *
 * Failure to open the file is FATAL.  The results of the individual
 * read_var()/read_array_var() calls and of FreeFile() are not checked.
 */
static void
read_backend_variables(char *filename, Port *port)
{
        FILE       *fp;
        char            str_buf[MAXPGPATH];

        /* Open file */
        fp = AllocateFile(filename, PG_BINARY_R);
        if (!fp)
                ereport(FATAL,
                                (errcode_for_file_access(),
                  errmsg("could not read from backend variables file \"%s\": %m",
                                 filename)));

        /* Read vars */
        read_var(port->sock, fp);
        read_var(port->proto, fp);
        read_var(port->laddr, fp);
        read_var(port->raddr, fp);
        read_var(port->canAcceptConnections, fp);
        read_var(port->cryptSalt, fp);
        read_var(port->md5Salt, fp);

        /* data directory path, stored as a fixed MAXPGPATH-byte buffer */
        read_array_var(str_buf, fp);
        SetDataDir(str_buf);

        read_array_var(ListenSocket, fp);

        read_var(MyCancelKey, fp);

        read_var(UsedShmemSegID, fp);
        read_var(UsedShmemSegAddr, fp);

        read_var(ShmemLock, fp);
        read_var(ShmemIndexLock, fp);
        read_var(ShmemVariableCache, fp);
        read_var(ShmemIndexAlloc, fp);
        read_var(ShmemBackendArray, fp);

        read_var(LWLockArray, fp);
        read_var(ProcStructLock, fp);
        read_var(pgStatSock, fp);

        read_var(debug_flag, fp);
        read_var(PostmasterPid, fp);
#ifdef WIN32
        read_var(PostmasterHandle, fp);
#endif

        read_var(syslogPipe[0], fp);
        read_var(syslogPipe[1], fp);

        /* executable path, stored as a fixed MAXPGPATH-byte buffer */
        read_array_var(str_buf, fp);
        StrNCpy(my_exec_path, str_buf, MAXPGPATH);

        read_array_var(ExtraOptions, fp);

        /* locale names: LC_COLLATE first, then LC_CTYPE */
        read_array_var(str_buf, fp);
        setlocale(LC_COLLATE, str_buf);
        read_array_var(str_buf, fp);
        setlocale(LC_CTYPE, str_buf);

        /* Release file */
        FreeFile(fp);
        /* the file is no longer needed; failing to remove it only warns */
        if (unlink(filename) != 0)
                ereport(WARNING,
                                (errcode_for_file_access(),
                                 errmsg("could not remove file \"%s\": %m", filename)));
}
3507
3508
3509 size_t
3510 ShmemBackendArraySize(void)
3511 {
3512         return (NUM_BACKENDARRAY_ELEMS * sizeof(Backend));
3513 }
3514
3515 void
3516 ShmemBackendArrayAllocation(void)
3517 {
3518         size_t          size = ShmemBackendArraySize();
3519
3520         ShmemBackendArray = (Backend *) ShmemAlloc(size);
3521         /* Mark all slots as empty */
3522         memset(ShmemBackendArray, 0, size);
3523 }
3524
3525 static void
3526 ShmemBackendArrayAdd(Backend *bn)
3527 {
3528         int                     i;
3529
3530         /* Find an empty slot */
3531         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3532         {
3533                 if (ShmemBackendArray[i].pid == 0)
3534                 {
3535                         ShmemBackendArray[i] = *bn;
3536                         return;
3537                 }
3538         }
3539
3540         ereport(FATAL,
3541                         (errmsg_internal("no free slots in shmem backend array")));
3542 }
3543
3544 static void
3545 ShmemBackendArrayRemove(pid_t pid)
3546 {
3547         int                     i;
3548
3549         for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
3550         {
3551                 if (ShmemBackendArray[i].pid == pid)
3552                 {
3553                         /* Mark the slot as empty */
3554                         ShmemBackendArray[i].pid = 0;
3555                         return;
3556                 }
3557         }
3558
3559         ereport(WARNING,
3560                         (errmsg_internal("could not find backend entry with pid %d",
3561                                                          (int) pid)));
3562 }
3563 #endif   /* EXEC_BACKEND */
3564
3565
3566 #ifdef WIN32
3567
3568 static pid_t
3569 win32_forkexec(const char *path, char *argv[])
3570 {
3571         STARTUPINFO si;
3572         PROCESS_INFORMATION pi;
3573         int                     i;
3574         int                     j;
3575         char            cmdLine[MAXPGPATH * 2];
3576         HANDLE          childHandleCopy;
3577         HANDLE          waiterThread;
3578
3579         /* Format the cmd line */
3580         cmdLine[sizeof(cmdLine) - 1] = '\0';
3581         cmdLine[sizeof(cmdLine) - 2] = '\0';
3582         snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", path);
3583         i = 0;
3584         while (argv[++i] != NULL)
3585         {
3586                 j = strlen(cmdLine);
3587                 snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
3588         }
3589         if (cmdLine[sizeof(cmdLine) - 2] != '\0')
3590         {
3591                 elog(LOG, "subprocess command line too long");
3592                 return -1;
3593         }
3594
3595         memset(&pi, 0, sizeof(pi));
3596         memset(&si, 0, sizeof(si));
3597         si.cb = sizeof(si);
3598         if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, 0, NULL, NULL, &si, &pi))
3599         {
3600                 elog(LOG, "CreateProcess call failed (%d): %m", (int) GetLastError());
3601                 return -1;
3602         }
3603
3604         if (!IsUnderPostmaster)
3605         {
3606                 /* We are the Postmaster creating a child... */
3607                 win32_AddChild(pi.dwProcessId, pi.hProcess);
3608         }
3609
3610         if (DuplicateHandle(GetCurrentProcess(),
3611                                                 pi.hProcess,
3612                                                 GetCurrentProcess(),
3613                                                 &childHandleCopy,
3614                                                 0,
3615                                                 FALSE,
3616                                                 DUPLICATE_SAME_ACCESS) == 0)
3617                 ereport(FATAL,
3618                                 (errmsg_internal("could not duplicate child handle: %d",
3619                                                                  (int) GetLastError())));
3620
3621         waiterThread = CreateThread(NULL, 64 * 1024, win32_sigchld_waiter,
3622                                                                 (LPVOID) childHandleCopy, 0, NULL);
3623         if (!waiterThread)
3624                 ereport(FATAL,
3625                    (errmsg_internal("could not create sigchld waiter thread: %d",
3626                                                         (int) GetLastError())));
3627         CloseHandle(waiterThread);
3628
3629         if (IsUnderPostmaster)
3630                 CloseHandle(pi.hProcess);
3631         CloseHandle(pi.hThread);
3632
3633         return pi.dwProcessId;
3634 }
3635
3636 /*
3637  * Note: The following three functions must not be interrupted (eg. by
3638  * signals).  As the Postgres Win32 signalling architecture (currently)
3639  * requires polling, or APC checking functions which aren't used here, this
3640  * is not an issue.
3641  *
3642  * We keep two separate arrays, instead of a single array of pid/HANDLE
3643  * structs, to avoid having to re-create a handle array for
3644  * WaitForMultipleObjects on each call to win32_waitpid.
3645  */
3646
3647 static void
3648 win32_AddChild(pid_t pid, HANDLE handle)
3649 {
3650         Assert(win32_childPIDArray && win32_childHNDArray);
3651         if (win32_numChildren < NUM_BACKENDARRAY_ELEMS)
3652         {
3653                 win32_childPIDArray[win32_numChildren] = pid;
3654                 win32_childHNDArray[win32_numChildren] = handle;
3655                 ++win32_numChildren;
3656         }
3657         else
3658                 ereport(FATAL,
3659                                 (errmsg_internal("no room for child entry with pid %lu",
3660                                                                  (unsigned long) pid)));
3661 }
3662
3663 static void
3664 win32_RemoveChild(pid_t pid)
3665 {
3666         int                     i;
3667
3668         Assert(win32_childPIDArray && win32_childHNDArray);
3669
3670         for (i = 0; i < win32_numChildren; i++)
3671         {
3672                 if (win32_childPIDArray[i] == pid)
3673                 {
3674                         CloseHandle(win32_childHNDArray[i]);
3675
3676                         /* Swap last entry into the "removed" one */
3677                         --win32_numChildren;
3678                         win32_childPIDArray[i] = win32_childPIDArray[win32_numChildren];
3679                         win32_childHNDArray[i] = win32_childHNDArray[win32_numChildren];
3680                         return;
3681                 }
3682         }
3683
3684         ereport(WARNING,
3685                         (errmsg_internal("could not find child entry with pid %lu",
3686                                                          (unsigned long) pid)));
3687 }
3688
3689 static pid_t
3690 win32_waitpid(int *exitstatus)
3691 {
3692         /*
3693          * Note: Do NOT use WaitForMultipleObjectsEx, as we don't want to run
3694          * queued APCs here.
3695          */
3696         int                     index;
3697         DWORD           exitCode;
3698         DWORD           ret;
3699         unsigned long offset;
3700
3701         Assert(win32_childPIDArray && win32_childHNDArray);
3702         elog(DEBUG3, "waiting on %lu children", win32_numChildren);
3703
3704         for (offset = 0; offset < win32_numChildren; offset += MAXIMUM_WAIT_OBJECTS)
3705         {
3706                 unsigned long num = Min(MAXIMUM_WAIT_OBJECTS, win32_numChildren - offset);
3707
3708                 ret = WaitForMultipleObjects(num, &win32_childHNDArray[offset], FALSE, 0);
3709                 switch (ret)
3710                 {
3711                         case WAIT_FAILED:
3712                                 ereport(LOG,
3713                                                 (errmsg_internal("failed to wait on %lu of %lu children: %d",
3714                                                  num, win32_numChildren, (int) GetLastError())));
3715                                 return -1;
3716
3717                         case WAIT_TIMEOUT:
3718                                 /* No children (in this chunk) have finished */
3719                                 break;
3720
3721                         default:
3722
3723                                 /*
3724                                  * Get the exit code, and return the PID of, the
3725                                  * respective process
3726                                  */
3727                                 index = offset + ret - WAIT_OBJECT_0;
3728                                 Assert(index >= 0 && index < win32_numChildren);
3729                                 if (!GetExitCodeProcess(win32_childHNDArray[index], &exitCode))
3730                                 {
3731                                         /*
3732                                          * If we get this far, this should never happen, but,
3733                                          * then again... No choice other than to assume a
3734                                          * catastrophic failure.
3735                                          */
3736                                         ereport(FATAL,
3737                                                         (errmsg_internal("failed to get exit code for child %lu",
3738                                                                                    win32_childPIDArray[index])));
3739                                 }
3740                                 *exitstatus = (int) exitCode;
3741                                 return win32_childPIDArray[index];
3742                 }
3743         }
3744
3745         /* No children have finished */
3746         return -1;
3747 }
3748
3749 /*
3750  * Note! Code below executes on separate threads, one for
3751  * each child process created
3752  */
3753 static DWORD WINAPI
3754 win32_sigchld_waiter(LPVOID param)
3755 {
3756         HANDLE          procHandle = (HANDLE) param;
3757
3758         DWORD           r = WaitForSingleObject(procHandle, INFINITE);
3759
3760         if (r == WAIT_OBJECT_0)
3761                 pg_queue_signal(SIGCHLD);
3762         else
3763                 write_stderr("could not wait on child process handle: error code %d\n",
3764                                          (int) GetLastError());
3765         CloseHandle(procHandle);
3766         return 0;
3767 }
3768
3769 #endif   /* WIN32 */