]> granicus.if.org Git - postgresql/blob - src/backend/postmaster/postmaster.c
Drops in the CreateProcess calls for Win32 (essentially wrapping up the
[postgresql] / src / backend / postmaster / postmaster.c
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  *        This program acts as a clearing house for requests to the
5  *        POSTGRES system.      Frontend programs send a startup message
6  *        to the Postmaster and the postmaster uses the info in the
7  *        message to setup a backend process.
8  *
9  *        The postmaster also manages system-wide operations such as
10  *        startup, shutdown, and periodic checkpoints.  The postmaster
11  *        itself doesn't do those operations, mind you --- it just forks
12  *        off a subprocess to do them at the right times.  It also takes
13  *        care of resetting the system if a backend crashes.
14  *
15  *        The postmaster process creates the shared memory and semaphore
16  *        pools during startup, but as a rule does not touch them itself.
17  *        In particular, it is not a member of the PGPROC array of backends
18  *        and so it cannot participate in lock-manager operations.      Keeping
19  *        the postmaster away from shared memory operations makes it simpler
20  *        and more reliable.  The postmaster is almost always able to recover
21  *        from crashes of individual backends by resetting shared memory;
22  *        if it did much with shared memory then it would be prone to crashing
23  *        along with the backends.
24  *
25  *        When a request message is received, we now fork() immediately.
26  *        The child process performs authentication of the request, and
27  *        then becomes a backend if successful.  This allows the auth code
28  *        to be written in a simple single-threaded style (as opposed to the
29  *        crufty "poor man's multitasking" code that used to be needed).
30  *        More importantly, it ensures that blockages in non-multithreaded
31  *        libraries like SSL or PAM cannot cause denial of service to other
32  *        clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  *        $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.358 2004/01/11 03:49:31 momjian Exp $
41  *
42  * NOTES
43  *
44  * Initialization:
45  *              The Postmaster sets up a few shared memory data structures
46  *              for the backends.  It should at the very least initialize the
47  *              lock manager.
48  *
49  * Synchronization:
50  *              The Postmaster shares memory with the backends but should avoid
51  *              touching shared memory, so as not to become stuck if a crashing
52  *              backend screws up locks or shared memory.  Likewise, the Postmaster
53  *              should never block on messages from frontend clients.
54  *
55  * Garbage Collection:
56  *              The Postmaster cleans up after backends if they have an emergency
57  *              exit and/or core dump.
58  *
59  *-------------------------------------------------------------------------
60  */
61
62 #include "postgres.h"
63
64 #include <unistd.h>
65 #include <signal.h>
66 #include <sys/wait.h>
67 #include <ctype.h>
68 #include <sys/stat.h>
69 #include <sys/time.h>
70 #include <sys/socket.h>
71 #include <errno.h>
72 #include <fcntl.h>
73 #include <time.h>
74 #include <sys/param.h>
75 #include <netinet/in.h>
76 #include <arpa/inet.h>
77 #include <netdb.h>
78 #include <limits.h>
79
80 #ifdef HAVE_SYS_SELECT_H
81 #include <sys/select.h>
82 #endif
83
84 #ifdef HAVE_GETOPT_H
85 #include <getopt.h>
86 #endif
87
88 #ifdef USE_RENDEZVOUS
89 #include <DNSServiceDiscovery/DNSServiceDiscovery.h>
90 #endif
91
92 #include "catalog/pg_database.h"
93 #include "commands/async.h"
94 #include "lib/dllist.h"
95 #include "libpq/auth.h"
96 #include "libpq/crypt.h"
97 #include "libpq/libpq.h"
98 #include "libpq/pqcomm.h"
99 #include "libpq/pqsignal.h"
100 #include "miscadmin.h"
101 #include "nodes/nodes.h"
102 #include "storage/fd.h"
103 #include "storage/ipc.h"
104 #include "storage/pg_shmem.h"
105 #include "storage/pmsignal.h"
106 #include "storage/proc.h"
107 #include "storage/bufmgr.h"
108 #include "access/xlog.h"
109 #include "tcop/tcopprot.h"
110 #include "utils/guc.h"
111 #include "utils/memutils.h"
112 #include "utils/ps_status.h"
113 #include "bootstrap/bootstrap.h"
114 #include "pgstat.h"
115
116
117 #define INVALID_SOCK    (-1)
118
119 #ifdef HAVE_SIGPROCMASK
120 sigset_t        UnBlockSig,
121                         BlockSig,
122                         AuthBlockSig;
123
124 #else
125 int                     UnBlockSig,
126                         BlockSig,
127                         AuthBlockSig;
128 #endif
129
130 /*
131  * List of active backends (or child processes anyway; we don't actually
132  * know whether a given child has become a backend or is still in the
133  * authorization phase).  This is used mainly to keep track of how many
134  * children we have and send them appropriate signals when necessary.
135  */
136 typedef struct bkend
137 {
138         pid_t           pid;                    /* process id of backend */
139         long            cancel_key;             /* cancel key for cancels for this backend */
140 } Backend;
141
142 static Dllist *BackendList;
143
144 /* The socket number we are listening for connections on */
145 int                     PostPortNumber;
146 char       *UnixSocketDir;
147 char       *VirtualHost;
148
149 /*
150  * MaxBackends is the limit on the number of backends we can start.
151  * Note that a larger MaxBackends value will increase the size of the
152  * shared memory area as well as cause the postmaster to grab more
153  * kernel semaphores, even if you never actually use that many
154  * backends.
155  */
156 int                     MaxBackends;
157
158 /*
159  * ReservedBackends is the number of backends reserved for superuser use.
160  * This number is taken out of the pool size given by MaxBackends so
161  * the number of backend slots available to non-superusers is
162  * (MaxBackends - ReservedBackends).  Note what this really means is
163  * "if there are <= ReservedBackends connections available, only superusers
164  * can make new connections" --- pre-existing superuser connections don't
165  * count against the limit.
166  */
167 int                     ReservedBackends;
168
169
170 static char *progname = NULL;
171
172 /* The socket(s) we're listening to. */
173 #define MAXLISTEN       10
174 static int      ListenSocket[MAXLISTEN];
175
176 /* Used to reduce macro tests */
177 #ifdef EXEC_BACKEND
178 const bool      ExecBackend = true;
179
180 #else
181 const bool      ExecBackend = false;
182 #endif
183
184 /*
185  * Set by the -o option
186  */
187 static char ExtraOptions[MAXPGPATH];
188
/*
 * These globals control the behavior of the postmaster in case some
 * backend dumps core.  Normally, it kills all peers of the dead backend
 * and reinitializes shared memory.  By specifying -s or -n, we can have
 * the postmaster stop (rather than kill) peers and not reinitialize
 * shared data structures.
 *
 * Both are plain on/off flags, so both are declared bool.
 */
static bool Reinit = true;		/* cleared by the -n switch */
static bool SendStop = false;	/* set by the -s switch */
198
199 /* still more option variables */
200 bool            NetServer = false;      /* listen on TCP/IP */
201 bool            EnableSSL = false;
202 bool            SilentMode = false; /* silent mode (-S) */
203
204 int                     PreAuthDelay = 0;
205 int                     AuthenticationTimeout = 60;
206 int                     CheckPointTimeout = 300;
207 int                     CheckPointWarning = 30;
208 time_t          LastSignalledCheckpoint = 0;
209
210 bool            log_hostname;           /* for ps display */
211 bool            LogSourcePort;
212 bool            Log_connections = false;
213 bool            Db_user_namespace = false;
214
215 char       *rendezvous_name;
216
217 /* For FCNTL_NONBLOCK */
218 #if defined(WIN32) || defined(__BEOS__)
219 long            ioctlsocket_ret=1;
220 #endif
221
222 /* list of library:init-function to be preloaded */
223 char       *preload_libraries_string = NULL;
224
/*
 * Startup/shutdown state.
 *
 * Each variable starts out as zero.
 */
static pid_t StartupPID = 0;
static pid_t ShutdownPID = 0;
static pid_t CheckPointPID = 0;
static pid_t BgWriterPID = 0;

static time_t checkpointed = 0;
231
232 #define                 NoShutdown              0
233 #define                 SmartShutdown   1
234 #define                 FastShutdown    2
235
236 static int      Shutdown = NoShutdown;
237
238 static bool FatalError = false; /* T if recovering from backend crash */
239
240 bool            ClientAuthInProgress = false;           /* T during new-client
241                                                                                                  * authentication */
242
243 /*
244  * State for assigning random salts and cancel keys.
245  * Also, the global MyCancelKey passes the cancel key assigned to a given
246  * backend from the postmaster to that backend (via fork).
247  */
248
249 static unsigned int random_seed = 0;
250
251 static int      debug_flag = 0;
252
253 extern char *optarg;
254 extern int      optind,
255                         opterr;
256
257 #ifdef HAVE_INT_OPTRESET
258 extern int      optreset;
259 #endif
260
261 /*
262  * postmaster.c - function prototypes
263  */
264 static void pmdaemonize(int argc, char *argv[]);
265 static Port *ConnCreate(int serverFd);
266 static void ConnFree(Port *port);
267 static void reset_shared(unsigned short port);
268 static void SIGHUP_handler(SIGNAL_ARGS);
269 static void pmdie(SIGNAL_ARGS);
270 static void reaper(SIGNAL_ARGS);
271 static void sigusr1_handler(SIGNAL_ARGS);
272 static void dummy_handler(SIGNAL_ARGS);
273 static void CleanupProc(int pid, int exitstatus);
274 static void LogChildExit(int lev, const char *procname,
275                          int pid, int exitstatus);
276 static void BackendInit(Port *port);
277 static int  BackendRun(Port *port);
278 static void ExitPostmaster(int status);
279 static void usage(const char *);
280 static int      ServerLoop(void);
281 static int      BackendStartup(Port *port);
282 static int      ProcessStartupPacket(Port *port, bool SSLdone);
283 static void processCancelRequest(Port *port, void *pkt);
284 static int      initMasks(fd_set *rmask);
285 static void report_fork_failure_to_client(Port *port, int errnum);
286 static enum CAC_state canAcceptConnections(void);
287 static long PostmasterRandom(void);
288 static void RandomSalt(char *cryptSalt, char *md5Salt);
289 static void SignalChildren(int signal);
290 static int      CountChildren(void);
291 static bool CreateOptsFile(int argc, char *argv[]);
292 NON_EXEC_STATIC void SSDataBaseInit(int xlop);
293 static pid_t SSDataBase(int xlop);
294 static void
295 postmaster_error(const char *fmt,...)
296 /* This lets gcc check the format string for consistency. */
297 __attribute__((format(printf, 1, 2)));
298
299 #ifdef EXEC_BACKEND
300 #ifdef WIN32
301 pid_t win32_forkexec(const char* path, char *argv[]);
302 #endif
303
304 static pid_t Backend_forkexec(Port *port);
305
306 static unsigned long tmpBackendFileNum = 0;
307 void read_backend_variables(unsigned long id, Port *port);
308 static bool write_backend_variables(Port *port);
309 #endif
310
311 #define StartupDataBase()               SSDataBase(BS_XLOG_STARTUP)
312 #define CheckPointDataBase()    SSDataBase(BS_XLOG_CHECKPOINT)
313 #define StartBackgroundWriter() SSDataBase(BS_XLOG_BGWRITER)
314 #define ShutdownDataBase()              SSDataBase(BS_XLOG_SHUTDOWN)
315
316
317 static void
318 checkDataDir(const char *checkdir)
319 {
320         char            path[MAXPGPATH];
321         FILE       *fp;
322         struct stat stat_buf;
323
324         if (checkdir == NULL)
325         {
326                 fprintf(stderr,
327                                 gettext("%s does not know where to find the database system data.\n"
328                                                 "You must specify the directory that contains the database system\n"
329                                                 "either by specifying the -D invocation option or by setting the\n"
330                                                 "PGDATA environment variable.\n"),
331                                 progname);
332                 ExitPostmaster(2);
333         }
334
335         if (stat(checkdir, &stat_buf) == -1)
336         {
337                 if (errno == ENOENT)
338                         ereport(FATAL,
339                                         (errcode_for_file_access(),
340                                          errmsg("data directory \"%s\" does not exist",
341                                                         checkdir)));
342                 else
343                         ereport(FATAL,
344                                         (errcode_for_file_access(),
345                          errmsg("could not read permissions of directory \"%s\": %m",
346                                         checkdir)));
347         }
348
349         /*
350          * Check if the directory has group or world access.  If so, reject.
351          *
352          * XXX temporarily suppress check when on Windows, because there may not
353          * be proper support for Unix-y file permissions.  Need to think of a
354          * reasonable check to apply on Windows.
355          */
356 #if !defined(__CYGWIN__) && !defined(WIN32)
357         if (stat_buf.st_mode & (S_IRWXG | S_IRWXO))
358                 ereport(FATAL,
359                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
360                                  errmsg("data directory \"%s\" has group or world access",
361                                                 checkdir),
362                                  errdetail("Permissions should be u=rwx (0700).")));
363 #endif
364
365         /* Look for PG_VERSION before looking for pg_control */
366         ValidatePgVersion(checkdir);
367
368         snprintf(path, sizeof(path), "%s/global/pg_control", checkdir);
369
370         fp = AllocateFile(path, PG_BINARY_R);
371         if (fp == NULL)
372         {
373                 fprintf(stderr,
374                                 gettext("%s: could not find the database system\n"
375                                                 "Expected to find it in the directory \"%s\",\n"
376                                                 "but could not open file \"%s\": %s\n"),
377                                 progname, checkdir, path, strerror(errno));
378                 ExitPostmaster(2);
379         }
380         FreeFile(fp);
381 }
382
383
384 #ifdef USE_RENDEZVOUS
385
386 /* reg_reply -- empty callback function for DNSServiceRegistrationCreate() */
387 static void
388 reg_reply(DNSServiceRegistrationReplyErrorType errorCode, void *context)
389 {
390
391 }
392 #endif
393
394 int
395 PostmasterMain(int argc, char *argv[])
396 {
397         int                     opt;
398         int                     status;
399         char            original_extraoptions[MAXPGPATH];
400         char       *potential_DataDir = NULL;
401         int                     i;
402
403         *original_extraoptions = '\0';
404
405         progname = argv[0];
406
407         IsPostmasterEnvironment = true;
408
409         /*
410          * Catch standard options before doing much else.  This even works on
411          * systems without getopt_long.
412          */
413         if (argc > 1)
414         {
415                 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
416                 {
417                         usage(progname);
418                         ExitPostmaster(0);
419                 }
420                 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
421                 {
422                         puts("postmaster (PostgreSQL) " PG_VERSION);
423                         ExitPostmaster(0);
424                 }
425         }
426
427         /*
428          * for security, no dir or file created can be group or other
429          * accessible
430          */
431         umask((mode_t) 0077);
432
433         MyProcPid = getpid();
434
435         /*
436          * Fire up essential subsystems: memory management
437          */
438         MemoryContextInit();
439
440         /*
441          * By default, palloc() requests in the postmaster will be allocated
442          * in the PostmasterContext, which is space that can be recycled by
443          * backends.  Allocated data that needs to be available to backends
444          * should be allocated in TopMemoryContext.
445          */
446         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
447                                                                                           "Postmaster",
448                                                                                           ALLOCSET_DEFAULT_MINSIZE,
449                                                                                           ALLOCSET_DEFAULT_INITSIZE,
450                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
451         MemoryContextSwitchTo(PostmasterContext);
452
453         IgnoreSystemIndexes(false);
454
455         /*
456          * Options setup
457          */
458         InitializeGUCOptions();
459
460         potential_DataDir = getenv("PGDATA");           /* default value */
461
462         opterr = 1;
463
464         while ((opt = getopt(argc, argv, "A:a:B:b:c:D:d:Fh:ik:lm:MN:no:p:Ss-:")) != -1)
465         {
466                 switch (opt)
467                 {
468                         case 'A':
469 #ifdef USE_ASSERT_CHECKING
470                                 SetConfigOption("debug_assertions", optarg, PGC_POSTMASTER, PGC_S_ARGV);
471 #else
472                                 postmaster_error("assert checking is not compiled in");
473 #endif
474                                 break;
475                         case 'a':
476                                 /* Can no longer set authentication method. */
477                                 break;
478                         case 'B':
479                                 SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
480                                 break;
481                         case 'b':
482                                 /* Can no longer set the backend executable file to use. */
483                                 break;
484                         case 'D':
485                                 potential_DataDir = optarg;
486                                 break;
487                         case 'd':
488                                 {
489                                         /* Turn on debugging for the postmaster. */
490                                         char       *debugstr = palloc(strlen("debug") + strlen(optarg) + 1);
491
492                                         sprintf(debugstr, "debug%s", optarg);
493                                         SetConfigOption("log_min_messages", debugstr,
494                                                                         PGC_POSTMASTER, PGC_S_ARGV);
495                                         pfree(debugstr);
496                                         debug_flag = atoi(optarg);
497                                         break;
498                                 }
499                         case 'F':
500                                 SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
501                                 break;
502                         case 'h':
503                                 SetConfigOption("virtual_host", optarg, PGC_POSTMASTER, PGC_S_ARGV);
504                                 break;
505                         case 'i':
506                                 SetConfigOption("tcpip_socket", "true", PGC_POSTMASTER, PGC_S_ARGV);
507                                 break;
508                         case 'k':
509                                 SetConfigOption("unix_socket_directory", optarg, PGC_POSTMASTER, PGC_S_ARGV);
510                                 break;
511 #ifdef USE_SSL
512                         case 'l':
513                                 SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
514                                 break;
515 #endif
516                         case 'm':
517                                 /* Multiplexed backends no longer supported. */
518                                 break;
519                         case 'M':
520
521                                 /*
522                                  * ignore this flag.  This may be passed in because the
523                                  * program was run as 'postgres -M' instead of
524                                  * 'postmaster'
525                                  */
526                                 break;
527                         case 'N':
528                                 /* The max number of backends to start. */
529                                 SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
530                                 break;
531                         case 'n':
532                                 /* Don't reinit shared mem after abnormal exit */
533                                 Reinit = false;
534                                 break;
535                         case 'o':
536
537                                 /*
538                                  * Other options to pass to the backend on the command
539                                  * line -- useful only for debugging.
540                                  */
541                                 strcat(ExtraOptions, " ");
542                                 strcat(ExtraOptions, optarg);
543                                 strcpy(original_extraoptions, optarg);
544                                 break;
545                         case 'p':
546                                 SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
547                                 break;
548                         case 'S':
549
550                                 /*
551                                  * Start in 'S'ilent mode (disassociate from controlling
552                                  * tty). You may also think of this as 'S'ysV mode since
553                                  * it's most badly needed on SysV-derived systems like
554                                  * SVR4 and HP-UX.
555                                  */
556                                 SetConfigOption("silent_mode", "true", PGC_POSTMASTER, PGC_S_ARGV);
557                                 break;
558                         case 's':
559
560                                 /*
561                                  * In the event that some backend dumps core, send
562                                  * SIGSTOP, rather than SIGQUIT, to all its peers.      This
563                                  * lets the wily post_hacker collect core dumps from
564                                  * everyone.
565                                  */
566                                 SendStop = true;
567                                 break;
568                         case 'c':
569                         case '-':
570                                 {
571                                         char       *name,
572                                                            *value;
573
574                                         ParseLongOption(optarg, &name, &value);
575                                         if (!value)
576                                         {
577                                                 if (opt == '-')
578                                                         ereport(ERROR,
579                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
580                                                                          errmsg("--%s requires a value",
581                                                                                         optarg)));
582                                                 else
583                                                         ereport(ERROR,
584                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
585                                                                          errmsg("-c %s requires a value",
586                                                                                         optarg)));
587                                         }
588
589                                         SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
590                                         free(name);
591                                         if (value)
592                                                 free(value);
593                                         break;
594                                 }
595
596                         default:
597                                 fprintf(stderr,
598                                           gettext("Try \"%s --help\" for more information.\n"),
599                                                 progname);
600                                 ExitPostmaster(1);
601                 }
602         }
603
604         /*
605          * Postmaster accepts no non-option switch arguments.
606          */
607         if (optind < argc)
608         {
609                 postmaster_error("invalid argument: \"%s\"", argv[optind]);
610                 fprintf(stderr,
611                                 gettext("Try \"%s --help\" for more information.\n"),
612                                 progname);
613                 ExitPostmaster(1);
614         }
615
616         /*
617          * Now we can set the data directory, and then read postgresql.conf.
618          */
619         checkDataDir(potential_DataDir);        /* issues error messages */
620         SetDataDir(potential_DataDir);
621
622         ProcessConfigFile(PGC_POSTMASTER);
623 #ifdef EXEC_BACKEND
624         write_nondefault_variables(PGC_POSTMASTER);
625 #endif
626
627         /*
628          * Check for invalid combinations of GUC settings.
629          */
630         if (NBuffers < 2 * MaxBackends || NBuffers < 16)
631         {
632                 /*
633                  * Do not accept -B so small that backends are likely to starve
634                  * for lack of buffers.  The specific choices here are somewhat
635                  * arbitrary.
636                  */
637                 postmaster_error("the number of buffers (-B) must be at least twice the number of allowed connections (-N) and at least 16");
638                 ExitPostmaster(1);
639         }
640
641         if (ReservedBackends >= MaxBackends)
642         {
643                 postmaster_error("superuser_reserved_connections must be less than max_connections");
644                 ExitPostmaster(1);
645         }
646
647         /*
648          * Other one-time internal sanity checks can go here.
649          */
650         if (!CheckDateTokenTables())
651         {
652                 postmaster_error("invalid datetoken tables, please fix");
653                 ExitPostmaster(1);
654         }
655
656         /*
657          * Now that we are done processing the postmaster arguments, reset
658          * getopt(3) library so that it will work correctly in subprocesses.
659          */
660         optind = 1;
661 #ifdef HAVE_INT_OPTRESET
662         optreset = 1;                           /* some systems need this too */
663 #endif
664
665         /* For debugging: display postmaster environment */
666         {
667                 extern char **environ;
668                 char      **p;
669
670                 ereport(DEBUG3,
671                                 (errmsg_internal("%s: PostmasterMain: initial environ dump:",
672                                                                  progname)));
673                 ereport(DEBUG3,
674                                 (errmsg_internal("-----------------------------------------")));
675                 for (p = environ; *p; ++p)
676                         ereport(DEBUG3,
677                                         (errmsg_internal("\t%s", *p)));
678                 ereport(DEBUG3,
679                                 (errmsg_internal("-----------------------------------------")));
680         }
681
682         /*
683          * On some systems our dynloader code needs the executable's pathname.
684          */
685         if (FindExec(pg_pathname, progname, "postgres") < 0)
686                 ereport(FATAL,
687                                 (errmsg("%s: could not locate postgres executable",
688                                                 progname)));
689
690         /*
691          * Initialize SSL library, if specified.
692          */
693 #ifdef USE_SSL
694         if (EnableSSL && !NetServer)
695         {
696                 postmaster_error("TCP/IP connections must be enabled for SSL");
697                 ExitPostmaster(1);
698         }
699         if (EnableSSL)
700                 secure_initialize();
701 #endif
702
703         /*
704          * process any libraries that should be preloaded and optionally
705          * pre-initialized
706          */
707         if (preload_libraries_string)
708                 process_preload_libraries(preload_libraries_string);
709
710         /*
711          * Fork away from controlling terminal, if -S specified.
712          *
713          * Must do this before we grab any interlock files, else the interlocks
714          * will show the wrong PID.
715          */
716         if (SilentMode)
717                 pmdaemonize(argc, argv);
718
719         /*
720          * Create lockfile for data directory.
721          *
722          * We want to do this before we try to grab the input sockets, because
723          * the data directory interlock is more reliable than the socket-file
724          * interlock (thanks to whoever decided to put socket files in /tmp
725          * :-(). For the same reason, it's best to grab the TCP socket before
726          * the Unix socket.
727          */
728         CreateDataDirLockFile(DataDir, true);
729
730         /*
731          * Remove old temporary files.  At this point there can be no other
732          * Postgres processes running in this directory, so this should be
733          * safe.
734          */
735         RemovePgTempFiles();
736
737         /*
738          * Establish input sockets.
739          */
740         for (i = 0; i < MAXLISTEN; i++)
741                 ListenSocket[i] = -1;
742
743         if (NetServer)
744         {
745                 if (VirtualHost && VirtualHost[0])
746                 {
747                         char       *curhost,
748                                            *endptr;
749                         char            c = 0;
750
751                         curhost = VirtualHost;
752                         for (;;)
753                         {
754                                 while (*curhost == ' ') /* skip any extra spaces */
755                                         curhost++;
756                                 if (*curhost == '\0')
757                                         break;
758                                 endptr = strchr(curhost, ' ');
759                                 if (endptr)
760                                 {
761                                         c = *endptr;
762                                         *endptr = '\0';
763                                 }
764                                 status = StreamServerPort(AF_UNSPEC, curhost,
765                                                                                   (unsigned short) PostPortNumber,
766                                                                                   UnixSocketDir,
767                                                                                   ListenSocket, MAXLISTEN);
768                                 if (status != STATUS_OK)
769                                         ereport(FATAL,
770                                          (errmsg("could not create listen socket for \"%s\"",
771                                                          curhost)));
772                                 if (endptr)
773                                 {
774                                         *endptr = c;
775                                         curhost = endptr + 1;
776                                 }
777                                 else
778                                         break;
779                         }
780                 }
781                 else
782                 {
783                         status = StreamServerPort(AF_UNSPEC, NULL,
784                                                                           (unsigned short) PostPortNumber,
785                                                                           UnixSocketDir,
786                                                                           ListenSocket, MAXLISTEN);
787                         if (status != STATUS_OK)
788                                 ereport(FATAL,
789                                           (errmsg("could not create TCP/IP listen socket")));
790                 }
791
792 #ifdef USE_RENDEZVOUS
793                 if (rendezvous_name != NULL)
794                 {
795                         DNSServiceRegistrationCreate(rendezvous_name,
796                                                                                  "_postgresql._tcp.",
797                                                                                  "",
798                                                                                  htonl(PostPortNumber),
799                                                                                  "",
800                                                                  (DNSServiceRegistrationReply) reg_reply,
801                                                                                  NULL);
802                 }
803 #endif
804         }
805
806 #ifdef HAVE_UNIX_SOCKETS
807         status = StreamServerPort(AF_UNIX, NULL,
808                                                           (unsigned short) PostPortNumber,
809                                                           UnixSocketDir,
810                                                           ListenSocket, MAXLISTEN);
811         if (status != STATUS_OK)
812                 ereport(FATAL,
813                                 (errmsg("could not create Unix-domain socket")));
814 #endif
815
816         XLOGPathInit();
817
818         /*
819          * Set up shared memory and semaphores.
820          */
821         reset_shared(PostPortNumber);
822
823         /*
824          * Initialize the list of active backends.
825          */
826         BackendList = DLNewList();
827
828         /*
829          * Record postmaster options.  We delay this till now to avoid
830          * recording bogus options (eg, NBuffers too high for available
831          * memory).
832          */
833         if (!CreateOptsFile(argc, argv))
834                 ExitPostmaster(1);
835
836         /*
837          * Set up signal handlers for the postmaster process.
838          *
839          * CAUTION: when changing this list, check for side-effects on the signal
840          * handling setup of child processes.  See tcop/postgres.c,
841          * bootstrap/bootstrap.c, and postmaster/pgstat.c.
842          */
843         pqinitmask();
844         PG_SETMASK(&BlockSig);
845
846         pqsignal(SIGHUP, SIGHUP_handler);       /* reread config file and have
847                                                                                  * children do same */
848         pqsignal(SIGINT, pmdie);        /* send SIGTERM and ShutdownDataBase */
849         pqsignal(SIGQUIT, pmdie);       /* send SIGQUIT and die */
850         pqsignal(SIGTERM, pmdie);       /* wait for children and ShutdownDataBase */
851         pqsignal(SIGALRM, SIG_IGN); /* ignored */
852         pqsignal(SIGPIPE, SIG_IGN); /* ignored */
853         pqsignal(SIGUSR1, sigusr1_handler); /* message from child process */
854         pqsignal(SIGUSR2, dummy_handler);       /* unused, reserve for children */
855         pqsignal(SIGCHLD, reaper);      /* handle child termination */
856         pqsignal(SIGTTIN, SIG_IGN); /* ignored */
857         pqsignal(SIGTTOU, SIG_IGN); /* ignored */
858         /* ignore SIGXFSZ, so that ulimit violations work like disk full */
859 #ifdef SIGXFSZ
860         pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
861 #endif
862
863         /*
864          * Reset whereToSendOutput from Debug (its starting state) to None.
865          * This prevents ereport from sending log messages to stderr unless
866          * the syslog/stderr switch permits.  We don't do this until the
867          * postmaster is fully launched, since startup failures may as well be
868          * reported to stderr.
869          */
870         whereToSendOutput = None;
871
872         /*
873          * On many platforms, the first call of localtime() incurs significant
874          * overhead to load timezone info from the system configuration files.
875          * By doing it once in the postmaster, we avoid having to do it in
876          * every started child process.  The savings are not huge, but they
877          * add up...
878          */
879         {
880                 time_t          now = time(NULL);
881
882                 (void) localtime(&now);
883         }
884
885         /*
886          * Initialize and try to startup the statistics collector process
887          */
888         pgstat_init();
889         pgstat_start();
890
891         /*
892          * Load cached files for client authentication.
893          */
894         load_hba();
895         load_ident();
896         load_user();
897         load_group();
898
899         /*
900          * We're ready to rock and roll...
901          */
902         StartupPID = StartupDataBase();
903
904         status = ServerLoop();
905
906         /*
907          * ServerLoop probably shouldn't ever return, but if it does, close
908          * down.
909          */
910         ExitPostmaster(status != STATUS_OK);
911
912         return 0;                                       /* not reached */
913 }
914
915 static void
916 pmdaemonize(int argc, char *argv[])
917 {
918         int                     i;
919         pid_t           pid;
920
921 #ifdef LINUX_PROFILE
922         struct itimerval prof_itimer;
923 #endif
924
925 #ifdef LINUX_PROFILE
926         /* see comments in BackendRun */
927         getitimer(ITIMER_PROF, &prof_itimer);
928 #endif
929
930 #ifdef WIN32
931         /* FIXME: [fork/exec] to be implemented? */
932         abort();
933 #else
934         pid = fork();
935 #endif
936         if (pid == (pid_t) -1)
937         {
938                 postmaster_error("could not fork background process: %s",
939                                                  strerror(errno));
940                 ExitPostmaster(1);
941         }
942         else if (pid)
943         {                                                       /* parent */
944                 /* Parent should just exit, without doing any atexit cleanup */
945                 _exit(0);
946         }
947
948 #ifdef LINUX_PROFILE
949         setitimer(ITIMER_PROF, &prof_itimer, NULL);
950 #endif
951
952         MyProcPid = getpid();           /* reset MyProcPid to child */
953
954 /* GH: If there's no setsid(), we hopefully don't need silent mode.
955  * Until there's a better solution.
956  */
957 #ifdef HAVE_SETSID
958         if (setsid() < 0)
959         {
960                 postmaster_error("could not dissociate from controlling TTY: %s",
961                                                  strerror(errno));
962                 ExitPostmaster(1);
963         }
964 #endif
965         i = open(NULL_DEV, O_RDWR | PG_BINARY);
966         dup2(i, 0);
967         dup2(i, 1);
968         dup2(i, 2);
969         close(i);
970 }
971
972
973
/*
 * usage -- print the postmaster command-line help on stdout.
 *
 * progname is substituted into the banner line and the "Usage:" line.
 *
 * Only the two messages that really take an argument go through printf();
 * every other message is written with fputs() so that the (possibly
 * translated) text is never interpreted as a format string.  Each message
 * remains a separate gettext() literal.
 */
static void
usage(const char *progname)
{
	printf(gettext("%s is the PostgreSQL server.\n\n"), progname);
	printf(gettext("Usage:\n  %s [OPTION]...\n\n"), progname);
	fputs(gettext("Options:\n"), stdout);
#ifdef USE_ASSERT_CHECKING
	fputs(gettext("  -A 1|0          enable/disable run-time assert checking\n"), stdout);
#endif
	fputs(gettext("  -B NBUFFERS     number of shared buffers\n"), stdout);
	fputs(gettext("  -c NAME=VALUE   set run-time parameter\n"), stdout);
	fputs(gettext("  -d 1-5          debugging level\n"), stdout);
	fputs(gettext("  -D DATADIR      database directory\n"), stdout);
	fputs(gettext("  -F              turn fsync off\n"), stdout);
	fputs(gettext("  -h HOSTNAME     host name or IP address to listen on\n"), stdout);
	fputs(gettext("  -i              enable TCP/IP connections\n"), stdout);
	fputs(gettext("  -k DIRECTORY    Unix-domain socket location\n"), stdout);
#ifdef USE_SSL
	fputs(gettext("  -l              enable SSL connections\n"), stdout);
#endif
	fputs(gettext("  -N MAX-CONNECT  maximum number of allowed connections\n"), stdout);
	fputs(gettext("  -o OPTIONS      pass \"OPTIONS\" to each server process\n"), stdout);
	fputs(gettext("  -p PORT         port number to listen on\n"), stdout);
	fputs(gettext("  -S              silent mode (start in background without logging output)\n"), stdout);
	fputs(gettext("  --help          show this help, then exit\n"), stdout);
	fputs(gettext("  --version       output version information, then exit\n"), stdout);

	fputs(gettext("\nDeveloper options:\n"), stdout);
	fputs(gettext("  -n              do not reinitialize shared memory after abnormal exit\n"), stdout);
	fputs(gettext("  -s              send SIGSTOP to all backend servers if one dies\n"), stdout);

	fputs(gettext("\nPlease read the documentation for the complete list of run-time\n"
				  "configuration settings and how to set them on the command line or in\n"
				  "the configuration file.\n\n"
				  "Report bugs to <pgsql-bugs@postgresql.org>.\n"), stdout);
}
1013
1014 static int
1015 ServerLoop(void)
1016 {
1017         fd_set          readmask;
1018         int                     nSockets;
1019         struct timeval now,
1020                                 later;
1021         struct timezone tz;
1022         int                     i;
1023
1024         gettimeofday(&now, &tz);
1025
1026         nSockets = initMasks(&readmask);
1027
1028         for (;;)
1029         {
1030                 Port       *port;
1031                 fd_set          rmask;
1032                 struct timeval timeout;
1033
1034                 /*
1035                  * The timeout for the select() below is normally set on the basis
1036                  * of the time to the next checkpoint.  However, if for some
1037                  * reason we don't have a next-checkpoint time, time out after 60
1038                  * seconds. This keeps checkpoint scheduling from locking up when
1039                  * we get new connection requests infrequently (since we are
1040                  * likely to detect checkpoint completion just after enabling
1041                  * signals below, after we've already made the decision about how
1042                  * long to wait this time).
1043                  */
1044                 timeout.tv_sec = 60;
1045                 timeout.tv_usec = 0;
1046
1047                 if (CheckPointPID == 0 && checkpointed &&
1048                         Shutdown == NoShutdown && !FatalError && random_seed != 0)
1049                 {
1050                         time_t          now = time(NULL);
1051
1052                         if (CheckPointTimeout + checkpointed > now)
1053                         {
1054                                 /*
1055                                  * Not time for checkpoint yet, so set select timeout
1056                                  */
1057                                 timeout.tv_sec = CheckPointTimeout + checkpointed - now;
1058                         }
1059                         else
1060                         {
1061                                 /* Time to make the checkpoint... */
1062                                 CheckPointPID = CheckPointDataBase();
1063
1064                                 /*
1065                                  * if fork failed, schedule another try at 0.1 normal
1066                                  * delay
1067                                  */
1068                                 if (CheckPointPID == 0)
1069                                 {
1070                                         timeout.tv_sec = CheckPointTimeout / 10;
1071                                         checkpointed = now + timeout.tv_sec - CheckPointTimeout;
1072                                 }
1073                         }
1074                 }
1075
1076                 /*
1077                  * If no background writer process is running and we should
1078                  * do background writing, start one. It doesn't matter if
1079                  * this fails, we'll just try again later.
1080                  */
1081                 if (BgWriterPID == 0 && BgWriterPercent > 0 &&
1082                                 Shutdown == NoShutdown && !FatalError && random_seed != 0)
1083                 {
1084                         BgWriterPID = StartBackgroundWriter();
1085                 }
1086
1087                 /*
1088                  * Wait for something to happen.
1089                  */
1090                 memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1091
1092                 PG_SETMASK(&UnBlockSig);
1093
1094                 if (select(nSockets, &rmask, NULL, NULL, &timeout) < 0)
1095                 {
1096                         PG_SETMASK(&BlockSig);
1097                         if (errno == EINTR || errno == EWOULDBLOCK)
1098                                 continue;
1099                         ereport(LOG,
1100                                         (errcode_for_socket_access(),
1101                                          errmsg("select() failed in postmaster: %m")));
1102                         return STATUS_ERROR;
1103                 }
1104
1105                 /*
1106                  * Block all signals until we wait again.  (This makes it safe for
1107                  * our signal handlers to do nontrivial work.)
1108                  */
1109                 PG_SETMASK(&BlockSig);
1110
1111                 /*
1112                  * Select a random seed at the time of first receiving a request.
1113                  */
1114                 while (random_seed == 0)
1115                 {
1116                         gettimeofday(&later, &tz);
1117
1118                         /*
1119                          * We are not sure how much precision is in tv_usec, so we
1120                          * swap the nibbles of 'later' and XOR them with 'now'. On the
1121                          * off chance that the result is 0, we loop until it isn't.
1122                          */
1123                         random_seed = now.tv_usec ^
1124                                 ((later.tv_usec << 16) |
1125                                  ((later.tv_usec >> 16) & 0xffff));
1126                 }
1127
1128                 /*
1129                  * New connection pending on any of our sockets? If so, fork a
1130                  * child process to deal with it.
1131                  */
1132                 for (i = 0; i < MAXLISTEN; i++)
1133                 {
1134                         if (ListenSocket[i] == -1)
1135                                 break;
1136                         if (FD_ISSET(ListenSocket[i], &rmask))
1137                         {
1138                                 port = ConnCreate(ListenSocket[i]);
1139                                 if (port)
1140                                 {
1141                                         BackendStartup(port);
1142
1143                                         /*
1144                                          * We no longer need the open socket or port structure
1145                                          * in this process
1146                                          */
1147                                         StreamClose(port->sock);
1148                                         ConnFree(port);
1149                                 }
1150                         }
1151                 }
1152
1153                 /* If we have lost the stats collector, try to start a new one */
1154                 if (!pgstat_is_running)
1155                         pgstat_start();
1156         }
1157 }
1158
1159
1160 /*
1161  * Initialise the masks for select() for the ports
1162  * we are listening on.  Return the number of sockets to listen on.
1163  */
1164
1165 static int
1166 initMasks(fd_set *rmask)
1167 {
1168         int                     nsocks = -1;
1169         int                     i;
1170
1171         FD_ZERO(rmask);
1172
1173         for (i = 0; i < MAXLISTEN; i++)
1174         {
1175                 int                     fd = ListenSocket[i];
1176
1177                 if (fd == -1)
1178                         break;
1179                 FD_SET(fd, rmask);
1180                 if (fd > nsocks)
1181                         nsocks = fd;
1182         }
1183
1184         return nsocks + 1;
1185 }
1186
1187
1188 /*
1189  * Read the startup packet and do something according to it.
1190  *
1191  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1192  * not return at all.
1193  *
1194  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1195  * if that's what you want.  Return STATUS_ERROR if you don't want to
1196  * send anything to the client, which would typically be appropriate
1197  * if we detect a communications failure.)
1198  */
1199 static int
1200 ProcessStartupPacket(Port *port, bool SSLdone)
1201 {
1202         int32           len;
1203         void       *buf;
1204         ProtocolVersion proto;
1205         MemoryContext oldcontext;
1206
1207         if (pq_getbytes((char *) &len, 4) == EOF)
1208         {
1209                 /*
1210                  * EOF after SSLdone probably means the client didn't like our
1211                  * response to NEGOTIATE_SSL_CODE.      That's not an error condition,
1212                  * so don't clutter the log with a complaint.
1213                  */
1214                 if (!SSLdone)
1215                         ereport(COMMERROR,
1216                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1217                                          errmsg("incomplete startup packet")));
1218                 return STATUS_ERROR;
1219         }
1220
1221         len = ntohl(len);
1222         len -= 4;
1223
1224         if (len < (int32) sizeof(ProtocolVersion) ||
1225                 len > MAX_STARTUP_PACKET_LENGTH)
1226         {
1227                 ereport(COMMERROR,
1228                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1229                                  errmsg("invalid length of startup packet")));
1230                 return STATUS_ERROR;
1231         }
1232
1233         /*
1234          * Allocate at least the size of an old-style startup packet, plus one
1235          * extra byte, and make sure all are zeroes.  This ensures we will
1236          * have null termination of all strings, in both fixed- and
1237          * variable-length packet layouts.
1238          */
1239         if (len <= (int32) sizeof(StartupPacket))
1240                 buf = palloc0(sizeof(StartupPacket) + 1);
1241         else
1242                 buf = palloc0(len + 1);
1243
1244         if (pq_getbytes(buf, len) == EOF)
1245         {
1246                 ereport(COMMERROR,
1247                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1248                                  errmsg("incomplete startup packet")));
1249                 return STATUS_ERROR;
1250         }
1251
1252         /*
1253          * The first field is either a protocol version number or a special
1254          * request code.
1255          */
1256         port->proto = proto = ntohl(*((ProtocolVersion *) buf));
1257
1258         if (proto == CANCEL_REQUEST_CODE)
1259         {
1260 #ifdef EXEC_BACKEND
1261                 abort(); /* FIXME: [fork/exec] Whoops. Not handled... yet */
1262 #else
1263                 processCancelRequest(port, buf);
1264 #endif
1265                 return 127;                             /* XXX */
1266         }
1267
1268         if (proto == NEGOTIATE_SSL_CODE && !SSLdone)
1269         {
1270                 char            SSLok;
1271
1272 #ifdef USE_SSL
1273                 /* No SSL when disabled or on Unix sockets */
1274                 if (!EnableSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
1275                         SSLok = 'N';
1276                 else
1277                         SSLok = 'S';            /* Support for SSL */
1278 #else
1279                 SSLok = 'N';                    /* No support for SSL */
1280 #endif
1281                 if (send(port->sock, &SSLok, 1, 0) != 1)
1282                 {
1283                         ereport(COMMERROR,
1284                                         (errcode_for_socket_access(),
1285                                  errmsg("failed to send SSL negotiation response: %m")));
1286                         return STATUS_ERROR;    /* close the connection */
1287                 }
1288
1289 #ifdef USE_SSL
1290                 if (SSLok == 'S' && secure_open_server(port) == -1)
1291                         return STATUS_ERROR;
1292 #endif
1293                 /* regular startup packet, cancel, etc packet should follow... */
1294                 /* but not another SSL negotiation request */
1295                 return ProcessStartupPacket(port, true);
1296         }
1297
1298         /* Could add additional special packet types here */
1299
1300         /*
1301          * Set FrontendProtocol now so that ereport() knows what format to
1302          * send if we fail during startup.
1303          */
1304         FrontendProtocol = proto;
1305
1306         /* Check we can handle the protocol the frontend is using. */
1307
1308         if (PG_PROTOCOL_MAJOR(proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) ||
1309           PG_PROTOCOL_MAJOR(proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) ||
1310         (PG_PROTOCOL_MAJOR(proto) == PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) &&
1311          PG_PROTOCOL_MINOR(proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST)))
1312                 ereport(FATAL,
1313                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1314                                  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
1315                                           PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
1316                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST),
1317                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST),
1318                                                 PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST))));
1319
1320         /*
1321          * Now fetch parameters out of startup packet and save them into the
1322          * Port structure.      All data structures attached to the Port struct
1323          * must be allocated in TopMemoryContext so that they won't disappear
1324          * when we pass them to PostgresMain (see BackendRun).  We need not
1325          * worry about leaking this storage on failure, since we aren't in the
1326          * postmaster process anymore.
1327          */
1328         oldcontext = MemoryContextSwitchTo(TopMemoryContext);
1329
1330         if (PG_PROTOCOL_MAJOR(proto) >= 3)
1331         {
1332                 int32           offset = sizeof(ProtocolVersion);
1333
1334                 /*
1335                  * Scan packet body for name/option pairs.      We can assume any
1336                  * string beginning within the packet body is null-terminated,
1337                  * thanks to zeroing extra byte above.
1338                  */
1339                 port->guc_options = NIL;
1340
1341                 while (offset < len)
1342                 {
1343                         char       *nameptr = ((char *) buf) + offset;
1344                         int32           valoffset;
1345                         char       *valptr;
1346
1347                         if (*nameptr == '\0')
1348                                 break;                  /* found packet terminator */
1349                         valoffset = offset + strlen(nameptr) + 1;
1350                         if (valoffset >= len)
1351                                 break;                  /* missing value, will complain below */
1352                         valptr = ((char *) buf) + valoffset;
1353
1354                         if (strcmp(nameptr, "database") == 0)
1355                                 port->database_name = pstrdup(valptr);
1356                         else if (strcmp(nameptr, "user") == 0)
1357                                 port->user_name = pstrdup(valptr);
1358                         else if (strcmp(nameptr, "options") == 0)
1359                                 port->cmdline_options = pstrdup(valptr);
1360                         else
1361                         {
1362                                 /* Assume it's a generic GUC option */
1363                                 port->guc_options = lappend(port->guc_options,
1364                                                                                         pstrdup(nameptr));
1365                                 port->guc_options = lappend(port->guc_options,
1366                                                                                         pstrdup(valptr));
1367                         }
1368                         offset = valoffset + strlen(valptr) + 1;
1369                 }
1370
1371                 /*
1372                  * If we didn't find a packet terminator exactly at the end of the
1373                  * given packet length, complain.
1374                  */
1375                 if (offset != len - 1)
1376                         ereport(FATAL,
1377                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1378                                          errmsg("invalid startup packet layout: expected terminator as last byte")));
1379         }
1380         else
1381         {
1382                 /*
1383                  * Get the parameters from the old-style, fixed-width-fields
1384                  * startup packet as C strings.  The packet destination was
1385                  * cleared first so a short packet has zeros silently added.  We
1386                  * have to be prepared to truncate the pstrdup result for oversize
1387                  * fields, though.
1388                  */
1389                 StartupPacket *packet = (StartupPacket *) buf;
1390
1391                 port->database_name = pstrdup(packet->database);
1392                 if (strlen(port->database_name) > sizeof(packet->database))
1393                         port->database_name[sizeof(packet->database)] = '\0';
1394                 port->user_name = pstrdup(packet->user);
1395                 if (strlen(port->user_name) > sizeof(packet->user))
1396                         port->user_name[sizeof(packet->user)] = '\0';
1397                 port->cmdline_options = pstrdup(packet->options);
1398                 if (strlen(port->cmdline_options) > sizeof(packet->options))
1399                         port->cmdline_options[sizeof(packet->options)] = '\0';
1400                 port->guc_options = NIL;
1401         }
1402
1403         /* Check a user name was given. */
1404         if (port->user_name == NULL || port->user_name[0] == '\0')
1405                 ereport(FATAL,
1406                                 (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
1407                  errmsg("no PostgreSQL user name specified in startup packet")));
1408
1409         /* The database defaults to the user name. */
1410         if (port->database_name == NULL || port->database_name[0] == '\0')
1411                 port->database_name = pstrdup(port->user_name);
1412
1413         if (Db_user_namespace)
1414         {
1415                 /*
1416                  * If user@, it is a global user, remove '@'. We only want to do
1417                  * this if there is an '@' at the end and no earlier in the user
1418                  * string or they may fake as a local user of another database
1419                  * attaching to this database.
1420                  */
1421                 if (strchr(port->user_name, '@') ==
1422                         port->user_name + strlen(port->user_name) - 1)
1423                         *strchr(port->user_name, '@') = '\0';
1424                 else
1425                 {
1426                         /* Append '@' and dbname */
1427                         char       *db_user;
1428
1429                         db_user = palloc(strlen(port->user_name) +
1430                                                          strlen(port->database_name) + 2);
1431                         sprintf(db_user, "%s@%s", port->user_name, port->database_name);
1432                         port->user_name = db_user;
1433                 }
1434         }
1435
1436         /*
1437          * Truncate given database and user names to length of a Postgres
1438          * name.  This avoids lookup failures when overlength names are given.
1439          */
1440         if (strlen(port->database_name) >= NAMEDATALEN)
1441                 port->database_name[NAMEDATALEN - 1] = '\0';
1442         if (strlen(port->user_name) >= NAMEDATALEN)
1443                 port->user_name[NAMEDATALEN - 1] = '\0';
1444
1445         /*
1446          * Done putting stuff in TopMemoryContext.
1447          */
1448         MemoryContextSwitchTo(oldcontext);
1449
1450         /*
1451          * If we're going to reject the connection due to database state, say
1452          * so now instead of wasting cycles on an authentication exchange.
1453          * (This also allows a pg_ping utility to be written.)
1454          */
1455         switch (port->canAcceptConnections)
1456         {
1457                 case CAC_STARTUP:
1458                         ereport(FATAL,
1459                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1460                                          errmsg("the database system is starting up")));
1461                         break;
1462                 case CAC_SHUTDOWN:
1463                         ereport(FATAL,
1464                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1465                                          errmsg("the database system is shutting down")));
1466                         break;
1467                 case CAC_RECOVERY:
1468                         ereport(FATAL,
1469                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
1470                                          errmsg("the database system is in recovery mode")));
1471                         break;
1472                 case CAC_TOOMANY:
1473                         ereport(FATAL,
1474                                         (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
1475                                          errmsg("sorry, too many clients already")));
1476                         break;
1477                 case CAC_OK:
1478                 default:
1479                         break;
1480         }
1481
1482         return STATUS_OK;
1483 }
1484
1485
1486 /*
1487  * The client has sent a cancel request packet, not a normal
1488  * start-a-new-connection packet.  Perform the necessary processing.
1489  * Nothing is sent back to the client.
1490  */
1491 static void
1492 processCancelRequest(Port *port, void *pkt)
1493 {
1494         CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
1495         int                     backendPID;
1496         long            cancelAuthCode;
1497         Dlelem     *curr;
1498         Backend    *bp;
1499
1500         backendPID = (int) ntohl(canc->backendPID);
1501         cancelAuthCode = (long) ntohl(canc->cancelAuthCode);
1502
1503         if (backendPID == CheckPointPID)
1504         {
1505                 ereport(DEBUG2,
1506                                 (errmsg_internal("ignoring cancel request for checkpoint process %d",
1507                                                                  backendPID)));
1508                 return;
1509         }
1510         else if (backendPID == BgWriterPID)
1511         {
1512                 ereport(DEBUG2,
1513                                 (errmsg_internal("ignoring cancel request for bgwriter process %d",
1514                                                                  backendPID)));
1515                 return;
1516         }
1517 #ifdef EXEC_BACKEND
1518         else
1519                 AttachSharedMemoryAndSemaphores();
1520 #endif
1521
1522         /* See if we have a matching backend */
1523
1524         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
1525         {
1526                 bp = (Backend *) DLE_VAL(curr);
1527                 if (bp->pid == backendPID)
1528                 {
1529                         if (bp->cancel_key == cancelAuthCode)
1530                         {
1531                                 /* Found a match; signal that backend to cancel current op */
1532                                 ereport(DEBUG2,
1533                                                 (errmsg_internal("processing cancel request: sending SIGINT to process %d",
1534                                                                                  backendPID)));
1535                                 kill(bp->pid, SIGINT);
1536                         }
1537                         else
1538                                 /* Right PID, wrong key: no way, Jose */
1539                                 ereport(DEBUG2,
1540                                                 (errmsg_internal("bad key in cancel request for process %d",
1541                                                                                  backendPID)));
1542                         return;
1543                 }
1544         }
1545
1546         /* No matching backend */
1547         ereport(DEBUG2,
1548                         (errmsg_internal("bad pid in cancel request for process %d",
1549                                                          backendPID)));
1550 }
1551
1552 /*
1553  * canAcceptConnections --- check to see if database state allows connections.
1554  */
1555 static enum CAC_state
1556 canAcceptConnections(void)
1557 {
1558         /* Can't start backends when in startup/shutdown/recovery state. */
1559         if (Shutdown > NoShutdown)
1560                 return CAC_SHUTDOWN;
1561         if (StartupPID)
1562                 return CAC_STARTUP;
1563         if (FatalError)
1564                 return CAC_RECOVERY;
1565
1566         /*
1567          * Don't start too many children.
1568          *
1569          * We allow more connections than we can have backends here because some
1570          * might still be authenticating; they might fail auth, or some
1571          * existing backend might exit before the auth cycle is completed. The
1572          * exact MaxBackends limit is enforced when a new backend tries to
1573          * join the shared-inval backend array.
1574          */
1575         if (CountChildren() >= 2 * MaxBackends)
1576                 return CAC_TOOMANY;
1577
1578         return CAC_OK;
1579 }
1580
1581
1582 /*
1583  * ConnCreate -- create a local connection data structure
1584  */
1585 static Port *
1586 ConnCreate(int serverFd)
1587 {
1588         Port       *port;
1589
1590         if (!(port = (Port *) calloc(1, sizeof(Port))))
1591         {
1592                 ereport(LOG,
1593                                 (errcode(ERRCODE_OUT_OF_MEMORY),
1594                                  errmsg("out of memory")));
1595                 ExitPostmaster(1);
1596         }
1597
1598         if (StreamConnection(serverFd, port) != STATUS_OK)
1599         {
1600                 StreamClose(port->sock);
1601                 ConnFree(port);
1602                 port = NULL;
1603         }
1604         else
1605         {
1606                 /*
1607                  * Precompute password salt values to use for this connection.
1608                  * It's slightly annoying to do this long in advance of knowing
1609                  * whether we'll need 'em or not, but we must do the random()
1610                  * calls before we fork, not after.  Else the postmaster's random
1611                  * sequence won't get advanced, and all backends would end up
1612                  * using the same salt...
1613                  */
1614                 RandomSalt(port->cryptSalt, port->md5Salt);
1615         }
1616
1617         return port;
1618 }
1619
1620
1621 /*
1622  * ConnFree -- free a local connection data structure
1623  */
1624 static void
1625 ConnFree(Port *conn)
1626 {
1627 #ifdef USE_SSL
1628         secure_close(conn);
1629 #endif
1630         free(conn);
1631 }
1632
1633
1634 /*
1635  * ClosePostmasterPorts -- close all the postmaster's open sockets
1636  *
1637  * This is called during child process startup to release file descriptors
1638  * that are not needed by that child process.  The postmaster still has
1639  * them open, of course.
1640  */
1641 void
1642 ClosePostmasterPorts(bool pgstat_too)
1643 {
1644         int                     i;
1645
1646         /* Close the listen sockets */
1647         for (i = 0; i < MAXLISTEN; i++)
1648         {
1649                 if (ListenSocket[i] != -1)
1650                 {
1651                         StreamClose(ListenSocket[i]);
1652                         ListenSocket[i] = -1;
1653                 }
1654         }
1655
1656         /* Close pgstat control sockets, unless we're starting pgstat itself */
1657         if (pgstat_too)
1658                 pgstat_close_sockets();
1659 }
1660
1661
1662 /*
1663  * reset_shared -- reset shared memory and semaphores
1664  */
1665 static void
1666 reset_shared(unsigned short port)
1667 {
1668         /*
1669          * Create or re-create shared memory and semaphores.
1670          *
1671          * Note: in each "cycle of life" we will normally assign the same IPC
1672          * keys (if using SysV shmem and/or semas), since the port number is
1673          * used to determine IPC keys.  This helps ensure that we will clean
1674          * up dead IPC objects if the postmaster crashes and is restarted.
1675          */
1676         CreateSharedMemoryAndSemaphores(false, MaxBackends, port);
1677 }
1678
1679
1680 /*
1681  * SIGHUP -- reread config files, and tell children to do same
1682  */
1683 static void
1684 SIGHUP_handler(SIGNAL_ARGS)
1685 {
1686         int                     save_errno = errno;
1687
1688         PG_SETMASK(&BlockSig);
1689
1690         if (Shutdown <= SmartShutdown)
1691         {
1692                 ereport(LOG,
1693                          (errmsg("received SIGHUP, reloading configuration files")));
1694                 ProcessConfigFile(PGC_SIGHUP);
1695 #ifdef EXEC_BACKEND
1696                 write_nondefault_variables(PGC_SIGHUP);
1697 #endif
1698                 SignalChildren(SIGHUP);
1699                 load_hba();
1700                 load_ident();
1701
1702                 /*
1703                  * Tell the background writer to terminate so that we
1704                  * will start a new one with a possibly changed config
1705                  */
1706                 if (BgWriterPID != 0)
1707                         kill(BgWriterPID, SIGTERM);
1708         }
1709
1710         PG_SETMASK(&UnBlockSig);
1711
1712         errno = save_errno;
1713 }
1714
1715
1716
1717 /*
1718  * pmdie -- signal handler for processing various postmaster signals.
1719  */
1720 static void
1721 pmdie(SIGNAL_ARGS)
1722 {
1723         int                     save_errno = errno;
1724
1725         PG_SETMASK(&BlockSig);
1726
1727         ereport(DEBUG2,
1728                         (errmsg_internal("postmaster received signal %d",
1729                                                          postgres_signal_arg)));
1730
1731         switch (postgres_signal_arg)
1732         {
1733                 case SIGTERM:
1734
1735                         /*
1736                          * Smart Shutdown:
1737                          *
1738                          * Wait for children to end their work and ShutdownDataBase.
1739                          */
1740                         if (BgWriterPID != 0)
1741                                 kill(BgWriterPID, SIGTERM);
1742                         if (Shutdown >= SmartShutdown)
1743                                 break;
1744                         Shutdown = SmartShutdown;
1745                         ereport(LOG,
1746                                         (errmsg("received smart shutdown request")));
1747                         if (DLGetHead(BackendList)) /* let reaper() handle this */
1748                                 break;
1749
1750                         /*
1751                          * No children left. Shutdown data base system.
1752                          */
1753                         if (StartupPID > 0 || FatalError)       /* let reaper() handle
1754                                                                                                  * this */
1755                                 break;
1756                         if (ShutdownPID > 0)
1757                         {
1758                                 elog(PANIC, "shutdown process %d already running",
1759                                          (int) ShutdownPID);
1760                                 abort();
1761                         }
1762
1763                         ShutdownPID = ShutdownDataBase();
1764                         break;
1765
1766                 case SIGINT:
1767
1768                         /*
1769                          * Fast Shutdown:
1770                          *
1771                          * abort all children with SIGTERM (rollback active transactions
1772                          * and exit) and ShutdownDataBase when they are gone.
1773                          */
1774                         if (BgWriterPID != 0)
1775                                 kill(BgWriterPID, SIGTERM);
1776                         if (Shutdown >= FastShutdown)
1777                                 break;
1778                         ereport(LOG,
1779                                         (errmsg("received fast shutdown request")));
1780                         if (DLGetHead(BackendList)) /* let reaper() handle this */
1781                         {
1782                                 Shutdown = FastShutdown;
1783                                 if (!FatalError)
1784                                 {
1785                                         ereport(LOG,
1786                                                         (errmsg("aborting any active transactions")));
1787                                         SignalChildren(SIGTERM);
1788                                 }
1789                                 break;
1790                         }
1791                         if (Shutdown > NoShutdown)
1792                         {
1793                                 Shutdown = FastShutdown;
1794                                 break;
1795                         }
1796                         Shutdown = FastShutdown;
1797
1798                         /*
1799                          * No children left. Shutdown data base system.
1800                          */
1801                         if (StartupPID > 0 || FatalError)       /* let reaper() handle
1802                                                                                                  * this */
1803                                 break;
1804                         if (ShutdownPID > 0)
1805                         {
1806                                 elog(PANIC, "shutdown process %d already running",
1807                                          (int) ShutdownPID);
1808                                 abort();
1809                         }
1810
1811                         ShutdownPID = ShutdownDataBase();
1812                         break;
1813
1814                 case SIGQUIT:
1815
1816                         /*
1817                          * Immediate Shutdown:
1818                          *
1819                          * abort all children with SIGQUIT and exit without attempt to
1820                          * properly shutdown data base system.
1821                          */
1822                         if (BgWriterPID != 0)
1823                                 kill(BgWriterPID, SIGQUIT);
1824                         ereport(LOG,
1825                                         (errmsg("received immediate shutdown request")));
1826                         if (ShutdownPID > 0)
1827                                 kill(ShutdownPID, SIGQUIT);
1828                         if (StartupPID > 0)
1829                                 kill(StartupPID, SIGQUIT);
1830                         if (DLGetHead(BackendList))
1831                                 SignalChildren(SIGQUIT);
1832                         ExitPostmaster(0);
1833                         break;
1834         }
1835
1836         PG_SETMASK(&UnBlockSig);
1837
1838         errno = save_errno;
1839 }
1840
1841 /*
1842  * Reaper -- signal handler to cleanup after a backend (child) dies.
1843  */
1844 static void
1845 reaper(SIGNAL_ARGS)
1846 {
1847         int                     save_errno = errno;
1848
1849 #ifdef WIN32
1850 #warning fix waidpid for Win32
1851 #else
1852 #ifdef HAVE_WAITPID
1853         int                     status;                 /* backend exit status */
1854
1855 #else
1856         union wait      status;                 /* backend exit status */
1857 #endif
1858         int                     exitstatus;
1859         int                     pid;                    /* process id of dead backend */
1860
1861         PG_SETMASK(&BlockSig);
1862
1863         ereport(DEBUG4,
1864                         (errmsg_internal("reaping dead processes")));
1865 #ifdef HAVE_WAITPID
1866         while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
1867         {
1868                 exitstatus = status;
1869 #else
1870         while ((pid = wait3(&status, WNOHANG, NULL)) > 0)
1871         {
1872                 exitstatus = status.w_status;
1873 #endif
1874
1875                 /*
1876                  * Check if this child was the statistics collector. If so, try to
1877                  * start a new one.  (If fail, we'll try again in future cycles of
1878                  * the main loop.)
1879                  */
1880                 if (pgstat_ispgstat(pid))
1881                 {
1882                         LogChildExit(LOG, gettext("statistics collector process"),
1883                                                  pid, exitstatus);
1884                         pgstat_start();
1885                         continue;
1886                 }
1887
1888                 /*
1889                  * Check if this child was a shutdown or startup process.
1890                  */
1891                 if (ShutdownPID > 0 && pid == ShutdownPID)
1892                 {
1893                         if (exitstatus != 0)
1894                         {
1895                                 LogChildExit(LOG, gettext("shutdown process"),
1896                                                          pid, exitstatus);
1897                                 ExitPostmaster(1);
1898                         }
1899                         /* Normal postmaster exit is here */
1900                         ExitPostmaster(0);
1901                 }
1902
1903                 if (StartupPID > 0 && pid == StartupPID)
1904                 {
1905                         if (exitstatus != 0)
1906                         {
1907                                 LogChildExit(LOG, gettext("startup process"),
1908                                                          pid, exitstatus);
1909                                 ereport(LOG,
1910                                                 (errmsg("aborting startup due to startup process failure")));
1911                                 ExitPostmaster(1);
1912                         }
1913                         StartupPID = 0;
1914
1915                         /*
1916                          * Startup succeeded - remember its ID and RedoRecPtr.
1917                          *
1918                          * NB: this MUST happen before we fork a checkpoint or shutdown
1919                          * subprocess, else they will have wrong local ThisStartUpId.
1920                          */
1921                         SetThisStartUpID();
1922
1923                         FatalError = false; /* done with recovery */
1924
1925                         /*
1926                          * Arrange for first checkpoint to occur after standard delay.
1927                          */
1928                         CheckPointPID = 0;
1929                         checkpointed = time(NULL);
1930
1931                         if (BgWriterPID == 0 && BgWriterPercent > 0 &&
1932                                 Shutdown == NoShutdown && !FatalError && random_seed != 0)
1933                         {
1934                                 BgWriterPID = StartBackgroundWriter();
1935                         }
1936
1937                         /*
1938                          * Go to shutdown mode if a shutdown request was pending.
1939                          */
1940                         if (Shutdown > NoShutdown)
1941                         {
1942                                 if (ShutdownPID > 0)
1943                                 {
1944                                         elog(PANIC, "startup process %d died while shutdown process %d already running",
1945                                                  pid, (int) ShutdownPID);
1946                                         abort();
1947                                 }
1948                                 ShutdownPID = ShutdownDataBase();
1949                         }
1950
1951                         goto reaper_done;
1952                 }
1953
1954                 /*
1955                  * Else do standard child cleanup.
1956                  */
1957                 CleanupProc(pid, exitstatus);
1958
1959         }                                                       /* loop over pending child-death reports */
1960 #endif
1961
1962         if (FatalError)
1963         {
1964                 /*
1965                  * Wait for all children exit, then reset shmem and
1966                  * StartupDataBase.
1967                  */
1968                 if (DLGetHead(BackendList) || StartupPID > 0 || ShutdownPID > 0)
1969                         goto reaper_done;
1970                 ereport(LOG,
1971                         (errmsg("all server processes terminated; reinitializing")));
1972
1973                 shmem_exit(0);
1974                 reset_shared(PostPortNumber);
1975
1976                 StartupPID = StartupDataBase();
1977
1978                 goto reaper_done;
1979         }
1980
1981         if (Shutdown > NoShutdown)
1982         {
1983                 if (DLGetHead(BackendList))
1984                         goto reaper_done;
1985                 if (StartupPID > 0 || ShutdownPID > 0)
1986                         goto reaper_done;
1987                 ShutdownPID = ShutdownDataBase();
1988         }
1989
1990 reaper_done:
1991         PG_SETMASK(&UnBlockSig);
1992
1993         errno = save_errno;
1994 }
1995
1996 /*
1997  * CleanupProc -- cleanup after terminated backend.
1998  *
1999  * Remove all local state associated with backend.
2000  */
2001 static void
2002 CleanupProc(int pid,
2003                         int exitstatus)         /* child's exit status. */
2004 {
2005         Dlelem     *curr,
2006                            *next;
2007         Backend    *bp;
2008
2009         LogChildExit(DEBUG2, gettext("child process"), pid, exitstatus);
2010
2011         /*
2012          * If a backend dies in an ugly way (i.e. exit status not 0) then we
2013          * must signal all other backends to quickdie.  If exit status is zero
2014          * we assume everything is hunky dory and simply remove the backend
2015          * from the active backend list.
2016          */
2017         if (exitstatus == 0)
2018         {
2019                 curr = DLGetHead(BackendList);
2020                 while (curr)
2021                 {
2022                         bp = (Backend *) DLE_VAL(curr);
2023                         if (bp->pid == pid)
2024                         {
2025                                 DLRemove(curr);
2026                                 free(bp);
2027                                 DLFreeElem(curr);
2028                                 break;
2029                         }
2030                         curr = DLGetSucc(curr);
2031                 }
2032
2033                 if (pid == CheckPointPID)
2034                 {
2035                         CheckPointPID = 0;
2036                         if (!FatalError)
2037                         {
2038                                 checkpointed = time(NULL);
2039                                 /* Update RedoRecPtr for future child backends */
2040                                 GetSavedRedoRecPtr();
2041                         }
2042                 }
2043                 else if (pid == BgWriterPID)
2044                         BgWriterPID = 0;
2045                 else
2046                         pgstat_beterm(pid);
2047
2048                 return;
2049         }
2050
2051         /* below here we're dealing with a non-normal exit */
2052
2053         /* Make log entry unless we did so already */
2054         if (!FatalError)
2055         {
2056                 LogChildExit(LOG,
2057                                  (pid == CheckPointPID) ? gettext("checkpoint process") :
2058                                  (pid == BgWriterPID) ? gettext("bgwriter process") :
2059                                          gettext("server process"),
2060                                          pid, exitstatus);
2061                 ereport(LOG,
2062                           (errmsg("terminating any other active server processes")));
2063         }
2064
2065         curr = DLGetHead(BackendList);
2066         while (curr)
2067         {
2068                 next = DLGetSucc(curr);
2069                 bp = (Backend *) DLE_VAL(curr);
2070                 if (bp->pid != pid)
2071                 {
2072                         /*
2073                          * This backend is still alive.  Unless we did so already,
2074                          * tell it to commit hara-kiri.
2075                          *
2076                          * SIGQUIT is the special signal that says exit without proc_exit
2077                          * and let the user know what's going on. But if SendStop is
2078                          * set (-s on command line), then we send SIGSTOP instead, so
2079                          * that we can get core dumps from all backends by hand.
2080                          */
2081                         if (!FatalError)
2082                         {
2083                                 ereport(DEBUG2,
2084                                                 (errmsg_internal("sending %s to process %d",
2085                                                                                  (SendStop ? "SIGSTOP" : "SIGQUIT"),
2086                                                                                  (int) bp->pid)));
2087                                 kill(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
2088                         }
2089                 }
2090                 else
2091                 {
2092                         /*
2093                          * Found entry for freshly-dead backend, so remove it.
2094                          */
2095                         DLRemove(curr);
2096                         free(bp);
2097                         DLFreeElem(curr);
2098                 }
2099                 curr = next;
2100         }
2101
2102         if (pid == CheckPointPID)
2103         {
2104                 CheckPointPID = 0;
2105                 checkpointed = 0;
2106         }
2107         else if (pid == BgWriterPID)
2108         {
2109                 BgWriterPID = 0;
2110         }
2111         else
2112         {
2113                 /*
2114                  * Tell the collector about backend termination
2115                  */
2116                 pgstat_beterm(pid);
2117         }
2118
2119         FatalError = true;
2120 }
2121
/*
 * LogChildExit -- report how a child process ended.
 *
 * lev is the log level to report at, procname a (translated) noun phrase
 * naming the kind of child, pid its process ID, and exitstatus the raw
 * wait status, which is decoded with the WIF* macros.
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
	if (WIFEXITED(exitstatus))
	{
		ereport(lev,

		/*
		 * translator: %s is a noun phrase describing a child process,
		 * such as "server process"
		 */
				(errmsg("%s (PID %d) exited with exit code %d",
						procname, pid, WEXITSTATUS(exitstatus))));
		return;
	}

	if (WIFSIGNALED(exitstatus))
	{
		ereport(lev,

		/*
		 * translator: %s is a noun phrase describing a child process,
		 * such as "server process"
		 */
				(errmsg("%s (PID %d) was terminated by signal %d",
						procname, pid, WTERMSIG(exitstatus))));
		return;
	}

	/* neither a plain exit nor a signal: report the raw status */
	ereport(lev,

	/*
	 * translator: %s is a noun phrase describing a child process,
	 * such as "server process"
	 */
			(errmsg("%s (PID %d) exited with unexpected status %d",
					procname, pid, exitstatus)));
}
2156
2157 /*
2158  * Send a signal to all backend children.
2159  */
2160 static void
2161 SignalChildren(int signal)
2162 {
2163         Dlelem     *curr,
2164                            *next;
2165         Backend    *bp;
2166
2167         curr = DLGetHead(BackendList);
2168         while (curr)
2169         {
2170                 next = DLGetSucc(curr);
2171                 bp = (Backend *) DLE_VAL(curr);
2172
2173                 if (bp->pid != MyProcPid)
2174                 {
2175                         ereport(DEBUG2,
2176                                         (errmsg_internal("sending signal %d to process %d",
2177                                                                          signal,
2178                                                                          (int) bp->pid)));
2179                         kill(bp->pid, signal);
2180                 }
2181
2182                 curr = next;
2183         }
2184 }
2185
2186 /*
2187  * BackendStartup -- start backend process
2188  *
2189  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
2190  */
2191 static int
2192 BackendStartup(Port *port)
2193 {
2194         Backend    *bn;                         /* for backend cleanup */
2195         pid_t           pid;
2196
2197 #ifdef LINUX_PROFILE
2198         struct itimerval prof_itimer;
2199 #endif
2200
2201         /*
2202          * Compute the cancel key that will be assigned to this backend. The
2203          * backend will have its own copy in the forked-off process' value of
2204          * MyCancelKey, so that it can transmit the key to the frontend.
2205          */
2206         MyCancelKey = PostmasterRandom();
2207
2208         /*
2209          * Make room for backend data structure.  Better before the fork() so
2210          * we can handle failure cleanly.
2211          */
2212         bn = (Backend *) malloc(sizeof(Backend));
2213         if (!bn)
2214         {
2215                 ereport(LOG,
2216                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2217                                  errmsg("out of memory")));
2218                 return STATUS_ERROR;
2219         }
2220
2221         /*
2222          * Flush stdio channels just before fork, to avoid double-output
2223          * problems. Ideally we'd use fflush(NULL) here, but there are still a
2224          * few non-ANSI stdio libraries out there (like SunOS 4.1.x) that
2225          * coredump if we do. Presently stdout and stderr are the only stdio
2226          * output channels used by the postmaster, so fflush'ing them should
2227          * be sufficient.
2228          */
2229         fflush(stdout);
2230         fflush(stderr);
2231
2232 #ifdef LINUX_PROFILE
2233
2234         /*
2235          * Linux's fork() resets the profiling timer in the child process. If
2236          * we want to profile child processes then we need to save and restore
2237          * the timer setting.  This is a waste of time if not profiling,
2238          * however, so only do it if commanded by specific -DLINUX_PROFILE
2239          * switch.
2240          */
2241         getitimer(ITIMER_PROF, &prof_itimer);
2242 #endif
2243
2244 #ifdef __BEOS__
2245         /* Specific beos actions before backend startup */
2246         beos_before_backend_startup();
2247 #endif
2248
2249         port->canAcceptConnections = canAcceptConnections();
2250 #ifdef EXEC_BACKEND
2251         pid = Backend_forkexec(port);
2252 #else
2253         pid = fork();
2254
2255         if (pid == 0)                           /* child */
2256         {
2257 #ifdef LINUX_PROFILE
2258                 setitimer(ITIMER_PROF, &prof_itimer, NULL);
2259 #endif
2260
2261 #ifdef __BEOS__
2262                 /* Specific beos backend startup actions */
2263                 beos_backend_startup();
2264 #endif
2265                 free(bn);
2266
2267                 proc_exit(BackendRun(port));
2268         }
2269 #endif
2270
2271         /* in parent, error */
2272         if (pid < 0)
2273         {
2274                 int                     save_errno = errno;
2275
2276 #ifdef __BEOS__
2277                 /* Specific beos backend startup actions */
2278                 beos_backend_startup_failed();
2279 #endif
2280                 free(bn);
2281                 errno = save_errno;
2282                 ereport(LOG,
2283                           (errmsg("could not fork new process for connection: %m")));
2284                 report_fork_failure_to_client(port, save_errno);
2285                 return STATUS_ERROR;
2286         }
2287
2288         /* in parent, normal */
2289         ereport(DEBUG2,
2290                         (errmsg_internal("forked new backend, pid=%d socket=%d",
2291                                                          (int) pid, port->sock)));
2292
2293         /*
2294          * Everything's been successful, it's safe to add this backend to our
2295          * list of backends.
2296          */
2297         bn->pid = pid;
2298         bn->cancel_key = MyCancelKey;
2299         DLAddHead(BackendList, DLNewElem(bn));
2300
2301         return STATUS_OK;
2302 }
2303
2304 /*
2305  * Try to report backend fork() failure to client before we close the
2306  * connection.  Since we do not care to risk blocking the postmaster on
2307  * this connection, we set the connection to non-blocking and try only once.
2308  *
2309  * This is grungy special-purpose code; we cannot use backend libpq since
2310  * it's not up and running.
2311  */
2312 static void
2313 report_fork_failure_to_client(Port *port, int errnum)
2314 {
2315         char            buffer[1000];
2316
2317         /* Format the error message packet (always V2 protocol) */
2318         snprintf(buffer, sizeof(buffer), "E%s%s\n",
2319                          gettext("could not fork new process for connection: "),
2320                          strerror(errnum));
2321
2322         /* Set port to non-blocking.  Don't do send() if this fails */
2323         if (FCNTL_NONBLOCK(port->sock) < 0)
2324                 return;
2325
2326         send(port->sock, buffer, strlen(buffer) + 1, 0);
2327 }
2328
2329
/*
 * split_opts -- break an option string into words and append them to argv
 *
 * argv:	array receiving pointers to the individual words; the caller
 *			must have sized it generously enough
 * argcp:	in/out count of entries used in argv; advanced once per word
 * s:		option string, may be NULL or empty (then nothing is appended)
 *
 * NB: the string is destructively modified!  Each word is terminated in
 * place by overwriting the whitespace character that follows it, and the
 * argv entries point into the string itself.
 *
 * Since no current POSTGRES arguments require any quoting characters,
 * we can use the simple-minded tactic of assuming each run of
 * non-whitespace characters is a separate argv element.
 *
 * If you don't like that, well, we *used* to pass the whole option string
 * as ONE argument to execl(), which was even less intelligent...
 */
static void
split_opts(char **argv, int *argcp, char *s)
{
	char	   *cursor = s;

	if (cursor == NULL)
		return;

	for (;;)
	{
		/* step over the whitespace in front of the next word */
		while (isspace((unsigned char) *cursor))
			cursor++;
		if (*cursor == '\0')
			return;

		/* record where the word starts */
		argv[*argcp] = cursor;
		*argcp += 1;

		/* find the end of the word */
		while (*cursor != '\0' && !isspace((unsigned char) *cursor))
			cursor++;
		if (*cursor == '\0')
			return;

		/* cut the word off and carry on behind it */
		*cursor++ = '\0';
	}
}
2358
2359
2360 /*
2361  * BackendInit/Run -- perform authentication [BackendInit], and if successful,
2362  *              set up the backend's argument list [BackendRun] and invoke
2363  *              backend main()
2364  *
2365  * returns:
2366  *              Shouldn't return at all.
2367  *              If PostgresMain() fails, return status.
2368  */
2369 static void
2370 BackendInit(Port *port)
2371 {
2372         int                     status;
2373         struct timeval now;
2374         struct timezone tz;
2375         char            remote_host[NI_MAXHOST];
2376         char            remote_port[NI_MAXSERV];
2377
2378         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
2379
2380         ClientAuthInProgress = true;    /* limit visibility of log messages */
2381
2382         /* We don't want the postmaster's proc_exit() handlers */
2383         on_exit_reset();
2384
2385         /*
2386          * Signal handlers setting is moved to tcop/postgres...
2387          */
2388
2389         /* Save port etc. for ps status */
2390         MyProcPort = port;
2391
2392         /* Reset MyProcPid to new backend's pid */
2393         MyProcPid = getpid();
2394
2395         /*
2396          * Initialize libpq and enable reporting of ereport errors to the
2397          * client. Must do this now because authentication uses libpq to send
2398          * messages.
2399          */
2400         pq_init();                                      /* initialize libpq to talk to client */
2401         whereToSendOutput = Remote; /* now safe to ereport to client */
2402
2403         /*
2404          * We arrange for a simple exit(0) if we receive SIGTERM or SIGQUIT
2405          * during any client authentication related communication. Otherwise
2406          * the postmaster cannot shutdown the database FAST or IMMED cleanly
2407          * if a buggy client blocks a backend during authentication.
2408          */
2409         pqsignal(SIGTERM, authdie);
2410         pqsignal(SIGQUIT, authdie);
2411         pqsignal(SIGALRM, authdie);
2412         PG_SETMASK(&AuthBlockSig);
2413
2414         /*
2415          * Get the remote host name and port for logging and status display.
2416          */
2417         remote_host[0] = '\0';
2418         remote_port[0] = '\0';
2419         if (getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2420                                                 remote_host, sizeof(remote_host),
2421                                                 remote_port, sizeof(remote_port),
2422                                    (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV))
2423         {
2424                 getnameinfo_all(&port->raddr.addr, port->raddr.salen,
2425                                                 remote_host, sizeof(remote_host),
2426                                                 remote_port, sizeof(remote_port),
2427                                                 NI_NUMERICHOST | NI_NUMERICSERV);
2428         }
2429
2430         if (Log_connections)
2431                 ereport(LOG,
2432                                 (errmsg("connection received: host=%s port=%s",
2433                                                 remote_host, remote_port)));
2434
2435         if (LogSourcePort)
2436         {
2437                 /* modify remote_host for use in ps status */
2438                 char            tmphost[NI_MAXHOST];
2439
2440                 snprintf(tmphost, sizeof(tmphost), "%s:%s", remote_host, remote_port);
2441                 StrNCpy(remote_host, tmphost, sizeof(remote_host));
2442         }
2443
2444         /*
2445          * Ready to begin client interaction.  We will give up and exit(0)
2446          * after a time delay, so that a broken client can't hog a connection
2447          * indefinitely.  PreAuthDelay doesn't count against the time limit.
2448          */
2449         if (!enable_sig_alarm(AuthenticationTimeout * 1000, false))
2450                 elog(FATAL, "could not set timer for authorization timeout");
2451
2452         /*
2453          * Receive the startup packet (which might turn out to be a cancel
2454          * request packet).
2455          */
2456         status = ProcessStartupPacket(port, false);
2457
2458         if (status != STATUS_OK)
2459                 proc_exit(0);
2460
2461         /*
2462          * Now that we have the user and database name, we can set the process
2463          * title for ps.  It's good to do this as early as possible in
2464          * startup.
2465          */
2466         init_ps_display(port->user_name, port->database_name, remote_host);
2467         set_ps_display("authentication");
2468
2469         /*
2470          * Now perform authentication exchange.
2471          */
2472         ClientAuthentication(port); /* might not return, if failure */
2473
2474         /*
2475          * Done with authentication.  Disable timeout, and prevent
2476          * SIGTERM/SIGQUIT again until backend startup is complete.
2477          */
2478         if (!disable_sig_alarm(false))
2479                 elog(FATAL, "could not disable timer for authorization timeout");
2480         PG_SETMASK(&BlockSig);
2481
2482         if (Log_connections)
2483                 ereport(LOG,
2484                                 (errmsg("connection authorized: user=%s database=%s",
2485                                                 port->user_name, port->database_name)));
2486
2487         /*
2488          * Don't want backend to be able to see the postmaster random number
2489          * generator state.  We have to clobber the static random_seed *and*
2490          * start a new random sequence in the random() library function.
2491          */
2492         random_seed = 0;
2493         gettimeofday(&now, &tz);
2494         srandom((unsigned int) now.tv_usec);
2495 }
2496
2497
2498 static int
2499 BackendRun(Port *port)
2500 {
2501         char      **av;
2502         int                     maxac;
2503         int                     ac;
2504         char            debugbuf[32];
2505         char            protobuf[32];
2506         int                     i;
2507
2508         /*
2509          * Let's clean up ourselves as the postmaster child, and
2510          * close the postmaster's other sockets
2511          */
2512         ClosePostmasterPorts(true);
2513
2514         /*
2515          * PreAuthDelay is a debugging aid for investigating problems in the
2516          * authentication cycle: it can be set in postgresql.conf to allow
2517          * time to attach to the newly-forked backend with a debugger. (See
2518          * also the -W backend switch, which we allow clients to pass through
2519          * PGOPTIONS, but it is not honored until after authentication.)
2520          */
2521         if (PreAuthDelay > 0)
2522                 sleep(PreAuthDelay);
2523
2524         /* Will exit on failure */
2525         BackendInit(port);
2526
2527
2528         /* ----------------
2529          * Now, build the argv vector that will be given to PostgresMain.
2530          *
2531          * The layout of the command line is
2532          *              postgres [secure switches] -p databasename [insecure switches]
2533          * where the switches after -p come from the client request.
2534          *
2535          * The maximum possible number of commandline arguments that could come
2536          * from ExtraOptions or port->cmdline_options is (strlen + 1) / 2; see
2537          * split_opts().
2538          * ----------------
2539          */
2540         maxac = 10;                                     /* for fixed args supplied below */
2541         maxac += (strlen(ExtraOptions) + 1) / 2;
2542         if (port->cmdline_options)
2543                 maxac += (strlen(port->cmdline_options) + 1) / 2;
2544
2545         av = (char **) MemoryContextAlloc(TopMemoryContext,
2546                                                                           maxac * sizeof(char *));
2547         ac = 0;
2548
2549         av[ac++] = "postgres";
2550
2551         /*
2552          * Pass the requested debugging level along to the backend.
2553          */
2554         if (debug_flag > 0)
2555         {
2556                 snprintf(debugbuf, sizeof(debugbuf), "-d%d", debug_flag);
2557                 av[ac++] = debugbuf;
2558         }
2559
2560         /*
2561          * Pass any backend switches specified with -o in the postmaster's own
2562          * command line.  We assume these are secure.
2563          */
2564         split_opts(av, &ac, ExtraOptions);
2565
2566         /* Tell the backend what protocol the frontend is using. */
2567         snprintf(protobuf, sizeof(protobuf), "-v%u", port->proto);
2568         av[ac++] = protobuf;
2569
2570 #ifdef EXEC_BACKEND
2571         /* pass data dir before end of secure switches (-p) */
2572         av[ac++] = "-D";
2573         av[ac++] = DataDir;
2574 #endif
2575
2576         /*
2577          * Tell the backend it is being called from the postmaster, and which
2578          * database to use.  -p marks the end of secure switches.
2579          */
2580         av[ac++] = "-p";
2581         av[ac++] = port->database_name;
2582
2583         /*
2584          * Pass the (insecure) option switches from the connection request.
2585          * (It's OK to mangle port->cmdline_options now.)
2586          */
2587         if (port->cmdline_options)
2588                 split_opts(av, &ac, port->cmdline_options);
2589
2590         av[ac] = NULL;
2591
2592         Assert(ac < maxac);
2593
2594         /*
2595          * Release postmaster's working memory context so that backend can
2596          * recycle the space.  Note this does not trash *MyProcPort, because
2597          * ConnCreate() allocated that space with malloc() ... else we'd need
2598          * to copy the Port data here.  Also, subsidiary data such as the
2599          * username isn't lost either; see ProcessStartupPacket().
2600          */
2601         MemoryContextSwitchTo(TopMemoryContext);
2602 #ifndef EXEC_BACKEND
2603         MemoryContextDelete(PostmasterContext);
2604 #endif
2605         PostmasterContext = NULL;
2606
2607         /*
2608          * Debug: print arguments being passed to backend
2609          */
2610         ereport(DEBUG3,
2611                         (errmsg_internal("%s child[%d]: starting with (",
2612                                                          progname, getpid())));
2613         for (i = 0; i < ac; ++i)
2614                 ereport(DEBUG3,
2615                                 (errmsg_internal("\t%s", av[i])));
2616         ereport(DEBUG3,
2617                         (errmsg_internal(")")));
2618
2619         ClientAuthInProgress = false;           /* client_min_messages is active
2620                                                                                  * now */
2621
2622         return (PostgresMain(ac, av, port->user_name));
2623 }
2624
2625
2626 #ifdef EXEC_BACKEND
2627
2628
2629 /*
2630  * SubPostmasterMain -- prepare the fork/exec'd process to be in an equivalent
2631  *                      state (for calling BackendRun) as a forked process.
2632  *
2633  * returns:
2634  *              Shouldn't return at all.
2635  */
2636 void
2637 SubPostmasterMain(int argc, char* argv[])
2638 {
2639         unsigned long   backendID;
2640         Port                    port;
2641
2642         memset((void*)&port, 0, sizeof(Port));
2643         Assert(argc == 2);
2644
2645         /* Setup global context */
2646         MemoryContextInit();
2647         InitializeGUCOptions();
2648
2649         /* Parse passed-in context */
2650         argc = 0;
2651         backendID               = (unsigned long)atol(argv[argc++]);
2652         DataDir                 = strdup(argv[argc++]);
2653
2654         /* Read in file-based context */
2655         read_nondefault_variables();
2656         read_backend_variables(backendID,&port);
2657
2658         /* FIXME: [fork/exec] Ugh */
2659         load_hba();
2660         load_ident();
2661         load_user();
2662         load_group();
2663
2664         /* Run backend */
2665         proc_exit(BackendRun(&port));
2666 }
2667
2668
2669 /*
2670  * Backend_forkexec -- fork/exec off a backend process
2671  *
2672  * returns:
2673  *              the pid of the fork/exec'd process
2674  */
2675 static pid_t
2676 Backend_forkexec(Port *port)
2677 {
2678         pid_t pid;
2679         char *av[5];
2680         int ac = 0, bufc = 0, i;
2681         char buf[2][MAXPGPATH];
2682
2683         if (!write_backend_variables(port))
2684                 return -1; /* log made by write_backend_variables */
2685
2686         av[ac++] = "postgres";
2687         av[ac++] = "-forkexec";
2688
2689         /* Format up context to pass to exec'd process */
2690         snprintf(buf[bufc++],MAXPGPATH,"%lu",tmpBackendFileNum);
2691         /* FIXME: [fork/exec] whitespaces in directories? */
2692         snprintf(buf[bufc++],MAXPGPATH,"%s",DataDir);
2693
2694         /* Add to the arg list */
2695         Assert(bufc <= lengthof(buf));
2696         for (i = 0; i < bufc; i++)
2697                 av[ac++] = buf[i];
2698
2699         /* FIXME: [fork/exec] ExtraOptions? */
2700
2701         av[ac++] = NULL;
2702         Assert(ac <= lengthof(av));
2703
2704 #ifdef WIN32
2705         pid = win32_forkexec(pg_pathname,av); /* logs on error */
2706 #else
2707         /* Fire off execv in child */
2708         if ((pid = fork()) == 0 && (execv(pg_pathname,av) == -1))
2709                 /*
2710                  * FIXME: [fork/exec] suggestions for what to do here?
2711                  *  Probably OK to issue error (unlike pgstat case)
2712                  */
2713                 abort();
2714 #endif
2715         return pid; /* Parent returns pid */
2716 }
2717
2718 #endif
2719
2720
2721 /*
2722  * ExitPostmaster -- cleanup
2723  *
2724  * Do NOT call exit() directly --- always go through here!
2725  */
2726 static void
2727 ExitPostmaster(int status)
2728 {
2729         /* should cleanup shared memory and kill all backends */
2730
2731         /*
2732          * Not sure of the semantics here.      When the Postmaster dies, should
2733          * the backends all be killed? probably not.
2734          *
2735          * MUST         -- vadim 05-10-1999
2736          */
2737         /* Should I use true instead? */
2738         ClosePostmasterPorts(false);
2739
2740         proc_exit(status);
2741 }
2742
2743 /*
2744  * sigusr1_handler - handle signal conditions from child processes
2745  */
2746 static void
2747 sigusr1_handler(SIGNAL_ARGS)
2748 {
2749         int                     save_errno = errno;
2750
2751         PG_SETMASK(&BlockSig);
2752
2753         if (CheckPostmasterSignal(PMSIGNAL_DO_CHECKPOINT))
2754         {
2755                 if (CheckPointWarning != 0)
2756                 {
2757                         /*
2758                          * This only times checkpoints forced by running out of
2759                          * segment files.  Other checkpoints could reduce the
2760                          * frequency of forced checkpoints.
2761                          */
2762                         time_t          now = time(NULL);
2763
2764                         if (LastSignalledCheckpoint != 0)
2765                         {
2766                                 int                     elapsed_secs = now - LastSignalledCheckpoint;
2767
2768                                 if (elapsed_secs < CheckPointWarning)
2769                                         ereport(LOG,
2770                                                         (errmsg("checkpoints are occurring too frequently (%d seconds apart)",
2771                                                                         elapsed_secs),
2772                                         errhint("Consider increasing the configuration parameter \"checkpoint_segments\".")));
2773                         }
2774                         LastSignalledCheckpoint = now;
2775                 }
2776
2777                 /*
2778                  * Request to schedule a checkpoint
2779                  *
2780                  * Ignore request if checkpoint is already running or checkpointing
2781                  * is currently disabled
2782                  */
2783                 if (CheckPointPID == 0 && checkpointed &&
2784                         Shutdown == NoShutdown && !FatalError && random_seed != 0)
2785                 {
2786                         CheckPointPID = CheckPointDataBase();
2787                         /* note: if fork fails, CheckPointPID stays 0; nothing happens */
2788                 }
2789         }
2790
2791         if (CheckPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE))
2792         {
2793                 /*
2794                  * Password or group file has changed.
2795                  */
2796                 load_user();
2797                 load_group();
2798         }
2799
2800         if (CheckPostmasterSignal(PMSIGNAL_WAKEN_CHILDREN))
2801         {
2802                 /*
2803                  * Send SIGUSR2 to all children (triggers AsyncNotifyHandler). See
2804                  * storage/ipc/sinvaladt.c for the use of this.
2805                  */
2806                 if (Shutdown == NoShutdown)
2807                         SignalChildren(SIGUSR2);
2808         }
2809
2810         PG_SETMASK(&UnBlockSig);
2811
2812         errno = save_errno;
2813 }
2814
2815
/*
 * dummy_handler -- signal handler that does nothing at all
 *
 * Installed for signals the postmaster itself has no use for but which
 * backends do use.  If we SIG_IGN such signals in the postmaster, then a
 * newly started backend might drop a signal that arrives before it's able
 * to reconfigure its signal processing.  (See notes in postgres.c.)
 */
static void
dummy_handler(SIGNAL_ARGS)
{
	/* intentionally empty */
}
2828
2829
/*
 * CharRemap: map an integer onto one of 62 alphanumeric characters.
 *
 * The argument is folded into the range 0..61 (sign dropped, modulo 62)
 * and then encoded as 'A'..'Z' for 0..25, 'a'..'z' for 26..51 and
 * '0'..'9' for 52..61.
 */
static char
CharRemap(long ch)
{
	/*
	 * Reduce before dropping the sign: the remainder lies within -61..61,
	 * so negating it can never overflow.  Negating the raw argument first
	 * would be undefined for LONG_MIN.  With truncating division the
	 * result is the same as negate-then-reduce for every other input.
	 */
	ch = ch % 62;
	if (ch < 0)
		ch = -ch;

	if (ch < 26)
		return 'A' + ch;

	ch -= 26;
	if (ch < 26)
		return 'a' + ch;

	ch -= 26;
	return '0' + ch;
}
2851
/*
 * RandomSalt -- fill in fresh salts for password authentication
 *
 * cryptSalt: receives two characters produced by CharRemap()
 * md5Salt:   receives four bytes, each in the range 1..255
 */
static void
RandomSalt(char *cryptSalt, char *md5Salt)
{
	long		first_draw = PostmasterRandom();
	int			i;

	cryptSalt[0] = CharRemap(first_draw % 62);
	cryptSalt[1] = CharRemap(first_draw / 62);

	/*
	 * It's okay to reuse the first random value for one of the MD5 salt
	 * bytes, since only one of the two salts will be sent to the client.
	 * The remaining bytes each get random bits of their own.
	 *
	 * We use % 255, sacrificing one possible byte value, so as to ensure
	 * that all bits of the random() value participate in the result.
	 * While at it, add one to avoid generating any null bytes.
	 */
	md5Salt[0] = (first_draw % 255) + 1;
	for (i = 1; i < 4; i++)
		md5Salt[i] = (PostmasterRandom() % 255) + 1;
}
2880
2881 /*
2882  * PostmasterRandom
2883  */
2884 static long
2885 PostmasterRandom(void)
2886 {
2887         static bool initialized = false;
2888
2889         if (!initialized)
2890         {
2891                 Assert(random_seed != 0);
2892                 srandom(random_seed);
2893                 initialized = true;
2894         }
2895
2896         return random();
2897 }
2898
2899 /*
2900  * Count up number of child processes.
2901  */
2902 static int
2903 CountChildren(void)
2904 {
2905         Dlelem     *curr;
2906         Backend    *bp;
2907         int                     cnt = 0;
2908
2909         for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2910         {
2911                 bp = (Backend *) DLE_VAL(curr);
2912                 if (bp->pid != MyProcPid)
2913                         cnt++;
2914         }
2915         if (CheckPointPID != 0)
2916                 cnt--;
2917         if (BgWriterPID != 0)
2918                 cnt--;
2919         return cnt;
2920 }
2921
2922 /*
2923  * Fire off a subprocess for startup/shutdown/checkpoint.
2924  *
2925  * Return value of SSDataBase is subprocess' PID, or 0 if failed to start subprocess
2926  * (0 is returned only for checkpoint case).
2927  *
2928  * note: in the EXEC_BACKEND case, we delay the fork until argument list has been
2929  *      established
2930  */
2931 NON_EXEC_STATIC void
2932 SSDataBaseInit(int xlop)
2933 {
2934         const char *statmsg;
2935
2936         IsUnderPostmaster = true;               /* we are a postmaster subprocess
2937                                                                          * now */
2938
2939         /* Lose the postmaster's on-exit routines and port connections */
2940         on_exit_reset();
2941
2942         /*
2943          * Identify myself via ps
2944          */
2945         switch (xlop)
2946         {
2947                 case BS_XLOG_STARTUP:
2948                         statmsg = "startup subprocess";
2949                         break;
2950                 case BS_XLOG_CHECKPOINT:
2951                         statmsg = "checkpoint subprocess";
2952                         break;
2953                 case BS_XLOG_BGWRITER:
2954                         statmsg = "bgwriter subprocess";
2955                         break;
2956                 case BS_XLOG_SHUTDOWN:
2957                         statmsg = "shutdown subprocess";
2958                         break;
2959                 default:
2960                         statmsg = "??? subprocess";
2961                         break;
2962         }
2963         init_ps_display(statmsg, "", "");
2964         set_ps_display("");
2965 }
2966
2967
2968 static pid_t
2969 SSDataBase(int xlop)
2970 {
2971         pid_t           pid;
2972         Backend    *bn;
2973 #ifndef EXEC_BACKEND
2974 #ifdef LINUX_PROFILE
2975         struct itimerval prof_itimer;
2976 #endif
2977 #else
2978         char            idbuf[32];
2979 #endif
2980
2981         fflush(stdout);
2982         fflush(stderr);
2983
2984 #ifndef EXEC_BACKEND
2985 #ifdef LINUX_PROFILE
2986         /* see comments in BackendRun */
2987         getitimer(ITIMER_PROF, &prof_itimer);
2988 #endif
2989
2990 #ifdef __BEOS__
2991         /* Specific beos actions before backend startup */
2992         beos_before_backend_startup();
2993 #endif
2994
2995         /* Non EXEC_BACKEND case; fork here */
2996         if ((pid = fork()) == 0)        /* child */
2997 #endif
2998         {
2999                 char       *av[10];
3000                 int                     ac = 0;
3001                 char            nbbuf[32];
3002                 char            xlbuf[32];
3003
3004 #ifndef EXEC_BACKEND
3005 #ifdef LINUX_PROFILE
3006                 setitimer(ITIMER_PROF, &prof_itimer, NULL);
3007 #endif
3008
3009 #ifdef __BEOS__
3010                 /* Specific beos actions after backend startup */
3011                 beos_backend_startup();
3012 #endif
3013
3014                 /* Close the postmaster's sockets */
3015                 ClosePostmasterPorts(true);
3016
3017                 SSDataBaseInit(xlop);
3018 #else
3019                 if (!write_backend_variables(NULL))
3020                         return -1; /* log issued by write_backend_variables */
3021 #endif
3022
3023                 /* Set up command-line arguments for subprocess */
3024                 av[ac++] = "postgres";
3025
3026 #ifdef EXEC_BACKEND
3027                 av[ac++] = "-boot";
3028 #endif
3029                 snprintf(nbbuf, sizeof(nbbuf), "-B%d", NBuffers);
3030                 av[ac++] = nbbuf;
3031
3032                 snprintf(xlbuf, sizeof(xlbuf), "-x%d", xlop);
3033                 av[ac++] = xlbuf;
3034
3035 #ifdef EXEC_BACKEND
3036                 /* pass data dir before end of secure switches (-p) */
3037                 av[ac++] = "-D";
3038                 av[ac++] = DataDir;
3039
3040                 /* and the backend identifier + dbname */
3041                 snprintf(idbuf, sizeof(idbuf), "-p%lu,template1", tmpBackendFileNum);
3042                 av[ac++] = idbuf;
3043 #else
3044                 av[ac++] = "-p";
3045                 av[ac++] = "template1";
3046 #endif
3047
3048                 av[ac] = NULL;
3049
3050                 Assert(ac < lengthof(av));
3051
3052 #ifdef EXEC_BACKEND
3053                 /* EXEC_BACKEND case; fork/exec here */
3054 #ifdef WIN32
3055                 pid = win32_forkexec(pg_pathname,av); /* logs on error */
3056 #else
3057                 if ((pid = fork()) == 0 && (execv(pg_pathname,av) == -1))
3058                 {
3059                         /* in child */
3060                         elog(ERROR,"unable to execv in SSDataBase: %m");
3061                         exit(0);
3062                 }
3063 #endif
3064 #else
3065                 BootstrapMain(ac, av);
3066                 ExitPostmaster(0);
3067 #endif
3068         }
3069
3070         /* in parent */
3071         if (pid < 0)
3072         {
3073 #ifndef EXEC_BACKEND
3074 #ifdef __BEOS__
3075                 /* Specific beos actions before backend startup */
3076                 beos_backend_startup_failed();
3077 #endif
3078 #endif
3079                 switch (xlop)
3080                 {
3081                         case BS_XLOG_STARTUP:
3082                                 ereport(LOG,
3083                                                 (errmsg("could not fork startup process: %m")));
3084                                 break;
3085                         case BS_XLOG_CHECKPOINT:
3086                                 ereport(LOG,
3087                                           (errmsg("could not fork checkpoint process: %m")));
3088                                 break;
3089                         case BS_XLOG_BGWRITER:
3090                                 ereport(LOG,
3091                                           (errmsg("could not fork bgwriter process: %m")));
3092                                 break;
3093                         case BS_XLOG_SHUTDOWN:
3094                                 ereport(LOG,
3095                                                 (errmsg("could not fork shutdown process: %m")));
3096                                 break;
3097                         default:
3098                                 ereport(LOG,
3099                                                 (errmsg("could not fork process: %m")));
3100                                 break;
3101                 }
3102
3103                 /*
3104                  * fork failure is fatal during startup/shutdown, but there's no
3105                  * need to choke if a routine checkpoint or starting a background
3106                  * writer fails.
3107                  */
3108                 if (xlop == BS_XLOG_CHECKPOINT)
3109                         return 0;
3110                 if (xlop == BS_XLOG_BGWRITER)
3111                         return 0;
3112                 ExitPostmaster(1);
3113         }
3114
3115         /*
3116          * The startup and shutdown processes are not considered normal
3117          * backends, but the checkpoint and bgwriter processes are.
3118          * They must be added to the list of backends.
3119          */
3120         if (xlop == BS_XLOG_CHECKPOINT || xlop == BS_XLOG_BGWRITER)
3121         {
3122                 if (!(bn = (Backend *) malloc(sizeof(Backend))))
3123                 {
3124                         ereport(LOG,
3125                                         (errcode(ERRCODE_OUT_OF_MEMORY),
3126                                          errmsg("out of memory")));
3127                         ExitPostmaster(1);
3128                 }
3129
3130                 bn->pid = pid;
3131                 bn->cancel_key = PostmasterRandom();
3132                 DLAddHead(BackendList, DLNewElem(bn));
3133
3134                 /*
3135                  * Since this code is executed periodically, it's a fine place to
3136                  * do other actions that should happen every now and then on no
3137                  * particular schedule.  Such as...
3138                  */
3139                 TouchSocketFile();
3140                 TouchSocketLockFile();
3141         }
3142
3143         return pid;
3144 }
3145
3146
3147 /*
3148  * Create the opts file
3149  */
3150 static bool
3151 CreateOptsFile(int argc, char *argv[])
3152 {
3153         char            fullprogname[MAXPGPATH];
3154         char            filename[MAXPGPATH];
3155         FILE       *fp;
3156         int                     i;
3157
3158         if (FindExec(fullprogname, argv[0], "postmaster") < 0)
3159                 return false;
3160
3161         snprintf(filename, sizeof(filename), "%s/postmaster.opts", DataDir);
3162
3163         if ((fp = fopen(filename, "w")) == NULL)
3164         {
3165                 elog(LOG, "could not create file \"%s\": %m", filename);
3166                 return false;
3167         }
3168
3169         fprintf(fp, "%s", fullprogname);
3170         for (i = 1; i < argc; i++)
3171                 fprintf(fp, " '%s'", argv[i]);
3172         fputs("\n", fp);
3173
3174         fflush(fp);
3175         if (ferror(fp))
3176         {
3177                 elog(LOG, "could not write file \"%s\": %m", filename);
3178                 fclose(fp);
3179                 return false;
3180         }
3181
3182         fclose(fp);
3183         return true;
3184 }
3185
3186 /*
3187  * This should be used only for reporting "interactive" errors (essentially,
3188  * bogus arguments on the command line).  Once the postmaster is launched,
3189  * use ereport.  In particular, don't use this for anything that occurs
3190  * after pmdaemonize.
3191  */
3192 static void
3193 postmaster_error(const char *fmt,...)
3194 {
3195         va_list         ap;
3196
3197         fprintf(stderr, "%s: ", progname);
3198         va_start(ap, fmt);
3199         vfprintf(stderr, gettext(fmt), ap);
3200         va_end(ap);
3201         fprintf(stderr, "\n");
3202 }
3203
3204
3205 #ifdef EXEC_BACKEND
3206
3207 /*
3208  * The following need to be available to the read/write_backend_variables
3209  * functions
3210  */
3211 #include "storage/spin.h"
3212 extern XLogRecPtr RedoRecPtr;
3213 extern XLogwrtResult LogwrtResult;
3214 extern slock_t *ShmemLock;
3215 extern slock_t *ShmemIndexLock;
3216 extern void *ShmemIndexAlloc;
3217 typedef struct LWLock LWLock;
3218 extern LWLock *LWLockArray;
3219 extern slock_t  *ProcStructLock;
3220 extern int      pgStatSock;
3221
/*
 * write_var / read_var
 *		Copy the raw bytes of one variable to or from the stdio stream fp.
 *		Each expands to the fwrite()/fread() call itself, so the value of
 *		the expression is the number of items transferred (1 on success).
 */
#define write_var(var,fp) fwrite((void*)&(var),sizeof(var),1,fp)
#define read_var(var,fp)  fread((void*)&(var),sizeof(var),1,fp)

/*
 * get_tmp_backend_file_name
 *		Build the name of a backend-variables temp file,
 *		"<DataDir>/<PG_TEMP_FILES_DIR>/<PG_TEMP_FILE_PREFIX>.backend_var.<id>",
 *		in buf.
 *
 * buf must provide at least MAXPGPATH bytes; the result is truncated to fit
 * rather than written past the end of the buffer.  id is evaluated exactly
 * once and converted to unsigned long to match the %lu conversion.
 */
#define get_tmp_backend_file_name(buf,id)	\
		do {								\
			Assert(DataDir);				\
			snprintf((buf), MAXPGPATH,		\
				"%s/%s/%s.backend_var.%lu",	\
				DataDir,					\
				PG_TEMP_FILES_DIR,			\
				PG_TEMP_FILE_PREFIX,		\
				(unsigned long) (id));		\
		} while (0)
3234
3235 static bool
3236 write_backend_variables(Port *port)
3237 {
3238         char    filename[MAXPGPATH];
3239         FILE    *fp;
3240         get_tmp_backend_file_name(filename,++tmpBackendFileNum);
3241
3242         /* Open file */
3243         fp = AllocateFile(filename, PG_BINARY_W);
3244         if (!fp)
3245         {
3246                 /* As per OpenTemporaryFile... */
3247                 char dirname[MAXPGPATH];
3248                 sprintf(dirname,"%s/%s",DataDir,PG_TEMP_FILES_DIR);
3249                 mkdir(dirname, S_IRWXU);
3250
3251                 fp = AllocateFile(filename, PG_BINARY_W);
3252                 if (!fp)
3253                 {
3254                         ereport(ERROR,
3255                                 (errcode_for_file_access(),
3256                                 errmsg("could not write to file \"%s\": %m", filename)));
3257                         return false;
3258                 }
3259         }
3260
3261         /* Write vars */
3262         if (port)
3263         {
3264                 write_var(port->sock,fp);
3265                 write_var(port->proto,fp);
3266                 write_var(port->laddr,fp);
3267                 write_var(port->raddr,fp);
3268                 write_var(port->canAcceptConnections,fp);
3269                 write_var(port->cryptSalt,fp);
3270                 write_var(port->md5Salt,fp);
3271         }
3272         write_var(MyCancelKey,fp);
3273
3274         write_var(RedoRecPtr,fp);
3275         write_var(LogwrtResult,fp);
3276
3277         write_var(UsedShmemSegID,fp);
3278         write_var(UsedShmemSegAddr,fp);
3279
3280         write_var(ShmemLock,fp);
3281         write_var(ShmemIndexLock,fp);
3282         write_var(ShmemVariableCache,fp);
3283         write_var(ShmemIndexAlloc,fp);
3284
3285         write_var(LWLockArray,fp);
3286         write_var(ProcStructLock,fp);
3287         write_var(pgStatSock,fp);
3288
3289         write_var(PreAuthDelay,fp);
3290         write_var(debug_flag,fp);
3291
3292         /* Release file */
3293         FreeFile(fp);
3294         return true;
3295 }
3296
3297 void
3298 read_backend_variables(unsigned long id, Port *port)
3299 {
3300         char    filename[MAXPGPATH];
3301         FILE    *fp;
3302         get_tmp_backend_file_name(filename,id);
3303
3304         /* Open file */
3305         fp = AllocateFile(filename, PG_BINARY_R);
3306         if (!fp)
3307         {
3308                 ereport(ERROR,
3309                         (errcode_for_file_access(),
3310                         errmsg("could not read from backend_variables file \"%s\": %m", filename)));
3311                 return;
3312         }
3313
3314         /* Read vars */
3315         if (port)
3316         {
3317                 read_var(port->sock,fp);
3318                 read_var(port->proto,fp);
3319                 read_var(port->laddr,fp);
3320                 read_var(port->raddr,fp);
3321                 read_var(port->canAcceptConnections,fp);
3322                 read_var(port->cryptSalt,fp);
3323                 read_var(port->md5Salt,fp);
3324         }
3325         read_var(MyCancelKey,fp);
3326
3327         read_var(RedoRecPtr,fp);
3328         read_var(LogwrtResult,fp);
3329
3330         read_var(UsedShmemSegID,fp);
3331         read_var(UsedShmemSegAddr,fp);
3332
3333         read_var(ShmemLock,fp);
3334         read_var(ShmemIndexLock,fp);
3335         read_var(ShmemVariableCache,fp);
3336         read_var(ShmemIndexAlloc,fp);
3337
3338         read_var(LWLockArray,fp);
3339         read_var(ProcStructLock,fp);
3340         read_var(pgStatSock,fp);
3341
3342         read_var(PreAuthDelay,fp);
3343         read_var(debug_flag,fp);
3344
3345         /* Release file */
3346         FreeFile(fp);
3347         if (unlink(filename) != 0)
3348                 ereport(WARNING,
3349                                 (errcode_for_file_access(),
3350                                  errmsg("could not remove file \"%s\": %m", filename)));
3351 }
3352
3353 #endif
3354
#ifdef WIN32

/*
 * win32_forkexec
 *		Win32 replacement for fork()+exec(): launch "path" as a new process
 *		via CreateProcess, passing argv[1] .. (up to the terminating NULL
 *		entry) as space-separated arguments.  argv[0] is not used.
 *
 * The command line is assembled in a MAXPGPATH-sized buffer; a request that
 * would not fit is rejected rather than overrunning the buffer.  Every '/'
 * in the assembled command line (arguments included) is turned into '\\'.
 *
 * Returns the new process's id, or -1 on failure.  Failures are reported
 * at LOG level so that the -1 result really is returned to the caller.
 */
pid_t win32_forkexec(const char* path, char *argv[])
{
	STARTUPINFO si;
	PROCESS_INFORMATION pi;
	char *p;
	int i;
	size_t len;
	char cmdLine[MAXPGPATH];

	/* Format the cmd line, starting with the program path */
	len = strlen(path);
	if (len >= sizeof(cmdLine))
	{
		elog(LOG,"command line for \"%s\" is too long",path);
		return -1;
	}
	memcpy(cmdLine,path,len + 1);

	for (i = 1; argv[i] != NULL; i++)
	{
		size_t arglen = strlen(argv[i]);

		/* need room for the separating space, the argument and the NUL */
		if (len + 1 + arglen >= sizeof(cmdLine))
		{
			elog(LOG,"command line for \"%s\" is too long",path);
			return -1;
		}
		cmdLine[len++] = ' ';
		memcpy(cmdLine + len,argv[i],arglen + 1);
		len += arglen;
	}

	/*
	 * The following snippet can disappear when we consistently
	 * use forward slashes.
	 */
	for (p = cmdLine; *p != '\0'; p++)
	{
		if (*p == '/')
			*p = '\\';
	}

	memset(&pi,0,sizeof(pi));
	memset(&si,0,sizeof(si));
	si.cb = sizeof(si);
	if (!CreateProcess(NULL,cmdLine,NULL,NULL,TRUE,0,NULL,NULL,&si,&pi))
	{
		/* GetLastError() yields a DWORD; cast it to match %d */
		elog(LOG,"CreateProcess call failed (%d): %m",(int) GetLastError());
		return -1;
	}

	/*
	   FIXME: [fork/exec] we might need to keep the following handle/s,
	   depending on how we implement signalling.
	*/
	CloseHandle(pi.hProcess);
	CloseHandle(pi.hThread);

	return (pid_t) pi.dwProcessId;
}

#endif