]> granicus.if.org Git - postgresql/blob - src/backend/postmaster/syslogger.c
919cc49fa945f27642f8723fefec486cc6657478
[postgresql] / src / backend / postmaster / syslogger.c
1 /*-------------------------------------------------------------------------
2  *
3  * syslogger.c
4  *
5  * The system logger (syslogger) is new in Postgres 8.0. It catches all
6  * stderr output from the postmaster, backends, and other subprocesses
7  * by redirecting to a pipe, and writes it to a set of logfiles.
8  * It's possible to have size and age limits for the logfile configured
9  * in postgresql.conf. If these limits are reached or passed, the
10  * current logfile is closed and a new one is created (rotated).
11  * The logfiles are stored in a subdirectory (configurable in
12  * postgresql.conf), using a user-selectable naming scheme.
13  *
14  * Author: Andreas Pflug <pgadmin@pse-consulting.de>
15  *
16  * Copyright (c) 2004-2012, PostgreSQL Global Development Group
17  *
18  *
19  * IDENTIFICATION
20  *        src/backend/postmaster/syslogger.c
21  *
22  *-------------------------------------------------------------------------
23  */
24 #include "postgres.h"
25
26 #include <fcntl.h>
27 #include <signal.h>
28 #include <time.h>
29 #include <unistd.h>
30 #include <sys/stat.h>
31 #include <sys/time.h>
32
33 #include "lib/stringinfo.h"
34 #include "libpq/pqsignal.h"
35 #include "miscadmin.h"
36 #include "nodes/pg_list.h"
37 #include "pgtime.h"
38 #include "postmaster/fork_process.h"
39 #include "postmaster/postmaster.h"
40 #include "postmaster/syslogger.h"
41 #include "storage/ipc.h"
42 #include "storage/latch.h"
43 #include "storage/pg_shmem.h"
44 #include "utils/guc.h"
45 #include "utils/ps_status.h"
46 #include "utils/timestamp.h"
47
48 /*
49  * We really want line-buffered mode for logfile output, but Windows does
50  * not have it, and interprets _IOLBF as _IOFBF (bozos).  So use _IONBF
51  * instead on Windows.
52  */
53 #ifdef WIN32
54 #define LBF_MODE        _IONBF
55 #else
56 #define LBF_MODE        _IOLBF
57 #endif
58
59 /*
60  * We read() into a temp buffer twice as big as a chunk, so that any fragment
61  * left after processing can be moved down to the front and we'll still have
62  * room to read a full chunk.
63  */
64 #define READ_BUF_SIZE (2 * PIPE_CHUNK_SIZE)
65
66
67 /*
68  * GUC parameters.      Logging_collector cannot be changed after postmaster
69  * start, but the rest can change at SIGHUP.
70  */
71 bool            Logging_collector = false;
72 int                     Log_RotationAge = HOURS_PER_DAY * MINS_PER_HOUR;
73 int                     Log_RotationSize = 10 * 1024;
74 char       *Log_directory = NULL;
75 char       *Log_filename = NULL;
76 bool            Log_truncate_on_rotation = false;
77 int                     Log_file_mode = S_IRUSR | S_IWUSR;
78
79 /*
80  * Globally visible state (used by elog.c)
81  */
82 bool            am_syslogger = false;
83
84 extern bool redirection_done;
85
86 /*
87  * Private state
88  */
89 static pg_time_t next_rotation_time;
90 static bool pipe_eof_seen = false;
91 static bool rotation_disabled = false;
92 static FILE *syslogFile = NULL;
93 static FILE *csvlogFile = NULL;
94 static char *last_file_name = NULL;
95 static char *last_csv_file_name = NULL;
96 static Latch sysLoggerLatch;
97
98 /*
99  * Buffers for saving partial messages from different backends.
100  *
101  * Keep NBUFFER_LISTS lists of these, with the entry for a given source pid
102  * being in the list numbered (pid % NBUFFER_LISTS), so as to cut down on
103  * the number of entries we have to examine for any one incoming message.
104  * There must never be more than one entry for the same source pid.
105  *
106  * An inactive buffer is not removed from its list, just held for re-use.
107  * An inactive buffer has pid == 0 and undefined contents of data.
108  */
109 typedef struct
110 {
111         int32           pid;                    /* PID of source process */
112         StringInfoData data;            /* accumulated data, as a StringInfo */
113 } save_buffer;
114
115 #define NBUFFER_LISTS 256
116 static List *buffer_lists[NBUFFER_LISTS];
117
118 /* These must be exported for EXEC_BACKEND case ... annoying */
119 #ifndef WIN32
120 int                     syslogPipe[2] = {-1, -1};
121 #else
122 HANDLE          syslogPipe[2] = {0, 0};
123 #endif
124
125 #ifdef WIN32
126 static HANDLE threadHandle = 0;
127 static CRITICAL_SECTION sysloggerSection;
128 #endif
129
130 /*
131  * Flags set by interrupt handlers for later service in the main loop.
132  */
133 static volatile sig_atomic_t got_SIGHUP = false;
134 static volatile sig_atomic_t rotation_requested = false;
135
136
137 /* Local subroutines */
138 #ifdef EXEC_BACKEND
139 static pid_t syslogger_forkexec(void);
140 static void syslogger_parseArgs(int argc, char *argv[]);
141 #endif
142 NON_EXEC_STATIC void SysLoggerMain(int argc, char *argv[]) __attribute__((noreturn));
143 static void process_pipe_input(char *logbuffer, int *bytes_in_logbuffer);
144 static void flush_pipe_input(char *logbuffer, int *bytes_in_logbuffer);
145 static void open_csvlogfile(void);
146 static FILE *logfile_open(const char *filename, const char *mode,
147                          bool allow_errors);
148
149 #ifdef WIN32
150 static unsigned int __stdcall pipeThread(void *arg);
151 #endif
152 static void logfile_rotate(bool time_based_rotation, int size_rotation_for);
153 static char *logfile_getname(pg_time_t timestamp, const char *suffix);
154 static void set_next_rotation_time(void);
155 static void sigHupHandler(SIGNAL_ARGS);
156 static void sigUsr1Handler(SIGNAL_ARGS);
157
158
159 /*
160  * Main entry point for syslogger process
161  * argc/argv parameters are valid only in EXEC_BACKEND case.
162  */
163 NON_EXEC_STATIC void
164 SysLoggerMain(int argc, char *argv[])
165 {
166 #ifndef WIN32
167         char            logbuffer[READ_BUF_SIZE];
168         int                     bytes_in_logbuffer = 0;
169 #endif
170         char       *currentLogDir;
171         char       *currentLogFilename;
172         int                     currentLogRotationAge;
173         pg_time_t       now;
174
175         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
176
177         MyProcPid = getpid();           /* reset MyProcPid */
178
179         MyStartTime = time(NULL);       /* set our start time in case we call elog */
180         now = MyStartTime;
181
182 #ifdef EXEC_BACKEND
183         syslogger_parseArgs(argc, argv);
184 #endif   /* EXEC_BACKEND */
185
186         am_syslogger = true;
187
188         init_ps_display("logger process", "", "", "");
189
190         /*
191          * If we restarted, our stderr is already redirected into our own input
192          * pipe.  This is of course pretty useless, not to mention that it
193          * interferes with detecting pipe EOF.  Point stderr to /dev/null. This
194          * assumes that all interesting messages generated in the syslogger will
195          * come through elog.c and will be sent to write_syslogger_file.
196          */
197         if (redirection_done)
198         {
199                 int                     fd = open(DEVNULL, O_WRONLY, 0);
200
201                 /*
202                  * The closes might look redundant, but they are not: we want to be
203                  * darn sure the pipe gets closed even if the open failed.      We can
204                  * survive running with stderr pointing nowhere, but we can't afford
205                  * to have extra pipe input descriptors hanging around.
206                  */
207                 close(fileno(stdout));
208                 close(fileno(stderr));
209                 if (fd != -1)
210                 {
211                         dup2(fd, fileno(stdout));
212                         dup2(fd, fileno(stderr));
213                         close(fd);
214                 }
215         }
216
217         /*
218          * Syslogger's own stderr can't be the syslogPipe, so set it back to text
219          * mode if we didn't just close it. (It was set to binary in
220          * SubPostmasterMain).
221          */
222 #ifdef WIN32
223         else
224                 _setmode(_fileno(stderr), _O_TEXT);
225 #endif
226
227         /*
228          * Also close our copy of the write end of the pipe.  This is needed to
229          * ensure we can detect pipe EOF correctly.  (But note that in the restart
230          * case, the postmaster already did this.)
231          */
232 #ifndef WIN32
233         if (syslogPipe[1] >= 0)
234                 close(syslogPipe[1]);
235         syslogPipe[1] = -1;
236 #else
237         if (syslogPipe[1])
238                 CloseHandle(syslogPipe[1]);
239         syslogPipe[1] = 0;
240 #endif
241
242         /*
243          * If possible, make this process a group leader, so that the postmaster
244          * can signal any child processes too.  (syslogger probably never has any
245          * child processes, but for consistency we make all postmaster child
246          * processes do this.)
247          */
248 #ifdef HAVE_SETSID
249         if (setsid() < 0)
250                 elog(FATAL, "setsid() failed: %m");
251 #endif
252
253         /* Initialize private latch for use by signal handlers */
254         InitLatch(&sysLoggerLatch);
255
256         /*
257          * Properly accept or ignore signals the postmaster might send us
258          *
259          * Note: we ignore all termination signals, and instead exit only when all
260          * upstream processes are gone, to ensure we don't miss any dying gasps of
261          * broken backends...
262          */
263
264         pqsignal(SIGHUP, sigHupHandler);        /* set flag to read config file */
265         pqsignal(SIGINT, SIG_IGN);
266         pqsignal(SIGTERM, SIG_IGN);
267         pqsignal(SIGQUIT, SIG_IGN);
268         pqsignal(SIGALRM, SIG_IGN);
269         pqsignal(SIGPIPE, SIG_IGN);
270         pqsignal(SIGUSR1, sigUsr1Handler);      /* request log rotation */
271         pqsignal(SIGUSR2, SIG_IGN);
272
273         /*
274          * Reset some signals that are accepted by postmaster but not here
275          */
276         pqsignal(SIGCHLD, SIG_DFL);
277         pqsignal(SIGTTIN, SIG_DFL);
278         pqsignal(SIGTTOU, SIG_DFL);
279         pqsignal(SIGCONT, SIG_DFL);
280         pqsignal(SIGWINCH, SIG_DFL);
281
282         PG_SETMASK(&UnBlockSig);
283
284 #ifdef WIN32
285         /* Fire up separate data transfer thread */
286         InitializeCriticalSection(&sysloggerSection);
287         EnterCriticalSection(&sysloggerSection);
288
289         threadHandle = (HANDLE) _beginthreadex(NULL, 0, pipeThread, NULL, 0, NULL);
290         if (threadHandle == 0)
291                 elog(FATAL, "could not create syslogger data transfer thread: %m");
292 #endif   /* WIN32 */
293
294         /* remember active logfile parameters */
295         currentLogDir = pstrdup(Log_directory);
296         currentLogFilename = pstrdup(Log_filename);
297         currentLogRotationAge = Log_RotationAge;
298         /* set next planned rotation time */
299         set_next_rotation_time();
300
301         /* main worker loop */
302         for (;;)
303         {
304                 bool            time_based_rotation = false;
305                 int                     size_rotation_for = 0;
306                 long            cur_timeout;
307                 int                     cur_flags;
308
309 #ifndef WIN32
310                 int                     rc;
311 #endif
312
313                 /* Clear any already-pending wakeups */
314                 ResetLatch(&sysLoggerLatch);
315
316                 /*
317                  * Process any requests or signals received recently.
318                  */
319                 if (got_SIGHUP)
320                 {
321                         got_SIGHUP = false;
322                         ProcessConfigFile(PGC_SIGHUP);
323
324                         /*
325                          * Check if the log directory or filename pattern changed in
326                          * postgresql.conf. If so, force rotation to make sure we're
327                          * writing the logfiles in the right place.
328                          */
329                         if (strcmp(Log_directory, currentLogDir) != 0)
330                         {
331                                 pfree(currentLogDir);
332                                 currentLogDir = pstrdup(Log_directory);
333                                 rotation_requested = true;
334
335                                 /*
336                                  * Also, create new directory if not present; ignore errors
337                                  */
338                                 mkdir(Log_directory, S_IRWXU);
339                         }
340                         if (strcmp(Log_filename, currentLogFilename) != 0)
341                         {
342                                 pfree(currentLogFilename);
343                                 currentLogFilename = pstrdup(Log_filename);
344                                 rotation_requested = true;
345                         }
346
347                         /*
348                          * If rotation time parameter changed, reset next rotation time,
349                          * but don't immediately force a rotation.
350                          */
351                         if (currentLogRotationAge != Log_RotationAge)
352                         {
353                                 currentLogRotationAge = Log_RotationAge;
354                                 set_next_rotation_time();
355                         }
356
357                         /*
358                          * If we had a rotation-disabling failure, re-enable rotation
359                          * attempts after SIGHUP, and force one immediately.
360                          */
361                         if (rotation_disabled)
362                         {
363                                 rotation_disabled = false;
364                                 rotation_requested = true;
365                         }
366                 }
367
368                 if (Log_RotationAge > 0 && !rotation_disabled)
369                 {
370                         /* Do a logfile rotation if it's time */
371                         now = (pg_time_t) time(NULL);
372                         if (now >= next_rotation_time)
373                                 rotation_requested = time_based_rotation = true;
374                 }
375
376                 if (!rotation_requested && Log_RotationSize > 0 && !rotation_disabled)
377                 {
378                         /* Do a rotation if file is too big */
379                         if (ftell(syslogFile) >= Log_RotationSize * 1024L)
380                         {
381                                 rotation_requested = true;
382                                 size_rotation_for |= LOG_DESTINATION_STDERR;
383                         }
384                         if (csvlogFile != NULL &&
385                                 ftell(csvlogFile) >= Log_RotationSize * 1024L)
386                         {
387                                 rotation_requested = true;
388                                 size_rotation_for |= LOG_DESTINATION_CSVLOG;
389                         }
390                 }
391
392                 if (rotation_requested)
393                 {
394                         /*
395                          * Force rotation when both values are zero. It means the request
396                          * was sent by pg_rotate_logfile.
397                          */
398                         if (!time_based_rotation && size_rotation_for == 0)
399                                 size_rotation_for = LOG_DESTINATION_STDERR | LOG_DESTINATION_CSVLOG;
400                         logfile_rotate(time_based_rotation, size_rotation_for);
401                 }
402
403                 /*
404                  * Calculate time till next time-based rotation, so that we don't
405                  * sleep longer than that.      We assume the value of "now" obtained
406                  * above is still close enough.  Note we can't make this calculation
407                  * until after calling logfile_rotate(), since it will advance
408                  * next_rotation_time.
409                  */
410                 if (Log_RotationAge > 0 && !rotation_disabled)
411                 {
412                         if (now < next_rotation_time)
413                                 cur_timeout = (next_rotation_time - now) * 1000L;               /* msec */
414                         else
415                                 cur_timeout = 0;
416                         cur_flags = WL_TIMEOUT;
417                 }
418                 else
419                 {
420                         cur_timeout = -1L;
421                         cur_flags = 0;
422                 }
423
424                 /*
425                  * Sleep until there's something to do
426                  */
427 #ifndef WIN32
428                 rc = WaitLatchOrSocket(&sysLoggerLatch,
429                                                            WL_LATCH_SET | WL_SOCKET_READABLE | cur_flags,
430                                                            syslogPipe[0],
431                                                            cur_timeout);
432
433                 if (rc & WL_SOCKET_READABLE)
434                 {
435                         int                     bytesRead;
436
437                         bytesRead = read(syslogPipe[0],
438                                                          logbuffer + bytes_in_logbuffer,
439                                                          sizeof(logbuffer) - bytes_in_logbuffer);
440                         if (bytesRead < 0)
441                         {
442                                 if (errno != EINTR)
443                                         ereport(LOG,
444                                                         (errcode_for_socket_access(),
445                                                          errmsg("could not read from logger pipe: %m")));
446                         }
447                         else if (bytesRead > 0)
448                         {
449                                 bytes_in_logbuffer += bytesRead;
450                                 process_pipe_input(logbuffer, &bytes_in_logbuffer);
451                                 continue;
452                         }
453                         else
454                         {
455                                 /*
456                                  * Zero bytes read when select() is saying read-ready means
457                                  * EOF on the pipe: that is, there are no longer any processes
458                                  * with the pipe write end open.  Therefore, the postmaster
459                                  * and all backends are shut down, and we are done.
460                                  */
461                                 pipe_eof_seen = true;
462
463                                 /* if there's any data left then force it out now */
464                                 flush_pipe_input(logbuffer, &bytes_in_logbuffer);
465                         }
466                 }
467 #else                                                   /* WIN32 */
468
469                 /*
470                  * On Windows we leave it to a separate thread to transfer data and
471                  * detect pipe EOF.  The main thread just wakes up to handle SIGHUP
472                  * and rotation conditions.
473                  *
474                  * Server code isn't generally thread-safe, so we ensure that only one
475                  * of the threads is active at a time by entering the critical section
476                  * whenever we're not sleeping.
477                  */
478                 LeaveCriticalSection(&sysloggerSection);
479
480                 (void) WaitLatch(&sysLoggerLatch,
481                                                  WL_LATCH_SET | cur_flags,
482                                                  cur_timeout);
483
484                 EnterCriticalSection(&sysloggerSection);
485 #endif   /* WIN32 */
486
487                 if (pipe_eof_seen)
488                 {
489                         /*
490                          * seeing this message on the real stderr is annoying - so we make
491                          * it DEBUG1 to suppress in normal use.
492                          */
493                         ereport(DEBUG1,
494                                         (errmsg("logger shutting down")));
495
496                         /*
497                          * Normal exit from the syslogger is here.      Note that we
498                          * deliberately do not close syslogFile before exiting; this is to
499                          * allow for the possibility of elog messages being generated
500                          * inside proc_exit.  Regular exit() will take care of flushing
501                          * and closing stdio channels.
502                          */
503                         proc_exit(0);
504                 }
505         }
506 }
507
508 /*
509  * Postmaster subroutine to start a syslogger subprocess.
510  */
511 int
512 SysLogger_Start(void)
513 {
514         pid_t           sysloggerPid;
515         char       *filename;
516
517         if (!Logging_collector)
518                 return 0;
519
520         /*
521          * If first time through, create the pipe which will receive stderr
522          * output.
523          *
524          * If the syslogger crashes and needs to be restarted, we continue to use
525          * the same pipe (indeed must do so, since extant backends will be writing
526          * into that pipe).
527          *
528          * This means the postmaster must continue to hold the read end of the
529          * pipe open, so we can pass it down to the reincarnated syslogger. This
530          * is a bit klugy but we have little choice.
531          */
532 #ifndef WIN32
533         if (syslogPipe[0] < 0)
534         {
535                 if (pipe(syslogPipe) < 0)
536                         ereport(FATAL,
537                                         (errcode_for_socket_access(),
538                                          (errmsg("could not create pipe for syslog: %m"))));
539         }
540 #else
541         if (!syslogPipe[0])
542         {
543                 SECURITY_ATTRIBUTES sa;
544
545                 memset(&sa, 0, sizeof(SECURITY_ATTRIBUTES));
546                 sa.nLength = sizeof(SECURITY_ATTRIBUTES);
547                 sa.bInheritHandle = TRUE;
548
549                 if (!CreatePipe(&syslogPipe[0], &syslogPipe[1], &sa, 32768))
550                         ereport(FATAL,
551                                         (errcode_for_file_access(),
552                                          (errmsg("could not create pipe for syslog: %m"))));
553         }
554 #endif
555
556         /*
557          * Create log directory if not present; ignore errors
558          */
559         mkdir(Log_directory, S_IRWXU);
560
561         /*
562          * The initial logfile is created right in the postmaster, to verify that
563          * the Log_directory is writable.
564          */
565         filename = logfile_getname(time(NULL), NULL);
566
567         syslogFile = logfile_open(filename, "a", false);
568
569         pfree(filename);
570
571 #ifdef EXEC_BACKEND
572         switch ((sysloggerPid = syslogger_forkexec()))
573 #else
574         switch ((sysloggerPid = fork_process()))
575 #endif
576         {
577                 case -1:
578                         ereport(LOG,
579                                         (errmsg("could not fork system logger: %m")));
580                         return 0;
581
582 #ifndef EXEC_BACKEND
583                 case 0:
584                         /* in postmaster child ... */
585                         /* Close the postmaster's sockets */
586                         ClosePostmasterPorts(true);
587
588                         /* Lose the postmaster's on-exit routines */
589                         on_exit_reset();
590
591                         /* Drop our connection to postmaster's shared memory, as well */
592                         PGSharedMemoryDetach();
593
594                         /* do the work */
595                         SysLoggerMain(0, NULL);
596                         break;
597 #endif
598
599                 default:
600                         /* success, in postmaster */
601
602                         /* now we redirect stderr, if not done already */
603                         if (!redirection_done)
604                         {
605 #ifndef WIN32
606                                 fflush(stdout);
607                                 if (dup2(syslogPipe[1], fileno(stdout)) < 0)
608                                         ereport(FATAL,
609                                                         (errcode_for_file_access(),
610                                                          errmsg("could not redirect stdout: %m")));
611                                 fflush(stderr);
612                                 if (dup2(syslogPipe[1], fileno(stderr)) < 0)
613                                         ereport(FATAL,
614                                                         (errcode_for_file_access(),
615                                                          errmsg("could not redirect stderr: %m")));
616                                 /* Now we are done with the write end of the pipe. */
617                                 close(syslogPipe[1]);
618                                 syslogPipe[1] = -1;
619 #else
620                                 int                     fd;
621
622                                 /*
623                                  * open the pipe in binary mode and make sure stderr is binary
624                                  * after it's been dup'ed into, to avoid disturbing the pipe
625                                  * chunking protocol.
626                                  */
627                                 fflush(stderr);
628                                 fd = _open_osfhandle((intptr_t) syslogPipe[1],
629                                                                          _O_APPEND | _O_BINARY);
630                                 if (dup2(fd, _fileno(stderr)) < 0)
631                                         ereport(FATAL,
632                                                         (errcode_for_file_access(),
633                                                          errmsg("could not redirect stderr: %m")));
634                                 close(fd);
635                                 _setmode(_fileno(stderr), _O_BINARY);
636
637                                 /*
638                                  * Now we are done with the write end of the pipe.
639                                  * CloseHandle() must not be called because the preceding
640                                  * close() closes the underlying handle.
641                                  */
642                                 syslogPipe[1] = 0;
643 #endif
644                                 redirection_done = true;
645                         }
646
647                         /* postmaster will never write the file; close it */
648                         fclose(syslogFile);
649                         syslogFile = NULL;
650                         return (int) sysloggerPid;
651         }
652
653         /* we should never reach here */
654         return 0;
655 }
656
657
658 #ifdef EXEC_BACKEND
659
660 /*
661  * syslogger_forkexec() -
662  *
663  * Format up the arglist for, then fork and exec, a syslogger process
664  */
665 static pid_t
666 syslogger_forkexec(void)
667 {
668         char       *av[10];
669         int                     ac = 0;
670         char            filenobuf[32];
671
672         av[ac++] = "postgres";
673         av[ac++] = "--forklog";
674         av[ac++] = NULL;                        /* filled in by postmaster_forkexec */
675
676         /* static variables (those not passed by write_backend_variables) */
677 #ifndef WIN32
678         if (syslogFile != NULL)
679                 snprintf(filenobuf, sizeof(filenobuf), "%d",
680                                  fileno(syslogFile));
681         else
682                 strcpy(filenobuf, "-1");
683 #else                                                   /* WIN32 */
684         if (syslogFile != NULL)
685                 snprintf(filenobuf, sizeof(filenobuf), "%ld",
686                                  (long) _get_osfhandle(_fileno(syslogFile)));
687         else
688                 strcpy(filenobuf, "0");
689 #endif   /* WIN32 */
690         av[ac++] = filenobuf;
691
692         av[ac] = NULL;
693         Assert(ac < lengthof(av));
694
695         return postmaster_forkexec(ac, av);
696 }
697
698 /*
699  * syslogger_parseArgs() -
700  *
701  * Extract data from the arglist for exec'ed syslogger process
702  */
703 static void
704 syslogger_parseArgs(int argc, char *argv[])
705 {
706         int                     fd;
707
708         Assert(argc == 4);
709         argv += 3;
710
711 #ifndef WIN32
712         fd = atoi(*argv++);
713         if (fd != -1)
714         {
715                 syslogFile = fdopen(fd, "a");
716                 setvbuf(syslogFile, NULL, LBF_MODE, 0);
717         }
718 #else                                                   /* WIN32 */
719         fd = atoi(*argv++);
720         if (fd != 0)
721         {
722                 fd = _open_osfhandle(fd, _O_APPEND | _O_TEXT);
723                 if (fd > 0)
724                 {
725                         syslogFile = fdopen(fd, "a");
726                         setvbuf(syslogFile, NULL, LBF_MODE, 0);
727                 }
728         }
729 #endif   /* WIN32 */
730 }
731 #endif   /* EXEC_BACKEND */
732
733
734 /* --------------------------------
735  *              pipe protocol handling
736  * --------------------------------
737  */
738
739 /*
740  * Process data received through the syslogger pipe.
741  *
742  * This routine interprets the log pipe protocol which sends log messages as
743  * (hopefully atomic) chunks - such chunks are detected and reassembled here.
744  *
745  * The protocol has a header that starts with two nul bytes, then has a 16 bit
746  * length, the pid of the sending process, and a flag to indicate if it is
747  * the last chunk in a message. Incomplete chunks are saved until we read some
748  * more, and non-final chunks are accumulated until we get the final chunk.
749  *
750  * All of this is to avoid 2 problems:
751  * . partial messages being written to logfiles (messes rotation), and
752  * . messages from different backends being interleaved (messages garbled).
753  *
754  * Any non-protocol messages are written out directly. These should only come
755  * from non-PostgreSQL sources, however (e.g. third party libraries writing to
756  * stderr).
757  *
758  * logbuffer is the data input buffer, and *bytes_in_logbuffer is the number
759  * of bytes present.  On exit, any not-yet-eaten data is left-justified in
760  * logbuffer, and *bytes_in_logbuffer is updated.
761  */
762 static void
763 process_pipe_input(char *logbuffer, int *bytes_in_logbuffer)
764 {
765         char       *cursor = logbuffer;
766         int                     count = *bytes_in_logbuffer;
767         int                     dest = LOG_DESTINATION_STDERR;
768
769         /* While we have enough for a header, process data... */
770         while (count >= (int) sizeof(PipeProtoHeader))
771         {
772                 PipeProtoHeader p;
773                 int                     chunklen;
774
775                 /* Do we have a valid header? */
776                 memcpy(&p, cursor, sizeof(PipeProtoHeader));
777                 if (p.nuls[0] == '\0' && p.nuls[1] == '\0' &&
778                         p.len > 0 && p.len <= PIPE_MAX_PAYLOAD &&
779                         p.pid != 0 &&
780                         (p.is_last == 't' || p.is_last == 'f' ||
781                          p.is_last == 'T' || p.is_last == 'F'))
782                 {
783                         List       *buffer_list;
784                         ListCell   *cell;
785                         save_buffer *existing_slot = NULL,
786                                            *free_slot = NULL;
787                         StringInfo      str;
788
789                         chunklen = PIPE_HEADER_SIZE + p.len;
790
791                         /* Fall out of loop if we don't have the whole chunk yet */
792                         if (count < chunklen)
793                                 break;
794
795                         dest = (p.is_last == 'T' || p.is_last == 'F') ?
796                                 LOG_DESTINATION_CSVLOG : LOG_DESTINATION_STDERR;
797
798                         /* Locate any existing buffer for this source pid */
799                         buffer_list = buffer_lists[p.pid % NBUFFER_LISTS];
800                         foreach(cell, buffer_list)
801                         {
802                                 save_buffer *buf = (save_buffer *) lfirst(cell);
803
804                                 if (buf->pid == p.pid)
805                                 {
806                                         existing_slot = buf;
807                                         break;
808                                 }
809                                 if (buf->pid == 0 && free_slot == NULL)
810                                         free_slot = buf;
811                         }
812
813                         if (p.is_last == 'f' || p.is_last == 'F')
814                         {
815                                 /*
816                                  * Save a complete non-final chunk in a per-pid buffer
817                                  */
818                                 if (existing_slot != NULL)
819                                 {
820                                         /* Add chunk to data from preceding chunks */
821                                         str = &(existing_slot->data);
822                                         appendBinaryStringInfo(str,
823                                                                                    cursor + PIPE_HEADER_SIZE,
824                                                                                    p.len);
825                                 }
826                                 else
827                                 {
828                                         /* First chunk of message, save in a new buffer */
829                                         if (free_slot == NULL)
830                                         {
831                                                 /*
832                                                  * Need a free slot, but there isn't one in the list,
833                                                  * so create a new one and extend the list with it.
834                                                  */
835                                                 free_slot = palloc(sizeof(save_buffer));
836                                                 buffer_list = lappend(buffer_list, free_slot);
837                                                 buffer_lists[p.pid % NBUFFER_LISTS] = buffer_list;
838                                         }
839                                         free_slot->pid = p.pid;
840                                         str = &(free_slot->data);
841                                         initStringInfo(str);
842                                         appendBinaryStringInfo(str,
843                                                                                    cursor + PIPE_HEADER_SIZE,
844                                                                                    p.len);
845                                 }
846                         }
847                         else
848                         {
849                                 /*
850                                  * Final chunk --- add it to anything saved for that pid, and
851                                  * either way write the whole thing out.
852                                  */
853                                 if (existing_slot != NULL)
854                                 {
855                                         str = &(existing_slot->data);
856                                         appendBinaryStringInfo(str,
857                                                                                    cursor + PIPE_HEADER_SIZE,
858                                                                                    p.len);
859                                         write_syslogger_file(str->data, str->len, dest);
860                                         /* Mark the buffer unused, and reclaim string storage */
861                                         existing_slot->pid = 0;
862                                         pfree(str->data);
863                                 }
864                                 else
865                                 {
866                                         /* The whole message was one chunk, evidently. */
867                                         write_syslogger_file(cursor + PIPE_HEADER_SIZE, p.len,
868                                                                                  dest);
869                                 }
870                         }
871
872                         /* Finished processing this chunk */
873                         cursor += chunklen;
874                         count -= chunklen;
875                 }
876                 else
877                 {
878                         /* Process non-protocol data */
879
880                         /*
881                          * Look for the start of a protocol header.  If found, dump data
882                          * up to there and repeat the loop.  Otherwise, dump it all and
883                          * fall out of the loop.  (Note: we want to dump it all if at all
884                          * possible, so as to avoid dividing non-protocol messages across
885                          * logfiles.  We expect that in many scenarios, a non-protocol
886                          * message will arrive all in one read(), and we want to respect
887                          * the read() boundary if possible.)
888                          */
889                         for (chunklen = 1; chunklen < count; chunklen++)
890                         {
891                                 if (cursor[chunklen] == '\0')
892                                         break;
893                         }
894                         /* fall back on the stderr log as the destination */
895                         write_syslogger_file(cursor, chunklen, LOG_DESTINATION_STDERR);
896                         cursor += chunklen;
897                         count -= chunklen;
898                 }
899         }
900
901         /* We don't have a full chunk, so left-align what remains in the buffer */
902         if (count > 0 && cursor != logbuffer)
903                 memmove(logbuffer, cursor, count);
904         *bytes_in_logbuffer = count;
905 }
906
907 /*
908  * Force out any buffered data
909  *
910  * This is currently used only at syslogger shutdown, but could perhaps be
911  * useful at other times, so it is careful to leave things in a clean state.
912  */
913 static void
914 flush_pipe_input(char *logbuffer, int *bytes_in_logbuffer)
915 {
916         int                     i;
917
918         /* Dump any incomplete protocol messages */
919         for (i = 0; i < NBUFFER_LISTS; i++)
920         {
921                 List       *list = buffer_lists[i];
922                 ListCell   *cell;
923
924                 foreach(cell, list)
925                 {
926                         save_buffer *buf = (save_buffer *) lfirst(cell);
927
928                         if (buf->pid != 0)
929                         {
930                                 StringInfo      str = &(buf->data);
931
932                                 write_syslogger_file(str->data, str->len,
933                                                                          LOG_DESTINATION_STDERR);
934                                 /* Mark the buffer unused, and reclaim string storage */
935                                 buf->pid = 0;
936                                 pfree(str->data);
937                         }
938                 }
939         }
940
941         /*
942          * Force out any remaining pipe data as-is; we don't bother trying to
943          * remove any protocol headers that may exist in it.
944          */
945         if (*bytes_in_logbuffer > 0)
946                 write_syslogger_file(logbuffer, *bytes_in_logbuffer,
947                                                          LOG_DESTINATION_STDERR);
948         *bytes_in_logbuffer = 0;
949 }
950
951
952 /* --------------------------------
953  *              logfile routines
954  * --------------------------------
955  */
956
957 /*
958  * Write text to the currently open logfile
959  *
960  * This is exported so that elog.c can call it when am_syslogger is true.
961  * This allows the syslogger process to record elog messages of its own,
962  * even though its stderr does not point at the syslog pipe.
963  */
964 void
965 write_syslogger_file(const char *buffer, int count, int destination)
966 {
967         int                     rc;
968         FILE       *logfile;
969
970         if (destination == LOG_DESTINATION_CSVLOG && csvlogFile == NULL)
971                 open_csvlogfile();
972
973         logfile = destination == LOG_DESTINATION_CSVLOG ? csvlogFile : syslogFile;
974         rc = fwrite(buffer, 1, count, logfile);
975
976         /* can't use ereport here because of possible recursion */
977         if (rc != count)
978                 write_stderr("could not write to log file: %s\n", strerror(errno));
979 }
980
981 #ifdef WIN32
982
983 /*
984  * Worker thread to transfer data from the pipe to the current logfile.
985  *
986  * We need this because on Windows, WaitforMultipleObjects does not work on
987  * unnamed pipes: it always reports "signaled", so the blocking ReadFile won't
988  * allow for SIGHUP; and select is for sockets only.
989  */
990 static unsigned int __stdcall
991 pipeThread(void *arg)
992 {
993         char            logbuffer[READ_BUF_SIZE];
994         int                     bytes_in_logbuffer = 0;
995
996         for (;;)
997         {
998                 DWORD           bytesRead;
999                 BOOL            result;
1000
1001                 result = ReadFile(syslogPipe[0],
1002                                                   logbuffer + bytes_in_logbuffer,
1003                                                   sizeof(logbuffer) - bytes_in_logbuffer,
1004                                                   &bytesRead, 0);
1005
1006                 /*
1007                  * Enter critical section before doing anything that might touch
1008                  * global state shared by the main thread. Anything that uses
1009                  * palloc()/pfree() in particular are not safe outside the critical
1010                  * section.
1011                  */
1012                 EnterCriticalSection(&sysloggerSection);
1013                 if (!result)
1014                 {
1015                         DWORD           error = GetLastError();
1016
1017                         if (error == ERROR_HANDLE_EOF ||
1018                                 error == ERROR_BROKEN_PIPE)
1019                                 break;
1020                         _dosmaperr(error);
1021                         ereport(LOG,
1022                                         (errcode_for_file_access(),
1023                                          errmsg("could not read from logger pipe: %m")));
1024                 }
1025                 else if (bytesRead > 0)
1026                 {
1027                         bytes_in_logbuffer += bytesRead;
1028                         process_pipe_input(logbuffer, &bytes_in_logbuffer);
1029                 }
1030                 LeaveCriticalSection(&sysloggerSection);
1031         }
1032
1033         /* We exit the above loop only upon detecting pipe EOF */
1034         pipe_eof_seen = true;
1035
1036         /* if there's any data left then force it out now */
1037         flush_pipe_input(logbuffer, &bytes_in_logbuffer);
1038
1039         /* set the latch to waken the main thread, which will quit */
1040         SetLatch(&sysLoggerLatch);
1041
1042         LeaveCriticalSection(&sysloggerSection);
1043         _endthread();
1044         return 0;
1045 }
1046 #endif   /* WIN32 */
1047
1048 /*
1049  * open the csv log file - we do this opportunistically, because
1050  * we don't know if CSV logging will be wanted.
1051  */
1052 static void
1053 open_csvlogfile(void)
1054 {
1055         char       *filename;
1056
1057         filename = logfile_getname(time(NULL), ".csv");
1058
1059         csvlogFile = logfile_open(filename, "a", false);
1060
1061         pfree(filename);
1062 }
1063
1064 /*
1065  * Open a new logfile with proper permissions and buffering options.
1066  *
1067  * If allow_errors is true, we just log any open failure and return NULL
1068  * (with errno still correct for the fopen failure).
1069  * Otherwise, errors are treated as fatal.
1070  */
1071 static FILE *
1072 logfile_open(const char *filename, const char *mode, bool allow_errors)
1073 {
1074         FILE       *fh;
1075         mode_t          oumask;
1076
1077         /*
1078          * Note we do not let Log_file_mode disable IWUSR, since we certainly want
1079          * to be able to write the files ourselves.
1080          */
1081         oumask = umask((mode_t) ((~(Log_file_mode | S_IWUSR)) & (S_IRWXU | S_IRWXG | S_IRWXO)));
1082         fh = fopen(filename, mode);
1083         umask(oumask);
1084
1085         if (fh)
1086         {
1087                 setvbuf(fh, NULL, LBF_MODE, 0);
1088
1089 #ifdef WIN32
1090                 /* use CRLF line endings on Windows */
1091                 _setmode(_fileno(fh), _O_TEXT);
1092 #endif
1093         }
1094         else
1095         {
1096                 int                     save_errno = errno;
1097
1098                 ereport(allow_errors ? LOG : FATAL,
1099                                 (errcode_for_file_access(),
1100                                  errmsg("could not open log file \"%s\": %m",
1101                                                 filename)));
1102                 errno = save_errno;
1103         }
1104
1105         return fh;
1106 }
1107
1108 /*
1109  * perform logfile rotation
1110  */
1111 static void
1112 logfile_rotate(bool time_based_rotation, int size_rotation_for)
1113 {
1114         char       *filename;
1115         char       *csvfilename = NULL;
1116         pg_time_t       fntime;
1117         FILE       *fh;
1118
1119         rotation_requested = false;
1120
1121         /*
1122          * When doing a time-based rotation, invent the new logfile name based on
1123          * the planned rotation time, not current time, to avoid "slippage" in the
1124          * file name when we don't do the rotation immediately.
1125          */
1126         if (time_based_rotation)
1127                 fntime = next_rotation_time;
1128         else
1129                 fntime = time(NULL);
1130         filename = logfile_getname(fntime, NULL);
1131         if (csvlogFile != NULL)
1132                 csvfilename = logfile_getname(fntime, ".csv");
1133
1134         /*
1135          * Decide whether to overwrite or append.  We can overwrite if (a)
1136          * Log_truncate_on_rotation is set, (b) the rotation was triggered by
1137          * elapsed time and not something else, and (c) the computed file name is
1138          * different from what we were previously logging into.
1139          *
1140          * Note: during the first rotation after forking off from the postmaster,
1141          * last_file_name will be NULL.  (We don't bother to set it in the
1142          * postmaster because it ain't gonna work in the EXEC_BACKEND case.) So we
1143          * will always append in that situation, even though truncating would
1144          * usually be safe.
1145          *
1146          * For consistency, we treat CSV logs the same even though they aren't
1147          * opened in the postmaster.
1148          */
1149         if (time_based_rotation || (size_rotation_for & LOG_DESTINATION_STDERR))
1150         {
1151                 if (Log_truncate_on_rotation && time_based_rotation &&
1152                         last_file_name != NULL &&
1153                         strcmp(filename, last_file_name) != 0)
1154                         fh = logfile_open(filename, "w", true);
1155                 else
1156                         fh = logfile_open(filename, "a", true);
1157
1158                 if (!fh)
1159                 {
1160                         /*
1161                          * ENFILE/EMFILE are not too surprising on a busy system; just
1162                          * keep using the old file till we manage to get a new one.
1163                          * Otherwise, assume something's wrong with Log_directory and stop
1164                          * trying to create files.
1165                          */
1166                         if (errno != ENFILE && errno != EMFILE)
1167                         {
1168                                 ereport(LOG,
1169                                                 (errmsg("disabling automatic rotation (use SIGHUP to re-enable)")));
1170                                 rotation_disabled = true;
1171                         }
1172
1173                         if (filename)
1174                                 pfree(filename);
1175                         if (csvfilename)
1176                                 pfree(csvfilename);
1177                         return;
1178                 }
1179
1180                 fclose(syslogFile);
1181                 syslogFile = fh;
1182
1183                 /* instead of pfree'ing filename, remember it for next time */
1184                 if (last_file_name != NULL)
1185                         pfree(last_file_name);
1186                 last_file_name = filename;
1187                 filename = NULL;
1188         }
1189
1190         /* Same as above, but for csv file. */
1191
1192         if (csvlogFile != NULL &&
1193                 (time_based_rotation || (size_rotation_for & LOG_DESTINATION_CSVLOG)))
1194         {
1195                 if (Log_truncate_on_rotation && time_based_rotation &&
1196                         last_csv_file_name != NULL &&
1197                         strcmp(csvfilename, last_csv_file_name) != 0)
1198                         fh = logfile_open(csvfilename, "w", true);
1199                 else
1200                         fh = logfile_open(csvfilename, "a", true);
1201
1202                 if (!fh)
1203                 {
1204                         /*
1205                          * ENFILE/EMFILE are not too surprising on a busy system; just
1206                          * keep using the old file till we manage to get a new one.
1207                          * Otherwise, assume something's wrong with Log_directory and stop
1208                          * trying to create files.
1209                          */
1210                         if (errno != ENFILE && errno != EMFILE)
1211                         {
1212                                 ereport(LOG,
1213                                                 (errmsg("disabling automatic rotation (use SIGHUP to re-enable)")));
1214                                 rotation_disabled = true;
1215                         }
1216
1217                         if (filename)
1218                                 pfree(filename);
1219                         if (csvfilename)
1220                                 pfree(csvfilename);
1221                         return;
1222                 }
1223
1224                 fclose(csvlogFile);
1225                 csvlogFile = fh;
1226
1227                 /* instead of pfree'ing filename, remember it for next time */
1228                 if (last_csv_file_name != NULL)
1229                         pfree(last_csv_file_name);
1230                 last_csv_file_name = csvfilename;
1231                 csvfilename = NULL;
1232         }
1233
1234         if (filename)
1235                 pfree(filename);
1236         if (csvfilename)
1237                 pfree(csvfilename);
1238
1239         set_next_rotation_time();
1240 }
1241
1242
1243 /*
1244  * construct logfile name using timestamp information
1245  *
1246  * If suffix isn't NULL, append it to the name, replacing any ".log"
1247  * that may be in the pattern.
1248  *
1249  * Result is palloc'd.
1250  */
1251 static char *
1252 logfile_getname(pg_time_t timestamp, const char *suffix)
1253 {
1254         char       *filename;
1255         int                     len;
1256
1257         filename = palloc(MAXPGPATH);
1258
1259         snprintf(filename, MAXPGPATH, "%s/", Log_directory);
1260
1261         len = strlen(filename);
1262
1263         /* treat Log_filename as a strftime pattern */
1264         pg_strftime(filename + len, MAXPGPATH - len, Log_filename,
1265                                 pg_localtime(&timestamp, log_timezone));
1266
1267         if (suffix != NULL)
1268         {
1269                 len = strlen(filename);
1270                 if (len > 4 && (strcmp(filename + (len - 4), ".log") == 0))
1271                         len -= 4;
1272                 strlcpy(filename + len, suffix, MAXPGPATH - len);
1273         }
1274
1275         return filename;
1276 }
1277
1278 /*
1279  * Determine the next planned rotation time, and store in next_rotation_time.
1280  */
1281 static void
1282 set_next_rotation_time(void)
1283 {
1284         pg_time_t       now;
1285         struct pg_tm *tm;
1286         int                     rotinterval;
1287
1288         /* nothing to do if time-based rotation is disabled */
1289         if (Log_RotationAge <= 0)
1290                 return;
1291
1292         /*
1293          * The requirements here are to choose the next time > now that is a
1294          * "multiple" of the log rotation interval.  "Multiple" can be interpreted
1295          * fairly loosely.      In this version we align to log_timezone rather than
1296          * GMT.
1297          */
1298         rotinterval = Log_RotationAge * SECS_PER_MINUTE;        /* convert to seconds */
1299         now = (pg_time_t) time(NULL);
1300         tm = pg_localtime(&now, log_timezone);
1301         now += tm->tm_gmtoff;
1302         now -= now % rotinterval;
1303         now += rotinterval;
1304         now -= tm->tm_gmtoff;
1305         next_rotation_time = now;
1306 }
1307
1308 /* --------------------------------
1309  *              signal handler routines
1310  * --------------------------------
1311  */
1312
1313 /* SIGHUP: set flag to reload config file */
1314 static void
1315 sigHupHandler(SIGNAL_ARGS)
1316 {
1317         int                     save_errno = errno;
1318
1319         got_SIGHUP = true;
1320         SetLatch(&sysLoggerLatch);
1321
1322         errno = save_errno;
1323 }
1324
1325 /* SIGUSR1: set flag to rotate logfile */
1326 static void
1327 sigUsr1Handler(SIGNAL_ARGS)
1328 {
1329         int                     save_errno = errno;
1330
1331         rotation_requested = true;
1332         SetLatch(&sysLoggerLatch);
1333
1334         errno = save_errno;
1335 }