]> granicus.if.org Git - postgresql/commitdiff
Change the logic to decide when to delete old WAL segments, so that it
authorHeikki Linnakangas <heikki.linnakangas@iki.fi>
Mon, 12 Apr 2010 09:52:29 +0000 (09:52 +0000)
committerHeikki Linnakangas <heikki.linnakangas@iki.fi>
Mon, 12 Apr 2010 09:52:29 +0000 (09:52 +0000)
doesn't take into account how far the WAL senders are. This way a hung
WAL sender doesn't prevent old WAL segments from being recycled/removed
in the primary, ultimately causing the disk to fill up. Instead add
standby_keep_segments setting to control how many old WAL segments are
kept in the primary. This also makes it more reliable to use streaming
replication without WAL archiving, assuming that you set
standby_keep_segments high enough.

doc/src/sgml/config.sgml
doc/src/sgml/high-availability.sgml
src/backend/access/transam/xlog.c
src/backend/replication/walsender.c
src/backend/utils/misc/guc.c
src/backend/utils/misc/postgresql.conf.sample
src/include/access/xlog.h

index 81b0ba34457509b1e857fda5744eea0c995ba1a7..0ca5e402e0c42183973ecf4c650322dc403ab603 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.262 2010/04/03 07:22:53 petere Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.263 2010/04/12 09:52:29 heikki Exp $ -->
 
 <chapter Id="runtime-config">
   <title>Server Configuration</title>
@@ -1823,6 +1823,34 @@ archive_command = 'copy "%p" "C:\\server\\archivedir\\%f"'  # Windows
        </para>
        </listitem>
       </varlistentry>
+
+      <varlistentry id="guc-standby-keep-segments" xreflabel="standby_keep_segments">
+       <term><varname>standby_keep_segments</varname> (<type>integer</type>)</term>
+       <indexterm>
+        <primary><varname>standby_keep_segments</> configuration parameter</primary>
+       </indexterm>
+       <listitem>
+       <para>
+        Specifies the number of log file segments kept in the <filename>pg_xlog</>
+        directory, in case a standby server needs to fetch them via streaming
+        replication. Each segment is normally 16 megabytes. If a standby
+        server connected to the primary falls behind more than
+        <varname>standby_keep_segments</> segments, the primary might remove
+        a WAL segment still needed by the standby and the replication
+        connection will be terminated.
+
+        This sets only the minimum number of segments retained for standby
+        purposes; the system might need to retain more segments for WAL
+        archival or to recover from a checkpoint. If <varname>standby_keep_segments</>
+        is zero (the default), the system doesn't keep any extra segments
+        for standby purposes, and the number of old WAL segments available
+        for standbys is determined based only on the location of the previous
+        checkpoint and status of WAL archival.
+        This parameter can only be set in the <filename>postgresql.conf</>
+        file or on the server command line.
+       </para>
+       </listitem>
+      </varlistentry>
      </variablelist>
     </sect2>
     <sect2 id="runtime-config-standby">
index 13b783bc8642315cf383ba35f48aa74d7ebddf99..cff0339b523562ab7ca0aa8ca55d8672bd24e3c1 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/high-availability.sgml,v 1.58 2010/04/03 07:22:54 petere Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/high-availability.sgml,v 1.59 2010/04/12 09:52:29 heikki Exp $ -->
 
 <chapter id="high-availability">
  <title>High Availability, Load Balancing, and Replication</title>
@@ -732,7 +732,12 @@ trigger_file = '/path/to/trigger_file'
     Streaming replication relies on file-based continuous archiving for
     making the base backup and for allowing the standby to catch up if it is
     disconnected from the primary for long enough for the primary to
-    delete old WAL files still required by the standby.
+    delete old WAL files still required by the standby. It is possible
+    to use streaming replication without WAL archiving, but if a standby
+    falls behind too much, the primary will delete old WAL files still
+    needed by the standby, and the standby will have to be manually restored
+    from a base backup. You can control how many old WAL segments the primary
+    retains using the <varname>standby_keep_segments</> setting.
    </para>
 
    <para>
index 12392f8cfc010eab77a0db5e1af884b9b6c301ab..c5b7f7a98ceaf4c0411ce0dcd00b30159eb15e5b 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.391 2010/04/07 10:58:49 heikki Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.392 2010/04/12 09:52:29 heikki Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -66,6 +66,7 @@
 
 /* User-settable parameters */
 int                    CheckPointSegments = 3;
+int                    StandbySegments = 0;
 int                    XLOGbuffers = 8;
 int                    XLogArchiveTimeout = 0;
 bool           XLogArchiveMode = false;
@@ -356,6 +357,8 @@ typedef struct XLogCtlData
        uint32          ckptXidEpoch;   /* nextXID & epoch of latest checkpoint */
        TransactionId ckptXid;
        XLogRecPtr      asyncCommitLSN; /* LSN of newest async commit */
+       uint32          lastRemovedLog; /* latest removed/recycled XLOG segment */
+       uint32          lastRemovedSeg;
 
        /* Protected by WALWriteLock: */
        XLogCtlWrite Write;
@@ -3149,6 +3152,22 @@ PreallocXlogFiles(XLogRecPtr endptr)
        }
 }
 
+/*
+ * Get the log/seg of the latest removed or recycled WAL segment.
+ * Both are set to 0 if no WAL segments have been removed since startup.
+ */
+void
+XLogGetLastRemoved(uint32 *log, uint32 *seg)
+{
+       /* use volatile pointer to prevent code rearrangement */
+       volatile XLogCtlData *xlogctl = XLogCtl;
+
+       SpinLockAcquire(&xlogctl->info_lck);
+       *log = xlogctl->lastRemovedLog;
+       *seg = xlogctl->lastRemovedSeg;
+       SpinLockRelease(&xlogctl->info_lck);
+}
+
 /*
  * Recycle or remove all log files older or equal to passed log/seg#
  *
@@ -3170,6 +3189,20 @@ RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr)
        char            newpath[MAXPGPATH];
 #endif
        struct stat statbuf;
+       /* use volatile pointer to prevent code rearrangement */
+       volatile XLogCtlData *xlogctl = XLogCtl;
+
+       /* Update the last removed location in shared memory first */
+       SpinLockAcquire(&xlogctl->info_lck);
+       if (log > xlogctl->lastRemovedLog ||
+               (log == xlogctl->lastRemovedLog && seg > xlogctl->lastRemovedSeg))
+       {
+               xlogctl->lastRemovedLog = log;
+               xlogctl->lastRemovedSeg = seg;
+       }
+       SpinLockRelease(&xlogctl->info_lck);
+
+       elog(DEBUG1, "removing WAL segments older than %X/%X", log, seg);
 
        /*
         * Initialize info about where to try to recycle to.  We allow recycling
@@ -7172,36 +7205,51 @@ CreateCheckPoint(int flags)
        smgrpostckpt();
 
        /*
-        * If there's connected standby servers doing XLOG streaming, don't delete
-        * XLOG files that have not been streamed to all of them yet. This does
-        * nothing to prevent them from being deleted when the standby is
-        * disconnected (e.g because of network problems), but at least it avoids
-        * an open replication connection from failing because of that.
+        * Delete old log files (those no longer needed even for previous
+        * checkpoint or the standbys in XLOG streaming).
         */
-       if ((_logId || _logSeg) && max_wal_senders > 0)
+       if (_logId || _logSeg)
        {
-               XLogRecPtr      oldest;
-               uint32          log;
-               uint32          seg;
-
-               oldest = GetOldestWALSendPointer();
-               if (oldest.xlogid != 0 || oldest.xrecoff != 0)
+               /*
+                * Calculate the last segment that we need to retain because of
+                * standby_keep_segments, by subtracting StandbySegments from the
+                * new checkpoint location.
+                */
+               if (StandbySegments > 0)
                {
-                       XLByteToSeg(oldest, log, seg);
+                       uint32          log;
+                       uint32          seg;
+                       int                     d_log;
+                       int                     d_seg;
+
+                       XLByteToSeg(recptr, log, seg);
+
+                       d_seg = StandbySegments % XLogSegsPerFile;
+                       d_log = StandbySegments / XLogSegsPerFile;
+                       if (seg < d_seg)
+                       {
+                               d_log += 1;
+                               seg = seg - d_seg + XLogSegsPerFile;
+                       }
+                       else
+                               seg = seg - d_seg;
+                       /* avoid underflow, don't go below (0,1) */
+                       if (log < d_log || (log == d_log && seg == 0))
+                       {
+                               log = 0;
+                               seg = 1;
+                       }
+                       else
+                               log = log - d_log;
+
+                       /* don't delete WAL segments newer than the calculated segment */
                        if (log < _logId || (log == _logId && seg < _logSeg))
                        {
                                _logId = log;
                                _logSeg = seg;
                        }
                }
-       }
 
-       /*
-        * Delete old log files (those no longer needed even for previous
-        * checkpoint or the standbys in XLOG streaming).
-        */
-       if (_logId || _logSeg)
-       {
                PrevLogSeg(_logId, _logSeg);
                RemoveOldXlogFiles(_logId, _logSeg, recptr);
        }
index e04e5ba65caf50019da8442b9c7e7fe89cf1a45b..aa8fbc1a40b7e227837b0ff043e4a0e15f4a6d4d 100644 (file)
@@ -30,7 +30,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/replication/walsender.c,v 1.14 2010/04/01 00:43:29 rhaas Exp $
+ *       $PostgreSQL: pgsql/src/backend/replication/walsender.c,v 1.15 2010/04/12 09:52:29 heikki Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -508,6 +508,10 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes)
 {
        char            path[MAXPGPATH];
        uint32          startoff;
+       uint32          lastRemovedLog;
+       uint32          lastRemovedSeg;
+       uint32          log;
+       uint32          seg;
 
        while (nbytes > 0)
        {
@@ -527,10 +531,27 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes)
 
                        sendFile = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0);
                        if (sendFile < 0)
-                               ereport(FATAL,  /* XXX: Why FATAL? */
-                                               (errcode_for_file_access(),
-                                                errmsg("could not open file \"%s\" (log file %u, segment %u): %m",
-                                                               path, sendId, sendSeg)));
+                       {
+                               /*
+                                * If the file is not found, assume it's because the
+                                * standby asked for a too old WAL segment that has already
+                                * been removed or recycled.
+                                */
+                               if (errno == ENOENT)
+                               {
+                                       char filename[MAXFNAMELEN];
+                                       XLogFileName(filename, ThisTimeLineID, sendId, sendSeg);
+                                       ereport(ERROR,
+                                                       (errcode_for_file_access(),
+                                                        errmsg("requested WAL segment %s has already been removed",
+                                                                       filename)));
+                               }
+                               else
+                                       ereport(ERROR,
+                                                       (errcode_for_file_access(),
+                                                        errmsg("could not open file \"%s\" (log file %u, segment %u): %m",
+                                                                       path, sendId, sendSeg)));
+                       }
                        sendOff = 0;
                }
 
@@ -538,7 +559,7 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes)
                if (sendOff != startoff)
                {
                        if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0)
-                               ereport(FATAL,
+                               ereport(ERROR,
                                                (errcode_for_file_access(),
                                                 errmsg("could not seek in log file %u, segment %u to offset %u: %m",
                                                                sendId, sendSeg, startoff)));
@@ -553,7 +574,7 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes)
 
                readbytes = read(sendFile, buf, segbytes);
                if (readbytes <= 0)
-                       ereport(FATAL,
+                       ereport(ERROR,
                                        (errcode_for_file_access(),
                        errmsg("could not read from log file %u, segment %u, offset %u, "
                                   "length %lu: %m",
@@ -566,6 +587,26 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes)
                nbytes -= readbytes;
                buf += readbytes;
        }
+
+       /*
+        * After reading into the buffer, check that what we read was valid.
+        * We do this after reading, because even though the segment was present
+        * when we opened it, it might get recycled or removed while we read it.
+        * The read() succeeds in that case, but the data we tried to read might
+        * already have been overwritten with new WAL records.
+        */
+       XLogGetLastRemoved(&lastRemovedLog, &lastRemovedSeg);
+       XLByteToPrevSeg(recptr, log, seg);
+       if (log < lastRemovedLog ||
+               (log == lastRemovedLog && seg <= lastRemovedSeg))
+       {
+               char filename[MAXFNAMELEN];
+               XLogFileName(filename, ThisTimeLineID, log, seg);
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("requested WAL segment %s has already been removed",
+                                               filename)));
+       }
 }
 
 /*
@@ -801,6 +842,12 @@ WalSndShmemInit(void)
        }
 }
 
+/*
+ * This isn't currently used for anything. Monitoring tools might be
+ * interested in it, and we'll need something like this in the
+ * future for synchronous replication.
+ */
+#ifdef NOT_USED
 /*
  * Returns the oldest Send position among walsenders. Or InvalidXLogRecPtr
  * if none.
@@ -834,3 +881,4 @@ GetOldestWALSendPointer(void)
        }
        return oldest;
 }
+#endif
index 5f8cc494893fb918ea4e51616f9d14a633183e83..9d72a0e57363853bb8a7dc5dbfc5a63f2cb532b3 100644 (file)
@@ -10,7 +10,7 @@
  * Written by Peter Eisentraut <peter_e@gmx.net>.
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.546 2010/04/01 00:43:29 rhaas Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.547 2010/04/12 09:52:29 heikki Exp $
  *
  *--------------------------------------------------------------------
  */
@@ -1647,6 +1647,15 @@ static struct config_int ConfigureNamesInt[] =
                0, 0, 60, NULL, NULL
        },
 
+       {
+               {"standby_keep_segments", PGC_SIGHUP, WAL_CHECKPOINTS,
+                       gettext_noop("Sets the number of WAL files held for standby servers"),
+                       NULL
+               },
+               &StandbySegments,
+               0, 0, INT_MAX, NULL, NULL
+       },
+
        {
                {"checkpoint_segments", PGC_SIGHUP, WAL_CHECKPOINTS,
                        gettext_noop("Sets the maximum distance in log segments between automatic WAL checkpoints."),
index 02f1df01038d7a4cbd79e5f6a65dc7d27d325762..48c09d14670c826453fa9d497a2256705f1ea921 100644 (file)
 
 #max_wal_senders = 0           # max number of walsender processes
 #wal_sender_delay = 200ms      # 1-10000 milliseconds
+#standby_keep_segments = 0     # in logfile segments, 16MB each; 0 disables
 
 
 #------------------------------------------------------------------------------
index 9a66e9134d4c04057207ecb43285ca93d9a7f970..de7406a808b86b79b9294f7644f428ac41e33313 100644 (file)
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.105 2010/04/01 00:43:29 rhaas Exp $
+ * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.106 2010/04/12 09:52:29 heikki Exp $
  */
 #ifndef XLOG_H
 #define XLOG_H
@@ -187,6 +187,7 @@ extern XLogRecPtr XactLastRecEnd;
 
 /* these variables are GUC parameters related to XLOG */
 extern int     CheckPointSegments;
+extern int     StandbySegments;
 extern int     XLOGbuffers;
 extern bool XLogArchiveMode;
 extern char *XLogArchiveCommand;
@@ -267,6 +268,7 @@ extern int XLogFileInit(uint32 log, uint32 seg,
 extern int     XLogFileOpen(uint32 log, uint32 seg);
 
 
+extern void XLogGetLastRemoved(uint32 *log, uint32 *seg);
 extern void XLogSetAsyncCommitLSN(XLogRecPtr record);
 
 extern void RestoreBkpBlocks(XLogRecPtr lsn, XLogRecord *record, bool cleanup);