4 * PostgreSQL write-ahead log manager
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
9 * src/include/access/xlog.h
14 #include "access/rmgr.h"
15 #include "access/xlogdefs.h"
16 #include "access/xloginsert.h"
17 #include "access/xlogreader.h"
18 #include "datatype/timestamp.h"
19 #include "lib/stringinfo.h"
20 #include "nodes/pg_list.h"
21 #include "storage/fd.h"
/*
 * Integer codes naming the WAL sync methods.
 * NOTE(review): presumably these are the values held by sync_method below;
 * confirm against the GUC definition.
 */
#define SYNC_METHOD_FSYNC				0
#define SYNC_METHOD_FDATASYNC			1
/* for O_SYNC */
#define SYNC_METHOD_OPEN				2
#define SYNC_METHOD_FSYNC_WRITETHROUGH	3
/* for O_DSYNC */
#define SYNC_METHOD_OPEN_DSYNC			4

extern int	sync_method;
32 extern PGDLLIMPORT TimeLineID ThisTimeLineID; /* current TLI */
35 * Prior to 8.4, all activity during recovery was carried out by the startup
36 * process. This local variable continues to be used in many parts of the
37 * code to indicate actions taken by RecoveryManagers. Other processes that
38 * potentially perform work during recovery should check RecoveryInProgress().
39 * See XLogCtl notes in xlog.c.
41 extern bool InRecovery;
/*
 * Like InRecovery, standbyState is only valid in the startup process.
 * In all other processes it will have the value STANDBY_DISABLED (so
 * InHotStandby will read as false).
 *
 * In DISABLED state, we're performing crash recovery or hot standby was
 * disabled in postgresql.conf.
 *
 * In INITIALIZED state, we've run InitRecoveryTransactionEnvironment, but
 * we haven't yet processed a RUNNING_XACTS or shutdown-checkpoint WAL record
 * to initialize our master-transaction tracking system.
 *
 * When the transaction tracking is initialized, we enter the SNAPSHOT_PENDING
 * state. The tracked information might still be incomplete, so we can't allow
 * connections yet, but redo functions must update the in-memory state when
 * appropriate.
 *
 * In SNAPSHOT_READY mode, we have full knowledge of transactions that are
 * (or were) running in the master at the current WAL location. Snapshots
 * can be taken, and read-only queries can be run.
 */
68 STANDBY_SNAPSHOT_PENDING,
69 STANDBY_SNAPSHOT_READY
72 extern HotStandbyState standbyState;
/*
 * InHotStandby
 *		True once standbyState has reached STANDBY_SNAPSHOT_PENDING or any
 *		later state.
 */
#define InHotStandby \
	(STANDBY_SNAPSHOT_PENDING <= standbyState)
77 * Recovery target type.
78 * Only set during a Point in Time recovery, not when in standby mode.
82 RECOVERY_TARGET_UNSET,
87 RECOVERY_TARGET_IMMEDIATE
91 * Recovery target TimeLine goal
95 RECOVERY_TARGET_TIMELINE_CONTROLFILE,
96 RECOVERY_TARGET_TIMELINE_LATEST,
97 RECOVERY_TARGET_TIMELINE_NUMERIC
98 } RecoveryTargetTimeLineGoal;
100 extern XLogRecPtr ProcLastRecPtr;
101 extern XLogRecPtr XactLastRecEnd;
102 extern PGDLLIMPORT XLogRecPtr XactLastCommitEnd;
104 extern bool reachedConsistency;
/*
 * GUC parameters related to XLOG, grouped here by C type.
 */

/* integer-valued settings */
extern int	wal_segment_size;
extern int	min_wal_size_mb;
extern int	max_wal_size_mb;
extern int	wal_keep_segments;
extern int	XLOGbuffers;
extern int	XLogArchiveTimeout;
extern int	wal_retrieve_retry_interval;
extern int	recoveryTargetAction;
extern int	recovery_min_apply_delay;

/* boolean settings; wal_log_hints is read by XLogHintBitIsNeeded() */
extern bool EnableHotStandby;
extern bool fullPageWrites;
extern bool wal_log_hints;
extern bool wal_compression;
extern bool wal_init_zero;
extern bool wal_recycle;
extern bool log_checkpoints;
extern bool recoveryTargetInclusive;

/* string settings; XLogArchiveCommand is tested by XLogArchiveCommandSet() */
extern char *XLogArchiveCommand;
extern char *recoveryRestoreCommand;
extern char *recoveryEndCommand;
extern char *archiveCleanupCommand;
extern char *PrimaryConnInfo;
extern char *PrimarySlotName;

/*
 * wal_consistency_checking comes in two forms: a string and a bool array.
 * NOTE(review): presumably the array is derived from the string; confirm in
 * the GUC assign hook.
 */
extern char *wal_consistency_checking_string;
extern bool *wal_consistency_checking;
133 /* indirectly set via GUC system */
134 extern TransactionId recoveryTargetXid;
135 extern char *recovery_target_time_string;
136 extern const char *recoveryTargetName;
137 extern XLogRecPtr recoveryTargetLSN;
138 extern RecoveryTargetType recoveryTarget;
139 extern char *PromoteTriggerFile;
140 extern RecoveryTargetTimeLineGoal recoveryTargetTimeLineGoal;
141 extern TimeLineID recoveryTargetTLIRequested;
142 extern TimeLineID recoveryTargetTLI;
144 extern int CheckPointSegments;
/*
 * Options set locally in the startup process, and only when signal files
 * exist.
 */
extern bool StandbyMode;
extern bool StandbyModeRequested;
151 typedef enum ArchiveMode
153 ARCHIVE_MODE_OFF = 0, /* disabled */
154 ARCHIVE_MODE_ON, /* enabled while server is running normally */
155 ARCHIVE_MODE_ALWAYS /* enabled always (even during recovery) */
157 extern int XLogArchiveMode;
160 typedef enum WalLevel
162 WAL_LEVEL_MINIMAL = 0,
167 extern PGDLLIMPORT int wal_level;
/*
 * XLogArchivingActive
 *		Is WAL archiving enabled, whether always or only while the server is
 *		running normally?  True when XLogArchiveMode is above
 *		ARCHIVE_MODE_OFF.
 *
 * The AssertMacro checks that archiving is off unless wal_level is at least
 * WAL_LEVEL_REPLICA.
 */
#define XLogArchivingActive() \
	(AssertMacro(wal_level >= WAL_LEVEL_REPLICA || \
				 XLogArchiveMode == ARCHIVE_MODE_OFF), \
	 XLogArchiveMode > ARCHIVE_MODE_OFF)
/*
 * XLogArchivingAlways
 *		Is WAL archiving enabled always, even during recovery?  True only
 *		when XLogArchiveMode equals ARCHIVE_MODE_ALWAYS.
 *
 * The AssertMacro checks that archiving is off unless wal_level is at least
 * WAL_LEVEL_REPLICA.
 */
#define XLogArchivingAlways() \
	(AssertMacro(wal_level >= WAL_LEVEL_REPLICA || \
				 XLogArchiveMode == ARCHIVE_MODE_OFF), \
	 XLogArchiveMode == ARCHIVE_MODE_ALWAYS)
/* True when XLogArchiveCommand is a non-empty string. */
#define XLogArchiveCommandSet() \
	(*XLogArchiveCommand != '\0')
/*
 * XLogIsNeeded
 *		Must we WAL-log for the sake of archival or log-shipping, or may
 *		WAL-logging be skipped if the data is fsync()'d before commit
 *		instead?  True when wal_level is at least WAL_LEVEL_REPLICA.
 */
#define XLogIsNeeded() \
	(WAL_LEVEL_REPLICA <= wal_level)
/*
 * XLogHintBitIsNeeded
 *		Does a hint bit update need a full-page image?
 *
 * Hint bit updates are normally not WAL-logged.  With checksums enabled they
 * have to be protected against torn page writes: when only individual bits
 * are set on a page, any combination of them reaching disk leaves the page
 * consistent, but the checksum wouldn't match.  wal_log_hints=on forces the
 * same WAL-logging.
 */
#define XLogHintBitIsNeeded() \
	(DataChecksumsEnabled() || wal_log_hints)
/*
 * XLogStandbyInfoActive
 *		Must we WAL-log information required only for Hot Standby and
 *		logical replication?  True when wal_level is at least
 *		WAL_LEVEL_REPLICA.
 */
#define XLogStandbyInfoActive() \
	(WAL_LEVEL_REPLICA <= wal_level)

/*
 * XLogLogicalInfoActive
 *		Must we WAL-log information required only for logical replication?
 *		True when wal_level is at least WAL_LEVEL_LOGICAL.
 */
#define XLogLogicalInfoActive() \
	(WAL_LEVEL_LOGICAL <= wal_level)
201 extern bool XLOG_DEBUG;
/*
 * Checkpoint request flags.  Each flag is a distinct bit so that requests
 * arising from different requestors can sensibly be OR'ed together.  The
 * "cause" bits are used only for logging purposes.
 */

/*
 * Flags that directly affect the behavior of CreateCheckPoint and
 * subsidiaries.
 */
/* Checkpoint is for shutdown */
#define CHECKPOINT_IS_SHUTDOWN		0x0001
/* Like shutdown checkpoint, but issued at end of WAL recovery */
#define CHECKPOINT_END_OF_RECOVERY	0x0002
/* Do it without delays */
#define CHECKPOINT_IMMEDIATE		0x0004
/* Force even if no activity */
#define CHECKPOINT_FORCE			0x0008
/* Flush all pages, including those belonging to unlogged tables */
#define CHECKPOINT_FLUSH_ALL		0x0010

/*
 * Flags that are important to RequestCheckpoint.
 */
/* Wait for completion */
#define CHECKPOINT_WAIT				0x0020
/* Checkpoint request has been made */
#define CHECKPOINT_REQUESTED		0x0040

/*
 * Flags that indicate the cause of a checkpoint request.
 */
/* XLOG consumption */
#define CHECKPOINT_CAUSE_XLOG		0x0080
/* Elapsed time */
#define CHECKPOINT_CAUSE_TIME		0x0100
/*
 * Per-record flag bits for the record being inserted; callers set them with
 * XLogSetRecordFlags().
 */
/* include the replication origin */
#define XLOG_INCLUDE_ORIGIN		0x01
/* record not important for durability */
#define XLOG_MARK_UNIMPORTANT	0x02
232 /* Checkpoint statistics */
233 typedef struct CheckpointStatsData
235 TimestampTz ckpt_start_t; /* start of checkpoint */
236 TimestampTz ckpt_write_t; /* start of flushing buffers */
237 TimestampTz ckpt_sync_t; /* start of fsyncs */
238 TimestampTz ckpt_sync_end_t; /* end of fsyncs */
239 TimestampTz ckpt_end_t; /* end of checkpoint */
241 int ckpt_bufs_written; /* # of buffers written */
243 int ckpt_segs_added; /* # of new xlog segments created */
244 int ckpt_segs_removed; /* # of xlog segments deleted */
245 int ckpt_segs_recycled; /* # of xlog segments recycled */
247 int ckpt_sync_rels; /* # of relations synced */
248 uint64 ckpt_longest_sync; /* Longest sync for one relation */
249 uint64 ckpt_agg_sync_time; /* The sum of all the individual sync
250 * times, which is not necessarily the
251 * same as the total elapsed time for the
252 * entire sync phase. */
253 } CheckpointStatsData;
255 extern CheckpointStatsData CheckpointStats;
259 extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata,
262 extern void XLogFlush(XLogRecPtr RecPtr);
263 extern bool XLogBackgroundFlush(void);
264 extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
265 extern int XLogFileInit(XLogSegNo segno, bool *use_existent, bool use_lock);
266 extern int XLogFileOpen(XLogSegNo segno);
268 extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli);
269 extern XLogSegNo XLogGetLastRemovedSegno(void);
270 extern void XLogSetAsyncXactLSN(XLogRecPtr record);
271 extern void XLogSetReplicationSlotMinimumLSN(XLogRecPtr lsn);
273 extern void xlog_redo(XLogReaderState *record);
274 extern void xlog_desc(StringInfo buf, XLogReaderState *record);
275 extern const char *xlog_identify(uint8 info);
277 extern void issue_xlog_fsync(int fd, XLogSegNo segno);
279 extern bool RecoveryInProgress(void);
280 extern bool HotStandbyActive(void);
281 extern bool HotStandbyActiveInReplay(void);
282 extern bool XLogInsertAllowed(void);
283 extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream);
284 extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI);
285 extern XLogRecPtr GetXLogInsertRecPtr(void);
286 extern XLogRecPtr GetXLogWriteRecPtr(void);
287 extern bool RecoveryIsPaused(void);
288 extern void SetRecoveryPause(bool recoveryPause);
289 extern TimestampTz GetLatestXTime(void);
290 extern TimestampTz GetCurrentChunkReplayStartTime(void);
291 extern char *XLogFileNameP(TimeLineID tli, XLogSegNo segno);
293 extern void UpdateControlFile(void);
294 extern uint64 GetSystemIdentifier(void);
295 extern char *GetMockAuthenticationNonce(void);
296 extern bool DataChecksumsEnabled(void);
297 extern XLogRecPtr GetFakeLSNForUnloggedRel(void);
298 extern Size XLOGShmemSize(void);
299 extern void XLOGShmemInit(void);
300 extern void BootStrapXLOG(void);
301 extern void LocalProcessControlFile(bool reset);
302 extern void StartupXLOG(void);
303 extern void ShutdownXLOG(int code, Datum arg);
304 extern void InitXLOGAccess(void);
305 extern void CreateCheckPoint(int flags);
306 extern bool CreateRestartPoint(int flags);
307 extern void XLogPutNextOid(Oid nextOid);
308 extern XLogRecPtr XLogRestorePoint(const char *rpName);
309 extern void UpdateFullPageWrites(void);
310 extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p);
311 extern XLogRecPtr GetRedoRecPtr(void);
312 extern XLogRecPtr GetInsertRecPtr(void);
313 extern XLogRecPtr GetFlushRecPtr(void);
314 extern XLogRecPtr GetLastImportantRecPtr(void);
315 extern void RemovePromoteSignalFiles(void);
317 extern bool CheckPromoteSignal(void);
318 extern void WakeupRecovery(void);
319 extern void SetWalWriterSleeping(bool sleeping);
321 extern void XLogRequestWalReceiverReply(void);
323 extern void assign_max_wal_size(int newval, void *extra);
324 extern void assign_checkpoint_completion_target(double newval, void *extra);
/*
 * Routines to start, stop, and get status of a base backup.
 */

/*
 * Session-level status of base backups
 *
 * This is used in parallel with the shared memory status to control parallel
 * execution of base backup functions for a given session, be it a backend
 * dedicated to replication or a normal backend connected to a database. The
 * update of the session-level status happens at the same time as the shared
 * memory counters to keep a consistent global and local state of the backups
 * running.
 */
340 typedef enum SessionBackupState
343 SESSION_BACKUP_EXCLUSIVE,
344 SESSION_BACKUP_NON_EXCLUSIVE
345 } SessionBackupState;
347 extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast,
348 TimeLineID *starttli_p, StringInfo labelfile,
349 List **tablespaces, StringInfo tblspcmapfile, bool infotbssize,
350 bool needtblspcmapfile);
351 extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive,
352 TimeLineID *stoptli_p);
353 extern void do_pg_abort_backup(void);
354 extern SessionBackupState get_backup_status(void);
/*
 * File path names.  All of them are relative to $PGDATA.
 */

/* signal files */
#define RECOVERY_SIGNAL_FILE	"recovery.signal"
#define STANDBY_SIGNAL_FILE		"standby.signal"

/* backup label, current and old */
#define BACKUP_LABEL_FILE		"backup_label"
#define BACKUP_LABEL_OLD		"backup_label.old"

/* tablespace map, current and old */
#define TABLESPACE_MAP			"tablespace_map"
#define TABLESPACE_MAP_OLD		"tablespace_map.old"

/* files to signal promotion to primary */
#define PROMOTE_SIGNAL_FILE				"promote"
#define FALLBACK_PROMOTE_SIGNAL_FILE	"fallback_promote"