1 /*-------------------------------------------------------------------------
4 * A utility to "zero out" the xlog when it's corrupt beyond recovery.
5 * Can also rebuild pg_control if needed.
7 * The theory of reset operation is fairly simple:
8 * 1. Read the existing pg_control (which will include the last
9 * checkpoint record). If it is an old format then update to
11 * 2. If pg_control is corrupt, attempt to rebuild the values,
12 * by scanning the old xlog; if that fails, then try to guess them.
13 * 3. Modify pg_control to reflect a "shutdown" state with a checkpoint
14 * record at the start of xlog.
15 * 4. Flush the existing xlog files and write a new segment with
16 * just a checkpoint record in it. The new segment is positioned
17 * just past the end of the old xlog, so that existing LSNs in
18 * data pages will appear to be "in the past".
20 * The algorithm of restoring the pg_control value from old xlog file:
21 * 1. Retrieve all of the active xlog files from the xlog directory into a list
22 * in increasing order, according to their timeline, log id, segment id.
23 * 2. Search the list to find the oldest xlog file of the latest time line.
24 * 3. Search the records from the oldest xlog file of latest time line
25 * to the latest xlog file of latest time line, if the checkpoint record
26 * has been found, update the latest checkpoint and previous checkpoint.
27 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
28 * Portions Copyright (c) 1994, Regents of the University of California
31 *-------------------------------------------------------------------------
46 #include "access/multixact.h"
47 #include "access/xlog.h"
48 #include "access/xlog_internal.h"
49 #include "catalog/catversion.h"
50 #include "catalog/pg_control.h"
59 static ControlFileData ControlFile; /* pg_control values */
60 static uint32 newXlogId,
61 newXlogSeg; /* ID/Segment of new XLOG segment */
62 static const char *progname;
63 static uint64 sysidentifier=-1;
66 * We use a list to store the active xlog files we have found in the
67 * xlog directory in increasing order according to the time line, logid,
71 typedef struct XLogFileName {
76 struct XLogFileName *next;
80 static XLogFileName *xlogfilelist=NULL;
82 /* LastXLogFile is the latest file in the latest time line,
83 CurXLogFile is the oldest file in the latest time line
85 static XLogFileName *CurXLogFile, *LastXLogFile;
87 /* The last checkpoint found in xlog file.*/
88 static CheckPoint lastcheckpoint;
90 /* The last and previous checkpoint pointers found in xlog file.*/
91 static XLogRecPtr prevchkp, lastchkp;
93 /* the database state.*/
94 static DBState state=DB_SHUTDOWNED;
96 /* the total checkpoint numbers which had been found in the xlog file.*/
97 static int found_checkpoint=0;
100 static bool ReadControlFile(void);
101 static bool RestoreControlValues(int mode);
102 static void PrintControlValues(void);
103 static void UpdateCtlFile4Reset(void);
104 static void RewriteControlFile(void);
105 static void KillExistingXLOG(void);
106 static void WriteEmptyXLOG(void);
107 static void usage(void);
109 static void GetXLogFiles(void);
110 static bool ValidXLogFileName(char * fname);
111 static bool ValidXLogFileHeader(XLogFileName *segfile);
112 static bool ValidXLOGPageHeader(XLogPageHeader hdr, uint tli, uint id, uint seg);
113 static bool CmpXLogFileOT(XLogFileName * f1, XLogFileName *f2);
114 static bool IsNextSeg(XLogFileName *prev, XLogFileName *cur);
115 static void InsertXLogFile( char * fname );
116 static bool ReadXLogPage(void);
117 static bool RecordIsValid(XLogRecord *record, XLogRecPtr recptr);
118 static bool FetchRecord(void);
119 static void UpdateCheckPoint(XLogRecord *record);
120 static void SelectStartXLog(void);
121 static int SearchLastCheckpoint(void);
122 static int OpenXLogFile(XLogFileName *sf);
123 static void CleanUpList(XLogFileName *list);
126 main(int argc, char *argv[])
130 bool restore = false;
131 bool noupdate = false;
132 TransactionId set_xid = 0;
134 MultiXactId set_mxid = 0;
135 MultiXactOffset set_mxoff = -1;
136 uint32 minXlogTli = 0,
144 char path[MAXPGPATH];
145 bool ctlcorrupted = false;
146 bool PidLocked = false;
148 set_pglocale_pgservice(argv[0], "pg_resetxlog");
150 progname = get_progname(argv[0]);
154 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
159 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
161 puts("pg_resetxlog (PostgreSQL) " PG_VERSION);
167 while ((c = getopt(argc, argv, "fl:m:no:O:x:r")) != -1)
184 set_xid = strtoul(optarg, &endptr, 0);
185 if (endptr == optarg || *endptr != '\0')
187 fprintf(stderr, _("%s: invalid argument for option -x\n"), progname);
188 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
193 fprintf(stderr, _("%s: transaction ID (-x) must not be 0\n"), progname);
199 set_oid = strtoul(optarg, &endptr, 0);
200 if (endptr == optarg || *endptr != '\0')
202 fprintf(stderr, _("%s: invalid argument for option -o\n"), progname);
203 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
208 fprintf(stderr, _("%s: OID (-o) must not be 0\n"), progname);
214 set_mxid = strtoul(optarg, &endptr, 0);
215 if (endptr == optarg || *endptr != '\0')
217 fprintf(stderr, _("%s: invalid argument for option -m\n"), progname);
218 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
223 fprintf(stderr, _("%s: multitransaction ID (-m) must not be 0\n"), progname);
229 set_mxoff = strtoul(optarg, &endptr, 0);
230 if (endptr == optarg || *endptr != '\0')
232 fprintf(stderr, _("%s: invalid argument for option -O\n"), progname);
233 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
238 fprintf(stderr, _("%s: multitransaction offset (-O) must not be -1\n"), progname);
244 minXlogTli = strtoul(optarg, &endptr, 0);
245 if (endptr == optarg || *endptr != ',')
247 fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
248 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
251 minXlogId = strtoul(endptr + 1, &endptr2, 0);
252 if (endptr2 == endptr + 1 || *endptr2 != ',')
254 fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
255 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
258 minXlogSeg = strtoul(endptr2 + 1, &endptr3, 0);
259 if (endptr3 == endptr2 + 1 || *endptr3 != '\0')
261 fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
262 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
268 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
275 fprintf(stderr, _("%s: no data directory specified\n"), progname);
276 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
281 * Don't allow pg_resetxlog to be run as root, to avoid overwriting the
282 * ownership of files in the data directory. We need only check for root
283 * -- any other user won't have sufficient permissions to modify files in
284 * the data directory.
289 fprintf(stderr, _("%s: cannot be executed by \"root\"\n"),
291 fprintf(stderr, _("You must run %s as the PostgreSQL superuser.\n"),
297 DataDir = argv[optind];
299 if (chdir(DataDir) < 0)
301 fprintf(stderr, _("%s: could not change directory to \"%s\": %s\n"),
302 progname, DataDir, strerror(errno));
307 * Check for a postmaster lock file --- if there is one, refuse to
308 * proceed, on grounds we might be interfering with a live installation.
310 snprintf(path, MAXPGPATH, "%s/postmaster.pid", DataDir);
312 if ((fd = open(path, O_RDONLY)) < 0)
316 fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"), progname, path, strerror(errno));
326 * Attempt to read the existing pg_control file
328 if (!ReadControlFile())
330 /* The control file has been corrupted.*/
335 * Adjust fields if required by switches. (Do this now so that printout,
336 * if any, includes these values.)
339 ControlFile.checkPointCopy.nextXid = set_xid;
342 ControlFile.checkPointCopy.nextOid = set_oid;
345 ControlFile.checkPointCopy.nextMulti = set_mxid;
348 ControlFile.checkPointCopy.nextMultiOffset = set_mxoff;
350 if (minXlogTli > ControlFile.checkPointCopy.ThisTimeLineID)
351 ControlFile.checkPointCopy.ThisTimeLineID = minXlogTli;
353 if (minXlogId > ControlFile.logId ||
354 (minXlogId == ControlFile.logId &&
355 minXlogSeg > ControlFile.logSeg))
357 ControlFile.logId = minXlogId;
358 ControlFile.logSeg = minXlogSeg;
361 /* Restore the broken control file from WAL file.*/
365 /* If the control file is fine, don't touch it.*/
368 printf(_("\nThe control file seems fine, not need to restore it.\n"));
369 printf(_("If you want to restore it anyway, use -f option, but this also will reset the log file.\n"));
374 /* Try to restore control values from old xlog file, or complain about the failure.*/
375 if (RestoreControlValues(WAL))
377 /* Success in restoring the checkpoint information from old xlog file.*/
380 PrintControlValues();
382 /* In case the postmaster has crashed.
383 * But this may be dangerous for a live one.
384 * A better way may be needed.
388 ControlFile.state = DB_IN_PRODUCTION;
390 /* Write the new control file. */
391 RewriteControlFile();
392 printf(_("\nThe control file had been restored.\n"));
396 /* Fail in restoring the checkpoint information from old xlog file. */
397 printf(_("\nCan not restore the control file from XLog file..\n"));
398 printf(_("\nIf you want to restore it anyway, use -f option to guess the information, but this also will reset the log file.\n"));
406 fprintf(stderr, _("%s: lock file \"%s\" exists\n"
407 "Is a server running? If not, delete the lock file and try again.\n"),
413 * Print out the values in control file if -n is given. if the control file is
414 * corrupted, then inform user to restore it first.
420 /* The control file is fine, print the values out.*/
421 PrintControlValues();
425 /* The control file is corrupted.*/
426 printf(_("The control file had been corrupted.\n"));
427 printf(_("Please use -r option to restore it first.\n"));
433 * Don't reset from a dirty pg_control without -f, either.
435 if (ControlFile.state != DB_SHUTDOWNED && !force && !ctlcorrupted)
437 printf(_("The database server was not shut down cleanly.\n"
438 "Resetting the transaction log may cause data to be lost.\n"
439 "If you want to proceed anyway, use -f to force reset.\n"));
444 * Try to reset the xlog file.
447 /* If the control file is corrupted, and -f option is given, restore it first.*/
452 if (!RestoreControlValues(WAL))
454 printf(_("fails to recover the control file from old xlog files, so we had to guess it.\n"));
455 RestoreControlValues(GUESS);
457 printf(_("Restored the control file from old xlog files.\n"));
461 printf(_("Control file corrupted.\nIf you want to proceed anyway, use -f to force reset.\n"));
466 /* Reset the xlog file.*/
467 UpdateCtlFile4Reset();
468 RewriteControlFile();
471 printf(_("Transaction log reset\n"));
477 * Try to read the existing pg_control file.
479 * This routine is also responsible for updating old pg_control versions
480 * to the current format. (Currently we don't do anything of the sort.)
483 ReadControlFile(void)
490 if ((fd = open(XLOG_CONTROL_FILE, O_RDONLY)) < 0)
493 * If pg_control is not there at all, or we can't read it, the odds
494 * are we've been handed a bad DataDir path, so give up. User can do
495 * "touch pg_control" to force us to proceed.
497 fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"),
498 progname, XLOG_CONTROL_FILE, strerror(errno));
500 fprintf(stderr, _("If you are sure the data directory path is correct, execute\n"
507 /* Use malloc to ensure we have a maxaligned buffer */
508 buffer = (char *) malloc(PG_CONTROL_SIZE);
510 len = read(fd, buffer, PG_CONTROL_SIZE);
513 fprintf(stderr, _("%s: could not read file \"%s\": %s\n"),
514 progname, XLOG_CONTROL_FILE, strerror(errno));
519 if (len >= sizeof(ControlFileData) &&
520 ((ControlFileData *) buffer)->pg_control_version == PG_CONTROL_VERSION)
526 offsetof(ControlFileData, crc));
529 if (EQ_CRC32(crc, ((ControlFileData *) buffer)->crc))
532 memcpy(&ControlFile, buffer, sizeof(ControlFile));
536 fprintf(stderr, _("%s: pg_control exists but has invalid CRC; proceed with caution\n"),
538 /* We will use the data anyway, but treat it as guessed. */
539 memcpy(&ControlFile, buffer, sizeof(ControlFile));
543 /* Looks like it's a mess. */
544 fprintf(stderr, _("%s: pg_control exists but is broken or unknown version; ignoring it\n"),
553 * Restore the pg_control values by scanning old xlog files or by guessing it.
556 * WAL: Restore the pg_control values by scanning old xlog files.
557 * GUESS: Restore the pg_control values by guessing.
559 * TRUE: success in restoring.
560 * FALSE: fail to restore the values.
564 RestoreControlValues(int mode)
568 bool successed = true;
571 * Set up a completely default set of pg_control values.
573 memset(&ControlFile, 0, sizeof(ControlFile));
575 ControlFile.pg_control_version = PG_CONTROL_VERSION;
576 ControlFile.catalog_version_no = CATALOG_VERSION_NO;
579 * update the checkpoint value in control file,by searching
580 * xlog segment file, or just guessing it.
584 int result = SearchLastCheckpoint();
586 if (result > 0) /* The last checkpoint had been found. */
588 ControlFile.checkPointCopy = lastcheckpoint;
589 ControlFile.checkPointCopy.ThisTimeLineID = LastXLogFile->tli;
590 ControlFile.checkPoint = lastchkp;
591 ControlFile.prevCheckPoint = prevchkp;
593 ControlFile.logId = LastXLogFile->logid;
594 ControlFile.logSeg = LastXLogFile->seg + 1;
595 ControlFile.state = state;
600 /* Clean up the list. */
601 CleanUpList(xlogfilelist);
605 ControlFile.checkPointCopy.ThisTimeLineID = 2;
606 ControlFile.checkPointCopy.redo.xlogid = 0;
607 ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogLongPHD;
608 ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
609 ControlFile.checkPointCopy.nextXid = (TransactionId) 514; /* XXX */
610 ControlFile.checkPointCopy.nextOid = FirstBootstrapObjectId;
611 ControlFile.checkPointCopy.nextMulti = FirstMultiXactId;
612 ControlFile.checkPointCopy.nextMultiOffset = 0;
613 ControlFile.checkPointCopy.time = time(NULL);
614 ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
617 * Create a new unique installation identifier, since we can no longer
618 * use any old XLOG records. See notes in xlog.c about the algorithm.
620 gettimeofday(&tv, NULL);
621 sysidentifier = ((uint64) tv.tv_sec) << 32;
622 sysidentifier |= (uint32) (tv.tv_sec | tv.tv_usec);
623 ControlFile.state = DB_SHUTDOWNED;
627 ControlFile.time = time(NULL);
628 ControlFile.system_identifier = sysidentifier;
629 ControlFile.maxAlign = MAXIMUM_ALIGNOF;
630 ControlFile.floatFormat = FLOATFORMAT_VALUE;
631 ControlFile.blcksz = BLCKSZ;
632 ControlFile.relseg_size = RELSEG_SIZE;
633 ControlFile.xlog_blcksz = XLOG_BLCKSZ;
634 ControlFile.xlog_seg_size = XLOG_SEG_SIZE;
635 ControlFile.nameDataLen = NAMEDATALEN;
636 ControlFile.indexMaxKeys = INDEX_MAX_KEYS;
637 #ifdef HAVE_INT64_TIMESTAMP
638 ControlFile.enableIntTimes = TRUE;
640 ControlFile.enableIntTimes = FALSE;
642 ControlFile.localeBuflen = LOCALE_NAME_BUFLEN;
644 localeptr = setlocale(LC_COLLATE, "");
647 fprintf(stderr, _("%s: invalid LC_COLLATE setting\n"), progname);
650 StrNCpy(ControlFile.lc_collate, localeptr, LOCALE_NAME_BUFLEN);
652 localeptr = setlocale(LC_CTYPE, "");
655 fprintf(stderr, _("%s: invalid LC_CTYPE setting\n"), progname);
658 StrNCpy(ControlFile.lc_ctype, localeptr, LOCALE_NAME_BUFLEN);
665 * Print out the pg_control values.
667 * NB: this display should be just those fields that will not be
668 * reset by RewriteControlFile().
671 PrintControlValues(void)
673 char sysident_str[32];
675 printf(_("pg_control values:\n\n"));
678 * Format system_identifier separately to keep platform-dependent format
679 * code out of the translatable message string.
681 snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT,
682 ControlFile.system_identifier);
684 printf(_("pg_control version number: %u\n"), ControlFile.pg_control_version);
685 printf(_("Catalog version number: %u\n"), ControlFile.catalog_version_no);
686 printf(_("Database system identifier: %s\n"), sysident_str);
687 printf(_("Current log file ID: %u\n"), ControlFile.logId);
688 printf(_("Next log file segment: %u\n"), ControlFile.logSeg);
689 printf(_("Latest checkpoint's TimeLineID: %u\n"), ControlFile.checkPointCopy.ThisTimeLineID);
690 printf(_("Latest checkpoint's NextXID: %u\n"), ControlFile.checkPointCopy.nextXid);
691 printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid);
692 printf(_("Latest checkpoint's NextMultiXactId: %u\n"), ControlFile.checkPointCopy.nextMulti);
693 printf(_("Latest checkpoint's NextMultiOffset: %u\n"), ControlFile.checkPointCopy.nextMultiOffset);
694 printf(_("Maximum data alignment: %u\n"), ControlFile.maxAlign);
695 /* we don't print floatFormat since can't say much useful about it */
696 printf(_("Database block size: %u\n"), ControlFile.blcksz);
697 printf(_("Blocks per segment of large relation: %u\n"), ControlFile.relseg_size);
698 printf(_("WAL block size: %u\n"), ControlFile.xlog_blcksz);
699 printf(_("Bytes per WAL segment: %u\n"), ControlFile.xlog_seg_size);
700 printf(_("Maximum length of identifiers: %u\n"), ControlFile.nameDataLen);
701 printf(_("Maximum columns in an index: %u\n"), ControlFile.indexMaxKeys);
702 printf(_("Date/time type storage: %s\n"),
703 (ControlFile.enableIntTimes ? _("64-bit integers") : _("floating-point numbers")));
704 printf(_("Maximum length of locale name: %u\n"), ControlFile.localeBuflen);
705 printf(_("LC_COLLATE: %s\n"), ControlFile.lc_collate);
706 printf(_("LC_CTYPE: %s\n"), ControlFile.lc_ctype);
710 * Update the control file before resetting it.
713 UpdateCtlFile4Reset(void)
716 * Adjust fields as needed to force an empty XLOG starting at the next
719 newXlogId = ControlFile.logId;
720 newXlogSeg = ControlFile.logSeg;
722 /* adjust in case we are changing segment size */
723 newXlogSeg *= ControlFile.xlog_seg_size;
724 newXlogSeg = (newXlogSeg + XLogSegSize - 1) / XLogSegSize;
726 /* be sure we wrap around correctly at end of a logfile */
727 NextLogSeg(newXlogId, newXlogSeg);
729 /* Now we can force the recorded xlog seg size to the right thing. */
730 ControlFile.xlog_seg_size = XLogSegSize;
732 ControlFile.checkPointCopy.redo.xlogid = newXlogId;
733 ControlFile.checkPointCopy.redo.xrecoff =
734 newXlogSeg * XLogSegSize + SizeOfXLogLongPHD;
735 ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
736 ControlFile.checkPointCopy.time = time(NULL);
738 ControlFile.state = DB_SHUTDOWNED;
739 ControlFile.time = time(NULL);
740 ControlFile.logId = newXlogId;
741 ControlFile.logSeg = newXlogSeg + 1;
742 ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
743 ControlFile.prevCheckPoint.xlogid = 0;
744 ControlFile.prevCheckPoint.xrecoff = 0;
748 * Write out the new pg_control file.
751 RewriteControlFile(void)
754 char buffer[PG_CONTROL_SIZE]; /* need not be aligned */
757 /* Contents are protected with a CRC */
758 INIT_CRC32(ControlFile.crc);
759 COMP_CRC32(ControlFile.crc,
760 (char *) &ControlFile,
761 offsetof(ControlFileData, crc));
762 FIN_CRC32(ControlFile.crc);
765 * We write out PG_CONTROL_SIZE bytes into pg_control, zero-padding the
766 * excess over sizeof(ControlFileData). This reduces the odds of
767 * premature-EOF errors when reading pg_control. We'll still fail when we
768 * check the contents of the file, but hopefully with a more specific
769 * error than "couldn't read pg_control".
771 if (sizeof(ControlFileData) > PG_CONTROL_SIZE)
774 _("%s: internal error -- sizeof(ControlFileData) is too large ... fix PG_CONTROL_SIZE\n"),
779 memset(buffer, 0, PG_CONTROL_SIZE);
780 memcpy(buffer, &ControlFile, sizeof(ControlFileData));
782 unlink(XLOG_CONTROL_FILE);
784 fd = open(XLOG_CONTROL_FILE,
785 O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
789 fprintf(stderr, _("%s: could not create pg_control file: %s\n"),
790 progname, strerror(errno));
795 if (write(fd, buffer, PG_CONTROL_SIZE) != PG_CONTROL_SIZE)
797 /* if write didn't set errno, assume problem is no disk space */
800 fprintf(stderr, _("%s: could not write pg_control file: %s\n"),
801 progname, strerror(errno));
807 fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno));
816 * Remove existing XLOG files
819 KillExistingXLOG(void)
823 char path[MAXPGPATH];
825 xldir = opendir(XLOGDIR);
828 fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
829 progname, XLOGDIR, strerror(errno));
834 while ((xlde = readdir(xldir)) != NULL)
836 if (strlen(xlde->d_name) == 24 &&
837 strspn(xlde->d_name, "0123456789ABCDEF") == 24)
839 snprintf(path, MAXPGPATH, "%s/%s", XLOGDIR, xlde->d_name);
840 if (unlink(path) < 0)
842 fprintf(stderr, _("%s: could not delete file \"%s\": %s\n"),
843 progname, path, strerror(errno));
851 * This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but not in
854 if (GetLastError() == ERROR_NO_MORE_FILES)
860 fprintf(stderr, _("%s: could not read from directory \"%s\": %s\n"),
861 progname, XLOGDIR, strerror(errno));
869 * Write an empty XLOG file, containing only the checkpoint record
870 * already set up in ControlFile.
877 XLogLongPageHeader longpage;
880 char path[MAXPGPATH];
884 /* Use malloc() to ensure buffer is MAXALIGNED */
885 buffer = (char *) malloc(XLOG_BLCKSZ);
886 page = (XLogPageHeader) buffer;
887 memset(buffer, 0, XLOG_BLCKSZ);
889 /* Set up the XLOG page header */
890 page->xlp_magic = XLOG_PAGE_MAGIC;
891 page->xlp_info = XLP_LONG_HEADER;
892 page->xlp_tli = ControlFile.checkPointCopy.ThisTimeLineID;
893 page->xlp_pageaddr.xlogid =
894 ControlFile.checkPointCopy.redo.xlogid;
895 page->xlp_pageaddr.xrecoff =
896 ControlFile.checkPointCopy.redo.xrecoff - SizeOfXLogLongPHD;
897 longpage = (XLogLongPageHeader) page;
898 longpage->xlp_sysid = ControlFile.system_identifier;
899 longpage->xlp_seg_size = XLogSegSize;
900 longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
902 /* Insert the initial checkpoint record */
903 record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD);
904 record->xl_prev.xlogid = 0;
905 record->xl_prev.xrecoff = 0;
906 record->xl_xid = InvalidTransactionId;
907 record->xl_tot_len = SizeOfXLogRecord + sizeof(CheckPoint);
908 record->xl_len = sizeof(CheckPoint);
909 record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
910 record->xl_rmid = RM_XLOG_ID;
911 memcpy(XLogRecGetData(record), &ControlFile.checkPointCopy,
915 COMP_CRC32(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint));
916 COMP_CRC32(crc, (char *) record + sizeof(pg_crc32),
917 SizeOfXLogRecord - sizeof(pg_crc32));
919 record->xl_crc = crc;
921 /* Write the first page */
922 XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID,
923 newXlogId, newXlogSeg);
927 fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
931 fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
932 progname, path, strerror(errno));
937 if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
939 /* if write didn't set errno, assume problem is no disk space */
942 fprintf(stderr, _("%s: could not write file \"%s\": %s\n"),
943 progname, path, strerror(errno));
947 /* Fill the rest of the file with zeroes */
948 memset(buffer, 0, XLOG_BLCKSZ);
949 for (nbytes = XLOG_BLCKSZ; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
952 if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
956 fprintf(stderr, _("%s: could not write file \"%s\": %s\n"),
957 progname, path, strerror(errno));
964 fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno));
975 printf(_("%s resets the PostgreSQL transaction log.\n\n"), progname);
976 printf(_("Usage:\n %s [OPTION]... DATADIR\n\n"), progname);
977 printf(_("Options:\n"));
978 printf(_(" -f force reset xlog to be done, if the control file is corrupted, then try to restore it.\n"));
979 printf(_(" -r restore the pg_control file from old XLog files, resets is not done..\n"));
980 printf(_(" -l TLI,FILE,SEG force minimum WAL starting location for new transaction log\n"));
981 printf(_(" -n show extracted control values of existing pg_control file.\n"));
982 printf(_(" -m multiXID set next multi transaction ID\n"));
983 printf(_(" -o OID set next OID\n"));
984 printf(_(" -O multiOffset set next multi transaction offset\n"));
985 printf(_(" -x XID set next transaction ID\n"));
986 printf(_(" --help show this help, then exit\n"));
987 printf(_(" --version output version information, then exit\n"));
988 printf(_("\nReport bugs to <pgsql-bugs@postgresql.org>.\n"));
994 * The following routines are mainly used for getting pg_control values
995 * from the xlog file.
998 /* some local variables.*/
999 static int logFd=0; /* kernel FD for current input file */
1000 static int logRecOff; /* offset of next record in page */
1001 static char pageBuffer[BLCKSZ]; /* current page */
1002 static XLogRecPtr curRecPtr; /* logical address of current record */
1003 static XLogRecPtr prevRecPtr; /* logical address of previous record */
1004 static char *readRecordBuf = NULL; /* ReadRecord result area */
1005 static uint32 readRecordBufSize = 0;
1006 static int32 logPageOff; /* offset of current page in file */
1007 static uint32 logId; /* current log file id */
1008 static uint32 logSeg; /* current log file segment */
1009 static uint32 logTli; /* current log file timeline */
1012 * Get existing XLOG files
1018 struct dirent *xlde;
1020 /* Open the xlog directory.*/
1021 xldir = opendir(XLOGDIR);
1024 fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
1025 progname, XLOGDIR, strerror(errno));
1029 /* Search the directory, insert the segment files into the xlogfilelist.*/
1031 while ((xlde = readdir(xldir)) != NULL)
1033 if (ValidXLogFileName(xlde->d_name)) {
1034 /* XLog file is found, insert it into the xlogfilelist.*/
1035 InsertXLogFile(xlde->d_name);
1040 if (GetLastError() == ERROR_NO_MORE_FILES)
1046 fprintf(stderr, _("%s: could not read from directory \"%s\": %s\n"),
1047 progname, XLOGDIR, strerror(errno));
1054 * Insert a file which had been found in the xlog folder into xlogfilelist.
1055 * The xlogfile list is maintained in increasing order.
1057 * The input parameter is the name of the xlog file, the name is assumed
1061 InsertXLogFile( char * fname )
1063 XLogFileName * NewSegFile, *Curr, *Prev;
1064 bool append2end = false;
1066 /* Allocate a new node for the new file. */
1067 NewSegFile = (XLogFileName *) malloc(sizeof(XLogFileName));
1068 strcpy(NewSegFile->fname,fname); /* setup the name */
1069 /* extract the time line, logid, and segment number from the name.*/
1070 sscanf(fname, "%8x%8x%8x", &(NewSegFile->tli), &(NewSegFile->logid), &(NewSegFile->seg));
1071 NewSegFile->next = NULL;
1073 /* Ensure the xlog file is active and valid.*/
1074 if (! ValidXLogFileHeader(NewSegFile))
1080 /* the list is empty.*/
1081 if ( xlogfilelist == NULL ) {
1082 xlogfilelist = NewSegFile;
1086 /* try to search the list and find the insert point. */
1087 Prev=Curr=xlogfilelist;
1088 while( CmpXLogFileOT(NewSegFile, Curr))
1090 /* the node is appended to the end of the list.*/
1091 if (Curr->next == NULL)
1100 /* Insert the new node to the list.*/
1103 /* We need to append the new node to the end of the list */
1104 Curr->next = NewSegFile;
1108 NewSegFile->next = Curr;
1109 /* prev should not be the list head. */
1110 if ( Prev != NULL && Prev != xlogfilelist)
1112 Prev->next = NewSegFile;
1115 /* Update the list head if it is needed.*/
1116 if ((Curr == xlogfilelist) && !append2end)
1118 xlogfilelist = NewSegFile;
1124 * Compare two xlog files by their names to see which one is the latest.
1126 * Return true if file 2 is the latest file.
1130 CmpXLogFileOT(XLogFileName * f1, XLogFileName *f2)
1132 if (f2->tli >= f1->tli)
1134 if (f2->logid >= f1->logid)
1136 if (f2->seg > f1->seg) return false;
1143 /* Check whether two segment files are continuous.*/
1145 IsNextSeg(XLogFileName *prev, XLogFileName *cur)
1147 uint32 logid, logseg;
1149 if (prev->tli != cur->tli) return false;
1151 logid = prev->logid;
1153 NextLogSeg(logid, logseg);
1155 if ((logid == cur->logid) && (logseg == cur->seg)) return true;
1163 * Select the oldest xlog file in the latest time line.
1166 SelectStartXLog( void )
1169 CurXLogFile = xlogfilelist;
1171 if (xlogfilelist == NULL)
1176 tmp=LastXLogFile=CurXLogFile=xlogfilelist;
1178 while(tmp->next != NULL)
1182 * We should ensure that the segment files from the first to
1183 * the last are continuous.
1185 if (!IsNextSeg(tmp, tmp->next))
1187 CurXLogFile = tmp->next;
1197 * Check if the file is a valid xlog file.
1199 * Return true if the input file is a valid xlog file.
1201 * The input parameter is the name of the xlog file.
1205 ValidXLogFileName(char * fname)
1207 uint logTLI, logId, logSeg;
1208 if (strlen(fname) != 24 ||
1209 strspn(fname, "0123456789ABCDEF") != 24 ||
1210 sscanf(fname, "%8x%8x%8x", &logTLI, &logId, &logSeg) != 3)
1216 /* Ensure the xlog file is active and valid.*/
1218 ValidXLogFileHeader(XLogFileName *segfile)
1221 char buffer[BLCKSZ];
1222 char path[MAXPGPATH];
1225 snprintf(path, MAXPGPATH, "%s/%s", XLOGDIR, segfile->fname);
1226 fd = open(path, O_RDONLY | PG_BINARY, 0);
1231 nread = read(fd, buffer, BLCKSZ);
1232 if (nread == BLCKSZ)
1234 XLogPageHeader hdr = (XLogPageHeader)buffer;
1236 if (ValidXLOGPageHeader(hdr, segfile->tli, segfile->logid, segfile->seg))
1246 ValidXLOGPageHeader(XLogPageHeader hdr, uint tli, uint id, uint seg)
1250 if (hdr->xlp_magic != XLOG_PAGE_MAGIC)
1254 if ((hdr->xlp_info & ~XLP_ALL_FLAGS) != 0)
1258 if (hdr->xlp_info & XLP_LONG_HEADER)
1260 XLogLongPageHeader longhdr = (XLogLongPageHeader) hdr;
1262 if (longhdr->xlp_seg_size != XLogSegSize)
1266 /* Get the system identifier from the segment file header.*/
1267 sysidentifier = ((XLogLongPageHeader) pageBuffer)->xlp_sysid;
1270 recaddr.xlogid = id;
1271 recaddr.xrecoff = seg * XLogSegSize + logPageOff;
1272 if (!XLByteEQ(hdr->xlp_pageaddr, recaddr))
1277 if (hdr->xlp_tli != tli)
1285 /* Read another page, if possible */
1291 /* Need to advance to the new segment file.*/
1292 if ( logPageOff >= XLogSegSize )
1298 /* Need to open the segment file.*/
1299 if ((logFd <= 0) && (CurXLogFile != NULL))
1301 if (OpenXLogFile(CurXLogFile) < 0)
1305 CurXLogFile = CurXLogFile->next;
1308 /* Read a page from the open segment file.*/
1309 nread = read(logFd, pageBuffer, BLCKSZ);
1311 if (nread == BLCKSZ)
1313 logPageOff += BLCKSZ;
1314 if (ValidXLOGPageHeader( (XLogPageHeader)pageBuffer, logTli, logId, logSeg))
1322 * CRC-check an XLOG record. We do not believe the contents of an XLOG
1323 * record (other than to the minimal extent of computing the amount of
1324 * data to read in) until we've checked the CRCs.
1326 * We assume all of the record has been read into memory at *record.
1329 RecordIsValid(XLogRecord *record, XLogRecPtr recptr)
1333 uint32 len = record->xl_len;
1337 /* First the rmgr data */
1339 COMP_CRC32(crc, XLogRecGetData(record), len);
1341 /* Add in the backup blocks, if any */
1342 blk = (char *) XLogRecGetData(record) + len;
1343 for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
1347 if (!(record->xl_info & XLR_SET_BKP_BLOCK(i)))
1350 memcpy(&bkpb, blk, sizeof(BkpBlock));
1351 if (bkpb.hole_offset + bkpb.hole_length > BLCKSZ)
1355 blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length;
1356 COMP_CRC32(crc, blk, blen);
1360 /* Check that xl_tot_len agrees with our calculation */
1361 if (blk != (char *) record + record->xl_tot_len)
1366 /* Finally include the record header */
1367 COMP_CRC32(crc, (char *) record + sizeof(pg_crc32),
1368 SizeOfXLogRecord - sizeof(pg_crc32));
1371 if (!EQ_CRC32(record->xl_crc, crc))
1382 * Attempt to read an XLOG record into readRecordBuf.
/*
 * Body of the record-fetch routine (presumably FetchRecord(), which
 * SearchLastCheckpoint() calls in its scan loop -- TODO confirm).
 *
 * It locates the next record at logRecOff in pageBuffer, reading further
 * pages with ReadXLogPage() when needed, copies the whole record into
 * readRecordBuf (reassembling it from continuation pages if it crosses a
 * page boundary), stores its position in curRecPtr, and validates it with
 * RecordIsValid().
 */
1389 XLogContRecord *contrecord;
1390 uint32 len, total_len;
/*
 * Loop until logRecOff lies inside the current page with enough room left
 * for at least a record header.
 */
1393 while (logRecOff <= 0 || logRecOff > BLCKSZ - SizeOfXLogRecord)
1395 /* Need to advance to new page */
1396 if (! ReadXLogPage())
/* On a freshly read page the first record starts after the page header */
1401 logRecOff = XLogPageHeaderSize((XLogPageHeader) pageBuffer);
/* Is any page flag other than XLP_LONG_HEADER set? */
1402 if ((((XLogPageHeader) pageBuffer)->xlp_info & ~XLP_LONG_HEADER) != 0)
1404 /* Check for a continuation record */
1405 if (((XLogPageHeader) pageBuffer)->xlp_info & XLP_FIRST_IS_CONTRECORD)
/* Step over the tail of the record continued from the previous page */
1407 contrecord = (XLogContRecord *) (pageBuffer + logRecOff);
1408 logRecOff += MAXALIGN(contrecord->xl_rem_len + SizeOfXLogContRecord);
/*
 * Remember where this record starts: the log id plus the byte offset
 * (segment start + page offset + offset within the page).
 */
1413 curRecPtr.xlogid = logId;
1414 curRecPtr.xrecoff = logSeg * XLogSegSize + logPageOff + logRecOff;
1415 record = (XLogRecord *) (pageBuffer + logRecOff);
/*
 * NOTE(review): a zero xl_len is presumably treated as "no record here";
 * the handling is in the branch body -- confirm.
 */
1417 if (record->xl_len == 0)
1422 total_len = record->xl_tot_len;
1425 * Allocate or enlarge readRecordBuf as needed. To avoid useless
1426 * small increases, round its size to a multiple of BLCKSZ, and make
1427 * sure it's at least 4*BLCKSZ to start with. (That is enough for all
1428 * "normal" records, but very large commit or abort records might need
1431 if (total_len > readRecordBufSize)
1433 uint32 newSize = total_len;
/* Round up to a BLCKSZ multiple (adds a full BLCKSZ if already aligned) */
1435 newSize += BLCKSZ - (newSize % BLCKSZ);
1436 newSize = Max(newSize, 4 * BLCKSZ);
/* The buffer is refilled below, so it is freed and allocated afresh */
1438 free(readRecordBuf);
1439 readRecordBuf = (char *) malloc(newSize);
/* Size is reset to 0 here, presumably on allocation failure -- confirm */
1442 readRecordBufSize = 0;
1445 readRecordBufSize = newSize;
1448 buffer = readRecordBuf;
1449 len = BLCKSZ - curRecPtr.xrecoff % BLCKSZ; /* available in block */
1450 if (total_len > len)
1452 /* Need to reassemble record */
1453 uint32 gotlen = len;
/*
 * Copy the part that lies on this page; from here on `record` refers to
 * the assembled copy in readRecordBuf.
 * NOTE(review): every memcpy below targets `buffer`; confirm that
 * `buffer` and `gotlen` are advanced between the copies.
 */
1455 memcpy(buffer, record, len);
1456 record = (XLogRecord *) buffer;
1460 uint32 pageHeaderSize;
1462 if (!ReadXLogPage())
/* The following page must begin with a continuation record */
1466 if (!(((XLogPageHeader) pageBuffer)->xlp_info & XLP_FIRST_IS_CONTRECORD))
1470 pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) pageBuffer);
1471 contrecord = (XLogContRecord *) (pageBuffer + pageHeaderSize);
/* Remaining length must be non-zero and agree with what was gathered */
1472 if (contrecord->xl_rem_len == 0 ||
1473 total_len != (contrecord->xl_rem_len + gotlen))
/* Largest continuation payload that fits on one page */
1477 len = BLCKSZ - pageHeaderSize - SizeOfXLogContRecord;
/* More remains than this page holds: take a full page's worth */
1478 if (contrecord->xl_rem_len > len)
1480 memcpy(buffer, (char *)contrecord + SizeOfXLogContRecord, len);
/* Last piece: copy the remainder and leave logRecOff just past it */
1485 memcpy(buffer, (char *) contrecord + SizeOfXLogContRecord,
1486 contrecord->xl_rem_len);
1487 logRecOff = MAXALIGN(pageHeaderSize + SizeOfXLogContRecord + contrecord->xl_rem_len);
/* Validate the reassembled record by its CRC */
1490 if (!RecordIsValid(record, curRecPtr))
1496 /* Record is contained in this page */
1497 memcpy(buffer, record, total_len);
1498 record = (XLogRecord *) buffer;
/* Advance to the (aligned) start of the next record on this page */
1499 logRecOff += MAXALIGN(total_len);
1500 if (!RecordIsValid(record, curRecPtr))
1509 * If the record is a checkpoint, update the latest checkpoint record.
/*
 * UpdateCheckPoint: if `record` is a shutdown or online checkpoint record,
 * remember it as the newest checkpoint seen so far.
 *
 * record - an XLOG record; SearchLastCheckpoint() passes only records
 *          whose xl_rmid is RM_XLOG_ID.
 *
 * Updates the file-level variables prevchkp, lastchkp, lastcheckpoint,
 * state and found_checkpoint.
 */
1512 UpdateCheckPoint(XLogRecord *record)
/* Clear the XLR_INFO_MASK bits, leaving the record-type bits of xl_info */
1514 uint8 info = record->xl_info & ~XLR_INFO_MASK;
1516 if ((info == XLOG_CHECKPOINT_SHUTDOWN) ||
1517 (info == XLOG_CHECKPOINT_ONLINE))
/* The record's data area is read as a CheckPoint struct */
1519 CheckPoint *chkpoint = (CheckPoint*) XLogRecGetData(record);
/* The former latest checkpoint becomes the previous one */
1520 prevchkp = lastchkp;
/* curRecPtr holds the position of the record currently being examined */
1521 lastchkp = curRecPtr;
1522 lastcheckpoint = *chkpoint;
1524 /* update the database state.*/
1527 case XLOG_CHECKPOINT_SHUTDOWN:
1528 state = DB_SHUTDOWNED;
1530 case XLOG_CHECKPOINT_ONLINE:
1531 state = DB_IN_PRODUCTION;
/* Count every checkpoint record found */
1534 found_checkpoint ++ ;
/*
 * OpenXLogFile: close the currently open segment (if any) and open the
 * segment file named by sf->fname under XLOGDIR for reading.
 *
 * sf - list entry describing the segment file to open.
 *
 * The descriptor is stored in the file-level variable logFd.  Callers
 * test the result with "< 0" to detect failure.
 */
1539 OpenXLogFile(XLogFileName *sf)
1542 char path[MAXPGPATH];
/*
 * NOTE(review): descriptor 0 is treated as "nothing open" (the scan starts
 * with logFd set to 0).  open() can legitimately return 0 when stdin has
 * been closed; such a descriptor would never be closed here -- confirm
 * this convention is acceptable.
 */
1544 if ( logFd > 0 ) close(logFd);
1546 /* Open a Xlog segment file. */
1547 snprintf(path, MAXPGPATH, "%s/%s", XLOGDIR, sf->fname);
1548 logFd = open(path, O_RDONLY | PG_BINARY, 0);
/* Report the path that could not be opened */
1552 fprintf(stderr, _("%s: Can not open xlog file %s.\n"), progname,path);
1556 /* Setup the parameter for searching. */
/*
 * NOTE(review): starting one page before offset 0 relies on logPageOff
 * being signed (or on unsigned wraparound) -- confirm its declared type.
 */
1557 logPageOff = -BLCKSZ; /* so 1st increment in readXLogPage gives 0 */
1566 * Search for the latest checkpoint in the latest XLog segment file.
1568 * The return value is the total number of checkpoints that were found
1569 * in the XLog segment file.
/*
 * SearchLastCheckpoint
 *
 * Scan XLOG records starting at CurXLogFile.  Every record whose xl_rmid
 * is RM_XLOG_ID is handed to UpdateCheckPoint(), and the position of each
 * record read is saved in prevRecPtr.
 *
 * Returns found_checkpoint, the count of checkpoint records seen.
 *
 * When the scan stops, the segment last read (logId/logSeg) is compared
 * with LastXLogFile.  The scan ended early if EITHER the log id OR the
 * segment number differs.  The test previously used "&&", which missed
 * the common case of stopping in an earlier segment of the same log id.
 */
1572 SearchLastCheckpoint(void)
1575 /* Retrieve all of the active xlog files from the xlog directory
1576 * into a list in increasing order, according to their timeline,
1577 * log id, segment id.
1581 /* Select the oldest segment file in the latest time line.*/
1584 /* No segment file was found.*/
1585 if ( CurXLogFile == NULL )
1591 logFd=logId=logSeg=logTli=0;
1594 * Search the XLog segment file from beginning to end,
1595 * if checkpoint record is found, then update the
1596 * latest check point.
1598 while (FetchRecord())
1600 /* To see if the record is checkpoint record. */
1601 if (((XLogRecord *) readRecordBuf)->xl_rmid == RM_XLOG_ID)
1602 UpdateCheckPoint((XLogRecord *) readRecordBuf);
1603 prevRecPtr = curRecPtr;
1606 /* We cannot know for certain whether we have reached the end.
1607 * But just check whether we reached the last segment file;
1608 * if we did not, then there is some problem.
1609 * (We need a better way to detect an abnormal break during the search)
 * The scan stopped early if the log id OR the segment id differs from
 * the last file, so the two comparisons are joined with "||".
1611 if ((logId != LastXLogFile->logid) || (logSeg != LastXLogFile->seg))
1617 * Return the number of checkpoints found so far,
1618 * so the caller knows how many checkpoints were found.
1620 return found_checkpoint;
1623 /* Clean up the allocated list.*/
1625 CleanUpList(XLogFileName *list)