1 /*-------------------------------------------------------------------------
4 * A utility to "zero out" the xlog when it's corrupt beyond recovery.
5 * Can also rebuild pg_control if needed.
7 * The theory of operation is fairly simple:
8 * 1. Read the existing pg_control (which will include the last
9 * checkpoint record). If it is an old format then update to the current format.
11 * 2. If pg_control is corrupt, attempt to intuit reasonable values,
12 * by scanning the old xlog if necessary.
13 * 3. Modify pg_control to reflect a "shutdown" state with a checkpoint
14 * record at the start of xlog.
15 * 4. Flush the existing xlog files and write a new segment with
16 * just a checkpoint record in it. The new segment is positioned
17 * just past the end of the old xlog, so that existing LSNs in
18 * data pages will appear to be "in the past".
19 * This is all pretty straightforward except for the intuition part of step 2.
23 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
24 * Portions Copyright (c) 1994, Regents of the University of California
26 * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.33 2005/06/02 05:55:29 tgl Exp $
28 *-------------------------------------------------------------------------
43 #include "access/multixact.h"
44 #include "access/xlog.h"
45 #include "access/xlog_internal.h"
46 #include "catalog/catversion.h"
47 #include "catalog/pg_control.h"
53 char XLogDir[MAXPGPATH]; /* not static, see xlog_internal.h */
/*
 * XLogDir (above) and ControlFilePath are both filled in by main() from the
 * data directory argument, as "<DataDir>/pg_xlog" and
 * "<DataDir>/global/pg_control" respectively.
 */
54 static char ControlFilePath[MAXPGPATH];
56 static ControlFileData ControlFile; /* pg_control values */
/*
 * Position of the replacement segment.  Assigned in RewriteControlFile() and
 * later handed to XLogFilePath() to name the segment file that is written.
 */
57 static uint32 newXlogId,
58 newXlogSeg; /* ID/Segment of new XLOG segment */
59 static bool guessed = false; /* T if we had to guess at any values */
60 static const char *progname; /* from get_progname(argv[0]); prefixes every message */
/* Forward declarations of this file's static helper routines. */
62 static bool ReadControlFile(void);
63 static void GuessControlValues(void);
64 static void PrintControlValues(bool guessed);
65 static void RewriteControlFile(void);
66 static void KillExistingXLOG(void);
67 static void WriteEmptyXLOG(void);
68 static void usage(void);
/*
 * Program entry point.  The visible steps are: parse the switches with
 * getopt(), derive the pg_xlog and pg_control paths from the data directory
 * argument, check for a postmaster.pid lock file, load pg_control (falling
 * back to guessed values), apply any overrides given by switches, and then
 * either print the values or go on to call RewriteControlFile().
 */
72 main(int argc, char *argv[])
76 bool noupdate = false;
/*
 * Values supplied by the -x, -m and -l switches.  Presumably 0 means "switch
 * not given", which would explain why 0 is rejected as a switch value below;
 * the tests themselves are not visible in this listing.
 */
77 TransactionId set_xid = 0;
79 MultiXactId set_mxid = 0;
80 uint32 minXlogTli = 0,
90 set_pglocale_pgservice(argv[0], "pg_resetxlog");
92 progname = get_progname(argv[0]);
/* --help / -? and --version / -V are looked for in argv[1] only. */
96 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
101 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
103 puts("pg_resetxlog (PostgreSQL) " PG_VERSION);
/* Per the option string: -f and -n take no argument; -l, -m, -o, -x take one. */
109 while ((c = getopt(argc, argv, "fl:m:no:x:")) != -1)
/*
 * Base 0 lets strtoul() accept decimal, octal (leading 0) or hex (leading
 * 0x).  The endptr test rejects an empty number and trailing junk.  The same
 * pattern is used for -o and -m below.
 */
122 set_xid = strtoul(optarg, &endptr, 0);
123 if (endptr == optarg || *endptr != '\0')
125 fprintf(stderr, _("%s: invalid argument for option -x\n"), progname);
126 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
131 fprintf(stderr, _("%s: transaction ID (-x) must not be 0\n"), progname);
137 set_oid = strtoul(optarg, &endptr, 0);
138 if (endptr == optarg || *endptr != '\0')
140 fprintf(stderr, _("%s: invalid argument for option -o\n"), progname);
141 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
146 fprintf(stderr, _("%s: OID (-o) must not be 0\n"), progname);
152 set_mxid = strtoul(optarg, &endptr, 0);
153 if (endptr == optarg || *endptr != '\0')
155 fprintf(stderr, _("%s: invalid argument for option -m\n"), progname);
156 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
161 fprintf(stderr, _("%s: multi transaction ID (-m) must not be 0\n"), progname);
/*
 * -l takes three numbers separated by commas: timeline ID, log file ID and
 * segment number.  Each parse must stop exactly on the expected delimiter
 * (',' twice, then end of string).
 * NOTE(review): strtoul() returns unsigned long and minXlogTli is uint32; no
 * range or errno check is visible, so an oversized value would be truncated
 * silently where long is wider than 32 bits.
 */
167 minXlogTli = strtoul(optarg, &endptr, 0);
168 if (endptr == optarg || *endptr != ',')
170 fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
171 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
174 minXlogId = strtoul(endptr + 1, &endptr2, 0);
175 if (endptr2 == endptr + 1 || *endptr2 != ',')
177 fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
178 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
181 minXlogSeg = strtoul(endptr2 + 1, &endptr3, 0);
182 if (endptr3 == endptr2 + 1 || *endptr3 != '\0')
184 fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
185 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
191 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
198 fprintf(stderr, _("%s: no data directory specified\n"), progname);
199 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
204 * Don't allow pg_resetxlog to be run as root, to avoid
205 * overwriting the ownership of files in the data directory. We
206 * need only check for root -- any other user won't have
207 * sufficient permissions to modify files in the data directory.
210 #ifndef __BEOS__ /* no root check on BeOS */
213 fprintf(stderr, _("%s: cannot be executed by \"root\"\n"),
215 fprintf(stderr, _("You must run %s as the PostgreSQL superuser.\n"),
/* The data directory is the first non-option argument left by getopt(). */
222 DataDir = argv[optind];
223 snprintf(XLogDir, MAXPGPATH, "%s/pg_xlog", DataDir);
224 snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir);
227 * Check for a postmaster lock file --- if there is one, refuse to
228 * proceed, on grounds we might be interfering with a live
231 snprintf(path, MAXPGPATH, "%s/postmaster.pid", DataDir);
233 if ((fd = open(path, O_RDONLY)) < 0)
237 fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"), progname, path, strerror(errno));
243 fprintf(stderr, _("%s: lock file \"%s\" exists\n"
244 "Is a server running? If not, delete the lock file and try again.\n"),
250 * Attempt to read the existing pg_control file
/* A false result means the file could not be used, so build defaults instead. */
252 if (!ReadControlFile())
253 GuessControlValues();
256 * Adjust fields if required by switches. (Do this now so that
257 * printout, if any, includes these values.)
260 ControlFile.checkPointCopy.nextXid = set_xid;
263 ControlFile.checkPointCopy.nextOid = set_oid;
266 ControlFile.checkPointCopy.nextMulti = set_mxid;
/* -l can only raise the timeline ID, never lower it. */
268 if (minXlogTli > ControlFile.checkPointCopy.ThisTimeLineID)
269 ControlFile.checkPointCopy.ThisTimeLineID = minXlogTli;
/* Likewise, -l can only move the (log file, segment) position forward. */
271 if (minXlogId > ControlFile.logId ||
272 (minXlogId == ControlFile.logId &&
273 minXlogSeg > ControlFile.logSeg))
275 ControlFile.logId = minXlogId;
276 ControlFile.logSeg = minXlogSeg;
280 * If we had to guess anything, and -f was not given, just print the
281 * guessed values and exit. Also print if -n is given.
283 if ((guessed && !force) || noupdate)
285 PrintControlValues(guessed);
288 printf(_("\nIf these values seem acceptable, use -f to force reset.\n"));
296 * Don't reset from a dirty pg_control without -f, either.
/* Any recorded state other than DB_SHUTDOWNED counts as "not shut down cleanly". */
298 if (ControlFile.state != DB_SHUTDOWNED && !force)
300 printf(_("The database server was not shut down cleanly.\n"
301 "Resetting the transaction log may cause data to be lost.\n"
302 "If you want to proceed anyway, use -f to force reset.\n"));
307 * Else, do the dirty deed.
309 RewriteControlFile();
313 printf(_("Transaction log reset\n"));
319 * Try to read the existing pg_control file.
321 * This routine is also responsible for updating old pg_control versions
322 * to the current format. (Currently we don't do anything of the sort.)
/*
 * Load pg_control from ControlFilePath into the file-scope ControlFile.
 * Declared to return bool; main() falls back to GuessControlValues() when
 * the result is false.
 */
325 ReadControlFile(void)
/* Opened read-only: this routine only inspects the file. */
332 if ((fd = open(ControlFilePath, O_RDONLY)) < 0)
335 * If pg_control is not there at all, or we can't read it, the
336 * odds are we've been handed a bad DataDir path, so give up. User
337 * can do "touch pg_control" to force us to proceed.
339 fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"),
340 progname, ControlFilePath, strerror(errno));
342 fprintf(stderr, _("If you are sure the data directory path is correct, execute\n"
/*
 * NOTE(review): no NULL check on the malloc() result is visible before the
 * read() below, and no free() of this buffer is visible in this routine.
 */
349 /* Use malloc to ensure we have a maxaligned buffer */
350 buffer = (char *) malloc(BLCKSZ);
352 len = read(fd, buffer, BLCKSZ);
355 fprintf(stderr, _("%s: could not read file \"%s\": %s\n"),
356 progname, ControlFilePath, strerror(errno));
/*
 * Only look further if enough bytes were read to cover a whole
 * ControlFileData and the stored version equals this build's
 * PG_CONTROL_VERSION.
 */
361 if (len >= sizeof(ControlFileData) &&
362 ((ControlFileData *) buffer)->pg_control_version == PG_CONTROL_VERSION)
/* Length argument: every byte of the struct that precedes its crc field. */
368 offsetof(ControlFileData, crc));
/* Computed CRC equals the stored one: take the contents as they are. */
371 if (EQ_CRC32(crc, ((ControlFileData *) buffer)->crc))
374 memcpy(&ControlFile, buffer, sizeof(ControlFile));
378 fprintf(stderr, _("%s: pg_control exists but has invalid CRC; proceed with caution\n"),
380 /* We will use the data anyway, but treat it as guessed. */
381 memcpy(&ControlFile, buffer, sizeof(ControlFile));
386 /* Looks like it's a mess. */
387 fprintf(stderr, _("%s: pg_control exists but is broken or unknown version; ignoring it\n"),
394 * Guess at pg_control values when we can't read the old ones.
/*
 * Fill the file-scope ControlFile with a default set of values built from
 * compile-time constants, the current time and the locale taken from the
 * environment.  Takes no arguments; the result is left in ControlFile.
 */
397 GuessControlValues(void)
399 uint64 sysidentifier;
404 * Set up a completely default set of pg_control values.
/* Start from all-zero so every field not assigned below is 0. */
407 memset(&ControlFile, 0, sizeof(ControlFile));
409 ControlFile.pg_control_version = PG_CONTROL_VERSION;
410 ControlFile.catalog_version_no = CATALOG_VERSION_NO;
413 * Create a new unique installation identifier, since we can no longer
414 * use any old XLOG records. See notes in xlog.c about the algorithm.
/*
 * High 32 bits: seconds since the epoch.  Low 32 bits: seconds OR'ed with
 * the microsecond part, truncated to 32 bits.
 */
416 gettimeofday(&tv, NULL);
417 sysidentifier = ((uint64) tv.tv_sec) << 32;
418 sysidentifier |= (uint32) (tv.tv_sec | tv.tv_usec);
420 ControlFile.system_identifier = sysidentifier;
/* Checkpoint redo location: log file 0, at offset SizeOfXLogLongPHD. */
422 ControlFile.checkPointCopy.redo.xlogid = 0;
423 ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogLongPHD;
424 ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
425 ControlFile.checkPointCopy.ThisTimeLineID = 1;
/* NOTE(review): 514 is a hard-coded guess; its origin is not shown here. */
426 ControlFile.checkPointCopy.nextXid = (TransactionId) 514; /* XXX */
427 ControlFile.checkPointCopy.nextOid = FirstBootstrapObjectId;
428 ControlFile.checkPointCopy.nextMulti = FirstMultiXactId;
429 ControlFile.checkPointCopy.time = time(NULL);
431 ControlFile.state = DB_SHUTDOWNED;
432 ControlFile.time = time(NULL);
433 ControlFile.logId = 0;
434 ControlFile.logSeg = 1;
435 ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
/* Layout parameters are taken from this build's compile-time constants. */
437 ControlFile.blcksz = BLCKSZ;
438 ControlFile.relseg_size = RELSEG_SIZE;
439 ControlFile.xlog_seg_size = XLOG_SEG_SIZE;
440 ControlFile.nameDataLen = NAMEDATALEN;
441 ControlFile.indexMaxKeys = INDEX_MAX_KEYS;
/* The two assignments below are alternatives selected by HAVE_INT64_TIMESTAMP. */
442 #ifdef HAVE_INT64_TIMESTAMP
443 ControlFile.enableIntTimes = TRUE;
445 ControlFile.enableIntTimes = FALSE;
447 ControlFile.localeBuflen = LOCALE_NAME_BUFLEN;
/* An empty locale string makes setlocale() take the setting from the environment. */
449 localeptr = setlocale(LC_COLLATE, "");
452 fprintf(stderr, _("%s: invalid LC_COLLATE setting\n"), progname);
455 StrNCpy(ControlFile.lc_collate, localeptr, LOCALE_NAME_BUFLEN);
456 localeptr = setlocale(LC_CTYPE, "");
459 fprintf(stderr, _("%s: invalid LC_CTYPE setting\n"), progname);
462 StrNCpy(ControlFile.lc_ctype, localeptr, LOCALE_NAME_BUFLEN);
465 * XXX eventually, should try to grovel through old XLOG to develop
466 * more accurate values for TimeLineID, nextXID, and nextOID.
472 * Print the guessed pg_control values when we had to guess.
474 * NB: this display should be just those fields that will not be
475 * reset by RewriteControlFile().
/*
 * Print fields of the file-scope ControlFile to stdout, one labelled value
 * per line.
 *
 * guessed: selects the heading.  Note that this parameter shadows the
 * file-scope variable of the same name.
 */
478 PrintControlValues(bool guessed)
480 char sysident_str[32];
/*
 * Two alternative headings; presumably chosen by `guessed` (the if/else that
 * selects between them is not visible in this listing).
 */
483 printf(_("Guessed pg_control values:\n\n"));
485 printf(_("pg_control values:\n\n"));
488 * Format system_identifier separately to keep platform-dependent
489 * format code out of the translatable message string.
491 snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT,
492 ControlFile.system_identifier);
/* Every value below is printed with %u except the preformatted identifier and the strings. */
494 printf(_("pg_control version number: %u\n"), ControlFile.pg_control_version);
495 printf(_("Catalog version number: %u\n"), ControlFile.catalog_version_no);
496 printf(_("Database system identifier: %s\n"), sysident_str);
497 printf(_("Current log file ID: %u\n"), ControlFile.logId);
498 printf(_("Next log file segment: %u\n"), ControlFile.logSeg);
499 printf(_("Latest checkpoint's TimeLineID: %u\n"), ControlFile.checkPointCopy.ThisTimeLineID);
500 printf(_("Latest checkpoint's NextXID: %u\n"), ControlFile.checkPointCopy.nextXid);
501 printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid);
502 printf(_("Latest checkpoint's NextMultiXactId: %u\n"), ControlFile.checkPointCopy.nextMulti);
503 printf(_("Database block size: %u\n"), ControlFile.blcksz);
504 printf(_("Blocks per segment of large relation: %u\n"), ControlFile.relseg_size);
505 printf(_("Maximum length of identifiers: %u\n"), ControlFile.nameDataLen);
506 printf(_("Maximum columns in an index: %u\n"), ControlFile.indexMaxKeys);
/* enableIntTimes is shown as a translated description rather than a number. */
507 printf(_("Date/time type storage: %s\n"),
508 (ControlFile.enableIntTimes ? _("64-bit integers") : _("floating-point numbers")));
509 printf(_("Maximum length of locale name: %u\n"), ControlFile.localeBuflen);
510 printf(_("LC_COLLATE: %s\n"), ControlFile.lc_collate);
511 printf(_("LC_CTYPE: %s\n"), ControlFile.lc_ctype);
516 * Write out the new pg_control file.
/*
 * Update the file-scope ControlFile so that it records the DB_SHUTDOWNED
 * state with its checkpoint at the start of a new XLOG segment, recompute
 * its CRC, and write it as a zero-padded BLCKSZ block to a newly created
 * pg_control.  Also sets newXlogId/newXlogSeg.
 */
519 RewriteControlFile(void)
522 char buffer[BLCKSZ]; /* need not be aligned */
525 * Adjust fields as needed to force an empty XLOG starting at the next
528 newXlogId = ControlFile.logId;
529 newXlogSeg = ControlFile.logSeg;
/*
 * Convert the segment number from units of the recorded segment size to
 * units of XLogSegSize, rounding up.
 */
531 /* adjust in case we are changing segment size */
532 newXlogSeg *= ControlFile.xlog_seg_size;
533 newXlogSeg = (newXlogSeg + XLogSegSize - 1) / XLogSegSize;
535 /* be sure we wrap around correctly at end of a logfile */
536 NextLogSeg(newXlogId, newXlogSeg);
538 /* Now we can force the recorded xlog seg size to the right thing. */
539 ControlFile.xlog_seg_size = XLogSegSize;
/* The checkpoint sits just past the long page header at the start of the new segment. */
541 ControlFile.checkPointCopy.redo.xlogid = newXlogId;
542 ControlFile.checkPointCopy.redo.xrecoff =
543 newXlogSeg * XLogSegSize + SizeOfXLogLongPHD;
544 ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
545 ControlFile.checkPointCopy.time = time(NULL);
547 ControlFile.state = DB_SHUTDOWNED;
548 ControlFile.time = time(NULL);
549 ControlFile.logId = newXlogId;
/* Recorded as one past the segment that will hold the checkpoint. */
550 ControlFile.logSeg = newXlogSeg + 1;
551 ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
/* No previous checkpoint is recorded. */
552 ControlFile.prevCheckPoint.xlogid = 0;
553 ControlFile.prevCheckPoint.xrecoff = 0;
/* The CRC covers every byte of the struct that precedes the crc field itself. */
555 /* Contents are protected with a CRC */
556 INIT_CRC32(ControlFile.crc);
557 COMP_CRC32(ControlFile.crc,
558 (char *) &ControlFile,
559 offsetof(ControlFileData, crc));
560 FIN_CRC32(ControlFile.crc);
563 * We write out BLCKSZ bytes into pg_control, zero-padding the excess
564 * over sizeof(ControlFileData). This reduces the odds of
565 * premature-EOF errors when reading pg_control. We'll still fail
566 * when we check the contents of the file, but hopefully with a more
567 * specific error than "couldn't read pg_control".
569 if (sizeof(ControlFileData) > BLCKSZ)
572 _("%s: internal error -- sizeof(ControlFileData) is too large ... fix xlog.c\n"),
577 memset(buffer, 0, BLCKSZ);
578 memcpy(buffer, &ControlFile, sizeof(ControlFileData));
/*
 * The old file is removed first because the open() below uses O_EXCL, which
 * fails if the file exists.  The return value of unlink() is not checked.
 */
580 unlink(ControlFilePath);
582 fd = open(ControlFilePath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, S_IRUSR | S_IWUSR);
585 fprintf(stderr, _("%s: could not create pg_control file: %s\n"),
586 progname, strerror(errno));
/* Anything other than a full BLCKSZ write is treated as failure. */
591 if (write(fd, buffer, BLCKSZ) != BLCKSZ)
593 /* if write didn't set errno, assume problem is no disk space */
596 fprintf(stderr, _("%s: could not write pg_control file: %s\n"),
597 progname, strerror(errno));
603 fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno));
612 * Remove existing XLOG files
/*
 * Scan the XLogDir directory and unlink every entry whose name is exactly
 * 24 characters long and consists only of the characters 0-9 and A-F.
 * Failures are reported on stderr with the program name as prefix.
 */
615 KillExistingXLOG(void)
619 char path[MAXPGPATH];
621 xldir = opendir(XLogDir);
624 fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
625 progname, XLogDir, strerror(errno));
630 while ((xlde = readdir(xldir)) != NULL)
/* strspn() == 24 on a 24-character name means every character is in the set. */
632 if (strlen(xlde->d_name) == 24 &&
633 strspn(xlde->d_name, "0123456789ABCDEF") == 24)
635 snprintf(path, MAXPGPATH, "%s/%s", XLogDir, xlde->d_name);
636 if (unlink(path) < 0)
638 fprintf(stderr, _("%s: could not delete file \"%s\": %s\n"),
639 progname, path, strerror(errno));
648 * This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but
649 * not in released version
/*
 * NOTE(review): GetLastError() is a Windows API call; going by the comment
 * fragment above this is a mingw-specific workaround.  The conditional
 * compilation that presumably guards it is not visible in this listing.
 */
651 if (GetLastError() == ERROR_NO_MORE_FILES)
657 fprintf(stderr, _("%s: could not read from directory \"%s\": %s\n"),
658 progname, XLogDir, strerror(errno));
666 * Write an empty XLOG file, containing only the checkpoint record
667 * already set up in ControlFile.
/*
 * NOTE(review): the function header for this body is not visible in this
 * listing; the prototype list declares `static void WriteEmptyXLOG(void)`.
 *
 * Visible behaviour: build one BLCKSZ page holding a long page header and a
 * shutdown checkpoint record copied from ControlFile.checkPointCopy, write it
 * as the first block of a newly created segment file, then pad the file with
 * zeroed blocks up to XLogSegSize.
 */
674 XLogLongPageHeader longpage;
677 char path[MAXPGPATH];
/*
 * NOTE(review): no NULL check on the malloc() result and no free() of the
 * buffer are visible in this listing.
 */
681 /* Use malloc() to ensure buffer is MAXALIGNED */
682 buffer = (char *) malloc(BLCKSZ);
683 page = (XLogPageHeader) buffer;
684 memset(buffer, 0, BLCKSZ);
686 /* Set up the XLOG page header */
687 page->xlp_magic = XLOG_PAGE_MAGIC;
688 page->xlp_info = XLP_LONG_HEADER;
689 page->xlp_tli = ControlFile.checkPointCopy.ThisTimeLineID;
/* Page address = checkpoint redo location minus the long header that precedes the record. */
690 page->xlp_pageaddr.xlogid =
691 ControlFile.checkPointCopy.redo.xlogid;
692 page->xlp_pageaddr.xrecoff =
693 ControlFile.checkPointCopy.redo.xrecoff - SizeOfXLogLongPHD;
/* The same memory viewed as a long header, to reach the long-header-only fields. */
694 longpage = (XLogLongPageHeader) page;
695 longpage->xlp_sysid = ControlFile.system_identifier;
696 longpage->xlp_seg_size = XLogSegSize;
/* The record starts immediately after the long page header. */
698 /* Insert the initial checkpoint record */
699 record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD);
700 record->xl_prev.xlogid = 0;
701 record->xl_prev.xrecoff = 0;
702 record->xl_xid = InvalidTransactionId;
/* Total length = record header plus a CheckPoint payload; xl_len is the payload alone. */
703 record->xl_tot_len = SizeOfXLogRecord + sizeof(CheckPoint);
704 record->xl_len = sizeof(CheckPoint);
705 record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
706 record->xl_rmid = RM_XLOG_ID;
707 memcpy(XLogRecGetData(record), &ControlFile.checkPointCopy,
/*
 * The CRC is fed the checkpoint payload first, then the record header with
 * its first sizeof(pg_crc32) bytes skipped.
 */
711 COMP_CRC32(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint));
712 COMP_CRC32(crc, (char *) record + sizeof(pg_crc32),
713 SizeOfXLogRecord - sizeof(pg_crc32));
715 record->xl_crc = crc;
/* File name is derived from the timeline ID and the newXlogId/newXlogSeg pair. */
717 /* Write the first page */
718 XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID,
719 newXlogId, newXlogSeg);
/* O_CREAT | O_EXCL: the open fails if a file of that name already exists. */
723 fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
727 fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
728 progname, path, strerror(errno));
733 if (write(fd, buffer, BLCKSZ) != BLCKSZ)
735 /* if write didn't set errno, assume problem is no disk space */
738 fprintf(stderr, _("%s: could not write file \"%s\": %s\n"),
739 progname, path, strerror(errno));
/*
 * The buffer is re-zeroed and reused for every remaining block.  The loop
 * counter starts at BLCKSZ because the first block has already been written.
 */
743 /* Fill the rest of the file with zeroes */
744 memset(buffer, 0, BLCKSZ);
745 for (nbytes = BLCKSZ; nbytes < XLogSegSize; nbytes += BLCKSZ)
748 if (write(fd, buffer, BLCKSZ) != BLCKSZ)
752 fprintf(stderr, _("%s: could not write file \"%s\": %s\n"),
753 progname, path, strerror(errno));
760 fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno));
/*
 * NOTE(review): the function header for this body is not visible in this
 * listing; the prototype list declares `static void usage(void)`.
 *
 * Prints the help text to stdout.  The switches listed here are the same
 * letters that main() passes to getopt(): f, l, m, n, o, x.
 */
771 printf(_("%s resets the PostgreSQL transaction log.\n\n"), progname);
772 printf(_("Usage:\n %s [OPTION]... DATADIR\n\n"), progname);
773 printf(_("Options:\n"));
774 printf(_(" -f force update to be done\n"));
/* -l takes three comma-separated numbers, matching the parsing in main(). */
775 printf(_(" -l TLI,FILE,SEG force minimum WAL starting location for new transaction log\n"));
776 printf(_(" -n no update, just show extracted control values (for testing)\n"));
777 printf(_(" -o OID set next OID\n"));
778 printf(_(" -x XID set next transaction ID\n"));
779 printf(_(" -m multiXID set next multi transaction ID\n"));
780 printf(_(" --help show this help, then exit\n"));
781 printf(_(" --version output version information, then exit\n"));
782 printf(_("\nReport bugs to <pgsql-bugs@postgresql.org>.\n"));