1 /*-------------------------------------------------------------------------
4 * A utility to "zero out" the xlog when it's corrupt beyond recovery.
5 * Can also rebuild pg_control if needed.
7 * The theory of operation is fairly simple:
8 * 1. Read the existing pg_control (which will include the last
9 * checkpoint record). If it is an old format then update to the current format.
11 * 2. If pg_control is corrupt, attempt to intuit reasonable values,
12 * by scanning the old xlog if necessary.
13 * 3. Modify pg_control to reflect a "shutdown" state with a checkpoint
14 * record at the start of xlog.
15 * 4. Flush the existing xlog files and write a new segment with
16 * just a checkpoint record in it. The new segment is positioned
17 * just past the end of the old xlog, so that existing LSNs in
18 * data pages will appear to be "in the past".
19 * This is all pretty straightforward except for the intuition part of step 2.
23 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
24 * Portions Copyright (c) 1994, Regents of the University of California
26 * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.53 2006/10/04 00:30:05 momjian Exp $
28 *-------------------------------------------------------------------------
43 #include "access/transam.h"
44 #include "access/multixact.h"
45 #include "access/xlog_internal.h"
46 #include "catalog/catversion.h"
47 #include "catalog/pg_control.h"
53 static ControlFileData ControlFile; /* pg_control values */
54 static uint32 newXlogId,
55 newXlogSeg; /* ID/Segment of new XLOG segment */
56 static bool guessed = false; /* T if we had to guess at any values */
/* Program name obtained from get_progname(argv[0]) in main(); prefixes every diagnostic. */
57 static const char *progname;
/*
 * Forward declarations of the file-local routines defined later in this
 * file.  All of them operate on the file-level ControlFile state above
 * rather than taking it as an argument.
 */
59 static bool ReadControlFile(void);
60 static void GuessControlValues(void);
61 static void PrintControlValues(bool guessed);
62 static void RewriteControlFile(void);
63 static void KillExistingXLOG(void);
64 static void WriteEmptyXLOG(void);
65 static void usage(void);
/*
 * Program entry point.
 *
 * Visible flow: recognise --help and --version, parse the switches with
 * getopt(), chdir() into the data directory, check for a postmaster.pid
 * lock file, load pg_control through ReadControlFile() (falling back to
 * GuessControlValues() when that returns false), apply any values given
 * by switches, and then either print the values or call
 * RewriteControlFile().
 *
 * NOTE(review): many lines of this function (braces, case labels, exit
 * calls and some declarations) are not visible in this excerpt, so the
 * comments added here describe only statements that are shown.
 */
69 main(int argc, char *argv[])
73 bool noupdate = false;
/*
 * For the epoch (and for set_mxoff below) the value -1 doubles as the
 * not-specified marker: it is rejected as user input further down and is
 * tested again before the value is copied into ControlFile.
 */
74 uint32 set_xid_epoch = -1;
75 TransactionId set_xid = 0;
77 MultiXactId set_mxid = 0;
78 MultiXactOffset set_mxoff = -1;
79 uint32 minXlogTli = 0,
89 set_pglocale_pgservice(argv[0], "pg_resetxlog");
91 progname = get_progname(argv[0]);
/*
 * These long options are compared against argv[1] only, and are handled
 * ahead of the getopt() loop.
 */
95 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
100 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
102 puts("pg_resetxlog (PostgreSQL) " PG_VERSION);
/*
 * In the getopt() string a colon after a letter means the switch takes an
 * argument: l, m, o, O, x and e do; f and n are plain flags.
 */
108 while ((c = getopt(argc, argv, "fl:m:no:O:x:e:")) != -1)
/*
 * Every numeric argument is parsed with strtoul() in base 0 (decimal,
 * leading 0 for octal, leading 0x for hex).  An empty number or trailing
 * characters are detected through endptr.
 * NOTE(review): no errno/ERANGE check is visible, so out-of-range input
 * does not appear to be diagnosed; set_xid_epoch is a uint32, so the
 * unsigned long result is narrowed on assignment.
 */
121 set_xid_epoch = strtoul(optarg, &endptr, 0);
122 if (endptr == optarg || *endptr != '\0')
124 fprintf(stderr, _("%s: invalid argument for option -e\n"), progname);
125 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
128 if (set_xid_epoch == -1)
130 fprintf(stderr, _("%s: transaction ID epoch (-e) must not be -1\n"), progname);
136 set_xid = strtoul(optarg, &endptr, 0);
137 if (endptr == optarg || *endptr != '\0')
139 fprintf(stderr, _("%s: invalid argument for option -x\n"), progname);
140 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
145 fprintf(stderr, _("%s: transaction ID (-x) must not be 0\n"), progname);
151 set_oid = strtoul(optarg, &endptr, 0);
152 if (endptr == optarg || *endptr != '\0')
154 fprintf(stderr, _("%s: invalid argument for option -o\n"), progname);
155 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
160 fprintf(stderr, _("%s: OID (-o) must not be 0\n"), progname);
166 set_mxid = strtoul(optarg, &endptr, 0);
167 if (endptr == optarg || *endptr != '\0')
169 fprintf(stderr, _("%s: invalid argument for option -m\n"), progname);
170 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
175 fprintf(stderr, _("%s: multitransaction ID (-m) must not be 0\n"), progname);
181 set_mxoff = strtoul(optarg, &endptr, 0);
182 if (endptr == optarg || *endptr != '\0')
184 fprintf(stderr, _("%s: invalid argument for option -O\n"), progname);
185 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
190 fprintf(stderr, _("%s: multitransaction offset (-O) must not be -1\n"), progname);
/*
 * The -l argument has the form TLI,FILE,SEG.  The three numbers are parsed
 * one after another; the first two must each be followed by a comma and
 * the third by the end of the string.
 */
196 minXlogTli = strtoul(optarg, &endptr, 0);
197 if (endptr == optarg || *endptr != ',')
199 fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
200 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
203 minXlogId = strtoul(endptr + 1, &endptr2, 0);
204 if (endptr2 == endptr + 1 || *endptr2 != ',')
206 fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
207 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
210 minXlogSeg = strtoul(endptr2 + 1, &endptr3, 0);
211 if (endptr3 == endptr2 + 1 || *endptr3 != '\0')
213 fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
214 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
220 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
227 fprintf(stderr, _("%s: no data directory specified\n"), progname);
228 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
233 * Don't allow pg_resetxlog to be run as root, to avoid overwriting the
234 * ownership of files in the data directory. We need only check for root
235 * -- any other user won't have sufficient permissions to modify files in
236 * the data directory.
241 fprintf(stderr, _("%s: cannot be executed by \"root\"\n"),
243 fprintf(stderr, _("You must run %s as the PostgreSQL superuser.\n"),
249 DataDir = argv[optind];
251 if (chdir(DataDir) < 0)
253 fprintf(stderr, _("%s: could not change directory to \"%s\": %s\n"),
254 progname, DataDir, strerror(errno));
259 * Check for a postmaster lock file --- if there is one, refuse to
260 * proceed, on grounds we might be interfering with a live installation.
262 snprintf(path, MAXPGPATH, "%s/postmaster.pid", DataDir);
264 if ((fd = open(path, O_RDONLY, 0)) < 0)
268 fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"), progname, path, strerror(errno));
274 fprintf(stderr, _("%s: lock file \"%s\" exists\n"
275 "Is a server running? If not, delete the lock file and try again.\n"),
281 * Attempt to read the existing pg_control file
283 if (!ReadControlFile())
284 GuessControlValues();
287 * Adjust fields if required by switches. (Do this now so that printout,
288 * if any, includes these values.)
290 if (set_xid_epoch != -1)
291 ControlFile.checkPointCopy.nextXidEpoch = set_xid_epoch;
294 ControlFile.checkPointCopy.nextXid = set_xid;
297 ControlFile.checkPointCopy.nextOid = set_oid;
300 ControlFile.checkPointCopy.nextMulti = set_mxid;
303 ControlFile.checkPointCopy.nextMultiOffset = set_mxoff;
305 if (minXlogTli > ControlFile.checkPointCopy.ThisTimeLineID)
306 ControlFile.checkPointCopy.ThisTimeLineID = minXlogTli;
308 if (minXlogId > ControlFile.logId ||
309 (minXlogId == ControlFile.logId &&
310 minXlogSeg > ControlFile.logSeg))
312 ControlFile.logId = minXlogId;
313 ControlFile.logSeg = minXlogSeg;
317 * If we had to guess anything, and -f was not given, just print the
318 * guessed values and exit. Also print if -n is given.
320 if ((guessed && !force) || noupdate)
322 PrintControlValues(guessed);
325 printf(_("\nIf these values seem acceptable, use -f to force reset.\n"));
333 * Don't reset from a dirty pg_control without -f, either.
335 if (ControlFile.state != DB_SHUTDOWNED && !force)
337 printf(_("The database server was not shut down cleanly.\n"
338 "Resetting the transaction log may cause data to be lost.\n"
339 "If you want to proceed anyway, use -f to force reset.\n"));
344 * Else, do the dirty deed.
346 RewriteControlFile();
350 printf(_("Transaction log reset\n"));
356 * Try to read the existing pg_control file.
358 * This routine is also responsible for updating old pg_control versions
359 * to the current format. (Currently we don't do anything of the sort.)
/*
 * ReadControlFile: open XLOG_CONTROL_FILE read-only, read up to
 * PG_CONTROL_SIZE bytes from it, and copy the bytes into the file-level
 * ControlFile when the length and pg_control_version look right.
 *
 * Declared as returning bool; main() calls GuessControlValues() when the
 * result is false.  The return statements themselves are not visible in
 * this excerpt.
 */
362 ReadControlFile(void)
369 if ((fd = open(XLOG_CONTROL_FILE, O_RDONLY, 0)) < 0)
372 * If pg_control is not there at all, or we can't read it, the odds
373 * are we've been handed a bad DataDir path, so give up. User can do
374 * "touch pg_control" to force us to proceed.
376 fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"),
377 progname, XLOG_CONTROL_FILE, strerror(errno));
379 fprintf(stderr, _("If you are sure the data directory path is correct, execute\n"
386 /* Use malloc to ensure we have a maxaligned buffer */
387 buffer = (char *) malloc(PG_CONTROL_SIZE);
/*
 * NOTE(review): the malloc() result is handed straight to read(); no NULL
 * check appears in the visible lines, and no matching free() is visible.
 */
389 len = read(fd, buffer, PG_CONTROL_SIZE);
392 fprintf(stderr, _("%s: could not read file \"%s\": %s\n"),
393 progname, XLOG_CONTROL_FILE, strerror(errno));
/*
 * The contents are trusted only if at least sizeof(ControlFileData) bytes
 * were read and the stored version equals PG_CONTROL_VERSION.
 */
398 if (len >= sizeof(ControlFileData) &&
399 ((ControlFileData *) buffer)->pg_control_version == PG_CONTROL_VERSION)
405 offsetof(ControlFileData, crc));
/*
 * The computed CRC (presumably over the bytes preceding the crc field,
 * given the offsetof() length above) is compared with the stored one.
 * Both outcomes below copy the buffer into ControlFile.
 */
408 if (EQ_CRC32(crc, ((ControlFileData *) buffer)->crc))
411 memcpy(&ControlFile, buffer, sizeof(ControlFile));
415 fprintf(stderr, _("%s: pg_control exists but has invalid CRC; proceed with caution\n"),
417 /* We will use the data anyway, but treat it as guessed. */
418 memcpy(&ControlFile, buffer, sizeof(ControlFile));
423 /* Looks like it's a mess. */
424 fprintf(stderr, _("%s: pg_control exists but is broken or unknown version; ignoring it\n"),
431 * Guess at pg_control values when we can't read the old ones.
/*
 * GuessControlValues: zero the file-level ControlFile and fill it with
 * default values: a freshly generated system identifier, initial
 * checkpoint counters, this build's compatibility constants, and the
 * LC_COLLATE / LC_CTYPE names reported by setlocale().
 * Takes no arguments and returns nothing; main() calls it when
 * ReadControlFile() returns false.
 */
434 GuessControlValues(void)
436 uint64 sysidentifier;
441 * Set up a completely default set of pg_control values.
444 memset(&ControlFile, 0, sizeof(ControlFile));
446 ControlFile.pg_control_version = PG_CONTROL_VERSION;
447 ControlFile.catalog_version_no = CATALOG_VERSION_NO;
450 * Create a new unique installation identifier, since we can no longer use
451 * any old XLOG records. See notes in xlog.c about the algorithm.
453 gettimeofday(&tv, NULL);
/*
 * Upper 32 bits: the seconds value.  Lower 32 bits: the bitwise OR of the
 * seconds and microseconds values, truncated to uint32.
 */
454 sysidentifier = ((uint64) tv.tv_sec) << 32;
455 sysidentifier |= (uint32) (tv.tv_sec | tv.tv_usec);
457 ControlFile.system_identifier = sysidentifier;
/*
 * Default checkpoint: located in log file 0 at offset SizeOfXLogLongPHD,
 * on timeline 1, with the counters set to their initial values.
 */
459 ControlFile.checkPointCopy.redo.xlogid = 0;
460 ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogLongPHD;
461 ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
462 ControlFile.checkPointCopy.ThisTimeLineID = 1;
463 ControlFile.checkPointCopy.nextXidEpoch = 0;
464 ControlFile.checkPointCopy.nextXid = (TransactionId) 514; /* XXX */
465 ControlFile.checkPointCopy.nextOid = FirstBootstrapObjectId;
466 ControlFile.checkPointCopy.nextMulti = FirstMultiXactId;
467 ControlFile.checkPointCopy.nextMultiOffset = 0;
468 ControlFile.checkPointCopy.time = time(NULL);
/*
 * Record the state as DB_SHUTDOWNED so that the not-shut-down-cleanly
 * check in main() does not trigger for guessed values.
 */
470 ControlFile.state = DB_SHUTDOWNED;
471 ControlFile.time = time(NULL);
472 ControlFile.logId = 0;
473 ControlFile.logSeg = 1;
474 ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
/*
 * The compatibility fields are taken from the macros this program was
 * built with, not from anything on disk.
 */
476 ControlFile.maxAlign = MAXIMUM_ALIGNOF;
477 ControlFile.floatFormat = FLOATFORMAT_VALUE;
478 ControlFile.blcksz = BLCKSZ;
479 ControlFile.relseg_size = RELSEG_SIZE;
480 ControlFile.xlog_blcksz = XLOG_BLCKSZ;
481 ControlFile.xlog_seg_size = XLOG_SEG_SIZE;
482 ControlFile.nameDataLen = NAMEDATALEN;
483 ControlFile.indexMaxKeys = INDEX_MAX_KEYS;
484 #ifdef HAVE_INT64_TIMESTAMP
485 ControlFile.enableIntTimes = TRUE;
487 ControlFile.enableIntTimes = FALSE;
489 ControlFile.localeBuflen = LOCALE_NAME_BUFLEN;
/*
 * setlocale() with an empty locale string selects the locale from the
 * environment; the returned name is what gets stored, truncated to
 * LOCALE_NAME_BUFLEN by StrNCpy.
 */
491 localeptr = setlocale(LC_COLLATE, "");
494 fprintf(stderr, _("%s: invalid LC_COLLATE setting\n"), progname);
497 StrNCpy(ControlFile.lc_collate, localeptr, LOCALE_NAME_BUFLEN);
498 localeptr = setlocale(LC_CTYPE, "");
501 fprintf(stderr, _("%s: invalid LC_CTYPE setting\n"), progname);
504 StrNCpy(ControlFile.lc_ctype, localeptr, LOCALE_NAME_BUFLEN);
507 * XXX eventually, should try to grovel through old XLOG to develop more
508 * accurate values for TimeLineID, nextXID, etc.
514 * Print the guessed pg_control values when we had to guess.
516 * NB: this display should be just those fields that will not be
517 * reset by RewriteControlFile().
/*
 * PrintControlValues: write a labelled dump of selected fields of the
 * file-level ControlFile to stdout.
 *
 * guessed: passed by main() from the file-level flag of the same name.
 *          Note that this parameter shadows that file-level static
 *          inside the function.
 */
520 PrintControlValues(bool guessed)
522 char sysident_str[32];
/*
 * Two alternative headings follow; presumably the guessed argument picks
 * between them (the selecting condition is not visible in this excerpt).
 */
525 printf(_("Guessed pg_control values:\n\n"));
527 printf(_("pg_control values:\n\n"));
530 * Format system_identifier separately to keep platform-dependent format
531 * code out of the translatable message string.
533 snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT,
534 ControlFile.system_identifier);
/* Each field below is printed with a label wrapped in _() and a %u or %s conversion. */
536 printf(_("pg_control version number: %u\n"),
537 ControlFile.pg_control_version);
538 printf(_("Catalog version number: %u\n"),
539 ControlFile.catalog_version_no);
540 printf(_("Database system identifier: %s\n"),
542 printf(_("Current log file ID: %u\n"),
544 printf(_("Next log file segment: %u\n"),
546 printf(_("Latest checkpoint's TimeLineID: %u\n"),
547 ControlFile.checkPointCopy.ThisTimeLineID);
548 printf(_("Latest checkpoint's NextXID: %u/%u\n"),
549 ControlFile.checkPointCopy.nextXidEpoch,
550 ControlFile.checkPointCopy.nextXid);
551 printf(_("Latest checkpoint's NextOID: %u\n"),
552 ControlFile.checkPointCopy.nextOid);
553 printf(_("Latest checkpoint's NextMultiXactId: %u\n"),
554 ControlFile.checkPointCopy.nextMulti);
555 printf(_("Latest checkpoint's NextMultiOffset: %u\n"),
556 ControlFile.checkPointCopy.nextMultiOffset);
557 printf(_("Maximum data alignment: %u\n"),
558 ControlFile.maxAlign);
559 /* we don't print floatFormat since can't say much useful about it */
560 printf(_("Database block size: %u\n"),
562 printf(_("Blocks per segment of large relation: %u\n"),
563 ControlFile.relseg_size);
564 printf(_("WAL block size: %u\n"),
565 ControlFile.xlog_blcksz);
566 printf(_("Bytes per WAL segment: %u\n"),
567 ControlFile.xlog_seg_size);
568 printf(_("Maximum length of identifiers: %u\n"),
569 ControlFile.nameDataLen);
570 printf(_("Maximum columns in an index: %u\n"),
571 ControlFile.indexMaxKeys);
572 printf(_("Date/time type storage: %s\n"),
573 (ControlFile.enableIntTimes ? _("64-bit integers") : _("floating-point numbers")));
574 printf(_("Maximum length of locale name: %u\n"),
575 ControlFile.localeBuflen);
576 printf(_("LC_COLLATE: %s\n"),
577 ControlFile.lc_collate);
578 printf(_("LC_CTYPE: %s\n"),
579 ControlFile.lc_ctype);
584 * Write out the new pg_control file.
/*
 * RewriteControlFile: pick the XLOG segment the new log will start in
 * (stored in the file-level newXlogId / newXlogSeg), point the checkpoint
 * fields of ControlFile at it, recompute the CRC, and write a zero-padded
 * PG_CONTROL_SIZE image to XLOG_CONTROL_FILE.
 *
 * The old file is unlinked before the new one is opened with
 * O_CREAT | O_EXCL; the return value of that unlink() is not examined.
 * Failures to create, write or fsync are reported on stderr.
 */
587 RewriteControlFile(void)
590 char buffer[PG_CONTROL_SIZE]; /* need not be aligned */
593 * Adjust fields as needed to force an empty XLOG starting at the next
596 newXlogId = ControlFile.logId;
597 newXlogSeg = ControlFile.logSeg;
599 /* adjust in case we are changing segment size */
/*
 * Scale the segment number by the recorded segment size, then divide by
 * XLogSegSize rounding up, so the result is expressed in segments of the
 * current size.
 * NOTE(review): newXlogSeg is a uint32, so the multiplication can wrap for
 * large segment numbers -- confirm this is acceptable.
 */
600 newXlogSeg *= ControlFile.xlog_seg_size;
601 newXlogSeg = (newXlogSeg + XLogSegSize - 1) / XLogSegSize;
603 /* be sure we wrap around correctly at end of a logfile */
604 NextLogSeg(newXlogId, newXlogSeg);
606 /* Now we can force the recorded xlog seg size to the right thing. */
607 ControlFile.xlog_seg_size = XLogSegSize;
/*
 * The checkpoint location is the start of the new segment plus
 * SizeOfXLogLongPHD; undo and checkPoint are set to the same location.
 */
609 ControlFile.checkPointCopy.redo.xlogid = newXlogId;
610 ControlFile.checkPointCopy.redo.xrecoff =
611 newXlogSeg * XLogSegSize + SizeOfXLogLongPHD;
612 ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
613 ControlFile.checkPointCopy.time = time(NULL);
/*
 * Mark the state as DB_SHUTDOWNED, advance logSeg to one past the new
 * segment, and clear the previous-checkpoint and minimum-recovery
 * pointers.
 */
615 ControlFile.state = DB_SHUTDOWNED;
616 ControlFile.time = time(NULL);
617 ControlFile.logId = newXlogId;
618 ControlFile.logSeg = newXlogSeg + 1;
619 ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
620 ControlFile.prevCheckPoint.xlogid = 0;
621 ControlFile.prevCheckPoint.xrecoff = 0;
622 ControlFile.minRecoveryPoint.xlogid = 0;
623 ControlFile.minRecoveryPoint.xrecoff = 0;
625 /* Contents are protected with a CRC */
/* The CRC covers the bytes of ControlFile that precede its crc field. */
626 INIT_CRC32(ControlFile.crc);
627 COMP_CRC32(ControlFile.crc,
628 (char *) &ControlFile,
629 offsetof(ControlFileData, crc));
630 FIN_CRC32(ControlFile.crc);
633 * We write out PG_CONTROL_SIZE bytes into pg_control, zero-padding the
634 * excess over sizeof(ControlFileData). This reduces the odds of
635 * premature-EOF errors when reading pg_control. We'll still fail when we
636 * check the contents of the file, but hopefully with a more specific
637 * error than "couldn't read pg_control".
639 if (sizeof(ControlFileData) > PG_CONTROL_SIZE)
642 _("%s: internal error -- sizeof(ControlFileData) is too large ... fix PG_CONTROL_SIZE\n"),
647 memset(buffer, 0, PG_CONTROL_SIZE);
648 memcpy(buffer, &ControlFile, sizeof(ControlFileData));
650 unlink(XLOG_CONTROL_FILE);
652 fd = open(XLOG_CONTROL_FILE,
653 O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
657 fprintf(stderr, _("%s: could not create pg_control file: %s\n"),
658 progname, strerror(errno));
663 if (write(fd, buffer, PG_CONTROL_SIZE) != PG_CONTROL_SIZE)
665 /* if write didn't set errno, assume problem is no disk space */
668 fprintf(stderr, _("%s: could not write pg_control file: %s\n"),
669 progname, strerror(errno));
675 fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno));
684 * Remove existing XLOG files
/*
 * KillExistingXLOG: scan the XLOGDIR directory and unlink every entry
 * whose name looks like an XLOG segment file.  Failures to open the
 * directory, delete a file, or read the directory are reported on stderr.
 */
687 KillExistingXLOG(void)
691 char path[MAXPGPATH];
693 xldir = opendir(XLOGDIR);
696 fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
697 progname, XLOGDIR, strerror(errno));
702 while ((xlde = readdir(xldir)) != NULL)
/*
 * Segment files are recognised by name alone: exactly 24 characters, all
 * drawn from the digits and the upper-case letters A to F.  Entries that
 * do not match are not unlinked.
 */
704 if (strlen(xlde->d_name) == 24 &&
705 strspn(xlde->d_name, "0123456789ABCDEF") == 24)
707 snprintf(path, MAXPGPATH, "%s/%s", XLOGDIR, xlde->d_name);
708 if (unlink(path) < 0)
710 fprintf(stderr, _("%s: could not delete file \"%s\": %s\n"),
711 progname, path, strerror(errno));
720 * This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but not in
723 if (GetLastError() == ERROR_NO_MORE_FILES)
729 fprintf(stderr, _("%s: could not read from directory \"%s\": %s\n"),
730 progname, XLOGDIR, strerror(errno));
738 * Write an empty XLOG file, containing only the checkpoint record
739 * already set up in ControlFile.
/*
 * Body of the routine that writes the new, otherwise empty XLOG segment
 * (presumably WriteEmptyXLOG(); its signature line is not visible in this
 * excerpt).  It builds one XLOG_BLCKSZ page holding a long page header and
 * a shutdown-checkpoint record copied from ControlFile.checkPointCopy,
 * writes it to the file named by XLogFilePath(), and then pads the file
 * with zeroed blocks up to XLogSegSize.
 */
746 XLogLongPageHeader longpage;
749 char path[MAXPGPATH];
753 /* Use malloc() to ensure buffer is MAXALIGNED */
754 buffer = (char *) malloc(XLOG_BLCKSZ);
/*
 * NOTE(review): the malloc() result is used directly below (cast, then
 * memset); no NULL check appears in the visible lines.
 */
755 page = (XLogPageHeader) buffer;
756 memset(buffer, 0, XLOG_BLCKSZ);
758 /* Set up the XLOG page header */
759 page->xlp_magic = XLOG_PAGE_MAGIC;
760 page->xlp_info = XLP_LONG_HEADER;
761 page->xlp_tli = ControlFile.checkPointCopy.ThisTimeLineID;
762 page->xlp_pageaddr.xlogid =
763 ControlFile.checkPointCopy.redo.xlogid;
/*
 * RewriteControlFile() sets redo.xrecoff to the segment start plus
 * SizeOfXLogLongPHD, so subtracting the header size yields the address
 * of the page itself.
 */
764 page->xlp_pageaddr.xrecoff =
765 ControlFile.checkPointCopy.redo.xrecoff - SizeOfXLogLongPHD;
766 longpage = (XLogLongPageHeader) page;
767 longpage->xlp_sysid = ControlFile.system_identifier;
768 longpage->xlp_seg_size = XLogSegSize;
769 longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
771 /* Insert the initial checkpoint record */
772 record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD);
773 record->xl_prev.xlogid = 0;
774 record->xl_prev.xrecoff = 0;
775 record->xl_xid = InvalidTransactionId;
776 record->xl_tot_len = SizeOfXLogRecord + sizeof(CheckPoint);
777 record->xl_len = sizeof(CheckPoint);
778 record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
779 record->xl_rmid = RM_XLOG_ID;
780 memcpy(XLogRecGetData(record), &ControlFile.checkPointCopy,
784 COMP_CRC32(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint));
/*
 * After the checkpoint payload, the CRC is fed the record header starting
 * sizeof(pg_crc32) bytes in -- presumably skipping the xl_crc field that
 * receives the result (the struct layout is not visible here).
 */
785 COMP_CRC32(crc, (char *) record + sizeof(pg_crc32),
786 SizeOfXLogRecord - sizeof(pg_crc32));
788 record->xl_crc = crc;
790 /* Write the first page */
791 XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID,
792 newXlogId, newXlogSeg);
796 fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
800 fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
801 progname, path, strerror(errno));
806 if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
808 /* if write didn't set errno, assume problem is no disk space */
811 fprintf(stderr, _("%s: could not write file \"%s\": %s\n"),
812 progname, path, strerror(errno));
816 /* Fill the rest of the file with zeroes */
817 memset(buffer, 0, XLOG_BLCKSZ);
/* One block has already been written above, so the byte count starts at XLOG_BLCKSZ. */
818 for (nbytes = XLOG_BLCKSZ; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
821 if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
825 fprintf(stderr, _("%s: could not write file \"%s\": %s\n"),
826 progname, path, strerror(errno));
833 fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno));
/*
 * Body of the help routine (presumably usage(); its signature line is not
 * visible in this excerpt).  Prints a one-line description, the invocation
 * synopsis and one line per option to stdout.  The switches listed are the
 * same letters accepted by the getopt() string in main(): f, l, m, n, o,
 * O, x and e.
 */
844 printf(_("%s resets the PostgreSQL transaction log.\n\n"), progname);
845 printf(_("Usage:\n %s [OPTION]... DATADIR\n\n"), progname);
846 printf(_("Options:\n"));
847 printf(_(" -f force update to be done\n"));
848 printf(_(" -l TLI,FILE,SEG force minimum WAL starting location for new transaction log\n"));
849 printf(_(" -m XID set next multitransaction ID\n"));
850 printf(_(" -n no update, just show extracted control values (for testing)\n"));
851 printf(_(" -o OID set next OID\n"));
852 printf(_(" -O OFFSET set next multitransaction offset\n"));
853 printf(_(" -x XID set next transaction ID\n"));
854 printf(_(" -e XIDEPOCH set next transaction ID epoch\n"));
855 printf(_(" --help show this help, then exit\n"));
856 printf(_(" --version output version information, then exit\n"));
857 printf(_("\nReport bugs to <pgsql-bugs@postgresql.org>.\n"));