1 /*-------------------------------------------------------------------------
4 * A utility to "zero out" the xlog when it's corrupt beyond recovery.
5 * Can also rebuild pg_control if needed.
7 * The theory of operation is fairly simple:
8 * 1. Read the existing pg_control (which will include the last
9 * checkpoint record). If it is an old format then update to the current format.
11 * 2. If pg_control is corrupt, attempt to intuit reasonable values,
12 * by scanning the old xlog if necessary.
13 * 3. Modify pg_control to reflect a "shutdown" state with a checkpoint
14 * record at the start of xlog.
15 * 4. Flush the existing xlog files and write a new segment with
16 * just a checkpoint record in it. The new segment is positioned
17 * just past the end of the old xlog, so that existing LSNs in
18 * data pages will appear to be "in the past".
19 * This is all pretty straightforward except for the intuition part of step 2.
23 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
24 * Portions Copyright (c) 1994, Regents of the University of California
26 * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.41 2006/04/03 23:35:04 tgl Exp $
28 *-------------------------------------------------------------------------
43 #include "access/multixact.h"
44 #include "access/xlog.h"
45 #include "access/xlog_internal.h"
46 #include "catalog/catversion.h"
47 #include "catalog/pg_control.h"
53 static ControlFileData ControlFile; /* pg_control values */
54 static uint32 newXlogId,
55 newXlogSeg; /* ID/Segment of new XLOG segment */
56 static bool guessed = false; /* T if we had to guess at any values */
/* Program name: main() sets it from get_progname(argv[0]) and the diagnostics print it as a prefix. */
57 static const char *progname;
/* File-local helpers; they are declared here so that main() can call them before they are defined. */
59 static bool ReadControlFile(void);
60 static void GuessControlValues(void);
61 static void PrintControlValues(bool guessed);
62 static void RewriteControlFile(void);
63 static void KillExistingXLOG(void);
64 static void WriteEmptyXLOG(void);
65 static void usage(void);
/*
 * Program entry point.  In the order visible below: handle --help and
 * --version, parse the switches, change into the data directory, look for a
 * postmaster lock file, load pg_control (falling back to guessed values),
 * apply any values given by switches, and then either print the values or
 * rewrite pg_control.
 */
69 main(int argc, char *argv[])
73 bool noupdate = false;
74 TransactionId set_xid = 0;
76 MultiXactId set_mxid = 0;
/* NOTE(review): -1 is rejected as a -O argument (see the message below), presumably so that it can stand for "not given" -- confirm. */
77 MultiXactOffset set_mxoff = -1;
78 uint32 minXlogTli = 0,
88 set_pglocale_pgservice(argv[0], "pg_resetxlog");
90 progname = get_progname(argv[0]);
/* Both long-option checks look only at argv[1], so they are recognized only as the first argument. */
94 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
99 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
101 puts("pg_resetxlog (PostgreSQL) " PG_VERSION);
/* In the getopt string a letter followed by a colon takes an argument: -l -m -o -O -x do, -f and -n do not. */
107 while ((c = getopt(argc, argv, "fl:m:no:O:x:")) != -1)
/*
 * -x: next transaction ID.  Base 0 lets strtoul accept decimal, 0-prefixed
 * octal and 0x-prefixed hexadecimal; the endptr test rejects empty input
 * and trailing characters.
 * NOTE(review): no range (ERANGE) check on the strtoul result is visible for
 * this or the following switches, and the unsigned long result is assigned
 * to a project-defined type that is presumably narrower -- confirm.
 */
120 set_xid = strtoul(optarg, &endptr, 0);
121 if (endptr == optarg || *endptr != '\0')
123 fprintf(stderr, _("%s: invalid argument for option -x\n"), progname);
124 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
129 fprintf(stderr, _("%s: transaction ID (-x) must not be 0\n"), progname);
/* -o: next OID, parsed and validated the same way. */
135 set_oid = strtoul(optarg, &endptr, 0);
136 if (endptr == optarg || *endptr != '\0')
138 fprintf(stderr, _("%s: invalid argument for option -o\n"), progname);
139 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
144 fprintf(stderr, _("%s: OID (-o) must not be 0\n"), progname);
/* -m: next multitransaction ID, parsed and validated the same way. */
150 set_mxid = strtoul(optarg, &endptr, 0);
151 if (endptr == optarg || *endptr != '\0')
153 fprintf(stderr, _("%s: invalid argument for option -m\n"), progname);
154 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
159 fprintf(stderr, _("%s: multitransaction ID (-m) must not be 0\n"), progname);
/* -O: next multitransaction offset, parsed and validated the same way. */
165 set_mxoff = strtoul(optarg, &endptr, 0);
166 if (endptr == optarg || *endptr != '\0')
168 fprintf(stderr, _("%s: invalid argument for option -O\n"), progname);
169 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
174 fprintf(stderr, _("%s: multitransaction offset (-O) must not be -1\n"), progname);
/*
 * -l: three comma-separated numbers (the help text names them TLI,FILE,SEG).
 * Each field must be non-empty; the first two must be followed by a comma
 * and the third by the end of the string.
 */
180 minXlogTli = strtoul(optarg, &endptr, 0);
181 if (endptr == optarg || *endptr != ',')
183 fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
184 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
187 minXlogId = strtoul(endptr + 1, &endptr2, 0);
188 if (endptr2 == endptr + 1 || *endptr2 != ',')
190 fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
191 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
194 minXlogSeg = strtoul(endptr2 + 1, &endptr3, 0);
195 if (endptr3 == endptr2 + 1 || *endptr3 != '\0')
197 fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
198 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
204 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
/* Diagnostic for a command line that names no data directory. */
211 fprintf(stderr, _("%s: no data directory specified\n"), progname);
212 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
217 * Don't allow pg_resetxlog to be run as root, to avoid overwriting the
218 * ownership of files in the data directory. We need only check for root
219 * -- any other user won't have sufficient permissions to modify files in
220 * the data directory.
225 fprintf(stderr, _("%s: cannot be executed by \"root\"\n"),
227 fprintf(stderr, _("You must run %s as the PostgreSQL superuser.\n"),
/* The data directory is the first argument left over after switch parsing. */
233 DataDir = argv[optind];
/* From here on the working directory is the data directory. */
235 if (chdir(DataDir) < 0)
237 fprintf(stderr, _("%s: could not change directory to \"%s\": %s\n"),
238 progname, DataDir, strerror(errno));
243 * Check for a postmaster lock file --- if there is one, refuse to
244 * proceed, on grounds we might be interfering with a live installation.
/*
 * NOTE(review): this path is built from DataDir after chdir(DataDir) has
 * already succeeded.  If DataDir was given as a relative path, the name is
 * then resolved relative to the new working directory, so the lock file
 * would be looked for in the wrong place -- confirm and fix if so.
 */
246 snprintf(path, MAXPGPATH, "%s/postmaster.pid", DataDir);
248 if ((fd = open(path, O_RDONLY)) < 0)
252 fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"), progname, path, strerror(errno));
258 fprintf(stderr, _("%s: lock file \"%s\" exists\n"
259 "Is a server running? If not, delete the lock file and try again.\n"),
265 * Attempt to read the existing pg_control file
/* A false result from ReadControlFile() means default values are guessed instead. */
267 if (!ReadControlFile())
268 GuessControlValues();
271 * Adjust fields if required by switches. (Do this now so that printout,
272 * if any, includes these values.)
275 ControlFile.checkPointCopy.nextXid = set_xid;
278 ControlFile.checkPointCopy.nextOid = set_oid;
281 ControlFile.checkPointCopy.nextMulti = set_mxid;
284 ControlFile.checkPointCopy.nextMultiOffset = set_mxoff;
/* The -l values can only raise the timeline and the log position, never lower them. */
286 if (minXlogTli > ControlFile.checkPointCopy.ThisTimeLineID)
287 ControlFile.checkPointCopy.ThisTimeLineID = minXlogTli;
289 if (minXlogId > ControlFile.logId ||
290 (minXlogId == ControlFile.logId &&
291 minXlogSeg > ControlFile.logSeg))
293 ControlFile.logId = minXlogId;
294 ControlFile.logSeg = minXlogSeg;
298 * If we had to guess anything, and -f was not given, just print the
299 * guessed values and exit. Also print if -n is given.
301 if ((guessed && !force) || noupdate)
303 PrintControlValues(guessed);
306 printf(_("\nIf these values seem acceptable, use -f to force reset.\n"));
314 * Don't reset from a dirty pg_control without -f, either.
316 if (ControlFile.state != DB_SHUTDOWNED && !force)
318 printf(_("The database server was not shut down cleanly.\n"
319 "Resetting the transaction log may cause data to be lost.\n"
320 "If you want to proceed anyway, use -f to force reset.\n"));
325 * Else, do the dirty deed.
327 RewriteControlFile();
331 printf(_("Transaction log reset\n"));
337 * Try to read the existing pg_control file.
339 * This routine is also responsible for updating old pg_control versions
340 * to the current format. (Currently we don't do anything of the sort.)
/*
 * Load pg_control from XLOG_CONTROL_FILE into the global ControlFile.
 *
 * The data is used when at least sizeof(ControlFileData) bytes were read and
 * the version field equals PG_CONTROL_VERSION.  A CRC mismatch is reported
 * but the contents are copied anyway.  Any other content is reported as
 * broken and ignored.  main() calls GuessControlValues() when the result is
 * false.
 */
343 ReadControlFile(void)
350 if ((fd = open(XLOG_CONTROL_FILE, O_RDONLY)) < 0)
353 * If pg_control is not there at all, or we can't read it, the odds
354 * are we've been handed a bad DataDir path, so give up. User can do
355 * "touch pg_control" to force us to proceed.
357 fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"),
358 progname, XLOG_CONTROL_FILE, strerror(errno));
360 fprintf(stderr, _("If you are sure the data directory path is correct, execute\n"
367 /* Use malloc to ensure we have a maxaligned buffer */
368 buffer = (char *) malloc(BLCKSZ);
/* NOTE(review): no NULL check on the malloc result is visible before this read -- confirm. */
370 len = read(fd, buffer, BLCKSZ);
373 fprintf(stderr, _("%s: could not read file \"%s\": %s\n"),
374 progname, XLOG_CONTROL_FILE, strerror(errno));
/* Trust the buffer only if enough bytes were read and the version field matches this build. */
379 if (len >= sizeof(ControlFileData) &&
380 ((ControlFileData *) buffer)->pg_control_version == PG_CONTROL_VERSION)
386 offsetof(ControlFileData, crc));
/* Compare against the CRC stored in the file; the computed value presumably covers the bytes ahead of the crc field (see the offsetof above). */
389 if (EQ_CRC32(crc, ((ControlFileData *) buffer)->crc))
392 memcpy(&ControlFile, buffer, sizeof(ControlFile));
396 fprintf(stderr, _("%s: pg_control exists but has invalid CRC; proceed with caution\n"),
398 /* We will use the data anyway, but treat it as guessed. */
399 memcpy(&ControlFile, buffer, sizeof(ControlFile));
404 /* Looks like it's a mess. */
405 fprintf(stderr, _("%s: pg_control exists but is broken or unknown version; ignoring it\n"),
412 * Guess at pg_control values when we can't read the old ones.
/*
 * Fill the global ControlFile with default values: zero the whole structure,
 * then set the version numbers, a newly generated system identifier, an
 * initial checkpoint position, the layout constants of this build, and the
 * locale names obtained from setlocale.
 */
415 GuessControlValues(void)
417 uint64 sysidentifier;
422 * Set up a completely default set of pg_control values.
425 memset(&ControlFile, 0, sizeof(ControlFile));
427 ControlFile.pg_control_version = PG_CONTROL_VERSION;
428 ControlFile.catalog_version_no = CATALOG_VERSION_NO;
431 * Create a new unique installation identifier, since we can no longer use
432 * any old XLOG records. See notes in xlog.c about the algorithm.
434 gettimeofday(&tv, NULL);
/* High 32 bits: the seconds value.  Low 32 bits: seconds OR-ed with microseconds. */
435 sysidentifier = ((uint64) tv.tv_sec) << 32;
436 sysidentifier |= (uint32) (tv.tv_sec | tv.tv_usec);
438 ControlFile.system_identifier = sysidentifier;
/* Checkpoint copy: redo position is log file 0 at offset SizeOfXLogLongPHD, and undo is set equal to redo. */
440 ControlFile.checkPointCopy.redo.xlogid = 0;
441 ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogLongPHD;
442 ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
443 ControlFile.checkPointCopy.ThisTimeLineID = 1;
444 ControlFile.checkPointCopy.nextXid = (TransactionId) 514; /* XXX */
445 ControlFile.checkPointCopy.nextOid = FirstBootstrapObjectId;
446 ControlFile.checkPointCopy.nextMulti = FirstMultiXactId;
447 ControlFile.checkPointCopy.nextMultiOffset = 0;
448 ControlFile.checkPointCopy.time = time(NULL);
/* Top-level fields: state DB_SHUTDOWNED, log file 0 and segment 1, checkpoint at the redo position set above. */
450 ControlFile.state = DB_SHUTDOWNED;
451 ControlFile.time = time(NULL);
452 ControlFile.logId = 0;
453 ControlFile.logSeg = 1;
454 ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
/* Layout constants, all taken from the macros this program was compiled with. */
456 ControlFile.maxAlign = MAXIMUM_ALIGNOF;
457 ControlFile.floatFormat = FLOATFORMAT_VALUE;
458 ControlFile.blcksz = BLCKSZ;
459 ControlFile.relseg_size = RELSEG_SIZE;
460 ControlFile.xlog_blcksz = XLOG_BLCKSZ;
461 ControlFile.xlog_seg_size = XLOG_SEG_SIZE;
462 ControlFile.nameDataLen = NAMEDATALEN;
463 ControlFile.indexMaxKeys = INDEX_MAX_KEYS;
464 #ifdef HAVE_INT64_TIMESTAMP
465 ControlFile.enableIntTimes = TRUE;
467 ControlFile.enableIntTimes = FALSE;
469 ControlFile.localeBuflen = LOCALE_NAME_BUFLEN;
/* Calling setlocale with an empty locale string selects the locale named by the environment and returns its name. */
471 localeptr = setlocale(LC_COLLATE, "");
474 fprintf(stderr, _("%s: invalid LC_COLLATE setting\n"), progname);
477 StrNCpy(ControlFile.lc_collate, localeptr, LOCALE_NAME_BUFLEN);
478 localeptr = setlocale(LC_CTYPE, "");
481 fprintf(stderr, _("%s: invalid LC_CTYPE setting\n"), progname);
484 StrNCpy(ControlFile.lc_ctype, localeptr, LOCALE_NAME_BUFLEN);
487 * XXX eventually, should try to grovel through old XLOG to develop more
488 * accurate values for TimeLineID, nextXID, etc.
494 * Print the pg_control values, labelled as guessed when we had to guess.
496 * NB: this display should be just those fields that will not be
497 * reset by RewriteControlFile().
/*
 * Print fields of the global ControlFile to stdout, one labelled line each.
 *
 * guessed: presumably selects which of the two headings below is printed
 * (the test itself should be confirmed).  The parameter shadows the
 * file-level variable of the same name.
 */
500 PrintControlValues(bool guessed)
502 char sysident_str[32];
505 printf(_("Guessed pg_control values:\n\n"));
507 printf(_("pg_control values:\n\n"));
510 * Format system_identifier separately to keep platform-dependent format
511 * code out of the translatable message string.
513 snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT,
514 ControlFile.system_identifier);
/* Version and identity fields. */
516 printf(_("pg_control version number: %u\n"), ControlFile.pg_control_version);
517 printf(_("Catalog version number: %u\n"), ControlFile.catalog_version_no);
518 printf(_("Database system identifier: %s\n"), sysident_str);
/* Log position and the counters held in the checkpoint copy. */
519 printf(_("Current log file ID: %u\n"), ControlFile.logId);
520 printf(_("Next log file segment: %u\n"), ControlFile.logSeg);
521 printf(_("Latest checkpoint's TimeLineID: %u\n"), ControlFile.checkPointCopy.ThisTimeLineID);
522 printf(_("Latest checkpoint's NextXID: %u\n"), ControlFile.checkPointCopy.nextXid);
523 printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid);
524 printf(_("Latest checkpoint's NextMultiXactId: %u\n"), ControlFile.checkPointCopy.nextMulti);
525 printf(_("Latest checkpoint's NextMultiOffset: %u\n"), ControlFile.checkPointCopy.nextMultiOffset);
/* Layout constants and locale settings. */
526 printf(_("Maximum data alignment: %u\n"), ControlFile.maxAlign);
527 /* we don't print floatFormat since can't say much useful about it */
528 printf(_("Database block size: %u\n"), ControlFile.blcksz);
529 printf(_("Blocks per segment of large relation: %u\n"), ControlFile.relseg_size);
530 printf(_("WAL block size: %u\n"), ControlFile.xlog_blcksz);
531 printf(_("Bytes per WAL segment: %u\n"), ControlFile.xlog_seg_size);
532 printf(_("Maximum length of identifiers: %u\n"), ControlFile.nameDataLen);
533 printf(_("Maximum columns in an index: %u\n"), ControlFile.indexMaxKeys);
534 printf(_("Date/time type storage: %s\n"),
535 (ControlFile.enableIntTimes ? _("64-bit integers") : _("floating-point numbers")));
536 printf(_("Maximum length of locale name: %u\n"), ControlFile.localeBuflen);
537 printf(_("LC_COLLATE: %s\n"), ControlFile.lc_collate);
538 printf(_("LC_CTYPE: %s\n"), ControlFile.lc_ctype);
543 * Write out the new pg_control file.
/*
 * Finalize the global ControlFile and write it to XLOG_CONTROL_FILE.
 *
 * Picks the segment for the new XLOG and stores it in newXlogId/newXlogSeg,
 * points the checkpoint fields at that segment, sets the state to
 * DB_SHUTDOWNED, recomputes the CRC, and writes a zero-padded image of
 * BLCKSZ bytes to a newly created file.
 */
546 RewriteControlFile(void)
549 char buffer[BLCKSZ]; /* need not be aligned */
552 * Adjust fields as needed to force an empty XLOG starting at the next
555 newXlogId = ControlFile.logId;
556 newXlogSeg = ControlFile.logSeg;
558 /* adjust in case we are changing segment size */
/* Scale the segment number by the recorded segment size, then round up to whole segments of XLogSegSize. */
559 newXlogSeg *= ControlFile.xlog_seg_size;
560 newXlogSeg = (newXlogSeg + XLogSegSize - 1) / XLogSegSize;
562 /* be sure we wrap around correctly at end of a logfile */
563 NextLogSeg(newXlogId, newXlogSeg);
565 /* Now we can force the recorded xlog seg size to the right thing. */
566 ControlFile.xlog_seg_size = XLogSegSize;
/* The redo position is placed SizeOfXLogLongPHD bytes into the chosen segment; undo is set equal to redo. */
568 ControlFile.checkPointCopy.redo.xlogid = newXlogId;
569 ControlFile.checkPointCopy.redo.xrecoff =
570 newXlogSeg * XLogSegSize + SizeOfXLogLongPHD;
571 ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
572 ControlFile.checkPointCopy.time = time(NULL);
574 ControlFile.state = DB_SHUTDOWNED;
575 ControlFile.time = time(NULL);
576 ControlFile.logId = newXlogId;
/* logSeg is left one past the segment that holds the checkpoint. */
577 ControlFile.logSeg = newXlogSeg + 1;
578 ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
/* Clear the previous-checkpoint pointer. */
579 ControlFile.prevCheckPoint.xlogid = 0;
580 ControlFile.prevCheckPoint.xrecoff = 0;
582 /* Contents are protected with a CRC */
/* The CRC covers every byte of the structure ahead of the crc field. */
583 INIT_CRC32(ControlFile.crc);
584 COMP_CRC32(ControlFile.crc,
585 (char *) &ControlFile,
586 offsetof(ControlFileData, crc));
587 FIN_CRC32(ControlFile.crc);
590 * We write out BLCKSZ bytes into pg_control, zero-padding the excess over
591 * sizeof(ControlFileData). This reduces the odds of premature-EOF errors
592 * when reading pg_control. We'll still fail when we check the contents
593 * of the file, but hopefully with a more specific error than "couldn't
596 if (sizeof(ControlFileData) > BLCKSZ)
599 _("%s: internal error -- sizeof(ControlFileData) is too large ... fix xlog.c\n"),
604 memset(buffer, 0, BLCKSZ);
605 memcpy(buffer, &ControlFile, sizeof(ControlFileData));
/* Remove any existing file first so that the O_EXCL create below can succeed; the unlink result is not checked. */
607 unlink(XLOG_CONTROL_FILE);
609 fd = open(XLOG_CONTROL_FILE,
610 O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
614 fprintf(stderr, _("%s: could not create pg_control file: %s\n"),
615 progname, strerror(errno));
/* Anything other than a full BLCKSZ write is treated as a failure. */
620 if (write(fd, buffer, BLCKSZ) != BLCKSZ)
622 /* if write didn't set errno, assume problem is no disk space */
625 fprintf(stderr, _("%s: could not write pg_control file: %s\n"),
626 progname, strerror(errno));
632 fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno));
641 * Remove existing XLOG files
/*
 * Delete the files in XLOGDIR whose names are exactly 24 characters long and
 * consist only of the characters 0-9 and A-F.  Other directory entries do
 * not pass the name test below.
 */
644 KillExistingXLOG(void)
648 char path[MAXPGPATH];
650 xldir = opendir(XLOGDIR);
653 fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
654 progname, XLOGDIR, strerror(errno));
659 while ((xlde = readdir(xldir)) != NULL)
/* strspn returns the length of the leading run of listed characters, so 24 means the whole name qualifies. */
661 if (strlen(xlde->d_name) == 24 &&
662 strspn(xlde->d_name, "0123456789ABCDEF") == 24)
664 snprintf(path, MAXPGPATH, "%s/%s", XLOGDIR, xlde->d_name);
665 if (unlink(path) < 0)
667 fprintf(stderr, _("%s: could not delete file \"%s\": %s\n"),
668 progname, path, strerror(errno));
677 * This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but not in
/* NOTE(review): Windows-specific call; presumably inside a platform conditional and used to ignore a spurious end-of-directory error -- confirm. */
680 if (GetLastError() == ERROR_NO_MORE_FILES)
686 fprintf(stderr, _("%s: could not read from directory \"%s\": %s\n"),
687 progname, XLOGDIR, strerror(errno));
695 * Write an empty XLOG file, containing only the checkpoint record
696 * already set up in ControlFile.
/*
 * NOTE(review): the function header is not part of this block; going by the
 * preceding comment and the prototype list this is the body of
 * WriteEmptyXLOG() -- confirm.
 *
 * Builds one XLOG page in memory (a long page header followed by a shutdown
 * checkpoint record copied from ControlFile.checkPointCopy), writes it as
 * the first page of the segment file named by newXlogId/newXlogSeg, and then
 * pads the file with zero-filled pages up to XLogSegSize bytes.
 */
703 XLogLongPageHeader longpage;
706 char path[MAXPGPATH];
710 /* Use malloc() to ensure buffer is MAXALIGNED */
711 buffer = (char *) malloc(XLOG_BLCKSZ);
/* NOTE(review): the malloc result is used on the next lines with no NULL check. */
712 page = (XLogPageHeader) buffer;
713 memset(buffer, 0, XLOG_BLCKSZ);
715 /* Set up the XLOG page header */
716 page->xlp_magic = XLOG_PAGE_MAGIC;
717 page->xlp_info = XLP_LONG_HEADER;
718 page->xlp_tli = ControlFile.checkPointCopy.ThisTimeLineID;
/* The page address is the redo position minus the long header size. */
719 page->xlp_pageaddr.xlogid =
720 ControlFile.checkPointCopy.redo.xlogid;
721 page->xlp_pageaddr.xrecoff =
722 ControlFile.checkPointCopy.redo.xrecoff - SizeOfXLogLongPHD;
723 longpage = (XLogLongPageHeader) page;
724 longpage->xlp_sysid = ControlFile.system_identifier;
725 longpage->xlp_seg_size = XLogSegSize;
727 /* Insert the initial checkpoint record */
728 record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD);
729 record->xl_prev.xlogid = 0;
730 record->xl_prev.xrecoff = 0;
731 record->xl_xid = InvalidTransactionId;
/* Total length is the fixed record header plus the CheckPoint payload. */
732 record->xl_tot_len = SizeOfXLogRecord + sizeof(CheckPoint);
733 record->xl_len = sizeof(CheckPoint);
734 record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
735 record->xl_rmid = RM_XLOG_ID;
736 memcpy(XLogRecGetData(record), &ControlFile.checkPointCopy,
/* The CRC takes the payload first, then the record header minus its leading pg_crc32-sized bytes (presumably the xl_crc field itself). */
740 COMP_CRC32(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint));
741 COMP_CRC32(crc, (char *) record + sizeof(pg_crc32),
742 SizeOfXLogRecord - sizeof(pg_crc32));
744 record->xl_crc = crc;
746 /* Write the first page */
747 XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID,
748 newXlogId, newXlogSeg);
/* With O_CREAT and O_EXCL the open fails if the file already exists. */
752 fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
756 fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
757 progname, path, strerror(errno));
762 if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
764 /* if write didn't set errno, assume problem is no disk space */
767 fprintf(stderr, _("%s: could not write file \"%s\": %s\n"),
768 progname, path, strerror(errno));
772 /* Fill the rest of the file with zeroes */
773 memset(buffer, 0, XLOG_BLCKSZ);
/* The count starts at XLOG_BLCKSZ because the first page has already been written. */
774 for (nbytes = XLOG_BLCKSZ; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
777 if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
781 fprintf(stderr, _("%s: could not write file \"%s\": %s\n"),
782 progname, path, strerror(errno));
789 fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno));
/*
 * NOTE(review): the function header is not part of this block; the text
 * matches the usage() prototype declared near the top of the file -- confirm.
 *
 * Prints the help text to stdout: a one-line description, the invocation
 * syntax, one line per switch, and the bug-report address.  progname is
 * substituted into the first two lines.
 */
800 printf(_("%s resets the PostgreSQL transaction log.\n\n"), progname);
801 printf(_("Usage:\n %s [OPTION]... DATADIR\n\n"), progname);
802 printf(_("Options:\n"));
/* One line per switch accepted by the getopt string in main(), followed by the two long options. */
803 printf(_(" -f force update to be done\n"));
804 printf(_(" -l TLI,FILE,SEG force minimum WAL starting location for new transaction log\n"));
805 printf(_(" -m XID set next multitransaction ID\n"));
806 printf(_(" -n no update, just show extracted control values (for testing)\n"));
807 printf(_(" -o OID set next OID\n"));
808 printf(_(" -O OFFSET set next multitransaction offset\n"));
809 printf(_(" -x XID set next transaction ID\n"));
810 printf(_(" --help show this help, then exit\n"));
811 printf(_(" --version output version information, then exit\n"));
812 printf(_("\nReport bugs to <pgsql-bugs@postgresql.org>.\n"));