1 /*-------------------------------------------------------------------------
4 * A utility to "zero out" the xlog when it's corrupt beyond recovery.
5 * Can also rebuild pg_control if needed.
7 * The theory of operation is fairly simple:
8 * 1. Read the existing pg_control (which will include the last
9 * checkpoint record). If it is an old format then update to the current format.
11 * 2. If pg_control is corrupt, attempt to intuit reasonable values,
12 * by scanning the old xlog if necessary.
13 * 3. Modify pg_control to reflect a "shutdown" state with a checkpoint
14 * record at the start of xlog.
15 * 4. Flush the existing xlog files and write a new segment with
16 * just a checkpoint record in it. The new segment is positioned
17 * just past the end of the old xlog, so that existing LSNs in
18 * data pages will appear to be "in the past".
19 * This is all pretty straightforward except for the intuition part of step 2.
23 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
24 * Portions Copyright (c) 1994, Regents of the University of California
26 * src/bin/pg_resetwal/pg_resetwal.c
28 *-------------------------------------------------------------------------
32 * We have to use postgres.h not postgres_fe.h here, because there's so much
33 * backend-only stuff in the XLOG include files we need. But we need a
34 * frontend-ish environment otherwise. Hence this ugly hack.
47 #include "access/transam.h"
48 #include "access/tuptoaster.h"
49 #include "access/multixact.h"
50 #include "access/xlog.h"
51 #include "access/xlog_internal.h"
52 #include "catalog/catversion.h"
53 #include "catalog/pg_control.h"
54 #include "common/fe_memutils.h"
55 #include "common/file_perm.h"
56 #include "common/restricted_token.h"
57 #include "storage/large_object.h"
58 #include "pg_getopt.h"
59 #include "getopt_long.h"
62 static ControlFileData ControlFile; /* pg_control values */
63 static XLogSegNo newXlogSegNo; /* new XLOG segment # */
64 static bool guessed = false; /* T if we had to guess at any values */
65 static const char *progname;
66 static uint32 set_xid_epoch = (uint32) -1;
67 static TransactionId set_xid = 0;
68 static TransactionId set_oldest_commit_ts_xid = 0;
69 static TransactionId set_newest_commit_ts_xid = 0;
70 static Oid set_oid = 0;
71 static MultiXactId set_mxid = 0;
72 static MultiXactOffset set_mxoff = (MultiXactOffset) -1;
73 static uint32 minXlogTli = 0;
74 static XLogSegNo minXlogSegNo = 0;
76 static int set_wal_segsize;
78 static void CheckDataVersion(void);
79 static bool ReadControlFile(void);
80 static void GuessControlValues(void);
81 static void PrintControlValues(bool guessed);
82 static void PrintNewControlValues(void);
83 static void RewriteControlFile(void);
84 static void FindEndOfXLOG(void);
85 static void KillExistingXLOG(void);
86 static void KillExistingArchiveStatus(void);
87 static void WriteEmptyXLOG(void);
88 static void usage(void);
92 main(int argc, char *argv[])
94 static struct option long_options[] = {
95 {"commit-timestamp-ids", required_argument, NULL, 'c'},
96 {"pgdata", required_argument, NULL, 'D'},
97 {"epoch", required_argument, NULL, 'e'},
98 {"force", no_argument, NULL, 'f'},
99 {"next-wal-file", required_argument, NULL, 'l'},
100 {"multixact-ids", required_argument, NULL, 'm'},
101 {"dry-run", no_argument, NULL, 'n'},
102 {"next-oid", required_argument, NULL, 'o'},
103 {"multixact-offset", required_argument, NULL, 'O'},
104 {"next-transaction-id", required_argument, NULL, 'x'},
105 {"wal-segsize", required_argument, NULL, 1},
111 bool noupdate = false;
112 MultiXactId set_oldestmxid = 0;
115 char *DataDir = NULL;
116 char *log_fname = NULL;
119 set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_resetwal"));
121 progname = get_progname(argv[0]);
125 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
130 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
132 puts("pg_resetwal (PostgreSQL) " PG_VERSION);
138 while ((c = getopt_long(argc, argv, "c:D:e:fl:m:no:O:x:", long_options, NULL)) != -1)
155 set_xid_epoch = strtoul(optarg, &endptr, 0);
156 if (endptr == optarg || *endptr != '\0')
159 translator: the second %s is a command line argument (-e, etc) */
160 fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-e");
161 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
164 if (set_xid_epoch == -1)
166 fprintf(stderr, _("%s: transaction ID epoch (-e) must not be -1\n"), progname);
172 set_xid = strtoul(optarg, &endptr, 0);
173 if (endptr == optarg || *endptr != '\0')
175 fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-x");
176 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
181 fprintf(stderr, _("%s: transaction ID (-x) must not be 0\n"), progname);
187 set_oldest_commit_ts_xid = strtoul(optarg, &endptr, 0);
188 if (endptr == optarg || *endptr != ',')
190 fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-c");
191 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
194 set_newest_commit_ts_xid = strtoul(endptr + 1, &endptr2, 0);
195 if (endptr2 == endptr + 1 || *endptr2 != '\0')
197 fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-c");
198 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
202 if (set_oldest_commit_ts_xid < 2 &&
203 set_oldest_commit_ts_xid != 0)
205 fprintf(stderr, _("%s: transaction ID (-c) must be either 0 or greater than or equal to 2\n"), progname);
209 if (set_newest_commit_ts_xid < 2 &&
210 set_newest_commit_ts_xid != 0)
212 fprintf(stderr, _("%s: transaction ID (-c) must be either 0 or greater than or equal to 2\n"), progname);
218 set_oid = strtoul(optarg, &endptr, 0);
219 if (endptr == optarg || *endptr != '\0')
221 fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-o");
222 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
227 fprintf(stderr, _("%s: OID (-o) must not be 0\n"), progname);
233 set_mxid = strtoul(optarg, &endptr, 0);
234 if (endptr == optarg || *endptr != ',')
236 fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-m");
237 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
241 set_oldestmxid = strtoul(endptr + 1, &endptr2, 0);
242 if (endptr2 == endptr + 1 || *endptr2 != '\0')
244 fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-m");
245 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
250 fprintf(stderr, _("%s: multitransaction ID (-m) must not be 0\n"), progname);
255 * XXX It'd be nice to have more sanity checks here, e.g. so
256 * that oldest is not wrapped around w.r.t. nextMulti.
258 if (set_oldestmxid == 0)
260 fprintf(stderr, _("%s: oldest multitransaction ID (-m) must not be 0\n"),
267 set_mxoff = strtoul(optarg, &endptr, 0);
268 if (endptr == optarg || *endptr != '\0')
270 fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-O");
271 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
276 fprintf(stderr, _("%s: multitransaction offset (-O) must not be -1\n"), progname);
282 if (strspn(optarg, "01234567890ABCDEFabcdef") != XLOG_FNAME_LEN)
284 fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-l");
285 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
290 * XLogFromFileName requires wal segment size which is not yet
291 * set. Hence wal details are set later on.
293 log_fname = pg_strdup(optarg);
297 set_wal_segsize = strtol(optarg, &endptr, 10) * 1024 * 1024;
298 if (endptr == optarg || *endptr != '\0')
301 _("%s: argument of --wal-segsize must be a number\n"),
305 if (!IsValidWalSegSize(set_wal_segsize))
308 _("%s: argument of --wal-segsize must be a power of 2 between 1 and 1024\n"),
315 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
320 if (DataDir == NULL && optind < argc)
321 DataDir = argv[optind++];
323 /* Complain if any arguments remain */
326 fprintf(stderr, _("%s: too many command-line arguments (first is \"%s\")\n"),
327 progname, argv[optind]);
328 fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
335 fprintf(stderr, _("%s: no data directory specified\n"), progname);
336 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
341 * Don't allow pg_resetwal to be run as root, to avoid overwriting the
342 * ownership of files in the data directory. We need only check for root
343 * -- any other user won't have sufficient permissions to modify files in
344 * the data directory.
349 fprintf(stderr, _("%s: cannot be executed by \"root\"\n"),
351 fprintf(stderr, _("You must run %s as the PostgreSQL superuser.\n"),
357 get_restricted_token(progname);
359 /* Set mask based on PGDATA permissions */
360 if (!GetDataDirectoryCreatePerm(DataDir))
362 fprintf(stderr, _("%s: could not read permissions of directory \"%s\": %s\n"),
363 progname, DataDir, strerror(errno));
369 if (chdir(DataDir) < 0)
371 fprintf(stderr, _("%s: could not change directory to \"%s\": %s\n"),
372 progname, DataDir, strerror(errno));
376 /* Check that data directory matches our server version */
380 * Check for a postmaster lock file --- if there is one, refuse to
381 * proceed, on grounds we might be interfering with a live installation.
383 if ((fd = open("postmaster.pid", O_RDONLY, 0)) < 0)
387 fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"),
388 progname, "postmaster.pid", strerror(errno));
394 fprintf(stderr, _("%s: lock file \"%s\" exists\n"
395 "Is a server running? If not, delete the lock file and try again.\n"),
396 progname, "postmaster.pid");
401 * Attempt to read the existing pg_control file
403 if (!ReadControlFile())
404 GuessControlValues();
407 * If no new WAL segment size was specified, use the control file value.
409 if (set_wal_segsize != 0)
410 WalSegSz = set_wal_segsize;
412 WalSegSz = ControlFile.xlog_seg_size;
414 if (log_fname != NULL)
415 XLogFromFileName(log_fname, &minXlogTli, &minXlogSegNo, WalSegSz);
418 * Also look at existing segment files to set up newXlogSegNo
423 * If we're not going to proceed with the reset, print the current control
426 if ((guessed && !force) || noupdate)
427 PrintControlValues(guessed);
430 * Adjust fields if required by switches. (Do this now so that printout,
431 * if any, includes these values.)
433 if (set_xid_epoch != -1)
434 ControlFile.checkPointCopy.nextXidEpoch = set_xid_epoch;
438 ControlFile.checkPointCopy.nextXid = set_xid;
441 * For the moment, just set oldestXid to a value that will force
442 * immediate autovacuum-for-wraparound. It's not clear whether adding
443 * user control of this is useful, so let's just do something that's
444 * reasonably safe. The magic constant here corresponds to the
445 * maximum allowed value of autovacuum_freeze_max_age.
447 ControlFile.checkPointCopy.oldestXid = set_xid - 2000000000;
448 if (ControlFile.checkPointCopy.oldestXid < FirstNormalTransactionId)
449 ControlFile.checkPointCopy.oldestXid += FirstNormalTransactionId;
450 ControlFile.checkPointCopy.oldestXidDB = InvalidOid;
453 if (set_oldest_commit_ts_xid != 0)
454 ControlFile.checkPointCopy.oldestCommitTsXid = set_oldest_commit_ts_xid;
455 if (set_newest_commit_ts_xid != 0)
456 ControlFile.checkPointCopy.newestCommitTsXid = set_newest_commit_ts_xid;
459 ControlFile.checkPointCopy.nextOid = set_oid;
463 ControlFile.checkPointCopy.nextMulti = set_mxid;
465 ControlFile.checkPointCopy.oldestMulti = set_oldestmxid;
466 if (ControlFile.checkPointCopy.oldestMulti < FirstMultiXactId)
467 ControlFile.checkPointCopy.oldestMulti += FirstMultiXactId;
468 ControlFile.checkPointCopy.oldestMultiDB = InvalidOid;
472 ControlFile.checkPointCopy.nextMultiOffset = set_mxoff;
474 if (minXlogTli > ControlFile.checkPointCopy.ThisTimeLineID)
476 ControlFile.checkPointCopy.ThisTimeLineID = minXlogTli;
477 ControlFile.checkPointCopy.PrevTimeLineID = minXlogTli;
480 if (set_wal_segsize != 0)
481 ControlFile.xlog_seg_size = WalSegSz;
483 if (minXlogSegNo > newXlogSegNo)
484 newXlogSegNo = minXlogSegNo;
487 * If we had to guess anything, and -f was not given, just print the
488 * guessed values and exit. Also print if -n is given.
490 if ((guessed && !force) || noupdate)
492 PrintNewControlValues();
495 printf(_("\nIf these values seem acceptable, use -f to force reset.\n"));
503 * Don't reset from a dirty pg_control without -f, either.
505 if (ControlFile.state != DB_SHUTDOWNED && !force)
507 printf(_("The database server was not shut down cleanly.\n"
508 "Resetting the write-ahead log might cause data to be lost.\n"
509 "If you want to proceed anyway, use -f to force reset.\n"));
514 * Else, do the dirty deed.
516 RewriteControlFile();
518 KillExistingArchiveStatus();
521 printf(_("Write-ahead log reset\n"));
527 * Look at the version string stored in PG_VERSION and decide if this utility
528 * can be run safely or not.
530 * We don't want to inject pg_control and WAL files that are for a different
531 * major version; that can't do anything good. Note that we don't treat
532 * mismatching version info in pg_control as a reason to bail out, because
533 * recovering from a corrupted pg_control is one of the main reasons for this
534 * program to exist at all. However, PG_VERSION is unlikely to get corrupted,
535 * and if it were it would be easy to fix by hand. So let's make this check
536 * to prevent simple user errors.
539 CheckDataVersion(void)
541 const char *ver_file = "PG_VERSION";
546 if ((ver_fd = fopen(ver_file, "r")) == NULL)
548 fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"),
549 progname, ver_file, strerror(errno));
553 /* version number has to be the first line read */
554 if (!fgets(rawline, sizeof(rawline), ver_fd))
558 fprintf(stderr, _("%s: unexpected empty file \"%s\"\n"),
563 fprintf(stderr, _("%s: could not read file \"%s\": %s\n"),
564 progname, ver_file, strerror(errno));
569 /* remove trailing newline, handling Windows newlines as well */
570 len = strlen(rawline);
571 if (len > 0 && rawline[len - 1] == '\n')
573 rawline[--len] = '\0';
574 if (len > 0 && rawline[len - 1] == '\r')
575 rawline[--len] = '\0';
578 if (strcmp(rawline, PG_MAJORVERSION) != 0)
580 fprintf(stderr, _("%s: data directory is of wrong version\n"
581 "File \"%s\" contains \"%s\", which is not compatible with this program's version \"%s\".\n"),
582 progname, ver_file, rawline, PG_MAJORVERSION);
591 * Try to read the existing pg_control file.
593 * This routine is also responsible for updating old pg_control versions
594 * to the current format. (Currently we don't do anything of the sort.)
597 ReadControlFile(void)
604 if ((fd = open(XLOG_CONTROL_FILE, O_RDONLY | PG_BINARY, 0)) < 0)
607 * If pg_control is not there at all, or we can't read it, the odds
608 * are we've been handed a bad DataDir path, so give up. User can do
609 * "touch pg_control" to force us to proceed.
611 fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"),
612 progname, XLOG_CONTROL_FILE, strerror(errno));
614 fprintf(stderr, _("If you are sure the data directory path is correct, execute\n"
621 /* Use malloc to ensure we have a maxaligned buffer */
622 buffer = (char *) pg_malloc(PG_CONTROL_FILE_SIZE);
624 len = read(fd, buffer, PG_CONTROL_FILE_SIZE);
627 fprintf(stderr, _("%s: could not read file \"%s\": %s\n"),
628 progname, XLOG_CONTROL_FILE, strerror(errno));
633 if (len >= sizeof(ControlFileData) &&
634 ((ControlFileData *) buffer)->pg_control_version == PG_CONTROL_VERSION)
640 offsetof(ControlFileData, crc));
643 if (!EQ_CRC32C(crc, ((ControlFileData *) buffer)->crc))
645 /* We will use the data but treat it as guessed. */
647 _("%s: pg_control exists but has invalid CRC; proceed with caution\n"),
652 memcpy(&ControlFile, buffer, sizeof(ControlFile));
654 /* return false if WAL segment size is not valid */
655 if (!IsValidWalSegSize(ControlFile.xlog_seg_size))
658 ngettext("%s: pg_control specifies invalid WAL segment size (%d byte); proceed with caution\n",
659 "%s: pg_control specifies invalid WAL segment size (%d bytes); proceed with caution\n",
660 ControlFile.xlog_seg_size),
661 progname, ControlFile.xlog_seg_size);
668 /* Looks like it's a mess. */
669 fprintf(stderr, _("%s: pg_control exists but is broken or wrong version; ignoring it\n"),
676 * Guess at pg_control values when we can't read the old ones.
679 GuessControlValues(void)
681 uint64 sysidentifier;
685 * Set up a completely default set of pg_control values.
688 memset(&ControlFile, 0, sizeof(ControlFile));
690 ControlFile.pg_control_version = PG_CONTROL_VERSION;
691 ControlFile.catalog_version_no = CATALOG_VERSION_NO;
694 * Create a new unique installation identifier, since we can no longer use
695 * any old XLOG records. See notes in xlog.c about the algorithm.
697 gettimeofday(&tv, NULL);
698 sysidentifier = ((uint64) tv.tv_sec) << 32;
699 sysidentifier |= ((uint64) tv.tv_usec) << 12;
700 sysidentifier |= getpid() & 0xFFF;
702 ControlFile.system_identifier = sysidentifier;
704 ControlFile.checkPointCopy.redo = SizeOfXLogLongPHD;
705 ControlFile.checkPointCopy.ThisTimeLineID = 1;
706 ControlFile.checkPointCopy.PrevTimeLineID = 1;
707 ControlFile.checkPointCopy.fullPageWrites = false;
708 ControlFile.checkPointCopy.nextXidEpoch = 0;
709 ControlFile.checkPointCopy.nextXid = FirstNormalTransactionId;
710 ControlFile.checkPointCopy.nextOid = FirstBootstrapObjectId;
711 ControlFile.checkPointCopy.nextMulti = FirstMultiXactId;
712 ControlFile.checkPointCopy.nextMultiOffset = 0;
713 ControlFile.checkPointCopy.oldestXid = FirstNormalTransactionId;
714 ControlFile.checkPointCopy.oldestXidDB = InvalidOid;
715 ControlFile.checkPointCopy.oldestMulti = FirstMultiXactId;
716 ControlFile.checkPointCopy.oldestMultiDB = InvalidOid;
717 ControlFile.checkPointCopy.time = (pg_time_t) time(NULL);
718 ControlFile.checkPointCopy.oldestActiveXid = InvalidTransactionId;
720 ControlFile.state = DB_SHUTDOWNED;
721 ControlFile.time = (pg_time_t) time(NULL);
722 ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
723 ControlFile.unloggedLSN = 1;
725 /* minRecoveryPoint, backupStartPoint and backupEndPoint can be left zero */
727 ControlFile.wal_level = WAL_LEVEL_MINIMAL;
728 ControlFile.wal_log_hints = false;
729 ControlFile.track_commit_timestamp = false;
730 ControlFile.MaxConnections = 100;
731 ControlFile.max_worker_processes = 8;
732 ControlFile.max_prepared_xacts = 0;
733 ControlFile.max_locks_per_xact = 64;
735 ControlFile.maxAlign = MAXIMUM_ALIGNOF;
736 ControlFile.floatFormat = FLOATFORMAT_VALUE;
737 ControlFile.blcksz = BLCKSZ;
738 ControlFile.relseg_size = RELSEG_SIZE;
739 ControlFile.xlog_blcksz = XLOG_BLCKSZ;
740 ControlFile.xlog_seg_size = DEFAULT_XLOG_SEG_SIZE;
741 ControlFile.nameDataLen = NAMEDATALEN;
742 ControlFile.indexMaxKeys = INDEX_MAX_KEYS;
743 ControlFile.toast_max_chunk_size = TOAST_MAX_CHUNK_SIZE;
744 ControlFile.loblksize = LOBLKSIZE;
745 ControlFile.float4ByVal = FLOAT4PASSBYVAL;
746 ControlFile.float8ByVal = FLOAT8PASSBYVAL;
749 * XXX eventually, should try to grovel through old XLOG to develop more
750 * accurate values for TimeLineID, nextXID, etc.
756 * Print the guessed pg_control values when we had to guess.
758 * NB: this display should be just those fields that will not be
759 * reset by RewriteControlFile().
762 PrintControlValues(bool guessed)
764 char sysident_str[32];
767 printf(_("Guessed pg_control values:\n\n"));
769 printf(_("Current pg_control values:\n\n"));
772 * Format system_identifier separately to keep platform-dependent format
773 * code out of the translatable message string.
775 snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT,
776 ControlFile.system_identifier);
778 printf(_("pg_control version number: %u\n"),
779 ControlFile.pg_control_version);
780 printf(_("Catalog version number: %u\n"),
781 ControlFile.catalog_version_no);
782 printf(_("Database system identifier: %s\n"),
784 printf(_("Latest checkpoint's TimeLineID: %u\n"),
785 ControlFile.checkPointCopy.ThisTimeLineID);
786 printf(_("Latest checkpoint's full_page_writes: %s\n"),
787 ControlFile.checkPointCopy.fullPageWrites ? _("on") : _("off"));
788 printf(_("Latest checkpoint's NextXID: %u:%u\n"),
789 ControlFile.checkPointCopy.nextXidEpoch,
790 ControlFile.checkPointCopy.nextXid);
791 printf(_("Latest checkpoint's NextOID: %u\n"),
792 ControlFile.checkPointCopy.nextOid);
793 printf(_("Latest checkpoint's NextMultiXactId: %u\n"),
794 ControlFile.checkPointCopy.nextMulti);
795 printf(_("Latest checkpoint's NextMultiOffset: %u\n"),
796 ControlFile.checkPointCopy.nextMultiOffset);
797 printf(_("Latest checkpoint's oldestXID: %u\n"),
798 ControlFile.checkPointCopy.oldestXid);
799 printf(_("Latest checkpoint's oldestXID's DB: %u\n"),
800 ControlFile.checkPointCopy.oldestXidDB);
801 printf(_("Latest checkpoint's oldestActiveXID: %u\n"),
802 ControlFile.checkPointCopy.oldestActiveXid);
803 printf(_("Latest checkpoint's oldestMultiXid: %u\n"),
804 ControlFile.checkPointCopy.oldestMulti);
805 printf(_("Latest checkpoint's oldestMulti's DB: %u\n"),
806 ControlFile.checkPointCopy.oldestMultiDB);
807 printf(_("Latest checkpoint's oldestCommitTsXid:%u\n"),
808 ControlFile.checkPointCopy.oldestCommitTsXid);
809 printf(_("Latest checkpoint's newestCommitTsXid:%u\n"),
810 ControlFile.checkPointCopy.newestCommitTsXid);
811 printf(_("Maximum data alignment: %u\n"),
812 ControlFile.maxAlign);
813 /* we don't print floatFormat since can't say much useful about it */
814 printf(_("Database block size: %u\n"),
816 printf(_("Blocks per segment of large relation: %u\n"),
817 ControlFile.relseg_size);
818 printf(_("WAL block size: %u\n"),
819 ControlFile.xlog_blcksz);
820 printf(_("Bytes per WAL segment: %u\n"),
821 ControlFile.xlog_seg_size);
822 printf(_("Maximum length of identifiers: %u\n"),
823 ControlFile.nameDataLen);
824 printf(_("Maximum columns in an index: %u\n"),
825 ControlFile.indexMaxKeys);
826 printf(_("Maximum size of a TOAST chunk: %u\n"),
827 ControlFile.toast_max_chunk_size);
828 printf(_("Size of a large-object chunk: %u\n"),
829 ControlFile.loblksize);
830 /* This is no longer configurable, but users may still expect to see it: */
831 printf(_("Date/time type storage: %s\n"),
832 _("64-bit integers"));
833 printf(_("Float4 argument passing: %s\n"),
834 (ControlFile.float4ByVal ? _("by value") : _("by reference")));
835 printf(_("Float8 argument passing: %s\n"),
836 (ControlFile.float8ByVal ? _("by value") : _("by reference")));
837 printf(_("Data page checksum version: %u\n"),
838 ControlFile.data_checksum_version);
843 * Print the values to be changed.
846 PrintNewControlValues(void)
848 char fname[MAXFNAMELEN];
850 /* This will be always printed in order to keep format same. */
851 printf(_("\n\nValues to be changed:\n\n"));
853 XLogFileName(fname, ControlFile.checkPointCopy.ThisTimeLineID,
854 newXlogSegNo, WalSegSz);
855 printf(_("First log segment after reset: %s\n"), fname);
859 printf(_("NextMultiXactId: %u\n"),
860 ControlFile.checkPointCopy.nextMulti);
861 printf(_("OldestMultiXid: %u\n"),
862 ControlFile.checkPointCopy.oldestMulti);
863 printf(_("OldestMulti's DB: %u\n"),
864 ControlFile.checkPointCopy.oldestMultiDB);
869 printf(_("NextMultiOffset: %u\n"),
870 ControlFile.checkPointCopy.nextMultiOffset);
875 printf(_("NextOID: %u\n"),
876 ControlFile.checkPointCopy.nextOid);
881 printf(_("NextXID: %u\n"),
882 ControlFile.checkPointCopy.nextXid);
883 printf(_("OldestXID: %u\n"),
884 ControlFile.checkPointCopy.oldestXid);
885 printf(_("OldestXID's DB: %u\n"),
886 ControlFile.checkPointCopy.oldestXidDB);
889 if (set_xid_epoch != -1)
891 printf(_("NextXID epoch: %u\n"),
892 ControlFile.checkPointCopy.nextXidEpoch);
895 if (set_oldest_commit_ts_xid != 0)
897 printf(_("oldestCommitTsXid: %u\n"),
898 ControlFile.checkPointCopy.oldestCommitTsXid);
900 if (set_newest_commit_ts_xid != 0)
902 printf(_("newestCommitTsXid: %u\n"),
903 ControlFile.checkPointCopy.newestCommitTsXid);
906 if (set_wal_segsize != 0)
908 printf(_("Bytes per WAL segment: %u\n"),
909 ControlFile.xlog_seg_size);
915 * Write out the new pg_control file.
918 RewriteControlFile(void)
921 char buffer[PG_CONTROL_FILE_SIZE]; /* need not be aligned */
924 * For good luck, apply the same static assertions as in backend's
925 * WriteControlFile().
927 StaticAssertStmt(sizeof(ControlFileData) <= PG_CONTROL_MAX_SAFE_SIZE,
928 "pg_control is too large for atomic disk writes");
929 StaticAssertStmt(sizeof(ControlFileData) <= PG_CONTROL_FILE_SIZE,
930 "sizeof(ControlFileData) exceeds PG_CONTROL_FILE_SIZE");
933 * Adjust fields as needed to force an empty XLOG starting at
936 XLogSegNoOffsetToRecPtr(newXlogSegNo, SizeOfXLogLongPHD, WalSegSz,
937 ControlFile.checkPointCopy.redo);
938 ControlFile.checkPointCopy.time = (pg_time_t) time(NULL);
940 ControlFile.state = DB_SHUTDOWNED;
941 ControlFile.time = (pg_time_t) time(NULL);
942 ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
943 ControlFile.minRecoveryPoint = 0;
944 ControlFile.minRecoveryPointTLI = 0;
945 ControlFile.backupStartPoint = 0;
946 ControlFile.backupEndPoint = 0;
947 ControlFile.backupEndRequired = false;
950 * Force the defaults for max_* settings. The values don't really matter
951 * as long as wal_level='minimal'; the postmaster will reset these fields
954 ControlFile.wal_level = WAL_LEVEL_MINIMAL;
955 ControlFile.wal_log_hints = false;
956 ControlFile.track_commit_timestamp = false;
957 ControlFile.MaxConnections = 100;
958 ControlFile.max_worker_processes = 8;
959 ControlFile.max_prepared_xacts = 0;
960 ControlFile.max_locks_per_xact = 64;
962 /* Contents are protected with a CRC */
963 INIT_CRC32C(ControlFile.crc);
964 COMP_CRC32C(ControlFile.crc,
965 (char *) &ControlFile,
966 offsetof(ControlFileData, crc));
967 FIN_CRC32C(ControlFile.crc);
970 * We write out PG_CONTROL_FILE_SIZE bytes into pg_control, zero-padding
971 * the excess over sizeof(ControlFileData). This reduces the odds of
972 * premature-EOF errors when reading pg_control. We'll still fail when we
973 * check the contents of the file, but hopefully with a more specific
974 * error than "couldn't read pg_control".
976 memset(buffer, 0, PG_CONTROL_FILE_SIZE);
977 memcpy(buffer, &ControlFile, sizeof(ControlFileData));
979 unlink(XLOG_CONTROL_FILE);
981 fd = open(XLOG_CONTROL_FILE,
982 O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
983 pg_file_create_mode);
986 fprintf(stderr, _("%s: could not create pg_control file: %s\n"),
987 progname, strerror(errno));
992 if (write(fd, buffer, PG_CONTROL_FILE_SIZE) != PG_CONTROL_FILE_SIZE)
994 /* if write didn't set errno, assume problem is no disk space */
997 fprintf(stderr, _("%s: could not write pg_control file: %s\n"),
998 progname, strerror(errno));
1004 fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno));
1013 * Scan existing XLOG files and determine the highest existing WAL address
1015 * On entry, ControlFile.checkPointCopy.redo and ControlFile.xlog_seg_size
1016 * are assumed valid (note that we allow the old xlog seg size to differ
1017 * from what we're using). On exit, newXlogId and newXlogSeg are set to
1018 * suitable values for the beginning of replacement WAL (in our seg size).
1024 struct dirent *xlde;
1025 uint64 segs_per_xlogid;
1029 * Initialize the max() computation using the last checkpoint address from
1030 * old pg_control. Note that for the moment we are working with segment
1031 * numbering according to the old xlog seg size.
1033 segs_per_xlogid = (UINT64CONST(0x0000000100000000) / ControlFile.xlog_seg_size);
1034 newXlogSegNo = ControlFile.checkPointCopy.redo / ControlFile.xlog_seg_size;
1037 * Scan the pg_wal directory to find existing WAL segment files. We assume
1038 * any present have been used; in most scenarios this should be
1039 * conservative, because of xlog.c's attempts to pre-create files.
1041 xldir = opendir(XLOGDIR);
1044 fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
1045 progname, XLOGDIR, strerror(errno));
1049 while (errno = 0, (xlde = readdir(xldir)) != NULL)
1051 if (IsXLogFileName(xlde->d_name) ||
1052 IsPartialXLogFileName(xlde->d_name))
1060 * Note: We don't use XLogFromFileName here, because we want to
1061 * use the segment size from the control file, not the size the
1062 * pg_resetwal binary was compiled with
1064 sscanf(xlde->d_name, "%08X%08X%08X", &tli, &log, &seg);
1065 segno = ((uint64) log) * segs_per_xlogid + seg;
1068 * Note: we take the max of all files found, regardless of their
1069 * timelines. Another possibility would be to ignore files of
1070 * timelines other than the target TLI, but this seems safer.
1071 * Better too large a result than too small...
1073 if (segno > newXlogSegNo)
1074 newXlogSegNo = segno;
1080 fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"),
1081 progname, XLOGDIR, strerror(errno));
1085 if (closedir(xldir))
1087 fprintf(stderr, _("%s: could not close directory \"%s\": %s\n"),
1088 progname, XLOGDIR, strerror(errno));
1093 * Finally, convert to new xlog seg size, and advance by one to ensure we
1094 * are in virgin territory.
1096 xlogbytepos = newXlogSegNo * ControlFile.xlog_seg_size;
1097 newXlogSegNo = (xlogbytepos + ControlFile.xlog_seg_size - 1) / WalSegSz;
1103 * Remove existing XLOG files
1106 KillExistingXLOG(void)
1109 struct dirent *xlde;
1110 char path[MAXPGPATH + sizeof(XLOGDIR)];
1112 xldir = opendir(XLOGDIR);
1115 fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
1116 progname, XLOGDIR, strerror(errno));
1120 while (errno = 0, (xlde = readdir(xldir)) != NULL)
1122 if (IsXLogFileName(xlde->d_name) ||
1123 IsPartialXLogFileName(xlde->d_name))
1125 snprintf(path, sizeof(path), "%s/%s", XLOGDIR, xlde->d_name);
1126 if (unlink(path) < 0)
1128 fprintf(stderr, _("%s: could not delete file \"%s\": %s\n"),
1129 progname, path, strerror(errno));
1137 fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"),
1138 progname, XLOGDIR, strerror(errno));
1142 if (closedir(xldir))
1144 fprintf(stderr, _("%s: could not close directory \"%s\": %s\n"),
1145 progname, XLOGDIR, strerror(errno));
1152 * Remove existing archive status files
1155 KillExistingArchiveStatus(void)
1157 #define ARCHSTATDIR XLOGDIR "/archive_status"
1160 struct dirent *xlde;
1161 char path[MAXPGPATH + sizeof(ARCHSTATDIR)];
1163 xldir = opendir(ARCHSTATDIR);
1166 fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
1167 progname, ARCHSTATDIR, strerror(errno));
1171 while (errno = 0, (xlde = readdir(xldir)) != NULL)
1173 if (strspn(xlde->d_name, "0123456789ABCDEF") == XLOG_FNAME_LEN &&
1174 (strcmp(xlde->d_name + XLOG_FNAME_LEN, ".ready") == 0 ||
1175 strcmp(xlde->d_name + XLOG_FNAME_LEN, ".done") == 0 ||
1176 strcmp(xlde->d_name + XLOG_FNAME_LEN, ".partial.ready") == 0 ||
1177 strcmp(xlde->d_name + XLOG_FNAME_LEN, ".partial.done") == 0))
1179 snprintf(path, sizeof(path), "%s/%s", ARCHSTATDIR, xlde->d_name);
1180 if (unlink(path) < 0)
1182 fprintf(stderr, _("%s: could not delete file \"%s\": %s\n"),
1183 progname, path, strerror(errno));
1191 fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"),
1192 progname, ARCHSTATDIR, strerror(errno));
1196 if (closedir(xldir))
1198 fprintf(stderr, _("%s: could not close directory \"%s\": %s\n"),
1199 progname, ARCHSTATDIR, strerror(errno));
1206 * Write an empty XLOG file, containing only the checkpoint record
1207 * already set up in ControlFile.
1210 WriteEmptyXLOG(void)
1213 XLogPageHeader page;
1214 XLogLongPageHeader longpage;
1217 char path[MAXPGPATH];
1222 /* Use malloc() to ensure buffer is MAXALIGNED */
1223 buffer = (char *) pg_malloc(XLOG_BLCKSZ);
1224 page = (XLogPageHeader) buffer;
1225 memset(buffer, 0, XLOG_BLCKSZ);
1227 /* Set up the XLOG page header */
1228 page->xlp_magic = XLOG_PAGE_MAGIC;
1229 page->xlp_info = XLP_LONG_HEADER;
1230 page->xlp_tli = ControlFile.checkPointCopy.ThisTimeLineID;
1231 page->xlp_pageaddr = ControlFile.checkPointCopy.redo - SizeOfXLogLongPHD;
1232 longpage = (XLogLongPageHeader) page;
1233 longpage->xlp_sysid = ControlFile.system_identifier;
1234 longpage->xlp_seg_size = WalSegSz;
1235 longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
1237 /* Insert the initial checkpoint record */
1238 recptr = (char *) page + SizeOfXLogLongPHD;
1239 record = (XLogRecord *) recptr;
1240 record->xl_prev = 0;
1241 record->xl_xid = InvalidTransactionId;
1242 record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint);
1243 record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
1244 record->xl_rmid = RM_XLOG_ID;
1246 recptr += SizeOfXLogRecord;
1247 *(recptr++) = (char) XLR_BLOCK_ID_DATA_SHORT;
1248 *(recptr++) = sizeof(CheckPoint);
1249 memcpy(recptr, &ControlFile.checkPointCopy,
1250 sizeof(CheckPoint));
1253 COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
1254 COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
1256 record->xl_crc = crc;
1258 /* Write the first page */
1259 XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID,
1260 newXlogSegNo, WalSegSz);
1264 fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
1265 pg_file_create_mode);
1268 fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
1269 progname, path, strerror(errno));
1274 if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
1276 /* if write didn't set errno, assume problem is no disk space */
1279 fprintf(stderr, _("%s: could not write file \"%s\": %s\n"),
1280 progname, path, strerror(errno));
1284 /* Fill the rest of the file with zeroes */
1285 memset(buffer, 0, XLOG_BLCKSZ);
1286 for (nbytes = XLOG_BLCKSZ; nbytes < WalSegSz; nbytes += XLOG_BLCKSZ)
1289 if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
1293 fprintf(stderr, _("%s: could not write file \"%s\": %s\n"),
1294 progname, path, strerror(errno));
1301 fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno));
1312 printf(_("%s resets the PostgreSQL write-ahead log.\n\n"), progname);
1313 printf(_("Usage:\n %s [OPTION]... DATADIR\n\n"), progname);
1314 printf(_("Options:\n"));
1315 printf(_(" -c, --commit-timestamp-ids=XID,XID\n"
1316 " set oldest and newest transactions bearing\n"
1317 " commit timestamp (zero means no change)\n"));
1318 printf(_(" [-D, --pgdata=]DATADIR data directory\n"));
1319 printf(_(" -e, --epoch=XIDEPOCH set next transaction ID epoch\n"));
1320 printf(_(" -f, --force force update to be done\n"));
1321 printf(_(" -l, --next-wal-file=WALFILE set minimum starting location for new WAL\n"));
1322 printf(_(" -m, --multixact-ids=MXID,MXID set next and oldest multitransaction ID\n"));
1323 printf(_(" -n, --dry-run no update, just show what would be done\n"));
1324 printf(_(" -o, --next-oid=OID set next OID\n"));
1325 printf(_(" -O, --multixact-offset=OFFSET set next multitransaction offset\n"));
1326 printf(_(" -V, --version output version information, then exit\n"));
1327 printf(_(" -x, --next-transaction-id=XID set next transaction ID\n"));
1328 printf(_(" --wal-segsize=SIZE size of WAL segments, in megabytes\n"));
1329 printf(_(" -?, --help show this help, then exit\n"));
1330 printf(_("\nReport bugs to <pgsql-bugs@postgresql.org>.\n"));