]> granicus.if.org Git - postgresql/blob - src/bin/pg_resetxlog/pg_resetxlog.c
Don't waste the last segment of each 4GB logical log file.
[postgresql] / src / bin / pg_resetxlog / pg_resetxlog.c
1 /*-------------------------------------------------------------------------
2  *
3  * pg_resetxlog.c
4  *        A utility to "zero out" the xlog when it's corrupt beyond recovery.
5  *        Can also rebuild pg_control if needed.
6  *
7  * The theory of operation is fairly simple:
8  *        1. Read the existing pg_control (which will include the last
9  *               checkpoint record).  If it is an old format then update to
10  *               current format.
11  *        2. If pg_control is corrupt, attempt to intuit reasonable values,
12  *               by scanning the old xlog if necessary.
13  *        3. Modify pg_control to reflect a "shutdown" state with a checkpoint
14  *               record at the start of xlog.
15  *        4. Flush the existing xlog files and write a new segment with
16  *               just a checkpoint record in it.  The new segment is positioned
17  *               just past the end of the old xlog, so that existing LSNs in
18  *               data pages will appear to be "in the past".
19  * This is all pretty straightforward except for the intuition part of
20  * step 2 ...
21  *
22  *
23  * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
24  * Portions Copyright (c) 1994, Regents of the University of California
25  *
26  * src/bin/pg_resetxlog/pg_resetxlog.c
27  *
28  *-------------------------------------------------------------------------
29  */
30
31 /*
32  * We have to use postgres.h not postgres_fe.h here, because there's so much
33  * backend-only stuff in the XLOG include files we need.  But we need a
34  * frontend-ish environment otherwise.  Hence this ugly hack.
35  */
36 #define FRONTEND 1
37
38 #include "postgres.h"
39
40 #include <dirent.h>
41 #include <fcntl.h>
42 #include <locale.h>
43 #include <sys/stat.h>
44 #include <sys/time.h>
45 #include <time.h>
46 #include <unistd.h>
47 #ifdef HAVE_GETOPT_H
48 #include <getopt.h>
49 #endif
50
51 #include "access/transam.h"
52 #include "access/tuptoaster.h"
53 #include "access/multixact.h"
54 #include "access/xlog_internal.h"
55 #include "catalog/catversion.h"
56 #include "catalog/pg_control.h"
57
58 extern int      optind;
59 extern char *optarg;
60
61
62 static ControlFileData ControlFile;             /* pg_control values */
63 static XLogSegNo newXlogSegNo;  /* new XLOG segment # */
64 static bool guessed = false;    /* T if we had to guess at any values */
65 static const char *progname;
66
67 static bool ReadControlFile(void);
68 static void GuessControlValues(void);
69 static void PrintControlValues(bool guessed);
70 static void RewriteControlFile(void);
71 static void FindEndOfXLOG(void);
72 static void KillExistingXLOG(void);
73 static void KillExistingArchiveStatus(void);
74 static void WriteEmptyXLOG(void);
75 static void usage(void);
76
77
78 int
79 main(int argc, char *argv[])
80 {
81         int                     c;
82         bool            force = false;
83         bool            noupdate = false;
84         uint32          set_xid_epoch = (uint32) -1;
85         TransactionId set_xid = 0;
86         Oid                     set_oid = 0;
87         MultiXactId set_mxid = 0;
88         MultiXactOffset set_mxoff = (MultiXactOffset) -1;
89         uint32          minXlogTli = 0;
90         XLogSegNo       minXlogSegNo = 0;
91         char       *endptr;
92         char       *DataDir;
93         int                     fd;
94         char            path[MAXPGPATH];
95
96         set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_resetxlog"));
97
98         progname = get_progname(argv[0]);
99
100         if (argc > 1)
101         {
102                 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
103                 {
104                         usage();
105                         exit(0);
106                 }
107                 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
108                 {
109                         puts("pg_resetxlog (PostgreSQL) " PG_VERSION);
110                         exit(0);
111                 }
112         }
113
114
115         while ((c = getopt(argc, argv, "fl:m:no:O:x:e:")) != -1)
116         {
117                 switch (c)
118                 {
119                         case 'f':
120                                 force = true;
121                                 break;
122
123                         case 'n':
124                                 noupdate = true;
125                                 break;
126
127                         case 'e':
128                                 set_xid_epoch = strtoul(optarg, &endptr, 0);
129                                 if (endptr == optarg || *endptr != '\0')
130                                 {
131                                         fprintf(stderr, _("%s: invalid argument for option -e\n"), progname);
132                                         fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
133                                         exit(1);
134                                 }
135                                 if (set_xid_epoch == -1)
136                                 {
137                                         fprintf(stderr, _("%s: transaction ID epoch (-e) must not be -1\n"), progname);
138                                         exit(1);
139                                 }
140                                 break;
141
142                         case 'x':
143                                 set_xid = strtoul(optarg, &endptr, 0);
144                                 if (endptr == optarg || *endptr != '\0')
145                                 {
146                                         fprintf(stderr, _("%s: invalid argument for option -x\n"), progname);
147                                         fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
148                                         exit(1);
149                                 }
150                                 if (set_xid == 0)
151                                 {
152                                         fprintf(stderr, _("%s: transaction ID (-x) must not be 0\n"), progname);
153                                         exit(1);
154                                 }
155                                 break;
156
157                         case 'o':
158                                 set_oid = strtoul(optarg, &endptr, 0);
159                                 if (endptr == optarg || *endptr != '\0')
160                                 {
161                                         fprintf(stderr, _("%s: invalid argument for option -o\n"), progname);
162                                         fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
163                                         exit(1);
164                                 }
165                                 if (set_oid == 0)
166                                 {
167                                         fprintf(stderr, _("%s: OID (-o) must not be 0\n"), progname);
168                                         exit(1);
169                                 }
170                                 break;
171
172                         case 'm':
173                                 set_mxid = strtoul(optarg, &endptr, 0);
174                                 if (endptr == optarg || *endptr != '\0')
175                                 {
176                                         fprintf(stderr, _("%s: invalid argument for option -m\n"), progname);
177                                         fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
178                                         exit(1);
179                                 }
180                                 if (set_mxid == 0)
181                                 {
182                                         fprintf(stderr, _("%s: multitransaction ID (-m) must not be 0\n"), progname);
183                                         exit(1);
184                                 }
185                                 break;
186
187                         case 'O':
188                                 set_mxoff = strtoul(optarg, &endptr, 0);
189                                 if (endptr == optarg || *endptr != '\0')
190                                 {
191                                         fprintf(stderr, _("%s: invalid argument for option -O\n"), progname);
192                                         fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
193                                         exit(1);
194                                 }
195                                 if (set_mxoff == -1)
196                                 {
197                                         fprintf(stderr, _("%s: multitransaction offset (-O) must not be -1\n"), progname);
198                                         exit(1);
199                                 }
200                                 break;
201
202                         case 'l':
203                                 if (strspn(optarg, "01234567890ABCDEFabcdef") != 24)
204                                 {
205                                         fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
206                                         fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
207                                         exit(1);
208                                 }
209                                 XLogFromFileName(optarg, &minXlogTli, &minXlogSegNo);
210                                 break;
211
212                         default:
213                                 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
214                                 exit(1);
215                 }
216         }
217
218         if (optind == argc)
219         {
220                 fprintf(stderr, _("%s: no data directory specified\n"), progname);
221                 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
222                 exit(1);
223         }
224
225         /*
226          * Don't allow pg_resetxlog to be run as root, to avoid overwriting the
227          * ownership of files in the data directory. We need only check for root
228          * -- any other user won't have sufficient permissions to modify files in
229          * the data directory.
230          */
231 #ifndef WIN32
232         if (geteuid() == 0)
233         {
234                 fprintf(stderr, _("%s: cannot be executed by \"root\"\n"),
235                                 progname);
236                 fprintf(stderr, _("You must run %s as the PostgreSQL superuser.\n"),
237                                 progname);
238                 exit(1);
239         }
240 #endif
241
242         DataDir = argv[optind];
243
244         if (chdir(DataDir) < 0)
245         {
246                 fprintf(stderr, _("%s: could not change directory to \"%s\": %s\n"),
247                                 progname, DataDir, strerror(errno));
248                 exit(1);
249         }
250
251         /*
252          * Check for a postmaster lock file --- if there is one, refuse to
253          * proceed, on grounds we might be interfering with a live installation.
254          */
255         snprintf(path, MAXPGPATH, "%s/postmaster.pid", DataDir);
256
257         if ((fd = open(path, O_RDONLY, 0)) < 0)
258         {
259                 if (errno != ENOENT)
260                 {
261                         fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"), progname, path, strerror(errno));
262                         exit(1);
263                 }
264         }
265         else
266         {
267                 fprintf(stderr, _("%s: lock file \"%s\" exists\n"
268                                                   "Is a server running?  If not, delete the lock file and try again.\n"),
269                                 progname, path);
270                 exit(1);
271         }
272
273         /*
274          * Attempt to read the existing pg_control file
275          */
276         if (!ReadControlFile())
277                 GuessControlValues();
278
279         /*
280          * Also look at existing segment files to set up newXlogSegNo
281          */
282         FindEndOfXLOG();
283
284         /*
285          * Adjust fields if required by switches.  (Do this now so that printout,
286          * if any, includes these values.)
287          */
288         if (set_xid_epoch != -1)
289                 ControlFile.checkPointCopy.nextXidEpoch = set_xid_epoch;
290
291         if (set_xid != 0)
292         {
293                 ControlFile.checkPointCopy.nextXid = set_xid;
294
295                 /*
296                  * For the moment, just set oldestXid to a value that will force
297                  * immediate autovacuum-for-wraparound.  It's not clear whether adding
298                  * user control of this is useful, so let's just do something that's
299                  * reasonably safe.  The magic constant here corresponds to the
300                  * maximum allowed value of autovacuum_freeze_max_age.
301                  */
302                 ControlFile.checkPointCopy.oldestXid = set_xid - 2000000000;
303                 if (ControlFile.checkPointCopy.oldestXid < FirstNormalTransactionId)
304                         ControlFile.checkPointCopy.oldestXid += FirstNormalTransactionId;
305                 ControlFile.checkPointCopy.oldestXidDB = InvalidOid;
306         }
307
308         if (set_oid != 0)
309                 ControlFile.checkPointCopy.nextOid = set_oid;
310
311         if (set_mxid != 0)
312                 ControlFile.checkPointCopy.nextMulti = set_mxid;
313
314         if (set_mxoff != -1)
315                 ControlFile.checkPointCopy.nextMultiOffset = set_mxoff;
316
317         if (minXlogTli > ControlFile.checkPointCopy.ThisTimeLineID)
318                 ControlFile.checkPointCopy.ThisTimeLineID = minXlogTli;
319
320         if (minXlogSegNo > newXlogSegNo)
321                 newXlogSegNo = minXlogSegNo;
322
323         /*
324          * If we had to guess anything, and -f was not given, just print the
325          * guessed values and exit.  Also print if -n is given.
326          */
327         if ((guessed && !force) || noupdate)
328         {
329                 PrintControlValues(guessed);
330                 if (!noupdate)
331                 {
332                         printf(_("\nIf these values seem acceptable, use -f to force reset.\n"));
333                         exit(1);
334                 }
335                 else
336                         exit(0);
337         }
338
339         /*
340          * Don't reset from a dirty pg_control without -f, either.
341          */
342         if (ControlFile.state != DB_SHUTDOWNED && !force)
343         {
344                 printf(_("The database server was not shut down cleanly.\n"
345                            "Resetting the transaction log might cause data to be lost.\n"
346                                  "If you want to proceed anyway, use -f to force reset.\n"));
347                 exit(1);
348         }
349
350         /*
351          * Else, do the dirty deed.
352          */
353         RewriteControlFile();
354         KillExistingXLOG();
355         KillExistingArchiveStatus();
356         WriteEmptyXLOG();
357
358         printf(_("Transaction log reset\n"));
359         return 0;
360 }
361
362
363 /*
364  * Try to read the existing pg_control file.
365  *
366  * This routine is also responsible for updating old pg_control versions
367  * to the current format.  (Currently we don't do anything of the sort.)
368  */
369 static bool
370 ReadControlFile(void)
371 {
372         int                     fd;
373         int                     len;
374         char       *buffer;
375         pg_crc32        crc;
376
377         if ((fd = open(XLOG_CONTROL_FILE, O_RDONLY | PG_BINARY, 0)) < 0)
378         {
379                 /*
380                  * If pg_control is not there at all, or we can't read it, the odds
381                  * are we've been handed a bad DataDir path, so give up. User can do
382                  * "touch pg_control" to force us to proceed.
383                  */
384                 fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"),
385                                 progname, XLOG_CONTROL_FILE, strerror(errno));
386                 if (errno == ENOENT)
387                         fprintf(stderr, _("If you are sure the data directory path is correct, execute\n"
388                                                           "  touch %s\n"
389                                                           "and try again.\n"),
390                                         XLOG_CONTROL_FILE);
391                 exit(1);
392         }
393
394         /* Use malloc to ensure we have a maxaligned buffer */
395         buffer = (char *) malloc(PG_CONTROL_SIZE);
396
397         len = read(fd, buffer, PG_CONTROL_SIZE);
398         if (len < 0)
399         {
400                 fprintf(stderr, _("%s: could not read file \"%s\": %s\n"),
401                                 progname, XLOG_CONTROL_FILE, strerror(errno));
402                 exit(1);
403         }
404         close(fd);
405
406         if (len >= sizeof(ControlFileData) &&
407           ((ControlFileData *) buffer)->pg_control_version == PG_CONTROL_VERSION)
408         {
409                 /* Check the CRC. */
410                 INIT_CRC32(crc);
411                 COMP_CRC32(crc,
412                                    buffer,
413                                    offsetof(ControlFileData, crc));
414                 FIN_CRC32(crc);
415
416                 if (EQ_CRC32(crc, ((ControlFileData *) buffer)->crc))
417                 {
418                         /* Valid data... */
419                         memcpy(&ControlFile, buffer, sizeof(ControlFile));
420                         return true;
421                 }
422
423                 fprintf(stderr, _("%s: pg_control exists but has invalid CRC; proceed with caution\n"),
424                                 progname);
425                 /* We will use the data anyway, but treat it as guessed. */
426                 memcpy(&ControlFile, buffer, sizeof(ControlFile));
427                 guessed = true;
428                 return true;
429         }
430
431         /* Looks like it's a mess. */
432         fprintf(stderr, _("%s: pg_control exists but is broken or unknown version; ignoring it\n"),
433                         progname);
434         return false;
435 }
436
437
438 /*
439  * Guess at pg_control values when we can't read the old ones.
440  */
441 static void
442 GuessControlValues(void)
443 {
444         uint64          sysidentifier;
445         struct timeval tv;
446
447         /*
448          * Set up a completely default set of pg_control values.
449          */
450         guessed = true;
451         memset(&ControlFile, 0, sizeof(ControlFile));
452
453         ControlFile.pg_control_version = PG_CONTROL_VERSION;
454         ControlFile.catalog_version_no = CATALOG_VERSION_NO;
455
456         /*
457          * Create a new unique installation identifier, since we can no longer use
458          * any old XLOG records.  See notes in xlog.c about the algorithm.
459          */
460         gettimeofday(&tv, NULL);
461         sysidentifier = ((uint64) tv.tv_sec) << 32;
462         sysidentifier |= (uint32) (tv.tv_sec | tv.tv_usec);
463
464         ControlFile.system_identifier = sysidentifier;
465
466         ControlFile.checkPointCopy.redo.xlogid = 0;
467         ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogLongPHD;
468         ControlFile.checkPointCopy.ThisTimeLineID = 1;
469         ControlFile.checkPointCopy.fullPageWrites = false;
470         ControlFile.checkPointCopy.nextXidEpoch = 0;
471         ControlFile.checkPointCopy.nextXid = FirstNormalTransactionId;
472         ControlFile.checkPointCopy.nextOid = FirstBootstrapObjectId;
473         ControlFile.checkPointCopy.nextMulti = FirstMultiXactId;
474         ControlFile.checkPointCopy.nextMultiOffset = 0;
475         ControlFile.checkPointCopy.oldestXid = FirstNormalTransactionId;
476         ControlFile.checkPointCopy.oldestXidDB = InvalidOid;
477         ControlFile.checkPointCopy.time = (pg_time_t) time(NULL);
478         ControlFile.checkPointCopy.oldestActiveXid = InvalidTransactionId;
479
480         ControlFile.state = DB_SHUTDOWNED;
481         ControlFile.time = (pg_time_t) time(NULL);
482         ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
483
484         /* minRecoveryPoint, backupStartPoint and backupEndPoint can be left zero */
485
486         ControlFile.wal_level = WAL_LEVEL_MINIMAL;
487         ControlFile.MaxConnections = 100;
488         ControlFile.max_prepared_xacts = 0;
489         ControlFile.max_locks_per_xact = 64;
490
491         ControlFile.maxAlign = MAXIMUM_ALIGNOF;
492         ControlFile.floatFormat = FLOATFORMAT_VALUE;
493         ControlFile.blcksz = BLCKSZ;
494         ControlFile.relseg_size = RELSEG_SIZE;
495         ControlFile.xlog_blcksz = XLOG_BLCKSZ;
496         ControlFile.xlog_seg_size = XLOG_SEG_SIZE;
497         ControlFile.nameDataLen = NAMEDATALEN;
498         ControlFile.indexMaxKeys = INDEX_MAX_KEYS;
499         ControlFile.toast_max_chunk_size = TOAST_MAX_CHUNK_SIZE;
500 #ifdef HAVE_INT64_TIMESTAMP
501         ControlFile.enableIntTimes = true;
502 #else
503         ControlFile.enableIntTimes = false;
504 #endif
505         ControlFile.float4ByVal = FLOAT4PASSBYVAL;
506         ControlFile.float8ByVal = FLOAT8PASSBYVAL;
507
508         /*
509          * XXX eventually, should try to grovel through old XLOG to develop more
510          * accurate values for TimeLineID, nextXID, etc.
511          */
512 }
513
514
515 /*
516  * Print the guessed pg_control values when we had to guess.
517  *
518  * NB: this display should be just those fields that will not be
519  * reset by RewriteControlFile().
520  */
521 static void
522 PrintControlValues(bool guessed)
523 {
524         char            sysident_str[32];
525         char            fname[MAXFNAMELEN];
526
527         if (guessed)
528                 printf(_("Guessed pg_control values:\n\n"));
529         else
530                 printf(_("pg_control values:\n\n"));
531
532         /*
533          * Format system_identifier separately to keep platform-dependent format
534          * code out of the translatable message string.
535          */
536         snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT,
537                          ControlFile.system_identifier);
538
539         XLogFileName(fname, ControlFile.checkPointCopy.ThisTimeLineID, newXlogSegNo);
540
541         printf(_("First log segment after reset:        %s\n"),
542                    fname);
543         printf(_("pg_control version number:            %u\n"),
544                    ControlFile.pg_control_version);
545         printf(_("Catalog version number:               %u\n"),
546                    ControlFile.catalog_version_no);
547         printf(_("Database system identifier:           %s\n"),
548                    sysident_str);
549         printf(_("Latest checkpoint's TimeLineID:       %u\n"),
550                    ControlFile.checkPointCopy.ThisTimeLineID);
551         printf(_("Latest checkpoint's full_page_writes: %s\n"),
552                    ControlFile.checkPointCopy.fullPageWrites ? _("on") : _("off"));
553         printf(_("Latest checkpoint's NextXID:          %u/%u\n"),
554                    ControlFile.checkPointCopy.nextXidEpoch,
555                    ControlFile.checkPointCopy.nextXid);
556         printf(_("Latest checkpoint's NextOID:          %u\n"),
557                    ControlFile.checkPointCopy.nextOid);
558         printf(_("Latest checkpoint's NextMultiXactId:  %u\n"),
559                    ControlFile.checkPointCopy.nextMulti);
560         printf(_("Latest checkpoint's NextMultiOffset:  %u\n"),
561                    ControlFile.checkPointCopy.nextMultiOffset);
562         printf(_("Latest checkpoint's oldestXID:        %u\n"),
563                    ControlFile.checkPointCopy.oldestXid);
564         printf(_("Latest checkpoint's oldestXID's DB:   %u\n"),
565                    ControlFile.checkPointCopy.oldestXidDB);
566         printf(_("Latest checkpoint's oldestActiveXID:  %u\n"),
567                    ControlFile.checkPointCopy.oldestActiveXid);
568         printf(_("Maximum data alignment:               %u\n"),
569                    ControlFile.maxAlign);
570         /* we don't print floatFormat since can't say much useful about it */
571         printf(_("Database block size:                  %u\n"),
572                    ControlFile.blcksz);
573         printf(_("Blocks per segment of large relation: %u\n"),
574                    ControlFile.relseg_size);
575         printf(_("WAL block size:                       %u\n"),
576                    ControlFile.xlog_blcksz);
577         printf(_("Bytes per WAL segment:                %u\n"),
578                    ControlFile.xlog_seg_size);
579         printf(_("Maximum length of identifiers:        %u\n"),
580                    ControlFile.nameDataLen);
581         printf(_("Maximum columns in an index:          %u\n"),
582                    ControlFile.indexMaxKeys);
583         printf(_("Maximum size of a TOAST chunk:        %u\n"),
584                    ControlFile.toast_max_chunk_size);
585         printf(_("Date/time type storage:               %s\n"),
586                    (ControlFile.enableIntTimes ? _("64-bit integers") : _("floating-point numbers")));
587         printf(_("Float4 argument passing:              %s\n"),
588                    (ControlFile.float4ByVal ? _("by value") : _("by reference")));
589         printf(_("Float8 argument passing:              %s\n"),
590                    (ControlFile.float8ByVal ? _("by value") : _("by reference")));
591 }
592
593
594 /*
595  * Write out the new pg_control file.
596  */
597 static void
598 RewriteControlFile(void)
599 {
600         int                     fd;
601         char            buffer[PG_CONTROL_SIZE];                /* need not be aligned */
602
603         /*
604          * Adjust fields as needed to force an empty XLOG starting at
605          * newXlogSegNo.
606          */
607         XLogSegNoOffsetToRecPtr(newXlogSegNo, SizeOfXLogLongPHD,
608                                                         ControlFile.checkPointCopy.redo);
609         ControlFile.checkPointCopy.time = (pg_time_t) time(NULL);
610
611         ControlFile.state = DB_SHUTDOWNED;
612         ControlFile.time = (pg_time_t) time(NULL);
613         ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
614         ControlFile.prevCheckPoint.xlogid = 0;
615         ControlFile.prevCheckPoint.xrecoff = 0;
616         ControlFile.minRecoveryPoint.xlogid = 0;
617         ControlFile.minRecoveryPoint.xrecoff = 0;
618         ControlFile.backupStartPoint.xlogid = 0;
619         ControlFile.backupStartPoint.xrecoff = 0;
620         ControlFile.backupEndPoint.xlogid = 0;
621         ControlFile.backupEndPoint.xrecoff = 0;
622         ControlFile.backupEndRequired = false;
623
624         /*
625          * Force the defaults for max_* settings. The values don't really matter
626          * as long as wal_level='minimal'; the postmaster will reset these fields
627          * anyway at startup.
628          */
629         ControlFile.wal_level = WAL_LEVEL_MINIMAL;
630         ControlFile.MaxConnections = 100;
631         ControlFile.max_prepared_xacts = 0;
632         ControlFile.max_locks_per_xact = 64;
633
634         /* Now we can force the recorded xlog seg size to the right thing. */
635         ControlFile.xlog_seg_size = XLogSegSize;
636
637         /* Contents are protected with a CRC */
638         INIT_CRC32(ControlFile.crc);
639         COMP_CRC32(ControlFile.crc,
640                            (char *) &ControlFile,
641                            offsetof(ControlFileData, crc));
642         FIN_CRC32(ControlFile.crc);
643
644         /*
645          * We write out PG_CONTROL_SIZE bytes into pg_control, zero-padding the
646          * excess over sizeof(ControlFileData).  This reduces the odds of
647          * premature-EOF errors when reading pg_control.  We'll still fail when we
648          * check the contents of the file, but hopefully with a more specific
649          * error than "couldn't read pg_control".
650          */
651         if (sizeof(ControlFileData) > PG_CONTROL_SIZE)
652         {
653                 fprintf(stderr,
654                                 _("%s: internal error -- sizeof(ControlFileData) is too large ... fix PG_CONTROL_SIZE\n"),
655                                 progname);
656                 exit(1);
657         }
658
659         memset(buffer, 0, PG_CONTROL_SIZE);
660         memcpy(buffer, &ControlFile, sizeof(ControlFileData));
661
662         unlink(XLOG_CONTROL_FILE);
663
664         fd = open(XLOG_CONTROL_FILE,
665                           O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
666                           S_IRUSR | S_IWUSR);
667         if (fd < 0)
668         {
669                 fprintf(stderr, _("%s: could not create pg_control file: %s\n"),
670                                 progname, strerror(errno));
671                 exit(1);
672         }
673
674         errno = 0;
675         if (write(fd, buffer, PG_CONTROL_SIZE) != PG_CONTROL_SIZE)
676         {
677                 /* if write didn't set errno, assume problem is no disk space */
678                 if (errno == 0)
679                         errno = ENOSPC;
680                 fprintf(stderr, _("%s: could not write pg_control file: %s\n"),
681                                 progname, strerror(errno));
682                 exit(1);
683         }
684
685         if (fsync(fd) != 0)
686         {
687                 fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno));
688                 exit(1);
689         }
690
691         close(fd);
692 }
693
694
695 /*
696  * Scan existing XLOG files and determine the highest existing WAL address
697  *
698  * On entry, ControlFile.checkPointCopy.redo and ControlFile.xlog_seg_size
699  * are assumed valid (note that we allow the old xlog seg size to differ
700  * from what we're using).  On exit, newXlogSegNo is set to a suitable
701  * value for the beginning of replacement WAL (in our seg size).
702  */
703 static void
704 FindEndOfXLOG(void)
705 {
706         DIR                *xldir;
707         struct dirent *xlde;
708         uint64          segs_per_xlogid;
709         uint64          xlogbytepos;
710
711         /*
712          * Initialize the max() computation using the last checkpoint address from
713          * old pg_control.      Note that for the moment we are working with segment
714          * numbering according to the old xlog seg size.
715          */
716         segs_per_xlogid = (0x100000000L / ControlFile.xlog_seg_size);
717         newXlogSegNo = ((uint64) ControlFile.checkPointCopy.redo.xlogid) * segs_per_xlogid
718                 + (ControlFile.checkPointCopy.redo.xrecoff / ControlFile.xlog_seg_size);
719
720         /*
721          * Scan the pg_xlog directory to find existing WAL segment files. We
722          * assume any present have been used; in most scenarios this should be
723          * conservative, because of xlog.c's attempts to pre-create files.
724          */
725         xldir = opendir(XLOGDIR);
726         if (xldir == NULL)
727         {
728                 fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
729                                 progname, XLOGDIR, strerror(errno));
730                 exit(1);
731         }
732
733         errno = 0;
734         while ((xlde = readdir(xldir)) != NULL)
735         {
736                 if (strlen(xlde->d_name) == 24 &&
737                         strspn(xlde->d_name, "0123456789ABCDEF") == 24)
738                 {
739                         unsigned int tli,
740                                                 log,
741                                                 seg;
742                         XLogSegNo       segno;
743
744                         sscanf(xlde->d_name, "%08X%08X%08X", &tli, &log, &seg);
745                         segno = ((uint64) log) * segs_per_xlogid + seg;
746
747                         /*
748                          * Note: we take the max of all files found, regardless of their
749                          * timelines.  Another possibility would be to ignore files of
750                          * timelines other than the target TLI, but this seems safer.
751                          * Better too large a result than too small...
752                          */
753                         if (segno > newXlogSegNo)
754                                 newXlogSegNo = segno;
755                 }
756                 errno = 0;
757         }
758 #ifdef WIN32
759
760         /*
761          * This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but not in
762          * released version
763          */
764         if (GetLastError() == ERROR_NO_MORE_FILES)
765                 errno = 0;
766 #endif
767
768         if (errno)
769         {
770                 fprintf(stderr, _("%s: could not read from directory \"%s\": %s\n"),
771                                 progname, XLOGDIR, strerror(errno));
772                 exit(1);
773         }
774         closedir(xldir);
775
776         /*
777          * Finally, convert to new xlog seg size, and advance by one to ensure we
778          * are in virgin territory.
779          */
780         xlogbytepos = newXlogSegNo * ControlFile.xlog_seg_size;
781         newXlogSegNo = (xlogbytepos + XLogSegSize - 1) / XLogSegSize;
782         newXlogSegNo++;
783 }
784
785
786 /*
787  * Remove existing XLOG files
788  */
789 static void
790 KillExistingXLOG(void)
791 {
792         DIR                *xldir;
793         struct dirent *xlde;
794         char            path[MAXPGPATH];
795
796         xldir = opendir(XLOGDIR);
797         if (xldir == NULL)
798         {
799                 fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
800                                 progname, XLOGDIR, strerror(errno));
801                 exit(1);
802         }
803
804         errno = 0;
805         while ((xlde = readdir(xldir)) != NULL)
806         {
807                 if (strlen(xlde->d_name) == 24 &&
808                         strspn(xlde->d_name, "0123456789ABCDEF") == 24)
809                 {
810                         snprintf(path, MAXPGPATH, "%s/%s", XLOGDIR, xlde->d_name);
811                         if (unlink(path) < 0)
812                         {
813                                 fprintf(stderr, _("%s: could not delete file \"%s\": %s\n"),
814                                                 progname, path, strerror(errno));
815                                 exit(1);
816                         }
817                 }
818                 errno = 0;
819         }
820 #ifdef WIN32
821
822         /*
823          * This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but not in
824          * released version
825          */
826         if (GetLastError() == ERROR_NO_MORE_FILES)
827                 errno = 0;
828 #endif
829
830         if (errno)
831         {
832                 fprintf(stderr, _("%s: could not read from directory \"%s\": %s\n"),
833                                 progname, XLOGDIR, strerror(errno));
834                 exit(1);
835         }
836         closedir(xldir);
837 }
838
839
840 /*
841  * Remove existing archive status files
842  */
843 static void
844 KillExistingArchiveStatus(void)
845 {
846         DIR                *xldir;
847         struct dirent *xlde;
848         char            path[MAXPGPATH];
849
850 #define ARCHSTATDIR XLOGDIR "/archive_status"
851
852         xldir = opendir(ARCHSTATDIR);
853         if (xldir == NULL)
854         {
855                 fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
856                                 progname, ARCHSTATDIR, strerror(errno));
857                 exit(1);
858         }
859
860         errno = 0;
861         while ((xlde = readdir(xldir)) != NULL)
862         {
863                 if (strspn(xlde->d_name, "0123456789ABCDEF") == 24 &&
864                         (strcmp(xlde->d_name + 24, ".ready") == 0 ||
865                          strcmp(xlde->d_name + 24, ".done") == 0))
866                 {
867                         snprintf(path, MAXPGPATH, "%s/%s", ARCHSTATDIR, xlde->d_name);
868                         if (unlink(path) < 0)
869                         {
870                                 fprintf(stderr, _("%s: could not delete file \"%s\": %s\n"),
871                                                 progname, path, strerror(errno));
872                                 exit(1);
873                         }
874                 }
875                 errno = 0;
876         }
877 #ifdef WIN32
878
879         /*
880          * This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but not in
881          * released version
882          */
883         if (GetLastError() == ERROR_NO_MORE_FILES)
884                 errno = 0;
885 #endif
886
887         if (errno)
888         {
889                 fprintf(stderr, _("%s: could not read from directory \"%s\": %s\n"),
890                                 progname, ARCHSTATDIR, strerror(errno));
891                 exit(1);
892         }
893         closedir(xldir);
894 }
895
896
897 /*
898  * Write an empty XLOG file, containing only the checkpoint record
899  * already set up in ControlFile.
900  */
901 static void
902 WriteEmptyXLOG(void)
903 {
904         char       *buffer;
905         XLogPageHeader page;
906         XLogLongPageHeader longpage;
907         XLogRecord *record;
908         pg_crc32        crc;
909         char            path[MAXPGPATH];
910         int                     fd;
911         int                     nbytes;
912
913         /* Use malloc() to ensure buffer is MAXALIGNED */
914         buffer = (char *) malloc(XLOG_BLCKSZ);
915         page = (XLogPageHeader) buffer;
916         memset(buffer, 0, XLOG_BLCKSZ);
917
918         /* Set up the XLOG page header */
919         page->xlp_magic = XLOG_PAGE_MAGIC;
920         page->xlp_info = XLP_LONG_HEADER;
921         page->xlp_tli = ControlFile.checkPointCopy.ThisTimeLineID;
922         page->xlp_pageaddr.xlogid =
923                 ControlFile.checkPointCopy.redo.xlogid;
924         page->xlp_pageaddr.xrecoff =
925                 ControlFile.checkPointCopy.redo.xrecoff - SizeOfXLogLongPHD;
926         longpage = (XLogLongPageHeader) page;
927         longpage->xlp_sysid = ControlFile.system_identifier;
928         longpage->xlp_seg_size = XLogSegSize;
929         longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
930
931         /* Insert the initial checkpoint record */
932         record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD);
933         record->xl_prev.xlogid = 0;
934         record->xl_prev.xrecoff = 0;
935         record->xl_xid = InvalidTransactionId;
936         record->xl_tot_len = SizeOfXLogRecord + sizeof(CheckPoint);
937         record->xl_len = sizeof(CheckPoint);
938         record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
939         record->xl_rmid = RM_XLOG_ID;
940         memcpy(XLogRecGetData(record), &ControlFile.checkPointCopy,
941                    sizeof(CheckPoint));
942
943         INIT_CRC32(crc);
944         COMP_CRC32(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint));
945         COMP_CRC32(crc, (char *) record + sizeof(pg_crc32),
946                            SizeOfXLogRecord - sizeof(pg_crc32));
947         FIN_CRC32(crc);
948         record->xl_crc = crc;
949
950         /* Write the first page */
951         XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID, newXlogSegNo);
952
953         unlink(path);
954
955         fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
956                           S_IRUSR | S_IWUSR);
957         if (fd < 0)
958         {
959                 fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
960                                 progname, path, strerror(errno));
961                 exit(1);
962         }
963
964         errno = 0;
965         if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
966         {
967                 /* if write didn't set errno, assume problem is no disk space */
968                 if (errno == 0)
969                         errno = ENOSPC;
970                 fprintf(stderr, _("%s: could not write file \"%s\": %s\n"),
971                                 progname, path, strerror(errno));
972                 exit(1);
973         }
974
975         /* Fill the rest of the file with zeroes */
976         memset(buffer, 0, XLOG_BLCKSZ);
977         for (nbytes = XLOG_BLCKSZ; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
978         {
979                 errno = 0;
980                 if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
981                 {
982                         if (errno == 0)
983                                 errno = ENOSPC;
984                         fprintf(stderr, _("%s: could not write file \"%s\": %s\n"),
985                                         progname, path, strerror(errno));
986                         exit(1);
987                 }
988         }
989
990         if (fsync(fd) != 0)
991         {
992                 fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno));
993                 exit(1);
994         }
995
996         close(fd);
997 }
998
999
1000 static void
1001 usage(void)
1002 {
1003         printf(_("%s resets the PostgreSQL transaction log.\n\n"), progname);
1004         printf(_("Usage:\n  %s [OPTION]... DATADIR\n\n"), progname);
1005         printf(_("Options:\n"));
1006         printf(_("  -e XIDEPOCH      set next transaction ID epoch\n"));
1007         printf(_("  -f               force update to be done\n"));
1008         printf(_("  -l TLI,FILE,SEG  force minimum WAL starting location for new transaction log\n"));
1009         printf(_("  -m XID           set next multitransaction ID\n"));
1010         printf(_("  -n               no update, just show extracted control values (for testing)\n"));
1011         printf(_("  -o OID           set next OID\n"));
1012         printf(_("  -O OFFSET        set next multitransaction offset\n"));
1013         printf(_("  -V, --version    output version information, then exit\n"));
1014         printf(_("  -x XID           set next transaction ID\n"));
1015         printf(_("  -?, --help       show this help, then exit\n"));
1016         printf(_("\nReport bugs to <pgsql-bugs@postgresql.org>.\n"));
1017 }