]> granicus.if.org Git - postgresql/blob - src/bin/pg_resetxlog/pg_resetxlog.c
In pg_resetxlog.c, uint -> uint32, for Win32 port.
[postgresql] / src / bin / pg_resetxlog / pg_resetxlog.c
1 /*-------------------------------------------------------------------------
2  *
3  * pg_resetxlog.c
4  *        A utility to "zero out" the xlog when it's corrupt beyond recovery.
5  *        Can also rebuild pg_control if needed.
6  *
7  * The theory of reset operation is fairly simple:
8  *        1. Read the existing pg_control (which will include the last
9  *               checkpoint record).  If it is an old format then update to
10  *               current format.
11  *        2. If pg_control is corrupt, attempt to rebuild the values
12  *               by scanning the old xlog; if that fails, try to guess them.
13  *        3. Modify pg_control to reflect a "shutdown" state with a checkpoint
14  *               record at the start of xlog.
15  *        4. Flush the existing xlog files and write a new segment with
16  *               just a checkpoint record in it.  The new segment is positioned
17  *               just past the end of the old xlog, so that existing LSNs in
18  *               data pages will appear to be "in the past".
19  *
20  * The algorithm for restoring the pg_control values from old xlog files:
21  *      1. Collect all of the active xlog files from the xlog directory into a list,
22  *         in increasing order of timeline, log id, and segment id.
23  *      2. Search the list to find the oldest xlog file of the latest timeline.
24  *      3. Scan the records from the oldest xlog file of the latest timeline
25  *         to the latest xlog file of the latest timeline; whenever a checkpoint
26  *         record is found, update the latest checkpoint and previous checkpoint.
27  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
28  * Portions Copyright (c) 1994, Regents of the University of California
29  *
30  *
31  *-------------------------------------------------------------------------
32  */
33 #include "postgres.h"
34
35 #include <dirent.h>
36 #include <fcntl.h>
37 #include <locale.h>
38 #include <sys/stat.h>
39 #include <sys/time.h>
40 #include <time.h>
41 #include <unistd.h>
42 #ifdef HAVE_GETOPT_H
43 #include <getopt.h>
44 #endif
45
46 #include "access/multixact.h"
47 #include "access/xlog.h"
48 #include "access/xlog_internal.h"
49 #include "catalog/catversion.h"
50 #include "catalog/pg_control.h"
51
52 #define GUESS   0
53 #define WAL     1
54
55 extern int      optind;
56 extern char *optarg;
57
58
59 static ControlFileData ControlFile;             /* pg_control values; read from disk, rebuilt, and rewritten in place */
60 static uint32 newXlogId,
61                         newXlogSeg;                     /* ID/Segment of new XLOG segment; computed by UpdateCtlFile4Reset() */
62 static const char *progname;                    /* program name from get_progname(argv[0]); prefixes error messages */
63 static uint64           sysidentifier=-1;       /* copied into ControlFile.system_identifier by RestoreControlValues(); regenerated in GUESS mode. NOTE(review): presumably also set during the WAL scan -- not visible here */
64
65 /* 
66  * Singly linked list node describing one active xlog file found in the
67  * xlog directory.  The list is kept in increasing order of timeline,
68  * log id, and segment id.
69  * 
70  */
71 typedef struct XLogFileName {
72         TimeLineID tli;                 /* timeline the segment file belongs to */
73         uint32 logid;                   /* log id of the segment file */
74         uint32 seg;                     /* segment id of the segment file */
75         char fname[256];                /* file name of the segment (fixed 256-byte buffer) */
76         struct XLogFileName *next;      /* next entry in the ordered list */
77 }       XLogFileName;
78
79 /* Head of the ordered list of xlog files; handed to CleanUpList() after a WAL scan. */
80 static XLogFileName *xlogfilelist=NULL;
81
82 /* LastXLogFile is the latest file in the latest timeline;
83    CurXLogFile is the oldest file in the latest timeline.
84    */
85 static XLogFileName *CurXLogFile, *LastXLogFile; 
86
87 /* The last checkpoint record found in the xlog files; copied into ControlFile.checkPointCopy. */
88 static CheckPoint      lastcheckpoint;
89
90 /* Locations of the last and the previous checkpoint found in the xlog files. */
91 static XLogRecPtr       prevchkp, lastchkp; 
92
93 /* Database state to record in the restored control file. */
94 static DBState  state=DB_SHUTDOWNED; 
95
96 /* Total number of checkpoints found so far in the xlog files. */
97 static int              found_checkpoint=0;     
98
99
100 static bool ReadControlFile(void);
101 static bool RestoreControlValues(int mode);
102 static void PrintControlValues(void);
103 static void UpdateCtlFile4Reset(void);
104 static void RewriteControlFile(void);
105 static void KillExistingXLOG(void);
106 static void WriteEmptyXLOG(void);
107 static void usage(void);
108
109 static void GetXLogFiles(void);
110 static bool ValidXLogFileName(char * fname);
111 static bool ValidXLogFileHeader(XLogFileName *segfile);
112 static bool ValidXLOGPageHeader(XLogPageHeader hdr, uint32 tli, uint32 id, uint32 seg);
113 static bool CmpXLogFileOT(XLogFileName * f1, XLogFileName *f2);
114 static bool IsNextSeg(XLogFileName *prev, XLogFileName *cur);
115 static void InsertXLogFile( char * fname );
116 static bool ReadXLogPage(void);
117 static bool RecordIsValid(XLogRecord *record, XLogRecPtr recptr);
118 static bool FetchRecord(void);
119 static void UpdateCheckPoint(XLogRecord *record);
120 static void SelectStartXLog(void);
121 static int SearchLastCheckpoint(void);
122 static int OpenXLogFile(XLogFileName *sf);
123 static void CleanUpList(XLogFileName *list);
124
125 int
126 main(int argc, char *argv[])
127 {
128         int                     c;
129         bool            force = false;
130         bool            restore = false;
131         bool            noupdate = false;
132         TransactionId set_xid = 0;
133         Oid                     set_oid = 0;
134         MultiXactId set_mxid = 0;
135         MultiXactOffset set_mxoff = -1;
136         uint32          minXlogTli = 0,
137                                 minXlogId = 0,
138                                 minXlogSeg = 0;
139         char       *endptr;
140         char       *endptr2;
141         char       *endptr3;
142         char       *DataDir;
143         int                     fd;
144         char            path[MAXPGPATH];
145         bool            ctlcorrupted = false;
146         bool            PidLocked = false;
147         
148         set_pglocale_pgservice(argv[0], "pg_resetxlog");
149
150         progname = get_progname(argv[0]);
151
152         if (argc > 1)
153         {
154                 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
155                 {
156                         usage();
157                         exit(0);
158                 }
159                 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
160                 {
161                         puts("pg_resetxlog (PostgreSQL) " PG_VERSION);
162                         exit(0);
163                 }
164         }
165
166
167         while ((c = getopt(argc, argv, "fl:m:no:O:x:r")) != -1)
168         {
169                 switch (c)
170                 {
171                         case 'f':
172                                 force = true;
173                                 break;
174                                 
175                         case 'r':
176                                 restore = true;
177                                 break;
178                                 
179                         case 'n':
180                                 noupdate = true;
181                                 break;
182
183                         case 'x':
184                                 set_xid = strtoul(optarg, &endptr, 0);
185                                 if (endptr == optarg || *endptr != '\0')
186                                 {
187                                         fprintf(stderr, _("%s: invalid argument for option -x\n"), progname);
188                                         fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
189                                         exit(1);
190                                 }
191                                 if (set_xid == 0)
192                                 {
193                                         fprintf(stderr, _("%s: transaction ID (-x) must not be 0\n"), progname);
194                                         exit(1);
195                                 }
196                                 break;
197
198                         case 'o':
199                                 set_oid = strtoul(optarg, &endptr, 0);
200                                 if (endptr == optarg || *endptr != '\0')
201                                 {
202                                         fprintf(stderr, _("%s: invalid argument for option -o\n"), progname);
203                                         fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
204                                         exit(1);
205                                 }
206                                 if (set_oid == 0)
207                                 {
208                                         fprintf(stderr, _("%s: OID (-o) must not be 0\n"), progname);
209                                         exit(1);
210                                 }
211                                 break;
212
213                         case 'm':
214                                 set_mxid = strtoul(optarg, &endptr, 0);
215                                 if (endptr == optarg || *endptr != '\0')
216                                 {
217                                         fprintf(stderr, _("%s: invalid argument for option -m\n"), progname);
218                                         fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
219                                         exit(1);
220                                 }
221                                 if (set_mxid == 0)
222                                 {
223                                         fprintf(stderr, _("%s: multitransaction ID (-m) must not be 0\n"), progname);
224                                         exit(1);
225                                 }
226                                 break;
227
228                         case 'O':
229                                 set_mxoff = strtoul(optarg, &endptr, 0);
230                                 if (endptr == optarg || *endptr != '\0')
231                                 {
232                                         fprintf(stderr, _("%s: invalid argument for option -O\n"), progname);
233                                         fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
234                                         exit(1);
235                                 }
236                                 if (set_mxoff == -1)
237                                 {
238                                         fprintf(stderr, _("%s: multitransaction offset (-O) must not be -1\n"), progname);
239                                         exit(1);
240                                 }
241                                 break;
242
243                         case 'l':
244                                 minXlogTli = strtoul(optarg, &endptr, 0);
245                                 if (endptr == optarg || *endptr != ',')
246                                 {
247                                         fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
248                                         fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
249                                         exit(1);
250                                 }
251                                 minXlogId = strtoul(endptr + 1, &endptr2, 0);
252                                 if (endptr2 == endptr + 1 || *endptr2 != ',')
253                                 {
254                                         fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
255                                         fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
256                                         exit(1);
257                                 }
258                                 minXlogSeg = strtoul(endptr2 + 1, &endptr3, 0);
259                                 if (endptr3 == endptr2 + 1 || *endptr3 != '\0')
260                                 {
261                                         fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
262                                         fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
263                                         exit(1);
264                                 }
265                                 break;
266
267                         default:
268                                 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
269                                 exit(1);
270                 }
271         }
272
273         if (optind == argc)
274         {
275                 fprintf(stderr, _("%s: no data directory specified\n"), progname);
276                 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
277                 exit(1);
278         }
279
280         /*
281          * Don't allow pg_resetxlog to be run as root, to avoid overwriting the
282          * ownership of files in the data directory. We need only check for root
283          * -- any other user won't have sufficient permissions to modify files in
284          * the data directory.
285          */
286 #ifndef WIN32
287         if (geteuid() == 0)
288         {
289                 fprintf(stderr, _("%s: cannot be executed by \"root\"\n"),
290                                 progname);
291                 fprintf(stderr, _("You must run %s as the PostgreSQL superuser.\n"),
292                                 progname);
293                 exit(1);
294         }
295 #endif
296
297         DataDir = argv[optind];
298
299         if (chdir(DataDir) < 0)
300         {
301                 fprintf(stderr, _("%s: could not change directory to \"%s\": %s\n"),
302                                 progname, DataDir, strerror(errno));
303                 exit(1);
304         }
305
306         /*
307          * Check for a postmaster lock file --- if there is one, refuse to
308          * proceed, on grounds we might be interfering with a live installation.
309          */
310         snprintf(path, MAXPGPATH, "%s/postmaster.pid", DataDir);
311
312         if ((fd = open(path, O_RDONLY)) < 0)
313         {
314                 if (errno != ENOENT)
315                 {
316                         fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"), progname, path, strerror(errno));
317                         exit(1);
318                 }
319         }
320         else
321         {
322                 PidLocked = true;
323         }
324
325         /*
326          * Attempt to read the existing pg_control file
327          */
328         if (!ReadControlFile())
329         {
330                 /* The control file has been corruptted.*/
331                 ctlcorrupted = true;
332         }
333
334         /*
335          * Adjust fields if required by switches.  (Do this now so that printout,
336          * if any, includes these values.)
337          */
338         if (set_xid != 0)
339                 ControlFile.checkPointCopy.nextXid = set_xid;
340
341         if (set_oid != 0)
342                 ControlFile.checkPointCopy.nextOid = set_oid;
343
344         if (set_mxid != 0)
345                 ControlFile.checkPointCopy.nextMulti = set_mxid;
346
347         if (set_mxoff != -1)
348                 ControlFile.checkPointCopy.nextMultiOffset = set_mxoff;
349
350         if (minXlogTli > ControlFile.checkPointCopy.ThisTimeLineID)
351                 ControlFile.checkPointCopy.ThisTimeLineID = minXlogTli;
352
353         if (minXlogId > ControlFile.logId ||
354                 (minXlogId == ControlFile.logId &&
355                  minXlogSeg > ControlFile.logSeg))
356         {
357                 ControlFile.logId = minXlogId;
358                 ControlFile.logSeg = minXlogSeg;
359         }
360
361         /* retore the broken control file from WAL file.*/
362         if (restore)
363         {
364
365                 /* If the control fine is fine, don't touch it.*/
366                 if ( !ctlcorrupted )
367                 {
368                         printf(_("\nThe control file seems fine, not need to restore it.\n"));
369                         printf(_("If you want to restore it anyway, use -f option, but this also will reset the log file.\n"));
370                         exit(0);
371                 }
372                 
373                 
374                 /* Try to restore control values from old xlog file, or complain it.*/
375                 if (RestoreControlValues(WAL))
376                 {
377                         /* Success in restoring the checkpoint information from old xlog file.*/
378                         
379                         /* Print it out.*/
380                         PrintControlValues();
381
382                         /* In case the postmaster is crashed.
383                          * But it may be dangerous for the living one.
384                          * It may need a more good way.
385                          */
386                         if (PidLocked)
387                         {
388                                 ControlFile.state = DB_IN_PRODUCTION;
389                         }
390                         /* Write the new control file. */
391                         RewriteControlFile();
392                         printf(_("\nThe control file had been restored.\n"));
393                 } 
394                 else 
395                 { 
396                         /* Fail in restoring the checkpoint information from old xlog file. */
397                         printf(_("\nCan not restore the control file from XLog file..\n"));
398                         printf(_("\nIf you want to restore it anyway, use -f option to guess the information, but this also will reset the log file.\n"));
399                 }
400
401                 exit(0);
402                 
403         }       
404         if (PidLocked)
405         {  
406                 fprintf(stderr, _("%s: lock file \"%s\" exists\n"
407                                                   "Is a server running?  If not, delete the lock file and try again.\n"),
408                                 progname, path);
409                 exit(1);
410
411         }
412         /*
413         * Print out the values in control file if -n is given. if the control file is 
414         * corrupted, then inform user to restore it first.
415          */
416         if (noupdate)
417         {
418                 if (!ctlcorrupted)
419                 {
420                         /* The control file is fine, print the values out.*/
421                         PrintControlValues();
422                         exit(0);
423                 }
424                 else{
425                         /* The control file is corrupted.*/
426                         printf(_("The control file had been corrupted.\n"));
427                         printf(_("Please use -r option to restore it first.\n"));
428                         exit(1);
429                         }
430         }
431
432         /*
433          * Don't reset from a dirty pg_control without -f, either.
434          */
435         if (ControlFile.state != DB_SHUTDOWNED && !force && !ctlcorrupted)
436         {
437                 printf(_("The database server was not shut down cleanly.\n"
438                                  "Resetting the transaction log may cause data to be lost.\n"
439                                  "If you want to proceed anyway, use -f to force reset.\n"));
440                 exit(1);
441         }
442
443 /*
444          * Try to reset the xlog file.
445          */
446          
447         /* If the control file is corrupted, and -f option is given, resotre it first.*/
448         if ( ctlcorrupted )
449         {
450                 if (force)
451                 {
452                         if (!RestoreControlValues(WAL))
453                         {
454                                 printf(_("fails to recover the control file from old xlog files, so we had to guess it.\n"));
455                                 RestoreControlValues(GUESS);
456                         }
457                         printf(_("Restored the control file from old xlog files.\n"));
458                 }
459                 else
460                 {
461                         printf(_("Control file corrupted.\nIf you want to proceed anyway, use -f to force reset.\n"));
462                         exit(1);
463                         }
464         } 
465         
466         /* Reset the xlog fille.*/
467         UpdateCtlFile4Reset();
468         RewriteControlFile();
469         KillExistingXLOG();
470         WriteEmptyXLOG();
471         printf(_("Transaction log reset\n"));   
472         return 0;
473 }
474
475
476 /*
477  * Try to read the existing pg_control file.
478  *
479  * This routine is also responsible for updating old pg_control versions
480  * to the current format.  (Currently we don't do anything of the sort.)
481  */
482 static bool
483 ReadControlFile(void)
484 {
485         int                     fd;
486         int                     len;
487         char       *buffer;
488         pg_crc32        crc;
489
490         if ((fd = open(XLOG_CONTROL_FILE, O_RDONLY)) < 0)
491         {
492                 /*
493                  * If pg_control is not there at all, or we can't read it, the odds
494                  * are we've been handed a bad DataDir path, so give up. User can do
495                  * "touch pg_control" to force us to proceed.
496                  */
497                 fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"),
498                                 progname, XLOG_CONTROL_FILE, strerror(errno));
499                 if (errno == ENOENT)
500                         fprintf(stderr, _("If you are sure the data directory path is correct, execute\n"
501                                                           "  touch %s\n"
502                                                           "and try again.\n"),
503                                         XLOG_CONTROL_FILE);
504                 exit(1);
505         }
506
507         /* Use malloc to ensure we have a maxaligned buffer */
508         buffer = (char *) malloc(PG_CONTROL_SIZE);
509
510         len = read(fd, buffer, PG_CONTROL_SIZE);
511         if (len < 0)
512         {
513                 fprintf(stderr, _("%s: could not read file \"%s\": %s\n"),
514                                 progname, XLOG_CONTROL_FILE, strerror(errno));
515                 exit(1);
516         }
517         close(fd);
518
519         if (len >= sizeof(ControlFileData) &&
520           ((ControlFileData *) buffer)->pg_control_version == PG_CONTROL_VERSION)
521         {
522                 /* Check the CRC. */
523                 INIT_CRC32(crc);
524                 COMP_CRC32(crc,
525                                    buffer,
526                                    offsetof(ControlFileData, crc));
527                 FIN_CRC32(crc);
528
529                 if (EQ_CRC32(crc, ((ControlFileData *) buffer)->crc))
530                 {
531                         /* Valid data... */
532                         memcpy(&ControlFile, buffer, sizeof(ControlFile));
533                         return true;
534                 }
535
536                 fprintf(stderr, _("%s: pg_control exists but has invalid CRC; proceed with caution\n"),
537                                 progname);
538                 /* We will use the data anyway, but treat it as guessed. */
539                 memcpy(&ControlFile, buffer, sizeof(ControlFile));
540                 return true;
541         }
542
543         /* Looks like it's a mess. */
544         fprintf(stderr, _("%s: pg_control exists but is broken or unknown version; ignoring it\n"),
545                         progname);
546         return false;
547 }
548
549
550
551
552 /*
553  *  Restore the pg_control values by scanning old xlog files or by guessing it.
554  *
555  * Input parameter:
556  *      WAL:  Restore the pg_control values by scanning old xlog files.
557  *      GUESS: Restore the pg_control values by guessing.
558  * Return:
559  *      TRUE: success in restoring.
560  *      FALSE: fail to restore the values. 
561  * 
562  */
563 static bool 
564 RestoreControlValues(int mode)
565 {
566         struct timeval tv;
567         char       *localeptr;
568         bool    successed = true;
569
570         /*
571          * Set up a completely default set of pg_control values.
572          */
573         memset(&ControlFile, 0, sizeof(ControlFile));
574
575         ControlFile.pg_control_version = PG_CONTROL_VERSION;
576         ControlFile.catalog_version_no = CATALOG_VERSION_NO;
577
578         /* 
579          * update the checkpoint value in control file,by searching 
580          * xlog segment file, or just guessing it.
581          */
582         if (mode == WAL)
583         {
584                 int result = SearchLastCheckpoint();
585
586                 if (result > 0) /* The last checkpoint had been found. */
587                 {
588                         ControlFile.checkPointCopy = lastcheckpoint;
589                         ControlFile.checkPointCopy.ThisTimeLineID = LastXLogFile->tli;
590                         ControlFile.checkPoint = lastchkp;
591                         ControlFile.prevCheckPoint = prevchkp;
592
593                         ControlFile.logId = LastXLogFile->logid;
594                         ControlFile.logSeg = LastXLogFile->seg + 1;
595                         ControlFile.state = state;
596                 }
597                 else
598                         successed = false;
599                 
600                 /* Clean up the list. */
601                 CleanUpList(xlogfilelist);              
602         }
603         else    /* GUESS */
604         {
605                 ControlFile.checkPointCopy.ThisTimeLineID = 2;
606                 ControlFile.checkPointCopy.redo.xlogid = 0;
607                 ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogLongPHD;
608                 ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
609                 ControlFile.checkPointCopy.nextXid = (TransactionId) 514;       /* XXX */
610                 ControlFile.checkPointCopy.nextOid = FirstBootstrapObjectId;
611                 ControlFile.checkPointCopy.nextMulti = FirstMultiXactId;
612                 ControlFile.checkPointCopy.nextMultiOffset = 0;
613                 ControlFile.checkPointCopy.time = time(NULL);
614                 ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
615
616                 /*
617                  * Create a new unique installation identifier, since we can no longer
618                  * use any old XLOG records.  See notes in xlog.c about the algorithm.
619                  */
620                 gettimeofday(&tv, NULL);
621                 sysidentifier = ((uint64) tv.tv_sec) << 32;
622                 sysidentifier |= (uint32) (tv.tv_sec | tv.tv_usec);
623                 ControlFile.state = DB_SHUTDOWNED;
624                 
625         }
626
627         ControlFile.time = time(NULL);
628         ControlFile.system_identifier = sysidentifier;
629         ControlFile.maxAlign = MAXIMUM_ALIGNOF;
630         ControlFile.floatFormat = FLOATFORMAT_VALUE;
631         ControlFile.blcksz = BLCKSZ;
632         ControlFile.relseg_size = RELSEG_SIZE;
633         ControlFile.xlog_blcksz = XLOG_BLCKSZ;
634         ControlFile.xlog_seg_size = XLOG_SEG_SIZE;
635         ControlFile.nameDataLen = NAMEDATALEN;
636         ControlFile.indexMaxKeys = INDEX_MAX_KEYS;
637 #ifdef HAVE_INT64_TIMESTAMP
638         ControlFile.enableIntTimes = TRUE;
639 #else
640         ControlFile.enableIntTimes = FALSE;
641 #endif
642         ControlFile.localeBuflen = LOCALE_NAME_BUFLEN;
643
644         localeptr = setlocale(LC_COLLATE, "");
645         if (!localeptr)
646         {
647                 fprintf(stderr, _("%s: invalid LC_COLLATE setting\n"), progname);
648                 exit(1);
649         }
650         StrNCpy(ControlFile.lc_collate, localeptr, LOCALE_NAME_BUFLEN);
651
652         localeptr = setlocale(LC_CTYPE, "");
653         if (!localeptr)
654         {
655                 fprintf(stderr, _("%s: invalid LC_CTYPE setting\n"), progname);
656                 exit(1);
657         }
658         StrNCpy(ControlFile.lc_ctype, localeptr, LOCALE_NAME_BUFLEN);
659
660         return successed;       
661 }
662
663
664 /*
665  * Print the out pg_control values.
666  *
667  * NB: this display should be just those fields that will not be
668  * reset by RewriteControlFile().
669  */
670 static void
671 PrintControlValues(void)
672 {
673         char            sysident_str[32];
674
675         printf(_("pg_control values:\n\n"));
676
677         /*
678          * Format system_identifier separately to keep platform-dependent format
679          * code out of the translatable message string.
680          */
681         snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT,
682                          ControlFile.system_identifier);
683
684         printf(_("pg_control version number:            %u\n"), ControlFile.pg_control_version);
685         printf(_("Catalog version number:               %u\n"), ControlFile.catalog_version_no);
686         printf(_("Database system identifier:           %s\n"), sysident_str);
687         printf(_("Current log file ID:                  %u\n"), ControlFile.logId);
688         printf(_("Next log file segment:                %u\n"), ControlFile.logSeg);
689         printf(_("Latest checkpoint's TimeLineID:       %u\n"), ControlFile.checkPointCopy.ThisTimeLineID);
690         printf(_("Latest checkpoint's NextXID:          %u\n"), ControlFile.checkPointCopy.nextXid);
691         printf(_("Latest checkpoint's NextOID:          %u\n"), ControlFile.checkPointCopy.nextOid);
692         printf(_("Latest checkpoint's NextMultiXactId:  %u\n"), ControlFile.checkPointCopy.nextMulti);
693         printf(_("Latest checkpoint's NextMultiOffset:  %u\n"), ControlFile.checkPointCopy.nextMultiOffset);
694         printf(_("Maximum data alignment:               %u\n"), ControlFile.maxAlign);
695         /* we don't print floatFormat since can't say much useful about it */
696         printf(_("Database block size:                  %u\n"), ControlFile.blcksz);
697         printf(_("Blocks per segment of large relation: %u\n"), ControlFile.relseg_size);
698         printf(_("WAL block size:                       %u\n"), ControlFile.xlog_blcksz);
699         printf(_("Bytes per WAL segment:                %u\n"), ControlFile.xlog_seg_size);
700         printf(_("Maximum length of identifiers:        %u\n"), ControlFile.nameDataLen);
701         printf(_("Maximum columns in an index:          %u\n"), ControlFile.indexMaxKeys);
702         printf(_("Date/time type storage:               %s\n"),
703                    (ControlFile.enableIntTimes ? _("64-bit integers") : _("floating-point numbers")));
704         printf(_("Maximum length of locale name:        %u\n"), ControlFile.localeBuflen);
705         printf(_("LC_COLLATE:                           %s\n"), ControlFile.lc_collate);
706         printf(_("LC_CTYPE:                             %s\n"), ControlFile.lc_ctype);
707 }
708
709 /*
710 * Update the control file before reseting it.
711 */
712 static void 
713 UpdateCtlFile4Reset(void)
714 {
715         /*
716          * Adjust fields as needed to force an empty XLOG starting at the next
717          * available segment.
718          */
719         newXlogId = ControlFile.logId;
720         newXlogSeg = ControlFile.logSeg;
721
722         /* adjust in case we are changing segment size */
723         newXlogSeg *= ControlFile.xlog_seg_size;
724         newXlogSeg = (newXlogSeg + XLogSegSize - 1) / XLogSegSize;
725
726         /* be sure we wrap around correctly at end of a logfile */
727         NextLogSeg(newXlogId, newXlogSeg);
728
729         /* Now we can force the recorded xlog seg size to the right thing. */
730         ControlFile.xlog_seg_size = XLogSegSize;
731
732         ControlFile.checkPointCopy.redo.xlogid = newXlogId;
733         ControlFile.checkPointCopy.redo.xrecoff =
734                 newXlogSeg * XLogSegSize + SizeOfXLogLongPHD;
735         ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
736         ControlFile.checkPointCopy.time = time(NULL);
737
738         ControlFile.state = DB_SHUTDOWNED;
739         ControlFile.time = time(NULL);
740         ControlFile.logId = newXlogId;
741         ControlFile.logSeg = newXlogSeg + 1;
742         ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
743         ControlFile.prevCheckPoint.xlogid = 0;
744         ControlFile.prevCheckPoint.xrecoff = 0;
745 }
746
747 /*
748  * Write out the new pg_control file.
749  */
750 static void
751 RewriteControlFile(void)
752 {
753         int                     fd;
754         char            buffer[PG_CONTROL_SIZE]; /* need not be aligned */
755
756
757         /* Contents are protected with a CRC */
758         INIT_CRC32(ControlFile.crc);
759         COMP_CRC32(ControlFile.crc,
760                            (char *) &ControlFile,
761                            offsetof(ControlFileData, crc));
762         FIN_CRC32(ControlFile.crc);
763
764         /*
765          * We write out PG_CONTROL_SIZE bytes into pg_control, zero-padding the
766          * excess over sizeof(ControlFileData).  This reduces the odds of
767          * premature-EOF errors when reading pg_control.  We'll still fail when we
768          * check the contents of the file, but hopefully with a more specific
769          * error than "couldn't read pg_control".
770          */
771         if (sizeof(ControlFileData) > PG_CONTROL_SIZE)
772         {
773                 fprintf(stderr,
774                                 _("%s: internal error -- sizeof(ControlFileData) is too large ... fix PG_CONTROL_SIZE\n"),
775                                 progname);
776                 exit(1);
777         }
778
779         memset(buffer, 0, PG_CONTROL_SIZE);
780         memcpy(buffer, &ControlFile, sizeof(ControlFileData));
781
782         unlink(XLOG_CONTROL_FILE);
783
784         fd = open(XLOG_CONTROL_FILE,
785                           O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
786                           S_IRUSR | S_IWUSR);
787         if (fd < 0)
788         {
789                 fprintf(stderr, _("%s: could not create pg_control file: %s\n"),
790                                 progname, strerror(errno));
791                 exit(1);
792         }
793
794         errno = 0;
795         if (write(fd, buffer, PG_CONTROL_SIZE) != PG_CONTROL_SIZE)
796         {
797                 /* if write didn't set errno, assume problem is no disk space */
798                 if (errno == 0)
799                         errno = ENOSPC;
800                 fprintf(stderr, _("%s: could not write pg_control file: %s\n"),
801                                 progname, strerror(errno));
802                 exit(1);
803         }
804
805         if (fsync(fd) != 0)
806         {
807                 fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno));
808                 exit(1);
809         }
810
811         close(fd);
812 }
813
814
815 /*
816  * Remove existing XLOG files
817  */
818 static void
819 KillExistingXLOG(void)
820 {
821         DIR                *xldir;
822         struct dirent *xlde;
823         char            path[MAXPGPATH];
824
825         xldir = opendir(XLOGDIR);
826         if (xldir == NULL)
827         {
828                 fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
829                                 progname, XLOGDIR, strerror(errno));
830                 exit(1);
831         }
832
833         errno = 0;
834         while ((xlde = readdir(xldir)) != NULL)
835         {
836                 if (strlen(xlde->d_name) == 24 &&
837                         strspn(xlde->d_name, "0123456789ABCDEF") == 24)
838                 {
839                         snprintf(path, MAXPGPATH, "%s/%s", XLOGDIR, xlde->d_name);
840                         if (unlink(path) < 0)
841                         {
842                                 fprintf(stderr, _("%s: could not delete file \"%s\": %s\n"),
843                                                 progname, path, strerror(errno));
844                                 exit(1);
845                         }
846                 }
847                 errno = 0;
848         }
849 #ifdef WIN32
850         /*
851          * This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but not in
852          * released version
853          */
854         if (GetLastError() == ERROR_NO_MORE_FILES)
855                 errno = 0;
856 #endif
857
858         if (errno)
859         {
860                 fprintf(stderr, _("%s: could not read from directory \"%s\": %s\n"),
861                                 progname, XLOGDIR, strerror(errno));
862                 exit(1);
863         }
864         closedir(xldir);
865 }
866
867
868 /*
869  * Write an empty XLOG file, containing only the checkpoint record
870  * already set up in ControlFile.
871  */
872 static void
873 WriteEmptyXLOG(void)
874 {
875         char       *buffer;
876         XLogPageHeader page;
877         XLogLongPageHeader longpage;
878         XLogRecord *record;
879         pg_crc32        crc;
880         char            path[MAXPGPATH];
881         int                     fd;
882         int                     nbytes;
883
884         /* Use malloc() to ensure buffer is MAXALIGNED */
885         buffer = (char *) malloc(XLOG_BLCKSZ);
886         page = (XLogPageHeader) buffer;
887         memset(buffer, 0, XLOG_BLCKSZ);
888
889         /* Set up the XLOG page header */
890         page->xlp_magic = XLOG_PAGE_MAGIC;
891         page->xlp_info = XLP_LONG_HEADER;
892         page->xlp_tli = ControlFile.checkPointCopy.ThisTimeLineID;
893         page->xlp_pageaddr.xlogid =
894                 ControlFile.checkPointCopy.redo.xlogid;
895         page->xlp_pageaddr.xrecoff =
896                 ControlFile.checkPointCopy.redo.xrecoff - SizeOfXLogLongPHD;
897         longpage = (XLogLongPageHeader) page;
898         longpage->xlp_sysid = ControlFile.system_identifier;
899         longpage->xlp_seg_size = XLogSegSize;
900         longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
901
902         /* Insert the initial checkpoint record */
903         record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD);
904         record->xl_prev.xlogid = 0;
905         record->xl_prev.xrecoff = 0;
906         record->xl_xid = InvalidTransactionId;
907         record->xl_tot_len = SizeOfXLogRecord + sizeof(CheckPoint);
908         record->xl_len = sizeof(CheckPoint);
909         record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
910         record->xl_rmid = RM_XLOG_ID;
911         memcpy(XLogRecGetData(record), &ControlFile.checkPointCopy,
912                    sizeof(CheckPoint));
913
914         INIT_CRC32(crc);
915         COMP_CRC32(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint));
916         COMP_CRC32(crc, (char *) record + sizeof(pg_crc32),
917                            SizeOfXLogRecord - sizeof(pg_crc32));
918         FIN_CRC32(crc);
919         record->xl_crc = crc;
920
921         /* Write the first page */
922         XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID,
923                                  newXlogId, newXlogSeg);
924
925         unlink(path);
926
927         fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
928                           S_IRUSR | S_IWUSR);
929         if (fd < 0)
930         {
931                 fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
932                                 progname, path, strerror(errno));
933                 exit(1);
934         }
935
936         errno = 0;
937         if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
938         {
939                 /* if write didn't set errno, assume problem is no disk space */
940                 if (errno == 0)
941                         errno = ENOSPC;
942                 fprintf(stderr, _("%s: could not write file \"%s\": %s\n"),
943                                 progname, path, strerror(errno));
944                 exit(1);
945         }
946
947         /* Fill the rest of the file with zeroes */
948         memset(buffer, 0, XLOG_BLCKSZ);
949         for (nbytes = XLOG_BLCKSZ; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
950         {
951                 errno = 0;
952                 if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
953                 {
954                         if (errno == 0)
955                                 errno = ENOSPC;
956                         fprintf(stderr, _("%s: could not write file \"%s\": %s\n"),
957                                         progname, path, strerror(errno));
958                         exit(1);
959                 }
960         }
961
962         if (fsync(fd) != 0)
963         {
964                 fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno));
965                 exit(1);
966         }
967
968         close(fd);
969 }
970
971
972 static void
973 usage(void)
974 {
975         printf(_("%s resets the PostgreSQL transaction log.\n\n"), progname);
976         printf(_("Usage:\n  %s [OPTION]... DATADIR\n\n"), progname);
977         printf(_("Options:\n"));
978         printf(_("  -f              force reset xlog to be done, if the control file is corrupted, then try to restore it.\n"));
979         printf(_("  -r              restore the pg_control file from old XLog files, resets is not done..\n")); 
980         printf(_("  -l TLI,FILE,SEG force minimum WAL starting location for new transaction log\n"));
981         printf(_("  -n              show extracted control values of existing pg_control file.\n"));
982         printf(_("  -m multiXID     set next multi transaction ID\n"));
983         printf(_("  -o OID          set next OID\n"));
984         printf(_("  -O multiOffset  set next multi transaction offset\n"));
985         printf(_("  -x XID          set next transaction ID\n"));
986         printf(_("  --help          show this help, then exit\n"));
987         printf(_("  --version       output version information, then exit\n"));
988         printf(_("\nReport bugs to <pgsql-bugs@postgresql.org>.\n"));
989 }
990
991
992
993 /*
994  * The following routines are mainly used for getting pg_control values 
995  * from the xlog file.
996  */
997
998  /* some local varaibles.*/
999 static int              logFd=0; /* kernel FD for current input file */
1000 static int              logRecOff;      /* offset of next record in page */
1001 static char             pageBuffer[BLCKSZ];     /* current page */
1002 static XLogRecPtr       curRecPtr;      /* logical address of current record */
1003 static XLogRecPtr       prevRecPtr;     /* logical address of previous record */
1004 static char             *readRecordBuf = NULL; /* ReadRecord result area */
1005 static uint32           readRecordBufSize = 0;
1006 static int32            logPageOff;     /* offset of current page in file */
1007 static uint32           logId;          /* current log file id */
1008 static uint32           logSeg;         /* current log file segment */
1009 static uint32           logTli;         /* current log file timeline */
1010
1011 /*
1012  * Get existing XLOG files
1013  */
1014 static void
1015 GetXLogFiles(void)
1016 {
1017         DIR                *xldir;
1018         struct dirent *xlde;
1019
1020         /* Open the xlog direcotry.*/
1021         xldir = opendir(XLOGDIR);
1022         if (xldir == NULL)
1023         {
1024                 fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
1025                                 progname, XLOGDIR, strerror(errno));
1026                 exit(1);
1027         }
1028
1029         /* Search the directory, insert the segment files into the xlogfilelist.*/
1030         errno = 0;
1031         while ((xlde = readdir(xldir)) != NULL)
1032         {
1033                 if (ValidXLogFileName(xlde->d_name)) {
1034                         /* XLog file is found, insert it into the xlogfilelist.*/
1035                         InsertXLogFile(xlde->d_name);
1036                 };
1037                 errno = 0;
1038         }
1039 #ifdef WIN32
1040         if (GetLastError() == ERROR_NO_MORE_FILES)
1041                 errno = 0;
1042 #endif
1043
1044         if (errno)
1045         {
1046                 fprintf(stderr, _("%s: could not read from directory \"%s\": %s\n"),
1047                                 progname, XLOGDIR, strerror(errno));
1048                 exit(1);
1049         }
1050         closedir(xldir);
1051 }
1052
1053 /*
1054  * Insert a file while had been found in the xlog folder into xlogfilelist.
1055  * The xlogfile list is matained in a increasing order.
1056  * 
1057  * The input parameter is the name of the xlog  file, the name is assumpted
1058  * valid.
1059  */
1060 static void 
1061 InsertXLogFile( char * fname )
1062 {
1063         XLogFileName * NewSegFile, *Curr, *Prev;
1064         bool append2end = false;
1065
1066         /* Allocate a new node for the new file. */
1067         NewSegFile = (XLogFileName *) malloc(sizeof(XLogFileName));
1068         strcpy(NewSegFile->fname,fname); /* setup the name */
1069         /* extract the time line, logid, and segment number from the name.*/
1070         sscanf(fname, "%8x%8x%8x", &(NewSegFile->tli), &(NewSegFile->logid), &(NewSegFile->seg));
1071         NewSegFile->next = NULL;
1072         
1073         /* Ensure the xlog file is active and valid.*/
1074         if (! ValidXLogFileHeader(NewSegFile))
1075         {
1076                 free(NewSegFile);
1077                 return;
1078         }
1079         
1080         /* the list is empty.*/
1081         if ( xlogfilelist == NULL ) {
1082                 xlogfilelist = NewSegFile;
1083                 return;
1084         };
1085
1086     /* try to search the list and find the insert point. */
1087         Prev=Curr=xlogfilelist;
1088         while( CmpXLogFileOT(NewSegFile, Curr))
1089     {
1090                 /* the node is appended to the end of the list.*/
1091                 if (Curr->next == NULL)
1092                 {
1093                         append2end = true;
1094                         break;
1095                 }
1096                 Prev=Curr;
1097                 Curr = Curr->next;
1098         }
1099         
1100         /* Insert the new node to the list.*/
1101         if ( append2end )
1102         {
1103                 /* We need to append the new node to the end of the list */             
1104                 Curr->next = NewSegFile;
1105         } 
1106         else 
1107         {
1108                 NewSegFile->next = Curr;
1109                 /* prev should not be the list head. */
1110                 if ( Prev != NULL && Prev != xlogfilelist)
1111                 {
1112                         Prev->next = NewSegFile;
1113                 }
1114         }
1115         /* Update the list head if it is needed.*/
1116         if ((Curr == xlogfilelist) && !append2end) 
1117         {
1118                 xlogfilelist = NewSegFile;
1119         }
1120         
1121 }
1122
1123 /*
1124  * compare two xlog file from their name to see which one is latest.
1125  *
1126  * Return true for file 2 is the lastest file.
1127  *
1128  */
1129 static bool
1130 CmpXLogFileOT(XLogFileName * f1, XLogFileName *f2)
1131 {
1132         if (f2->tli >= f1->tli)
1133         {
1134                 if (f2->logid >= f1->logid)
1135                 {
1136                         if (f2->seg > f1->seg) return false;
1137                 }
1138         }
1139         return true;
1140
1141 }
1142
1143 /* check is two segment file is continous.*/
1144 static bool 
1145 IsNextSeg(XLogFileName *prev, XLogFileName *cur)
1146 {
1147         uint32 logid, logseg;
1148         
1149         if (prev->tli != cur->tli) return false;
1150         
1151         logid = prev->logid;
1152         logseg = prev->seg;
1153         NextLogSeg(logid, logseg);
1154         
1155         if ((logid == cur->logid) && (logseg == cur->seg)) return true;
1156
1157         return false;
1158
1159 }
1160
1161
1162 /*
1163 * Select the oldest xlog file in the latest time line. 
1164 */
1165 static void
1166 SelectStartXLog( void )
1167 {
1168         XLogFileName *tmp;
1169         CurXLogFile = xlogfilelist;
1170         
1171         if (xlogfilelist == NULL) 
1172         {
1173                 return;
1174         }
1175         
1176         tmp=LastXLogFile=CurXLogFile=xlogfilelist;
1177         
1178         while(tmp->next != NULL)
1179         {
1180                 
1181                 /* 
1182                  * we should ensure that from the first to 
1183                  * the last segment file is continous.
1184                  * */
1185                 if (!IsNextSeg(tmp, tmp->next)) 
1186                 {
1187                         CurXLogFile = tmp->next;
1188                 }
1189                 tmp=tmp->next;
1190         }
1191
1192         LastXLogFile = tmp;
1193
1194 }
1195
/*
 * Check whether a file name looks like an xlog segment file name.
 *
 * fname is the bare file name (no directory part).  Returns true when the
 * name is exactly 24 characters long, consists only of the characters
 * 0-9 and A-F, and parses as three 8-digit hexadecimal fields.
 */
static bool
ValidXLogFileName(char * fname)
{
	/* %x stores through "unsigned int *", so use exactly that type. */
	unsigned int	tli;
	unsigned int	id;
	unsigned int	seg;

	if (strlen(fname) != 24)
		return false;

	if (strspn(fname, "0123456789ABCDEF") != 24)
		return false;

	if (sscanf(fname, "%8x%8x%8x", &tli, &id, &seg) != 3)
		return false;

	return true;
}
1215
1216 /* Ensure the xlog file is active and valid.*/
1217 static bool 
1218 ValidXLogFileHeader(XLogFileName *segfile)
1219 {
1220         int fd;
1221         char buffer[BLCKSZ];
1222         char            path[MAXPGPATH];
1223         size_t nread;
1224         
1225         snprintf(path, MAXPGPATH, "%s/%s", XLOGDIR, segfile->fname);
1226         fd = open(path, O_RDONLY | PG_BINARY, 0);
1227         if (fd < 0)
1228         {
1229                 return false;
1230         }
1231         nread = read(fd, buffer, BLCKSZ);
1232         if (nread == BLCKSZ)
1233         {
1234                 XLogPageHeader hdr = (XLogPageHeader)buffer;
1235                 
1236                 if (ValidXLOGPageHeader(hdr, segfile->tli, segfile->logid, segfile->seg))
1237                 {
1238                         return true;
1239                 }
1240
1241         }
1242         return false;
1243
1244 }
1245 static bool
1246 ValidXLOGPageHeader(XLogPageHeader hdr, uint32 tli, uint32 id, uint32 seg)
1247 {
1248         XLogRecPtr      recaddr;
1249
1250         if (hdr->xlp_magic != XLOG_PAGE_MAGIC)
1251         {
1252                 return false;
1253         }
1254         if ((hdr->xlp_info & ~XLP_ALL_FLAGS) != 0)
1255         {
1256                 return false;
1257         }
1258         if (hdr->xlp_info & XLP_LONG_HEADER)
1259         {
1260                 XLogLongPageHeader longhdr = (XLogLongPageHeader) hdr;
1261
1262                 if (longhdr->xlp_seg_size != XLogSegSize)
1263                 {
1264                         return false;
1265                 }
1266                 /* Get the system identifier from the segment file header.*/
1267                 sysidentifier = ((XLogLongPageHeader) pageBuffer)->xlp_sysid;
1268         }
1269                 
1270         recaddr.xlogid = id;
1271         recaddr.xrecoff = seg * XLogSegSize + logPageOff;
1272         if (!XLByteEQ(hdr->xlp_pageaddr, recaddr))
1273         {
1274                 return false;
1275         }
1276
1277         if (hdr->xlp_tli != tli)
1278         {
1279                 return false;
1280         }
1281         return true;
1282 }
1283
1284
1285 /* Read another page, if possible */
1286 static bool
1287 ReadXLogPage(void)
1288 {
1289         size_t nread;
1290         
1291         /* Need to advance to the new segment file.*/
1292         if ( logPageOff >= XLogSegSize ) 
1293         { 
1294                 close(logFd);
1295                 logFd = 0;
1296         }
1297         
1298         /* Need to open the segement file.*/
1299         if ((logFd <= 0) && (CurXLogFile != NULL))
1300         {
1301                 if (OpenXLogFile(CurXLogFile) < 0)
1302                 {
1303                         return false;
1304                 }
1305                 CurXLogFile = CurXLogFile->next;
1306         }
1307         
1308         /* Read a page from the openning segement file.*/
1309         nread = read(logFd, pageBuffer, BLCKSZ);
1310
1311         if (nread == BLCKSZ)
1312         {
1313                 logPageOff += BLCKSZ;
1314                 if (ValidXLOGPageHeader( (XLogPageHeader)pageBuffer, logTli, logId, logSeg))
1315                         return true;
1316         } 
1317         
1318         return false;
1319 }
1320
1321 /*
1322  * CRC-check an XLOG record.  We do not believe the contents of an XLOG
1323  * record (other than to the minimal extent of computing the amount of
1324  * data to read in) until we've checked the CRCs.
1325  *
1326  * We assume all of the record has been read into memory at *record.
1327  */
1328 static bool
1329 RecordIsValid(XLogRecord *record, XLogRecPtr recptr)
1330 {
1331         pg_crc32        crc;
1332         int                     i;
1333         uint32          len = record->xl_len;
1334         BkpBlock        bkpb;
1335         char       *blk;
1336
1337         /* First the rmgr data */
1338         INIT_CRC32(crc);
1339         COMP_CRC32(crc, XLogRecGetData(record), len);
1340
1341         /* Add in the backup blocks, if any */
1342         blk = (char *) XLogRecGetData(record) + len;
1343         for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
1344         {
1345                 uint32  blen;
1346
1347                 if (!(record->xl_info & XLR_SET_BKP_BLOCK(i)))
1348                         continue;
1349
1350                 memcpy(&bkpb, blk, sizeof(BkpBlock));
1351                 if (bkpb.hole_offset + bkpb.hole_length > BLCKSZ)
1352                 {
1353                         return false;
1354                 }
1355                 blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length;
1356                 COMP_CRC32(crc, blk, blen);
1357                 blk += blen;
1358         }
1359
1360         /* Check that xl_tot_len agrees with our calculation */
1361         if (blk != (char *) record + record->xl_tot_len)
1362         {
1363                 return false;
1364         }
1365
1366         /* Finally include the record header */
1367         COMP_CRC32(crc, (char *) record + sizeof(pg_crc32),
1368                            SizeOfXLogRecord - sizeof(pg_crc32));
1369         FIN_CRC32(crc);
1370
1371         if (!EQ_CRC32(record->xl_crc, crc))
1372         {
1373                 return false;
1374         }
1375
1376         return true;
1377 }
1378
1379
1380
1381 /*
1382  * Attempt to read an XLOG record into readRecordBuf.
1383  */
1384 static bool
1385 FetchRecord(void)
1386 {
1387         char       *buffer;
1388         XLogRecord *record;
1389         XLogContRecord *contrecord;
1390         uint32          len, total_len;
1391
1392
1393         while (logRecOff <= 0 || logRecOff > BLCKSZ - SizeOfXLogRecord)
1394         {
1395                 /* Need to advance to new page */
1396                 if (! ReadXLogPage())
1397                 {
1398                         return false;
1399                 }
1400                 
1401                 logRecOff = XLogPageHeaderSize((XLogPageHeader) pageBuffer);
1402                 if ((((XLogPageHeader) pageBuffer)->xlp_info & ~XLP_LONG_HEADER) != 0)
1403                 {
1404                         /* Check for a continuation record */
1405                         if (((XLogPageHeader) pageBuffer)->xlp_info & XLP_FIRST_IS_CONTRECORD)
1406                         {
1407                                 contrecord = (XLogContRecord *) (pageBuffer + logRecOff);
1408                                 logRecOff += MAXALIGN(contrecord->xl_rem_len + SizeOfXLogContRecord);
1409                         }
1410                 }
1411         }
1412
1413         curRecPtr.xlogid = logId;
1414         curRecPtr.xrecoff = logSeg * XLogSegSize + logPageOff + logRecOff;
1415         record = (XLogRecord *) (pageBuffer + logRecOff);
1416
1417         if (record->xl_len == 0)
1418         {
1419                 return false;
1420         }
1421
1422         total_len = record->xl_tot_len;
1423
1424         /*
1425          * Allocate or enlarge readRecordBuf as needed.  To avoid useless
1426          * small increases, round its size to a multiple of BLCKSZ, and make
1427          * sure it's at least 4*BLCKSZ to start with.  (That is enough for all
1428          * "normal" records, but very large commit or abort records might need
1429          * more space.)
1430          */
1431         if (total_len > readRecordBufSize)
1432         {
1433                 uint32          newSize = total_len;
1434
1435                 newSize += BLCKSZ - (newSize % BLCKSZ);
1436                 newSize = Max(newSize, 4 * BLCKSZ);
1437                 if (readRecordBuf)
1438                         free(readRecordBuf);
1439                 readRecordBuf = (char *) malloc(newSize);
1440                 if (!readRecordBuf)
1441                 {
1442                         readRecordBufSize = 0;
1443                         return false;
1444                 }
1445                 readRecordBufSize = newSize;
1446         }
1447
1448         buffer = readRecordBuf;
1449         len = BLCKSZ - curRecPtr.xrecoff % BLCKSZ; /* available in block */
1450         if (total_len > len)
1451         {
1452                 /* Need to reassemble record */
1453                 uint32                  gotlen = len;
1454
1455                 memcpy(buffer, record, len);
1456                 record = (XLogRecord *) buffer;
1457                 buffer += len;
1458                 for (;;)
1459                 {
1460                         uint32  pageHeaderSize;
1461
1462                         if (!ReadXLogPage())
1463                         {
1464                                 return false;
1465                         }
1466                         if (!(((XLogPageHeader) pageBuffer)->xlp_info & XLP_FIRST_IS_CONTRECORD))
1467                         {
1468                                 return false;
1469                         }
1470                         pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) pageBuffer);
1471                         contrecord = (XLogContRecord *) (pageBuffer + pageHeaderSize);
1472                         if (contrecord->xl_rem_len == 0 || 
1473                                 total_len != (contrecord->xl_rem_len + gotlen))
1474                         {
1475                                 return false;
1476                         }
1477                         len = BLCKSZ - pageHeaderSize - SizeOfXLogContRecord;
1478                         if (contrecord->xl_rem_len > len)
1479                         {
1480                                 memcpy(buffer, (char *)contrecord + SizeOfXLogContRecord, len);
1481                                 gotlen += len;
1482                                 buffer += len;
1483                                 continue;
1484                         }
1485                         memcpy(buffer, (char *) contrecord + SizeOfXLogContRecord,
1486                                    contrecord->xl_rem_len);
1487                         logRecOff = MAXALIGN(pageHeaderSize + SizeOfXLogContRecord + contrecord->xl_rem_len);
1488                         break;
1489                 }
1490                 if (!RecordIsValid(record, curRecPtr))
1491                 {
1492                         return false;
1493                 }
1494                 return true;
1495         }
1496         /* Record is contained in this page */
1497         memcpy(buffer, record, total_len);
1498         record = (XLogRecord *) buffer;
1499         logRecOff += MAXALIGN(total_len);
1500         if (!RecordIsValid(record, curRecPtr))
1501         {
1502
1503                 return false;
1504         }
1505         return true;
1506 }
1507
1508 /*
1509  * if the record is checkpoint, update the lastest checkpoint record.
1510  */
1511 static void
1512 UpdateCheckPoint(XLogRecord *record)
1513 {
1514         uint8   info = record->xl_info & ~XLR_INFO_MASK;
1515         
1516         if ((info == XLOG_CHECKPOINT_SHUTDOWN) ||
1517                 (info == XLOG_CHECKPOINT_ONLINE))
1518         {
1519                  CheckPoint *chkpoint = (CheckPoint*) XLogRecGetData(record);
1520                  prevchkp = lastchkp;
1521                  lastchkp = curRecPtr;
1522                  lastcheckpoint = *chkpoint;
1523                  
1524                  /* update the database state.*/
1525                  switch(info)
1526                  {
1527                         case XLOG_CHECKPOINT_SHUTDOWN:
1528                                 state = DB_SHUTDOWNED;
1529                                 break;
1530                         case XLOG_CHECKPOINT_ONLINE:
1531                                 state = DB_IN_PRODUCTION;
1532                                 break;
1533                  }
1534                  found_checkpoint ++ ;
1535         }
1536 }
1537
1538 static int
1539 OpenXLogFile(XLogFileName *sf)
1540 {
1541
1542         char            path[MAXPGPATH];
1543
1544         if ( logFd > 0 ) close(logFd);
1545         
1546         /* Open a  Xlog segment file. */
1547         snprintf(path, MAXPGPATH, "%s/%s", XLOGDIR, sf->fname);
1548         logFd = open(path, O_RDONLY | PG_BINARY, 0);
1549     
1550         if (logFd < 0)
1551         {
1552                 fprintf(stderr, _("%s: Can not open xlog file %s.\n"), progname,path);          
1553                 return -1;
1554         }
1555         
1556         /* Setup the parameter for searching. */
1557         logPageOff = -BLCKSZ;           /* so 1st increment in readXLogPage gives 0 */
1558         logRecOff = 0;
1559         logId = sf->logid;
1560         logSeg = sf->seg;
1561         logTli = sf->tli;
1562         return logFd;
1563 }
1564
1565 /*
1566  * Search the lastest checkpoint in the lastest XLog segment file.
1567  *
1568  * The return value is the total checkpoints which had been found 
1569  * in the XLog segment file. 
1570  */
1571 static int 
1572 SearchLastCheckpoint(void)
1573 {
1574
1575         /* retrive all of the active xlog files from xlog direcotry 
1576          * into a list by increasing order, according their timeline, 
1577          * log id, segment id.
1578         */
1579         GetXLogFiles();
1580         
1581         /* Select the oldest segment file in the lastest time line.*/
1582         SelectStartXLog();
1583         
1584         /* No segment file was found.*/
1585         if ( CurXLogFile == NULL ) 
1586         {
1587                 return 0;
1588         }
1589
1590         /* initial it . */
1591         logFd=logId=logSeg=logTli=0;
1592
1593         /* 
1594          * Search the XLog segment file from beginning to end, 
1595          * if checkpoint record is found, then update the 
1596          * latest check point.
1597          */
1598         while (FetchRecord())
1599         {
1600                 /* To see if the record is checkpoint record. */
1601                 if (((XLogRecord *) readRecordBuf)->xl_rmid == RM_XLOG_ID)
1602                         UpdateCheckPoint((XLogRecord *) readRecordBuf);
1603                 prevRecPtr = curRecPtr;
1604         }
1605
1606         /* We can not know clearly if we had reached the end.
1607          * But just check if we reach the last segment file,
1608          * if it is not, then some problem there.
1609          * (We need a better way to know the abnormal broken during the search)
1610          */
1611         if ((logId != LastXLogFile->logid) && (logSeg != LastXLogFile->seg))
1612         {
1613                 return 0;
1614         }
1615         
1616         /* 
1617          * return the checkpoints which had been found yet, 
1618          * let others know how much checkpointes are found. 
1619          */
1620         return found_checkpoint;
1621 }
1622
1623 /* Clean up the allocated list.*/
1624 static void
1625 CleanUpList(XLogFileName *list)
1626 {
1627
1628         XLogFileName *tmp;
1629         tmp = list;
1630         while(list != NULL)
1631         {
1632                 tmp=list->next;
1633                 free(list);
1634                 list=tmp;
1635         }
1636         
1637 }
1638