granicus.if.org Git - postgresql/commitdiff
Change CRCs in WAL records from 64bit to 32bit for performance reasons.
author Tom Lane <tgl@sss.pgh.pa.us>
Thu, 2 Jun 2005 05:55:29 +0000 (05:55 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Thu, 2 Jun 2005 05:55:29 +0000 (05:55 +0000)
Instead of a separate CRC on each backup block, include backup blocks
in their parent WAL record's CRC; this is important to ensure that the
backup block really goes with the WAL record, i.e., there was not a page
tear right at the start of the backup block.  Implement a simple form
of compression of backup blocks: drop any run of zeroes starting at
pd_lower, so as not to store the unused 'hole' that commonly exists in
PG heap and index pages.  Tweak PageRepairFragmentation and related
routines to ensure they keep the unused space zeroed, so that the above
compression method remains effective.  All per recent discussions.

src/backend/access/nbtree/nbtpage.c
src/backend/access/nbtree/nbtxlog.c
src/backend/access/transam/xlog.c
src/backend/storage/page/bufpage.c
src/backend/utils/hash/pg_crc.c
src/bin/pg_controldata/pg_controldata.c
src/bin/pg_resetxlog/pg_resetxlog.c
src/include/access/xlog.h
src/include/access/xlog_internal.h
src/include/catalog/pg_control.h
src/include/utils/pg_crc.h

index ea023253189b15878da77d91808b4faa5486893d..b9d42bad6d2aba1b97a0e9ae901277d4089568bc 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.84 2005/05/07 21:32:23 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.85 2005/06/02 05:55:28 tgl Exp $
  *
  *     NOTES
  *        Postgres btree pages look like ordinary relation pages.      The opaque
@@ -113,6 +113,13 @@ _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level)
 
        metaopaque = (BTPageOpaque) PageGetSpecialPointer(page);
        metaopaque->btpo_flags = BTP_META;
+
+       /*
+        * Set pd_lower just past the end of the metadata.  This is not
+        * essential but it makes the page look compressible to xlog.c.
+        */
+       ((PageHeader) page)->pd_lower =
+               ((char *) metad + sizeof(BTMetaPageData)) - (char *) page;
 }
 
 /*
index ade60619a3d0de564bfbf56900f6b2838db4f85b..536bc17718039860b30e048fe4a9726cd7882e06 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.20 2005/03/22 06:17:03 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.21 2005/06/02 05:55:28 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -135,6 +135,13 @@ _bt_restore_meta(Relation reln, XLogRecPtr lsn,
        pageop = (BTPageOpaque) PageGetSpecialPointer(metapg);
        pageop->btpo_flags = BTP_META;
 
+       /*
+        * Set pd_lower just past the end of the metadata.  This is not
+        * essential but it makes the page look compressible to xlog.c.
+        */
+       ((PageHeader) metapg)->pd_lower =
+               ((char *) md + sizeof(BTMetaPageData)) - (char *) metapg;
+
        PageSetLSN(metapg, lsn);
        PageSetTLI(metapg, ThisTimeLineID);
        LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
index 2352313b051dac931163d9652677984382dbe969..27f6354987d71b1ac6bab8a957dd9d2262a1bbfa 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.194 2005/05/31 19:10:28 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.195 2005/06/02 05:55:28 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -434,6 +434,7 @@ static void exitArchiveRecovery(TimeLineID endTLI,
                                        uint32 endLogId, uint32 endLogSeg);
 static bool recoveryStopsHere(XLogRecord *record, bool *includeThis);
 
+static void SetBkpBlock(BkpBlock *bkpb, Buffer buffer);
 static bool AdvanceXLInsertBuffer(void);
 static void XLogWrite(XLogwrtRqst WriteRqst);
 static int XLogFileInit(uint32 log, uint32 seg,
@@ -499,8 +500,10 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
        bool            dtbuf_bkp[XLR_MAX_BKP_BLOCKS];
        BkpBlock        dtbuf_xlg[XLR_MAX_BKP_BLOCKS];
        XLogRecPtr      dtbuf_lsn[XLR_MAX_BKP_BLOCKS];
-       XLogRecData dtbuf_rdt[2 * XLR_MAX_BKP_BLOCKS];
-       crc64           rdata_crc;
+       XLogRecData dtbuf_rdt1[XLR_MAX_BKP_BLOCKS];
+       XLogRecData dtbuf_rdt2[XLR_MAX_BKP_BLOCKS];
+       XLogRecData dtbuf_rdt3[XLR_MAX_BKP_BLOCKS];
+       pg_crc32        rdata_crc;
        uint32          len,
                                write_len;
        unsigned        i;
@@ -531,8 +534,10 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
        /*
         * Here we scan the rdata list, determine which buffers must be backed
         * up, and compute the CRC values for the data.  Note that the record
-        * header isn't added into the CRC yet since we don't know the final
-        * length or info bits quite yet.
+        * header isn't added into the CRC initially since we don't know the
+        * final length or info bits quite yet.  Thus, the CRC will represent
+        * the CRC of the whole record in the order "rdata, then backup blocks,
+        * then record header".
         *
         * We may have to loop back to here if a race condition is detected
         * below. We could prevent the race by doing all this work while
@@ -553,7 +558,7 @@ begin:;
                dtbuf_bkp[i] = false;
        }
 
-       INIT_CRC64(rdata_crc);
+       INIT_CRC32(rdata_crc);
        len = 0;
        for (rdt = rdata;;)
        {
@@ -561,7 +566,7 @@ begin:;
                {
                        /* Simple data, just include it */
                        len += rdt->len;
-                       COMP_CRC64(rdata_crc, rdt->data, rdt->len);
+                       COMP_CRC32(rdata_crc, rdt->data, rdt->len);
                }
                else
                {
@@ -576,7 +581,7 @@ begin:;
                                        else if (rdt->data)
                                        {
                                                len += rdt->len;
-                                               COMP_CRC64(rdata_crc, rdt->data, rdt->len);
+                                               COMP_CRC32(rdata_crc, rdt->data, rdt->len);
                                        }
                                        break;
                                }
@@ -591,26 +596,14 @@ begin:;
                                        dtbuf_lsn[i] = *((XLogRecPtr *) BufferGetBlock(rdt->buffer));
                                        if (XLByteLE(dtbuf_lsn[i], RedoRecPtr))
                                        {
-                                               crc64           dtcrc;
-
                                                dtbuf_bkp[i] = true;
+                                               SetBkpBlock(&(dtbuf_xlg[i]), rdt->buffer);
                                                rdt->data = NULL;
-                                               INIT_CRC64(dtcrc);
-                                               COMP_CRC64(dtcrc,
-                                                                  BufferGetBlock(dtbuf[i]),
-                                                                  BLCKSZ);
-                                               dtbuf_xlg[i].node = BufferGetFileNode(dtbuf[i]);
-                                               dtbuf_xlg[i].block = BufferGetBlockNumber(dtbuf[i]);
-                                               COMP_CRC64(dtcrc,
-                                                               (char *) &(dtbuf_xlg[i]) + sizeof(crc64),
-                                                                  sizeof(BkpBlock) - sizeof(crc64));
-                                               FIN_CRC64(dtcrc);
-                                               dtbuf_xlg[i].crc = dtcrc;
                                        }
                                        else if (rdt->data)
                                        {
                                                len += rdt->len;
-                                               COMP_CRC64(rdata_crc, rdt->data, rdt->len);
+                                               COMP_CRC32(rdata_crc, rdt->data, rdt->len);
                                        }
                                        break;
                                }
@@ -625,6 +618,39 @@ begin:;
                rdt = rdt->next;
        }
 
+       /*
+        * Now add the backup block headers and data into the CRC
+        */
+       for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
+       {
+               if (dtbuf_bkp[i])
+               {
+                       BkpBlock   *bkpb = &(dtbuf_xlg[i]);
+                       char       *page;
+
+                       COMP_CRC32(rdata_crc,
+                                          (char *) bkpb,
+                                          sizeof(BkpBlock));
+                       page = (char *) BufferGetBlock(dtbuf[i]);
+                       if (bkpb->hole_length == 0)
+                       {
+                               COMP_CRC32(rdata_crc,
+                                                  page,
+                                                  BLCKSZ);
+                       }
+                       else
+                       {
+                               /* must skip the hole */
+                               COMP_CRC32(rdata_crc,
+                                                  page,
+                                                  bkpb->hole_offset);
+                               COMP_CRC32(rdata_crc,
+                                                  page + (bkpb->hole_offset + bkpb->hole_length),
+                                                  BLCKSZ - (bkpb->hole_offset + bkpb->hole_length));
+                       }
+               }
+       }
+
        /*
         * NOTE: the test for len == 0 here is somewhat fishy, since in theory
         * all of the rmgr data might have been suppressed in favor of backup
@@ -713,23 +739,49 @@ begin:;
        write_len = len;
        for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
        {
+               BkpBlock   *bkpb;
+               char       *page;
+
                if (dtbuf[i] == InvalidBuffer || !(dtbuf_bkp[i]))
                        continue;
 
                info |= XLR_SET_BKP_BLOCK(i);
 
-               rdt->next = &(dtbuf_rdt[2 * i]);
+               bkpb = &(dtbuf_xlg[i]);
+               page = (char *) BufferGetBlock(dtbuf[i]);
+
+               rdt->next = &(dtbuf_rdt1[i]);
+               rdt = rdt->next;
 
-               dtbuf_rdt[2 * i].data = (char *) &(dtbuf_xlg[i]);
-               dtbuf_rdt[2 * i].len = sizeof(BkpBlock);
+               rdt->data = (char *) bkpb;
+               rdt->len = sizeof(BkpBlock);
                write_len += sizeof(BkpBlock);
 
-               rdt = dtbuf_rdt[2 * i].next = &(dtbuf_rdt[2 * i + 1]);
+               rdt->next = &(dtbuf_rdt2[i]);
+               rdt = rdt->next;
 
-               dtbuf_rdt[2 * i + 1].data = (char *) BufferGetBlock(dtbuf[i]);
-               dtbuf_rdt[2 * i + 1].len = BLCKSZ;
-               write_len += BLCKSZ;
-               dtbuf_rdt[2 * i + 1].next = NULL;
+               if (bkpb->hole_length == 0)
+               {
+                       rdt->data = page;
+                       rdt->len = BLCKSZ;
+                       write_len += BLCKSZ;
+                       rdt->next = NULL;
+               }
+               else
+               {
+                       /* must skip the hole */
+                       rdt->data = page;
+                       rdt->len = bkpb->hole_offset;
+                       write_len += bkpb->hole_offset;
+
+                       rdt->next = &(dtbuf_rdt3[i]);
+                       rdt = rdt->next;
+
+                       rdt->data = page + (bkpb->hole_offset + bkpb->hole_length);
+                       rdt->len = BLCKSZ - (bkpb->hole_offset + bkpb->hole_length);
+                       write_len += rdt->len;
+                       rdt->next = NULL;
+               }
        }
 
        /*
@@ -752,14 +804,15 @@ begin:;
 
        record->xl_prev = Insert->PrevRecord;
        record->xl_xid = GetCurrentTransactionIdIfAny();
+       record->xl_tot_len = SizeOfXLogRecord + write_len;
        record->xl_len = len;           /* doesn't include backup blocks */
        record->xl_info = info;
        record->xl_rmid = rmid;
 
-       /* Now we can finish computing the main CRC */
-       COMP_CRC64(rdata_crc, (char *) record + sizeof(crc64),
-                          SizeOfXLogRecord - sizeof(crc64));
-       FIN_CRC64(rdata_crc);
+       /* Now we can finish computing the record's CRC */
+       COMP_CRC32(rdata_crc, (char *) record + sizeof(pg_crc32),
+                          SizeOfXLogRecord - sizeof(pg_crc32));
+       FIN_CRC32(rdata_crc);
        record->xl_crc = rdata_crc;
 
        /* Compute record's XLOG location */
@@ -884,6 +937,46 @@ begin:;
        return (RecPtr);
 }
 
+/*
+ * Fill a BkpBlock struct given a buffer containing the page to be saved
+ *
+ * This is nontrivial only because it has to decide whether to apply "hole
+ * compression".
+ *
+ * bkpb:   output; its node, block, hole_offset and hole_length fields are
+ *         all assigned here.
+ * buffer: buffer holding the page to be described.
+ *
+ * On return, hole_length is the number of consecutive zero bytes found
+ * starting at the page's pd_lower, and hole_offset is that pd_lower value.
+ * When no such run exists, or pd_lower is out of range, both are set to 0.
+ */
+static void
+SetBkpBlock(BkpBlock *bkpb, Buffer buffer)
+{
+       PageHeader      page;
+       uint16          offset;
+       uint16          length;
+
+       /* Save page identity info */
+       bkpb->node = BufferGetFileNode(buffer);
+       bkpb->block = BufferGetBlockNumber(buffer);
+
+       /*
+        * Test whether there is a "hole" containing zeroes in the page.  The
+        * candidate hole starts at pd_lower; its length is found by counting
+        * zero bytes from there, stopping at the first nonzero byte or at the
+        * end of the block (the scan is bounded by BLCKSZ, not by pd_upper).
+        */
+       page = (PageHeader) BufferGetBlock(buffer);
+       offset = page->pd_lower;
+       /*
+        * Check if pd_lower appears sane at all: it must be at least
+        * SizeOfPageHeaderData and less than BLCKSZ, otherwise no hole is
+        * recorded.  If the byte at pd_lower is already nonzero the run is
+        * empty, and offset is reset to 0 so that an absent hole is always
+        * stored as offset 0, length 0.
+        */
+       if (offset >= SizeOfPageHeaderData && offset < BLCKSZ)
+       {
+               char   *spd = (char *) page + offset;
+               char   *epd = (char *) page + BLCKSZ;
+               char   *pd = spd;
+
+               while (pd < epd && *pd == '\0')
+                       pd++;
+
+               length = pd - spd;
+               if (length == 0)
+                       offset = 0;
+       }
+       else
+               offset = length = 0;
+       bkpb->hole_offset = offset;
+       bkpb->hole_length = length;
+}
+
 /*
  * XLogArchiveNotify
  *
@@ -2276,7 +2369,7 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn)
                if (!(record->xl_info & XLR_SET_BKP_BLOCK(i)))
                        continue;
 
-               memcpy((char *) &bkpb, blk, sizeof(BkpBlock));
+               memcpy(&bkpb, blk, sizeof(BkpBlock));
                blk += sizeof(BkpBlock);
 
                reln = XLogOpenRelation(true, record->xl_rmid, bkpb.node);
@@ -2287,7 +2380,21 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn)
                        if (BufferIsValid(buffer))
                        {
                                page = (Page) BufferGetPage(buffer);
-                               memcpy((char *) page, blk, BLCKSZ);
+
+                               if (bkpb.hole_length == 0)
+                               {
+                                       memcpy((char *) page, blk, BLCKSZ);
+                               }
+                               else
+                               {
+                                       /* must zero-fill the hole */
+                                       MemSet((char *) page, 0, BLCKSZ);
+                                       memcpy((char *) page, blk, bkpb.hole_offset);
+                                       memcpy((char *) page + (bkpb.hole_offset + bkpb.hole_length),
+                                                  blk + bkpb.hole_offset,
+                                                  BLCKSZ - (bkpb.hole_offset + bkpb.hole_length));
+                               }
+
                                PageSetLSN(page, lsn);
                                PageSetTLI(page, ThisTimeLineID);
                                LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
@@ -2295,7 +2402,7 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn)
                        }
                }
 
-               blk += BLCKSZ;
+               blk += BLCKSZ - bkpb.hole_length;
        }
 }
 
@@ -2309,53 +2416,61 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn)
 static bool
 RecordIsValid(XLogRecord *record, XLogRecPtr recptr, int emode)
 {
-       crc64           crc;
-       crc64           cbuf;
+       pg_crc32        crc;
        int                     i;
        uint32          len = record->xl_len;
+       BkpBlock        bkpb;
        char       *blk;
 
-       /* Check CRC of rmgr data and record header */
-       INIT_CRC64(crc);
-       COMP_CRC64(crc, XLogRecGetData(record), len);
-       COMP_CRC64(crc, (char *) record + sizeof(crc64),
-                          SizeOfXLogRecord - sizeof(crc64));
-       FIN_CRC64(crc);
+       /*
+        * First the rmgr data.  A single CRC is accumulated over the whole
+        * record in the order "rmgr data, then backup blocks, then record
+        * header".
+        */
+       INIT_CRC32(crc);
+       COMP_CRC32(crc, XLogRecGetData(record), len);
 
-       if (!EQ_CRC64(record->xl_crc, crc))
-       {
-               ereport(emode,
-                               (errmsg("incorrect resource manager data checksum in record at %X/%X",
-                                               recptr.xlogid, recptr.xrecoff)));
-               return (false);
-       }
-
-       /* Check CRCs of backup blocks, if any */
+       /*
+        * Add in the backup blocks, if any.  Each block flagged in xl_info is a
+        * BkpBlock header followed by BLCKSZ - hole_length bytes of page image;
+        * the header is copied out with memcpy before its fields are read, and
+        * a hole that would extend past BLCKSZ is rejected.
+        *
+        * NOTE(review): blk is advanced according to the xl_info bits and
+        * hole_length before it is compared with xl_tot_len below, so the
+        * memcpy and COMP_CRC32 in this loop look able to read past the end of
+        * the record when those fields are corrupt -- confirm whether a
+        * per-block bounds check against xl_tot_len is needed.
+        */
        blk = (char *) XLogRecGetData(record) + len;
        for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
        {
+               uint32  blen;
+
                if (!(record->xl_info & XLR_SET_BKP_BLOCK(i)))
                        continue;
 
-               INIT_CRC64(crc);
-               COMP_CRC64(crc, blk + sizeof(BkpBlock), BLCKSZ);
-               COMP_CRC64(crc, blk + sizeof(crc64),
-                                  sizeof(BkpBlock) - sizeof(crc64));
-               FIN_CRC64(crc);
-               memcpy((char *) &cbuf, blk, sizeof(crc64));             /* don't assume
-                                                                                                                * alignment */
-
-               if (!EQ_CRC64(cbuf, crc))
+               memcpy(&bkpb, blk, sizeof(BkpBlock));
+               if (bkpb.hole_offset + bkpb.hole_length > BLCKSZ)
                {
                        ereport(emode,
-                                       (errmsg("incorrect checksum of backup block %d in record at %X/%X",
-                                                       i + 1, recptr.xlogid, recptr.xrecoff)));
-                       return (false);
+                                       (errmsg("incorrect hole size in record at %X/%X",
+                                                       recptr.xlogid, recptr.xrecoff)));
+                       return false;
                }
-               blk += sizeof(BkpBlock) + BLCKSZ;
+               blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length;
+               COMP_CRC32(crc, blk, blen);
+               blk += blen;
+       }
+
+       /*
+        * Check that xl_tot_len agrees with our calculation: after walking the
+        * rmgr data and every flagged backup block, blk must point exactly
+        * xl_tot_len bytes past the start of the record.
+        */
+       if (blk != (char *) record + record->xl_tot_len)
+       {
+               ereport(emode,
+                               (errmsg("incorrect total length in record at %X/%X",
+                                               recptr.xlogid, recptr.xrecoff)));
+               return false;
        }
 
-       return (true);
+       /*
+        * Finally include the record header, skipping its first
+        * sizeof(pg_crc32) bytes (presumably the xl_crc field itself, so that
+        * the stored CRC is not part of its own input -- verify against the
+        * XLogRecord layout).
+        */
+       COMP_CRC32(crc, (char *) record + sizeof(pg_crc32),
+                          SizeOfXLogRecord - sizeof(pg_crc32));
+       FIN_CRC32(crc);
+
+       if (!EQ_CRC32(record->xl_crc, crc))
+       {
+               ereport(emode,
+                               (errmsg("incorrect resource manager data checksum in record at %X/%X",
+                                               recptr.xlogid, recptr.xrecoff)));
+               return false;
+       }
+
+       return true;
 }
 
 /*
@@ -2382,7 +2497,6 @@ ReadRecord(XLogRecPtr *RecPtr, int emode)
        uint32          targetPageOff;
        uint32          targetRecOff;
        uint32          pageHeaderSize;
-       unsigned        i;
 
        if (readBuf == NULL)
        {
@@ -2518,6 +2632,15 @@ got_record:;
                                                RecPtr->xlogid, RecPtr->xrecoff)));
                goto next_record_is_invalid;
        }
+       if (record->xl_tot_len < SizeOfXLogRecord + record->xl_len ||
+               record->xl_tot_len > SizeOfXLogRecord + record->xl_len +
+               XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
+       {
+               ereport(emode,
+                               (errmsg("invalid record length at %X/%X",
+                                               RecPtr->xlogid, RecPtr->xrecoff)));
+               goto next_record_is_invalid;
+       }
        if (record->xl_rmid > RM_MAX_ID)
        {
                ereport(emode,
@@ -2557,18 +2680,6 @@ got_record:;
                }
        }
 
-       /*
-        * Compute total length of record including any appended backup
-        * blocks.
-        */
-       total_len = SizeOfXLogRecord + record->xl_len;
-       for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
-       {
-               if (!(record->xl_info & XLR_SET_BKP_BLOCK(i)))
-                       continue;
-               total_len += sizeof(BkpBlock) + BLCKSZ;
-       }
-
        /*
         * Allocate or enlarge readRecordBuf as needed.  To avoid useless
         * small increases, round its size to a multiple of BLCKSZ, and make
@@ -2576,6 +2687,7 @@ got_record:;
         * "normal" records, but very large commit or abort records might need
         * more space.)
         */
+       total_len = record->xl_tot_len;
        if (total_len > readRecordBufSize)
        {
                uint32          newSize = total_len;
@@ -2666,15 +2778,15 @@ got_record:;
                        goto next_record_is_invalid;
                pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
                if (BLCKSZ - SizeOfXLogRecord >= pageHeaderSize +
-                       SizeOfXLogContRecord + MAXALIGN(contrecord->xl_rem_len))
+                       MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len))
                {
                        nextRecord = (XLogRecord *) ((char *) contrecord +
-                               SizeOfXLogContRecord + MAXALIGN(contrecord->xl_rem_len));
+                               MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len));
                }
                EndRecPtr.xlogid = readId;
                EndRecPtr.xrecoff = readSeg * XLogSegSize + readOff +
-                       pageHeaderSize + SizeOfXLogContRecord +
-                       MAXALIGN(contrecord->xl_rem_len);
+                       pageHeaderSize +
+                       MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len);
                ReadRecPtr = *RecPtr;
                return record;
        }
@@ -3194,11 +3306,11 @@ WriteControlFile(void)
        StrNCpy(ControlFile->lc_ctype, localeptr, LOCALE_NAME_BUFLEN);
 
        /* Contents are protected with a CRC */
-       INIT_CRC64(ControlFile->crc);
-       COMP_CRC64(ControlFile->crc,
-                          (char *) ControlFile + sizeof(crc64),
-                          sizeof(ControlFileData) - sizeof(crc64));
-       FIN_CRC64(ControlFile->crc);
+       INIT_CRC32(ControlFile->crc);
+       COMP_CRC32(ControlFile->crc,
+                          (char *) ControlFile,
+                          offsetof(ControlFileData, crc));
+       FIN_CRC32(ControlFile->crc);
 
        /*
         * We write out BLCKSZ bytes into pg_control, zero-padding the excess
@@ -3247,7 +3359,7 @@ WriteControlFile(void)
 static void
 ReadControlFile(void)
 {
-       crc64           crc;
+       pg_crc32        crc;
        int                     fd;
 
        /*
@@ -3281,13 +3393,13 @@ ReadControlFile(void)
                                        ControlFile->pg_control_version, PG_CONTROL_VERSION),
                                 errhint("It looks like you need to initdb.")));
        /* Now check the CRC. */
-       INIT_CRC64(crc);
-       COMP_CRC64(crc,
-                          (char *) ControlFile + sizeof(crc64),
-                          sizeof(ControlFileData) - sizeof(crc64));
-       FIN_CRC64(crc);
+       INIT_CRC32(crc);
+       COMP_CRC32(crc,
+                          (char *) ControlFile,
+                          offsetof(ControlFileData, crc));
+       FIN_CRC32(crc);
 
-       if (!EQ_CRC64(crc, ControlFile->crc))
+       if (!EQ_CRC32(crc, ControlFile->crc))
                ereport(FATAL,
                                (errmsg("incorrect checksum in control file")));
 
@@ -3396,11 +3508,11 @@ UpdateControlFile(void)
 {
        int                     fd;
 
-       INIT_CRC64(ControlFile->crc);
-       COMP_CRC64(ControlFile->crc,
-                          (char *) ControlFile + sizeof(crc64),
-                          sizeof(ControlFileData) - sizeof(crc64));
-       FIN_CRC64(ControlFile->crc);
+       INIT_CRC32(ControlFile->crc);
+       COMP_CRC32(ControlFile->crc,
+                          (char *) ControlFile,
+                          offsetof(ControlFileData, crc));
+       FIN_CRC32(ControlFile->crc);
 
        fd = BasicOpenFile(ControlFilePath, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
        if (fd < 0)
@@ -3525,7 +3637,7 @@ BootStrapXLOG(void)
        bool            use_existent;
        uint64          sysidentifier;
        struct timeval tv;
-       crc64           crc;
+       pg_crc32        crc;
 
        /*
         * Select a hopefully-unique system identifier code for this
@@ -3582,16 +3694,17 @@ BootStrapXLOG(void)
        record->xl_prev.xlogid = 0;
        record->xl_prev.xrecoff = 0;
        record->xl_xid = InvalidTransactionId;
+       record->xl_tot_len = SizeOfXLogRecord + sizeof(checkPoint);
        record->xl_len = sizeof(checkPoint);
        record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
        record->xl_rmid = RM_XLOG_ID;
        memcpy(XLogRecGetData(record), &checkPoint, sizeof(checkPoint));
 
-       INIT_CRC64(crc);
-       COMP_CRC64(crc, &checkPoint, sizeof(checkPoint));
-       COMP_CRC64(crc, (char *) record + sizeof(crc64),
-                          SizeOfXLogRecord - sizeof(crc64));
-       FIN_CRC64(crc);
+       INIT_CRC32(crc);
+       COMP_CRC32(crc, &checkPoint, sizeof(checkPoint));
+       COMP_CRC32(crc, (char *) record + sizeof(pg_crc32),
+                          SizeOfXLogRecord - sizeof(pg_crc32));
+       FIN_CRC32(crc);
        record->xl_crc = crc;
 
        /* Create first XLOG segment file */
@@ -4694,7 +4807,8 @@ ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt)
                }
                return NULL;
        }
-       if (record->xl_len != sizeof(CheckPoint))
+       if (record->xl_len != sizeof(CheckPoint) ||
+               record->xl_tot_len != SizeOfXLogRecord + sizeof(CheckPoint))
        {
                switch (whichChkpt)
                {
index c33a0011e600aca23b9a0d6d997707aea2205772..8f8ba9e0d2b364f3059fc2f7cc21070fdebdfa53 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.63 2005/03/22 06:17:03 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.64 2005/06/02 05:55:28 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -357,7 +357,7 @@ PageRepairFragmentation(Page page, OffsetNumber *unused)
                        lp = PageGetItemId(page, i + 1);
                        lp->lp_len = 0;         /* indicate unused & deallocated */
                }
-               ((PageHeader) page)->pd_upper = pd_special;
+               ((PageHeader) page)->pd_upper = pd_upper = pd_special;
        }
        else
        {                                                       /* nused != 0 */
@@ -411,11 +411,17 @@ PageRepairFragmentation(Page page, OffsetNumber *unused)
                        lp->lp_off = upper;
                }
 
-               ((PageHeader) page)->pd_upper = upper;
+               ((PageHeader) page)->pd_upper = pd_upper = upper;
 
                pfree(itemidbase);
        }
 
+       /*
+        * Zero out the now-free space.  This is not essential, but it allows
+        * xlog.c to compress WAL data better.
+        */
+       MemSet((char *) page + pd_lower, 0, pd_upper - pd_lower);
+
        return (nline - nused);
 }
 
@@ -525,6 +531,13 @@ PageIndexTupleDelete(Page page, OffsetNumber offnum)
        phdr->pd_upper += size;
        phdr->pd_lower -= sizeof(ItemIdData);
 
+       /*
+        * Zero out the just-freed space.  This is not essential, but it allows
+        * xlog.c to compress WAL data better.
+        */
+       MemSet((char *) page + phdr->pd_lower, 0, sizeof(ItemIdData));
+       MemSet(addr, 0, size);
+
        /*
         * Finally, we need to adjust the linp entries that remain.
         *
@@ -672,8 +685,14 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
                lp->lp_off = upper;
        }
 
-       phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
-       phdr->pd_upper = upper;
+       phdr->pd_lower = pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
+       phdr->pd_upper = pd_upper = upper;
+
+       /*
+        * Zero out the now-free space.  This is not essential, but it allows
+        * xlog.c to compress WAL data better.
+        */
+       MemSet((char *) page + pd_lower, 0, pd_upper - pd_lower);
 
        pfree(itemidbase);
 }
index bf23242a5ad74d5b239fc866f33234c5dd7890cb..211da1aa729a51a28d88c089addb82ba288712ea 100644 (file)
@@ -1,14 +1,25 @@
 /*-------------------------------------------------------------------------
  *
  * pg_crc.c
- *       PostgreSQL 64-bit CRC support
+ *       PostgreSQL CRC support
+ *
+ * See Ross Williams' excellent introduction
+ * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from
+ * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.
+ *
+ * We use a normal (not "reflected", in Williams' terms) CRC, using initial
+ * all-ones register contents and a final bit inversion.
+ *
+ * The 64-bit variant is not used as of PostgreSQL 8.1, but we retain the
+ * code for possible future use.
+ *
  *
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/hash/pg_crc.c,v 1.12 2004/12/31 22:01:37 pgsql Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/hash/pg_crc.c,v 1.13 2005/06/02 05:55:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "utils/pg_crc.h"
 
 
+/*
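+ * This table is based on the polynomial
+ *     x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
+ * (This is the same polynomial used in Ethernet checksums, for instance.)
+ * Note that the entries are stored in bit-reversed ("reflected", in
+ * Williams' terms) order: entry 0x80 is 0xEDB88320, which is the
+ * bit-reversed form of the polynomial constant 0x04C11DB7.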
+ */
+const uint32 pg_crc32_table[256] = {
+       0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA,
+       0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
+       0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,
+       0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91,
+       0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE,
+       0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
+       0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC,
+       0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5,
+       0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172,
+       0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B,
+       0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940,
+       0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
+       0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116,
+       0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F,
+       0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,
+       0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D,
+       0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A,
+       0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
+       0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818,
+       0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01,
+       0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,
+       0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457,
+       0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C,
+       0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
+       0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2,
+       0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB,
+       0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0,
+       0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9,
+       0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086,
+       0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
+       0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4,
+       0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD,
+       0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A,
+       0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683,
+       0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8,
+       0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
+       0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE,
+       0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7,
+       0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,
+       0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5,
+       0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252,
+       0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
+       0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60,
+       0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79,
+       0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,
+       0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F,
+       0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04,
+       0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
+       0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A,
+       0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713,
+       0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38,
+       0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21,
+       0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E,
+       0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
+       0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C,
+       0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45,
+       0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2,
+       0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB,
+       0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0,
+       0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
+       0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6,
+       0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF,
+       0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,
+       0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
+};
+
+
+#ifdef PROVIDE_64BIT_CRC
+
+/*
+ * This table is based on the polynomial
+ *
+ * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
+ * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
+ * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
+ * x^7 + x^4 + x + 1
+ *
+ * which is borrowed from the DLT1 spec
+ * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM)
+ */
+
 #ifdef INT64_IS_BUSTED
 
-const uint32 crc_table0[256] = {
+const uint32 pg_crc64_table0[256] = {
        0x00000000, 0xA9EA3693,
        0x53D46D26, 0xFA3E5BB5,
        0x0E42ECDF, 0xA7A8DA4C,
@@ -150,7 +248,7 @@ const uint32 crc_table0[256] = {
        0x676F8394, 0xCE85B507
 };
 
-const uint32 crc_table1[256] = {
+const uint32 pg_crc64_table1[256] = {
        0x00000000, 0x42F0E1EB,
        0x85E1C3D7, 0xC711223C,
        0x49336645, 0x0BC387AE,
@@ -283,7 +381,7 @@ const uint32 crc_table1[256] = {
 
 #else                                                  /* int64 works */
 
-const uint64 crc_table[256] = {
+const uint64 pg_crc64_table[256] = {
        UINT64CONST(0x0000000000000000), UINT64CONST(0x42F0E1EBA9EA3693),
        UINT64CONST(0x85E1C3D753D46D26), UINT64CONST(0xC711223CFA3E5BB5),
        UINT64CONST(0x493366450E42ECDF), UINT64CONST(0x0BC387AEA7A8DA4C),
@@ -415,3 +513,5 @@ const uint64 crc_table[256] = {
 };
 
 #endif   /* INT64_IS_BUSTED */
+
+#endif /* PROVIDE_64BIT_CRC */
index d89a934dfc2abf2798a82d6eb5d3d2ec5d745e92..77f61af06f467851216584e8fcae2eb40c13f8b2 100644 (file)
@@ -6,7 +6,7 @@
  * copyright (c) Oliver Elphick <olly@lfix.co.uk>, 2001;
  * licence: BSD
  *
- * $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.23 2005/04/28 21:47:16 tgl Exp $
+ * $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.24 2005/06/02 05:55:29 tgl Exp $
  */
 #include "postgres.h"
 
@@ -66,7 +66,7 @@ main(int argc, char *argv[])
        int                     fd;
        char            ControlFilePath[MAXPGPATH];
        char       *DataDir;
-       crc64           crc;
+       pg_crc32        crc;
        char            pgctime_str[128];
        char            ckpttime_str[128];
        char            sysident_str[32];
@@ -120,13 +120,13 @@ main(int argc, char *argv[])
        close(fd);
 
        /* Check the CRC. */
-       INIT_CRC64(crc);
-       COMP_CRC64(crc,
-                          (char *) &ControlFile + sizeof(crc64),
-                          sizeof(ControlFileData) - sizeof(crc64));
-       FIN_CRC64(crc);
+       INIT_CRC32(crc);
+       COMP_CRC32(crc,
+                          (char *) &ControlFile,
+                          offsetof(ControlFileData, crc));
+       FIN_CRC32(crc);
 
-       if (!EQ_CRC64(crc, ControlFile.crc))
+       if (!EQ_CRC32(crc, ControlFile.crc))
                printf(_("WARNING: Calculated CRC checksum does not match value stored in file.\n"
                                 "Either the file is corrupt, or it has a different layout than this program\n"
                         "is expecting.  The results below are untrustworthy.\n\n"));
index cabc5c00124cf4e9cf070e3171285f9507cf2427..6eceb0a3543943893ce9179736d1949df5485e77 100644 (file)
@@ -23,7 +23,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.32 2005/04/28 21:47:16 tgl Exp $
+ * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.33 2005/06/02 05:55:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -327,7 +327,7 @@ ReadControlFile(void)
        int                     fd;
        int                     len;
        char       *buffer;
-       crc64           crc;
+       pg_crc32        crc;
 
        if ((fd = open(ControlFilePath, O_RDONLY)) < 0)
        {
@@ -362,13 +362,13 @@ ReadControlFile(void)
                ((ControlFileData *) buffer)->pg_control_version == PG_CONTROL_VERSION)
        {
                /* Check the CRC. */
-               INIT_CRC64(crc);
-               COMP_CRC64(crc,
-                                  buffer + sizeof(crc64),
-                                  sizeof(ControlFileData) - sizeof(crc64));
-               FIN_CRC64(crc);
+               INIT_CRC32(crc);
+               COMP_CRC32(crc,
+                                  buffer,
+                                  offsetof(ControlFileData, crc));
+               FIN_CRC32(crc);
 
-               if (EQ_CRC64(crc, ((ControlFileData *) buffer)->crc))
+               if (EQ_CRC32(crc, ((ControlFileData *) buffer)->crc))
                {
                        /* Valid data... */
                        memcpy(&ControlFile, buffer, sizeof(ControlFile));
@@ -553,11 +553,11 @@ RewriteControlFile(void)
        ControlFile.prevCheckPoint.xrecoff = 0;
 
        /* Contents are protected with a CRC */
-       INIT_CRC64(ControlFile.crc);
-       COMP_CRC64(ControlFile.crc,
-                          (char *) &ControlFile + sizeof(crc64),
-                          sizeof(ControlFileData) - sizeof(crc64));
-       FIN_CRC64(ControlFile.crc);
+       INIT_CRC32(ControlFile.crc);
+       COMP_CRC32(ControlFile.crc,
+                          (char *) &ControlFile,
+                          offsetof(ControlFileData, crc));
+       FIN_CRC32(ControlFile.crc);
 
        /*
         * We write out BLCKSZ bytes into pg_control, zero-padding the excess
@@ -673,7 +673,7 @@ WriteEmptyXLOG(void)
        XLogPageHeader page;
        XLogLongPageHeader longpage;
        XLogRecord *record;
-       crc64           crc;
+       pg_crc32        crc;
        char            path[MAXPGPATH];
        int                     fd;
        int                     nbytes;
@@ -700,17 +700,18 @@ WriteEmptyXLOG(void)
        record->xl_prev.xlogid = 0;
        record->xl_prev.xrecoff = 0;
        record->xl_xid = InvalidTransactionId;
+       record->xl_tot_len = SizeOfXLogRecord + sizeof(CheckPoint);
        record->xl_len = sizeof(CheckPoint);
        record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
        record->xl_rmid = RM_XLOG_ID;
        memcpy(XLogRecGetData(record), &ControlFile.checkPointCopy,
                   sizeof(CheckPoint));
 
-       INIT_CRC64(crc);
-       COMP_CRC64(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint));
-       COMP_CRC64(crc, (char *) record + sizeof(crc64),
-                          SizeOfXLogRecord - sizeof(crc64));
-       FIN_CRC64(crc);
+       INIT_CRC32(crc);
+       COMP_CRC32(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint));
+       COMP_CRC32(crc, (char *) record + sizeof(pg_crc32),
+                          SizeOfXLogRecord - sizeof(pg_crc32));
+       FIN_CRC32(crc);
        record->xl_crc = crc;
 
        /* Write the first page */
index ab471738970a2a529d4f702a2fb00284cf42092a..1d1aa9c15268abd1dd0470922989515769ced4f3 100644 (file)
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.61 2005/05/20 14:53:26 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.62 2005/06/02 05:55:29 tgl Exp $
  */
 #ifndef XLOG_H
 #define XLOG_H
 
 
 /*
- * Header for each record in XLOG
+ * The overall layout of an XLOG record is:
+ *             Fixed-size header (XLogRecord struct)
+ *             rmgr-specific data
+ *             BkpBlock
+ *             backup block data
+ *             BkpBlock
+ *             backup block data
+ *             ...
  *
- * NOTE: xl_len counts only the rmgr data, not the XLogRecord header,
- * and also not any backup blocks appended to the record (which are signaled
- * by xl_info flag bits).  The total space needed for an XLOG record is
- * really:
- *
- * SizeOfXLogRecord + xl_len + n_backup_blocks * (sizeof(BkpBlock) + BLCKSZ)
+ * where there can be zero to three backup blocks (as signaled by xl_info flag
+ * bits).  XLogRecord structs always start on MAXALIGN boundaries in the WAL
+ * files, and we round up SizeOfXLogRecord so that the rmgr data is also
+ * guaranteed to begin on a MAXALIGN boundary.  However, no padding is added
+ * to align BkpBlock structs or backup block data.
  *
- * rounded up to a MAXALIGN boundary (so that all xlog records start on
- * MAXALIGN boundaries).
+ * NOTE: xl_len counts only the rmgr data, not the XLogRecord header,
+ * and also not any backup blocks.  xl_tot_len counts everything.  Neither
+ * length field is rounded up to an alignment boundary.
  */
 typedef struct XLogRecord
 {
-       crc64           xl_crc;                 /* CRC for this record */
+       pg_crc32        xl_crc;                 /* CRC for this record */
        XLogRecPtr      xl_prev;                /* ptr to previous record in log */
        TransactionId xl_xid;           /* xact id */
+       uint32          xl_tot_len;             /* total len of entire record */
        uint32          xl_len;                 /* total len of rmgr data */
        uint8           xl_info;                /* flag bits, see below */
        RmgrId          xl_rmid;                /* resource manager for this record */
index 75842328db4b47bc517e608f18c7f16a7143e50e..a0b0b761ccb2f2162138c45628c1f144b6c4dfd0 100644 (file)
@@ -11,7 +11,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.6 2004/12/31 22:03:21 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.7 2005/06/02 05:55:29 tgl Exp $
  */
 #ifndef XLOG_INTERNAL_H
 #define XLOG_INTERNAL_H
 /*
  * Header info for a backup block appended to an XLOG record.
  *
- * Note that the backup block has its own CRC, and is not covered by
- * the CRC of the XLOG record proper.  Also note that we don't attempt
- * to align either the BkpBlock struct or the block's data.
+ * As a trivial form of data compression, the XLOG code is aware that
+ * PG data pages usually contain an unused "hole" in the middle, which
+ * contains only zero bytes.  If hole_length > 0 then we have removed
+ * such a "hole" from the stored data (and it's not counted in the
+ * XLOG record's CRC, either).  Hence, the amount of block data actually
+ * present following the BkpBlock struct is BLCKSZ - hole_length bytes.
+ *
+ * Note that we don't attempt to align either the BkpBlock struct or the
+ * block's data.  So, the struct must be copied to aligned local storage
+ * before use.
  */
 typedef struct BkpBlock
 {
-       crc64           crc;
-       RelFileNode node;
-       BlockNumber block;
+       RelFileNode node;                       /* relation containing block */
+       BlockNumber block;                      /* block number */
+       uint16          hole_offset;    /* number of bytes before "hole" */
+       uint16          hole_length;    /* number of bytes in "hole" */
+
+       /* ACTUAL BLOCK DATA FOLLOWS AT END OF STRUCT */
 } BkpBlock;
 
 /*
@@ -42,8 +52,9 @@ typedef struct BkpBlock
  * XLogRecord header will never be split across pages; if there's less than
  * SizeOfXLogRecord space left at the end of a page, we just waste it.)
  *
- * Note that xl_rem_len includes backup-block data, unlike xl_len in the
- * initial header.
+ * Note that xl_rem_len includes backup-block data; that is, it tracks
+ * xl_tot_len not xl_len in the initial header.  Also note that the
+ * continuation data isn't necessarily aligned.
  */
 typedef struct XLogContRecord
 {
@@ -53,12 +64,12 @@ typedef struct XLogContRecord
 
 } XLogContRecord;
 
-#define SizeOfXLogContRecord   MAXALIGN(sizeof(XLogContRecord))
+#define SizeOfXLogContRecord   sizeof(XLogContRecord)
 
 /*
  * Each page of XLOG file has a header like this:
  */
-#define XLOG_PAGE_MAGIC 0xD05C /* can be used as WAL version indicator */
+#define XLOG_PAGE_MAGIC 0xD05D /* can be used as WAL version indicator */
 
 typedef struct XLogPageHeaderData
 {
index e60a879424a3462ba0873c9ef8115958d6fadd61..3f96b6bf261734a9cda9b1a9267f16f30a2e5d6d 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.21 2005/04/28 21:47:17 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.22 2005/06/02 05:55:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -22,7 +22,7 @@
 
 
 /* Version identifier for this pg_control format */
-#define PG_CONTROL_VERSION     81
+#define PG_CONTROL_VERSION     810
 
 /*
  * Body of CheckPoint XLOG records.  This is declared here because we keep
@@ -73,12 +73,17 @@ typedef enum DBState
 
 typedef struct ControlFileData
 {
-       crc64           crc;                    /* CRC for remainder of struct */
+       /*
+        * Unique system identifier --- to ensure we match up xlog files with
+        * the installation that produced them.
+        */
+       uint64          system_identifier;
 
        /*
-        * Version identifier information.      Keep these fields at the front,
+        * Version identifier information.      Keep these fields at the same offset,
         * especially pg_control_version; they won't be real useful if they
-        * move around.
+        * move around.  (For historical reasons they must be 8 bytes into
+        * the file rather than immediately at the front.)
         *
         * pg_control_version identifies the format of pg_control itself.
         * catalog_version_no identifies the format of the system catalogs.
@@ -90,12 +95,6 @@ typedef struct ControlFileData
        uint32          pg_control_version;             /* PG_CONTROL_VERSION */
        uint32          catalog_version_no;             /* see catversion.h */
 
-       /*
-        * Unique system identifier --- to ensure we match up xlog files with
-        * the installation that produced them.
-        */
-       uint64          system_identifier;
-
        /*
         * System status data
         */
@@ -127,6 +126,9 @@ typedef struct ControlFileData
        uint32          localeBuflen;
        char            lc_collate[LOCALE_NAME_BUFLEN];
        char            lc_ctype[LOCALE_NAME_BUFLEN];
+
+       /* CRC of all above ... MUST BE LAST! */
+       pg_crc32        crc;
 } ControlFileData;
 
 #endif   /* PG_CONTROL_H */
index 6638f75d74e4f7794b667d5c19f0d93e321ce246..5bf9ed76335e8b07d5d02030c49c56d123d3f9d5 100644 (file)
@@ -1,32 +1,65 @@
 /*
  * pg_crc.h
  *
- * PostgreSQL 64-bit CRC support
+ * PostgreSQL CRC support
+ *
+ * See Ross Williams' excellent introduction
+ * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from
+ * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.
+ *
+ * We use a normal (not "reflected", in Williams' terms) CRC, using initial
+ * all-ones register contents and a final bit inversion.
+ *
+ * The 64-bit variant is not used as of PostgreSQL 8.1, but we retain the
+ * code for possible future use.
+ *
  *
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/pg_crc.h,v 1.12 2004/12/31 22:03:46 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/utils/pg_crc.h,v 1.13 2005/06/02 05:55:29 tgl Exp $
  */
 #ifndef PG_CRC_H
 #define PG_CRC_H
 
+
+typedef uint32 pg_crc32;
+
+/* Initialize a CRC accumulator */
+#define INIT_CRC32(crc) ((crc) = 0xFFFFFFFF)
+
+/* Finish a CRC calculation */
+#define FIN_CRC32(crc) ((crc) ^= 0xFFFFFFFF)
+
+/*
+ * Accumulate some (more) bytes into a CRC.
+ *
+ *	crc:  accumulator, updated in place.  It is expanded several times
+ *	      inside the loop, so pass a plain lvalue without side effects.
+ *	data: start of the bytes to add; evaluated once and read as
+ *	      unsigned char.
+ *	len:  number of bytes to add; evaluated once and converted to uint32.
+ *
+ * For each input byte, the top 8 bits of crc are XORed with the byte and
+ * masked to 0..255 to select an entry of pg_crc32_table; crc is then
+ * shifted left by 8 bits and XORed with that entry.  The do/while(0)
+ * wrapper lets the macro be used as a single statement.
+ *
+ * NOTE(review): the left-shifting update step here is the "normal" form,
+ * but the table defined in pg_crc.c starts 0x00000000, 0x77073096, ...,
+ * which looks like the table normally paired with the right-shifting
+ * ("reflected") form.  If so, the value computed is presumably not the
+ * standard Ethernet CRC-32 --- confirm whether that is intended before
+ * relying on interoperability or on the usual error-detection guarantees.
+ */
+#define COMP_CRC32(crc, data, len)     \
+do { \
+       unsigned char *__data = (unsigned char *) (data); \
+       uint32          __len = (len); \
+\
+       while (__len-- > 0) \
+       { \
+               int             __tab_index = ((int) ((crc) >> 24) ^ *__data++) & 0xFF; \
+               (crc) = pg_crc32_table[__tab_index] ^ ((crc) << 8); \
+       } \
+} while (0)
+
+/* Check for equality of two CRCs */
+#define EQ_CRC32(c1,c2)  ((c1) == (c2))
+
+/* Constant table for CRC calculation */
+extern const uint32 pg_crc32_table[];
+
+
+#ifdef PROVIDE_64BIT_CRC
+
 /*
  * If we have a 64-bit integer type, then a 64-bit CRC looks just like the
- * usual sort of implementation.  (See Ross Williams' excellent introduction
- * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from
- * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.)
- * If we have no working 64-bit type, then fake it with two 32-bit registers.
- *
- * The present implementation is a normal (not "reflected", in Williams'
- * terms) 64-bit CRC, using initial all-ones register contents and a final
- * bit inversion.  The chosen polynomial is borrowed from the DLT1 spec
- * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM):
- *
- * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
- * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
- * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
- * x^7 + x^4 + x + 1
+ * usual sort of implementation.  If we have no working 64-bit type, then
+ * fake it with two 32-bit registers.  (Note: experience has shown that the
+ * two-32-bit-registers code is as fast as, or even much faster than, the
+ * 64-bit code on all but true 64-bit machines.  INT64_IS_BUSTED is therefore
+ * probably the wrong control symbol to use to select the implementation.)
  */
 
 #ifdef INT64_IS_BUSTED
  * all machines, we could do a configure test to decide how to order the
  * two fields, but it seems not worth the trouble.
  */
-typedef struct crc64
+typedef struct pg_crc64
 {
        uint32          crc0;
        uint32          crc1;
-} crc64;
+} pg_crc64;
 
 /* Initialize a CRC accumulator */
 #define INIT_CRC64(crc) ((crc).crc0 = 0xffffffff, (crc).crc1 = 0xffffffff)
@@ -62,8 +95,8 @@ do { \
        while (__len-- > 0) \
        { \
                int             __tab_index = ((int) (__crc1 >> 24) ^ *__data++) & 0xFF; \
-               __crc1 = crc_table1[__tab_index] ^ ((__crc1 << 8) | (__crc0 >> 24)); \
-               __crc0 = crc_table0[__tab_index] ^ (__crc0 << 8); \
+               __crc1 = pg_crc64_table1[__tab_index] ^ ((__crc1 << 8) | (__crc0 >> 24)); \
+               __crc0 = pg_crc64_table0[__tab_index] ^ (__crc0 << 8); \
        } \
        (crc).crc0 = __crc0; \
        (crc).crc1 = __crc1; \
@@ -73,15 +106,15 @@ do { \
 #define EQ_CRC64(c1,c2)  ((c1).crc0 == (c2).crc0 && (c1).crc1 == (c2).crc1)
 
 /* Constant table for CRC calculation */
-extern const uint32 crc_table0[];
-extern const uint32 crc_table1[];
+extern const uint32 pg_crc64_table0[];
+extern const uint32 pg_crc64_table1[];
 
 #else                                                  /* int64 works */
 
-typedef struct crc64
+typedef struct pg_crc64
 {
        uint64          crc0;
-} crc64;
+} pg_crc64;
 
 /* Initialize a CRC accumulator */
 #define INIT_CRC64(crc) ((crc).crc0 = UINT64CONST(0xffffffffffffffff))
@@ -99,7 +132,7 @@ do { \
        while (__len-- > 0) \
        { \
                int             __tab_index = ((int) (__crc0 >> 56) ^ *__data++) & 0xFF; \
-               __crc0 = crc_table[__tab_index] ^ (__crc0 << 8); \
+               __crc0 = pg_crc64_table[__tab_index] ^ (__crc0 << 8); \
        } \
        (crc).crc0 = __crc0; \
 } while (0)
@@ -108,7 +141,9 @@ do { \
 #define EQ_CRC64(c1,c2)  ((c1).crc0 == (c2).crc0)
 
 /* Constant table for CRC calculation */
-extern const uint64 crc_table[];
+extern const uint64 pg_crc64_table[];
 #endif   /* INT64_IS_BUSTED */
 
+#endif /* PROVIDE_64BIT_CRC */
+
 #endif   /* PG_CRC_H */