From: Tom Lane Date: Thu, 2 Jun 2005 05:55:29 +0000 (+0000) Subject: Change CRCs in WAL records from 64bit to 32bit for performance reasons. X-Git-Tag: REL8_1_0BETA1~674 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=21fda22ec46deb7734f793ef4d7fa6c226b4c78e;p=postgresql Change CRCs in WAL records from 64bit to 32bit for performance reasons. Instead of a separate CRC on each backup block, include backup blocks in their parent WAL record's CRC; this is important to ensure that the backup block really goes with the WAL record, ie there was not a page tear right at the start of the backup block. Implement a simple form of compression of backup blocks: drop any run of zeroes starting at pd_lower, so as not to store the unused 'hole' that commonly exists in PG heap and index pages. Tweak PageRepairFragmentation and related routines to ensure they keep the unused space zeroed, so that the above compression method remains effective. All per recent discussions. --- diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index ea02325318..b9d42bad6d 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.84 2005/05/07 21:32:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.85 2005/06/02 05:55:28 tgl Exp $ * * NOTES * Postgres btree pages look like ordinary relation pages. The opaque @@ -113,6 +113,13 @@ _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level) metaopaque = (BTPageOpaque) PageGetSpecialPointer(page); metaopaque->btpo_flags = BTP_META; + + /* + * Set pd_lower just past the end of the metadata. This is not + * essential but it makes the page look compressible to xlog.c. + */ + ((PageHeader) page)->pd_lower = + ((char *) metad + sizeof(BTMetaPageData)) - (char *) page; } /* diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index ade60619a3..536bc17718 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.20 2005/03/22 06:17:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.21 2005/06/02 05:55:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -135,6 +135,13 @@ _bt_restore_meta(Relation reln, XLogRecPtr lsn, pageop = (BTPageOpaque) PageGetSpecialPointer(metapg); pageop->btpo_flags = BTP_META; + /* + * Set pd_lower just past the end of the metadata. This is not + * essential but it makes the page look compressible to xlog.c. + */ + ((PageHeader) metapg)->pd_lower = + ((char *) md + sizeof(BTMetaPageData)) - (char *) metapg; + PageSetLSN(metapg, lsn); PageSetTLI(metapg, ThisTimeLineID); LockBuffer(metabuf, BUFFER_LOCK_UNLOCK); diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 2352313b05..27f6354987 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.194 2005/05/31 19:10:28 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.195 2005/06/02 05:55:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -434,6 +434,7 @@ static void exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg); static bool recoveryStopsHere(XLogRecord *record, bool *includeThis); +static void SetBkpBlock(BkpBlock *bkpb, Buffer buffer); static bool AdvanceXLInsertBuffer(void); static void XLogWrite(XLogwrtRqst WriteRqst); static int XLogFileInit(uint32 log, uint32 seg, @@ -499,8 +500,10 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata) bool dtbuf_bkp[XLR_MAX_BKP_BLOCKS]; BkpBlock dtbuf_xlg[XLR_MAX_BKP_BLOCKS]; XLogRecPtr dtbuf_lsn[XLR_MAX_BKP_BLOCKS]; - XLogRecData dtbuf_rdt[2 * XLR_MAX_BKP_BLOCKS]; - crc64 rdata_crc; + XLogRecData dtbuf_rdt1[XLR_MAX_BKP_BLOCKS]; + XLogRecData dtbuf_rdt2[XLR_MAX_BKP_BLOCKS]; + XLogRecData dtbuf_rdt3[XLR_MAX_BKP_BLOCKS]; + pg_crc32 rdata_crc; uint32 len, write_len; unsigned i; @@ -531,8 +534,10 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata) /* * Here we scan the rdata list, determine which buffers must be backed * up, and compute the CRC values for the data. Note that the record - * header isn't added into the CRC yet since we don't know the final - * length or info bits quite yet. + * header isn't added into the CRC initially since we don't know the + * final length or info bits quite yet. Thus, the CRC will represent + * the CRC of the whole record in the order "rdata, then backup blocks, + * then record header". * * We may have to loop back to here if a race condition is detected * below. We could prevent the race by doing all this work while @@ -553,7 +558,7 @@ begin:; dtbuf_bkp[i] = false; } - INIT_CRC64(rdata_crc); + INIT_CRC32(rdata_crc); len = 0; for (rdt = rdata;;) { @@ -561,7 +566,7 @@ begin:; { /* Simple data, just include it */ len += rdt->len; - COMP_CRC64(rdata_crc, rdt->data, rdt->len); + COMP_CRC32(rdata_crc, rdt->data, rdt->len); } else { @@ -576,7 +581,7 @@ begin:; else if (rdt->data) { len += rdt->len; - COMP_CRC64(rdata_crc, rdt->data, rdt->len); + COMP_CRC32(rdata_crc, rdt->data, rdt->len); } break; } @@ -591,26 +596,14 @@ begin:; dtbuf_lsn[i] = *((XLogRecPtr *) BufferGetBlock(rdt->buffer)); if (XLByteLE(dtbuf_lsn[i], RedoRecPtr)) { - crc64 dtcrc; - dtbuf_bkp[i] = true; + SetBkpBlock(&(dtbuf_xlg[i]), rdt->buffer); rdt->data = NULL; - INIT_CRC64(dtcrc); - COMP_CRC64(dtcrc, - BufferGetBlock(dtbuf[i]), - BLCKSZ); - dtbuf_xlg[i].node = BufferGetFileNode(dtbuf[i]); - dtbuf_xlg[i].block = BufferGetBlockNumber(dtbuf[i]); - COMP_CRC64(dtcrc, - (char *) &(dtbuf_xlg[i]) + sizeof(crc64), - sizeof(BkpBlock) - sizeof(crc64)); - FIN_CRC64(dtcrc); - dtbuf_xlg[i].crc = dtcrc; } else if (rdt->data) { len += rdt->len; - COMP_CRC64(rdata_crc, rdt->data, rdt->len); + COMP_CRC32(rdata_crc, rdt->data, rdt->len); } break; } @@ -625,6 +618,39 @@ begin:; rdt = rdt->next; } + /* + * Now add the backup block headers and data into the CRC + */ + for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++) + { + if (dtbuf_bkp[i]) + { + BkpBlock *bkpb = &(dtbuf_xlg[i]); + char *page; + + COMP_CRC32(rdata_crc, + (char *) bkpb, + sizeof(BkpBlock)); + page = (char *) BufferGetBlock(dtbuf[i]); + if (bkpb->hole_length == 0) + { + COMP_CRC32(rdata_crc, + page, + BLCKSZ); + } + else + { + /* must skip the hole */ + COMP_CRC32(rdata_crc, + page, + bkpb->hole_offset); + COMP_CRC32(rdata_crc, + page + (bkpb->hole_offset + bkpb->hole_length), + BLCKSZ - (bkpb->hole_offset + bkpb->hole_length)); + } + } + } + /* * NOTE: the test for len == 0 here is somewhat fishy, since in theory * all of the rmgr data might have been suppressed in favor of backup @@ -713,23 +739,49 @@ begin:; write_len = len; for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++) { + BkpBlock *bkpb; + char *page; + if (dtbuf[i] == InvalidBuffer || !(dtbuf_bkp[i])) continue; info |= XLR_SET_BKP_BLOCK(i); - rdt->next = &(dtbuf_rdt[2 * i]); + bkpb = &(dtbuf_xlg[i]); + page = (char *) BufferGetBlock(dtbuf[i]); + + rdt->next = &(dtbuf_rdt1[i]); + rdt = rdt->next; - dtbuf_rdt[2 * i].data = (char *) &(dtbuf_xlg[i]); - dtbuf_rdt[2 * i].len = sizeof(BkpBlock); + rdt->data = (char *) bkpb; + rdt->len = sizeof(BkpBlock); write_len += sizeof(BkpBlock); - rdt = dtbuf_rdt[2 * i].next = &(dtbuf_rdt[2 * i + 1]); + rdt->next = &(dtbuf_rdt2[i]); + rdt = rdt->next; - dtbuf_rdt[2 * i + 1].data = (char *) BufferGetBlock(dtbuf[i]); - dtbuf_rdt[2 * i + 1].len = BLCKSZ; - write_len += BLCKSZ; - dtbuf_rdt[2 * i + 1].next = NULL; + if (bkpb->hole_length == 0) + { + rdt->data = page; + rdt->len = BLCKSZ; + write_len += BLCKSZ; + rdt->next = NULL; + } + else + { + /* must skip the hole */ + rdt->data = page; + rdt->len = bkpb->hole_offset; + write_len += bkpb->hole_offset; + + rdt->next = &(dtbuf_rdt3[i]); + rdt = rdt->next; + + rdt->data = page + (bkpb->hole_offset + bkpb->hole_length); + rdt->len = BLCKSZ - (bkpb->hole_offset + bkpb->hole_length); + write_len += rdt->len; + rdt->next = NULL; + } } /* @@ -752,14 +804,15 @@ begin:; record->xl_prev = Insert->PrevRecord; record->xl_xid = GetCurrentTransactionIdIfAny(); + record->xl_tot_len = SizeOfXLogRecord + write_len; record->xl_len = len; /* doesn't include backup blocks */ record->xl_info = info; record->xl_rmid = rmid; - /* Now we can finish computing the main CRC */ - COMP_CRC64(rdata_crc, (char *) record + sizeof(crc64), - SizeOfXLogRecord - sizeof(crc64)); - FIN_CRC64(rdata_crc); + /* Now we can finish computing the record's CRC */ + COMP_CRC32(rdata_crc, (char *) record + sizeof(pg_crc32), + SizeOfXLogRecord - sizeof(pg_crc32)); + FIN_CRC32(rdata_crc); record->xl_crc = rdata_crc; /* Compute record's XLOG location */ @@ -884,6 +937,46 @@ begin:; return (RecPtr); } +/* + * Fill a BkpBlock struct given a buffer containing the page to be saved + * + * This is nontrivial only because it has to decide whether to apply "hole + * compression". + */ +static void +SetBkpBlock(BkpBlock *bkpb, Buffer buffer) +{ + PageHeader page; + uint16 offset; + uint16 length; + + /* Save page identity info */ + bkpb->node = BufferGetFileNode(buffer); + bkpb->block = BufferGetBlockNumber(buffer); + + /* Test whether there is a "hole" containing zeroes in the page */ + page = (PageHeader) BufferGetBlock(buffer); + offset = page->pd_lower; + /* Check if pd_lower appears sane at all */ + if (offset >= SizeOfPageHeaderData && offset < BLCKSZ) + { + char *spd = (char *) page + offset; + char *epd = (char *) page + BLCKSZ; + char *pd = spd; + + while (pd < epd && *pd == '\0') + pd++; + + length = pd - spd; + if (length == 0) + offset = 0; + } + else + offset = length = 0; + bkpb->hole_offset = offset; + bkpb->hole_length = length; +} + /* * XLogArchiveNotify * @@ -2276,7 +2369,7 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn) if (!(record->xl_info & XLR_SET_BKP_BLOCK(i))) continue; - memcpy((char *) &bkpb, blk, sizeof(BkpBlock)); + memcpy(&bkpb, blk, sizeof(BkpBlock)); blk += sizeof(BkpBlock); reln = XLogOpenRelation(true, record->xl_rmid, bkpb.node); @@ -2287,7 +2380,21 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn) if (BufferIsValid(buffer)) { page = (Page) BufferGetPage(buffer); - memcpy((char *) page, blk, BLCKSZ); + + if (bkpb.hole_length == 0) + { + memcpy((char *) page, blk, BLCKSZ); + } + else + { + /* must zero-fill the hole */ + MemSet((char *) page, 0, BLCKSZ); + memcpy((char *) page, blk, bkpb.hole_offset); + memcpy((char *) page + (bkpb.hole_offset + bkpb.hole_length), + blk + bkpb.hole_offset, + BLCKSZ - (bkpb.hole_offset + bkpb.hole_length)); + } + PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); @@ -2295,7 +2402,7 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn) } } - blk += BLCKSZ; + blk += BLCKSZ - bkpb.hole_length; } } @@ -2309,53 +2416,61 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn) static bool RecordIsValid(XLogRecord *record, XLogRecPtr recptr, int emode) { - crc64 crc; - crc64 cbuf; + pg_crc32 crc; int i; uint32 len = record->xl_len; + BkpBlock bkpb; char *blk; - /* Check CRC of rmgr data and record header */ - INIT_CRC64(crc); - COMP_CRC64(crc, XLogRecGetData(record), len); - COMP_CRC64(crc, (char *) record + sizeof(crc64), - SizeOfXLogRecord - sizeof(crc64)); - FIN_CRC64(crc); + /* First the rmgr data */ + INIT_CRC32(crc); + COMP_CRC32(crc, XLogRecGetData(record), len); - if (!EQ_CRC64(record->xl_crc, crc)) - { - ereport(emode, - (errmsg("incorrect resource manager data checksum in record at %X/%X", - recptr.xlogid, recptr.xrecoff))); - return (false); - } - - /* Check CRCs of backup blocks, if any */ + /* Add in the backup blocks, if any */ blk = (char *) XLogRecGetData(record) + len; for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++) { + uint32 blen; + if (!(record->xl_info & XLR_SET_BKP_BLOCK(i))) continue; - INIT_CRC64(crc); - COMP_CRC64(crc, blk + sizeof(BkpBlock), BLCKSZ); - COMP_CRC64(crc, blk + sizeof(crc64), - sizeof(BkpBlock) - sizeof(crc64)); - FIN_CRC64(crc); - memcpy((char *) &cbuf, blk, sizeof(crc64)); /* don't assume - * alignment */ - - if (!EQ_CRC64(cbuf, crc)) + memcpy(&bkpb, blk, sizeof(BkpBlock)); + if (bkpb.hole_offset + bkpb.hole_length > BLCKSZ) { ereport(emode, - (errmsg("incorrect checksum of backup block %d in record at %X/%X", - i + 1, recptr.xlogid, recptr.xrecoff))); - return (false); + (errmsg("incorrect hole size in record at %X/%X", + recptr.xlogid, recptr.xrecoff))); + return false; } - blk += sizeof(BkpBlock) + BLCKSZ; + blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length; + COMP_CRC32(crc, blk, blen); + blk += blen; + } + + /* Check that xl_tot_len agrees with our calculation */ + if (blk != (char *) record + record->xl_tot_len) + { + ereport(emode, + (errmsg("incorrect total length in record at %X/%X", + recptr.xlogid, recptr.xrecoff))); + return false; } - return (true); + /* Finally include the record header */ + COMP_CRC32(crc, (char *) record + sizeof(pg_crc32), + SizeOfXLogRecord - sizeof(pg_crc32)); + FIN_CRC32(crc); + + if (!EQ_CRC32(record->xl_crc, crc)) + { + ereport(emode, + (errmsg("incorrect resource manager data checksum in record at %X/%X", + recptr.xlogid, recptr.xrecoff))); + return false; + } + + return true; } /* @@ -2382,7 +2497,6 @@ ReadRecord(XLogRecPtr *RecPtr, int emode) uint32 targetPageOff; uint32 targetRecOff; uint32 pageHeaderSize; - unsigned i; if (readBuf == NULL) { @@ -2518,6 +2632,15 @@ got_record:; RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; } + if (record->xl_tot_len < SizeOfXLogRecord + record->xl_len || + record->xl_tot_len > SizeOfXLogRecord + record->xl_len + + XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ)) + { + ereport(emode, + (errmsg("invalid record length at %X/%X", + RecPtr->xlogid, RecPtr->xrecoff))); + goto next_record_is_invalid; + } if (record->xl_rmid > RM_MAX_ID) { ereport(emode, @@ -2557,18 +2680,6 @@ got_record:; } } - /* - * Compute total length of record including any appended backup - * blocks. - */ - total_len = SizeOfXLogRecord + record->xl_len; - for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++) - { - if (!(record->xl_info & XLR_SET_BKP_BLOCK(i))) - continue; - total_len += sizeof(BkpBlock) + BLCKSZ; - } - /* * Allocate or enlarge readRecordBuf as needed. To avoid useless * small increases, round its size to a multiple of BLCKSZ, and make @@ -2576,6 +2687,7 @@ got_record:; * "normal" records, but very large commit or abort records might need * more space.) */ + total_len = record->xl_tot_len; if (total_len > readRecordBufSize) { uint32 newSize = total_len; @@ -2666,15 +2778,15 @@ got_record:; goto next_record_is_invalid; pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf); if (BLCKSZ - SizeOfXLogRecord >= pageHeaderSize + - SizeOfXLogContRecord + MAXALIGN(contrecord->xl_rem_len)) + MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len)) { nextRecord = (XLogRecord *) ((char *) contrecord + - SizeOfXLogContRecord + MAXALIGN(contrecord->xl_rem_len)); + MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len)); } EndRecPtr.xlogid = readId; EndRecPtr.xrecoff = readSeg * XLogSegSize + readOff + - pageHeaderSize + SizeOfXLogContRecord + - MAXALIGN(contrecord->xl_rem_len); + pageHeaderSize + + MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len); ReadRecPtr = *RecPtr; return record; } @@ -3194,11 +3306,11 @@ WriteControlFile(void) StrNCpy(ControlFile->lc_ctype, localeptr, LOCALE_NAME_BUFLEN); /* Contents are protected with a CRC */ - INIT_CRC64(ControlFile->crc); - COMP_CRC64(ControlFile->crc, - (char *) ControlFile + sizeof(crc64), - sizeof(ControlFileData) - sizeof(crc64)); - FIN_CRC64(ControlFile->crc); + INIT_CRC32(ControlFile->crc); + COMP_CRC32(ControlFile->crc, + (char *) ControlFile, + offsetof(ControlFileData, crc)); + FIN_CRC32(ControlFile->crc); /* * We write out BLCKSZ bytes into pg_control, zero-padding the excess @@ -3247,7 +3359,7 @@ WriteControlFile(void) static void ReadControlFile(void) { - crc64 crc; + pg_crc32 crc; int fd; /* @@ -3281,13 +3393,13 @@ ReadControlFile(void) ControlFile->pg_control_version, PG_CONTROL_VERSION), errhint("It looks like you need to initdb."))); /* Now check the CRC. */ - INIT_CRC64(crc); - COMP_CRC64(crc, - (char *) ControlFile + sizeof(crc64), - sizeof(ControlFileData) - sizeof(crc64)); - FIN_CRC64(crc); + INIT_CRC32(crc); + COMP_CRC32(crc, + (char *) ControlFile, + offsetof(ControlFileData, crc)); + FIN_CRC32(crc); - if (!EQ_CRC64(crc, ControlFile->crc)) + if (!EQ_CRC32(crc, ControlFile->crc)) ereport(FATAL, (errmsg("incorrect checksum in control file"))); @@ -3396,11 +3508,11 @@ UpdateControlFile(void) { int fd; - INIT_CRC64(ControlFile->crc); - COMP_CRC64(ControlFile->crc, - (char *) ControlFile + sizeof(crc64), - sizeof(ControlFileData) - sizeof(crc64)); - FIN_CRC64(ControlFile->crc); + INIT_CRC32(ControlFile->crc); + COMP_CRC32(ControlFile->crc, + (char *) ControlFile, + offsetof(ControlFileData, crc)); + FIN_CRC32(ControlFile->crc); fd = BasicOpenFile(ControlFilePath, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR); if (fd < 0) @@ -3525,7 +3637,7 @@ BootStrapXLOG(void) bool use_existent; uint64 sysidentifier; struct timeval tv; - crc64 crc; + pg_crc32 crc; /* * Select a hopefully-unique system identifier code for this @@ -3582,16 +3694,17 @@ BootStrapXLOG(void) record->xl_prev.xlogid = 0; record->xl_prev.xrecoff = 0; record->xl_xid = InvalidTransactionId; + record->xl_tot_len = SizeOfXLogRecord + sizeof(checkPoint); record->xl_len = sizeof(checkPoint); record->xl_info = XLOG_CHECKPOINT_SHUTDOWN; record->xl_rmid = RM_XLOG_ID; memcpy(XLogRecGetData(record), &checkPoint, sizeof(checkPoint)); - INIT_CRC64(crc); - COMP_CRC64(crc, &checkPoint, sizeof(checkPoint)); - COMP_CRC64(crc, (char *) record + sizeof(crc64), - SizeOfXLogRecord - sizeof(crc64)); - FIN_CRC64(crc); + INIT_CRC32(crc); + COMP_CRC32(crc, &checkPoint, sizeof(checkPoint)); + COMP_CRC32(crc, (char *) record + sizeof(pg_crc32), + SizeOfXLogRecord - sizeof(pg_crc32)); + FIN_CRC32(crc); record->xl_crc = crc; /* Create first XLOG segment file */ @@ -4694,7 +4807,8 @@ ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt) } return NULL; } - if (record->xl_len != sizeof(CheckPoint)) + if (record->xl_len != sizeof(CheckPoint) || + record->xl_tot_len != SizeOfXLogRecord + sizeof(CheckPoint)) { switch (whichChkpt) { diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index c33a0011e6..8f8ba9e0d2 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.63 2005/03/22 06:17:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.64 2005/06/02 05:55:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -357,7 +357,7 @@ PageRepairFragmentation(Page page, OffsetNumber *unused) lp = PageGetItemId(page, i + 1); lp->lp_len = 0; /* indicate unused & deallocated */ } - ((PageHeader) page)->pd_upper = pd_special; + ((PageHeader) page)->pd_upper = pd_upper = pd_special; } else { /* nused != 0 */ @@ -411,11 +411,17 @@ PageRepairFragmentation(Page page, OffsetNumber *unused) lp->lp_off = upper; } - ((PageHeader) page)->pd_upper = upper; + ((PageHeader) page)->pd_upper = pd_upper = upper; pfree(itemidbase); } + /* + * Zero out the now-free space. This is not essential, but it allows + * xlog.c to compress WAL data better. + */ + MemSet((char *) page + pd_lower, 0, pd_upper - pd_lower); + return (nline - nused); } @@ -525,6 +531,13 @@ PageIndexTupleDelete(Page page, OffsetNumber offnum) phdr->pd_upper += size; phdr->pd_lower -= sizeof(ItemIdData); + /* + * Zero out the just-freed space. This is not essential, but it allows + * xlog.c to compress WAL data better. + */ + MemSet((char *) page + phdr->pd_lower, 0, sizeof(ItemIdData)); + MemSet(addr, 0, size); + /* * Finally, we need to adjust the linp entries that remain. * @@ -672,8 +685,14 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems) lp->lp_off = upper; } - phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData); - phdr->pd_upper = upper; + phdr->pd_lower = pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData); + phdr->pd_upper = pd_upper = upper; + + /* + * Zero out the now-free space. This is not essential, but it allows + * xlog.c to compress WAL data better. + */ + MemSet((char *) page + pd_lower, 0, pd_upper - pd_lower); pfree(itemidbase); } diff --git a/src/backend/utils/hash/pg_crc.c b/src/backend/utils/hash/pg_crc.c index bf23242a5a..211da1aa72 100644 --- a/src/backend/utils/hash/pg_crc.c +++ b/src/backend/utils/hash/pg_crc.c @@ -1,14 +1,25 @@ /*------------------------------------------------------------------------- * * pg_crc.c - * PostgreSQL 64-bit CRC support + * PostgreSQL CRC support + * + * See Ross Williams' excellent introduction + * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from + * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites. + * + * We use a normal (not "reflected", in Williams' terms) CRC, using initial + * all-ones register contents and a final bit inversion. + * + * The 64-bit variant is not used as of PostgreSQL 8.1, but we retain the + * code for possible future use. + * * * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/hash/pg_crc.c,v 1.12 2004/12/31 22:01:37 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/utils/hash/pg_crc.c,v 1.13 2005/06/02 05:55:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -17,9 +28,96 @@ #include "utils/pg_crc.h" +/* + * This table is based on the polynomial + * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. + * (This is the same polynomial used in Ethernet checksums, for instance.) + */ +const uint32 pg_crc32_table[256] = { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, + 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, + 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, + 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, + 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, + 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, + 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, + 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, + 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, + 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, + 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, + 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, + 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, + 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, + 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, + 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, + 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, + 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, + 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, + 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, + 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, + 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, + 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, + 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, + 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, + 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, + 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, + 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, + 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, + 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, + 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, + 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, + 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D +}; + + +#ifdef PROVIDE_64BIT_CRC + +/* + * This table is based on the polynomial + * + * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 + + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 + + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + + * x^7 + x^4 + x + 1 + * + * which is borrowed from the DLT1 spec + * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM) + */ + #ifdef INT64_IS_BUSTED -const uint32 crc_table0[256] = { +const uint32 pg_crc64_table0[256] = { 0x00000000, 0xA9EA3693, 0x53D46D26, 0xFA3E5BB5, 0x0E42ECDF, 0xA7A8DA4C, @@ -150,7 +248,7 @@ const uint32 crc_table0[256] = { 0x676F8394, 0xCE85B507 }; -const uint32 crc_table1[256] = { +const uint32 pg_crc64_table1[256] = { 0x00000000, 0x42F0E1EB, 0x85E1C3D7, 0xC711223C, 0x49336645, 0x0BC387AE, @@ -283,7 +381,7 @@ const uint32 crc_table1[256] = { #else /* int64 works */ -const uint64 crc_table[256] = { +const uint64 pg_crc64_table[256] = { UINT64CONST(0x0000000000000000), UINT64CONST(0x42F0E1EBA9EA3693), UINT64CONST(0x85E1C3D753D46D26), UINT64CONST(0xC711223CFA3E5BB5), UINT64CONST(0x493366450E42ECDF), UINT64CONST(0x0BC387AEA7A8DA4C), @@ -415,3 +513,5 @@ const uint64 crc_table[256] = { }; #endif /* INT64_IS_BUSTED */ + +#endif /* PROVIDE_64BIT_CRC */ diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index d89a934dfc..77f61af06f 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -6,7 +6,7 @@ * copyright (c) Oliver Elphick , 2001; * licence: BSD * - * $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.23 2005/04/28 21:47:16 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.24 2005/06/02 05:55:29 tgl Exp $ */ #include "postgres.h" @@ -66,7 +66,7 @@ main(int argc, char *argv[]) int fd; char ControlFilePath[MAXPGPATH]; char *DataDir; - crc64 crc; + pg_crc32 crc; char pgctime_str[128]; char ckpttime_str[128]; char sysident_str[32]; @@ -120,13 +120,13 @@ main(int argc, char *argv[]) close(fd); /* Check the CRC. */ - INIT_CRC64(crc); - COMP_CRC64(crc, - (char *) &ControlFile + sizeof(crc64), - sizeof(ControlFileData) - sizeof(crc64)); - FIN_CRC64(crc); + INIT_CRC32(crc); + COMP_CRC32(crc, + (char *) &ControlFile, + offsetof(ControlFileData, crc)); + FIN_CRC32(crc); - if (!EQ_CRC64(crc, ControlFile.crc)) + if (!EQ_CRC32(crc, ControlFile.crc)) printf(_("WARNING: Calculated CRC checksum does not match value stored in file.\n" "Either the file is corrupt, or it has a different layout than this program\n" "is expecting. The results below are untrustworthy.\n\n")); diff --git a/src/bin/pg_resetxlog/pg_resetxlog.c b/src/bin/pg_resetxlog/pg_resetxlog.c index cabc5c0012..6eceb0a354 100644 --- a/src/bin/pg_resetxlog/pg_resetxlog.c +++ b/src/bin/pg_resetxlog/pg_resetxlog.c @@ -23,7 +23,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.32 2005/04/28 21:47:16 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.33 2005/06/02 05:55:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -327,7 +327,7 @@ ReadControlFile(void) int fd; int len; char *buffer; - crc64 crc; + pg_crc32 crc; if ((fd = open(ControlFilePath, O_RDONLY)) < 0) { @@ -362,13 +362,13 @@ ReadControlFile(void) ((ControlFileData *) buffer)->pg_control_version == PG_CONTROL_VERSION) { /* Check the CRC. */ - INIT_CRC64(crc); - COMP_CRC64(crc, - buffer + sizeof(crc64), - sizeof(ControlFileData) - sizeof(crc64)); - FIN_CRC64(crc); + INIT_CRC32(crc); + COMP_CRC32(crc, + buffer, + offsetof(ControlFileData, crc)); + FIN_CRC32(crc); - if (EQ_CRC64(crc, ((ControlFileData *) buffer)->crc)) + if (EQ_CRC32(crc, ((ControlFileData *) buffer)->crc)) { /* Valid data... */ memcpy(&ControlFile, buffer, sizeof(ControlFile)); @@ -553,11 +553,11 @@ RewriteControlFile(void) ControlFile.prevCheckPoint.xrecoff = 0; /* Contents are protected with a CRC */ - INIT_CRC64(ControlFile.crc); - COMP_CRC64(ControlFile.crc, - (char *) &ControlFile + sizeof(crc64), - sizeof(ControlFileData) - sizeof(crc64)); - FIN_CRC64(ControlFile.crc); + INIT_CRC32(ControlFile.crc); + COMP_CRC32(ControlFile.crc, + (char *) &ControlFile, + offsetof(ControlFileData, crc)); + FIN_CRC32(ControlFile.crc); /* * We write out BLCKSZ bytes into pg_control, zero-padding the excess @@ -673,7 +673,7 @@ WriteEmptyXLOG(void) XLogPageHeader page; XLogLongPageHeader longpage; XLogRecord *record; - crc64 crc; + pg_crc32 crc; char path[MAXPGPATH]; int fd; int nbytes; @@ -700,17 +700,18 @@ WriteEmptyXLOG(void) record->xl_prev.xlogid = 0; record->xl_prev.xrecoff = 0; record->xl_xid = InvalidTransactionId; + record->xl_tot_len = SizeOfXLogRecord + sizeof(CheckPoint); record->xl_len = sizeof(CheckPoint); record->xl_info = XLOG_CHECKPOINT_SHUTDOWN; record->xl_rmid = RM_XLOG_ID; memcpy(XLogRecGetData(record), &ControlFile.checkPointCopy, sizeof(CheckPoint)); - INIT_CRC64(crc); - COMP_CRC64(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint)); - COMP_CRC64(crc, (char *) record + sizeof(crc64), - SizeOfXLogRecord - sizeof(crc64)); - FIN_CRC64(crc); + INIT_CRC32(crc); + COMP_CRC32(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint)); + COMP_CRC32(crc, (char *) record + sizeof(pg_crc32), + SizeOfXLogRecord - sizeof(pg_crc32)); + FIN_CRC32(crc); record->xl_crc = crc; /* Write the first page */ diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index ab47173897..1d1aa9c152 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.61 2005/05/20 14:53:26 momjian Exp $ + * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.62 2005/06/02 05:55:29 tgl Exp $ */ #ifndef XLOG_H #define XLOG_H @@ -19,23 +19,31 @@ /* - * Header for each record in XLOG + * The overall layout of an XLOG record is: + * Fixed-size header (XLogRecord struct) + * rmgr-specific data + * BkpBlock + * backup block data + * BkpBlock + * backup block data + * ... * - * NOTE: xl_len counts only the rmgr data, not the XLogRecord header, - * and also not any backup blocks appended to the record (which are signaled - * by xl_info flag bits). The total space needed for an XLOG record is - * really: - * - * SizeOfXLogRecord + xl_len + n_backup_blocks * (sizeof(BkpBlock) + BLCKSZ) + * where there can be zero to three backup blocks (as signaled by xl_info flag + * bits). XLogRecord structs always start on MAXALIGN boundaries in the WAL + * files, and we round up SizeOfXLogRecord so that the rmgr data is also + * guaranteed to begin on a MAXALIGN boundary. However, no padding is added + * to align BkpBlock structs or backup block data. * - * rounded up to a MAXALIGN boundary (so that all xlog records start on - * MAXALIGN boundaries). + * NOTE: xl_len counts only the rmgr data, not the XLogRecord header, + * and also not any backup blocks. xl_tot_len counts everything. Neither + * length field is rounded up to an alignment boundary. */ typedef struct XLogRecord { - crc64 xl_crc; /* CRC for this record */ + pg_crc32 xl_crc; /* CRC for this record */ XLogRecPtr xl_prev; /* ptr to previous record in log */ TransactionId xl_xid; /* xact id */ + uint32 xl_tot_len; /* total len of entire record */ uint32 xl_len; /* total len of rmgr data */ uint8 xl_info; /* flag bits, see below */ RmgrId xl_rmid; /* resource manager for this record */ diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 75842328db..a0b0b761cc 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -11,7 +11,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.6 2004/12/31 22:03:21 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.7 2005/06/02 05:55:29 tgl Exp $ */ #ifndef XLOG_INTERNAL_H #define XLOG_INTERNAL_H @@ -25,15 +25,25 @@ /* * Header info for a backup block appended to an XLOG record. * - * Note that the backup block has its own CRC, and is not covered by - * the CRC of the XLOG record proper. Also note that we don't attempt - * to align either the BkpBlock struct or the block's data. + * As a trivial form of data compression, the XLOG code is aware that + * PG data pages usually contain an unused "hole" in the middle, which + * contains only zero bytes. If hole_length > 0 then we have removed + * such a "hole" from the stored data (and it's not counted in the + * XLOG record's CRC, either). Hence, the amount of block data actually + * present following the BkpBlock struct is BLCKSZ - hole_length bytes. + * + * Note that we don't attempt to align either the BkpBlock struct or the + * block's data. So, the struct must be copied to aligned local storage + * before use. */ typedef struct BkpBlock { - crc64 crc; - RelFileNode node; - BlockNumber block; + RelFileNode node; /* relation containing block */ + BlockNumber block; /* block number */ + uint16 hole_offset; /* number of bytes before "hole" */ + uint16 hole_length; /* number of bytes in "hole" */ + + /* ACTUAL BLOCK DATA FOLLOWS AT END OF STRUCT */ } BkpBlock; /* @@ -42,8 +52,9 @@ typedef struct BkpBlock * XLogRecord header will never be split across pages; if there's less than * SizeOfXLogRecord space left at the end of a page, we just waste it.) * - * Note that xl_rem_len includes backup-block data, unlike xl_len in the - * initial header. + * Note that xl_rem_len includes backup-block data; that is, it tracks + * xl_tot_len not xl_len in the initial header. Also note that the + * continuation data isn't necessarily aligned. */ typedef struct XLogContRecord { @@ -53,12 +64,12 @@ typedef struct XLogContRecord } XLogContRecord; -#define SizeOfXLogContRecord MAXALIGN(sizeof(XLogContRecord)) +#define SizeOfXLogContRecord sizeof(XLogContRecord) /* * Each page of XLOG file has a header like this: */ -#define XLOG_PAGE_MAGIC 0xD05C /* can be used as WAL version indicator */ +#define XLOG_PAGE_MAGIC 0xD05D /* can be used as WAL version indicator */ typedef struct XLogPageHeaderData { diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index e60a879424..3f96b6bf26 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.21 2005/04/28 21:47:17 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.22 2005/06/02 05:55:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,7 +22,7 @@ /* Version identifier for this pg_control format */ -#define PG_CONTROL_VERSION 81 +#define PG_CONTROL_VERSION 810 /* * Body of CheckPoint XLOG records. This is declared here because we keep @@ -73,12 +73,17 @@ typedef enum DBState typedef struct ControlFileData { - crc64 crc; /* CRC for remainder of struct */ + /* + * Unique system identifier --- to ensure we match up xlog files with + * the installation that produced them. + */ + uint64 system_identifier; /* - * Version identifier information. Keep these fields at the front, + * Version identifier information. Keep these fields at the same offset, * especially pg_control_version; they won't be real useful if they - * move around. + * move around. (For historical reasons they must be 8 bytes into + * the file rather than immediately at the front.) * * pg_control_version identifies the format of pg_control itself. * catalog_version_no identifies the format of the system catalogs. @@ -90,12 +95,6 @@ typedef struct ControlFileData uint32 pg_control_version; /* PG_CONTROL_VERSION */ uint32 catalog_version_no; /* see catversion.h */ - /* - * Unique system identifier --- to ensure we match up xlog files with - * the installation that produced them. - */ - uint64 system_identifier; - /* * System status data */ @@ -127,6 +126,9 @@ typedef struct ControlFileData uint32 localeBuflen; char lc_collate[LOCALE_NAME_BUFLEN]; char lc_ctype[LOCALE_NAME_BUFLEN]; + + /* CRC of all above ... MUST BE LAST! */ + pg_crc32 crc; } ControlFileData; #endif /* PG_CONTROL_H */ diff --git a/src/include/utils/pg_crc.h b/src/include/utils/pg_crc.h index 6638f75d74..5bf9ed7633 100644 --- a/src/include/utils/pg_crc.h +++ b/src/include/utils/pg_crc.h @@ -1,32 +1,65 @@ /* * pg_crc.h * - * PostgreSQL 64-bit CRC support + * PostgreSQL CRC support + * + * See Ross Williams' excellent introduction + * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from + * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites. + * + * We use a normal (not "reflected", in Williams' terms) CRC, using initial + * all-ones register contents and a final bit inversion. + * + * The 64-bit variant is not used as of PostgreSQL 8.1, but we retain the + * code for possible future use. + * * * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/pg_crc.h,v 1.12 2004/12/31 22:03:46 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/utils/pg_crc.h,v 1.13 2005/06/02 05:55:29 tgl Exp $ */ #ifndef PG_CRC_H #define PG_CRC_H + +typedef uint32 pg_crc32; + +/* Initialize a CRC accumulator */ +#define INIT_CRC32(crc) ((crc) = 0xFFFFFFFF) + +/* Finish a CRC calculation */ +#define FIN_CRC32(crc) ((crc) ^= 0xFFFFFFFF) + +/* Accumulate some (more) bytes into a CRC */ +#define COMP_CRC32(crc, data, len) \ +do { \ + unsigned char *__data = (unsigned char *) (data); \ + uint32 __len = (len); \ +\ + while (__len-- > 0) \ + { \ + int __tab_index = ((int) ((crc) >> 24) ^ *__data++) & 0xFF; \ + (crc) = pg_crc32_table[__tab_index] ^ ((crc) << 8); \ + } \ +} while (0) + +/* Check for equality of two CRCs */ +#define EQ_CRC32(c1,c2) ((c1) == (c2)) + +/* Constant table for CRC calculation */ +extern const uint32 pg_crc32_table[]; + + +#ifdef PROVIDE_64BIT_CRC + /* * If we have a 64-bit integer type, then a 64-bit CRC looks just like the - * usual sort of implementation. (See Ross Williams' excellent introduction - * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from - * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.) - * If we have no working 64-bit type, then fake it with two 32-bit registers. - * - * The present implementation is a normal (not "reflected", in Williams' - * terms) 64-bit CRC, using initial all-ones register contents and a final - * bit inversion. The chosen polynomial is borrowed from the DLT1 spec - * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM): - * - * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 + - * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 + - * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + - * x^7 + x^4 + x + 1 + * usual sort of implementation. If we have no working 64-bit type, then + * fake it with two 32-bit registers. (Note: experience has shown that the + * two-32-bit-registers code is as fast as, or even much faster than, the + * 64-bit code on all but true 64-bit machines. INT64_IS_BUSTED is therefore + * probably the wrong control symbol to use to select the implementation.) */ #ifdef INT64_IS_BUSTED @@ -39,11 +72,11 @@ * all machines, we could do a configure test to decide how to order the * two fields, but it seems not worth the trouble. */ -typedef struct crc64 +typedef struct pg_crc64 { uint32 crc0; uint32 crc1; -} crc64; +} pg_crc64; /* Initialize a CRC accumulator */ #define INIT_CRC64(crc) ((crc).crc0 = 0xffffffff, (crc).crc1 = 0xffffffff) @@ -62,8 +95,8 @@ do { \ while (__len-- > 0) \ { \ int __tab_index = ((int) (__crc1 >> 24) ^ *__data++) & 0xFF; \ - __crc1 = crc_table1[__tab_index] ^ ((__crc1 << 8) | (__crc0 >> 24)); \ - __crc0 = crc_table0[__tab_index] ^ (__crc0 << 8); \ + __crc1 = pg_crc64_table1[__tab_index] ^ ((__crc1 << 8) | (__crc0 >> 24)); \ + __crc0 = pg_crc64_table0[__tab_index] ^ (__crc0 << 8); \ } \ (crc).crc0 = __crc0; \ (crc).crc1 = __crc1; \ @@ -73,15 +106,15 @@ do { \ #define EQ_CRC64(c1,c2) ((c1).crc0 == (c2).crc0 && (c1).crc1 == (c2).crc1) /* Constant table for CRC calculation */ -extern const uint32 crc_table0[]; -extern const uint32 crc_table1[]; +extern const uint32 pg_crc64_table0[]; +extern const uint32 pg_crc64_table1[]; #else /* int64 works */ -typedef struct crc64 +typedef struct pg_crc64 { uint64 crc0; -} crc64; +} pg_crc64; /* Initialize a CRC accumulator */ #define INIT_CRC64(crc) ((crc).crc0 = UINT64CONST(0xffffffffffffffff)) @@ -99,7 +132,7 @@ do { \ while (__len-- > 0) \ { \ int __tab_index = ((int) (__crc0 >> 56) ^ *__data++) & 0xFF; \ - __crc0 = crc_table[__tab_index] ^ (__crc0 << 8); \ + __crc0 = pg_crc64_table[__tab_index] ^ (__crc0 << 8); \ } \ (crc).crc0 = __crc0; \ } while (0) @@ -108,7 +141,9 @@ do { \ #define EQ_CRC64(c1,c2) ((c1).crc0 == (c2).crc0) /* Constant table for CRC calculation */ -extern const uint64 crc_table[]; +extern const uint64 pg_crc64_table[]; #endif /* INT64_IS_BUSTED */ +#endif /* PROVIDE_64BIT_CRC */ + #endif /* PG_CRC_H */