granicus.if.org Git - postgresql/commitdiff
Fix problems with loss of tuple commit status bits during WAL redo of
author Tom Lane <tgl@sss.pgh.pa.us>
Thu, 26 Sep 2002 22:46:29 +0000 (22:46 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Thu, 26 Sep 2002 22:46:29 +0000 (22:46 +0000)
VACUUM FULL tuple moves.  Store full-width t_infomask in WAL, rather
than storing low 8 bits and expecting to be able to reconstruct upper
bits.  While at it, remove redundant t_oid field from WAL headers
(the OID, if present, is now recorded in the data portion of the tuple).
WAL version number bumped --- this does not force an initdb, you can
instead run pg_resetxlog after a clean shutdown of the old postmaster.

src/backend/access/heap/heapam.c
src/include/access/htup.h
src/include/access/xlog.h

index d7df449a637b8d4a370dfafdee4b4a26d563965b..e5e509f92ca8d6adb4d7d53829603dfdc7f02ba0 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.148 2002/09/04 20:31:09 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.149 2002/09/26 22:46:29 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -1185,10 +1185,14 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
                rdata[0].len = SizeOfHeapInsert;
                rdata[0].next = &(rdata[1]);
 
-               xlhdr.t_oid = HeapTupleGetOid(tup);
                xlhdr.t_natts = tup->t_data->t_natts;
+               xlhdr.t_infomask = tup->t_data->t_infomask;
                xlhdr.t_hoff = tup->t_data->t_hoff;
-               xlhdr.mask = tup->t_data->t_infomask;
+               /*
+                * note we mark rdata[1] as belonging to buffer; if XLogInsert
+                * decides to write the whole page to the xlog, we don't need to
+                * store xl_heap_header in the xlog.
+                */
                rdata[1].buffer = buffer;
                rdata[1].data = (char *) &xlhdr;
                rdata[1].len = SizeOfHeapHeader;
@@ -1200,7 +1204,11 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
                rdata[2].len = tup->t_len - offsetof(HeapTupleHeaderData, t_bits);
                rdata[2].next = NULL;
 
-               /* If this is the single and first tuple on page... */
+               /*
+                * If this is the single and first tuple on page, we can reinit the
+                * page instead of restoring the whole thing.  Set flag, and hide
+                * buffer references from XLogInsert.
+                */
                if (ItemPointerGetOffsetNumber(&(tup->t_self)) == FirstOffsetNumber &&
                        PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
                {
@@ -2041,11 +2049,10 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from,
        rdata[1].len = 0;
        rdata[1].next = &(rdata[2]);
 
-       xlhdr.hdr.t_oid = HeapTupleGetOid(newtup);
        xlhdr.hdr.t_natts = newtup->t_data->t_natts;
+       xlhdr.hdr.t_infomask = newtup->t_data->t_infomask;
        xlhdr.hdr.t_hoff = newtup->t_data->t_hoff;
-       xlhdr.hdr.mask = newtup->t_data->t_infomask;
-       if (move)                                       /* remember xmin & xmax */
+       if (move)                                       /* remember xmax & xmin */
        {
                TransactionId xid[2];   /* xmax, xmin */
 
@@ -2060,6 +2067,10 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from,
                           2 * sizeof(TransactionId));
                hsize += 2 * sizeof(TransactionId);
        }
+       /*
+        * As with insert records, we need not store the rdata[2] segment
+        * if we decide to store the whole buffer instead.
+        */
        rdata[2].buffer = newbuf;
        rdata[2].data = (char *) &xlhdr;
        rdata[2].len = hsize;
@@ -2276,18 +2287,16 @@ heap_xlog_insert(bool redo, XLogRecPtr lsn, XLogRecord *record)
                htup = &tbuf.hdr;
                MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
                /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
-               memcpy((char *) &tbuf + offsetof(HeapTupleHeaderData, t_bits),
+               memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
                           (char *) xlrec + SizeOfHeapInsert + SizeOfHeapHeader,
                           newlen);
                newlen += offsetof(HeapTupleHeaderData, t_bits);
                htup->t_natts = xlhdr.t_natts;
+               htup->t_infomask = xlhdr.t_infomask;
                htup->t_hoff = xlhdr.t_hoff;
-               htup->t_infomask = HEAP_XMAX_INVALID | xlhdr.mask;
                HeapTupleHeaderSetXmin(htup, record->xl_xid);
                HeapTupleHeaderSetCmin(htup, FirstCommandId);
                htup->t_ctid = xlrec->target.tid;
-               if (reln->rd_rel->relhasoids)
-                       HeapTupleHeaderSetOid(htup, xlhdr.t_oid);
 
                offnum = PageAddItem(page, (Item) htup, newlen, offnum,
                                                         LP_USED | OverwritePageMode);
@@ -2454,34 +2463,27 @@ newsame:;
                htup = &tbuf.hdr;
                MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
                /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
-               memcpy((char *) &tbuf + offsetof(HeapTupleHeaderData, t_bits),
+               memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
                           (char *) xlrec + hsize,
                           newlen);
                newlen += offsetof(HeapTupleHeaderData, t_bits);
                htup->t_natts = xlhdr.t_natts;
+               htup->t_infomask = xlhdr.t_infomask;
                htup->t_hoff = xlhdr.t_hoff;
-               if (reln->rd_rel->relhasoids)
-                       HeapTupleHeaderSetOid(htup, xlhdr.t_oid);
 
                if (move)
                {
                        TransactionId xid[2];           /* xmax, xmin */
 
-                       hsize = SizeOfHeapUpdate + SizeOfHeapHeader;
                        memcpy((char *) xid,
-                                  (char *) xlrec + hsize, 2 * sizeof(TransactionId));
-                       htup->t_infomask = xlhdr.mask;
-                       htup->t_infomask &= ~(HEAP_XMIN_COMMITTED |
-                                                                 HEAP_XMIN_INVALID |
-                                                                 HEAP_MOVED_OFF);
-                       htup->t_infomask |= HEAP_MOVED_IN;
+                                  (char *) xlrec + SizeOfHeapUpdate + SizeOfHeapHeader,
+                                  2 * sizeof(TransactionId));
                        HeapTupleHeaderSetXmin(htup, xid[1]);
                        HeapTupleHeaderSetXmax(htup, xid[0]);
                        HeapTupleHeaderSetXvac(htup, record->xl_xid);
                }
                else
                {
-                       htup->t_infomask = HEAP_XMAX_INVALID | xlhdr.mask;
                        HeapTupleHeaderSetXmin(htup, record->xl_xid);
                        HeapTupleHeaderSetCmin(htup, FirstCommandId);
                }
index 6da127e4d4b028de2d3d8eadf8f66e756d88d348..f340c8143ac73e7944cd6f8123216d11fa782515 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: htup.h,v 1.60 2002/09/04 20:31:37 momjian Exp $
+ * $Id: htup.h,v 1.61 2002/09/26 22:46:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -261,6 +261,8 @@ do { \
 
 
 /*
+ * WAL record definitions for heapam.c's WAL operations
+ *
  * XLOG allows to store some information in high 4 bits of log
  * record xl_info field
  */
@@ -300,15 +302,22 @@ typedef struct xl_heap_delete
 
 #define SizeOfHeapDelete       (offsetof(xl_heap_delete, target) + SizeOfHeapTid)
 
+/*
+ * We don't store the whole fixed part (HeapTupleHeaderData) of an inserted
+ * or updated tuple in WAL; we can save a few bytes by reconstructing the
+ * fields that are available elsewhere in the WAL record, or perhaps just
+ * plain needn't be reconstructed.  These are the fields we must store.
+ * NOTE: t_hoff could be recomputed, but we may as well store it because
+ * it will come for free due to alignment considerations.
+ */
 typedef struct xl_heap_header
 {
-       Oid                     t_oid;
        int16           t_natts;
+       uint16          t_infomask;
        uint8           t_hoff;
-       uint8           mask;                   /* low 8 bits of t_infomask */
 } xl_heap_header;
 
-#define SizeOfHeapHeader       (offsetof(xl_heap_header, mask) + sizeof(uint8))
+#define SizeOfHeapHeader       (offsetof(xl_heap_header, t_hoff) + sizeof(uint8))
 
 /* This is what we need to know about insert */
 typedef struct xl_heap_insert
@@ -340,6 +349,8 @@ typedef struct xl_heap_clean
 
 #define SizeOfHeapClean (offsetof(xl_heap_clean, block) + sizeof(BlockNumber))
 
+
+
 /*
  * MaxTupleSize is the maximum allowed size of a tuple, including header and
  * MAXALIGN alignment padding. Basically it's BLCKSZ minus the other stuff
index bf2ce611f7f4fd18f2a9ceeadc44431acad3645f..c8c49936803e1fc1318fc1991a587703d333ed22 100644 (file)
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: xlog.h,v 1.37 2002/09/04 20:31:37 momjian Exp $
+ * $Id: xlog.h,v 1.38 2002/09/26 22:46:29 tgl Exp $
  */
 #ifndef XLOG_H
 #define XLOG_H
@@ -110,7 +110,7 @@ typedef struct XLogContRecord
 /*
  * Each page of XLOG file has a header like this:
  */
-#define XLOG_PAGE_MAGIC 0xD059 /* can be used as WAL version indicator */
+#define XLOG_PAGE_MAGIC 0xD05A /* can be used as WAL version indicator */
 
 typedef struct XLogPageHeaderData
 {