granicus.if.org Git - postgresql/commitdiff
Restructure local-buffer handling per recent pghackers discussion.
author: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 6 Aug 2002 02:36:35 +0000 (02:36 +0000)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 6 Aug 2002 02:36:35 +0000 (02:36 +0000)
The local buffer manager is no longer used for newly-created relations
(unless they are TEMP); a new non-TEMP relation goes through the shared
bufmgr and thus will participate normally in checkpoints.  But TEMP relations
use the local buffer manager throughout their lifespan.  Also, operations
in TEMP relations are not logged in WAL, thus improving performance.
Since it's no longer necessary to fsync relations as they move out of the
local buffers into shared buffers, quite a lot of smgr.c/md.c/fd.c code
is no longer needed and has been removed: there's no concept of a dirty
relation anymore in md.c/fd.c, and we never fsync anything but WAL.
Still TODO: improve local buffer management algorithms so that it would
be reasonable to increase NLocBuffer.

28 files changed:
src/backend/access/heap/heapam.c
src/backend/access/heap/hio.c
src/backend/access/heap/tuptoaster.c
src/backend/access/nbtree/nbtinsert.c
src/backend/access/nbtree/nbtpage.c
src/backend/access/transam/xact.c
src/backend/access/transam/xlog.c
src/backend/catalog/heap.c
src/backend/catalog/indexing.c
src/backend/commands/sequence.c
src/backend/commands/vacuum.c
src/backend/commands/vacuumlazy.c
src/backend/executor/execUtils.c
src/backend/storage/buffer/buf_init.c
src/backend/storage/buffer/bufmgr.c
src/backend/storage/buffer/localbuf.c
src/backend/storage/file/fd.c
src/backend/storage/smgr/md.c
src/backend/storage/smgr/mm.c
src/backend/storage/smgr/smgr.c
src/backend/utils/cache/relcache.c
src/include/access/xlog.h
src/include/storage/buf_internals.h
src/include/storage/bufmgr.h
src/include/storage/fd.h
src/include/storage/smgr.h
src/include/utils/rel.h
src/include/utils/relcache.h

index 1abb938fdf8d6be76a31e15bbfa47ae8dea07b31..e9f69476283118541461e860696caf3a697c974f 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.143 2002/07/30 16:08:33 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.144 2002/08/06 02:36:33 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -1155,6 +1155,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
        pgstat_count_heap_insert(&relation->pgstat_info);
 
        /* XLOG stuff */
+       if (!relation->rd_istemp)
        {
                xl_heap_insert xlrec;
                xl_heap_header xlhdr;
@@ -1204,6 +1205,12 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
                PageSetLSN(page, recptr);
                PageSetSUI(page, ThisStartUpID);
        }
+       else
+       {
+               /* No XLOG record, but still need to flag that XID exists on disk */
+               MyXactMadeTempRelUpdate = true;
+       }
+
        END_CRIT_SECTION();
 
        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
@@ -1323,12 +1330,15 @@ l1:
        }
 
        START_CRIT_SECTION();
+
        /* store transaction information of xact deleting the tuple */
        tp.t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED |
                                                         HEAP_XMAX_INVALID | HEAP_MARKED_FOR_UPDATE);
        HeapTupleHeaderSetXmax(tp.t_data, GetCurrentTransactionId());
        HeapTupleHeaderSetCmax(tp.t_data, cid);
+
        /* XLOG stuff */
+       if (!relation->rd_istemp)
        {
                xl_heap_delete xlrec;
                XLogRecPtr      recptr;
@@ -1351,12 +1361,17 @@ l1:
                PageSetLSN(dp, recptr);
                PageSetSUI(dp, ThisStartUpID);
        }
+       else
+       {
+               /* No XLOG record, but still need to flag that XID exists on disk */
+               MyXactMadeTempRelUpdate = true;
+       }
+
        END_CRIT_SECTION();
 
        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 
 #ifdef TUPLE_TOASTER_ACTIVE
-
        /*
         * If the relation has toastable attributes, we need to delete no
         * longer needed items there too.  We have to do this before
@@ -1659,6 +1674,7 @@ l2:
        oldtup.t_data->t_ctid = newtup->t_self;
 
        /* XLOG stuff */
+       if (!relation->rd_istemp)
        {
                XLogRecPtr      recptr = log_heap_update(relation, buffer, oldtup.t_self,
                                                                                         newbuf, newtup, false);
@@ -1671,6 +1687,11 @@ l2:
                PageSetLSN(BufferGetPage(buffer), recptr);
                PageSetSUI(BufferGetPage(buffer), ThisStartUpID);
        }
+       else
+       {
+               /* No XLOG record, but still need to flag that XID exists on disk */
+               MyXactMadeTempRelUpdate = true;
+       }
 
        END_CRIT_SECTION();
 
@@ -1927,6 +1948,9 @@ log_heap_clean(Relation reln, Buffer buffer, char *unused, int unlen)
        XLogRecPtr      recptr;
        XLogRecData rdata[3];
 
+       /* Caller should not call me on a temp relation */
+       Assert(!reln->rd_istemp);
+
        xlrec.node = reln->rd_node;
        xlrec.block = BufferGetBlockNumber(buffer);
        rdata[0].buffer = InvalidBuffer;
@@ -1978,6 +2002,9 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from,
        Page            page = BufferGetPage(newbuf);
        uint8           info = (move) ? XLOG_HEAP_MOVE : XLOG_HEAP_UPDATE;
 
+       /* Caller should not call me on a temp relation */
+       Assert(!reln->rd_istemp);
+
        xlrec.target.node = reln->rd_node;
        xlrec.target.tid = from;
        xlrec.newtid = newtup->t_self;
@@ -2012,7 +2039,8 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from,
                        xid[0] = HeapTupleHeaderGetXmax(newtup->t_data);
                xid[1] = HeapTupleHeaderGetXmin(newtup->t_data);
                memcpy((char *) &xlhdr + hsize,
-                      (char *) xid,            2 * sizeof(TransactionId));
+                      (char *) xid,
+                          2 * sizeof(TransactionId));
                hsize += 2 * sizeof(TransactionId);
        }
        rdata[2].buffer = newbuf;
index 602ad748d9be9b2a8493f00794e759430daf80a9..67eb4ad7e244ba2055033ee7f1aae10c6845b93d 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Id: hio.c,v 1.45 2002/06/20 20:29:25 momjian Exp $
+ *       $Id: hio.c,v 1.46 2002/08/06 02:36:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -102,6 +102,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
        Size            pageFreeSpace;
        BlockNumber targetBlock,
                                otherBlock;
+       bool            needLock;
 
        len = MAXALIGN(len);            /* be conservative */
 
@@ -231,9 +232,12 @@ RelationGetBufferForTuple(Relation relation, Size len,
         *
         * We have to use a lock to ensure no one else is extending the rel at
         * the same time, else we will both try to initialize the same new
-        * page.
+        * page.  We can skip locking for new or temp relations, however,
+        * since no one else could be accessing them.
         */
-       if (!relation->rd_myxactonly)
+       needLock = !(relation->rd_isnew || relation->rd_istemp);
+
+       if (needLock)
                LockPage(relation, 0, ExclusiveLock);
 
        /*
@@ -249,7 +253,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
         * Release the file-extension lock; it's now OK for someone else to
         * extend the relation some more.
         */
-       if (!relation->rd_myxactonly)
+       if (needLock)
                UnlockPage(relation, 0, ExclusiveLock);
 
        /*
index 2945cf3458cb567ba7dedb522bb0ccf41c9c8bcd..1c09af2b30840eed8df8c1dea10ed862b1cb859d 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.33 2002/07/20 05:16:56 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.34 2002/08/06 02:36:33 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -915,7 +915,7 @@ toast_save_datum(Relation rel, Datum value)
                 */
                idxres = index_insert(toastidx, t_values, t_nulls,
                                                          &(toasttup->t_self),
-                                                         toastrel, toastidx->rd_uniqueindex);
+                                                         toastrel, toastidx->rd_index->indisunique);
                if (idxres == NULL)
                        elog(ERROR, "Failed to insert index entry for TOAST tuple");
 
index c0190859b7bfef787023bc09dc4ee6ff443fcd45..16d63e03c99e517e415dc989d84afefe82717b50 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.94 2002/07/02 05:48:44 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.95 2002/08/06 02:36:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -623,8 +623,11 @@ _bt_insertuple(Relation rel, Buffer buf,
        BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
 
        START_CRIT_SECTION();
+
        _bt_pgaddtup(rel, page, itemsz, btitem, newitemoff, "page");
+
        /* XLOG stuff */
+       if (!rel->rd_istemp)
        {
                xl_btree_insert xlrec;
                uint8           flag = XLOG_BTREE_INSERT;
@@ -866,6 +869,9 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
         * NO ELOG(ERROR) till right sibling is updated.
         */
        START_CRIT_SECTION();
+
+       /* XLOG stuff */
+       if (!rel->rd_istemp)
        {
                xl_btree_split xlrec;
                int                     flag = (newitemonleft) ?
@@ -891,7 +897,7 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
                BlockIdSet(&(xlrec.rightblk), ropaque->btpo_next);
 
                /*
-                * Dirrect access to page is not good but faster - we should
+                * Direct access to page is not good but faster - we should
                 * implement some new func in page API.
                 */
                xlrec.leftlen = ((PageHeader) leftpage)->pd_special -
@@ -1352,6 +1358,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
        (metad->btm_level)++;
 
        /* XLOG stuff */
+       if (!rel->rd_istemp)
        {
                xl_btree_newroot xlrec;
                XLogRecPtr      recptr;
@@ -1366,7 +1373,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
                rdata[0].next = &(rdata[1]);
 
                /*
-                * Dirrect access to page is not good but faster - we should
+                * Direct access to page is not good but faster - we should
                 * implement some new func in page API.
                 */
                rdata[1].buffer = InvalidBuffer;
@@ -1388,6 +1395,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
                PageSetLSN(rpage, recptr);
                PageSetSUI(rpage, ThisStartUpID);
        }
+
        END_CRIT_SECTION();
 
        /* write and let go of metapage buffer */
index 386cb6a07a55e2600ff9d25d262d053f0603b12e..110de69406669b2c9e6a8e6350b5acbb55144172 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.57 2002/06/20 20:29:25 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.58 2002/08/06 02:36:33 tgl Exp $
  *
  *     NOTES
  *        Postgres btree pages look like ordinary relation pages.      The opaque
@@ -173,6 +173,7 @@ _bt_getroot(Relation rel, int access)
                        rootopaque->btpo_flags |= (BTP_LEAF | BTP_ROOT);
 
                        /* XLOG stuff */
+                       if (!rel->rd_istemp)
                        {
                                xl_btree_newroot xlrec;
                                XLogRecPtr      recptr;
@@ -187,7 +188,8 @@ _bt_getroot(Relation rel, int access)
                                rdata.next = NULL;
 
                                recptr = XLogInsert(RM_BTREE_ID,
-                                                  XLOG_BTREE_NEWROOT | XLOG_BTREE_LEAF, &rdata);
+                                                                       XLOG_BTREE_NEWROOT | XLOG_BTREE_LEAF,
+                                                                       &rdata);
 
                                PageSetLSN(rootpage, recptr);
                                PageSetSUI(rootpage, ThisStartUpID);
@@ -457,6 +459,7 @@ _bt_itemdel(Relation rel, Buffer buf, ItemPointer tid)
        PageIndexTupleDelete(page, offno);
 
        /* XLOG stuff */
+       if (!rel->rd_istemp)
        {
                xl_btree_delete xlrec;
                XLogRecPtr      recptr;
index 3a992f6ccfe44b9169cc879baaf9d9f32a285026..c9b60daef563cc764a6ef6429ad3d6fb92fd9325 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.129 2002/08/02 22:36:05 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.130 2002/08/06 02:36:33 tgl Exp $
  *
  * NOTES
  *             Transaction aborts can now occur two ways:
@@ -505,44 +505,32 @@ AtStart_Memory(void)
  * ----------------------------------------------------------------
  */
 
-/* --------------------------------
+/*
  *             RecordTransactionCommit
- *
- *             Note: the two calls to BufferManagerFlush() exist to ensure
- *                       that data pages are written before log pages.  These
- *                       explicit calls should be replaced by a more efficient
- *                       ordered page write scheme in the buffer manager
- *                       -cim 3/18/90
- * --------------------------------
  */
 void
 RecordTransactionCommit(void)
 {
-       TransactionId xid;
-       bool            leak;
-
-       leak = BufferPoolCheckLeak();
-
-       xid = GetCurrentTransactionId();
-
        /*
-        * We only need to log the commit in xlog and clog if the transaction made
-        * any transaction-controlled XLOG entries.  (Otherwise, its XID appears
-        * nowhere in permanent storage, so no one will ever care if it
-        * committed.)  However, we must flush XLOG to disk if we made any XLOG
-        * entries, whether in or out of transaction control.  For example, if we
-        * reported a nextval() result to the client, this ensures that any XLOG
-        * record generated by nextval will hit the disk before we report the
-        * transaction committed.
+        * If we made neither any XLOG entries nor any temp-rel updates,
+        * we can omit recording the transaction commit at all.
         */
-       if (MyXactMadeXLogEntry)
+       if (MyXactMadeXLogEntry || MyXactMadeTempRelUpdate)
        {
+               TransactionId xid = GetCurrentTransactionId();
                XLogRecPtr      recptr;
 
+               /* Tell bufmgr and smgr to prepare for commit */
                BufmgrCommit();
 
                START_CRIT_SECTION();
 
+               /*
+                * We only need to log the commit in xlog if the transaction made any
+                * transaction-controlled XLOG entries.  (Otherwise, its XID appears
+                * nowhere in permanent storage, so no one else will ever care if it
+                * committed.)
+                */
                if (MyLastRecPtr.xrecoff != 0)
                {
                        /* Need to emit a commit record */
@@ -567,30 +555,48 @@ RecordTransactionCommit(void)
                }
 
                /*
-                * Sleep before flush! So we can flush more than one commit
-                * records per single fsync.  (The idea is some other backend may
-                * do the XLogFlush while we're sleeping.  This needs work still,
-                * because on most Unixen, the minimum select() delay is 10msec or
-                * more, which is way too long.)
-                *
-                * We do not sleep if enableFsync is not turned on, nor if there are
-                * fewer than CommitSiblings other backends with active
-                * transactions.
+                * We must flush our XLOG entries to disk if we made any XLOG entries,
+                * whether in or out of transaction control.  For example, if we
+                * reported a nextval() result to the client, this ensures that any
+                * XLOG record generated by nextval will hit the disk before we report
+                * the transaction committed.
                 */
-               if (CommitDelay > 0 && enableFsync &&
-                       CountActiveBackends() >= CommitSiblings)
+               if (MyXactMadeXLogEntry)
                {
-                       struct timeval delay;
+                       /*
+                        * Sleep before flush! So we can flush more than one commit
+                        * records per single fsync.  (The idea is some other backend may
+                        * do the XLogFlush while we're sleeping.  This needs work still,
+                        * because on most Unixen, the minimum select() delay is 10msec or
+                        * more, which is way too long.)
+                        *
+                        * We do not sleep if enableFsync is not turned on, nor if there
+                        * are fewer than CommitSiblings other backends with active
+                        * transactions.
+                        */
+                       if (CommitDelay > 0 && enableFsync &&
+                               CountActiveBackends() >= CommitSiblings)
+                       {
+                               struct timeval delay;
 
-                       delay.tv_sec = 0;
-                       delay.tv_usec = CommitDelay;
-                       (void) select(0, NULL, NULL, NULL, &delay);
-               }
+                               delay.tv_sec = 0;
+                               delay.tv_usec = CommitDelay;
+                               (void) select(0, NULL, NULL, NULL, &delay);
+                       }
 
-               XLogFlush(recptr);
+                       XLogFlush(recptr);
+               }
 
-               /* Mark the transaction committed in clog, if needed */
-               if (MyLastRecPtr.xrecoff != 0)
+               /*
+                * We must mark the transaction committed in clog if its XID appears
+                * either in permanent rels or in local temporary rels.  We test
+                * this by seeing if we made transaction-controlled entries *OR*
+                * local-rel tuple updates.  Note that if we made only the latter,
+                * we have not emitted an XLOG record for our commit, and so in the
+                * event of a crash the clog update might be lost.  This is okay
+                * because no one else will ever care whether we committed.
+                */
+               if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate)
                        TransactionIdCommit(xid);
 
                END_CRIT_SECTION();
@@ -599,12 +605,10 @@ RecordTransactionCommit(void)
        /* Break the chain of back-links in the XLOG records I output */
        MyLastRecPtr.xrecoff = 0;
        MyXactMadeXLogEntry = false;
+       MyXactMadeTempRelUpdate = false;
 
        /* Show myself as out of the transaction in PGPROC array */
        MyProc->logRec.xrecoff = 0;
-
-       if (leak)
-               ResetBufferPool(true);
 }
 
 
@@ -615,8 +619,10 @@ RecordTransactionCommit(void)
 static void
 AtCommit_Cache(void)
 {
-       /* Check for relcache reference-count leaks */
-       AtEOXactRelationCache(true);
+       /*
+        * Clean up the relation cache.
+        */
+       AtEOXact_RelationCache(true);
        /*
         * Make catalog changes visible to all backends.
         */
@@ -679,45 +685,60 @@ AtCommit_Memory(void)
  * ----------------------------------------------------------------
  */
 
-/* --------------------------------
+/*
  *             RecordTransactionAbort
- * --------------------------------
  */
 static void
 RecordTransactionAbort(void)
 {
-       TransactionId xid = GetCurrentTransactionId();
-
        /*
-        * We only need to log the abort in xlog and clog if the transaction made
-        * any transaction-controlled XLOG entries.  (Otherwise, its XID appears
-        * nowhere in permanent storage, so no one will ever care if it
-        * committed.)  We do not flush XLOG to disk in any case, since the
-        * default assumption after a crash would be that we aborted, anyway.
-        *
-        * Extra check here is to catch case that we aborted partway through
-        * RecordTransactionCommit ...
+        * If we made neither any transaction-controlled XLOG entries nor any
+        * temp-rel updates, we can omit recording the transaction abort at all.
+        * No one will ever care that it aborted.
         */
-       if (MyLastRecPtr.xrecoff != 0 && !TransactionIdDidCommit(xid))
+       if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate)
        {
-               XLogRecData rdata;
-               xl_xact_abort xlrec;
-               XLogRecPtr      recptr;
+               TransactionId xid = GetCurrentTransactionId();
 
-               xlrec.xtime = time(NULL);
-               rdata.buffer = InvalidBuffer;
-               rdata.data = (char *) (&xlrec);
-               rdata.len = SizeOfXactAbort;
-               rdata.next = NULL;
+               /*
+                * Catch the scenario where we aborted partway through
+                * RecordTransactionCommit ...
+                */
+               if (TransactionIdDidCommit(xid))
+                       elog(PANIC, "RecordTransactionAbort: xact %u already committed",
+                                xid);
 
                START_CRIT_SECTION();
 
                /*
-                * SHOULD SAVE ARRAY OF RELFILENODE-s TO DROP
+                * We only need to log the abort in XLOG if the transaction made any
+                * transaction-controlled XLOG entries.  (Otherwise, its XID appears
+                * nowhere in permanent storage, so no one else will ever care if it
+                * committed.)  We do not flush XLOG to disk in any case, since the
+                * default assumption after a crash would be that we aborted, anyway.
                 */
-               recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, &rdata);
+               if (MyLastRecPtr.xrecoff != 0)
+               {
+                       XLogRecData rdata;
+                       xl_xact_abort xlrec;
+                       XLogRecPtr      recptr;
+
+                       xlrec.xtime = time(NULL);
+                       rdata.buffer = InvalidBuffer;
+                       rdata.data = (char *) (&xlrec);
+                       rdata.len = SizeOfXactAbort;
+                       rdata.next = NULL;
+
+                       /*
+                        * SHOULD SAVE ARRAY OF RELFILENODE-s TO DROP
+                        */
+                       recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, &rdata);
+               }
 
-               /* Mark the transaction aborted in clog */
+               /*
+                * Mark the transaction aborted in clog.  This is not absolutely
+                * necessary but we may as well do it while we are here.
+                */
                TransactionIdAbort(xid);
 
                END_CRIT_SECTION();
@@ -726,14 +747,10 @@ RecordTransactionAbort(void)
        /* Break the chain of back-links in the XLOG records I output */
        MyLastRecPtr.xrecoff = 0;
        MyXactMadeXLogEntry = false;
+       MyXactMadeTempRelUpdate = false;
 
        /* Show myself as out of the transaction in PGPROC array */
        MyProc->logRec.xrecoff = 0;
-
-       /*
-        * Tell bufmgr and smgr to release resources.
-        */
-       ResetBufferPool(false);         /* false -> is abort */
 }
 
 /* --------------------------------
@@ -743,7 +760,7 @@ RecordTransactionAbort(void)
 static void
 AtAbort_Cache(void)
 {
-       AtEOXactRelationCache(false);
+       AtEOXact_RelationCache(false);
        AtEOXactInvalidationMessages(false);
 }
 
@@ -975,7 +992,6 @@ CommitTransaction(void)
         * noncritical resource releasing.
         */
 
-       RelationPurgeLocalRelation(true);
        smgrDoPendingDeletes(true);
 
        AtEOXact_GUC(true);
@@ -989,6 +1005,8 @@ CommitTransaction(void)
        AtCommit_Locks();
        AtEOXact_CatCache(true);
        AtCommit_Memory();
+       AtEOXact_Buffers(true);
+       smgrabort();
        AtEOXact_Files();
 
        /* Count transaction commit in statistics collector */
@@ -1076,7 +1094,6 @@ AbortTransaction(void)
                LWLockRelease(SInvalLock);
        }
 
-       RelationPurgeLocalRelation(false);
        smgrDoPendingDeletes(false);
 
        AtEOXact_GUC(false);
@@ -1089,6 +1106,7 @@ AbortTransaction(void)
        AtAbort_Cache();
        AtEOXact_CatCache(false);
        AtAbort_Memory();
+       AtEOXact_Buffers(false);
        AtEOXact_Files();
        AtAbort_Locks();
 
index 872722b856c7009f950dc95d69c0232238feac74..fbe61e5691c47a231bf35476c368ca8beb9437ce 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.100 2002/08/05 01:24:13 thomas Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.101 2002/08/06 02:36:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -136,11 +136,20 @@ bool              InRecovery = false;
  * to be set true.  The latter can be used to test whether the current xact
  * made any loggable changes (including out-of-xact changes, such as
  * sequence updates).
+ *
+ * When we insert/update/delete a tuple in a temporary relation, we do not
+ * make any XLOG record, since we don't care about recovering the state of
+ * the temp rel after a crash.  However, we will still need to remember
+ * whether our transaction committed or aborted in that case.  So, we must
+ * set MyXactMadeTempRelUpdate true to indicate that the XID will be of
+ * interest later.
  */
 XLogRecPtr     MyLastRecPtr = {0, 0};
 
 bool           MyXactMadeXLogEntry = false;
 
+bool           MyXactMadeTempRelUpdate = false;
+
 /*
  * ProcLastRecPtr points to the start of the last XLOG record inserted by the
  * current backend.  It is updated for all inserts, transaction-controlled
@@ -2923,6 +2932,7 @@ ShutdownXLOG(void)
        /* suppress in-transaction check in CreateCheckPoint */
        MyLastRecPtr.xrecoff = 0;
        MyXactMadeXLogEntry = false;
+       MyXactMadeTempRelUpdate = false;
 
        CritSectionCount++;
        CreateDummyCaches();
@@ -3084,12 +3094,10 @@ CreateCheckPoint(bool shutdown)
 
        /*
         * Having constructed the checkpoint record, ensure all shmem disk
-        * buffers are flushed to disk.
+        * buffers and commit-log buffers are flushed to disk.
         */
-       FlushBufferPool();
-
-       /* And commit-log buffers, too */
        CheckPointCLOG();
+       FlushBufferPool();
 
        /*
         * Now insert the checkpoint record into XLOG.
index b61ad73212721e203786d1620540a810f1fab7b8..6bf905c6ea23e19304068d2ea6ccdd37acee6fee 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.218 2002/08/05 03:29:16 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.219 2002/08/06 02:36:33 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -1919,7 +1919,7 @@ heap_truncate(Oid rid)
         * a rel created in the current xact (which would be deleted on abort,
         * anyway).
         */
-       if (IsTransactionBlock() && !rel->rd_myxactonly)
+       if (IsTransactionBlock() && !rel->rd_isnew)
                elog(ERROR, "TRUNCATE TABLE cannot run inside a transaction block");
 
        /*
index 7f558b4d9deeda50df687fa1ab1d24d1cfed8433..4206c33edb3cd5418562a98bcd2e6c78a7091b31 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/catalog/indexing.c,v 1.100 2002/08/05 03:29:16 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/catalog/indexing.c,v 1.101 2002/08/06 02:36:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -121,7 +121,7 @@ CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple)
                                                          nullv,        /* info on nulls */
                                                          &(heapTuple->t_self),         /* tid of heap tuple */
                                                          heapRelation,
-                                                         relationDescs[i]->rd_uniqueindex);
+                                                         relationDescs[i]->rd_index->indisunique);
 
                if (result)
                        pfree(result);
index f8a05b619de484897ddb9e88cec4e34b8ef0627d..a33fcd24a40f02769fabb11ce6e7107f92156850 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/commands/sequence.c,v 1.83 2002/07/16 22:12:19 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/commands/sequence.c,v 1.84 2002/08/06 02:36:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -237,6 +237,7 @@ DefineSequence(CreateSeqStmt *seq)
         * means two log records instead of one :-(
         */
        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+
        START_CRIT_SECTION();
 
        {
@@ -260,6 +261,8 @@ DefineSequence(CreateSeqStmt *seq)
                tuple->t_data->t_infomask |= HEAP_XMIN_COMMITTED;
        }
 
+       /* XLOG stuff */
+       if (!rel->rd_istemp)
        {
                xl_seq_rec      xlrec;
                XLogRecPtr      recptr;
@@ -287,6 +290,7 @@ DefineSequence(CreateSeqStmt *seq)
                PageSetLSN(page, recptr);
                PageSetSUI(page, ThisStartUpID);
        }
+
        END_CRIT_SECTION();
 
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
@@ -437,7 +441,9 @@ nextval(PG_FUNCTION_ARGS)
        elm->cached = last;                     /* last fetched number */
 
        START_CRIT_SECTION();
-       if (logit)
+
+       /* XLOG stuff */
+       if (logit && !seqrel->rd_istemp)
        {
                xl_seq_rec      xlrec;
                XLogRecPtr      recptr;
@@ -449,9 +455,11 @@ nextval(PG_FUNCTION_ARGS)
                rdata[0].len = sizeof(xl_seq_rec);
                rdata[0].next = &(rdata[1]);
 
+               /* set values that will be saved in xlog */
                seq->last_value = next;
                seq->is_called = true;
                seq->log_cnt = 0;
+
                rdata[1].buffer = InvalidBuffer;
                rdata[1].data = (char *) page + ((PageHeader) page)->pd_upper;
                rdata[1].len = ((PageHeader) page)->pd_special -
@@ -468,6 +476,7 @@ nextval(PG_FUNCTION_ARGS)
        seq->last_value = last;         /* last fetched number */
        seq->is_called = true;
        seq->log_cnt = log;                     /* how much is logged */
+
        END_CRIT_SECTION();
 
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
@@ -550,6 +559,9 @@ do_setval(RangeVar *sequence, int64 next, bool iscalled)
                                                                 * values) */
 
        START_CRIT_SECTION();
+
+       /* XLOG stuff */
+       if (!seqrel->rd_istemp)
        {
                xl_seq_rec      xlrec;
                XLogRecPtr      recptr;
@@ -562,9 +574,11 @@ do_setval(RangeVar *sequence, int64 next, bool iscalled)
                rdata[0].len = sizeof(xl_seq_rec);
                rdata[0].next = &(rdata[1]);
 
+               /* set values that will be saved in xlog */
                seq->last_value = next;
                seq->is_called = true;
                seq->log_cnt = 0;
+
                rdata[1].buffer = InvalidBuffer;
                rdata[1].data = (char *) page + ((PageHeader) page)->pd_upper;
                rdata[1].len = ((PageHeader) page)->pd_special -
@@ -576,10 +590,12 @@ do_setval(RangeVar *sequence, int64 next, bool iscalled)
                PageSetLSN(page, recptr);
                PageSetSUI(page, ThisStartUpID);
        }
+
        /* save info in sequence relation */
        seq->last_value = next;         /* last fetched number */
        seq->is_called = iscalled;
        seq->log_cnt = (iscalled) ? 0 : 1;
+
        END_CRIT_SECTION();
 
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
index c893ea86a09faef3d2fa15ddd048d3a74708c495..8d2cd4da58c1d1ef7f56a4c0c3917123694f4114 100644 (file)
@@ -13,7 +13,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.232 2002/07/20 05:16:57 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.233 2002/08/06 02:36:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1899,6 +1899,8 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
                                        newtup.t_data = (HeapTupleHeader) PageGetItem(ToPage, newitemid);
                                        ItemPointerSet(&(newtup.t_self), destvacpage->blkno, newoff);
 
+                                       /* XLOG stuff */
+                                       if (!onerel->rd_istemp)
                                        {
                                                XLogRecPtr      recptr =
                                                log_heap_move(onerel, Cbuf, tuple.t_self,
@@ -1912,6 +1914,12 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
                                                PageSetLSN(ToPage, recptr);
                                                PageSetSUI(ToPage, ThisStartUpID);
                                        }
+                                       else
+                                       {
+                                               /* No XLOG record, but still need to flag that XID exists on disk */
+                                               MyXactMadeTempRelUpdate = true;
+                                       }
+
                                        END_CRIT_SECTION();
 
                                        if (destvacpage->blkno > last_move_dest_block)
@@ -2042,6 +2050,8 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
                        tuple.t_data->t_infomask |= HEAP_MOVED_OFF;
                        HeapTupleHeaderSetXvac(tuple.t_data, myXID);
 
+                       /* XLOG stuff */
+                       if (!onerel->rd_istemp)
                        {
                                XLogRecPtr      recptr =
                                log_heap_move(onerel, buf, tuple.t_self,
@@ -2052,6 +2062,12 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
                                PageSetLSN(ToPage, recptr);
                                PageSetSUI(ToPage, ThisStartUpID);
                        }
+                       else
+                       {
+                               /* No XLOG record, but still need to flag that XID exists on disk */
+                               MyXactMadeTempRelUpdate = true;
+                       }
+
                        END_CRIT_SECTION();
 
                        cur_page->offsets_used++;
@@ -2321,8 +2337,13 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
 
                        }
                        Assert(vacpage->offsets_free == num_tuples);
+
                        START_CRIT_SECTION();
+
                        uncnt = PageRepairFragmentation(page, unused);
+
+                       /* XLOG stuff */
+                       if (!onerel->rd_istemp)
                        {
                                XLogRecPtr      recptr;
 
@@ -2331,7 +2352,14 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
                                PageSetLSN(page, recptr);
                                PageSetSUI(page, ThisStartUpID);
                        }
+                       else
+                       {
+                               /* No XLOG record, but still need to flag that XID exists on disk */
+                               MyXactMadeTempRelUpdate = true;
+                       }
+
                        END_CRIT_SECTION();
+
                        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
                        WriteBuffer(buf);
                }
@@ -2450,12 +2478,17 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage)
        Assert(vacpage->offsets_used == 0);
 
        START_CRIT_SECTION();
+
        for (i = 0; i < vacpage->offsets_free; i++)
        {
                itemid = PageGetItemId(page, vacpage->offsets[i]);
                itemid->lp_flags &= ~LP_USED;
        }
+
        uncnt = PageRepairFragmentation(page, unused);
+
+       /* XLOG stuff */
+       if (!onerel->rd_istemp)
        {
                XLogRecPtr      recptr;
 
@@ -2464,6 +2497,12 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage)
                PageSetLSN(page, recptr);
                PageSetSUI(page, ThisStartUpID);
        }
+       else
+       {
+               /* No XLOG record, but still need to flag that XID exists on disk */
+               MyXactMadeTempRelUpdate = true;
+       }
+
        END_CRIT_SECTION();
 }
 
index bbf9e39ae80496038633a64b44af52d58358c58b..4fb613cc67e522844e30380fd854f844198f1971 100644 (file)
@@ -31,7 +31,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.17 2002/07/20 05:16:57 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.18 2002/08/06 02:36:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -523,6 +523,8 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
 
        uncnt = PageRepairFragmentation(page, unused);
 
+       /* XLOG stuff */
+       if (!onerel->rd_istemp)
        {
                XLogRecPtr      recptr;
 
@@ -531,6 +533,12 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
                PageSetLSN(page, recptr);
                PageSetSUI(page, ThisStartUpID);
        }
+       else
+       {
+               /* No XLOG record, but still need to flag that XID exists on disk */
+               MyXactMadeTempRelUpdate = true;
+       }
+
        END_CRIT_SECTION();
 
        return tupindex;
index c2bd2de48abe47931c1d062f1d18f7fe23414d84..24f232469b0ac281ca05986ecb86a1d4a59f3518 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.87 2002/07/20 05:16:58 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.88 2002/08/06 02:36:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -691,7 +691,7 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
                                                          nullv,        /* info on nulls */
                                                          &(heapTuple->t_self),         /* tid of heap tuple */
                                                          heapRelation,
-                                                         relationDescs[i]->rd_uniqueindex && !is_vacuum);
+                                                         relationDescs[i]->rd_index->indisunique && !is_vacuum);
 
                /*
                 * keep track of index inserts for debugging
index 6132b732f864f9376a0cfc05d2c7e04f7b08c7aa..a8c56562f2d6074299a98f64830562b2e73b84a3 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.49 2002/06/20 20:29:34 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.50 2002/08/06 02:36:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -258,7 +258,7 @@ ShutdownBufferPoolAccess(void)
        /* Release any buffer context locks we are holding */
        UnlockBuffers();
        /* Release any buffer reference counts we are holding */
-       ResetBufferPool(false);
+       AtEOXact_Buffers(false);
 }
 
 /* -----------------------------------------------------
index b2c19e99f47b48ac439a9d5c73810a0ddc8a4793..1ca7af3b775774b6dfdecc083dcab982bbe8396e 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.127 2002/07/02 05:47:37 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.128 2002/08/06 02:36:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "pgstat.h"
 
 #define BufferGetLSN(bufHdr)   \
-       (*((XLogRecPtr*)MAKE_PTR((bufHdr)->data)))
+       (*((XLogRecPtr*) MAKE_PTR((bufHdr)->data)))
 
 
-extern long int ReadBufferCount;
-extern long int ReadLocalBufferCount;
-extern long int BufferHitCount;
-extern long int LocalBufferHitCount;
-extern long int BufferFlushCount;
-extern long int LocalBufferFlushCount;
-
 static void WaitIO(BufferDesc *buf);
 static void StartBufferIO(BufferDesc *buf, bool forInput);
 static void TerminateBufferIO(BufferDesc *buf);
@@ -82,16 +75,12 @@ static Buffer ReadBufferInternal(Relation reln, BlockNumber blockNum,
                                   bool bufferLockHeld);
 static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
                        bool *foundPtr);
-static int     ReleaseBufferWithBufferLock(Buffer buffer);
 static int     BufferReplace(BufferDesc *bufHdr);
 #ifdef NOT_USED
 void           PrintBufferDescs(void);
 #endif
 
 static void write_buffer(Buffer buffer, bool unpin);
-static void drop_relfilenode_buffers(RelFileNode rnode,
-                                     bool do_local, bool do_both);
-static int release_buffer(Buffer buffer, bool havelock);
 
 /*
  * ReadBuffer -- returns a buffer containing the requested
@@ -140,7 +129,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum,
        bool            isLocalBuf;
 
        isExtend = (blockNum == P_NEW);
-       isLocalBuf = reln->rd_myxactonly;
+       isLocalBuf = reln->rd_istemp;
 
        if (isLocalBuf)
        {
@@ -684,10 +673,10 @@ ReleaseAndReadBuffer(Buffer buffer,
 /*
  * BufferSync -- Write all dirty buffers in the pool.
  *
- * This is called at checkpoint time and write out all dirty buffers.
+ * This is called at checkpoint time and writes out all dirty shared buffers.
  */
 void
-BufferSync()
+BufferSync(void)
 {
        int                     i;
        BufferDesc *bufHdr;
@@ -780,8 +769,7 @@ BufferSync()
                        status = smgrblindwrt(DEFAULT_SMGR,
                                                                  bufHdr->tag.rnode,
                                                                  bufHdr->tag.blockNum,
-                                                                 (char *) MAKE_PTR(bufHdr->data),
-                                                                 true);                /* must fsync */
+                                                                 (char *) MAKE_PTR(bufHdr->data));
                }
                else
                {
@@ -908,19 +896,16 @@ ResetBufferUsage(void)
        NDirectFileWrite = 0;
 }
 
-/* ----------------------------------------------
- *             ResetBufferPool
- *
- *             This routine is supposed to be called when a transaction aborts.
- *             It will release all the buffer pins held by the transaction.
- *             Currently, we also call it during commit if BufferPoolCheckLeak
- *             detected a problem --- in that case, isCommit is TRUE, and we
- *             only clean up buffer pin counts.
+/*
+ *             AtEOXact_Buffers - clean up at end of transaction.
  *
- * ----------------------------------------------
+ *             During abort, we need to release any buffer pins we're holding
+ *             (this cleans up in case elog interrupted a routine that pins a
+ *             buffer).  During commit, we shouldn't need to do that, but check
+ *             anyway to see if anyone leaked a buffer reference count.
  */
 void
-ResetBufferPool(bool isCommit)
+AtEOXact_Buffers(bool isCommit)
 {
        int                     i;
 
@@ -928,7 +913,16 @@ ResetBufferPool(bool isCommit)
        {
                if (PrivateRefCount[i] != 0)
                {
-                       BufferDesc *buf = &BufferDescriptors[i];
+                       BufferDesc *buf = &(BufferDescriptors[i]);
+
+                       if (isCommit)
+                               elog(WARNING,
+                                        "Buffer Leak: [%03d] (freeNext=%d, freePrev=%d, "
+                                        "rel=%u/%u, blockNum=%u, flags=0x%x, refcount=%d %ld)",
+                                        i, buf->freeNext, buf->freePrev,
+                                        buf->tag.rnode.tblNode, buf->tag.rnode.relNode,
+                                        buf->tag.blockNum, buf->flags,
+                                        buf->refcount, PrivateRefCount[i]);
 
                        PrivateRefCount[i] = 1;         /* make sure we release shared pin */
                        LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
@@ -938,48 +932,15 @@ ResetBufferPool(bool isCommit)
                }
        }
 
-       ResetLocalBufferPool();
-
-       if (!isCommit)
-               smgrabort();
+       AtEOXact_LocalBuffers(isCommit);
 }
 
 /*
- * BufferPoolCheckLeak
- *
- *             check if there is buffer leak
- */
-bool
-BufferPoolCheckLeak(void)
-{
-       int                     i;
-       bool            result = false;
-
-       for (i = 0; i < NBuffers; i++)
-       {
-               if (PrivateRefCount[i] != 0)
-               {
-                       BufferDesc *buf = &(BufferDescriptors[i]);
-
-                       elog(WARNING,
-                                "Buffer Leak: [%03d] (freeNext=%d, freePrev=%d, \
-rel=%u/%u, blockNum=%u, flags=0x%x, refcount=%d %ld)",
-                                i, buf->freeNext, buf->freePrev,
-                                buf->tag.rnode.tblNode, buf->tag.rnode.relNode,
-                                buf->tag.blockNum, buf->flags,
-                                buf->refcount, PrivateRefCount[i]);
-                       result = true;
-               }
-       }
-       return result;
-}
-
-/* ------------------------------------------------
  * FlushBufferPool
  *
- * Flush all dirty blocks in buffer pool to disk
- * at the checkpoint time
- * ------------------------------------------------
+ * Flush all dirty blocks in buffer pool to disk at the checkpoint time.
+ * Local relations do not participate in checkpoints, so they don't need to be
+ * flushed.
  */
 void
 FlushBufferPool(void)
@@ -989,16 +950,13 @@ FlushBufferPool(void)
 }
 
 /*
- * At the commit time we have to flush local buffer pool only
+ * Do whatever is needed to prepare for commit at the bufmgr and smgr levels
  */
 void
 BufmgrCommit(void)
 {
-       LocalBufferSync();
+       /* Nothing to do in bufmgr anymore... */
 
-       /*
-        * All files created in current transaction will be fsync-ed
-        */
        smgrcommit();
 }
 
@@ -1051,15 +1009,15 @@ BufferReplace(BufferDesc *bufHdr)
 
        if (reln != (Relation) NULL)
        {
-               status = smgrwrite(DEFAULT_SMGR, reln, bufHdr->tag.blockNum,
+               status = smgrwrite(DEFAULT_SMGR, reln,
+                                                  bufHdr->tag.blockNum,
                                                   (char *) MAKE_PTR(bufHdr->data));
        }
        else
        {
                status = smgrblindwrt(DEFAULT_SMGR, bufHdr->tag.rnode,
                                                          bufHdr->tag.blockNum,
-                                                         (char *) MAKE_PTR(bufHdr->data),
-                                                         false);       /* no fsync */
+                                                         (char *) MAKE_PTR(bufHdr->data));
        }
 
        /* drop relcache refcnt incremented by RelationNodeCacheGetRelation */
@@ -1091,31 +1049,55 @@ RelationGetNumberOfBlocks(Relation relation)
 {
        /*
         * relation->rd_nblocks should be accurate already if the relation is
-        * myxactonly.  (XXX how safe is that really?)  Don't call smgr on a
-        * view, either.
+        * new or temp, because no one else should be modifying it.  Otherwise
+        * we need to ask the smgr for the current physical file length.
+        *
+        * Don't call smgr on a view, either.
         */
        if (relation->rd_rel->relkind == RELKIND_VIEW)
                relation->rd_nblocks = 0;
-       else if (!relation->rd_myxactonly)
+       else if (!relation->rd_isnew && !relation->rd_istemp)
                relation->rd_nblocks = smgrnblocks(DEFAULT_SMGR, relation);
        return relation->rd_nblocks;
 }
 
-/*
- * drop_relfilenode_buffers -- common functionality for
- *                             DropRelationBuffers and
- *                             DropRelFileNodeBuffers
+/* ---------------------------------------------------------------------
+ *             DropRelationBuffers
  *
- *             XXX currently it sequentially searches the buffer pool, should be
- *             changed to more clever ways of searching.
+ *             This function removes all the buffered pages for a relation
+ *             from the buffer pool.  Dirty pages are simply dropped, without
+ *             bothering to write them out first.      This is NOT rollback-able,
+ *             and so should be used only with extreme caution!
+ *
+ *             We assume that the caller holds an exclusive lock on the relation,
+ *             which should assure that no new buffers will be acquired for the rel
+ *             meanwhile.
+ * --------------------------------------------------------------------
  */
-static void
-drop_relfilenode_buffers(RelFileNode rnode, bool do_local, bool do_both)
+void
+DropRelationBuffers(Relation rel)
+{
+       DropRelFileNodeBuffers(rel->rd_node, rel->rd_istemp);
+}
+
+/* ---------------------------------------------------------------------
+ *             DropRelFileNodeBuffers
+ *
+ *             This is the same as DropRelationBuffers, except that the target
+ *             relation is specified by RelFileNode and temp status.
+ *
+ *             This is NOT rollback-able.      One legitimate use is to clear the
+ *             buffer cache of buffers for a relation that is being deleted
+ *             during transaction abort.
+ * --------------------------------------------------------------------
+ */
+void
+DropRelFileNodeBuffers(RelFileNode rnode, bool istemp)
 {
        int                     i;
        BufferDesc *bufHdr;
 
-       if (do_local)
+       if (istemp)
        {
                for (i = 0; i < NLocBuffer; i++)
                {
@@ -1128,8 +1110,7 @@ drop_relfilenode_buffers(RelFileNode rnode, bool do_local, bool do_both)
                                bufHdr->tag.rnode.relNode = InvalidOid;
                        }
                }
-               if (!do_both)
-                       return;
+               return;
        }
 
        LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
@@ -1160,18 +1141,19 @@ recheck:
                        bufHdr->cntxDirty = false;
 
                        /*
-                        * Release any refcount we may have.
-                        *
-                        * This is very probably dead code, and if it isn't then it's
-                        * probably wrong.      I added the Assert to find out --- tgl
-                        * 11/99.
+                        * Release any refcount we may have.  If someone else has a
+                        * pin on the buffer, we are in trouble and report FATAL.
                         */
                        if (!(bufHdr->flags & BM_FREE))
                        {
-                               /* Assert checks that buffer will actually get freed! */
-                               Assert(PrivateRefCount[i - 1] == 1 &&
-                                          bufHdr->refcount == 1);
-                               ReleaseBufferWithBufferLock(i);
+                               /* the sole pin should be ours */
+                               if (bufHdr->refcount != 1 || PrivateRefCount[i - 1] == 0)
+                                       elog(FATAL, "DropRelFileNodeBuffers: block %u is referenced (private %ld, global %d)",
+                                                bufHdr->tag.blockNum,
+                                                PrivateRefCount[i - 1], bufHdr->refcount);
+                               /* Make sure it will be released */
+                               PrivateRefCount[i - 1] = 1;
+                               UnpinBuffer(bufHdr);
                        }
 
                        /*
@@ -1184,43 +1166,6 @@ recheck:
        LWLockRelease(BufMgrLock);
 }
 
-/* ---------------------------------------------------------------------
- *             DropRelationBuffers
- *
- *             This function removes all the buffered pages for a relation
- *             from the buffer pool.  Dirty pages are simply dropped, without
- *             bothering to write them out first.      This is NOT rollback-able,
- *             and so should be used only with extreme caution!
- *
- *             We assume that the caller holds an exclusive lock on the relation,
- *             which should assure that no new buffers will be acquired for the rel
- *             meanwhile.
- * --------------------------------------------------------------------
- */
-void
-DropRelationBuffers(Relation rel)
-{
-       drop_relfilenode_buffers(rel->rd_node, rel->rd_myxactonly, false);
-}
-
-/* ---------------------------------------------------------------------
- *             DropRelFileNodeBuffers
- *
- *             This is the same as DropRelationBuffers, except that the target
- *             relation is specified by RelFileNode.
- *
- *             This is NOT rollback-able.      One legitimate use is to clear the
- *             buffer cache of buffers for a relation that is being deleted
- *             during transaction abort.
- * --------------------------------------------------------------------
- */
-void
-DropRelFileNodeBuffers(RelFileNode rnode)
-{
-       /* We have to search both local and shared buffers... */
-       drop_relfilenode_buffers(rnode, true, true);
-}
-
 /* ---------------------------------------------------------------------
  *             DropBuffers
  *
@@ -1296,7 +1241,7 @@ recheck:
  */
 #ifdef NOT_USED
 void
-PrintBufferDescs()
+PrintBufferDescs(void)
 {
        int                     i;
        BufferDesc *buf = BufferDescriptors;
@@ -1331,7 +1276,7 @@ blockNum=%u, flags=0x%x, refcount=%d %ld)",
 
 #ifdef NOT_USED
 void
-PrintPinnedBufs()
+PrintPinnedBufs(void)
 {
        int                     i;
        BufferDesc *buf = BufferDescriptors;
@@ -1351,33 +1296,6 @@ blockNum=%u, flags=0x%x, refcount=%d %ld)",
 }
 #endif
 
-/*
- * BufferPoolBlowaway
- *
- * this routine is solely for the purpose of experiments -- sometimes
- * you may want to blowaway whatever is left from the past in buffer
- * pool and start measuring some performance with a clean empty buffer
- * pool.
- */
-#ifdef NOT_USED
-void
-BufferPoolBlowaway()
-{
-       int                     i;
-
-       BufferSync();
-       for (i = 1; i <= NBuffers; i++)
-       {
-               if (BufferIsValid(i))
-               {
-                       while (BufferIsValid(i))
-                               ReleaseBuffer(i);
-               }
-               BufTableDelete(&BufferDescriptors[i - 1]);
-       }
-}
-#endif
-
 /* ---------------------------------------------------------------------
  *             FlushRelationBuffers
  *
@@ -1428,7 +1346,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
        XLogRecPtr      recptr;
        int                     status;
 
-       if (rel->rd_myxactonly)
+       if (rel->rd_istemp)
        {
                for (i = 0; i < NLocBuffer; i++)
                {
@@ -1544,12 +1462,14 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
        return 0;
 }
 
+#undef ReleaseBuffer
+
 /*
- * release_buffer -- common functionality for
- *                   ReleaseBuffer and ReleaseBufferWithBufferLock
+ * ReleaseBuffer -- remove the pin on a buffer without
+ *             marking it dirty.
  */
-static int
-release_buffer(Buffer buffer, bool havelock)
+int
+ReleaseBuffer(Buffer buffer)
 {
        BufferDesc *bufHdr;
 
@@ -1570,41 +1490,14 @@ release_buffer(Buffer buffer, bool havelock)
                PrivateRefCount[buffer - 1]--;
        else
        {
-               if (!havelock)
-                       LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
-
+               LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
                UnpinBuffer(bufHdr);
-
-               if (!havelock)
-                       LWLockRelease(BufMgrLock);
+               LWLockRelease(BufMgrLock);
        }
 
        return STATUS_OK;
 }
 
-#undef ReleaseBuffer
-
-/*
- * ReleaseBuffer -- remove the pin on a buffer without
- *             marking it dirty.
- */
-int
-ReleaseBuffer(Buffer buffer)
-{
-       return release_buffer(buffer, false);
-}
-
-/*
- * ReleaseBufferWithBufferLock
- *             Same as ReleaseBuffer except we hold the bufmgr lock
- */
-static int
-ReleaseBufferWithBufferLock(Buffer buffer)
-{
-       return release_buffer(buffer, true);
-}
-
-
 #ifdef NOT_USED
 void
 IncrBufferRefCount_Debug(char *file, int line, Buffer buffer)
@@ -1847,10 +1740,13 @@ SetBufferCommitInfoNeedsSave(Buffer buffer)
        BufferDesc *bufHdr;
 
        if (BufferIsLocal(buffer))
+       {
+               WriteLocalBuffer(buffer, false);
                return;
+       }
 
        if (BAD_BUFFER_ID(buffer))
-               return;
+               elog(ERROR, "SetBufferCommitInfoNeedsSave: bad buffer %d", buffer);
 
        bufHdr = &BufferDescriptors[buffer - 1];
 
index d5edc570b6ec07f3eeb1cd224861898d7280bf53..50168c8b306b7eb9baca79b9cf4a4d91d38c5ab2 100644 (file)
@@ -1,48 +1,37 @@
 /*-------------------------------------------------------------------------
  *
  * localbuf.c
- *       local buffer manager. Fast buffer manager for temporary tables
- *       or special cases when the operation is not visible to other backends.
- *
- *       When a relation is being created, the descriptor will have rd_islocal
- *       set to indicate that the local buffer manager should be used. During
- *       the same transaction the relation is being created, any inserts or
- *       selects from the newly created relation will use the local buffer
- *       pool. rd_islocal is reset at the end of a transaction (commit/abort).
- *       This is useful for queries like SELECT INTO TABLE and create index.
+ *       local buffer manager. Fast buffer manager for temporary tables,
+ *       which never need to be WAL-logged or checkpointed, etc.
  *
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994-5, Regents of the University of California
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/storage/buffer/localbuf.c,v 1.44 2002/06/20 20:29:34 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/storage/buffer/localbuf.c,v 1.45 2002/08/06 02:36:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
-#include <sys/types.h>
-#include <sys/file.h>
-#include <math.h>
-#include <signal.h>
-
-#include "executor/execdebug.h"
 #include "storage/buf_internals.h"
 #include "storage/bufmgr.h"
 #include "storage/smgr.h"
 #include "utils/relcache.h"
 
-extern long int LocalBufferFlushCount;
 
+/*#define LBDEBUG*/
+
+/* should be a GUC parameter some day */
 int                    NLocBuffer = 64;
+
 BufferDesc *LocalBufferDescriptors = NULL;
 Block     *LocalBufferBlockPointers = NULL;
 long      *LocalRefCount = NULL;
 
 static int     nextFreeLocalBuf = 0;
 
-/*#define LBDEBUG*/
 
 /*
  * LocalBufferAlloc -
@@ -61,11 +50,11 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
                        reln->rd_node.relNode &&
                        LocalBufferDescriptors[i].tag.blockNum == blockNum)
                {
-
 #ifdef LBDEBUG
                        fprintf(stderr, "LB ALLOC (%u,%d) %d\n",
                                        RelationGetRelid(reln), blockNum, -i - 1);
 #endif
+
                        LocalRefCount[i]++;
                        *foundPtr = TRUE;
                        return &LocalBufferDescriptors[i];
@@ -94,14 +83,17 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
                elog(ERROR, "no empty local buffer.");
 
        /*
-        * this buffer is not referenced but it might still be dirty (the last
-        * transaction to touch it doesn't need its contents but has not
-        * flushed it).  if that's the case, write it out before reusing it!
+        * this buffer is not referenced but it might still be dirty.
+        * if that's the case, write it out before reusing it!
         */
        if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
        {
                Relation        bufrel = RelationNodeCacheGetRelation(bufHdr->tag.rnode);
 
+               /*
+                * The relcache is not supposed to throw away temp rels, so this
+                * should always succeed.
+                */
                Assert(bufrel != NULL);
 
                /* flush this page */
@@ -113,26 +105,19 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
                RelationDecrementReferenceCount(bufrel);
        }
 
-       /*
-        * it's all ours now.
-        *
-        * We need not in tblNode currently but will in future I think, when
-        * we'll give up rel->rd_fd to fmgr cache.
-        */
-       bufHdr->tag.rnode = reln->rd_node;
-       bufHdr->tag.blockNum = blockNum;
-       bufHdr->flags &= ~BM_DIRTY;
-       bufHdr->cntxDirty = false;
-
        /*
         * lazy memory allocation: allocate space on first use of a buffer.
+        *
+        * Note this path cannot be taken for a buffer that was previously
+        * in use, so it's okay to do it (and possibly error out) before
+        * marking the buffer as valid.
         */
        if (bufHdr->data == (SHMEM_OFFSET) 0)
        {
                char       *data = (char *) malloc(BLCKSZ);
 
                if (data == NULL)
-                       elog(FATAL, "Out of memory in LocalBufferAlloc");
+                       elog(ERROR, "Out of memory in LocalBufferAlloc");
 
                /*
                 * This is a bit of a hack: bufHdr->data needs to be a shmem
@@ -147,13 +132,24 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
                LocalBufferBlockPointers[-(bufHdr->buf_id + 2)] = (Block) data;
        }
 
+       /*
+        * it's all ours now.
+        *
+        * We do not need tblNode currently, but will in the future I think,
+        * when we'll give up rel->rd_fd to fmgr cache.
+        */
+       bufHdr->tag.rnode = reln->rd_node;
+       bufHdr->tag.blockNum = blockNum;
+       bufHdr->flags &= ~BM_DIRTY;
+       bufHdr->cntxDirty = false;
+
        *foundPtr = FALSE;
        return bufHdr;
 }
 
 /*
  * WriteLocalBuffer -
- *       writes out a local buffer
+ *       writes out a local buffer (actually, just marks it dirty)
  */
 void
 WriteLocalBuffer(Buffer buffer, bool release)
@@ -180,7 +176,7 @@ WriteLocalBuffer(Buffer buffer, bool release)
  * InitLocalBuffer -
  *       init the local buffer cache. Since most queries (esp. multi-user ones)
  *       don't involve local buffers, we delay allocating actual memory for the
- *       buffer until we need it.
+ *       buffers until we need them; just make the buffer headers here.
  */
 void
 InitLocalBuffer(void)
@@ -211,65 +207,30 @@ InitLocalBuffer(void)
 }
 
 /*
- * LocalBufferSync
- *
- * Flush all dirty buffers in the local buffer cache at commit time.
- * Since the buffer cache is only used for keeping relations visible
- * during a transaction, we will not need these buffers again.
+ * AtEOXact_LocalBuffers - clean up at end of transaction.
  *
- * Note that we have to *flush* local buffers because of them are not
- * visible to checkpoint makers. But we can skip XLOG flush check.
+ * This is just like AtEOXact_Buffers, but for local buffers.
  */
 void
-LocalBufferSync(void)
+AtEOXact_LocalBuffers(bool isCommit)
 {
        int                     i;
 
        for (i = 0; i < NLocBuffer; i++)
        {
-               BufferDesc *buf = &LocalBufferDescriptors[i];
-               Relation        bufrel;
-
-               if (buf->flags & BM_DIRTY || buf->cntxDirty)
+               if (LocalRefCount[i] != 0)
                {
-#ifdef LBDEBUG
-                       fprintf(stderr, "LB SYNC %d\n", -i - 1);
-#endif
-                       bufrel = RelationNodeCacheGetRelation(buf->tag.rnode);
-
-                       Assert(bufrel != NULL);
+                       BufferDesc *buf = &(LocalBufferDescriptors[i]);
 
-                       smgrwrite(DEFAULT_SMGR, bufrel, buf->tag.blockNum,
-                                         (char *) MAKE_PTR(buf->data));
-                       smgrmarkdirty(DEFAULT_SMGR, bufrel, buf->tag.blockNum);
-                       LocalBufferFlushCount++;
+                       if (isCommit)
+                               elog(WARNING,
+                                        "Local Buffer Leak: [%03d] (rel=%u/%u, blockNum=%u, flags=0x%x, refcount=%d %ld)",
+                                        i,
+                                        buf->tag.rnode.tblNode, buf->tag.rnode.relNode,
+                                        buf->tag.blockNum, buf->flags,
+                                        buf->refcount, LocalRefCount[i]);
 
-                       /* drop relcache refcount from RelationNodeCacheGetRelation */
-                       RelationDecrementReferenceCount(bufrel);
-
-                       buf->flags &= ~BM_DIRTY;
-                       buf->cntxDirty = false;
+                       LocalRefCount[i] = 0;
                }
        }
-
-       MemSet(LocalRefCount, 0, sizeof(long) * NLocBuffer);
-       nextFreeLocalBuf = 0;
-}
-
-void
-ResetLocalBufferPool(void)
-{
-       int                     i;
-
-       for (i = 0; i < NLocBuffer; i++)
-       {
-               BufferDesc *buf = &LocalBufferDescriptors[i];
-
-               buf->tag.rnode.relNode = InvalidOid;
-               buf->flags &= ~BM_DIRTY;
-               buf->cntxDirty = false;
-       }
-
-       MemSet(LocalRefCount, 0, sizeof(long) * NLocBuffer);
-       nextFreeLocalBuf = 0;
 }
index 391a078e602ea168a8d0cecd78d2f35e41eb6bf7..8be2ed219b99b6343e54f159c8681b103db26daa 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/storage/file/fd.c,v 1.92 2002/06/20 20:29:34 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/storage/file/fd.c,v 1.93 2002/08/06 02:36:34 tgl Exp $
  *
  * NOTES:
  *
@@ -119,8 +119,7 @@ typedef struct vfd
        unsigned short fdstate;         /* bitflags for VFD's state */
 
 /* these are the assigned bits in fdstate: */
-#define FD_DIRTY               (1 << 0)        /* written to, but not yet fsync'd */
-#define FD_TEMPORARY   (1 << 1)        /* should be unlinked when closed */
+#define FD_TEMPORARY   (1 << 0)        /* should be unlinked when closed */
 
        File            nextFree;               /* link to next free VFD, if in freelist */
        File            lruMoreRecently;        /* doubly linked recency-of-use list */
@@ -396,15 +395,6 @@ LruDelete(File file)
        vfdP->seekPos = (long) lseek(vfdP->fd, 0L, SEEK_CUR);
        Assert(vfdP->seekPos != -1L);
 
-       /* if we have written to the file, sync it before closing */
-       if (vfdP->fdstate & FD_DIRTY)
-       {
-               if (pg_fsync(vfdP->fd))
-                       elog(LOG, "LruDelete: failed to fsync %s: %m",
-                                vfdP->fileName);
-               vfdP->fdstate &= ~FD_DIRTY;
-       }
-
        /* close the file */
        if (close(vfdP->fd))
                elog(LOG, "LruDelete: failed to close %s: %m",
@@ -725,17 +715,8 @@ fileNameOpenFile(FileName fileName,
        /* Saved flags are adjusted to be OK for re-opening file */
        vfdP->fileFlags = fileFlags & ~(O_CREAT | O_TRUNC | O_EXCL);
        vfdP->fileMode = fileMode;
-
        vfdP->seekPos = 0;
-
-       /*
-        * Have to fsync file on commit. Alternative way - log file creation
-        * and fsync log before actual file creation.
-        */
-       if (fileFlags & O_CREAT)
-               vfdP->fdstate = FD_DIRTY;
-       else
-               vfdP->fdstate = 0x0;
+       vfdP->fdstate = 0x0;
 
        return file;
 }
@@ -841,15 +822,6 @@ FileClose(File file)
                /* remove the file from the lru ring */
                Delete(file);
 
-               /* if we did any writes, sync the file before closing */
-               if (vfdP->fdstate & FD_DIRTY)
-               {
-                       if (pg_fsync(vfdP->fd))
-                               elog(LOG, "FileClose: failed to fsync %s: %m",
-                                        vfdP->fileName);
-                       vfdP->fdstate &= ~FD_DIRTY;
-               }
-
                /* close the file */
                if (close(vfdP->fd))
                        elog(LOG, "FileClose: failed to close %s: %m",
@@ -1022,108 +994,11 @@ FileTruncate(File file, long offset)
        DO_DB(elog(LOG, "FileTruncate %d (%s)",
                           file, VfdCache[file].fileName));
 
-       FileSync(file);
        FileAccess(file);
        returnCode = ftruncate(VfdCache[file].fd, (size_t) offset);
        return returnCode;
 }
 
-/*
- * FileSync --- if a file is marked as dirty, fsync it.
- *
- * The FD_DIRTY bit is slightly misnamed: it doesn't mean that we need to
- * write the file, but that we *have* written it and need to execute an
- * fsync() to ensure the changes are down on disk before we mark the current
- * transaction committed.
- *
- * FD_DIRTY is set by FileWrite or by an explicit FileMarkDirty() call.
- * It is cleared after successfully fsync'ing the file.  FileClose() will
- * fsync a dirty File that is about to be closed, since there will be no
- * other place to remember the need to fsync after the VFD is gone.
- *
- * Note that the DIRTY bit is logically associated with the actual disk file,
- * not with any particular kernel FD we might have open for it.  We assume
- * that fsync will force out any dirty buffers for that file, whether or not
- * they were written through the FD being used for the fsync call --- they
- * might even have been written by some other backend!
- *
- * Note also that LruDelete currently fsyncs a dirty file that it is about
- * to close the kernel file descriptor for.  The idea there is to avoid
- * having to re-open the kernel descriptor later.  But it's not real clear
- * that this is a performance win; we could end up fsyncing the same file
- * multiple times in a transaction, which would probably cost more time
- * than is saved by avoiding an open() call.  This should be studied.
- *
- * This routine used to think it could skip the fsync if the file is
- * physically closed, but that is now WRONG; see comments for FileMarkDirty.
- */
-int
-FileSync(File file)
-{
-       int                     returnCode;
-
-       Assert(FileIsValid(file));
-
-       if (!(VfdCache[file].fdstate & FD_DIRTY))
-       {
-               /* Need not sync if file is not dirty. */
-               returnCode = 0;
-       }
-       else if (!enableFsync)
-       {
-               /* Don't force the file open if pg_fsync isn't gonna sync it. */
-               returnCode = 0;
-               VfdCache[file].fdstate &= ~FD_DIRTY;
-       }
-       else
-       {
-               /*
-                * We don't use FileAccess() because we don't want to force the
-                * file to the front of the LRU ring; we aren't expecting to
-                * access it again soon.
-                */
-               if (FileIsNotOpen(file))
-               {
-                       returnCode = LruInsert(file);
-                       if (returnCode != 0)
-                               return returnCode;
-               }
-               returnCode = pg_fsync(VfdCache[file].fd);
-               if (returnCode == 0)
-                       VfdCache[file].fdstate &= ~FD_DIRTY;
-       }
-
-       return returnCode;
-}
-
-/*
- * FileMarkDirty --- mark a file as needing fsync at transaction commit.
- *
- * Since FileWrite marks the file dirty, this routine is not needed in
- * normal use. It is called when the buffer manager detects that some other
- * backend has written out a shared buffer that this backend dirtied (but
- * didn't write) in the current xact.  In that scenario, we need to fsync
- * the file before we can commit.  We cannot assume that the other backend
- * has fsync'd the file yet; we need to do our own fsync to ensure that
- * (a) the disk page is written and (b) this backend's commit is delayed
- * until the write is complete.
- *
- * Note we are assuming that an fsync issued by this backend will write
- * kernel disk buffers that were dirtied by another backend.  Furthermore,
- * it doesn't matter whether we currently have the file physically open;
- * we must fsync even if we have to re-open the file to do it.
- */
-void
-FileMarkDirty(File file)
-{
-       Assert(FileIsValid(file));
-
-       DO_DB(elog(LOG, "FileMarkDirty: %d (%s)",
-                          file, VfdCache[file].fileName));
-
-       VfdCache[file].fdstate |= FD_DIRTY;
-}
-
 
 /*
  * Routines that want to use stdio (ie, FILE*) should use AllocateFile
@@ -1142,7 +1017,6 @@ FileMarkDirty(File file)
  *
  * Ideally this should be the *only* direct call of fopen() in the backend.
  */
-
 FILE *
 AllocateFile(char *name, char *mode)
 {
@@ -1229,12 +1103,6 @@ closeAllVfds(void)
  * exit (it doesn't particularly care which).  All still-open temporary-file
  * VFDs are closed, which also causes the underlying files to be deleted.
  * Furthermore, all "allocated" stdio files are closed.
- *
- * This routine is not involved in fsync'ing non-temporary files at xact
- * commit; that is done by FileSync under control of the buffer manager.
- * During a commit, that is done *before* control gets here.  If we still
- * have any needs-fsync bits set when we get here, we assume this is abort
- * and clear them.
  */
 void
 AtEOXact_Files(void)
@@ -1249,8 +1117,6 @@ AtEOXact_Files(void)
                        if ((VfdCache[i].fdstate & FD_TEMPORARY) &&
                                VfdCache[i].fileName != NULL)
                                FileClose(i);
-                       else
-                               VfdCache[i].fdstate &= ~FD_DIRTY;
                }
        }
 
index 978d85d4868ab5b710088940df861ec5347d994a..25051a9799cabaf80fb79fe8a882a748b76f9fd0 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.91 2002/06/20 20:29:35 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.92 2002/08/06 02:36:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -381,16 +381,7 @@ mdclose_fd(int fd)
 
                /* if not closed already */
                if (v->mdfd_vfd >= 0)
-               {
-                       /*
-                        * We sync the file descriptor so that we don't need to reopen
-                        * it at transaction commit to force changes to disk.  (This
-                        * is not really optional, because we are about to forget that
-                        * the file even exists...)
-                        */
-                       FileSync(v->mdfd_vfd);
                        FileClose(v->mdfd_vfd);
-               }
                /* Now free vector */
                v = v->mdfd_chain;
                if (ov != &Md_fdvec[fd])
@@ -403,16 +394,7 @@ mdclose_fd(int fd)
        if (v != (MdfdVec *) NULL)
        {
                if (v->mdfd_vfd >= 0)
-               {
-                       /*
-                        * We sync the file descriptor so that we don't need to reopen
-                        * it at transaction commit to force changes to disk.  (This
-                        * is not really optional, because we are about to forget that
-                        * the file even exists...)
-                        */
-                       FileSync(v->mdfd_vfd);
                        FileClose(v->mdfd_vfd);
-               }
        }
 #endif
 
@@ -497,56 +479,16 @@ mdwrite(Relation reln, BlockNumber blocknum, char *buffer)
        return SM_SUCCESS;
 }
 
-/*
- *     mdflush() -- Synchronously write a block to disk.
- *
- *             This is exactly like mdwrite(), but doesn't return until the file
- *             system buffer cache has been flushed.
- */
-int
-mdflush(Relation reln, BlockNumber blocknum, char *buffer)
-{
-       int                     status;
-       long            seekpos;
-       MdfdVec    *v;
-
-       v = _mdfd_getseg(reln, blocknum);
-
-#ifndef LET_OS_MANAGE_FILESIZE
-       seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
-#ifdef DIAGNOSTIC
-       if (seekpos >= BLCKSZ * RELSEG_SIZE)
-               elog(FATAL, "seekpos too big!");
-#endif
-#else
-       seekpos = (long) (BLCKSZ * (blocknum));
-#endif
-
-       if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
-               return SM_FAIL;
-
-       /* write and sync the block */
-       status = SM_SUCCESS;
-       if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ
-               || FileSync(v->mdfd_vfd) < 0)
-               status = SM_FAIL;
-
-       return status;
-}
-
 /*
  *     mdblindwrt() -- Write a block to disk blind.
  *
- *             We have to be able to do this using only the name and OID of
- *             the database and relation in which the block belongs.  Otherwise
- *             this is much like mdwrite().  If dofsync is TRUE, then we fsync
- *             the file, making it more like mdflush().
+ *             We have to be able to do this using only the rnode of the relation
+ *             in which the block belongs.  Otherwise this is much like mdwrite().
  */
 int
 mdblindwrt(RelFileNode rnode,
                   BlockNumber blkno,
-                  char *buffer,
-                  bool dofsync)
+                  char *buffer)
 {
        int                     status;
        long            seekpos;
@@ -568,7 +510,6 @@ mdblindwrt(RelFileNode rnode,
 #endif
 
        errno = 0;
-
        if (lseek(fd, seekpos, SEEK_SET) != seekpos)
        {
                elog(LOG, "mdblindwrt: lseek(%ld) failed: %m", seekpos);
@@ -578,7 +519,7 @@ mdblindwrt(RelFileNode rnode,
 
        status = SM_SUCCESS;
 
-       /* write and optionally sync the block */
+       /* write the block */
        errno = 0;
        if (write(fd, buffer, BLCKSZ) != BLCKSZ)
        {
@@ -598,54 +539,6 @@ mdblindwrt(RelFileNode rnode,
        return status;
 }
 
-/*
- *     mdmarkdirty() -- Mark the specified block "dirty" (ie, needs fsync).
- *
- *             Returns SM_SUCCESS or SM_FAIL.
- */
-int
-mdmarkdirty(Relation reln, BlockNumber blkno)
-{
-       MdfdVec    *v;
-
-       v = _mdfd_getseg(reln, blkno);
-
-       FileMarkDirty(v->mdfd_vfd);
-
-       return SM_SUCCESS;
-}
-
-/*
- *     mdblindmarkdirty() -- Mark the specified block "dirty" (ie, needs fsync).
- *
- *             We have to be able to do this using only the name and OID of
- *             the database and relation in which the block belongs.  Otherwise
- *             this is much like mdmarkdirty().  However, we do the fsync immediately
- *             rather than building md/fd datastructures to postpone it till later.
- */
-int
-mdblindmarkdirty(RelFileNode rnode,
-                                BlockNumber blkno)
-{
-       int                     status;
-       int                     fd;
-
-       fd = _mdfd_blind_getseg(rnode, blkno);
-
-       if (fd < 0)
-               return SM_FAIL;
-
-       status = SM_SUCCESS;
-
-       if (pg_fsync(fd) < 0)
-               status = SM_FAIL;
-
-       if (close(fd) < 0)
-               status = SM_FAIL;
-
-       return status;
-}
-
 /*
  *     mdnblocks() -- Get the number of blocks stored in a relation.
  *
@@ -796,61 +689,36 @@ mdtruncate(Relation reln, BlockNumber nblocks)
 /*
  *     mdcommit() -- Commit a transaction.
  *
- *             All changes to magnetic disk relations must be forced to stable
- *             storage.  This routine makes a pass over the private table of
- *             file descriptors.  Any descriptors to which we have done writes,
- *             but not synced, are synced here.
- *
  *             Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
  */
 int
-mdcommit()
+mdcommit(void)
 {
-       int                     i;
-       MdfdVec    *v;
-
-       for (i = 0; i < CurFd; i++)
-       {
-               v = &Md_fdvec[i];
-               if (v->mdfd_flags & MDFD_FREE)
-                       continue;
-               /* Sync the file entry */
-#ifndef LET_OS_MANAGE_FILESIZE
-               for (; v != (MdfdVec *) NULL; v = v->mdfd_chain)
-#else
-               if (v != (MdfdVec *) NULL)
-#endif
-               {
-                       if (FileSync(v->mdfd_vfd) < 0)
-                               return SM_FAIL;
-               }
-       }
-
+       /*
+        * We don't actually have to do anything here...
+        */
        return SM_SUCCESS;
 }
 
 /*
  *     mdabort() -- Abort a transaction.
  *
- *             Changes need not be forced to disk at transaction abort.  We mark
- *             all file descriptors as clean here.  Always returns SM_SUCCESS.
+ *             Changes need not be forced to disk at transaction abort.
  */
 int
-mdabort()
+mdabort(void)
 {
        /*
-        * We don't actually have to do anything here.  fd.c will discard
-        * fsync-needed bits in its AtEOXact_Files() routine.
+        * We don't actually have to do anything here...
         */
        return SM_SUCCESS;
 }
 
 /*
- *     mdsync() -- Sync storage.
- *
+ *     mdsync() -- Sync previous writes to stable storage.
  */
 int
-mdsync()
+mdsync(void)
 {
        sync();
        if (IsUnderPostmaster)
@@ -861,11 +729,9 @@ mdsync()
 
 /*
  *     _fdvec_alloc () -- grab a free (or new) md file descriptor vector.
- *
  */
-static
-int
-_fdvec_alloc()
+static int
+_fdvec_alloc(void)
 {
        MdfdVec    *nvec;
        int                     fdvec,
index 89396d173c9966ba5820ee10a619d8446e6fc9bf..739e938fe286e6e8c0a02929d6a331c0e80b18cb 100644 (file)
@@ -11,7 +11,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.31 2002/06/20 20:29:36 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.32 2002/08/06 02:36:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -81,7 +81,7 @@ static HTAB *MMCacheHT;
 static HTAB *MMRelCacheHT;
 
 int
-mminit()
+mminit(void)
 {
        char       *mmcacheblk;
        int                     mmsize = 0;
@@ -151,7 +151,7 @@ mminit()
 }
 
 int
-mmshutdown()
+mmshutdown(void)
 {
        return SM_SUCCESS;
 }
@@ -442,31 +442,16 @@ mmwrite(Relation reln, BlockNumber blocknum, char *buffer)
        return SM_SUCCESS;
 }
 
-/*
- *     mmflush() -- Synchronously write a block to stable storage.
- *
- *             For main-memory relations, this is exactly equivalent to mmwrite().
- */
-int
-mmflush(Relation reln, BlockNumber blocknum, char *buffer)
-{
-       return mmwrite(reln, blocknum, buffer);
-}
-
 /*
  *     mmblindwrt() -- Write a block to stable storage blind.
  *
- *             We have to be able to do this using only the name and OID of
- *             the database and relation in which the block belongs.
+ *             We have to be able to do this using only the rnode of the relation
+ *             in which the block belongs.  Otherwise this is much like mmwrite().
  */
 int
-mmblindwrt(char *dbstr,
-                  char *relstr,
-                  Oid dbid,
-                  Oid relid,
+mmblindwrt(RelFileNode rnode,
                   BlockNumber blkno,
-                  char *buffer,
-                  bool dofsync)
+                  char *buffer)
 {
        return SM_FAIL;
 }
@@ -512,7 +497,7 @@ mmnblocks(Relation reln)
  *             Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
  */
 int
-mmcommit()
+mmcommit(void)
 {
        return SM_SUCCESS;
 }
@@ -522,7 +507,7 @@ mmcommit()
  */
 
 int
-mmabort()
+mmabort(void)
 {
        return SM_SUCCESS;
 }
@@ -536,7 +521,7 @@ mmabort()
  *             manager will use.
  */
 int
-MMShmemSize()
+MMShmemSize(void)
 {
        int                     size = 0;
 
index a7fb23b4427167ab170645af9c461d6075b650a0..252781d9c3fbbccf436e953980635a3b2fcd4cfb 100644 (file)
@@ -11,7 +11,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.57 2002/06/20 20:29:36 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.58 2002/08/06 02:36:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -40,12 +40,8 @@ typedef struct f_smgr
                                                                                  char *buffer);
        int                     (*smgr_write) (Relation reln, BlockNumber blocknum,
                                                                                   char *buffer);
-       int                     (*smgr_flush) (Relation reln, BlockNumber blocknum,
-                                                                                  char *buffer);
        int                     (*smgr_blindwrt) (RelFileNode rnode, BlockNumber blkno,
-                                                                                         char *buffer, bool dofsync);
-       int                     (*smgr_markdirty) (Relation reln, BlockNumber blkno);
-       int                     (*smgr_blindmarkdirty) (RelFileNode, BlockNumber blkno);
+                                                                                         char *buffer);
        BlockNumber (*smgr_nblocks) (Relation reln);
        BlockNumber (*smgr_truncate) (Relation reln, BlockNumber nblocks);
        int                     (*smgr_commit) (void);  /* may be NULL */
@@ -62,15 +58,15 @@ static f_smgr smgrsw[] = {
 
        /* magnetic disk */
        {mdinit, NULL, mdcreate, mdunlink, mdextend, mdopen, mdclose,
-               mdread, mdwrite, mdflush, mdblindwrt, mdmarkdirty, mdblindmarkdirty,
+               mdread, mdwrite, mdblindwrt,
                mdnblocks, mdtruncate, mdcommit, mdabort, mdsync
        },
 
 #ifdef STABLE_MEMORY_STORAGE
        /* main memory */
        {mminit, mmshutdown, mmcreate, mmunlink, mmextend, mmopen, mmclose,
-               mmread, mmwrite, mmflush, mmblindwrt, mmmarkdirty, mmblindmarkdirty,
-       mmnblocks, NULL, mmcommit, mmabort},
+               mmread, mmwrite, mmblindwrt,
+               mmnblocks, NULL, mmcommit, mmabort, NULL},
 #endif
 };
 
@@ -110,6 +106,7 @@ typedef struct PendingRelDelete
 {
        RelFileNode relnode;            /* relation that may need to be deleted */
        int16           which;                  /* which storage manager? */
+       bool            isTemp;                 /* is it a temporary relation? */
        bool            atCommit;               /* T=delete at commit; F=delete at abort */
        struct PendingRelDelete *next;          /* linked-list link */
 } PendingRelDelete;
@@ -123,7 +120,7 @@ static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
  *
  */
 int
-smgrinit()
+smgrinit(void)
 {
        int                     i;
 
@@ -181,6 +178,7 @@ smgrcreate(int16 which, Relation reln)
                MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
        pending->relnode = reln->rd_node;
        pending->which = which;
+       pending->isTemp = reln->rd_istemp;
        pending->atCommit = false;      /* delete if abort */
        pending->next = pendingDeletes;
        pendingDeletes = pending;
@@ -208,6 +206,7 @@ smgrunlink(int16 which, Relation reln)
                MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
        pending->relnode = reln->rd_node;
        pending->which = which;
+       pending->isTemp = reln->rd_istemp;
        pending->atCommit = true;       /* delete if commit */
        pending->next = pendingDeletes;
        pendingDeletes = pending;
@@ -312,8 +311,10 @@ smgrread(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
 /*
  *     smgrwrite() -- Write the supplied buffer out.
  *
- *             This is not a synchronous write -- the interface for that is
- *             smgrflush().  The buffer is written out via the appropriate
+ *             This is not a synchronous write -- the block is not necessarily
+ *             on disk at return, only dumped out to the kernel.
+ *
+ *             The buffer is written out via the appropriate
  *             storage manager.  This routine returns SM_SUCCESS or aborts
  *             the current transaction.
  */
@@ -331,23 +332,6 @@ smgrwrite(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
        return status;
 }
 
-/*
- *     smgrflush() -- A synchronous smgrwrite().
- */
-int
-smgrflush(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
-{
-       int                     status;
-
-       status = (*(smgrsw[which].smgr_flush)) (reln, blocknum, buffer);
-
-       if (status == SM_FAIL)
-               elog(ERROR, "cannot flush block %d of %s to stable store: %m",
-                        blocknum, RelationGetRelationName(reln));
-
-       return status;
-}
-
 /*
  *     smgrblindwrt() -- Write a page out blind.
  *
@@ -357,20 +341,18 @@ smgrflush(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
  *             that has not yet committed, which created a new relation.  In
  *             this case, the buffer manager will call smgrblindwrt() with
  *             the name and OID of the database and the relation to which the
- *             buffer belongs.  Every storage manager must be able to force
- *             this page down to stable storage in this circumstance.  The
- *             write should be synchronous if dofsync is true.
+ *             buffer belongs.  Every storage manager must be able to write
+ *             this page out to stable storage in this circumstance.
  */
 int
 smgrblindwrt(int16 which,
                         RelFileNode rnode,
                         BlockNumber blkno,
-                        char *buffer,
-                        bool dofsync)
+                        char *buffer)
 {
        int                     status;
 
-       status = (*(smgrsw[which].smgr_blindwrt)) (rnode, blkno, buffer, dofsync);
+       status = (*(smgrsw[which].smgr_blindwrt)) (rnode, blkno, buffer);
 
        if (status == SM_FAIL)
                elog(ERROR, "cannot write block %d of %u/%u blind: %m",
@@ -379,53 +361,6 @@ smgrblindwrt(int16 which,
        return status;
 }
 
-/*
- *     smgrmarkdirty() -- Mark a page dirty (needs fsync).
- *
- *             Mark the specified page as needing to be fsync'd before commit.
- *             Ordinarily, the storage manager will do this implicitly during
- *             smgrwrite().  However, the buffer manager may discover that some
- *             other backend has written a buffer that we dirtied in the current
- *             transaction.  In that case, we still need to fsync the file to be
- *             sure the page is down to disk before we commit.
- */
-int
-smgrmarkdirty(int16 which,
-                         Relation reln,
-                         BlockNumber blkno)
-{
-       int                     status;
-
-       status = (*(smgrsw[which].smgr_markdirty)) (reln, blkno);
-
-       if (status == SM_FAIL)
-               elog(ERROR, "cannot mark block %d of %s: %m",
-                        blkno, RelationGetRelationName(reln));
-
-       return status;
-}
-
-/*
- *     smgrblindmarkdirty() -- Mark a page dirty, "blind".
- *
- *             Just like smgrmarkdirty, except we don't have a reldesc.
- */
-int
-smgrblindmarkdirty(int16 which,
-                                  RelFileNode rnode,
-                                  BlockNumber blkno)
-{
-       int                     status;
-
-       status = (*(smgrsw[which].smgr_blindmarkdirty)) (rnode, blkno);
-
-       if (status == SM_FAIL)
-               elog(ERROR, "cannot mark block %d of %u/%u blind: %m",
-                        blkno, rnode.tblNode, rnode.relNode);
-
-       return status;
-}
-
 /*
  *     smgrnblocks() -- Calculate the number of POSTGRES blocks in the
  *                                      supplied relation.
@@ -504,7 +439,7 @@ smgrDoPendingDeletes(bool isCommit)
                         * any in the commit case, but there can be in the abort
                         * case).
                         */
-                       DropRelFileNodeBuffers(pending->relnode);
+                       DropRelFileNodeBuffers(pending->relnode, pending->isTemp);
 
                        /*
                         * Tell the free space map to forget this relation.  It won't
@@ -531,11 +466,13 @@ smgrDoPendingDeletes(bool isCommit)
 }
 
 /*
- *     smgrcommit(), smgrabort() -- Commit or abort changes made during the
- *                                                              current transaction.
+ *     smgrcommit() -- Prepare to commit changes made during the current
+ *                                     transaction.
+ *
+ * This is called before we actually commit.
  */
 int
-smgrcommit()
+smgrcommit(void)
 {
        int                     i;
 
@@ -553,8 +490,11 @@ smgrcommit()
        return SM_SUCCESS;
 }
 
+/*
+ *     smgrabort() -- Abort changes made during the current transaction.
+ */
 int
-smgrabort()
+smgrabort(void)
 {
        int                     i;
 
@@ -572,8 +512,11 @@ smgrabort()
        return SM_SUCCESS;
 }
 
+/*
+ *     smgrsync() -- Sync files to disk at checkpoint time.
+ */
 int
-smgrsync()
+smgrsync(void)
 {
        int                     i;
 
index f1ed253d7110ce9aa10420a4d68e9a222498fa80..f6c11206bd81fcde95d3bc7572d8b6f7d1f704f7 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.170 2002/08/04 18:12:15 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.171 2002/08/06 02:36:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -39,6 +39,7 @@
 #include "catalog/catalog.h"
 #include "catalog/catname.h"
 #include "catalog/indexing.h"
+#include "catalog/namespace.h"
 #include "catalog/pg_amop.h"
 #include "catalog/pg_amproc.h"
 #include "catalog/pg_attrdef.h"
@@ -94,13 +95,6 @@ static HTAB *RelationSysNameCache;
  */
 static HTAB *RelationNodeCache;
 
-/*
- * newlyCreatedRelns -
- *       relations created during this transaction. We need to keep track of
- *       these.
- */
-static List *newlyCreatedRelns = NIL;
-
 /*
  * This flag is false until we have prepared the critical relcache entries
  * that are needed to do indexscans on the tables read by relcache building.
@@ -865,9 +859,12 @@ RelationBuildDesc(RelationBuildDescInfo buildinfo,
        RelationSetReferenceCount(relation, 1);
 
        /*
-        * normal relations are not nailed into the cache
+        * normal relations are not nailed into the cache; nor can a pre-existing
+        * relation be new or temp.
         */
        relation->rd_isnailed = false;
+       relation->rd_isnew = false;
+       relation->rd_istemp = false;
 
        /*
         * initialize the tuple descriptor (relation->rd_att).
@@ -957,9 +954,6 @@ RelationInitIndexAccessInfo(Relation relation)
        ReleaseSysCache(tuple);
        relation->rd_index = iform;
 
-       /* this field is now kinda redundant... */
-       relation->rd_uniqueindex = iform->indisunique;
-
        /*
         * Make a copy of the pg_am entry for the index's access method
         */
@@ -1359,9 +1353,12 @@ formrdesc(const char *relationName,
        RelationSetReferenceCount(relation, 1);
 
        /*
-        * all entries built with this routine are nailed-in-cache
+        * all entries built with this routine are nailed-in-cache; none are
+        * for new or temp relations.
         */
        relation->rd_isnailed = true;
+       relation->rd_isnew = false;
+       relation->rd_istemp = false;
 
        /*
         * initialize relation tuple form
@@ -1603,7 +1600,9 @@ RelationClose(Relation relation)
        RelationDecrementReferenceCount(relation);
 
 #ifdef RELCACHE_FORCE_RELEASE
-       if (RelationHasReferenceCountZero(relation) && !relation->rd_myxactonly)
+       if (RelationHasReferenceCountZero(relation) &&
+               !relation->rd_isnew &&
+               !relation->rd_istemp)
                RelationClearRelation(relation, false);
 #endif
 }
@@ -1734,13 +1733,14 @@ RelationClearRelation(Relation relation, bool rebuild)
        {
                /*
                 * When rebuilding an open relcache entry, must preserve ref count
-                * and myxactonly flag.  Also attempt to preserve the tupledesc,
+                * and new/temp flags.  Also attempt to preserve the tupledesc,
                 * rewrite rules, and trigger substructures in place. Furthermore
-                * we save/restore rd_nblocks (in case it is a local relation)
+                * we save/restore rd_nblocks (in case it is a new/temp relation)
                 * *and* call RelationGetNumberOfBlocks (in case it isn't).
                 */
                int                     old_refcnt = relation->rd_refcnt;
-               bool            old_myxactonly = relation->rd_myxactonly;
+               bool            old_isnew = relation->rd_isnew;
+               bool            old_istemp = relation->rd_istemp;
                TupleDesc       old_att = relation->rd_att;
                RuleLock   *old_rules = relation->rd_rules;
                MemoryContext old_rulescxt = relation->rd_rulescxt;
@@ -1763,7 +1763,8 @@ RelationClearRelation(Relation relation, bool rebuild)
                                 buildinfo.i.info_id);
                }
                RelationSetReferenceCount(relation, old_refcnt);
-               relation->rd_myxactonly = old_myxactonly;
+               relation->rd_isnew = old_isnew;
+               relation->rd_istemp = old_istemp;
                if (equalTupleDescs(old_att, relation->rd_att))
                {
                        FreeTupleDesc(relation->rd_att);
@@ -1810,11 +1811,11 @@ RelationFlushRelation(Relation relation)
 {
        bool            rebuild;
 
-       if (relation->rd_myxactonly)
+       if (relation->rd_isnew || relation->rd_istemp)
        {
                /*
-                * Local rels should always be rebuilt, not flushed; the relcache
-                * entry must live until RelationPurgeLocalRelation().
+                * New and temp relcache entries must always be rebuilt, not
+                * flushed; else we'd forget those two important status bits.
                 */
                rebuild = true;
        }
@@ -1830,11 +1831,10 @@ RelationFlushRelation(Relation relation)
 }
 
 /*
- * RelationForgetRelation -
+ * RelationForgetRelation - unconditionally remove a relcache entry
  *
- *                RelationClearRelation + if the relation is myxactonly then
- *                remove the relation descriptor from the newly created
- *                relation list.
+ *                External interface for destroying a relcache entry when we
+ *                drop the relation.
  */
 void
 RelationForgetRelation(Oid rid)
@@ -1849,31 +1849,6 @@ RelationForgetRelation(Oid rid)
        if (!RelationHasReferenceCountZero(relation))
                elog(ERROR, "RelationForgetRelation: relation %u is still open", rid);
 
-       /* If local, remove from list */
-       if (relation->rd_myxactonly)
-       {
-               List       *curr;
-               List       *prev = NIL;
-
-               foreach(curr, newlyCreatedRelns)
-               {
-                       Relation        reln = lfirst(curr);
-
-                       Assert(reln != NULL && reln->rd_myxactonly);
-                       if (RelationGetRelid(reln) == rid)
-                               break;
-                       prev = curr;
-               }
-               if (curr == NIL)
-                       elog(ERROR, "Local relation %s not found in list",
-                                RelationGetRelationName(relation));
-               if (prev == NIL)
-                       newlyCreatedRelns = lnext(newlyCreatedRelns);
-               else
-                       lnext(prev) = lnext(curr);
-               pfree(curr);
-       }
-
        /* Unconditionally destroy the relcache entry */
        RelationClearRelation(relation, false);
 }
@@ -1909,7 +1884,7 @@ RelationIdInvalidateRelationCacheByRelationId(Oid relationId)
  *      and rebuild those with positive reference counts.
  *
  *      This is currently used only to recover from SI message buffer overflow,
- *      so we do not touch transaction-local relations; they cannot be targets
+ *      so we do not touch new-in-transaction relations; they cannot be targets
  *      of cross-backend SI updates (and our own updates now go through a
  *      separate linked list that isn't limited by the SI message buffer size).
  *
@@ -1940,13 +1915,13 @@ RelationCacheInvalidate(void)
        {
                relation = idhentry->reldesc;
 
-               /* Ignore xact-local relations, since they are never SI targets */
-               if (relation->rd_myxactonly)
+               /* Ignore new relations, since they are never SI targets */
+               if (relation->rd_isnew)
                        continue;
 
                relcacheInvalsReceived++;
 
-               if (RelationHasReferenceCountZero(relation))
+               if (RelationHasReferenceCountZero(relation) && !relation->rd_istemp)
                {
                        /* Delete this entry immediately */
                        RelationClearRelation(relation, false);
@@ -1968,37 +1943,16 @@ RelationCacheInvalidate(void)
 }
 
 /*
- * AtEOXactRelationCache
+ * AtEOXact_RelationCache
  *
  *     Clean up the relcache at transaction commit or abort.
- *
- *     During transaction abort, we must reset relcache entry ref counts
- *     to their normal not-in-a-transaction state.  A ref count may be
- *     too high because some routine was exited by elog() between
- *     incrementing and decrementing the count.
- *
- *     During commit, we should not have to do this, but it's useful to
- *     check that the counts are correct to catch missed relcache closes.
- *     Since that's basically a debugging thing, only pay the cost when
- *     assert checking is enabled.
- *
- *     In bootstrap mode, forget the debugging checks --- the bootstrap code
- *     expects relations to stay open across start/commit transaction calls.
  */
 void
-AtEOXactRelationCache(bool commit)
+AtEOXact_RelationCache(bool commit)
 {
        HASH_SEQ_STATUS status;
        RelIdCacheEnt *idhentry;
 
-#ifdef USE_ASSERT_CHECKING
-       if (commit && IsBootstrapProcessingMode())
-               return;
-#else
-       if (commit)
-               return;
-#endif
-
        hash_seq_init(&status, RelationIdCache);
 
        while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
@@ -2006,11 +1960,45 @@ AtEOXactRelationCache(bool commit)
                Relation        relation = idhentry->reldesc;
                int                     expected_refcnt;
 
+               /*
+                * Is it a relation created in the current transaction?
+                *
+                * During commit, reset the flag to false, since we are now out of the
+                * creating transaction.  During abort, simply delete the relcache
+                * entry --- it isn't interesting any longer.
+                */
+               if (relation->rd_isnew)
+               {
+                       if (commit)
+                               relation->rd_isnew = false;
+                       else
+                       {
+                               RelationClearRelation(relation, false);
+                               continue;
+                       }
+               }
+
+               /*
+                * During transaction abort, we must also reset relcache entry ref
+                * counts to their normal not-in-a-transaction state.  A ref count may
+                * be too high because some routine was exited by elog() between
+                * incrementing and decrementing the count.
+                *
+                * During commit, we should not have to do this, but it's still useful
+                * to check that the counts are correct to catch missed relcache
+                * closes.
+                *
+                * In bootstrap mode, do NOT reset the refcnt nor complain that it's
+                * nonzero --- the bootstrap code expects relations to stay open
+                * across start/commit transaction calls.  (That seems bogus, but it's
+                * not worth fixing.)
+                */
                expected_refcnt = relation->rd_isnailed ? 1 : 0;
 
                if (commit)
                {
-                       if (relation->rd_refcnt != expected_refcnt)
+                       if (relation->rd_refcnt != expected_refcnt &&
+                               !IsBootstrapProcessingMode())
                        {
                                elog(WARNING, "Relcache reference leak: relation \"%s\" has refcnt %d instead of %d",
                                         RelationGetRelationName(relation),
@@ -2055,10 +2043,11 @@ RelationBuildLocalRelation(const char *relname,
        oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
 
        /*
-        * allocate a new relation descriptor.
+        * allocate a new relation descriptor and fill in basic state fields.
         */
        rel = (Relation) palloc(sizeof(RelationData));
        MemSet((char *) rel, 0, sizeof(RelationData));
+
        rel->rd_targblock = InvalidBlockNumber;
 
        /* make sure relation is marked as having no open file yet */
@@ -2066,6 +2055,12 @@ RelationBuildLocalRelation(const char *relname,
 
        RelationSetReferenceCount(rel, 1);
 
+       /* it's being created in this transaction */
+       rel->rd_isnew = true;
+
+       /* is it a temporary relation? */
+       rel->rd_istemp = isTempNamespace(relnamespace);
+
        /*
         * nail the reldesc if this is a bootstrap create reln and we may need
         * it in the cache later on in the bootstrap process so we don't ever
@@ -2121,17 +2116,6 @@ RelationBuildLocalRelation(const char *relname,
         */
        RelationCacheInsert(rel);
 
-       /*
-        * we've just created the relation. It is invisible to anyone else
-        * before the transaction is committed. Setting rd_myxactonly allows
-        * us to use the local buffer manager for select/insert/etc before the
-        * end of transaction. (We also need to keep track of relations
-        * created during a transaction and do the necessary clean up at the
-        * end of the transaction.)                             - ay 3/95
-        */
-       rel->rd_myxactonly = true;
-       newlyCreatedRelns = lcons(rel, newlyCreatedRelns);
-
        /*
         * done building relcache entry.
         */
@@ -2140,38 +2124,6 @@ RelationBuildLocalRelation(const char *relname,
        return rel;
 }
 
-/*
- * RelationPurgeLocalRelation -
- *       find all the Relation descriptors marked rd_myxactonly and reset them.
- *       This should be called at the end of a transaction (commit/abort) when
- *       the "local" relations will become visible to others and the multi-user
- *       buffer pool should be used.
- */
-void
-RelationPurgeLocalRelation(bool xactCommitted)
-{
-       while (newlyCreatedRelns)
-       {
-               List       *l = newlyCreatedRelns;
-               Relation        reln = lfirst(l);
-
-               newlyCreatedRelns = lnext(newlyCreatedRelns);
-               pfree(l);
-
-               Assert(reln != NULL && reln->rd_myxactonly);
-
-               reln->rd_myxactonly = false;    /* mark it not on list anymore */
-
-               /*
-                * XXX while we clearly must throw out new Relation entries at
-                * xact abort, it's not clear why we need to do it at commit.
-                * Could this be improved?
-                */
-               if (!IsBootstrapProcessingMode())
-                       RelationClearRelation(reln, false);
-       }
-}
-
 /*
  *             RelationCacheInitialize
  *
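diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 4b661f53d3249ad8d370133e8724f8d556282210..132fef26c882d7529b9ff527aaace3bb13e7fc5b 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h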
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: xlog.h,v 1.33 2002/08/05 01:24:16 thomas Exp $
+ * $Id: xlog.h,v 1.34 2002/08/06 02:36:35 tgl Exp $
  */
 #ifndef XLOG_H
 #define XLOG_H
@@ -182,6 +182,7 @@ extern StartUpID ThisStartUpID; /* current SUI */
 extern bool InRecovery;
 extern XLogRecPtr MyLastRecPtr;
 extern bool MyXactMadeXLogEntry;
+extern bool MyXactMadeTempRelUpdate;
 extern XLogRecPtr ProcLastRecEnd;
 
 /* these variables are GUC parameters related to XLOG */
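diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index bd0907ed95281008d2b3af078928c45f11e8255f..6f8c4ad841d012ce3242a32fad28716f85d7d784 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h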
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: buf_internals.h,v 1.57 2002/06/20 20:29:52 momjian Exp $
+ * $Id: buf_internals.h,v 1.58 2002/08/06 02:36:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -149,6 +149,15 @@ typedef struct _bmtrace
 #endif   /* BMTRACE */
 
 
+/* counters in buf_init.c */
+extern long int        ReadBufferCount;
+extern long int        ReadLocalBufferCount;
+extern long int        BufferHitCount;
+extern long int        LocalBufferHitCount;
+extern long int        BufferFlushCount;
+extern long int        LocalBufferFlushCount;
+
+
 /*
  * Bufmgr Interface:
  */
@@ -177,8 +186,6 @@ extern BufferDesc *LocalBufferDescriptors;
 extern BufferDesc *LocalBufferAlloc(Relation reln, BlockNumber blockNum,
                                 bool *foundPtr);
 extern void    WriteLocalBuffer(Buffer buffer, bool release);
-extern int     FlushLocalBuffer(Buffer buffer, bool sync, bool release);
-extern void LocalBufferSync(void);
-extern void ResetLocalBufferPool(void);
+extern void AtEOXact_LocalBuffers(bool isCommit);
 
 #endif   /* BUFMGR_INTERNALS_H */
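diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index a6952fa1f69cb921ab36096469393e62a5c25dfa..7aebaa73da62cd456ea82bbedc64ff94a999db94 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h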
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: bufmgr.h,v 1.61 2002/07/02 05:47:37 momjian Exp $
+ * $Id: bufmgr.h,v 1.62 2002/08/06 02:36:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -152,20 +152,18 @@ extern void       WriteBuffer(Buffer buffer);
 extern void    WriteNoReleaseBuffer(Buffer buffer);
 extern Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation,
                                         BlockNumber blockNum);
-extern int     FlushBuffer(Buffer buffer, bool sync, bool release);
 
 extern void InitBufferPool(void);
 extern void InitBufferPoolAccess(void);
 extern char *ShowBufferUsage(void);
 extern void ResetBufferUsage(void);
-extern void ResetBufferPool(bool isCommit);
-extern bool BufferPoolCheckLeak(void);
+extern void AtEOXact_Buffers(bool isCommit);
 extern void FlushBufferPool(void);
 extern BlockNumber BufferGetBlockNumber(Buffer buffer);
 extern BlockNumber RelationGetNumberOfBlocks(Relation relation);
 extern int     FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock);
 extern void DropRelationBuffers(Relation rel);
-extern void DropRelFileNodeBuffers(RelFileNode rnode);
+extern void DropRelFileNodeBuffers(RelFileNode rnode, bool istemp);
 extern void DropBuffers(Oid dbid);
 #ifdef NOT_USED
 extern void PrintPinnedBufs(void);
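diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 13f5100c66181f8110ef59dcb1a150cc06363cc4..a13cec41ea6827dbc137737e9c15ae6a38f44923 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h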
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: fd.h,v 1.35 2002/06/20 20:29:52 momjian Exp $
+ * $Id: fd.h,v 1.36 2002/08/06 02:36:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -62,8 +62,6 @@ extern int    FileRead(File file, char *buffer, int amount);
 extern int     FileWrite(File file, char *buffer, int amount);
 extern long FileSeek(File file, long offset, int whence);
 extern int     FileTruncate(File file, long offset);
-extern int     FileSync(File file);
-extern void FileMarkDirty(File file);
 
 /* Operations that allow use of regular stdio --- USE WITH CAUTION */
 extern FILE *AllocateFile(char *name, char *mode);
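diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 474bfbc9326cb2e4fbcfee5f6b16565779628927..d5a96ea0c9979af6130ea0df0454c9243e90eb79 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h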
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: smgr.h,v 1.35 2002/06/20 20:29:52 momjian Exp $
+ * $Id: smgr.h,v 1.36 2002/08/06 02:36:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -36,13 +36,8 @@ extern int smgrread(int16 which, Relation reln, BlockNumber blocknum,
                 char *buffer);
 extern int smgrwrite(int16 which, Relation reln, BlockNumber blocknum,
                  char *buffer);
-extern int smgrflush(int16 which, Relation reln, BlockNumber blocknum,
-                 char *buffer);
 extern int smgrblindwrt(int16 which, RelFileNode rnode,
-                        BlockNumber blkno, char *buffer, bool dofsync);
-extern int smgrblindmarkdirty(int16 which, RelFileNode rnode,
-                                  BlockNumber blkno);
-extern int     smgrmarkdirty(int16 which, Relation reln, BlockNumber blkno);
+                        BlockNumber blkno, char *buffer);
 extern BlockNumber smgrnblocks(int16 which, Relation reln);
 extern BlockNumber smgrtruncate(int16 which, Relation reln,
                         BlockNumber nblocks);
@@ -67,11 +62,7 @@ extern int   mdopen(Relation reln);
 extern int     mdclose(Relation reln);
 extern int     mdread(Relation reln, BlockNumber blocknum, char *buffer);
 extern int     mdwrite(Relation reln, BlockNumber blocknum, char *buffer);
-extern int     mdflush(Relation reln, BlockNumber blocknum, char *buffer);
-extern int     mdmarkdirty(Relation reln, BlockNumber blkno);
-extern int mdblindwrt(RelFileNode rnode, BlockNumber blkno,
-                  char *buffer, bool dofsync);
-extern int     mdblindmarkdirty(RelFileNode rnode, BlockNumber blkno);
+extern int     mdblindwrt(RelFileNode rnode, BlockNumber blkno, char *buffer);
 extern BlockNumber mdnblocks(Relation reln);
 extern BlockNumber mdtruncate(Relation reln, BlockNumber nblocks);
 extern int     mdcommit(void);
@@ -87,13 +78,7 @@ extern int   mmopen(Relation reln);
 extern int     mmclose(Relation reln);
 extern int     mmread(Relation reln, BlockNumber blocknum, char *buffer);
 extern int     mmwrite(Relation reln, BlockNumber blocknum, char *buffer);
-extern int     mmflush(Relation reln, BlockNumber blocknum, char *buffer);
-extern int mmblindwrt(char *dbname, char *relname, Oid dbid, Oid relid,
-                  BlockNumber blkno, char *buffer,
-                  bool dofsync);
-extern int     mmmarkdirty(Relation reln, BlockNumber blkno);
-extern int mmblindmarkdirty(char *dbname, char *relname, Oid dbid, Oid relid,
-                                BlockNumber blkno);
+extern int     mmblindwrt(RelFileNode rnode, BlockNumber blkno, char *buffer);
 extern BlockNumber mmnblocks(Relation reln);
 extern BlockNumber mmtruncate(Relation reln, BlockNumber nblocks);
 extern int     mmcommit(void);
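diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 3dce1757ed432067881f544b95e3c42494bfcd18..d913f28aba362a36de1aff5ad4cbf6ef55983d88 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h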
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: rel.h,v 1.60 2002/06/20 20:29:53 momjian Exp $
+ * $Id: rel.h,v 1.61 2002/08/06 02:36:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -112,10 +112,10 @@ typedef struct RelationData
        BlockNumber rd_targblock;       /* current insertion target block, or
                                                                 * InvalidBlockNumber */
        int                     rd_refcnt;              /* reference count */
-       bool            rd_myxactonly;  /* rel uses the local buffer mgr */
+       bool            rd_isnew;               /* rel was created in current xact */
+       bool            rd_istemp;              /* rel uses the local buffer mgr */
        bool            rd_isnailed;    /* rel is nailed in cache */
        bool            rd_indexfound;  /* true if rd_indexlist is valid */
-       bool            rd_uniqueindex; /* true if rel is a UNIQUE index */
        Form_pg_class rd_rel;           /* RELATION tuple */
        TupleDesc       rd_att;                 /* tuple descriptor */
        Oid                     rd_id;                  /* relation's object id */
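diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index fd22a65296d8637b0e9f96cc9581aa76dc6ed17a..c04952849129f22f4722f1d5ba7d7d75abdd5495 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h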
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: relcache.h,v 1.33 2002/08/02 22:36:05 tgl Exp $
+ * $Id: relcache.h,v 1.34 2002/08/06 02:36:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -61,9 +61,7 @@ extern void RelationIdInvalidateRelationCacheByRelationId(Oid relationId);
 
 extern void RelationCacheInvalidate(void);
 
-extern void RelationPurgeLocalRelation(bool xactComitted);
-
-extern void AtEOXactRelationCache(bool commit);
+extern void AtEOXact_RelationCache(bool commit);
 
 /*
  * Routines to help manage rebuilding of relcache init file