From 499abb0c0f21cb861c5af1d49a06469f3cfcc1eb Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 29 Sep 2001 04:02:27 +0000 Subject: [PATCH] Implement new 'lightweight lock manager' that's intermediate between existing lock manager and spinlocks: it understands exclusive vs shared lock but has few other fancy features. Replace most uses of spinlocks with lightweight locks. All remaining uses of spinlocks have very short lock hold times (a few dozen instructions), so tweak spinlock backoff code to work efficiently given this assumption. All per my proposal on pghackers 26-Sep-01. --- doc/src/sgml/wal.sgml | 4 +- src/backend/access/transam/clog.c | 85 ++- src/backend/access/transam/varsup.c | 27 +- src/backend/access/transam/xact.c | 25 +- src/backend/access/transam/xlog.c | 325 ++++++------ src/backend/bootstrap/bootparse.y | 3 +- src/backend/bootstrap/bootstrap.c | 43 +- src/backend/commands/vacuumlazy.c | 4 +- src/backend/storage/buffer/README | 12 +- src/backend/storage/buffer/buf_init.c | 24 +- src/backend/storage/buffer/buf_table.c | 5 +- src/backend/storage/buffer/bufmgr.c | 327 +++++------- src/backend/storage/buffer/freelist.c | 3 +- src/backend/storage/freespace/freespace.c | 32 +- src/backend/storage/ipc/ipc.c | 9 +- src/backend/storage/ipc/ipci.c | 20 +- src/backend/storage/ipc/shmem.c | 145 +++--- src/backend/storage/ipc/sinval.c | 65 ++- src/backend/storage/ipc/sinvaladt.c | 12 +- src/backend/storage/lmgr/Makefile | 4 +- src/backend/storage/lmgr/README | 47 +- src/backend/storage/lmgr/deadlock.c | 6 +- src/backend/storage/lmgr/lock.c | 72 ++- src/backend/storage/lmgr/lwlock.c | 483 ++++++++++++++++++ src/backend/storage/lmgr/proc.c | 270 ++++++---- src/backend/storage/lmgr/s_lock.c | 120 ++--- src/backend/storage/lmgr/spin.c | 258 +--------- src/backend/storage/smgr/mm.c | 88 ++-- src/backend/storage/smgr/smgr.c | 3 +- src/backend/utils/init/postinit.c | 9 +- src/backend/utils/misc/guc.c | 4 +- src/backend/utils/misc/postgresql.conf.sample | 2 +- 
src/include/access/clog.h | 5 +- src/include/access/transam.h | 6 +- src/include/miscadmin.h | 6 +- src/include/storage/buf_internals.h | 18 +- src/include/storage/freespace.h | 5 +- src/include/storage/ipc.h | 29 +- src/include/storage/lock.h | 28 +- src/include/storage/lwlock.h | 69 +++ src/include/storage/proc.h | 44 +- src/include/storage/s_lock.h | 91 ++-- src/include/storage/shmem.h | 11 +- src/include/storage/sinval.h | 9 +- src/include/storage/smgr.h | 6 +- src/include/storage/spin.h | 73 ++- 46 files changed, 1588 insertions(+), 1348 deletions(-) create mode 100644 src/backend/storage/lmgr/lwlock.c create mode 100644 src/include/storage/lwlock.h diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml index 16f4e6c6eb..3314088c1c 100644 --- a/doc/src/sgml/wal.sgml +++ b/doc/src/sgml/wal.sgml @@ -1,4 +1,4 @@ - + Write-Ahead Logging (<acronym>WAL</acronym>) @@ -146,7 +146,7 @@ The WAL buffers and control structure are in shared memory, and are handled by the backends; they are protected - by spinlocks. The demand on shared memory is dependent on the + by lightweight locks. The demand on shared memory is dependent on the number of buffers; the default size of the WAL buffers is 64 kB. 
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index a403838bd7..cd83da93ea 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -13,7 +13,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Header: /cvsroot/pgsql/src/backend/access/transam/clog.c,v 1.3 2001/08/26 16:55:59 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/clog.c,v 1.4 2001/09/29 04:02:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -27,7 +27,7 @@ #include #include "access/clog.h" -#include "storage/s_lock.h" +#include "storage/lwlock.h" #include "miscadmin.h" @@ -74,8 +74,8 @@ * The management algorithm is straight LRU except that we will never swap * out the latest page (since we know it's going to be hit again eventually). * - * We use an overall spinlock to protect the shared data structures, plus - * per-buffer spinlocks that synchronize I/O for each buffer. A process + * We use an overall LWLock to protect the shared data structures, plus + * per-buffer LWLocks that synchronize I/O for each buffer. A process * that is reading in or writing out a page buffer does not hold the control * lock, only the per-buffer lock for the buffer it is working on. * @@ -105,10 +105,6 @@ * by setting the page's state from WRITE_IN_PROGRESS to DIRTY. The writing * process must notice this and not mark the page CLEAN when it's done. * - * XXX it's probably okay to use a spinlock for the control lock, since - * that lock is only held for very short operations. It'd be nice to use - * some other form of lock for the per-buffer I/O locks, however. - * * XLOG interactions: this module generates an XLOG record whenever a new * CLOG page is initialized to zeroes. 
Other writes of CLOG come from * recording of transaction commit or abort in xact.c, which generates its @@ -121,7 +117,6 @@ * synchronization already. *---------- */ -#define NUM_CLOG_BUFFERS 8 typedef enum { @@ -153,13 +148,17 @@ typedef struct ClogCtlData * swapping out the latest page. */ int latest_page_number; - - slock_t control_lck; /* Lock for ClogCtlData itself */ - slock_t buffer_lck[NUM_CLOG_BUFFERS]; /* Per-buffer I/O locks */ } ClogCtlData; static ClogCtlData *ClogCtl = NULL; +/* + * ClogBufferLocks is set during CLOGShmemInit and does not change thereafter. + * The value is automatically inherited by backends via fork, and + * doesn't need to be in shared memory. + */ +static LWLockId ClogBufferLocks[NUM_CLOG_BUFFERS]; /* Per-buffer I/O locks */ + /* * ClogDir is set during CLOGShmemInit and does not change thereafter. * The value is automatically inherited by backends via fork, and @@ -211,7 +210,7 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status) Assert(status == TRANSACTION_STATUS_COMMITTED || status == TRANSACTION_STATUS_ABORTED); - S_LOCK(&(ClogCtl->control_lck)); + LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); slotno = ReadCLOGPage(pageno); byteptr = ClogCtl->page_buffer[slotno] + byteno; @@ -224,7 +223,7 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status) ClogCtl->page_status[slotno] = CLOG_PAGE_DIRTY; - S_UNLOCK(&(ClogCtl->control_lck)); + LWLockRelease(CLogControlLock); } /* @@ -243,14 +242,14 @@ TransactionIdGetStatus(TransactionId xid) char *byteptr; XidStatus status; - S_LOCK(&(ClogCtl->control_lck)); + LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); slotno = ReadCLOGPage(pageno); byteptr = ClogCtl->page_buffer[slotno] + byteno; status = (*byteptr >> bshift) & CLOG_XACT_BITMASK; - S_UNLOCK(&(ClogCtl->control_lck)); + LWLockRelease(CLogControlLock); return status; } @@ -283,15 +282,13 @@ CLOGShmemInit(void) memset(ClogCtl, 0, sizeof(ClogCtlData)); - S_INIT_LOCK(&(ClogCtl->control_lck)); - bufptr = ((char *) ClogCtl) + 
sizeof(ClogCtlData); for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++) { ClogCtl->page_buffer[slotno] = bufptr; ClogCtl->page_status[slotno] = CLOG_PAGE_EMPTY; - S_INIT_LOCK(&(ClogCtl->buffer_lck[slotno])); + ClogBufferLocks[slotno] = LWLockAssign(); bufptr += CLOG_BLCKSZ; } @@ -312,7 +309,7 @@ BootStrapCLOG(void) { int slotno; - S_LOCK(&(ClogCtl->control_lck)); + LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); /* Create and zero the first page of the commit log */ slotno = ZeroCLOGPage(0, false); @@ -321,7 +318,7 @@ BootStrapCLOG(void) WriteCLOGPage(slotno); Assert(ClogCtl->page_status[slotno] == CLOG_PAGE_CLEAN); - S_UNLOCK(&(ClogCtl->control_lck)); + LWLockRelease(CLogControlLock); } /* @@ -411,8 +408,8 @@ ReadCLOGPage(int pageno) ClogCtl->page_lru_count[slotno] = 0; /* Release shared lock, grab per-buffer lock instead */ - S_UNLOCK(&(ClogCtl->control_lck)); - S_LOCK(&(ClogCtl->buffer_lck[slotno])); + LWLockRelease(CLogControlLock); + LWLockAcquire(ClogBufferLocks[slotno], LW_EXCLUSIVE); /* * Check to see if someone else already did the read, or took the @@ -421,8 +418,8 @@ ReadCLOGPage(int pageno) if (ClogCtl->page_number[slotno] != pageno || ClogCtl->page_status[slotno] != CLOG_PAGE_READ_IN_PROGRESS) { - S_UNLOCK(&(ClogCtl->buffer_lck[slotno])); - S_LOCK(&(ClogCtl->control_lck)); + LWLockRelease(ClogBufferLocks[slotno]); + LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); continue; } @@ -430,14 +427,14 @@ ReadCLOGPage(int pageno) CLOGPhysicalReadPage(pageno, slotno); /* Re-acquire shared control lock and update page state */ - S_LOCK(&(ClogCtl->control_lck)); + LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); Assert(ClogCtl->page_number[slotno] == pageno && ClogCtl->page_status[slotno] == CLOG_PAGE_READ_IN_PROGRESS); ClogCtl->page_status[slotno] = CLOG_PAGE_CLEAN; - S_UNLOCK(&(ClogCtl->buffer_lck[slotno])); + LWLockRelease(ClogBufferLocks[slotno]); ClogRecentlyUsed(slotno); return slotno; @@ -468,8 +465,8 @@ WriteCLOGPage(int slotno) pageno = 
ClogCtl->page_number[slotno]; /* Release shared lock, grab per-buffer lock instead */ - S_UNLOCK(&(ClogCtl->control_lck)); - S_LOCK(&(ClogCtl->buffer_lck[slotno])); + LWLockRelease(CLogControlLock); + LWLockAcquire(ClogBufferLocks[slotno], LW_EXCLUSIVE); /* * Check to see if someone else already did the write, or took the @@ -482,8 +479,8 @@ WriteCLOGPage(int slotno) (ClogCtl->page_status[slotno] != CLOG_PAGE_DIRTY && ClogCtl->page_status[slotno] != CLOG_PAGE_WRITE_IN_PROGRESS)) { - S_UNLOCK(&(ClogCtl->buffer_lck[slotno])); - S_LOCK(&(ClogCtl->control_lck)); + LWLockRelease(ClogBufferLocks[slotno]); + LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); return; } @@ -504,7 +501,7 @@ WriteCLOGPage(int slotno) CLOGPhysicalWritePage(pageno, slotno); /* Re-acquire shared control lock and update page state */ - S_LOCK(&(ClogCtl->control_lck)); + LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); Assert(ClogCtl->page_number[slotno] == pageno && (ClogCtl->page_status[slotno] == CLOG_PAGE_WRITE_IN_PROGRESS || @@ -514,7 +511,7 @@ WriteCLOGPage(int slotno) if (ClogCtl->page_status[slotno] == CLOG_PAGE_WRITE_IN_PROGRESS) ClogCtl->page_status[slotno] = CLOG_PAGE_CLEAN; - S_UNLOCK(&(ClogCtl->buffer_lck[slotno])); + LWLockRelease(ClogBufferLocks[slotno]); } /* @@ -714,7 +711,7 @@ ShutdownCLOG(void) { int slotno; - S_LOCK(&(ClogCtl->control_lck)); + LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++) { @@ -723,7 +720,7 @@ ShutdownCLOG(void) ClogCtl->page_status[slotno] == CLOG_PAGE_CLEAN); } - S_UNLOCK(&(ClogCtl->control_lck)); + LWLockRelease(CLogControlLock); } /* @@ -734,7 +731,7 @@ CheckPointCLOG(void) { int slotno; - S_LOCK(&(ClogCtl->control_lck)); + LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++) { @@ -745,7 +742,7 @@ CheckPointCLOG(void) */ } - S_UNLOCK(&(ClogCtl->control_lck)); + LWLockRelease(CLogControlLock); } @@ -772,12 +769,12 @@ ExtendCLOG(TransactionId newestXact) pageno = 
TransactionIdToPage(newestXact); - S_LOCK(&(ClogCtl->control_lck)); + LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); /* Zero the page and make an XLOG entry about it */ ZeroCLOGPage(pageno, true); - S_UNLOCK(&(ClogCtl->control_lck)); + LWLockRelease(CLogControlLock); } @@ -819,7 +816,7 @@ TruncateCLOG(TransactionId oldestXact) * should have been flushed already during the checkpoint, we're * just being extra careful here.) */ - S_LOCK(&(ClogCtl->control_lck)); + LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); restart:; /* @@ -830,7 +827,7 @@ restart:; */ if (CLOGPagePrecedes(ClogCtl->latest_page_number, cutoffPage)) { - S_UNLOCK(&(ClogCtl->control_lck)); + LWLockRelease(CLogControlLock); elog(LOG, "unable to truncate commit log: apparent wraparound"); return; } @@ -861,7 +858,7 @@ restart:; goto restart; } - S_UNLOCK(&(ClogCtl->control_lck)); + LWLockRelease(CLogControlLock); /* Now we can remove the old CLOG segment(s) */ (void) ScanCLOGDirectory(cutoffPage, true); @@ -974,13 +971,13 @@ clog_redo(XLogRecPtr lsn, XLogRecord *record) memcpy(&pageno, XLogRecGetData(record), sizeof(int)); - S_LOCK(&(ClogCtl->control_lck)); + LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); slotno = ZeroCLOGPage(pageno, false); WriteCLOGPage(slotno); Assert(ClogCtl->page_status[slotno] == CLOG_PAGE_CLEAN); - S_UNLOCK(&(ClogCtl->control_lck)); + LWLockRelease(CLogControlLock); } } diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index 6b25e0a801..048080a180 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -6,7 +6,7 @@ * Copyright (c) 2000, PostgreSQL Global Development Group * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.45 2001/08/25 18:52:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.46 2001/09/29 04:02:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -15,16 +15,13 @@ #include 
"access/clog.h" #include "access/transam.h" +#include "storage/ipc.h" #include "storage/proc.h" /* Number of OIDs to prefetch (preallocate) per XLOG write */ #define VAR_OID_PREFETCH 8192 -/* Spinlocks for serializing generation of XIDs and OIDs, respectively */ -SPINLOCK XidGenLockId; -SPINLOCK OidGenLockId; - /* pointer to "variable cache" in shared memory (set up by shmem.c) */ VariableCache ShmemVariableCache = NULL; @@ -44,7 +41,7 @@ GetNewTransactionId(void) if (AMI_OVERRIDE) return BootstrapTransactionId; - SpinAcquire(XidGenLockId); + LWLockAcquire(XidGenLock, LW_EXCLUSIVE); xid = ShmemVariableCache->nextXid; @@ -83,7 +80,7 @@ GetNewTransactionId(void) if (MyProc != (PROC *) NULL) MyProc->xid = xid; - SpinRelease(XidGenLockId); + LWLockRelease(XidGenLock); return xid; } @@ -103,9 +100,9 @@ ReadNewTransactionId(void) if (AMI_OVERRIDE) return BootstrapTransactionId; - SpinAcquire(XidGenLockId); + LWLockAcquire(XidGenLock, LW_SHARED); xid = ShmemVariableCache->nextXid; - SpinRelease(XidGenLockId); + LWLockRelease(XidGenLock); return xid; } @@ -122,7 +119,7 @@ GetNewObjectId(void) { Oid result; - SpinAcquire(OidGenLockId); + LWLockAcquire(OidGenLock, LW_EXCLUSIVE); /* * Check for wraparound of the OID counter. 
We *must* not return 0 @@ -149,7 +146,7 @@ GetNewObjectId(void) (ShmemVariableCache->nextOid)++; (ShmemVariableCache->oidCount)--; - SpinRelease(OidGenLockId); + LWLockRelease(OidGenLock); lastSeenOid = result; @@ -162,12 +159,12 @@ CheckMaxObjectId(Oid assigned_oid) if (lastSeenOid != InvalidOid && assigned_oid < lastSeenOid) return; - SpinAcquire(OidGenLockId); + LWLockAcquire(OidGenLock, LW_EXCLUSIVE); if (assigned_oid < ShmemVariableCache->nextOid) { lastSeenOid = ShmemVariableCache->nextOid - 1; - SpinRelease(OidGenLockId); + LWLockRelease(OidGenLock); return; } @@ -178,7 +175,7 @@ CheckMaxObjectId(Oid assigned_oid) ShmemVariableCache->oidCount -= assigned_oid - ShmemVariableCache->nextOid + 1; ShmemVariableCache->nextOid = assigned_oid + 1; - SpinRelease(OidGenLockId); + LWLockRelease(OidGenLock); return; } @@ -192,5 +189,5 @@ CheckMaxObjectId(Oid assigned_oid) ShmemVariableCache->nextOid = assigned_oid + 1; ShmemVariableCache->oidCount = VAR_OID_PREFETCH - 1; - SpinRelease(OidGenLockId); + LWLockRelease(OidGenLock); } diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 6b0d4de720..b7e13ec0fc 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.110 2001/09/28 08:08:57 thomas Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.111 2001/09/29 04:02:21 tgl Exp $ * * NOTES * Transaction aborts can now occur two ways: @@ -965,7 +965,7 @@ CommitTransaction(void) * this must be done _before_ releasing locks we hold and _after_ * RecordTransactionCommit. 
* - * SpinAcquire(SInvalLock) is required: UPDATE with xid 0 is blocked + * LWLockAcquire(SInvalLock) is required: UPDATE with xid 0 is blocked * by xid 1' UPDATE, xid 1 is doing commit while xid 2 gets snapshot - * if xid 2' GetSnapshotData sees xid 1 as running then it must see * xid 0 as running as well or it will see two tuple versions - one @@ -975,10 +975,10 @@ CommitTransaction(void) if (MyProc != (PROC *) NULL) { /* Lock SInvalLock because that's what GetSnapshotData uses. */ - SpinAcquire(SInvalLock); + LWLockAcquire(SInvalLock, LW_EXCLUSIVE); MyProc->xid = InvalidTransactionId; MyProc->xmin = InvalidTransactionId; - SpinRelease(SInvalLock); + LWLockRelease(SInvalLock); } /* @@ -1030,12 +1030,15 @@ AbortTransaction(void) HOLD_INTERRUPTS(); /* - * Release any spinlocks or buffer context locks we might be holding - * as quickly as possible. (Real locks, however, must be held till we - * finish aborting.) Releasing spinlocks is critical since we might - * try to grab them again while cleaning up! + * Release any LW locks we might be holding as quickly as possible. + * (Regular locks, however, must be held till we finish aborting.) + * Releasing LW locks is critical since we might try to grab them again + * while cleaning up! */ - ProcReleaseSpins(NULL); + LWLockReleaseAll(); + + /* Clean up buffer I/O and buffer context locks, too */ + AbortBufferIO(); UnlockBuffers(); /* @@ -1081,10 +1084,10 @@ AbortTransaction(void) if (MyProc != (PROC *) NULL) { /* Lock SInvalLock because that's what GetSnapshotData uses. 
*/ - SpinAcquire(SInvalLock); + LWLockAcquire(SInvalLock, LW_EXCLUSIVE); MyProc->xid = InvalidTransactionId; MyProc->xmin = InvalidTransactionId; - SpinRelease(SInvalLock); + LWLockRelease(SInvalLock); } RelationPurgeLocalRelation(false); diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 56a0e2ba6b..057ee72d55 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.77 2001/09/26 20:24:02 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.78 2001/09/29 04:02:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -33,11 +33,11 @@ #include "access/xlogutils.h" #include "catalog/catversion.h" #include "catalog/pg_control.h" -#include "storage/sinval.h" +#include "storage/bufpage.h" +#include "storage/lwlock.h" #include "storage/proc.h" +#include "storage/sinval.h" #include "storage/spin.h" -#include "storage/s_lock.h" -#include "storage/bufpage.h" #include "utils/builtins.h" #include "utils/relcache.h" #include "utils/selfuncs.h" @@ -86,11 +86,6 @@ #endif -/* Max time to wait to acquire XLog activity locks */ -#define XLOG_LOCK_TIMEOUT (5*60*1000000) /* 5 minutes */ -/* Max time to wait to acquire checkpoint lock */ -#define CHECKPOINT_LOCK_TIMEOUT (20*60*1000000) /* 20 minutes */ - /* User-settable parameters */ int CheckPointSegments = 3; int XLOGbuffers = 8; @@ -155,13 +150,10 @@ static XLogRecPtr ProcLastRecPtr = {0, 0}; * (which is almost but not quite the same as a pointer to the most recent * CHECKPOINT record). We update this from the shared-memory copy, * XLogCtl->Insert.RedoRecPtr, whenever we can safely do so (ie, when we - * hold the Insert spinlock). See XLogInsert for details. 
+ * hold the Insert lock). See XLogInsert for details. */ static XLogRecPtr RedoRecPtr; -/* This lock must be held to read/update control file or create new log file */ -SPINLOCK ControlFileLockId; - /*---------- * Shared-memory data structures for XLOG control * @@ -171,24 +163,24 @@ SPINLOCK ControlFileLockId; * These structs are identical but are declared separately to indicate their * slightly different functions. * - * We do a lot of pushups to minimize the amount of access to spinlocked + * We do a lot of pushups to minimize the amount of access to lockable * shared memory values. There are actually three shared-memory copies of * LogwrtResult, plus one unshared copy in each backend. Here's how it works: * XLogCtl->LogwrtResult is protected by info_lck - * XLogCtl->Write.LogwrtResult is protected by logwrt_lck - * XLogCtl->Insert.LogwrtResult is protected by insert_lck - * One must hold the associated spinlock to read or write any of these, but - * of course no spinlock is needed to read/write the unshared LogwrtResult. + * XLogCtl->Write.LogwrtResult is protected by WALWriteLock + * XLogCtl->Insert.LogwrtResult is protected by WALInsertLock + * One must hold the associated lock to read or write any of these, but + * of course no lock is needed to read/write the unshared LogwrtResult. * * XLogCtl->LogwrtResult and XLogCtl->Write.LogwrtResult are both "always * right", since both are updated by a write or flush operation before - * it releases logwrt_lck. The point of keeping XLogCtl->Write.LogwrtResult - * is that it can be examined/modified by code that already holds logwrt_lck + * it releases WALWriteLock. The point of keeping XLogCtl->Write.LogwrtResult + * is that it can be examined/modified by code that already holds WALWriteLock * without needing to grab info_lck as well. * * XLogCtl->Insert.LogwrtResult may lag behind the reality of the other two, * but is updated when convenient. 
Again, it exists for the convenience of - * code that is already holding insert_lck but not the other locks. + * code that is already holding WALInsertLock but not the other locks. * * The unshared LogwrtResult may lag behind any or all of these, and again * is updated when convenient. @@ -199,6 +191,24 @@ SPINLOCK ControlFileLockId; * Note that this all works because the request and result positions can only * advance forward, never back up, and so we can easily determine which of two * values is "more up to date". + * + * info_lck is only held long enough to read/update the protected variables, + * so it's a plain spinlock. The other locks are held longer (potentially + * over I/O operations), so we use LWLocks for them. These locks are: + * + * WALInsertLock: must be held to insert a record into the WAL buffers. + * + * WALWriteLock: must be held to write WAL buffers to disk (XLogWrite or + * XLogFlush). + * + * ControlFileLock: must be held to read/update control file or create + * new log file. + * + * CheckpointLock: must be held to do a checkpoint (ensures only one + * checkpointer at a time; even though the postmaster won't launch + * parallel checkpoint processes, we need this because manual checkpoints + * could be launched simultaneously). + * *---------- */ typedef struct XLogwrtRqst @@ -240,18 +250,18 @@ typedef struct XLogCtlWrite */ typedef struct XLogCtlData { - /* Protected by insert_lck: */ + /* Protected by WALInsertLock: */ XLogCtlInsert Insert; /* Protected by info_lck: */ XLogwrtRqst LogwrtRqst; XLogwrtResult LogwrtResult; - /* Protected by logwrt_lck: */ + /* Protected by WALWriteLock: */ XLogCtlWrite Write; /* * These values do not change after startup, although the pointed-to - * pages and xlblocks values certainly do. Permission to read/write - * the pages and xlblocks values depends on insert_lck and logwrt_lck. + * pages and xlblocks values certainly do. 
Permission to read/write the + * pages and xlblocks values depends on WALInsertLock and WALWriteLock. */ char *pages; /* buffers for unwritten XLOG pages */ XLogRecPtr *xlblocks; /* 1st byte ptr-s + BLCKSZ */ @@ -259,13 +269,10 @@ typedef struct XLogCtlData uint32 XLogCacheBlck; /* highest allocated xlog buffer index */ StartUpID ThisStartUpID; - /* This value is not protected by *any* spinlock... */ + /* This value is not protected by *any* lock... */ XLogRecPtr RedoRecPtr; /* see SetRedoRecPtr/GetRedoRecPtr */ - slock_t insert_lck; /* XLogInsert lock */ slock_t info_lck; /* locks shared LogwrtRqst/LogwrtResult */ - slock_t logwrt_lck; /* XLogWrite/XLogFlush lock */ - slock_t chkp_lck; /* checkpoint lock */ } XLogCtlData; static XLogCtlData *XLogCtl = NULL; @@ -473,7 +480,7 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata) uint32 len, write_len; unsigned i; - bool do_logwrt; + XLogwrtRqst LogwrtRqst; bool updrqst; bool no_tran = (rmid == RM_XLOG_ID) ? true : false; @@ -505,7 +512,7 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata) * * We may have to loop back to here if a race condition is detected * below. We could prevent the race by doing all this work while - * holding the insert spinlock, but it seems better to avoid doing CRC + * holding the insert lock, but it seems better to avoid doing CRC * calculations while holding the lock. This means we have to be * careful about modifying the rdata list until we know we aren't * going to loop back again. The only change we allow ourselves to @@ -607,48 +614,33 @@ begin:; START_CRIT_SECTION(); - /* wait to obtain xlog insert lock */ - do_logwrt = true; + /* update LogwrtResult before doing cache fill check */ + SpinLockAcquire_NoHoldoff(&XLogCtl->info_lck); + LogwrtRqst = XLogCtl->LogwrtRqst; + LogwrtResult = XLogCtl->LogwrtResult; + SpinLockRelease_NoHoldoff(&XLogCtl->info_lck); - for (i = 0;;) + /* + * If cache is half filled then try to acquire write lock and + * do XLogWrite. 
Ignore any fractional blocks in performing this check. + */ + LogwrtRqst.Write.xrecoff -= LogwrtRqst.Write.xrecoff % BLCKSZ; + if (LogwrtRqst.Write.xlogid != LogwrtResult.Write.xlogid || + (LogwrtRqst.Write.xrecoff >= LogwrtResult.Write.xrecoff + + XLogCtl->XLogCacheByte / 2)) { - /* try to update LogwrtResult while waiting for insert lock */ - if (!TAS(&(XLogCtl->info_lck))) + if (LWLockConditionalAcquire(WALWriteLock, LW_EXCLUSIVE)) { - XLogwrtRqst LogwrtRqst; - - LogwrtRqst = XLogCtl->LogwrtRqst; - LogwrtResult = XLogCtl->LogwrtResult; - S_UNLOCK(&(XLogCtl->info_lck)); - - /* - * If cache is half filled then try to acquire logwrt lock and - * do LOGWRT work, but only once per XLogInsert call. Ignore - * any fractional blocks in performing this check. - */ - LogwrtRqst.Write.xrecoff -= LogwrtRqst.Write.xrecoff % BLCKSZ; - if (do_logwrt && - (LogwrtRqst.Write.xlogid != LogwrtResult.Write.xlogid || - (LogwrtRqst.Write.xrecoff >= LogwrtResult.Write.xrecoff + - XLogCtl->XLogCacheByte / 2))) - { - if (!TAS(&(XLogCtl->logwrt_lck))) - { - LogwrtResult = XLogCtl->Write.LogwrtResult; - if (XLByteLT(LogwrtResult.Write, LogwrtRqst.Write)) - { - XLogWrite(LogwrtRqst); - do_logwrt = false; - } - S_UNLOCK(&(XLogCtl->logwrt_lck)); - } - } + LogwrtResult = XLogCtl->Write.LogwrtResult; + if (XLByteLT(LogwrtResult.Write, LogwrtRqst.Write)) + XLogWrite(LogwrtRqst); + LWLockRelease(WALWriteLock); } - if (!TAS(&(XLogCtl->insert_lck))) - break; - S_LOCK_SLEEP(&(XLogCtl->insert_lck), i++, XLOG_LOCK_TIMEOUT); } + /* Now wait to get insert lock */ + LWLockAcquire(WALInsertLock, LW_EXCLUSIVE); + /* * Check to see if my RedoRecPtr is out of date. If so, may have to * go back and recompute everything. This can only happen just after @@ -667,12 +659,11 @@ begin:; if (dtbuf_bkp[i] == false && XLByteLE(dtbuf_lsn[i], RedoRecPtr)) { - /* * Oops, this buffer now needs to be backed up, but we * didn't think so above. Start over. 
*/ - S_UNLOCK(&(XLogCtl->insert_lck)); + LWLockRelease(WALInsertLock); END_CRIT_SECTION(); goto begin; } @@ -751,9 +742,9 @@ begin:; /* If first XLOG record of transaction, save it in PROC array */ if (MyLastRecPtr.xrecoff == 0 && !no_tran) { - SpinAcquire(SInvalLock); + LWLockAcquire(SInvalLock, LW_EXCLUSIVE); MyProc->logRec = RecPtr; - SpinRelease(SInvalLock); + LWLockRelease(SInvalLock); } if (XLOG_DEBUG) @@ -837,17 +828,17 @@ begin:; curridx = PrevBufIdx(curridx); WriteRqst = XLogCtl->xlblocks[curridx]; - S_UNLOCK(&(XLogCtl->insert_lck)); + LWLockRelease(WALInsertLock); if (updrqst) { - S_LOCK(&(XLogCtl->info_lck)); + SpinLockAcquire_NoHoldoff(&XLogCtl->info_lck); /* advance global request to include new block(s) */ if (XLByteLT(XLogCtl->LogwrtRqst.Write, WriteRqst)) XLogCtl->LogwrtRqst.Write = WriteRqst; /* update local result copy while I have the chance */ LogwrtResult = XLogCtl->LogwrtResult; - S_UNLOCK(&(XLogCtl->info_lck)); + SpinLockRelease_NoHoldoff(&XLogCtl->info_lck); } END_CRIT_SECTION(); @@ -859,11 +850,11 @@ begin:; * buffer if it still contains unwritten data. * * The global LogwrtRqst.Write pointer needs to be advanced to include the - * just-filled page. If we can do this for free (without an extra spinlock), + * just-filled page. If we can do this for free (without an extra lock), * we do so here. Otherwise the caller must do it. We return TRUE if the * request update still needs to be done, FALSE if we did it internally. * - * Must be called with insert_lck held. + * Must be called with WALInsertLock held. */ static bool AdvanceXLInsertBuffer(void) @@ -890,45 +881,37 @@ AdvanceXLInsertBuffer(void) if (!XLByteLE(OldPageRqstPtr, LogwrtResult.Write)) { /* nope, got work to do... 
*/ - unsigned spins = 0; XLogRecPtr FinishedPageRqstPtr; FinishedPageRqstPtr = XLogCtl->xlblocks[Insert->curridx]; - for (;;) + /* Before waiting, get info_lck and update LogwrtResult */ + SpinLockAcquire_NoHoldoff(&XLogCtl->info_lck); + if (XLByteLT(XLogCtl->LogwrtRqst.Write, FinishedPageRqstPtr)) + XLogCtl->LogwrtRqst.Write = FinishedPageRqstPtr; + LogwrtResult = XLogCtl->LogwrtResult; + SpinLockRelease_NoHoldoff(&XLogCtl->info_lck); + + update_needed = false; /* Did the shared-request update */ + + if (XLByteLE(OldPageRqstPtr, LogwrtResult.Write)) + { + /* OK, someone wrote it already */ + Insert->LogwrtResult = LogwrtResult; + } + else { - /* While waiting, try to get info_lck and update LogwrtResult */ - if (!TAS(&(XLogCtl->info_lck))) + /* Must acquire write lock */ + LWLockAcquire(WALWriteLock, LW_EXCLUSIVE); + LogwrtResult = Write->LogwrtResult; + if (XLByteLE(OldPageRqstPtr, LogwrtResult.Write)) { - if (XLByteLT(XLogCtl->LogwrtRqst.Write, FinishedPageRqstPtr)) - XLogCtl->LogwrtRqst.Write = FinishedPageRqstPtr; - update_needed = false; /* Did the shared-request update */ - LogwrtResult = XLogCtl->LogwrtResult; - S_UNLOCK(&(XLogCtl->info_lck)); - - if (XLByteLE(OldPageRqstPtr, LogwrtResult.Write)) - { - /* OK, someone wrote it already */ - Insert->LogwrtResult = LogwrtResult; - break; - } + /* OK, someone wrote it already */ + LWLockRelease(WALWriteLock); + Insert->LogwrtResult = LogwrtResult; } - - /* - * LogwrtResult lock is busy or we know the page is still - * dirty. Try to acquire logwrt lock and write full blocks. - */ - if (!TAS(&(XLogCtl->logwrt_lck))) + else { - LogwrtResult = Write->LogwrtResult; - if (XLByteLE(OldPageRqstPtr, LogwrtResult.Write)) - { - S_UNLOCK(&(XLogCtl->logwrt_lck)); - /* OK, someone wrote it already */ - Insert->LogwrtResult = LogwrtResult; - break; - } - /* * Have to write buffers while holding insert lock. 
This * is not good, so only write as much as we absolutely @@ -938,11 +921,9 @@ AdvanceXLInsertBuffer(void) WriteRqst.Flush.xlogid = 0; WriteRqst.Flush.xrecoff = 0; XLogWrite(WriteRqst); - S_UNLOCK(&(XLogCtl->logwrt_lck)); + LWLockRelease(WALWriteLock); Insert->LogwrtResult = LogwrtResult; - break; } - S_LOCK_SLEEP(&(XLogCtl->logwrt_lck), spins++, XLOG_LOCK_TIMEOUT); } } @@ -986,7 +967,7 @@ AdvanceXLInsertBuffer(void) /* * Write and/or fsync the log at least as far as WriteRqst indicates. * - * Must be called with logwrt_lck held. + * Must be called with WALWriteLock held. */ static void XLogWrite(XLogwrtRqst WriteRqst) @@ -1047,7 +1028,7 @@ XLogWrite(XLogwrtRqst WriteRqst) "consider increasing WAL_FILES"); /* update pg_control, unless someone else already did */ - SpinAcquire(ControlFileLockId); + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); if (ControlFile->logId < openLogId || (ControlFile->logId == openLogId && ControlFile->logSeg < openLogSeg + 1)) @@ -1073,7 +1054,7 @@ XLogWrite(XLogwrtRqst WriteRqst) kill(getppid(), SIGUSR1); } } - SpinRelease(ControlFileLockId); + LWLockRelease(ControlFileLock); } if (openLogFile < 0) @@ -1167,13 +1148,13 @@ XLogWrite(XLogwrtRqst WriteRqst) * 'result' values. This is not absolutely essential, but it saves * some code in a couple of places. */ - S_LOCK(&(XLogCtl->info_lck)); + SpinLockAcquire_NoHoldoff(&XLogCtl->info_lck); XLogCtl->LogwrtResult = LogwrtResult; if (XLByteLT(XLogCtl->LogwrtRqst.Write, LogwrtResult.Write)) XLogCtl->LogwrtRqst.Write = LogwrtResult.Write; if (XLByteLT(XLogCtl->LogwrtRqst.Flush, LogwrtResult.Flush)) XLogCtl->LogwrtRqst.Flush = LogwrtResult.Flush; - S_UNLOCK(&(XLogCtl->info_lck)); + SpinLockRelease_NoHoldoff(&XLogCtl->info_lck); Write->LogwrtResult = LogwrtResult; } @@ -1181,7 +1162,7 @@ XLogWrite(XLogwrtRqst WriteRqst) /* * Ensure that all XLOG data through the given position is flushed to disk. 
* - * NOTE: this differs from XLogWrite mainly in that the logwrt_lck is not + * NOTE: this differs from XLogWrite mainly in that the WALWriteLock is not * already held, and we try to avoid acquiring it if possible. */ void @@ -1189,7 +1170,6 @@ XLogFlush(XLogRecPtr record) { XLogRecPtr WriteRqstPtr; XLogwrtRqst WriteRqst; - unsigned spins = 0; if (XLOG_DEBUG) { @@ -1224,23 +1204,18 @@ XLogFlush(XLogRecPtr record) /* initialize to given target; may increase below */ WriteRqstPtr = record; - for (;;) + /* read LogwrtResult and update local state */ + SpinLockAcquire_NoHoldoff(&XLogCtl->info_lck); + if (XLByteLT(WriteRqstPtr, XLogCtl->LogwrtRqst.Write)) + WriteRqstPtr = XLogCtl->LogwrtRqst.Write; + LogwrtResult = XLogCtl->LogwrtResult; + SpinLockRelease_NoHoldoff(&XLogCtl->info_lck); + + /* done already? */ + if (!XLByteLE(record, LogwrtResult.Flush)) { - /* try to read LogwrtResult and update local state */ - if (!TAS(&(XLogCtl->info_lck))) - { - if (XLByteLT(WriteRqstPtr, XLogCtl->LogwrtRqst.Write)) - WriteRqstPtr = XLogCtl->LogwrtRqst.Write; - LogwrtResult = XLogCtl->LogwrtResult; - S_UNLOCK(&(XLogCtl->info_lck)); - if (XLByteLE(record, LogwrtResult.Flush)) - { - /* Done already */ - break; - } - } /* if something was added to log cache then try to flush this too */ - if (!TAS(&(XLogCtl->insert_lck))) + if (LWLockConditionalAcquire(WALInsertLock, LW_EXCLUSIVE)) { XLogCtlInsert *Insert = &XLogCtl->Insert; uint32 freespace = INSERT_FREESPACE(Insert); @@ -1252,29 +1227,22 @@ XLogFlush(XLogRecPtr record) WriteRqstPtr = XLogCtl->xlblocks[Insert->curridx]; WriteRqstPtr.xrecoff -= freespace; } - S_UNLOCK(&(XLogCtl->insert_lck)); + LWLockRelease(WALInsertLock); } - /* now try to get the logwrt lock */ - if (!TAS(&(XLogCtl->logwrt_lck))) + /* now wait for the write lock */ + LWLockAcquire(WALWriteLock, LW_EXCLUSIVE); + LogwrtResult = XLogCtl->Write.LogwrtResult; + if (!XLByteLE(record, LogwrtResult.Flush)) { - LogwrtResult = XLogCtl->Write.LogwrtResult; - if 
(XLByteLE(record, LogwrtResult.Flush)) - { - /* Done already */ - S_UNLOCK(&(XLogCtl->logwrt_lck)); - break; - } WriteRqst.Write = WriteRqstPtr; WriteRqst.Flush = record; XLogWrite(WriteRqst); - S_UNLOCK(&(XLogCtl->logwrt_lck)); if (XLByteLT(LogwrtResult.Flush, record)) elog(STOP, "XLogFlush: request %X/%X is not satisfied --- flushed only to %X/%X", record.xlogid, record.xrecoff, LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff); - break; } - S_LOCK_SLEEP(&(XLogCtl->logwrt_lck), spins++, XLOG_LOCK_TIMEOUT); + LWLockRelease(WALWriteLock); } END_CRIT_SECTION(); @@ -1289,9 +1257,9 @@ XLogFlush(XLogRecPtr record) * pre-existing file will be deleted). On return, TRUE if a pre-existing * file was used. * - * use_lock: if TRUE, acquire ControlFileLock spinlock while moving file into + * use_lock: if TRUE, acquire ControlFileLock while moving file into * place. This should be TRUE except during bootstrap log creation. The - * caller must *not* hold the spinlock at call. + * caller must *not* hold the lock at call. * * Returns FD of opened file. */ @@ -1329,7 +1297,7 @@ XLogFileInit(uint32 log, uint32 seg, * Initialize an empty (all zeroes) segment. NOTE: it is possible * that another process is doing the same thing. If so, we will end * up pre-creating an extra log segment. That seems OK, and better - * than holding the spinlock throughout this lengthy process. + * than holding the lock throughout this lengthy process. */ snprintf(tmppath, MAXPGPATH, "%s/xlogtemp.%d", XLogDir, (int) getpid()); @@ -1423,9 +1391,9 @@ XLogFileInit(uint32 log, uint32 seg, * point. Fail if no free slot is found in this range. (Irrelevant if * find_free is FALSE.) * - * use_lock: if TRUE, acquire ControlFileLock spinlock while moving file into + * use_lock: if TRUE, acquire ControlFileLock while moving file into * place. This should be TRUE except during bootstrap log creation. The - * caller must *not* hold the spinlock at call. + * caller must *not* hold the lock at call. 
* * Returns TRUE if file installed, FALSE if not installed because of * exceeding max_advance limit. (Any other kind of failure causes elog().) @@ -1444,7 +1412,7 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath, * We want to be sure that only one process does this at a time. */ if (use_lock) - SpinAcquire(ControlFileLockId); + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); if (!find_free) { @@ -1462,7 +1430,7 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath, { /* Failed to find a free slot within specified range */ if (use_lock) - SpinRelease(ControlFileLockId); + LWLockRelease(ControlFileLock); return false; } NextLogSeg(log, seg); @@ -1487,7 +1455,7 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath, #endif if (use_lock) - SpinRelease(ControlFileLockId); + LWLockRelease(ControlFileLock); return true; } @@ -2319,10 +2287,7 @@ XLOGShmemInit(void) XLogCtl->XLogCacheByte = BLCKSZ * XLOGbuffers; XLogCtl->XLogCacheBlck = XLOGbuffers - 1; XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages); - S_INIT_LOCK(&(XLogCtl->insert_lck)); - S_INIT_LOCK(&(XLogCtl->info_lck)); - S_INIT_LOCK(&(XLogCtl->logwrt_lck)); - S_INIT_LOCK(&(XLogCtl->chkp_lck)); + SpinLockInit(&XLogCtl->info_lck); /* * If we are not in bootstrap mode, pg_control should already exist. @@ -2821,12 +2786,12 @@ SetThisStartUpID(void) * in shmem (using SetRedoRecPtr). When checkpointer completes, postmaster * calls GetRedoRecPtr to update its own copy of RedoRecPtr, so that * subsequently-spawned backends will start out with a reasonably up-to-date - * local RedoRecPtr. Since these operations are not protected by any spinlock + * local RedoRecPtr. Since these operations are not protected by any lock * and copying an XLogRecPtr isn't atomic, it's unsafe to use either of these * routines at other times! * * Note: once spawned, a backend must update its local RedoRecPtr from - * XLogCtl->Insert.RedoRecPtr while holding the insert spinlock. 
This is + * XLogCtl->Insert.RedoRecPtr while holding the insert lock. This is * done in XLogInsert(). */ void @@ -2874,20 +2839,26 @@ CreateCheckPoint(bool shutdown) uint32 freespace; uint32 _logId; uint32 _logSeg; - unsigned spins = 0; if (MyLastRecPtr.xrecoff != 0) elog(ERROR, "CreateCheckPoint: cannot be called inside transaction block"); - START_CRIT_SECTION(); - - /* Grab lock, using larger than normal sleep between tries (1 sec) */ - while (TAS(&(XLogCtl->chkp_lck))) + /* + * The CheckpointLock can be held for quite a while, which is not good + * because we won't respond to a cancel/die request while waiting for an + * LWLock. (But the alternative of using a regular lock won't work for + * background checkpoint processes, which are not regular backends.) + * So, rather than use a plain LWLockAcquire, use this kluge to allow + * an interrupt to be accepted while we are waiting: + */ + while (!LWLockConditionalAcquire(CheckpointLock, LW_EXCLUSIVE)) { - S_LOCK_SLEEP_INTERVAL(&(XLogCtl->chkp_lck), spins++, - CHECKPOINT_LOCK_TIMEOUT, 1000000); + CHECK_FOR_INTERRUPTS(); + sleep(1); } + START_CRIT_SECTION(); + if (shutdown) { ControlFile->state = DB_SHUTDOWNING; @@ -2899,7 +2870,7 @@ CreateCheckPoint(bool shutdown) checkPoint.ThisStartUpID = ThisStartUpID; checkPoint.time = time(NULL); - S_LOCK(&(XLogCtl->insert_lck)); + LWLockAcquire(WALInsertLock, LW_EXCLUSIVE); /* * If this isn't a shutdown, and we have not inserted any XLOG records @@ -2929,8 +2900,8 @@ CreateCheckPoint(bool shutdown) ControlFile->checkPoint.xrecoff == ControlFile->checkPointCopy.redo.xrecoff) { - S_UNLOCK(&(XLogCtl->insert_lck)); - S_UNLOCK(&(XLogCtl->chkp_lck)); + LWLockRelease(WALInsertLock); + LWLockRelease(CheckpointLock); END_CRIT_SECTION(); return; } @@ -2974,17 +2945,17 @@ CreateCheckPoint(bool shutdown) * Now we can release insert lock, allowing other xacts to proceed * even while we are flushing disk buffers. 
*/ - S_UNLOCK(&(XLogCtl->insert_lck)); + LWLockRelease(WALInsertLock); - SpinAcquire(XidGenLockId); + LWLockAcquire(XidGenLock, LW_SHARED); checkPoint.nextXid = ShmemVariableCache->nextXid; - SpinRelease(XidGenLockId); + LWLockRelease(XidGenLock); - SpinAcquire(OidGenLockId); + LWLockAcquire(OidGenLock, LW_SHARED); checkPoint.nextOid = ShmemVariableCache->nextOid; if (!shutdown) checkPoint.nextOid += ShmemVariableCache->oidCount; - SpinRelease(OidGenLockId); + LWLockRelease(OidGenLock); /* * Having constructed the checkpoint record, ensure all shmem disk @@ -3039,7 +3010,7 @@ CreateCheckPoint(bool shutdown) /* * Update the control file. */ - SpinAcquire(ControlFileLockId); + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); if (shutdown) ControlFile->state = DB_SHUTDOWNED; ControlFile->prevCheckPoint = ControlFile->checkPoint; @@ -3047,7 +3018,7 @@ CreateCheckPoint(bool shutdown) ControlFile->checkPointCopy = checkPoint; ControlFile->time = time(NULL); UpdateControlFile(); - SpinRelease(ControlFileLockId); + LWLockRelease(ControlFileLock); /* * Delete offline log files (those no longer needed even for previous @@ -3067,7 +3038,7 @@ CreateCheckPoint(bool shutdown) if (!shutdown) PreallocXlogFiles(recptr); - S_UNLOCK(&(XLogCtl->chkp_lck)); + LWLockRelease(CheckpointLock); END_CRIT_SECTION(); } diff --git a/src/backend/bootstrap/bootparse.y b/src/backend/bootstrap/bootparse.y index 0265561856..825620a6fb 100644 --- a/src/backend/bootstrap/bootparse.y +++ b/src/backend/bootstrap/bootparse.y @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/bootstrap/bootparse.y,v 1.38 2001/08/21 16:36:00 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/bootstrap/bootparse.y,v 1.39 2001/09/29 04:02:22 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -45,7 +45,6 @@ #include "storage/itemptr.h" #include "storage/off.h" #include "storage/smgr.h" -#include "storage/spin.h" #include "tcop/dest.h" #include "utils/nabstime.h" 
#include "utils/rel.h" diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 6a0a1306e2..76d4d0252d 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/bootstrap/bootstrap.c,v 1.116 2001/09/27 16:29:12 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/bootstrap/bootstrap.c,v 1.117 2001/09/29 04:02:22 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -33,6 +33,7 @@ #include "catalog/pg_type.h" #include "libpq/pqsignal.h" #include "miscadmin.h" +#include "storage/proc.h" #include "tcop/tcopprot.h" #include "utils/builtins.h" #include "utils/exc.h" @@ -360,29 +361,39 @@ BootstrapMain(int argc, char *argv[]) * XLOG operations */ SetProcessingMode(NormalProcessing); - if (xlogop == BS_XLOG_NOP) - StartupXLOG(); - else if (xlogop == BS_XLOG_BOOTSTRAP) - { - BootStrapXLOG(); - StartupXLOG(); - } - else + + switch (xlogop) { - if (xlogop == BS_XLOG_CHECKPOINT) - { + case BS_XLOG_NOP: + StartupXLOG(); + break; + + case BS_XLOG_BOOTSTRAP: + BootStrapXLOG(); + StartupXLOG(); + break; + + case BS_XLOG_CHECKPOINT: + if (IsUnderPostmaster) + InitDummyProcess(); /* needed to get LWLocks */ CreateDummyCaches(); CreateCheckPoint(false); SetRedoRecPtr(); - } - else if (xlogop == BS_XLOG_STARTUP) + proc_exit(0); /* done */ + + case BS_XLOG_STARTUP: StartupXLOG(); - else if (xlogop == BS_XLOG_SHUTDOWN) + proc_exit(0); /* done */ + + case BS_XLOG_SHUTDOWN: ShutdownXLOG(); - else + proc_exit(0); /* done */ + + default: elog(STOP, "Unsupported XLOG op %d", xlogop); - proc_exit(0); + proc_exit(0); } + SetProcessingMode(BootstrapProcessing); /* diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index 3685217fb7..38405f6c6c 100644 --- a/src/backend/commands/vacuumlazy.c +++ 
b/src/backend/commands/vacuumlazy.c @@ -31,7 +31,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.7 2001/09/21 03:32:35 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.8 2001/09/29 04:02:22 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,7 +53,7 @@ * A page with less than PAGE_SPACE_THRESHOLD free space will be forgotten * immediately, and not even passed to the free space map. Removing the * uselessly small entries early saves cycles, and in particular reduces - * the amount of time we spend holding the FSM spinlock when we finally call + * the amount of time we spend holding the FSM lock when we finally call * MultiRecordFreeSpace. Since the FSM will ignore pages below its own * runtime threshold anyway, there's no point in making this really small. * XXX Is it worth trying to measure average tuple size, and using that to diff --git a/src/backend/storage/buffer/README b/src/backend/storage/buffer/README index 498deb489d..a2861b8db9 100644 --- a/src/backend/storage/buffer/README +++ b/src/backend/storage/buffer/README @@ -1,4 +1,4 @@ -$Header: /cvsroot/pgsql/src/backend/storage/buffer/README,v 1.2 2001/08/25 18:52:42 tgl Exp $ +$Header: /cvsroot/pgsql/src/backend/storage/buffer/README,v 1.3 2001/09/29 04:02:22 tgl Exp $ Notes about shared buffer access rules -------------------------------------- @@ -30,12 +30,10 @@ Buffer locks: there are two kinds of buffer locks, shared and exclusive, which act just as you'd expect: multiple backends can hold shared locks on the same buffer, but an exclusive lock prevents anyone else from holding either shared or exclusive lock. (These can alternatively be called READ -and WRITE locks.) These locks are short-term: they should not be held for -long. They are implemented as per-buffer spinlocks, so another backend -trying to acquire a competing lock will spin as long as you hold yours! 
-Buffer locks are acquired and released by LockBuffer(). It will *not* work -for a single backend to try to acquire multiple locks on the same buffer. -One must pin a buffer before trying to lock it. +and WRITE locks.) These locks are intended to be short-term: they should not +be held for long. Buffer locks are acquired and released by LockBuffer(). +It will *not* work for a single backend to try to acquire multiple locks on +the same buffer. One must pin a buffer before trying to lock it. Buffer access rules: diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c index 819fe7e206..45a2e4aa16 100644 --- a/src/backend/storage/buffer/buf_init.c +++ b/src/backend/storage/buffer/buf_init.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.43 2001/07/06 21:04:25 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.44 2001/09/29 04:02:22 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -28,10 +28,9 @@ #include "storage/fd.h" #include "storage/ipc.h" #include "storage/lmgr.h" -#include "storage/s_lock.h" #include "storage/shmem.h" #include "storage/smgr.h" -#include "storage/spin.h" +#include "storage/lwlock.h" #include "utils/builtins.h" #include "utils/hsearch.h" #include "utils/memutils.h" @@ -117,8 +116,6 @@ bool *BufferDirtiedByMe; /* T if buf has been dirtied in cur xact */ * */ -SPINLOCK BufMgrLock; - long int ReadBufferCount; long int ReadLocalBufferCount; long int BufferHitCount; @@ -151,7 +148,7 @@ InitBufferPool(void) * anyone else attached to the shmem at this point, we've got * problems. */ - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); #ifdef BMTRACE CurTraceBuf = (long *) ShmemInitStruct("Buffer trace", @@ -186,8 +183,8 @@ InitBufferPool(void) /* * link the buffers into a circular, doubly-linked list to - * initialize free list. 
Still don't know anything about - * replacement strategy in this file. + * initialize free list, and initialize the buffer headers. + * Still don't know anything about replacement strategy in this file. */ for (i = 0; i < Data_Descriptors; block += BLCKSZ, buf++, i++) { @@ -197,12 +194,15 @@ InitBufferPool(void) buf->freePrev = i - 1; CLEAR_BUFFERTAG(&(buf->tag)); + buf->buf_id = i; + buf->data = MAKE_OFFSET(block); buf->flags = (BM_DELETED | BM_FREE | BM_VALID); buf->refcount = 0; - buf->buf_id = i; - S_INIT_LOCK(&(buf->io_in_progress_lock)); - S_INIT_LOCK(&(buf->cntx_lock)); + buf->io_in_progress_lock = LWLockAssign(); + buf->cntx_lock = LWLockAssign(); + buf->cntxDirty = false; + buf->wait_backend_id = 0; } /* close the circular queue */ @@ -214,7 +214,7 @@ InitBufferPool(void) InitBufTable(); InitFreeList(!foundDescs); - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); } /* diff --git a/src/backend/storage/buffer/buf_table.c b/src/backend/storage/buffer/buf_table.c index 9e8f164778..671b13efa0 100644 --- a/src/backend/storage/buffer/buf_table.c +++ b/src/backend/storage/buffer/buf_table.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_table.c,v 1.21 2001/03/22 03:59:44 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_table.c,v 1.22 2001/09/29 04:02:22 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,8 +23,7 @@ * * Synchronization: * - * All routines in this file assume buffer manager spinlock is - * held by their caller. + * All routines in this file assume BufMgrLock is held by their caller. 
*/ #include "postgres.h" diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 89443ee160..86c2c478f4 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.116 2001/07/06 21:04:25 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.117 2001/09/29 04:02:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -59,7 +59,6 @@ (*((XLogRecPtr*)MAKE_PTR((bufHdr)->data))) -extern SPINLOCK BufMgrLock; extern long int ReadBufferCount; extern long int ReadLocalBufferCount; extern long int BufferHitCount; @@ -76,7 +75,7 @@ extern long int LocalBufferFlushCount; */ bool SharedBufferChanged = false; -static void WaitIO(BufferDesc *buf, SPINLOCK spinlock); +static void WaitIO(BufferDesc *buf); static void StartBufferIO(BufferDesc *buf, bool forInput); static void TerminateBufferIO(BufferDesc *buf); static void ContinueBufferIO(BufferDesc *buf, bool forInput); @@ -130,7 +129,7 @@ ReadBuffer(Relation reln, BlockNumber blockNum) /* * ReadBufferInternal -- internal version of ReadBuffer with more options * - * bufferLockHeld: if true, caller already acquired the bufmgr spinlock. + * bufferLockHeld: if true, caller already acquired the bufmgr lock. * (This is assumed never to be true if dealing with a local buffer!) */ static Buffer @@ -179,7 +178,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, * block is not currently in memory. */ if (!bufferLockHeld) - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); bufHdr = BufferAlloc(reln, blockNum, &found); if (found) { @@ -188,7 +187,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, } } - /* At this point we do NOT hold the bufmgr spinlock. */ + /* At this point we do NOT hold the bufmgr lock. 
*/ if (!bufHdr) return InvalidBuffer; @@ -208,9 +207,9 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, */ if (!isLocalBuf) { - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); StartBufferIO(bufHdr, false); - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); } } @@ -243,7 +242,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, } /* lock buffer manager again to update IO IN PROGRESS */ - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); if (status == SM_FAIL) { @@ -251,7 +250,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, if (!BufTableDelete(bufHdr)) { - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); elog(FATAL, "BufRead: buffer table broken after IO error"); } /* remember that BufferAlloc() pinned the buffer */ @@ -274,7 +273,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, /* If anyone was waiting for IO to complete, wake them up now */ TerminateBufferIO(bufHdr); - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); if (status == SM_FAIL) return InvalidBuffer; @@ -322,7 +321,7 @@ BufferAlloc(Relation reln, *foundPtr = TRUE; if (inProgress) /* confirm end of IO */ { - WaitIO(buf, BufMgrLock); + WaitIO(buf); inProgress = (buf->flags & BM_IO_IN_PROGRESS); } if (BUFFER_IS_BROKEN(buf)) @@ -354,7 +353,7 @@ BufferAlloc(Relation reln, if (!(*foundPtr)) StartBufferIO(buf, true); - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); return buf; } @@ -364,7 +363,7 @@ BufferAlloc(Relation reln, /* * Didn't find it in the buffer pool. We'll have to initialize a new * buffer. First, grab one from the free list. If it's dirty, flush - * it to disk. Remember to unlock BufMgr spinlock while doing the IOs. + * it to disk. Remember to unlock BufMgrLock while doing the IOs. 
*/ inProgress = FALSE; for (buf = (BufferDesc *) NULL; buf == (BufferDesc *) NULL;) @@ -502,7 +501,7 @@ BufferAlloc(Relation reln, *foundPtr = TRUE; if (inProgress) { - WaitIO(buf2, BufMgrLock); + WaitIO(buf2); inProgress = (buf2->flags & BM_IO_IN_PROGRESS); } if (BUFFER_IS_BROKEN(buf2)) @@ -510,7 +509,7 @@ BufferAlloc(Relation reln, if (!(*foundPtr)) StartBufferIO(buf2, true); - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); return buf2; } @@ -534,7 +533,7 @@ BufferAlloc(Relation reln, if (!BufTableDelete(buf)) { - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); elog(FATAL, "buffer wasn't in the buffer table"); } @@ -542,7 +541,7 @@ BufferAlloc(Relation reln, if (!BufTableInsert(buf)) { - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); elog(FATAL, "Buffer in lookup table twice"); } @@ -561,7 +560,7 @@ BufferAlloc(Relation reln, _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), RelationGetRelid(reln), blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCNOTFND); #endif /* BMTRACE */ - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); return buf; } @@ -595,13 +594,13 @@ WriteBuffer(Buffer buffer) SharedBufferChanged = true; - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); Assert(bufHdr->refcount > 0); bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); UnpinBuffer(bufHdr); - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); return TRUE; } @@ -625,12 +624,12 @@ WriteNoReleaseBuffer(Buffer buffer) SharedBufferChanged = true; - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); Assert(bufHdr->refcount > 0); bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); return STATUS_OK; } @@ -639,10 +638,10 @@ WriteNoReleaseBuffer(Buffer buffer) #undef ReleaseAndReadBuffer /* * ReleaseAndReadBuffer -- combine ReleaseBuffer() and ReadBuffer() - * to save a spinlock release/acquire. + * to save a lock release/acquire. 
* * Also, if the passed buffer is valid and already contains the desired block - * number, we simply return it without ever acquiring the spinlock at all. + * number, we simply return it without ever acquiring the lock at all. * Since the passed buffer must be pinned, it's OK to examine its block * number without getting the lock first. * @@ -652,7 +651,7 @@ WriteNoReleaseBuffer(Buffer buffer) * * Also note: while it will work to call this routine with blockNum == P_NEW, * it's best to avoid doing so, since that would result in calling - * smgrnblocks() while holding the bufmgr spinlock, hence some loss of + * smgrnblocks() while holding the bufmgr lock, hence some loss of * concurrency. */ Buffer @@ -684,7 +683,7 @@ ReleaseAndReadBuffer(Buffer buffer, PrivateRefCount[buffer - 1]--; else { - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); UnpinBuffer(bufHdr); return ReadBufferInternal(relation, blockNum, true); } @@ -712,12 +711,11 @@ BufferSync() for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++) { - - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); if (!(bufHdr->flags & BM_VALID)) { - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); continue; } @@ -731,7 +729,7 @@ BufferSync() */ if (!(bufHdr->flags & BM_DIRTY) && !(bufHdr->cntxDirty)) { - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); continue; } @@ -741,11 +739,11 @@ BufferSync() */ if (bufHdr->flags & BM_IO_IN_PROGRESS) { - WaitIO(bufHdr, BufMgrLock); + WaitIO(bufHdr); if (!(bufHdr->flags & BM_VALID) || (!(bufHdr->flags & BM_DIRTY) && !(bufHdr->cntxDirty))) { - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); continue; } } @@ -761,7 +759,7 @@ BufferSync() buffer = BufferDescriptorGetBuffer(bufHdr); rnode = bufHdr->tag.rnode; - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); /* * Try to find relation for buffer @@ -784,10 +782,10 @@ BufferSync() * should not be able to write it while we were busy with locking * and log flushing 
because of we setted IO flag. */ - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); Assert(bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty); bufHdr->flags &= ~BM_JUST_DIRTIED; - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); if (reln == (Relation) NULL) { @@ -822,7 +820,7 @@ BufferSync() LockBuffer(buffer, BUFFER_LOCK_UNLOCK); BufferFlushCount++; - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); bufHdr->flags &= ~BM_IO_IN_PROGRESS; /* mark IO finished */ TerminateBufferIO(bufHdr); /* Sync IO finished */ @@ -834,7 +832,7 @@ BufferSync() if (!(bufHdr->flags & BM_JUST_DIRTIED)) bufHdr->flags &= ~BM_DIRTY; UnpinBuffer(bufHdr); - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); /* drop refcnt obtained by RelationNodeCacheGetRelation */ if (reln != (Relation) NULL) @@ -846,24 +844,25 @@ BufferSync() /* * WaitIO -- Block until the IO_IN_PROGRESS flag on 'buf' is cleared. * - * Should be entered with buffer manager spinlock held; releases it before + * Should be entered with buffer manager lock held; releases it before * waiting and re-acquires it afterwards. */ static void -WaitIO(BufferDesc *buf, SPINLOCK spinlock) +WaitIO(BufferDesc *buf) { - /* * Changed to wait until there's no IO - Inoue 01/13/2000 + * + * Note this is *necessary* because an error abort in the process + * doing I/O could release the io_in_progress_lock prematurely. + * See AbortBufferIO. */ while ((buf->flags & BM_IO_IN_PROGRESS) != 0) { - SpinRelease(spinlock); - HOLD_INTERRUPTS(); /* don't want to die() holding the lock... 
*/ - S_LOCK(&(buf->io_in_progress_lock)); - S_UNLOCK(&(buf->io_in_progress_lock)); - RESUME_INTERRUPTS(); - SpinAcquire(spinlock); + LWLockRelease(BufMgrLock); + LWLockAcquire(buf->io_in_progress_lock, LW_SHARED); + LWLockRelease(buf->io_in_progress_lock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); } } @@ -932,9 +931,9 @@ ResetBufferPool(bool isCommit) BufferDesc *buf = &BufferDescriptors[i]; PrivateRefCount[i] = 1; /* make sure we release shared pin */ - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); UnpinBuffer(buf); - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); Assert(PrivateRefCount[i] == 0); } } @@ -1039,7 +1038,7 @@ BufferReplace(BufferDesc *bufHdr) /* To check if block content changed while flushing. - vadim 01/17/97 */ bufHdr->flags &= ~BM_JUST_DIRTIED; - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); /* * No need to lock buffer context - no one should be able to end @@ -1067,7 +1066,7 @@ BufferReplace(BufferDesc *bufHdr) if (reln != (Relation) NULL) RelationDecrementReferenceCount(reln); - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); if (status == SM_FAIL) return FALSE; @@ -1140,7 +1139,8 @@ DropRelationBuffers(Relation rel) return; } - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); + for (i = 1; i <= NBuffers; i++) { bufHdr = &BufferDescriptors[i - 1]; @@ -1155,7 +1155,7 @@ recheck: */ if (bufHdr->flags & BM_IO_IN_PROGRESS) { - WaitIO(bufHdr, BufMgrLock); + WaitIO(bufHdr); /* * By now, the buffer very possibly belongs to some other @@ -1189,7 +1189,7 @@ recheck: } } - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); } /* --------------------------------------------------------------------- @@ -1223,7 +1223,8 @@ DropRelFileNodeBuffers(RelFileNode rnode) } } - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); + for (i = 1; i <= NBuffers; i++) { bufHdr = &BufferDescriptors[i - 1]; @@ -1238,7 +1239,7 @@ recheck: */ if (bufHdr->flags & BM_IO_IN_PROGRESS) { 
- WaitIO(bufHdr, BufMgrLock); + WaitIO(bufHdr); /* * By now, the buffer very possibly belongs to some other @@ -1272,7 +1273,7 @@ recheck: } } - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); } /* --------------------------------------------------------------------- @@ -1292,7 +1293,8 @@ DropBuffers(Oid dbid) int i; BufferDesc *bufHdr; - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); + for (i = 1; i <= NBuffers; i++) { bufHdr = &BufferDescriptors[i - 1]; @@ -1313,7 +1315,7 @@ recheck: */ if (bufHdr->flags & BM_IO_IN_PROGRESS) { - WaitIO(bufHdr, BufMgrLock); + WaitIO(bufHdr); /* * By now, the buffer very possibly belongs to some other @@ -1337,7 +1339,8 @@ recheck: BufTableDelete(bufHdr); } } - SpinRelease(BufMgrLock); + + LWLockRelease(BufMgrLock); } /* ----------------------------------------------------------------- @@ -1355,7 +1358,7 @@ PrintBufferDescs() if (IsUnderPostmaster) { - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); for (i = 0; i < NBuffers; ++i, ++buf) { elog(DEBUG, "[%02d] (freeNext=%d, freePrev=%d, rel=%u/%u, \ @@ -1365,7 +1368,7 @@ blockNum=%u, flags=0x%x, refcount=%d %ld)", buf->tag.blockNum, buf->flags, buf->refcount, PrivateRefCount[i]); } - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); } else { @@ -1386,7 +1389,7 @@ PrintPinnedBufs() int i; BufferDesc *buf = BufferDescriptors; - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); for (i = 0; i < NBuffers; ++i, ++buf) { if (PrivateRefCount[i] > 0) @@ -1397,7 +1400,7 @@ blockNum=%u, flags=0x%x, refcount=%d %ld)", buf->tag.blockNum, buf->flags, buf->refcount, PrivateRefCount[i]); } - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); } /* @@ -1514,7 +1517,8 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) return 0; } - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); + for (i = 0; i < NBuffers; i++) { bufHdr = &BufferDescriptors[i]; @@ -1524,8 +1528,8 @@ FlushRelationBuffers(Relation 
rel, BlockNumber firstDelBlock) { PinBuffer(bufHdr); if (bufHdr->flags & BM_IO_IN_PROGRESS) - WaitIO(bufHdr, BufMgrLock); - SpinRelease(BufMgrLock); + WaitIO(bufHdr); + LWLockRelease(BufMgrLock); /* * Force XLOG flush for buffer' LSN @@ -1537,16 +1541,16 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) * Now it's safe to write buffer to disk */ - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); if (bufHdr->flags & BM_IO_IN_PROGRESS) - WaitIO(bufHdr, BufMgrLock); + WaitIO(bufHdr); if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty) { bufHdr->flags &= ~BM_JUST_DIRTIED; StartBufferIO(bufHdr, false); /* output IO start */ - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); status = smgrwrite(DEFAULT_SMGR, rel, bufHdr->tag.blockNum, @@ -1560,7 +1564,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) BufferFlushCount++; - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); bufHdr->flags &= ~BM_IO_IN_PROGRESS; TerminateBufferIO(bufHdr); Assert(!(bufHdr->flags & BM_JUST_DIRTIED)); @@ -1578,7 +1582,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) } if (!(bufHdr->flags & BM_FREE)) { - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); elog(NOTICE, "FlushRelationBuffers(%s, %u): block %u is referenced (private %ld, global %d)", RelationGetRelationName(rel), firstDelBlock, bufHdr->tag.blockNum, @@ -1589,7 +1593,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) BufTableDelete(bufHdr); } } - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); return 0; } @@ -1621,9 +1625,9 @@ ReleaseBuffer(Buffer buffer) PrivateRefCount[buffer - 1]--; else { - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); UnpinBuffer(bufHdr); - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); } return STATUS_OK; @@ -1919,13 +1923,18 @@ SetBufferCommitInfoNeedsSave(Buffer buffer) if ((bufHdr->flags & (BM_DIRTY | BM_JUST_DIRTIED)) != (BM_DIRTY | BM_JUST_DIRTIED)) { - 
SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); Assert(bufHdr->refcount > 0); bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); } } +/* + * Release buffer context locks for shared buffers. + * + * Used to clean up after errors. + */ void UnlockBuffers(void) { @@ -1942,36 +1951,15 @@ UnlockBuffers(void) Assert(BufferIsValid(i + 1)); buf = &(BufferDescriptors[i]); - HOLD_INTERRUPTS(); /* don't want to die() holding the lock... */ - - S_LOCK(&(buf->cntx_lock)); - - if (buflocks & BL_R_LOCK) - { - Assert(buf->r_locks > 0); - (buf->r_locks)--; - } - if (buflocks & BL_RI_LOCK) - { - /* - * Someone else could remove our RI lock when acquiring W - * lock. This is possible if we came here from elog(ERROR) - * from IpcSemaphore{Lock|Unlock}(WaitCLSemId). And so we - * don't do Assert(buf->ri_lock) here. - */ - buf->ri_lock = false; - } - if (buflocks & BL_W_LOCK) - { - Assert(buf->w_lock); - buf->w_lock = false; - } + HOLD_INTERRUPTS(); /* don't want to die() partway through... */ - S_UNLOCK(&(buf->cntx_lock)); + /* + * The buffer's cntx_lock has already been released by lwlock.c. + */ if (buflocks & BL_PIN_COUNT_LOCK) { - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); /* * Don't complain if flag bit not set; it could have been reset * but we got a cancel/die interrupt before getting the signal. @@ -1979,7 +1967,7 @@ UnlockBuffers(void) if ((buf->flags & BM_PIN_COUNT_WAITER) != 0 && buf->wait_backend_id == MyBackendId) buf->flags &= ~BM_PIN_COUNT_WAITER; - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); ProcCancelWaitForSignal(); } @@ -1989,94 +1977,31 @@ UnlockBuffers(void) } } -/* Max time to wait to acquire a buffer read or write lock */ -#define BUFFER_LOCK_TIMEOUT (10*60*1000000) /* 10 minutes */ - +/* + * Acquire or release the cntx_lock for the buffer. 
+ */ void LockBuffer(Buffer buffer, int mode) { BufferDesc *buf; - bits8 *buflock; Assert(BufferIsValid(buffer)); if (BufferIsLocal(buffer)) return; buf = &(BufferDescriptors[buffer - 1]); - buflock = &(BufferLocks[buffer - 1]); - - HOLD_INTERRUPTS(); /* don't want to die() holding the lock... */ - - S_LOCK(&(buf->cntx_lock)); if (mode == BUFFER_LOCK_UNLOCK) { - if (*buflock & BL_R_LOCK) - { - Assert(buf->r_locks > 0); - Assert(!(buf->w_lock)); - Assert(!(*buflock & (BL_W_LOCK | BL_RI_LOCK))); - (buf->r_locks)--; - *buflock &= ~BL_R_LOCK; - } - else if (*buflock & BL_W_LOCK) - { - Assert(buf->w_lock); - Assert(buf->r_locks == 0); - Assert(!(*buflock & (BL_R_LOCK | BL_RI_LOCK))); - buf->w_lock = false; - *buflock &= ~BL_W_LOCK; - } - else - { - S_UNLOCK(&(buf->cntx_lock)); - RESUME_INTERRUPTS(); - elog(ERROR, "UNLockBuffer: buffer %d is not locked", buffer); - } + LWLockRelease(buf->cntx_lock); } else if (mode == BUFFER_LOCK_SHARE) { - unsigned i = 0; - - Assert(!(*buflock & (BL_R_LOCK | BL_W_LOCK | BL_RI_LOCK))); - while (buf->ri_lock || buf->w_lock) - { - S_UNLOCK(&(buf->cntx_lock)); - RESUME_INTERRUPTS(); - S_LOCK_SLEEP(&(buf->cntx_lock), i++, BUFFER_LOCK_TIMEOUT); - HOLD_INTERRUPTS(); - S_LOCK(&(buf->cntx_lock)); - } - (buf->r_locks)++; - *buflock |= BL_R_LOCK; + LWLockAcquire(buf->cntx_lock, LW_SHARED); } else if (mode == BUFFER_LOCK_EXCLUSIVE) { - unsigned i = 0; - - Assert(!(*buflock & (BL_R_LOCK | BL_W_LOCK | BL_RI_LOCK))); - while (buf->r_locks > 0 || buf->w_lock) - { - if (buf->r_locks > 3 || (*buflock & BL_RI_LOCK)) - { - - /* - * Our RI lock might be removed by concurrent W lock - * acquiring (see what we do with RI locks below when our - * own W acquiring succeeded) and so we set RI lock again - * if we already did this. 
- */ - *buflock |= BL_RI_LOCK; - buf->ri_lock = true; - } - S_UNLOCK(&(buf->cntx_lock)); - RESUME_INTERRUPTS(); - S_LOCK_SLEEP(&(buf->cntx_lock), i++, BUFFER_LOCK_TIMEOUT); - HOLD_INTERRUPTS(); - S_LOCK(&(buf->cntx_lock)); - } - buf->w_lock = true; - *buflock |= BL_W_LOCK; + LWLockAcquire(buf->cntx_lock, LW_EXCLUSIVE); /* * This is not the best place to set cntxDirty flag (eg indices do @@ -2085,27 +2010,11 @@ LockBuffer(Buffer buffer, int mode) * changes with XLogInsert() - see comments in BufferSync(). */ buf->cntxDirty = true; - - if (*buflock & BL_RI_LOCK) - { - - /* - * It's possible to remove RI locks acquired by another W - * lockers here, but they'll take care about it. - */ - buf->ri_lock = false; - *buflock &= ~BL_RI_LOCK; - } } else { - S_UNLOCK(&(buf->cntx_lock)); - RESUME_INTERRUPTS(); elog(ERROR, "LockBuffer: unknown lock mode %d", mode); } - - S_UNLOCK(&(buf->cntx_lock)); - RESUME_INTERRUPTS(); } /* @@ -2152,25 +2061,25 @@ LockBufferForCleanup(Buffer buffer) { /* Try to acquire lock */ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); - SpinAcquire(BufMgrLock); + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); Assert(bufHdr->refcount > 0); if (bufHdr->refcount == 1) { /* Successfully acquired exclusive lock with pincount 1 */ - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); return; } /* Failed, so mark myself as waiting for pincount 1 */ if (bufHdr->flags & BM_PIN_COUNT_WAITER) { - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); elog(ERROR, "Multiple backends attempting to wait for pincount 1"); } bufHdr->wait_backend_id = MyBackendId; bufHdr->flags |= BM_PIN_COUNT_WAITER; *buflock |= BL_PIN_COUNT_LOCK; - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); /* Wait to be signaled by UnpinBuffer() */ ProcWaitForSignal(); @@ -2183,8 +2092,7 @@ LockBufferForCleanup(Buffer buffer) * Functions for IO error handling * * Note : We assume that nested buffer IO never occur. 
- * i.e at most one io_in_progress spinlock is held - * per proc. + * i.e at most one io_in_progress lock is held per proc. */ static BufferDesc *InProgressBuf = (BufferDesc *) NULL; static bool IsForInput; @@ -2207,18 +2115,7 @@ StartBufferIO(BufferDesc *buf, bool forInput) Assert(!(buf->flags & BM_IO_IN_PROGRESS)); buf->flags |= BM_IO_IN_PROGRESS; - /* - * There used to be - * - * Assert(S_LOCK_FREE(&(buf->io_in_progress_lock))); - * - * here, but that's wrong because of the way WaitIO works: someone else - * waiting for the I/O to complete will succeed in grabbing the lock - * for a few instructions, and if we context-swap back to here the - * Assert could fail. Tiny window for failure, but I've seen it - * happen -- tgl - */ - S_LOCK(&(buf->io_in_progress_lock)); + LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE); InProgressBuf = buf; IsForInput = forInput; @@ -2238,7 +2135,7 @@ static void TerminateBufferIO(BufferDesc *buf) { Assert(buf == InProgressBuf); - S_UNLOCK(&(buf->io_in_progress_lock)); + LWLockRelease(buf->io_in_progress_lock); InProgressBuf = (BufferDesc *) 0; } @@ -2271,7 +2168,6 @@ InitBufferIO(void) /* * Clean up any active buffer I/O after an error. - * This function is called from ProcReleaseSpins(). * BufMgrLock isn't held when this function is called. * * If I/O was in progress, we always set BM_IO_ERROR. @@ -2283,7 +2179,16 @@ AbortBufferIO(void) if (buf) { - SpinAcquire(BufMgrLock); + /* + * Since LWLockReleaseAll has already been called, + * we're not holding the buffer's io_in_progress_lock. + * We have to re-acquire it so that we can use TerminateBufferIO. + * Anyone who's executing WaitIO on the buffer will be in a busy spin + * until we succeed in doing this. 
+ */ + LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE); + + LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); Assert(buf->flags & BM_IO_IN_PROGRESS); if (IsForInput) Assert(!(buf->flags & BM_DIRTY) && !(buf->cntxDirty)); @@ -2302,7 +2207,7 @@ AbortBufferIO(void) buf->flags |= BM_IO_ERROR; buf->flags &= ~BM_IO_IN_PROGRESS; TerminateBufferIO(buf); - SpinRelease(BufMgrLock); + LWLockRelease(BufMgrLock); } } diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index 84eded7950..f8ac2e287c 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.24 2001/07/06 21:04:26 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.25 2001/09/29 04:02:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -29,6 +29,7 @@ #include "storage/buf_internals.h" #include "storage/bufmgr.h" +#include "storage/ipc.h" #include "storage/proc.h" diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c index fec9f594a6..b20e808615 100644 --- a/src/backend/storage/freespace/freespace.c +++ b/src/backend/storage/freespace/freespace.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/freespace/freespace.c,v 1.4 2001/07/19 21:25:37 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/freespace/freespace.c,v 1.5 2001/09/29 04:02:23 tgl Exp $ * * * NOTES: @@ -56,6 +56,7 @@ #include "storage/freespace.h" #include "storage/itemid.h" +#include "storage/lwlock.h" #include "storage/shmem.h" @@ -122,9 +123,6 @@ struct FSMChunk }; -SPINLOCK FreeSpaceLock; /* in Shmem or created in - * CreateSpinlocks() */ - int MaxFSMRelations; /* these are set by guc.c */ int MaxFSMPages; @@ -256,7 +254,7 @@ 
GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded) FSMRelation *fsmrel; BlockNumber freepage; - SpinAcquire(FreeSpaceLock); + LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); /* * We always add a rel to the hashtable when it is inquired about. */ @@ -279,7 +277,7 @@ GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded) fsmrel->threshold = (Size) cur_avg; } freepage = find_free_space(fsmrel, spaceNeeded); - SpinRelease(FreeSpaceLock); + LWLockRelease(FreeSpaceLock); return freepage; } @@ -299,7 +297,7 @@ RecordFreeSpace(RelFileNode *rel, BlockNumber page, Size spaceAvail) /* Sanity check: ensure spaceAvail will fit into ItemLength */ AssertArg(spaceAvail < BLCKSZ); - SpinAcquire(FreeSpaceLock); + LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); /* * We choose not to add rels to the hashtable unless they've been * inquired about with GetPageWithFreeSpace. Also, a Record operation @@ -308,11 +306,11 @@ RecordFreeSpace(RelFileNode *rel, BlockNumber page, Size spaceAvail) fsmrel = lookup_fsm_rel(rel); if (fsmrel) fsm_record_free_space(fsmrel, page, spaceAvail); - SpinRelease(FreeSpaceLock); + LWLockRelease(FreeSpaceLock); } /* - * RecordAndGetPageWithFreeSpace - combo form to save one spinlock and + * RecordAndGetPageWithFreeSpace - combo form to save one lock and * hash table lookup cycle. */ BlockNumber @@ -327,7 +325,7 @@ RecordAndGetPageWithFreeSpace(RelFileNode *rel, /* Sanity check: ensure spaceAvail will fit into ItemLength */ AssertArg(oldSpaceAvail < BLCKSZ); - SpinAcquire(FreeSpaceLock); + LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); /* * We always add a rel to the hashtable when it is inquired about. 
*/ @@ -351,7 +349,7 @@ RecordAndGetPageWithFreeSpace(RelFileNode *rel, fsm_record_free_space(fsmrel, oldPage, oldSpaceAvail); /* Do the Get */ freepage = find_free_space(fsmrel, spaceNeeded); - SpinRelease(FreeSpaceLock); + LWLockRelease(FreeSpaceLock); return freepage; } @@ -378,7 +376,7 @@ MultiRecordFreeSpace(RelFileNode *rel, FSMRelation *fsmrel; int i; - SpinAcquire(FreeSpaceLock); + LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); fsmrel = lookup_fsm_rel(rel); if (fsmrel) { @@ -437,7 +435,7 @@ MultiRecordFreeSpace(RelFileNode *rel, fsm_record_free_space(fsmrel, page, avail); } } - SpinRelease(FreeSpaceLock); + LWLockRelease(FreeSpaceLock); } /* @@ -452,11 +450,11 @@ FreeSpaceMapForgetRel(RelFileNode *rel) { FSMRelation *fsmrel; - SpinAcquire(FreeSpaceLock); + LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); fsmrel = lookup_fsm_rel(rel); if (fsmrel) delete_fsm_rel(fsmrel); - SpinRelease(FreeSpaceLock); + LWLockRelease(FreeSpaceLock); } /* @@ -474,14 +472,14 @@ FreeSpaceMapForgetDatabase(Oid dbid) FSMRelation *fsmrel, *nextrel; - SpinAcquire(FreeSpaceLock); + LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); for (fsmrel = FreeSpaceMap->relList; fsmrel; fsmrel = nextrel) { nextrel = fsmrel->nextRel; /* in case we delete it */ if (fsmrel->key.tblNode == dbid) delete_fsm_rel(fsmrel); } - SpinRelease(FreeSpaceLock); + LWLockRelease(FreeSpaceLock); } diff --git a/src/backend/storage/ipc/ipc.c b/src/backend/storage/ipc/ipc.c index b5871a5e09..9d2b373a58 100644 --- a/src/backend/storage/ipc/ipc.c +++ b/src/backend/storage/ipc/ipc.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipc.c,v 1.68 2001/09/04 00:22:34 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipc.c,v 1.69 2001/09/29 04:02:23 tgl Exp $ * * NOTES * @@ -34,7 +34,6 @@ #include #include "storage/ipc.h" -#include "storage/s_lock.h" /* In Ultrix, sem.h and shm.h must be included AFTER ipc.h */ #ifdef HAVE_SYS_SEM_H #include @@ -306,7 +305,7 @@ 
InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, if (errno == ENOSPC) fprintf(stderr, "\nThis error does *not* mean that you have run out of disk space.\n\n" - "It occurs either because system limit for the maximum number of\n" + "It occurs because either the system limit for the maximum number of\n" "semaphore sets (SEMMNI), or the system wide maximum number of\n" "semaphores (SEMMNS), would be exceeded. You need to raise the\n" "respective kernel parameter. Look into the PostgreSQL documentation\n" @@ -416,8 +415,8 @@ IpcSemaphoreLock(IpcSemaphoreId semId, int sem, bool interruptOK) * record acquiring the lock. (This is currently true for lockmanager * locks, since the process that granted us the lock did all the * necessary state updates. It's not true for SysV semaphores used to - * emulate spinlocks --- but our performance on such platforms is so - * horrible anyway that I'm not going to worry too much about it.) + * implement LW locks or emulate spinlocks --- but the wait time for + * such locks should not be very long, anyway.) 
*/ do { diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 06988baf34..7dac93f3a0 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.42 2001/08/25 18:52:42 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.43 2001/09/29 04:02:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,6 +22,7 @@ #include "storage/bufmgr.h" #include "storage/freespace.h" #include "storage/lmgr.h" +#include "storage/lwlock.h" #include "storage/proc.h" #include "storage/sinval.h" #include "storage/spin.h" @@ -53,7 +54,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int maxBackends) size += LockShmemSize(maxBackends); size += XLOGShmemSize(); size += CLOGShmemSize(); - size += SLockShmemSize(); + size += LWLockShmemSize(); size += SInvalShmemSize(maxBackends); size += FreeSpaceShmemSize(); #ifdef STABLE_MEMORY_STORAGE @@ -74,13 +75,24 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int maxBackends) /* * First initialize spinlocks --- needed by InitShmemAllocation() */ - CreateSpinlocks(seghdr); + CreateSpinlocks(); /* - * Set up shmem.c hashtable + * Set up shared memory allocation mechanism */ InitShmemAllocation(seghdr); + /* + * Now initialize LWLocks, which do shared memory allocation and + * are needed for InitShmemIndex. 
+ */ + CreateLWLocks(); + + /* + * Set up shmem.c index hashtable + */ + InitShmemIndex(); + /* * Set up xlog, clog, and buffers */ diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c index dd86609875..0ad168680a 100644 --- a/src/backend/storage/ipc/shmem.c +++ b/src/backend/storage/ipc/shmem.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.58 2001/09/07 00:27:29 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.59 2001/09/29 04:02:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -61,8 +61,10 @@ #include "postgres.h" #include "access/transam.h" +#include "storage/spin.h" #include "utils/tqual.h" + /* shared memory global variables */ static PGShmemHeader *ShmemSegHdr; /* shared mem segment header */ @@ -71,9 +73,7 @@ SHMEM_OFFSET ShmemBase; /* start address of shared memory */ static SHMEM_OFFSET ShmemEnd; /* end+1 address of shared memory */ -SPINLOCK ShmemLock; /* lock for shared memory allocation */ - -SPINLOCK ShmemIndexLock; /* lock for shmem index access */ +static slock_t *ShmemLock; /* spinlock for shared memory allocation */ static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */ @@ -81,63 +81,33 @@ static bool ShmemBootstrap = false; /* bootstrapping shmem index? */ /* - * InitShmemAllocation() --- set up shared-memory allocation and index table. + * InitShmemAllocation() --- set up shared-memory allocation. + * + * Note: the argument should be declared "PGShmemHeader *seghdr", + * but we use void to avoid having to include ipc.h in shmem.h. 
*/ void -InitShmemAllocation(PGShmemHeader *seghdr) +InitShmemAllocation(void *seghdr) { - HASHCTL info; - int hash_flags; - ShmemIndexEnt *result, - item; - bool found; + PGShmemHeader *shmhdr = (PGShmemHeader *) seghdr; /* Set up basic pointers to shared memory */ - ShmemSegHdr = seghdr; - ShmemBase = (SHMEM_OFFSET) seghdr; - ShmemEnd = ShmemBase + seghdr->totalsize; - - /* - * Since ShmemInitHash calls ShmemInitStruct, which expects the - * ShmemIndex hashtable to exist already, we have a bit of a - * circularity problem in initializing the ShmemIndex itself. We set - * ShmemBootstrap to tell ShmemInitStruct to fake it. - */ - ShmemIndex = (HTAB *) NULL; - ShmemBootstrap = true; - - /* create the shared memory shmem index */ - info.keysize = SHMEM_INDEX_KEYSIZE; - info.datasize = SHMEM_INDEX_DATASIZE; - hash_flags = HASH_ELEM; - - /* This will acquire the shmem index lock, but not release it. */ - ShmemIndex = ShmemInitHash("ShmemIndex", - SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE, - &info, hash_flags); - if (!ShmemIndex) - elog(FATAL, "InitShmemAllocation: couldn't initialize Shmem Index"); + ShmemSegHdr = shmhdr; + ShmemBase = (SHMEM_OFFSET) shmhdr; + ShmemEnd = ShmemBase + shmhdr->totalsize; /* - * Now, create an entry in the hashtable for the index itself. + * Initialize the spinlock used by ShmemAlloc. We have to do the + * space allocation the hard way, since ShmemAlloc can't be called yet. 
*/ - MemSet(item.key, 0, SHMEM_INDEX_KEYSIZE); - strncpy(item.key, "ShmemIndex", SHMEM_INDEX_KEYSIZE); + ShmemLock = (slock_t *) (((char *) shmhdr) + shmhdr->freeoffset); + shmhdr->freeoffset += MAXALIGN(sizeof(slock_t)); + Assert(shmhdr->freeoffset <= shmhdr->totalsize); - result = (ShmemIndexEnt *) - hash_search(ShmemIndex, (char *) &item, HASH_ENTER, &found); - if (!result) - elog(FATAL, "InitShmemAllocation: corrupted shmem index"); + SpinLockInit(ShmemLock); - Assert(ShmemBootstrap && !found); - - result->location = MAKE_OFFSET(ShmemIndex->hctl); - result->size = SHMEM_INDEX_SIZE; - - ShmemBootstrap = false; - - /* now release the lock acquired in ShmemInitStruct */ - SpinRelease(ShmemIndexLock); + /* ShmemIndex can't be set up yet (need LWLocks first) */ + ShmemIndex = (HTAB *) NULL; /* * Initialize ShmemVariableCache for transaction manager. @@ -167,9 +137,9 @@ ShmemAlloc(Size size) */ size = MAXALIGN(size); - Assert(ShmemSegHdr); + Assert(ShmemSegHdr != NULL); - SpinAcquire(ShmemLock); + SpinLockAcquire(ShmemLock); newFree = ShmemSegHdr->freeoffset + size; if (newFree <= ShmemSegHdr->totalsize) @@ -180,7 +150,7 @@ ShmemAlloc(Size size) else newSpace = NULL; - SpinRelease(ShmemLock); + SpinLockRelease(ShmemLock); if (!newSpace) elog(NOTICE, "ShmemAlloc: out of memory"); @@ -199,6 +169,60 @@ ShmemIsValid(unsigned long addr) return (addr < ShmemEnd) && (addr >= ShmemBase); } +/* + * InitShmemIndex() --- set up shmem index table. + */ +void +InitShmemIndex(void) +{ + HASHCTL info; + int hash_flags; + ShmemIndexEnt *result, + item; + bool found; + + /* + * Since ShmemInitHash calls ShmemInitStruct, which expects the + * ShmemIndex hashtable to exist already, we have a bit of a + * circularity problem in initializing the ShmemIndex itself. We set + * ShmemBootstrap to tell ShmemInitStruct to fake it. 
+ */ + ShmemBootstrap = true; + + /* create the shared memory shmem index */ + info.keysize = SHMEM_INDEX_KEYSIZE; + info.datasize = SHMEM_INDEX_DATASIZE; + hash_flags = HASH_ELEM; + + /* This will acquire the shmem index lock, but not release it. */ + ShmemIndex = ShmemInitHash("ShmemIndex", + SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE, + &info, hash_flags); + if (!ShmemIndex) + elog(FATAL, "InitShmemIndex: couldn't initialize Shmem Index"); + + /* + * Now, create an entry in the hashtable for the index itself. + */ + MemSet(item.key, 0, SHMEM_INDEX_KEYSIZE); + strncpy(item.key, "ShmemIndex", SHMEM_INDEX_KEYSIZE); + + result = (ShmemIndexEnt *) + hash_search(ShmemIndex, (char *) &item, HASH_ENTER, &found); + if (!result) + elog(FATAL, "InitShmemIndex: corrupted shmem index"); + + Assert(ShmemBootstrap && !found); + + result->location = MAKE_OFFSET(ShmemIndex->hctl); + result->size = SHMEM_INDEX_SIZE; + + ShmemBootstrap = false; + + /* now release the lock acquired in ShmemInitStruct */ + LWLockRelease(ShmemIndexLock); +} + /* * ShmemInitHash -- Create/Attach to and initialize * shared memory hash table. @@ -207,8 +231,7 @@ ShmemIsValid(unsigned long addr) * * assume caller is doing some kind of synchronization * so that two people dont try to create/initialize the - * table at once. Use SpinAlloc() to create a spinlock - * for the structure before creating the structure itself. + * table at once. 
*/ HTAB * ShmemInitHash(char *name, /* table string name for shmem index */ @@ -283,7 +306,7 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr) strncpy(item.key, name, SHMEM_INDEX_KEYSIZE); item.location = BAD_LOCATION; - SpinAcquire(ShmemIndexLock); + LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE); if (!ShmemIndex) { @@ -306,7 +329,7 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr) if (!result) { - SpinRelease(ShmemIndexLock); + LWLockRelease(ShmemIndexLock); elog(ERROR, "ShmemInitStruct: Shmem Index corrupted"); return NULL; } @@ -320,7 +343,7 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr) */ if (result->size != size) { - SpinRelease(ShmemIndexLock); + LWLockRelease(ShmemIndexLock); elog(NOTICE, "ShmemInitStruct: ShmemIndex entry size is wrong"); /* let caller print its message too */ @@ -337,7 +360,7 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr) /* out of memory */ Assert(ShmemIndex); hash_search(ShmemIndex, (char *) &item, HASH_REMOVE, foundPtr); - SpinRelease(ShmemIndexLock); + LWLockRelease(ShmemIndexLock); *foundPtr = FALSE; elog(NOTICE, "ShmemInitStruct: cannot allocate '%s'", @@ -349,6 +372,6 @@ ShmemInitStruct(char *name, Size size, bool *foundPtr) } Assert(ShmemIsValid((unsigned long) structPtr)); - SpinRelease(ShmemIndexLock); + LWLockRelease(ShmemIndexLock); return structPtr; } diff --git a/src/backend/storage/ipc/sinval.c b/src/backend/storage/ipc/sinval.c index 1d43b1ead4..24506b9729 100644 --- a/src/backend/storage/ipc/sinval.c +++ b/src/backend/storage/ipc/sinval.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.40 2001/08/26 16:56:00 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.41 2001/09/29 04:02:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,8 +23,6 @@ #include "miscadmin.h" -SPINLOCK SInvalLock = (SPINLOCK) NULL; - 
/****************************************************************************/ /* CreateSharedInvalidationState() Initialize SI buffer */ /* */ @@ -33,7 +31,7 @@ SPINLOCK SInvalLock = (SPINLOCK) NULL; void CreateSharedInvalidationState(int maxBackends) { - /* SInvalLock must be initialized already, during spinlock init */ + /* SInvalLock must be initialized already, during LWLock init */ SIBufferInit(maxBackends); } @@ -46,9 +44,9 @@ InitBackendSharedInvalidationState(void) { int flag; - SpinAcquire(SInvalLock); + LWLockAcquire(SInvalLock, LW_EXCLUSIVE); flag = SIBackendInit(shmInvalBuffer); - SpinRelease(SInvalLock); + LWLockRelease(SInvalLock); if (flag < 0) /* unexpected problem */ elog(FATAL, "Backend cache invalidation initialization failed"); if (flag == 0) /* expected problem: MaxBackends exceeded */ @@ -64,9 +62,9 @@ SendSharedInvalidMessage(SharedInvalidationMessage *msg) { bool insertOK; - SpinAcquire(SInvalLock); + LWLockAcquire(SInvalLock, LW_EXCLUSIVE); insertOK = SIInsertDataEntry(shmInvalBuffer, msg); - SpinRelease(SInvalLock); + LWLockRelease(SInvalLock); if (!insertOK) elog(DEBUG, "SendSharedInvalidMessage: SI buffer overflow"); } @@ -86,9 +84,25 @@ ReceiveSharedInvalidMessages( for (;;) { - SpinAcquire(SInvalLock); + /* + * We can run SIGetDataEntry in parallel with other backends running + * SIGetDataEntry for themselves, since each instance will modify + * only fields of its own backend's ProcState, and no instance will + * look at fields of other backends' ProcStates. We express this + * by grabbing SInvalLock in shared mode. Note that this is not + * exactly the normal (read-only) interpretation of a shared lock! + * Look closely at the interactions before allowing SInvalLock to + * be grabbed in shared mode for any other reason! 
+ * + * The routines later in this file that use shared mode are okay + * with this, because they aren't looking at the ProcState fields + * associated with SI message transfer; they only use the ProcState + * array as an easy way to find all the PROC structures. + */ + LWLockAcquire(SInvalLock, LW_SHARED); getResult = SIGetDataEntry(shmInvalBuffer, MyBackendId, &data); - SpinRelease(SInvalLock); + LWLockRelease(SInvalLock); + if (getResult == 0) break; /* nothing more to do */ if (getResult < 0) @@ -108,9 +122,9 @@ ReceiveSharedInvalidMessages( /* If we got any messages, try to release dead messages */ if (gotMessage) { - SpinAcquire(SInvalLock); + LWLockAcquire(SInvalLock, LW_EXCLUSIVE); SIDelExpiredDataEntries(shmInvalBuffer); - SpinRelease(SInvalLock); + LWLockRelease(SInvalLock); } } @@ -149,7 +163,7 @@ DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself) ProcState *stateP = segP->procState; int index; - SpinAcquire(SInvalLock); + LWLockAcquire(SInvalLock, LW_SHARED); for (index = 0; index < segP->lastBackend; index++) { @@ -170,7 +184,7 @@ DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself) } } - SpinRelease(SInvalLock); + LWLockRelease(SInvalLock); return result; } @@ -186,7 +200,7 @@ TransactionIdIsInProgress(TransactionId xid) ProcState *stateP = segP->procState; int index; - SpinAcquire(SInvalLock); + LWLockAcquire(SInvalLock, LW_SHARED); for (index = 0; index < segP->lastBackend; index++) { @@ -206,7 +220,7 @@ TransactionIdIsInProgress(TransactionId xid) } } - SpinRelease(SInvalLock); + LWLockRelease(SInvalLock); return result; } @@ -237,7 +251,7 @@ GetOldestXmin(bool allDbs) result = GetCurrentTransactionId(); - SpinAcquire(SInvalLock); + LWLockAcquire(SInvalLock, LW_SHARED); for (index = 0; index < segP->lastBackend; index++) { @@ -265,7 +279,7 @@ GetOldestXmin(bool allDbs) } } - SpinRelease(SInvalLock); + LWLockRelease(SInvalLock); return result; } @@ -298,7 +312,7 @@ GetSnapshotData(bool serializable) snapshot->xmin = 
GetCurrentTransactionId(); - SpinAcquire(SInvalLock); + LWLockAcquire(SInvalLock, LW_SHARED); /* * There can be no more than lastBackend active transactions, so this @@ -307,15 +321,12 @@ GetSnapshotData(bool serializable) snapshot->xip = (TransactionId *) malloc(segP->lastBackend * sizeof(TransactionId)); if (snapshot->xip == NULL) - { - SpinRelease(SInvalLock); elog(ERROR, "Memory exhausted in GetSnapshotData"); - } /*-------------------- * Unfortunately, we have to call ReadNewTransactionId() after acquiring * SInvalLock above. It's not good because ReadNewTransactionId() does - * SpinAcquire(XidGenLockId), but *necessary*. We need to be sure that + * LWLockAcquire(XidGenLock), but *necessary*. We need to be sure that * no transactions exit the set of currently-running transactions * between the time we fetch xmax and the time we finish building our * snapshot. Otherwise we could have a situation like this: @@ -373,7 +384,7 @@ GetSnapshotData(bool serializable) if (serializable) MyProc->xmin = snapshot->xmin; - SpinRelease(SInvalLock); + LWLockRelease(SInvalLock); /* Serializable snapshot must be computed before any other... */ Assert(TransactionIdIsValid(MyProc->xmin)); @@ -439,7 +450,7 @@ GetUndoRecPtr(void) XLogRecPtr tempr; int index; - SpinAcquire(SInvalLock); + LWLockAcquire(SInvalLock, LW_SHARED); for (index = 0; index < segP->lastBackend; index++) { @@ -458,7 +469,7 @@ GetUndoRecPtr(void) } } - SpinRelease(SInvalLock); + LWLockRelease(SInvalLock); return (urec); } @@ -470,7 +481,7 @@ GetUndoRecPtr(void) * knows that the backend isn't going to go away, so we do not bother with * locking. 
*/ -struct proc * +struct PROC * BackendIdGetProc(BackendId procId) { SISeg *segP = shmInvalBuffer; diff --git a/src/backend/storage/ipc/sinvaladt.c b/src/backend/storage/ipc/sinvaladt.c index 3809619009..d05a651097 100644 --- a/src/backend/storage/ipc/sinvaladt.c +++ b/src/backend/storage/ipc/sinvaladt.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.40 2001/06/19 19:42:15 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.41 2001/09/29 04:02:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -83,7 +83,7 @@ SIBufferInit(int maxBackends) * <0 Some other failure (not currently used) * * NB: this routine, and all following ones, must be executed with the - * SInvalLock spinlock held, since there may be multiple backends trying + * SInvalLock lock held, since there may be multiple backends trying * to access the buffer. */ int @@ -152,7 +152,7 @@ CleanupInvalidationState(int status, Datum arg) Assert(PointerIsValid(segP)); - SpinAcquire(SInvalLock); + LWLockAcquire(SInvalLock, LW_EXCLUSIVE); /* Mark myself inactive */ segP->procState[MyBackendId - 1].nextMsgNum = -1; @@ -167,7 +167,7 @@ CleanupInvalidationState(int status, Datum arg) } segP->lastBackend = i; - SpinRelease(SInvalLock); + LWLockRelease(SInvalLock); } /* @@ -267,6 +267,10 @@ SISetProcStateInvalid(SISeg *segP) * 1: next SI message has been extracted into *data * (there may be more messages available after this one!) * -1: SI reset message extracted + * + * NB: this can run in parallel with other instances of SIGetDataEntry + * executing on behalf of other backends. See comments in sinval.c in + * ReceiveSharedInvalidMessages(). 
*/ int SIGetDataEntry(SISeg *segP, int backendId, diff --git a/src/backend/storage/lmgr/Makefile b/src/backend/storage/lmgr/Makefile index bab933c842..f7471a7b00 100644 --- a/src/backend/storage/lmgr/Makefile +++ b/src/backend/storage/lmgr/Makefile @@ -4,7 +4,7 @@ # Makefile for storage/lmgr # # IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Makefile,v 1.16 2001/09/27 19:10:02 tgl Exp $ +# $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Makefile,v 1.17 2001/09/29 04:02:24 tgl Exp $ # #------------------------------------------------------------------------- @@ -12,7 +12,7 @@ subdir = src/backend/storage/lmgr top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = lmgr.o lock.o proc.o deadlock.o spin.o s_lock.o +OBJS = lmgr.o lock.o proc.o deadlock.o lwlock.o spin.o s_lock.o all: SUBSYS.o diff --git a/src/backend/storage/lmgr/README b/src/backend/storage/lmgr/README index cb8f56ca8a..80cc7ce060 100644 --- a/src/backend/storage/lmgr/README +++ b/src/backend/storage/lmgr/README @@ -1,4 +1,49 @@ -$Header: /cvsroot/pgsql/src/backend/storage/lmgr/README,v 1.8 2001/01/26 18:23:12 tgl Exp $ +$Header: /cvsroot/pgsql/src/backend/storage/lmgr/README,v 1.9 2001/09/29 04:02:24 tgl Exp $ + + +LOCKING OVERVIEW + +Postgres uses three types of interprocess locks: + +* Spinlocks. These are intended for *very* short-term locks. If a lock +is to be held more than a few dozen instructions, or across any sort of +kernel call (or even a call to a nontrivial subroutine), don't use a spinlock. +Spinlocks are primarily used as infrastructure for lightweight locks. +They are implemented using a hardware atomic-test-and-set instruction, +if available. Waiting processes busy-loop until they can get the lock. +There is no provision for deadlock detection, automatic release on error, +or any other nicety. 
There is a timeout if the lock cannot be obtained after +a minute or so (which is approximately forever in comparison to the intended +lock hold time, so this is certainly an error condition). + +* Lightweight locks (LWLocks). These locks are typically used to interlock +access to data structures in shared memory. LWLocks support both exclusive +and shared lock modes (for read/write and read-only access to a shared object). +There is no provision for deadlock detection, but the LWLock manager will +automatically release held LWLocks during elog() recovery, so it is safe to +raise an error while holding LWLocks. Obtaining or releasing an LWLock is +quite fast (a few dozen instructions) when there is no contention for the +lock. When a process has to wait for an LWLock, it blocks on a SysV semaphore +so as not to consume CPU time. Waiting processes will be granted the lock +in arrival order. There is no timeout. + +* Regular locks (a/k/a heavyweight locks). The regular lock manager supports +a variety of lock modes with table-driven semantics, and it has full deadlock +detection and automatic release at transaction end. Regular locks should be +used for all user-driven lock requests. + +Acquisition of either a spinlock or a lightweight lock causes query cancel +and die() interrupts to be held off until all such locks are released. +No such restriction exists for regular locks, however. Also note that we +can accept query cancel and die() interrupts while waiting for a regular +lock, but we will not accept them while waiting for spinlocks or LWLocks. +It is therefore not a good idea to use LWLocks when the wait time might +exceed a few seconds. + +The rest of this README file discusses the regular lock manager in detail. + + +LOCK DATA STRUCTURES There are two fundamental lock structures: the per-lockable-object LOCK struct, and the per-lock-holder HOLDER struct.
A LOCK object exists diff --git a/src/backend/storage/lmgr/deadlock.c b/src/backend/storage/lmgr/deadlock.c index 160fc64fb2..a69a078474 100644 --- a/src/backend/storage/lmgr/deadlock.c +++ b/src/backend/storage/lmgr/deadlock.c @@ -12,7 +12,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/deadlock.c,v 1.3 2001/03/22 03:59:46 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/deadlock.c,v 1.4 2001/09/29 04:02:24 tgl Exp $ * * Interface: * @@ -172,8 +172,8 @@ InitDeadLockChecking(void) * * We must have already locked the master lock before being called. * NOTE: although the lockctl structure appears to allow each lock - * table to have a different spinlock, all locks that can block had - * better use the same spinlock, else this code will not be adequately + * table to have a different LWLock, all locks that can block had + * better use the same LWLock, else this code will not be adequately * interlocked! */ bool diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 3fc31ed6a2..b3221db2a0 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lock.c,v 1.95 2001/09/27 16:29:12 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lock.c,v 1.96 2001/09/29 04:02:24 tgl Exp $ * * NOTES * Outside modules can create a lock table and acquire/release @@ -78,8 +78,8 @@ static char *lock_mode_names[] = * TRACE_LOCK_TABLE -- trace locks on this table (oid) unconditionally * DEBUG_DEADLOCKS -- currently dumps locks at untimely occasions ;) * - * Furthermore, but in storage/ipc/spin.c: - * TRACE_SPINLOCKS -- trace spinlocks (pretty useless) + * Furthermore, but in storage/lmgr/lwlock.c: + * TRACE_LWLOCKS -- trace lightweight locks (pretty useless) * * Define LOCK_DEBUG at compile time to get all these enabled. 
* -------- @@ -151,10 +151,6 @@ HOLDER_PRINT(const char *where, const HOLDER *holderP) #endif /* not LOCK_DEBUG */ - -SPINLOCK LockMgrLock; /* in Shmem or created in - * CreateSpinlocks() */ - /* * These are to simplify/speed up some bit arithmetic. * @@ -230,12 +226,6 @@ LockMethodInit(LOCKMETHODTABLE *lockMethodTable, /* * LockMethodTableInit -- initialize a lock table structure * - * Notes: - * (a) a lock table has four separate entries in the shmem index - * table. This is because every shared hash table and spinlock - * has its name stored in the shmem index at its creation. It - * is wasteful, in this case, but not much space is involved. - * * NOTE: data structures allocated here are allocated permanently, using * TopMemoryContext and shared memory. We don't ever release them anyway, * and in normal multi-backend operation the lock table structures set up @@ -277,9 +267,9 @@ LockMethodTableInit(char *tabName, MemoryContextAlloc(TopMemoryContext, sizeof(LOCKMETHODTABLE)); /* - * find/acquire the spinlock for the table + * Lock the LWLock for the table (probably not necessary here) */ - SpinAcquire(LockMgrLock); + LWLockAcquire(LockMgrLock, LW_EXCLUSIVE); /* * allocate a control structure from shared memory or attach to it if @@ -356,7 +346,7 @@ LockMethodTableInit(char *tabName, /* init ctl data structures */ LockMethodInit(lockMethodTable, conflictsP, prioP, numModes); - SpinRelease(LockMgrLock); + LWLockRelease(LockMgrLock); pfree(shmemName); @@ -464,7 +454,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, HTAB *holderTable; bool found; LOCK *lock; - SPINLOCK masterLock; + LWLockId masterLock; LOCKMETHODTABLE *lockMethodTable; int status; int myHolding[MAX_LOCKMODES]; @@ -489,7 +479,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, masterLock = lockMethodTable->ctl->masterLock; - SpinAcquire(masterLock); + LWLockAcquire(masterLock, LW_EXCLUSIVE); /* * Find or create a lock with this tag @@ -499,7 +489,7 @@ LockAcquire(LOCKMETHOD lockmethod, 
LOCKTAG *locktag, HASH_ENTER, &found); if (!lock) { - SpinRelease(masterLock); + LWLockRelease(masterLock); elog(FATAL, "LockAcquire: lock table %d is corrupted", lockmethod); return FALSE; } @@ -544,7 +534,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, HASH_ENTER, &found); if (!holder) { - SpinRelease(masterLock); + LWLockRelease(masterLock); elog(FATAL, "LockAcquire: holder table corrupted"); return FALSE; } @@ -617,7 +607,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, { GrantLock(lock, holder, lockmode); HOLDER_PRINT("LockAcquire: owning", holder); - SpinRelease(masterLock); + LWLockRelease(masterLock); return TRUE; } @@ -630,7 +620,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, { GrantLock(lock, holder, lockmode); HOLDER_PRINT("LockAcquire: my other XID owning", holder); - SpinRelease(masterLock); + LWLockRelease(masterLock); return TRUE; } @@ -677,7 +667,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, LOCK_PRINT("LockAcquire: conditional lock failed", lock, lockmode); Assert((lock->nRequested > 0) && (lock->requested[lockmode] >= 0)); Assert(lock->nGranted <= lock->nRequested); - SpinRelease(masterLock); + LWLockRelease(masterLock); return FALSE; } @@ -719,14 +709,14 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, HOLDER_PRINT("LockAcquire: INCONSISTENT", holder); LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode); /* Should we retry ? */ - SpinRelease(masterLock); + LWLockRelease(masterLock); return FALSE; } HOLDER_PRINT("LockAcquire: granted", holder); LOCK_PRINT("LockAcquire: granted", lock, lockmode); } - SpinRelease(masterLock); + LWLockRelease(masterLock); return status == STATUS_OK; } @@ -879,7 +869,7 @@ GrantLock(LOCK *lock, HOLDER *holder, LOCKMODE lockmode) * Caller must have set MyProc->heldLocks to reflect locks already held * on the lockable object by this process (under all XIDs). * - * The locktable spinlock must be held at entry. + * The locktable's masterLock must be held at entry. 
*/ static int WaitOnLock(LOCKMETHOD lockmethod, LOCKMODE lockmode, @@ -925,7 +915,7 @@ WaitOnLock(LOCKMETHOD lockmethod, LOCKMODE lockmode, * needed, will happen in xact cleanup (see above for motivation). */ LOCK_PRINT("WaitOnLock: aborting on lock", lock, lockmode); - SpinRelease(lockMethodTable->ctl->masterLock); + LWLockRelease(lockMethodTable->ctl->masterLock); elog(ERROR, "deadlock detected"); /* not reached */ } @@ -998,7 +988,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, TransactionId xid, LOCKMODE lockmode) { LOCK *lock; - SPINLOCK masterLock; + LWLockId masterLock; bool found; LOCKMETHODTABLE *lockMethodTable; HOLDER *holder; @@ -1023,7 +1013,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, } masterLock = lockMethodTable->ctl->masterLock; - SpinAcquire(masterLock); + LWLockAcquire(masterLock, LW_EXCLUSIVE); /* * Find a lock with this tag @@ -1038,14 +1028,14 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, */ if (!lock) { - SpinRelease(masterLock); + LWLockRelease(masterLock); elog(NOTICE, "LockRelease: locktable corrupted"); return FALSE; } if (!found) { - SpinRelease(masterLock); + LWLockRelease(masterLock); elog(NOTICE, "LockRelease: no such lock"); return FALSE; } @@ -1065,7 +1055,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, HASH_FIND_SAVE, &found); if (!holder || !found) { - SpinRelease(masterLock); + LWLockRelease(masterLock); #ifdef USER_LOCKS if (!found && lockmethod == USER_LOCKMETHOD) elog(NOTICE, "LockRelease: no lock with this tag"); @@ -1084,7 +1074,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, { HOLDER_PRINT("LockRelease: WRONGTYPE", holder); Assert(holder->holding[lockmode] >= 0); - SpinRelease(masterLock); + LWLockRelease(masterLock); elog(NOTICE, "LockRelease: you don't own a lock of type %s", lock_mode_names[lockmode]); return FALSE; @@ -1139,7 +1129,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, &found); if (!lock || !found) { - SpinRelease(masterLock); + 
LWLockRelease(masterLock); elog(NOTICE, "LockRelease: remove lock, table corrupted"); return FALSE; } @@ -1167,7 +1157,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, HASH_REMOVE_SAVED, &found); if (!holder || !found) { - SpinRelease(masterLock); + LWLockRelease(masterLock); elog(NOTICE, "LockRelease: remove holder, table corrupted"); return FALSE; } @@ -1179,7 +1169,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, if (wakeupNeeded) ProcLockWakeup(lockMethodTable, lock); - SpinRelease(masterLock); + LWLockRelease(masterLock); return TRUE; } @@ -1201,7 +1191,7 @@ LockReleaseAll(LOCKMETHOD lockmethod, PROC *proc, SHM_QUEUE *procHolders = &(proc->procHolders); HOLDER *holder; HOLDER *nextHolder; - SPINLOCK masterLock; + LWLockId masterLock; LOCKMETHODTABLE *lockMethodTable; int i, numLockModes; @@ -1225,7 +1215,7 @@ LockReleaseAll(LOCKMETHOD lockmethod, PROC *proc, numLockModes = lockMethodTable->ctl->numLockModes; masterLock = lockMethodTable->ctl->masterLock; - SpinAcquire(masterLock); + LWLockAcquire(masterLock, LW_EXCLUSIVE); holder = (HOLDER *) SHMQueueNext(procHolders, procHolders, offsetof(HOLDER, procLink)); @@ -1321,7 +1311,7 @@ LockReleaseAll(LOCKMETHOD lockmethod, PROC *proc, &found); if (!holder || !found) { - SpinRelease(masterLock); + LWLockRelease(masterLock); elog(NOTICE, "LockReleaseAll: holder table corrupted"); return FALSE; } @@ -1340,7 +1330,7 @@ LockReleaseAll(LOCKMETHOD lockmethod, PROC *proc, HASH_REMOVE, &found); if (!lock || !found) { - SpinRelease(masterLock); + LWLockRelease(masterLock); elog(NOTICE, "LockReleaseAll: cannot remove lock from HTAB"); return FALSE; } @@ -1352,7 +1342,7 @@ next_item: holder = nextHolder; } - SpinRelease(masterLock); + LWLockRelease(masterLock); #ifdef LOCK_DEBUG if (lockmethod == USER_LOCKMETHOD ? 
Trace_userlocks : Trace_locks)
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
new file mode 100644
index 0000000000..5fdcc11f59
--- /dev/null
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -0,0 +1,500 @@
+/*-------------------------------------------------------------------------
+ *
+ * lwlock.c
+ *    Lightweight lock manager
+ *
+ * Lightweight locks are intended primarily to provide mutual exclusion of
+ * access to shared-memory data structures. Therefore, they offer both
+ * exclusive and shared lock modes (to support read/write and read-only
+ * access to a shared object). There are few other frammishes. User-level
+ * locking should be done with the full lock manager --- which depends on
+ * an LWLock to protect its shared state.
+ *
+ *
+ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lwlock.c,v 1.1 2001/09/29 04:02:24 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/clog.h"
+#include "storage/lwlock.h"
+#include "storage/proc.h"
+#include "storage/spin.h"
+
+
+typedef struct LWLock
+{
+    slock_t     mutex;          /* Protects LWLock and queue of PROCs */
+    char        exclusive;      /* # of exclusive holders (0 or 1) */
+    int         shared;         /* # of shared holders (0..MaxBackends) */
+    PROC       *head;           /* head of list of waiting PROCs */
+    PROC       *tail;           /* tail of list of waiting PROCs */
+    /* tail is undefined when head is NULL */
+} LWLock;
+
+/*
+ * This points to the array of LWLocks in shared memory. Backends inherit
+ * the pointer by fork from the postmaster. LWLockIds are indexes into
+ * the array.
+ */
+static LWLock *LWLockArray = NULL;
+/* shared counter for dynamic allocation of LWLockIds */
+static int *LWLockCounter;
+
+
+/*
+ * We use this structure to keep track of locked LWLocks for release
+ * during error recovery. The maximum size could be determined at runtime
+ * if necessary, but it seems unlikely that more than a few locks could
+ * ever be held simultaneously.
+ */
+#define MAX_SIMUL_LWLOCKS   100
+
+static int num_held_lwlocks = 0;
+static LWLockId held_lwlocks[MAX_SIMUL_LWLOCKS];
+
+
+#ifdef LOCK_DEBUG
+bool Trace_lwlocks = false;
+
+inline static void
+PRINT_LWDEBUG(const char *where, LWLockId lockid, const LWLock *lock)
+{
+    if (Trace_lwlocks)
+        elog(DEBUG, "%s(%d): excl %d shared %d head %p",
+             where, (int) lockid,
+             (int) lock->exclusive, lock->shared, lock->head);
+}
+
+#else   /* not LOCK_DEBUG */
+#define PRINT_LWDEBUG(a,b,c)
+#endif  /* LOCK_DEBUG */
+
+
+/*
+ * Compute number of LWLocks to allocate.
+ */
+int
+NumLWLocks(void)
+{
+    int         numLocks;
+
+    /*
+     * Possibly this logic should be spread out among the affected modules,
+     * the same way that shmem space estimation is done. But for now,
+     * there are few enough users of LWLocks that we can get away with
+     * just keeping the knowledge here.
+     */
+
+    /* Predefined LWLocks */
+    numLocks = (int) NumFixedLWLocks;
+
+    /* bufmgr.c needs two for each shared buffer */
+    numLocks += 2 * NBuffers;
+
+    /* clog.c needs one per CLOG buffer */
+    numLocks += NUM_CLOG_BUFFERS;
+
+    /* Perhaps create a few more for use by user-defined modules? */
+
+    return numLocks;
+}
+
+
+/*
+ * Compute shmem space needed for LWLocks.
+ */
+int
+LWLockShmemSize(void)
+{
+    int         numLocks = NumLWLocks();
+    uint32      spaceLocks;
+
+    /* Allocate the LWLocks plus space for shared allocation counter. */
+    spaceLocks = numLocks * sizeof(LWLock) + 2 * sizeof(int);
+    spaceLocks = MAXALIGN(spaceLocks);
+
+    return (int) spaceLocks;
+}
+
+
+/*
+ * Allocate shmem space for LWLocks and initialize the locks.
+ */
+void
+CreateLWLocks(void)
+{
+    int         numLocks = NumLWLocks();
+    uint32      spaceLocks = LWLockShmemSize();
+    LWLock     *lock;
+    int         id;
+
+    /* Allocate space */
+    LWLockArray = (LWLock *) ShmemAlloc(spaceLocks);
+
+    /*
+     * Initialize all LWLocks to "unlocked" state
+     */
+    for (id = 0, lock = LWLockArray; id < numLocks; id++, lock++)
+    {
+        SpinLockInit(&lock->mutex);
+        lock->exclusive = 0;
+        lock->shared = 0;
+        lock->head = NULL;
+        lock->tail = NULL;
+    }
+
+    /*
+     * Initialize the dynamic-allocation counter at the end of the array
+     */
+    LWLockCounter = (int *) lock;
+    LWLockCounter[0] = (int) NumFixedLWLocks;
+    LWLockCounter[1] = numLocks;
+}
+
+
+/*
+ * LWLockAssign - assign a dynamically-allocated LWLock number
+ *
+ * NB: we do not currently try to interlock this. Could perhaps use
+ * ShmemLock spinlock if there were any need to assign LWLockIds after
+ * shmem setup.
+ */
+LWLockId
+LWLockAssign(void)
+{
+    if (LWLockCounter[0] >= LWLockCounter[1])
+        elog(FATAL, "No more LWLockIds available");
+    return (LWLockId) (LWLockCounter[0]++);
+}
+
+
+/*
+ * LWLockAcquire - acquire a lightweight lock in the specified mode
+ *
+ * If the lock is not available, sleep until it is.
+ *
+ * Side effect: cancel/die interrupts are held off until lock release.
+ */
+void
+LWLockAcquire(LWLockId lockid, LWLockMode mode)
+{
+    LWLock     *lock = LWLockArray + lockid;
+    bool        mustwait;
+
+    PRINT_LWDEBUG("LWLockAcquire", lockid, lock);
+
+    /*
+     * Ensure we will have room to remember the lock. This has to be a
+     * run-time check made before we touch the lock: an Assert disappears
+     * in production builds, and overrunning held_lwlocks[] would leave us
+     * holding a lock that LWLockReleaseAll cannot find.
+     */
+    if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
+        elog(ERROR, "LWLockAcquire: too many LWLocks taken");
+
+    /*
+     * Lock out cancel/die interrupts until we exit the code section
+     * protected by the LWLock. This ensures that interrupts will not
+     * interfere with manipulations of data structures in shared memory.
+     */
+    HOLD_INTERRUPTS();
+
+    /* Acquire mutex. Time spent holding mutex should be short! */
+    SpinLockAcquire_NoHoldoff(&lock->mutex);
+
+    /* If I can get the lock, do so quickly. */
+    if (mode == LW_EXCLUSIVE)
+    {
+        if (lock->exclusive == 0 && lock->shared == 0)
+        {
+            lock->exclusive++;
+            mustwait = false;
+        }
+        else
+            mustwait = true;
+    }
+    else
+    {
+        /*
+         * If there is someone waiting (presumably for exclusive access),
+         * queue up behind him even though I could get the lock. This
+         * prevents a stream of read locks from starving a writer.
+         */
+        if (lock->exclusive == 0 && lock->head == NULL)
+        {
+            lock->shared++;
+            mustwait = false;
+        }
+        else
+            mustwait = true;
+    }
+
+    if (mustwait)
+    {
+        /* Add myself to wait queue */
+        PROC       *proc = MyProc;
+        int         extraWaits = 0;
+
+        /*
+         * If we don't have a PROC structure, there's no way to wait.
+         * This should never occur, since MyProc should only be null
+         * during shared memory initialization. Drop the mutex before
+         * complaining, so that we don't leave a stuck spinlock behind.
+         */
+        if (proc == NULL)
+        {
+            SpinLockRelease_NoHoldoff(&lock->mutex);
+            elog(FATAL, "LWLockAcquire: can't wait without a PROC structure");
+        }
+
+        proc->lwWaiting = true;
+        proc->lwExclusive = (mode == LW_EXCLUSIVE);
+        proc->lwWaitLink = NULL;
+        if (lock->head == NULL)
+            lock->head = proc;
+        else
+            lock->tail->lwWaitLink = proc;
+        lock->tail = proc;
+
+        /* Can release the mutex now */
+        SpinLockRelease_NoHoldoff(&lock->mutex);
+
+        /*
+         * Wait until awakened.
+         *
+         * Since we share the process wait semaphore with the regular lock
+         * manager and ProcWaitForSignal, and we may need to acquire an LWLock
+         * while one of those is pending, it is possible that we get awakened
+         * for a reason other than being granted the LWLock. If so, loop back
+         * and wait again. Once we've gotten the lock, re-increment the sema
+         * by the number of additional signals received, so that the lock
+         * manager or signal manager will see the received signal when it
+         * next waits.
+         */
+        for (;;)
+        {
+            /* "false" means cannot accept cancel/die interrupt here. */
+            IpcSemaphoreLock(proc->sem.semId, proc->sem.semNum, false);
+            if (!proc->lwWaiting)
+                break;
+            extraWaits++;
+        }
+        /*
+         * The awakener already updated the lock struct's state, so we
+         * don't need to do anything more to it. Just need to fix the
+         * semaphore count.
+         */
+        while (extraWaits-- > 0)
+            IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum);
+    }
+    else
+    {
+        /* Got the lock without waiting */
+        SpinLockRelease_NoHoldoff(&lock->mutex);
+    }
+
+    /* Add lock to list of locks held by this backend */
+    Assert(num_held_lwlocks < MAX_SIMUL_LWLOCKS);
+    held_lwlocks[num_held_lwlocks++] = lockid;
+}
+
+/*
+ * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
+ *
+ * If the lock is not available, return FALSE with no side-effects.
+ *
+ * If successful, cancel/die interrupts are held off until lock release.
+ */
+bool
+LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode)
+{
+    LWLock     *lock = LWLockArray + lockid;
+    bool        mustwait;
+
+    PRINT_LWDEBUG("LWLockConditionalAcquire", lockid, lock);
+
+    /* Ensure we will have room to remember the lock (see LWLockAcquire) */
+    if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
+        elog(ERROR, "LWLockConditionalAcquire: too many LWLocks taken");
+
+    /*
+     * Lock out cancel/die interrupts until we exit the code section
+     * protected by the LWLock. This ensures that interrupts will not
+     * interfere with manipulations of data structures in shared memory.
+     */
+    HOLD_INTERRUPTS();
+
+    /* Acquire mutex. Time spent holding mutex should be short! */
+    SpinLockAcquire_NoHoldoff(&lock->mutex);
+
+    /* If I can get the lock, do so quickly. */
+    if (mode == LW_EXCLUSIVE)
+    {
+        if (lock->exclusive == 0 && lock->shared == 0)
+        {
+            lock->exclusive++;
+            mustwait = false;
+        }
+        else
+            mustwait = true;
+    }
+    else
+    {
+        /*
+         * If there is someone waiting (presumably for exclusive access),
+         * queue up behind him even though I could get the lock. This
+         * prevents a stream of read locks from starving a writer.
+         */
+        if (lock->exclusive == 0 && lock->head == NULL)
+        {
+            lock->shared++;
+            mustwait = false;
+        }
+        else
+            mustwait = true;
+    }
+
+    /* We are done updating shared state of the lock itself. */
+    SpinLockRelease_NoHoldoff(&lock->mutex);
+
+    if (mustwait)
+    {
+        /* Failed to get lock, so release interrupt holdoff */
+        RESUME_INTERRUPTS();
+    }
+    else
+    {
+        /* Add lock to list of locks held by this backend */
+        Assert(num_held_lwlocks < MAX_SIMUL_LWLOCKS);
+        held_lwlocks[num_held_lwlocks++] = lockid;
+    }
+
+    return !mustwait;
+}
+
+/*
+ * LWLockRelease - release a previously acquired lock
+ */
+void
+LWLockRelease(LWLockId lockid)
+{
+    LWLock     *lock = LWLockArray + lockid;
+    PROC       *head;
+    PROC       *proc;
+    int         i;
+
+    PRINT_LWDEBUG("LWLockRelease", lockid, lock);
+
+    /*
+     * Remove lock from list of locks held. Usually, but not always,
+     * it will be the latest-acquired lock; so search array backwards.
+     */
+    for (i = num_held_lwlocks; --i >= 0; )
+    {
+        if (lockid == held_lwlocks[i])
+            break;
+    }
+    if (i < 0)
+        elog(ERROR, "LWLockRelease: lock %d is not held", (int) lockid);
+    num_held_lwlocks--;
+    for (; i < num_held_lwlocks; i++)
+        held_lwlocks[i] = held_lwlocks[i+1];
+
+    /* Acquire mutex. Time spent holding mutex should be short! */
+    SpinLockAcquire_NoHoldoff(&lock->mutex);
+
+    /* Release my hold on lock */
+    if (lock->exclusive > 0)
+        lock->exclusive--;
+    else
+    {
+        Assert(lock->shared > 0);
+        lock->shared--;
+    }
+
+    /*
+     * See if I need to awaken any waiters. If I released a non-last shared
+     * hold, there cannot be anything to do.
+     */
+    head = lock->head;
+    if (head != NULL)
+    {
+        if (lock->exclusive == 0 && lock->shared == 0)
+        {
+            /*
+             * Remove the to-be-awakened PROCs from the queue, and update the
+             * lock state to show them as holding the lock.
+             */
+            proc = head;
+            if (proc->lwExclusive)
+            {
+                lock->exclusive++;
+            }
+            else
+            {
+                lock->shared++;
+                while (proc->lwWaitLink != NULL &&
+                       !proc->lwWaitLink->lwExclusive)
+                {
+                    proc = proc->lwWaitLink;
+                    lock->shared++;
+                }
+            }
+            /* proc is now the last PROC to be released */
+            lock->head = proc->lwWaitLink;
+            proc->lwWaitLink = NULL;
+        }
+        else
+        {
+            /* lock is still held, can't awaken anything */
+            head = NULL;
+        }
+    }
+
+    /* We are done updating shared state of the lock itself. */
+    SpinLockRelease_NoHoldoff(&lock->mutex);
+
+    /*
+     * Awaken any waiters I removed from the queue.
+     */
+    while (head != NULL)
+    {
+        proc = head;
+        head = proc->lwWaitLink;
+        proc->lwWaitLink = NULL;
+        proc->lwWaiting = false;
+        IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum);
+    }
+
+    /*
+     * Now okay to allow cancel/die interrupts.
+     */
+    RESUME_INTERRUPTS();
+}
+
+
+/*
+ * LWLockReleaseAll - release all currently-held locks
+ *
+ * Used to clean up after elog(ERROR). An important difference between this
+ * function and retail LWLockRelease calls is that InterruptHoldoffCount is
+ * unchanged by this operation. This is necessary since InterruptHoldoffCount
+ * has been set to an appropriate level earlier in error recovery. We could
+ * decrement it below zero if we allow it to drop for each released lock!
+ */ +void +LWLockReleaseAll(void) +{ + while (num_held_lwlocks > 0) + { + HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */ + + LWLockRelease(held_lwlocks[num_held_lwlocks-1]); + } +} diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 0a02e6f006..e687304dba 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -8,15 +8,11 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.108 2001/09/21 17:06:12 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.109 2001/09/29 04:02:24 tgl Exp $ * *------------------------------------------------------------------------- */ /* - * Each postgres backend gets one of these. We'll use it to - * clean up after the process should the process suddenly die. - * - * * Interface (a): * ProcSleep(), ProcWakeup(), * ProcQueueAlloc() -- create a shm queue for sleeping processes @@ -75,27 +71,31 @@ #include "access/xact.h" #include "storage/proc.h" #include "storage/sinval.h" +#include "storage/spin.h" int DeadlockTimeout = 1000; -/* -------------------- - * Spin lock for manipulating the shared process data structure: - * ProcGlobal.... Adding an extra spin lock seemed like the smallest - * hack to get around reading and updating this structure in shared - * memory. -mer 17 July 1991 - * -------------------- +PROC *MyProc = NULL; + +/* + * This spinlock protects the freelist of recycled PROC structures and the + * bitmap of free semaphores. We cannot use an LWLock because the LWLock + * manager depends on already having a PROC and a wait semaphore! But these + * structures are touched relatively infrequently (only at backend startup + * or shutdown) and not for very long, so a spinlock is okay. 
*/ -SPINLOCK ProcStructLock; +static slock_t *ProcStructLock = NULL; static PROC_HDR *ProcGlobal = NULL; -PROC *MyProc = NULL; +static PROC *DummyProc = NULL; static bool waitingForLock = false; static bool waitingForSignal = false; static void ProcKill(void); +static void DummyProcKill(void); static void ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum); static void ProcFreeSem(IpcSemaphoreId semId, int semNum); static void ZeroProcSemaphore(PROC *proc); @@ -128,9 +128,12 @@ InitProcGlobal(int maxBackends) Size procGlobalSize; bool found = false; - /* Compute size for ProcGlobal structure */ + /* + * Compute size for ProcGlobal structure. Note we need one more sema + * besides those used for regular backends. + */ Assert(maxBackends > 0); - semMapEntries = PROC_SEM_MAP_ENTRIES(maxBackends); + semMapEntries = PROC_SEM_MAP_ENTRIES(maxBackends+1); procGlobalSize = sizeof(PROC_HDR) + (semMapEntries-1) * sizeof(SEM_MAP_ENTRY); /* Create or attach to the ProcGlobal shared structure */ @@ -178,13 +181,26 @@ InitProcGlobal(int maxBackends) false); ProcGlobal->procSemMap[i].procSemId = semId; } + + /* + * Pre-allocate a PROC structure for dummy (checkpoint) processes, + * and reserve the last sema of the precreated semas for it. + */ + DummyProc = (PROC *) ShmemAlloc(sizeof(PROC)); + DummyProc->pid = 0; /* marks DummyProc as not in use */ + i = semMapEntries-1; + ProcGlobal->procSemMap[i].freeSemMap |= 1 << (PROC_NSEMS_PER_SET-1); + DummyProc->sem.semId = ProcGlobal->procSemMap[i].procSemId; + DummyProc->sem.semNum = PROC_NSEMS_PER_SET-1; + + /* Create ProcStructLock spinlock, too */ + ProcStructLock = (slock_t *) ShmemAlloc(sizeof(slock_t)); + SpinLockInit(ProcStructLock); } } -/* ------------------------ - * InitProc -- create a per-process data structure for this process - * used by the lock manager on semaphore queues. 
- * ------------------------ +/* + * InitProcess -- create a per-process data structure for this backend */ void InitProcess(void) @@ -202,39 +218,27 @@ InitProcess(void) elog(ERROR, "InitProcess: you already exist"); /* - * ProcStructLock protects the freelist of PROC entries and the map - * of free semaphores. Note that when we acquire it here, we do not - * have a PROC entry and so the ownership of the spinlock is not - * recorded anywhere; even if it was, until we register ProcKill as - * an on_shmem_exit callback, there is no exit hook that will cause - * owned spinlocks to be released. Upshot: during the first part of - * this routine, be careful to release the lock manually before any - * elog(), else you'll have a stuck spinlock to add to your woes. + * try to get a proc struct from the free list first */ - SpinAcquire(ProcStructLock); + SpinLockAcquire(ProcStructLock); - /* try to get a proc struct from the free list first */ myOffset = ProcGlobal->freeProcs; if (myOffset != INVALID_OFFSET) { MyProc = (PROC *) MAKE_PTR(myOffset); ProcGlobal->freeProcs = MyProc->links.next; + SpinLockRelease(ProcStructLock); } else { /* - * have to allocate one. We can't use the normal shmem index - * table mechanism because the proc structure is stored by PID - * instead of by a global name (need to look it up by PID when we - * cleanup dead processes). + * have to allocate a new one. 
*/ + SpinLockRelease(ProcStructLock); MyProc = (PROC *) ShmemAlloc(sizeof(PROC)); if (!MyProc) - { - SpinRelease(ProcStructLock); elog(FATAL, "cannot create new proc: out of memory"); - } } /* @@ -246,39 +250,30 @@ InitProcess(void) MyProc->errType = STATUS_OK; MyProc->xid = InvalidTransactionId; MyProc->xmin = InvalidTransactionId; + MyProc->pid = MyProcPid; + MyProc->databaseId = MyDatabaseId; MyProc->logRec.xrecoff = 0; + MyProc->lwWaiting = false; + MyProc->lwExclusive = false; + MyProc->lwWaitLink = NULL; MyProc->waitLock = NULL; MyProc->waitHolder = NULL; - MyProc->pid = MyProcPid; - MyProc->databaseId = MyDatabaseId; SHMQueueInit(&(MyProc->procHolders)); - /* - * Zero out the spin lock counts and set the sLocks field for - * ProcStructLock to 1 as we have acquired this spinlock above but - * didn't record it since we didn't have MyProc until now. - */ - MemSet(MyProc->sLocks, 0, sizeof(MyProc->sLocks)); - MyProc->sLocks[ProcStructLock] = 1; /* - * Arrange to clean up at backend exit. Once we do this, owned - * spinlocks will be released on exit, and so we can be a lot less - * tense about errors. + * Arrange to clean up at backend exit. */ on_shmem_exit(ProcKill, 0); /* * Set up a wait-semaphore for the proc. (We rely on ProcKill to clean - * up if this fails.) + * up MyProc if this fails.) */ if (IsUnderPostmaster) ProcGetNewSemIdAndNum(&MyProc->sem.semId, &MyProc->sem.semNum); - /* Done with freelist and sem map */ - SpinRelease(ProcStructLock); - /* - * We might be reusing a semaphore that belongs to a dead backend. + * We might be reusing a semaphore that belonged to a failed process. * So be careful and reinitialize its value here. */ if (MyProc->sem.semId >= 0) @@ -291,6 +286,65 @@ InitProcess(void) InitDeadLockChecking(); } +/* + * InitDummyProcess -- create a dummy per-process data structure + * + * This is called by checkpoint processes so that they will have a MyProc + * value that's real enough to let them wait for LWLocks. 
The PROC and + * sema that are assigned are the extra ones created during InitProcGlobal. + */ +void +InitDummyProcess(void) +{ + /* + * ProcGlobal should be set by a previous call to InitProcGlobal + * (we inherit this by fork() from the postmaster). + */ + if (ProcGlobal == NULL || DummyProc == NULL) + elog(STOP, "InitDummyProcess: Proc Header uninitialized"); + + if (MyProc != NULL) + elog(ERROR, "InitDummyProcess: you already exist"); + + /* + * DummyProc should not presently be in use by anyone else + */ + if (DummyProc->pid != 0) + elog(FATAL, "InitDummyProcess: DummyProc is in use by PID %d", + DummyProc->pid); + MyProc = DummyProc; + + /* + * Initialize all fields of MyProc, except MyProc->sem which was + * set up by InitProcGlobal. + */ + MyProc->pid = MyProcPid; /* marks DummyProc as in use by me */ + SHMQueueElemInit(&(MyProc->links)); + MyProc->errType = STATUS_OK; + MyProc->xid = InvalidTransactionId; + MyProc->xmin = InvalidTransactionId; + MyProc->databaseId = MyDatabaseId; + MyProc->logRec.xrecoff = 0; + MyProc->lwWaiting = false; + MyProc->lwExclusive = false; + MyProc->lwWaitLink = NULL; + MyProc->waitLock = NULL; + MyProc->waitHolder = NULL; + SHMQueueInit(&(MyProc->procHolders)); + + /* + * Arrange to clean up at process exit. + */ + on_shmem_exit(DummyProcKill, 0); + + /* + * We might be reusing a semaphore that belonged to a failed process. + * So be careful and reinitialize its value here. + */ + if (MyProc->sem.semId >= 0) + ZeroProcSemaphore(MyProc); +} + /* * Initialize the proc's wait-semaphore to count zero. */ @@ -330,10 +384,10 @@ LockWaitCancel(void) disable_sigalrm_interrupt(); /* Unlink myself from the wait queue, if on it (might not be anymore!) */ - LockLockTable(); + LWLockAcquire(LockMgrLock, LW_EXCLUSIVE); if (MyProc->links.next != INVALID_OFFSET) RemoveFromWaitQueue(MyProc); - UnlockLockTable(); + LWLockRelease(LockMgrLock); /* * Reset the proc wait semaphore to zero. 
This is necessary in the @@ -381,15 +435,18 @@ ProcReleaseLocks(bool isCommit) /* * ProcKill() -- Destroy the per-proc data structure for - * this process. Release any of its held spin locks. + * this process. Release any of its held LW locks. */ static void ProcKill(void) { Assert(MyProc != NULL); - /* Release any spinlocks I am holding */ - ProcReleaseSpins(MyProc); + /* Release any LW locks I am holding */ + LWLockReleaseAll(); + + /* Abort any buffer I/O in progress */ + AbortBufferIO(); /* Get off any wait queue I might be on */ LockWaitCancel(); @@ -402,7 +459,7 @@ ProcKill(void) LockReleaseAll(USER_LOCKMETHOD, MyProc, true, InvalidTransactionId); #endif - SpinAcquire(ProcStructLock); + SpinLockAcquire(ProcStructLock); /* Free up my wait semaphore, if I got one */ if (MyProc->sem.semId >= 0) @@ -412,10 +469,35 @@ ProcKill(void) MyProc->links.next = ProcGlobal->freeProcs; ProcGlobal->freeProcs = MAKE_OFFSET(MyProc); - /* PROC struct isn't mine anymore; stop tracking spinlocks with it! */ + /* PROC struct isn't mine anymore */ MyProc = NULL; - SpinRelease(ProcStructLock); + SpinLockRelease(ProcStructLock); +} + +/* + * DummyProcKill() -- Cut-down version of ProcKill for dummy (checkpoint) + * processes. The PROC and sema are not released, only marked + * as not-in-use. + */ +static void +DummyProcKill(void) +{ + Assert(MyProc != NULL && MyProc == DummyProc); + + /* Release any LW locks I am holding */ + LWLockReleaseAll(); + + /* Abort any buffer I/O in progress */ + AbortBufferIO(); + + /* I can't be on regular lock queues, so needn't check */ + + /* Mark DummyProc no longer in use */ + MyProc->pid = 0; + + /* PROC struct isn't mine anymore */ + MyProc = NULL; } @@ -464,13 +546,13 @@ ProcQueueInit(PROC_QUEUE *queue) * Caller must have set MyProc->heldLocks to reflect locks already held * on the lockable object by this process (under all XIDs). 
* - * Locktable's spinlock must be held at entry, and will be held + * Locktable's masterLock must be held at entry, and will be held * at exit. * * Result: STATUS_OK if we acquired the lock, STATUS_ERROR if not (deadlock). * * ASSUME: that no one will fiddle with the queue until after - * we release the spin lock. + * we release the masterLock. * * NOTES: The process queue is now a priority queue for locking. * @@ -484,7 +566,7 @@ ProcSleep(LOCKMETHODTABLE *lockMethodTable, HOLDER *holder) { LOCKMETHODCTL *lockctl = lockMethodTable->ctl; - SPINLOCK spinlock = lockctl->masterLock; + LWLockId masterLock = lockctl->masterLock; PROC_QUEUE *waitQueue = &(lock->waitProcs); int myHeldLocks = MyProc->heldLocks; bool early_deadlock = false; @@ -595,14 +677,14 @@ ProcSleep(LOCKMETHODTABLE *lockMethodTable, waitingForLock = true; /* - * Release the locktable's spin lock. + * Release the locktable's masterLock. * * NOTE: this may also cause us to exit critical-section state, possibly * allowing a cancel/die interrupt to be accepted. This is OK because * we have recorded the fact that we are waiting for a lock, and so * LockWaitCancel will clean up if cancel/die happens. */ - SpinRelease(spinlock); + LWLockRelease(masterLock); /* * Set timer so we can wake up after awhile and check for a deadlock. @@ -617,7 +699,7 @@ ProcSleep(LOCKMETHODTABLE *lockMethodTable, elog(FATAL, "ProcSleep: Unable to set timer for process wakeup"); /* - * If someone wakes us between SpinRelease and IpcSemaphoreLock, + * If someone wakes us between LWLockRelease and IpcSemaphoreLock, * IpcSemaphoreLock will not block. The wakeup is "saved" by the * semaphore implementation. Note also that if HandleDeadLock is * invoked but does not detect a deadlock, IpcSemaphoreLock() will @@ -644,12 +726,9 @@ ProcSleep(LOCKMETHODTABLE *lockMethodTable, waitingForLock = false; /* - * Re-acquire the locktable's spin lock. - * - * We could accept a cancel/die interrupt here. 
That's OK because the - * lock is now registered as being held by this process. + * Re-acquire the locktable's masterLock. */ - SpinAcquire(spinlock); + LWLockAcquire(masterLock, LW_EXCLUSIVE); /* * We don't have to do anything else, because the awaker did all the @@ -674,7 +753,7 @@ ProcWakeup(PROC *proc, int errType) { PROC *retProc; - /* assume that spinlock has been acquired */ + /* assume that masterLock has been acquired */ /* Proc should be sleeping ... */ if (proc->links.prev == INVALID_OFFSET || @@ -777,11 +856,11 @@ HandleDeadLock(SIGNAL_ARGS) /* * Acquire locktable lock. Note that the SIGALRM interrupt had better * not be enabled anywhere that this process itself holds the - * locktable lock, else this will wait forever. Also note that this - * calls SpinAcquire which creates a critical section, so that this + * locktable lock, else this will wait forever. Also note that + * LWLockAcquire creates a critical section, so that this * routine cannot be interrupted by cancel/die interrupts. */ - LockLockTable(); + LWLockAcquire(LockMgrLock, LW_EXCLUSIVE); /* * Check to see if we've been awoken by anyone in the interim. @@ -799,7 +878,7 @@ HandleDeadLock(SIGNAL_ARGS) if (MyProc->links.prev == INVALID_OFFSET || MyProc->links.next == INVALID_OFFSET) { - UnlockLockTable(); + LWLockRelease(LockMgrLock); errno = save_errno; return; } @@ -812,7 +891,7 @@ HandleDeadLock(SIGNAL_ARGS) if (!DeadLockCheck(MyProc)) { /* No deadlock, so keep waiting */ - UnlockLockTable(); + LWLockRelease(LockMgrLock); errno = save_errno; return; } @@ -846,30 +925,10 @@ HandleDeadLock(SIGNAL_ARGS) * wakable because we're not in front of them anymore. However, * RemoveFromWaitQueue took care of waking up any such processes. 
*/ - UnlockLockTable(); + LWLockRelease(LockMgrLock); errno = save_errno; } -void -ProcReleaseSpins(PROC *proc) -{ - int i; - - if (!proc) - proc = MyProc; - - if (!proc) - return; - for (i = 0; i < (int) MAX_SPINS; i++) - { - if (proc->sLocks[i]) - { - Assert(proc->sLocks[i] == 1); - SpinRelease(i); - } - } - AbortBufferIO(); -} /* * ProcWaitForSignal - wait for a signal from another backend. @@ -994,10 +1053,7 @@ ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum) SEM_MAP_ENTRY *procSemMap = ProcGlobal->procSemMap; int32 fullmask = (1 << PROC_NSEMS_PER_SET) - 1; - /* - * we hold ProcStructLock when entering this routine. We scan through - * the bitmap to look for a free semaphore. - */ + SpinLockAcquire(ProcStructLock); for (i = 0; i < semMapEntries; i++) { @@ -1018,12 +1074,17 @@ ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum) *semId = procSemMap[i].procSemId; *semNum = j; + + SpinLockRelease(ProcStructLock); + return; } mask <<= 1; } } + SpinLockRelease(ProcStructLock); + /* * If we reach here, all the semaphores are in use. This is one of the * possible places to detect "too many backends", so give the standard @@ -1036,6 +1097,8 @@ ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum) /* * ProcFreeSem - * free up our semaphore in the semaphore set. + * + * Caller is assumed to hold ProcStructLock. */ static void ProcFreeSem(IpcSemaphoreId semId, int semNum) @@ -1054,6 +1117,7 @@ ProcFreeSem(IpcSemaphoreId semId, int semNum) return; } } + /* can't elog here!!! */ fprintf(stderr, "ProcFreeSem: no ProcGlobal entry for semId %d\n", semId); } diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c index 6dc38b5955..055c809cf8 100644 --- a/src/backend/storage/lmgr/s_lock.c +++ b/src/backend/storage/lmgr/s_lock.c @@ -1,14 +1,15 @@ /*------------------------------------------------------------------------- * * s_lock.c - * Spinlock support routines + * Hardware-dependent implementation of spinlocks. 
+ * * * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/s_lock.c,v 1.1 2001/09/27 19:10:02 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/s_lock.c,v 1.2 2001/09/29 04:02:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -17,49 +18,14 @@ #include #include -#include "miscadmin.h" #include "storage/s_lock.h" -/*---------- - * Each time we busy spin we select the next element of this array as the - * number of microseconds to wait. This accomplishes pseudo random back-off. - * - * Note that on most platforms, specified values will be rounded up to the - * next multiple of a clock tick, which is often ten milliseconds (10000). - * So, we are being way overoptimistic to assume that these different values - * are really different, other than the last. But there are a few platforms - * with better-than-usual timekeeping, and on these we will get pretty good - * pseudo-random behavior. - * - * Total time to cycle through all 20 entries will be at least 100 msec, - * more commonly (10 msec resolution) 220 msec, and on some platforms - * as much as 420 msec (when the remainder of the current tick cycle is - * ignored in deciding when to time out, as on FreeBSD and older Linuxen). - * We use the 100msec figure to figure max_spins, so actual timeouts may - * be as much as four times the nominal value, but will never be less. 
- *---------- - */ -#define S_NSPINCYCLE 20 - -int s_spincycle[S_NSPINCYCLE] = -{1, 10, 100, 1000, - 10000, 1000, 1000, 1000, - 10000, 1000, 1000, 10000, - 1000, 1000, 10000, 1000, - 10000, 1000, 10000, 30000 -}; - -#define AVG_SPINCYCLE 5000 /* average entry in microsec: 100ms / 20 */ - -#define DEFAULT_TIMEOUT (100*1000000) /* default timeout: 100 sec */ - - /* * s_lock_stuck() - complain about a stuck spinlock */ static void -s_lock_stuck(volatile slock_t *lock, const char *file, const int line) +s_lock_stuck(volatile slock_t *lock, const char *file, int line) { fprintf(stderr, "\nFATAL: s_lock(%p) at %s:%d, stuck spinlock. Aborting.\n", @@ -72,69 +38,41 @@ s_lock_stuck(volatile slock_t *lock, const char *file, const int line) /* - * s_lock_sleep() - sleep a pseudo-random amount of time, check for timeout - * - * The 'timeout' is given in microsec, or may be 0 for "infinity". Note that - * this will be a lower bound (a fairly loose lower bound, on most platforms). - * - * 'microsec' is the number of microsec to delay per loop. Normally - * 'microsec' is 0, specifying to use the next s_spincycle[] value. - * Some callers may pass a nonzero interval, specifying to use exactly that - * delay value rather than a pseudo-random delay. + * s_lock(lock) - platform-independent portion of waiting for a spinlock. 
*/ void -s_lock_sleep(unsigned spins, int timeout, int microsec, - volatile slock_t *lock, - const char *file, const int line) -{ - struct timeval delay; - - if (microsec > 0) - { - delay.tv_sec = microsec / 1000000; - delay.tv_usec = microsec % 1000000; - } - else - { - delay.tv_sec = 0; - delay.tv_usec = s_spincycle[spins % S_NSPINCYCLE]; - microsec = AVG_SPINCYCLE; /* use average to figure timeout */ - } - - if (timeout > 0) - { - unsigned max_spins = timeout / microsec; - - if (spins > max_spins) - s_lock_stuck(lock, file, line); - } - - (void) select(0, NULL, NULL, NULL, &delay); -} - - -/* - * s_lock(lock) - take a spinlock with backoff - */ -void -s_lock(volatile slock_t *lock, const char *file, const int line) +s_lock(volatile slock_t *lock, const char *file, int line) { unsigned spins = 0; + unsigned delays = 0; + struct timeval delay; /* - * If you are thinking of changing this code, be careful. This same - * loop logic is used in other places that call TAS() directly. + * We loop tightly for awhile, then delay using select() and try again. + * Preferably, "awhile" should be a small multiple of the maximum time + * we expect a spinlock to be held. 100 iterations seems about right. * - * While waiting for a lock, we check for cancel/die interrupts (which is - * a no-op if we are inside a critical section). The interrupt check - * can be omitted in places that know they are inside a critical - * section. Note that an interrupt must NOT be accepted after - * acquiring the lock. + * We use a 10 millisec select delay because that is the lower limit on + * many platforms. The timeout is figured on this delay only, and so the + * nominal 1 minute is a lower bound. 
*/ +#define SPINS_PER_DELAY 100 +#define DELAY_MSEC 10 +#define TIMEOUT_MSEC (60 * 1000) + while (TAS(lock)) { - s_lock_sleep(spins++, DEFAULT_TIMEOUT, 0, lock, file, line); - CHECK_FOR_INTERRUPTS(); + if (++spins > SPINS_PER_DELAY) + { + if (++delays > (TIMEOUT_MSEC / DELAY_MSEC)) + s_lock_stuck(lock, file, line); + + delay.tv_sec = 0; + delay.tv_usec = DELAY_MSEC * 1000; + (void) select(0, NULL, NULL, NULL, &delay); + + spins = 0; + } } } diff --git a/src/backend/storage/lmgr/spin.c b/src/backend/storage/lmgr/spin.c index a1284cbbca..55587791f0 100644 --- a/src/backend/storage/lmgr/spin.c +++ b/src/backend/storage/lmgr/spin.c @@ -1,197 +1,45 @@ /*------------------------------------------------------------------------- * * spin.c - * routines for managing spin locks + * Hardware-independent implementation of spinlocks. + * + * + * For machines that have test-and-set (TAS) instructions, s_lock.h/.c + * define the spinlock implementation. This file contains only a stub + * implementation for spinlocks using SysV semaphores. The semaphore method + * is too slow to be very useful :-( * - * POSTGRES has two kinds of locks: semaphores (which put the - * process to sleep) and spinlocks (which are supposed to be - * short term locks). Spinlocks are implemented via test-and-set (TAS) - * instructions if possible, else via semaphores. 
The semaphore method - * is too slow to be useful :-( * * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/spin.c,v 1.1 2001/09/27 19:10:02 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/spin.c,v 1.2 2001/09/29 04:02:25 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" #include -#if !defined(HAS_TEST_AND_SET) && defined(HAVE_SYS_SEM_H) +#ifdef HAVE_SYS_SEM_H #include #endif -#include "miscadmin.h" +#include "storage/lwlock.h" #include "storage/proc.h" -#include "storage/s_lock.h" - - -/* Probably should move these to an appropriate header file */ -extern SPINLOCK BufMgrLock; -extern SPINLOCK OidGenLockId; -extern SPINLOCK XidGenLockId; -extern SPINLOCK ControlFileLockId; -extern SPINLOCK ShmemLock; -extern SPINLOCK ShmemIndexLock; -extern SPINLOCK LockMgrLock; -extern SPINLOCK SInvalLock; -extern SPINLOCK ProcStructLock; -extern SPINLOCK FreeSpaceLock; -#ifdef STABLE_MEMORY_STORAGE -extern SPINLOCK MMCacheLock; -#endif - - -/* - * Initialize identifiers for permanent spinlocks during startup - * - * The same identifiers are used for both TAS and semaphore implementations, - * although in one case they are indexes into a shmem array and in the other - * they are semaphore numbers. 
- */ -static void -InitSpinLockIDs(void) -{ - BufMgrLock = (SPINLOCK) BUFMGRLOCKID; - OidGenLockId = (SPINLOCK) OIDGENLOCKID; - XidGenLockId = (SPINLOCK) XIDGENLOCKID; - ControlFileLockId = (SPINLOCK) CNTLFILELOCKID; - ShmemLock = (SPINLOCK) SHMEMLOCKID; - ShmemIndexLock = (SPINLOCK) SHMEMINDEXLOCKID; - LockMgrLock = (SPINLOCK) LOCKMGRLOCKID; - SInvalLock = (SPINLOCK) SINVALLOCKID; - ProcStructLock = (SPINLOCK) PROCSTRUCTLOCKID; - FreeSpaceLock = (SPINLOCK) FREESPACELOCKID; -#ifdef STABLE_MEMORY_STORAGE - MMCacheLock = (SPINLOCK) MMCACHELOCKID; -#endif -} +#include "storage/spin.h" #ifdef HAS_TEST_AND_SET -/* real spin lock implementation */ - -typedef struct slock -{ - slock_t shlock; -} SLock; - -#ifdef LOCK_DEBUG -bool Trace_spinlocks = false; - -inline static void -PRINT_SLDEBUG(const char *where, SPINLOCK lockid, const SLock *lock) -{ - if (Trace_spinlocks) - elog(DEBUG, "%s: id=%d", where, lockid); -} - -#else /* not LOCK_DEBUG */ -#define PRINT_SLDEBUG(a,b,c) -#endif /* not LOCK_DEBUG */ - - -static SLock *SLockArray = NULL; - -#define SLOCKMEMORYSIZE ((int) MAX_SPINS * sizeof(SLock)) - -/* - * SLockShmemSize --- return shared-memory space needed - */ -int -SLockShmemSize(void) -{ - return MAXALIGN(SLOCKMEMORYSIZE); -} - /* * CreateSpinlocks --- create and initialize spinlocks during startup */ void -CreateSpinlocks(PGShmemHeader *seghdr) -{ - int id; - - /* - * We must allocate the space "by hand" because shmem.c isn't up yet - */ - SLockArray = (SLock *) (((char *) seghdr) + seghdr->freeoffset); - seghdr->freeoffset += MAXALIGN(SLOCKMEMORYSIZE); - Assert(seghdr->freeoffset <= seghdr->totalsize); - - /* - * Initialize all spinlocks to "unlocked" state - */ - for (id = 0; id < (int) MAX_SPINS; id++) - { - SLock *slckP = &(SLockArray[id]); - - S_INIT_LOCK(&(slckP->shlock)); - } - - /* - * Assign indexes for fixed spinlocks - */ - InitSpinLockIDs(); -} - -void -SpinAcquire(SPINLOCK lockid) -{ - SLock *slckP = &(SLockArray[lockid]); - - 
PRINT_SLDEBUG("SpinAcquire", lockid, slckP); - - /* - * Acquire the lock, then record that we have done so (for recovery in - * case of elog(ERROR) while holding the lock). Note we assume here - * that S_LOCK will not accept cancel/die interrupts once it has - * acquired the lock. However, interrupts should be accepted while - * waiting, if InterruptHoldoffCount is zero. - */ - S_LOCK(&(slckP->shlock)); - PROC_INCR_SLOCK(lockid); - - /* - * Lock out cancel/die interrupts until we exit the code section - * protected by the spinlock. This ensures that interrupts will not - * interfere with manipulations of data structures in shared memory. - */ - HOLD_INTERRUPTS(); - - PRINT_SLDEBUG("SpinAcquire/done", lockid, slckP); -} - -void -SpinRelease(SPINLOCK lockid) +CreateSpinlocks(void) { - SLock *slckP = &(SLockArray[lockid]); - - PRINT_SLDEBUG("SpinRelease", lockid, slckP); - - /* - * Check that we are actually holding the lock we are releasing. This - * can be done only after MyProc has been initialized. - */ - Assert(!MyProc || MyProc->sLocks[lockid] > 0); - - /* - * Record that we no longer hold the spinlock, and release it. - */ - PROC_DECR_SLOCK(lockid); - S_UNLOCK(&(slckP->shlock)); - - /* - * Exit the interrupt holdoff entered in SpinAcquire(). - */ - RESUME_INTERRUPTS(); - - PRINT_SLDEBUG("SpinRelease/done", lockid, slckP); + /* no-op when we have TAS spinlocks */ } #else /* !HAS_TEST_AND_SET */ @@ -199,11 +47,7 @@ SpinRelease(SPINLOCK lockid) /* * No TAS, so spinlocks are implemented using SysV semaphores. * - * We support two slightly different APIs here: SpinAcquire/SpinRelease - * work with SPINLOCK integer indexes for the permanent spinlocks, which - * are all assumed to live in the first spinlock semaphore set. There - * is also an emulation of the s_lock.h TAS-spinlock macros; for that case, - * typedef slock_t stores the semId and sem number of the sema to use. + * Typedef slock_t stores the semId and sem number of the sema to use. 
* The semas needed are created by CreateSpinlocks and doled out by * s_init_lock_sema. * @@ -228,35 +72,26 @@ static int nextSpinLock = 0; /* next free spinlock index */ static void SpinFreeAllSemaphores(void); -/* - * SLockShmemSize --- return shared-memory space needed - */ -int -SLockShmemSize(void) -{ - return 0; -} /* * CreateSpinlocks --- create and initialize spinlocks during startup */ void -CreateSpinlocks(PGShmemHeader *seghdr) +CreateSpinlocks(void) { int i; if (SpinLockIds == NULL) { - /* - * Compute number of spinlocks needed. If this logic gets any - * more complicated, it should be distributed into the affected - * modules, similar to the way shmem space estimation is handled. + * Compute number of spinlocks needed. It would be cleaner to + * distribute this logic into the affected modules, + * similar to the way shmem space estimation is handled. * - * For now, though, we just need the fixed spinlocks (MAX_SPINS), two - * spinlocks per shared disk buffer, and four spinlocks for XLOG. + * For now, though, we just need a few spinlocks (10 should be + * plenty) plus one for each LWLock. */ - numSpinLocks = (int) MAX_SPINS + 2 * NBuffers + 4; + numSpinLocks = NumLWLocks() + 10; /* might as well round up to a multiple of SPINLOCKS_PER_SET */ numSpinSets = (numSpinLocks - 1) / SPINLOCKS_PER_SET + 1; @@ -288,14 +123,8 @@ CreateSpinlocks(PGShmemHeader *seghdr) false); } - /* - * Assign indexes for fixed spinlocks - */ - Assert(MAX_SPINS <= SPINLOCKS_PER_SET); - InitSpinLockIDs(); - /* Init counter for allocating dynamic spinlocks */ - nextSpinLock = MAX_SPINS; + nextSpinLock = 0; } /* @@ -318,49 +147,6 @@ SpinFreeAllSemaphores(void) SpinLockIds = NULL; } -/* - * SpinAcquire -- grab a fixed spinlock - * - * FAILS if the semaphore is corrupted. - */ -void -SpinAcquire(SPINLOCK lock) -{ - - /* - * See the TAS() version of this routine for primary commentary. 
- * - * NOTE we must pass interruptOK = false to IpcSemaphoreLock, to ensure - * that a cancel/die interrupt cannot prevent us from recording - * ownership of a lock we have just acquired. - */ - IpcSemaphoreLock(SpinLockIds[0], lock, false); - PROC_INCR_SLOCK(lock); - HOLD_INTERRUPTS(); -} - -/* - * SpinRelease -- release a fixed spin lock - * - * FAILS if the semaphore is corrupted - */ -void -SpinRelease(SPINLOCK lock) -{ - /* See the TAS() version of this routine for commentary */ -#ifdef USE_ASSERT_CHECKING - /* Check it's locked */ - int semval; - - semval = IpcSemaphoreGetValue(SpinLockIds[0], lock); - Assert(semval < 1); -#endif - Assert(!MyProc || MyProc->sLocks[lockid] > 0); - PROC_DECR_SLOCK(lock); - IpcSemaphoreUnlock(SpinLockIds[0], lock); - RESUME_INTERRUPTS(); -} - /* * s_lock.h hardware-spinlock emulation */ diff --git a/src/backend/storage/smgr/mm.c b/src/backend/storage/smgr/mm.c index 791c375de0..43da08b19c 100644 --- a/src/backend/storage/smgr/mm.c +++ b/src/backend/storage/smgr/mm.c @@ -11,17 +11,19 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.24 2001/06/27 23:31:39 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.25 2001/09/29 04:02:25 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" -#include "miscadmin.h" - -#ifdef STABLE_MEMORY_STORAGE #include +#include "storage/smgr.h" +#include "miscadmin.h" + + +#ifdef STABLE_MEMORY_STORAGE /* * MMCacheTag -- Unique triplet for blocks stored by the main memory @@ -71,8 +73,6 @@ typedef struct MMRelHashEntry #define MMNBUFFERS 10 #define MMNRELATIONS 2 -SPINLOCK MMCacheLock; - static int *MMCurTop; static int *MMCurRelno; static MMCacheTag *MMBlockTags; @@ -88,7 +88,7 @@ mminit() bool found; HASHCTL info; - SpinAcquire(MMCacheLock); + LWLockAcquire(MMCacheLock, LW_EXCLUSIVE); mmsize += MAXALIGN(BLCKSZ * MMNBUFFERS); mmsize += MAXALIGN(sizeof(*MMCurTop)); @@ -98,7 
+98,7 @@ mminit() if (mmcacheblk == (char *) NULL) { - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); return SM_FAIL; } @@ -112,7 +112,7 @@ mminit() if (MMCacheHT == (HTAB *) NULL) { - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); return SM_FAIL; } @@ -126,18 +126,18 @@ mminit() if (MMRelCacheHT == (HTAB *) NULL) { - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); return SM_FAIL; } if (IsUnderPostmaster) /* was IsPostmaster bjm */ { MemSet(mmcacheblk, 0, mmsize); - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); return SM_SUCCESS; } - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); MMCurTop = (int *) mmcacheblk; mmcacheblk += sizeof(int); @@ -163,11 +163,11 @@ mmcreate(Relation reln) bool found; MMRelTag tag; - SpinAcquire(MMCacheLock); + LWLockAcquire(MMCacheLock, LW_EXCLUSIVE); if (*MMCurRelno == MMNRELATIONS) { - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); return SM_FAIL; } @@ -184,20 +184,20 @@ mmcreate(Relation reln) if (entry == (MMRelHashEntry *) NULL) { - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); elog(FATAL, "main memory storage mgr rel cache hash table corrupt"); } if (found) { /* already exists */ - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); return SM_FAIL; } entry->mmrhe_nblocks = 0; - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); return SM_SUCCESS; } @@ -211,30 +211,24 @@ int mmunlink(RelFileNode rnode) { int i; - Oid reldbid; MMHashEntry *entry; MMRelHashEntry *rentry; bool found; MMRelTag rtag; - if (reln->rd_rel->relisshared) - reldbid = (Oid) 0; - else - reldbid = MyDatabaseId; - - SpinAcquire(MMCacheLock); + LWLockAcquire(MMCacheLock, LW_EXCLUSIVE); for (i = 0; i < MMNBUFFERS; i++) { - if (MMBlockTags[i].mmct_dbid == reldbid - && MMBlockTags[i].mmct_relid == RelationGetRelid(reln)) + if (MMBlockTags[i].mmct_dbid == rnode.tblNode + && MMBlockTags[i].mmct_relid == rnode.relNode) { entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &MMBlockTags[i], 
HASH_REMOVE, &found); if (entry == (MMHashEntry *) NULL || !found) { - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); elog(FATAL, "mmunlink: cache hash table corrupted"); } MMBlockTags[i].mmct_dbid = (Oid) 0; @@ -242,21 +236,21 @@ mmunlink(RelFileNode rnode) MMBlockTags[i].mmct_blkno = (BlockNumber) 0; } } - rtag.mmrt_dbid = reldbid; - rtag.mmrt_relid = RelationGetRelid(reln); + rtag.mmrt_dbid = rnode.tblNode; + rtag.mmrt_relid = rnode.relNode; rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag, HASH_REMOVE, &found); if (rentry == (MMRelHashEntry *) NULL || !found) { - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); elog(FATAL, "mmunlink: rel cache hash table corrupted"); } (*MMCurRelno)--; - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); return 1; } @@ -286,7 +280,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer) tag.mmct_dbid = rtag.mmrt_dbid = reldbid; tag.mmct_relid = rtag.mmrt_relid = RelationGetRelid(reln); - SpinAcquire(MMCacheLock); + LWLockAcquire(MMCacheLock, LW_EXCLUSIVE); if (*MMCurTop == MMNBUFFERS) { @@ -298,7 +292,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer) } if (i == MMNBUFFERS) { - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); return SM_FAIL; } } @@ -312,7 +306,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer) HASH_FIND, &found); if (rentry == (MMRelHashEntry *) NULL || !found) { - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); elog(FATAL, "mmextend: rel cache hash table corrupt"); } @@ -322,7 +316,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer) HASH_ENTER, &found); if (entry == (MMHashEntry *) NULL || found) { - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); elog(FATAL, "mmextend: cache hash table corrupt"); } @@ -338,7 +332,7 @@ mmextend(Relation reln, BlockNumber blocknum, char *buffer) offset = (i * BLCKSZ); memmove(&(MMBlockCache[offset]), buffer, BLCKSZ); - SpinRelease(MMCacheLock); + 
LWLockRelease(MMCacheLock); return SM_SUCCESS; } @@ -386,20 +380,20 @@ mmread(Relation reln, BlockNumber blocknum, char *buffer) tag.mmct_relid = RelationGetRelid(reln); tag.mmct_blkno = blocknum; - SpinAcquire(MMCacheLock); + LWLockAcquire(MMCacheLock, LW_EXCLUSIVE); entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag, HASH_FIND, &found); if (entry == (MMHashEntry *) NULL) { - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); elog(FATAL, "mmread: hash table corrupt"); } if (!found) { /* reading nonexistent pages is defined to fill them with zeroes */ - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); MemSet(buffer, 0, BLCKSZ); return SM_SUCCESS; } @@ -407,7 +401,7 @@ mmread(Relation reln, BlockNumber blocknum, char *buffer) offset = (entry->mmhe_bufno * BLCKSZ); memmove(buffer, &MMBlockCache[offset], BLCKSZ); - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); return SM_SUCCESS; } @@ -433,26 +427,26 @@ mmwrite(Relation reln, BlockNumber blocknum, char *buffer) tag.mmct_relid = RelationGetRelid(reln); tag.mmct_blkno = blocknum; - SpinAcquire(MMCacheLock); + LWLockAcquire(MMCacheLock, LW_EXCLUSIVE); entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag, HASH_FIND, &found); if (entry == (MMHashEntry *) NULL) { - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); elog(FATAL, "mmread: hash table corrupt"); } if (!found) { - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); elog(FATAL, "mmwrite: hash table missing requested page"); } offset = (entry->mmhe_bufno * BLCKSZ); memmove(&MMBlockCache[offset], buffer, BLCKSZ); - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); return SM_SUCCESS; } @@ -506,14 +500,14 @@ mmnblocks(Relation reln) rtag.mmrt_relid = RelationGetRelid(reln); - SpinAcquire(MMCacheLock); + LWLockAcquire(MMCacheLock, LW_EXCLUSIVE); rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag, HASH_FIND, &found); if (rentry == (MMRelHashEntry *) NULL) { - SpinRelease(MMCacheLock); + 
LWLockRelease(MMCacheLock); elog(FATAL, "mmnblocks: rel cache hash table corrupt"); } @@ -522,7 +516,7 @@ mmnblocks(Relation reln) else nblocks = InvalidBlockNumber; - SpinRelease(MMCacheLock); + LWLockRelease(MMCacheLock); return nblocks; } diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 1cc078c8f6..555b666faa 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.52 2001/07/02 20:50:46 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.53 2001/09/29 04:02:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,6 +19,7 @@ #include "storage/bufmgr.h" #include "storage/freespace.h" +#include "storage/ipc.h" #include "storage/smgr.h" #include "utils/memutils.h" diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 2a57a7acd7..47926b146f 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/init/postinit.c,v 1.92 2001/09/27 16:29:12 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/init/postinit.c,v 1.93 2001/09/29 04:02:25 tgl Exp $ * * *------------------------------------------------------------------------- @@ -401,11 +401,12 @@ ShutdownPostgres(void) * since that just raises the odds of failure --- but there's some * stuff we need to do. * - * Release any spinlocks or buffer context locks we might be holding. + * Release any LW locks and buffer context locks we might be holding. * This is a kluge to improve the odds that we won't get into a - * self-made stuck-spinlock scenario while trying to shut down. + * self-made stuck-lock scenario while trying to shut down. 
*/ - ProcReleaseSpins(NULL); + LWLockReleaseAll(); + AbortBufferIO(); UnlockBuffers(); /* diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 17f142b1ea..6efd41c2a8 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -4,7 +4,7 @@ * Support for grand unified configuration scheme, including SET * command, configuration file, and command line options. * - * $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.52 2001/09/23 21:52:36 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.53 2001/09/29 04:02:25 tgl Exp $ * * Copyright 2000 by PostgreSQL Global Development Group * Written by Peter Eisentraut . @@ -240,7 +240,7 @@ static struct config_bool #ifdef LOCK_DEBUG {"trace_locks", PGC_SUSET, &Trace_locks, false, NULL}, {"trace_userlocks", PGC_SUSET, &Trace_userlocks, false, NULL}, - {"trace_spinlocks", PGC_SUSET, &Trace_spinlocks, false, NULL}, + {"trace_lwlocks", PGC_SUSET, &Trace_lwlocks, false, NULL}, {"debug_deadlocks", PGC_SUSET, &Debug_deadlocks, false, NULL}, #endif diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index fdb7c775ae..789f1a1d08 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -164,7 +164,7 @@ #ifdef LOCK_DEBUG #trace_locks = false #trace_userlocks = false -#trace_spinlocks = false +#trace_lwlocks = false #debug_deadlocks = false #trace_lock_oidmin = 16384 #trace_lock_table = 0 diff --git a/src/include/access/clog.h b/src/include/access/clog.h index 4e44e8036c..9bcd3a4294 100644 --- a/src/include/access/clog.h +++ b/src/include/access/clog.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: clog.h,v 1.1 2001/08/25 18:52:42 tgl Exp $ + * $Id: clog.h,v 1.2 2001/09/29 04:02:26 tgl Exp $ */ #ifndef CLOG_H #define CLOG_H @@ -24,6 
+24,9 @@ typedef int XidStatus; #define TRANSACTION_STATUS_ABORTED 0x02 /* 0x03 is available without changing commit log space allocation */ +/* exported because lwlock.c needs it */ +#define NUM_CLOG_BUFFERS 8 + extern void TransactionIdSetStatus(TransactionId xid, XidStatus status); extern XidStatus TransactionIdGetStatus(TransactionId xid); diff --git a/src/include/access/transam.h b/src/include/access/transam.h index 5ce62d289f..e6fda69691 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -7,15 +7,13 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: transam.h,v 1.40 2001/08/26 16:56:00 tgl Exp $ + * $Id: transam.h,v 1.41 2001/09/29 04:02:26 tgl Exp $ * *------------------------------------------------------------------------- */ #ifndef TRANSAM_H #define TRANSAM_H -#include "storage/spin.h" - /* ---------------- * Special transaction ID values @@ -122,8 +120,6 @@ extern void CheckMaxObjectId(Oid assigned_oid); extern bool AMI_OVERRIDE; /* in varsup.c */ -extern SPINLOCK OidGenLockId; -extern SPINLOCK XidGenLockId; extern VariableCache ShmemVariableCache; #endif /* TRAMSAM_H */ diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 456147b907..1257faf005 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -12,7 +12,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: miscadmin.h,v 1.92 2001/09/27 16:29:13 tgl Exp $ + * $Id: miscadmin.h,v 1.93 2001/09/29 04:02:26 tgl Exp $ * * NOTES * some of the information in this file should be moved to @@ -46,8 +46,8 @@ * will be held off until the last matching RESUME_INTERRUPTS() occurs. 
* * Special mechanisms are used to let an interrupt be accepted when we are - * waiting for a lock or spinlock, and when we are waiting for command input - * (but, of course, only if the interrupt holdoff counter is zero). See the + * waiting for a lock or when we are waiting for command input (but, of + * course, only if the interrupt holdoff counter is zero). See the * related code for details. * * A related, but conceptually distinct, mechanism is the "critical section" diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index ca1e1dd4b4..f85a93c258 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: buf_internals.h,v 1.49 2001/07/06 21:04:26 tgl Exp $ + * $Id: buf_internals.h,v 1.50 2001/09/29 04:02:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -17,7 +17,7 @@ #include "storage/backendid.h" #include "storage/buf.h" #include "storage/lmgr.h" -#include "storage/s_lock.h" +#include "storage/lwlock.h" /* Buf Mgr constants */ @@ -89,12 +89,8 @@ typedef struct sbufdesc BufFlags flags; /* see bit definitions above */ unsigned refcount; /* # of backends holding pins on buffer */ - slock_t io_in_progress_lock; /* to wait for I/O to complete */ - slock_t cntx_lock; /* to lock access to page context */ - - unsigned r_locks; /* # of shared locks */ - bool ri_lock; /* read-intent lock */ - bool w_lock; /* context exclusively locked */ + LWLockId io_in_progress_lock; /* to wait for I/O to complete */ + LWLockId cntx_lock; /* to lock access to page context */ bool cntxDirty; /* new way to mark block as dirty */ @@ -117,10 +113,7 @@ typedef struct sbufdesc * We have to free these locks in elog(ERROR)... 
*/ #define BL_IO_IN_PROGRESS (1 << 0) /* unimplemented */ -#define BL_R_LOCK (1 << 1) -#define BL_RI_LOCK (1 << 2) -#define BL_W_LOCK (1 << 3) -#define BL_PIN_COUNT_LOCK (1 << 4) +#define BL_PIN_COUNT_LOCK (1 << 1) /* * mao tracing buffer allocation @@ -173,7 +166,6 @@ extern bits8 *BufferLocks; extern BufferTag *BufferTagLastDirtied; extern LockRelId *BufferRelidLastDirtied; extern bool *BufferDirtiedByMe; -extern SPINLOCK BufMgrLock; /* localbuf.c */ extern BufferDesc *LocalBufferDescriptors; diff --git a/src/include/storage/freespace.h b/src/include/storage/freespace.h index 0f11dd02f9..affde80717 100644 --- a/src/include/storage/freespace.h +++ b/src/include/storage/freespace.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: freespace.h,v 1.2 2001/07/02 20:50:46 tgl Exp $ + * $Id: freespace.h,v 1.3 2001/09/29 04:02:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,11 +16,8 @@ #include "storage/block.h" #include "storage/relfilenode.h" -#include "storage/spin.h" -extern SPINLOCK FreeSpaceLock; - extern int MaxFSMRelations; extern int MaxFSMPages; diff --git a/src/include/storage/ipc.h b/src/include/storage/ipc.h index 8ce1a84593..dae16748d0 100644 --- a/src/include/storage/ipc.h +++ b/src/include/storage/ipc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: ipc.h,v 1.50 2001/06/27 23:31:39 tgl Exp $ + * $Id: ipc.h,v 1.51 2001/09/29 04:02:26 tgl Exp $ * * Some files that would normally need to include only sys/ipc.h must * instead include this file because on Ultrix, sys/ipc.h is not designed @@ -30,9 +30,9 @@ union semun struct semid_ds *buf; unsigned short *array; }; - #endif + /* generic IPC definitions */ #define IPCProtection (0600) /* access/modify by user only */ @@ 
-51,7 +51,7 @@ typedef int IpcSemaphoreId; /* semaphore ID returned by semget(2) */ typedef uint32 IpcMemoryKey; /* shared memory key passed to shmget(2) */ typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */ -typedef struct /* standard header for all Postgres shmem */ +typedef struct PGShmemHeader /* standard header for all Postgres shmem */ { int32 magic; /* magic # to identify Postgres segments */ #define PGShmemMagic 679834892 @@ -61,29 +61,6 @@ typedef struct /* standard header for all Postgres shmem */ } PGShmemHeader; -/* spinlock definitions */ - -typedef enum _LockId_ -{ - BUFMGRLOCKID, - OIDGENLOCKID, - XIDGENLOCKID, - CNTLFILELOCKID, - SHMEMLOCKID, - SHMEMINDEXLOCKID, - LOCKMGRLOCKID, - SINVALLOCKID, - PROCSTRUCTLOCKID, - FREESPACELOCKID, - -#ifdef STABLE_MEMORY_STORAGE - MMCACHELOCKID, -#endif - - MAX_SPINS /* must be last item! */ -} _LockId_; - - /* ipc.c */ extern bool proc_exit_inprogress; diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index ed134bcc77..b5ac03eb7e 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -7,15 +7,15 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: lock.h,v 1.52 2001/09/27 16:29:13 tgl Exp $ + * $Id: lock.h,v 1.53 2001/09/29 04:02:26 tgl Exp $ * *------------------------------------------------------------------------- */ #ifndef LOCK_H_ #define LOCK_H_ -#include "storage/ipc.h" #include "storage/itemptr.h" +#include "storage/lwlock.h" #include "storage/shmem.h" @@ -26,12 +26,10 @@ typedef struct PROC_QUEUE int size; /* number of entries in list */ } PROC_QUEUE; -/* struct proc is declared in storage/proc.h, but must forward-reference it */ -typedef struct proc PROC; +/* struct PROC is declared in storage/proc.h, but must forward-reference it */ +typedef struct PROC PROC; -extern SPINLOCK LockMgrLock; - extern int max_locks_per_xact; #ifdef LOCK_DEBUG @@ 
-51,11 +49,7 @@ typedef int LOCKMETHOD; /* MAX_LOCKMODES cannot be larger than the # of bits in LOCKMASK */ #define MAX_LOCKMODES 10 -/* - * MAX_LOCK_METHODS corresponds to the number of spin locks allocated in - * CreateSpinLocks() or the number of shared memory locations allocated - * for lock table spin locks in the case of machines with TAS instructions. - */ +/* MAX_LOCK_METHODS is the number of distinct lock control tables allowed */ #define MAX_LOCK_METHODS 3 #define INVALID_TABLEID 0 @@ -69,7 +63,7 @@ typedef int LOCKMETHOD; * If user locks are enabled, an additional lock method is present. * * LOCKMETHODCTL and LOCKMETHODTABLE are split because the first lives - * in shared memory. This is because it contains a spinlock. + * in shared memory. (There isn't any really good reason for the split.) * LOCKMETHODTABLE exists in private memory. Both are created by the * postmaster and should be the same in all backends. */ @@ -93,7 +87,7 @@ typedef int LOCKMETHOD; * writers can be given priority over readers (to avoid * starvation). XXX this field is not actually used at present! 
* - * masterlock -- synchronizes access to the table + * masterLock -- synchronizes access to the table */ typedef struct LOCKMETHODCTL { @@ -101,7 +95,7 @@ typedef struct LOCKMETHODCTL int numLockModes; int conflictTab[MAX_LOCKMODES]; int prio[MAX_LOCKMODES]; - SPINLOCK masterLock; + LWLockId masterLock; } LOCKMETHODCTL; /* @@ -235,11 +229,6 @@ typedef struct HOLDER (((LOCK *) MAKE_PTR((holder).tag.lock))->tag.lockmethod) - -#define LockLockTable() SpinAcquire(LockMgrLock) -#define UnlockLockTable() SpinRelease(LockMgrLock) - - /* * function prototypes */ @@ -267,7 +256,6 @@ extern void InitDeadLockChecking(void); #ifdef LOCK_DEBUG extern void DumpLocks(void); extern void DumpAllLocks(void); - #endif #endif /* LOCK_H */ diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h new file mode 100644 index 0000000000..02610d3541 --- /dev/null +++ b/src/include/storage/lwlock.h @@ -0,0 +1,69 @@ +/*------------------------------------------------------------------------- + * + * lwlock.h + * Lightweight lock manager + * + * + * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $Id: lwlock.h,v 1.1 2001/09/29 04:02:26 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#ifndef LWLOCK_H +#define LWLOCK_H + +/* + * We have a number of predefined LWLocks, plus a bunch of LWLocks that are + * dynamically assigned (for shared buffers). The LWLock structures live + * in shared memory (since they contain shared data) and are identified by + * values of this enumerated type. We abuse the notion of an enum somewhat + * by allowing values not listed in the enum declaration to be assigned. + * The extra value MaxDynamicLWLock is there to keep the compiler from + * deciding that the enum can be represented as char or short ... 
+ */ +typedef enum LWLockId +{ + BufMgrLock, + LockMgrLock, + OidGenLock, + XidGenLock, + ShmemIndexLock, + SInvalLock, + FreeSpaceLock, + MMCacheLock, + WALInsertLock, + WALWriteLock, + ControlFileLock, + CheckpointLock, + CLogControlLock, + + NumFixedLWLocks, /* must be last except for MaxDynamicLWLock */ + + MaxDynamicLWLock = 1000000000 +} LWLockId; + + +typedef enum LWLockMode +{ + LW_EXCLUSIVE, + LW_SHARED +} LWLockMode; + + +#ifdef LOCK_DEBUG +extern bool Trace_lwlocks; +#endif + +extern LWLockId LWLockAssign(void); +extern void LWLockAcquire(LWLockId lockid, LWLockMode mode); +extern bool LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode); +extern void LWLockRelease(LWLockId lockid); +extern void LWLockReleaseAll(void); + +extern int NumLWLocks(void); +extern int LWLockShmemSize(void); +extern void CreateLWLocks(void); + +#endif /* LWLOCK_H */ diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 9a1c63ef1b..77def9f473 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: proc.h,v 1.47 2001/09/21 17:06:12 tgl Exp $ + * $Id: proc.h,v 1.48 2001/09/29 04:02:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,10 +16,9 @@ #include "access/xlog.h" #include "storage/backendid.h" +#include "storage/ipc.h" #include "storage/lock.h" -/* configurable option */ -extern int DeadlockTimeout; typedef struct { @@ -35,10 +34,9 @@ typedef struct * the PROC is linked into that lock's waitProcs queue. A recycled PROC * is linked into ProcGlobal's freeProcs list. 
*/ -struct proc +struct PROC { /* proc->links MUST BE FIRST IN STRUCT (see ProcSleep,ProcWakeup,etc) */ - SHM_QUEUE links; /* list link if process is in a list */ SEMA sem; /* ONE semaphore to sleep on */ @@ -51,6 +49,9 @@ struct proc * were starting our xact: vacuum must not * remove tuples deleted by xid >= xmin ! */ + int pid; /* This backend's process id */ + Oid databaseId; /* OID of database this backend is using */ + /* * XLOG location of first XLOG record written by this backend's * current transaction. If backend is not in a transaction or hasn't @@ -58,6 +59,11 @@ struct proc */ XLogRecPtr logRec; + /* Info about LWLock the process is currently waiting for, if any. */ + bool lwWaiting; /* true if waiting for an LW lock */ + bool lwExclusive; /* true if waiting for exclusive access */ + struct PROC *lwWaitLink; /* next waiter for same LW lock */ + /* Info about lock the process is currently waiting for, if any. */ /* waitLock and waitHolder are NULL if not currently waiting. */ LOCK *waitLock; /* Lock object we're sleeping on ... */ @@ -66,32 +72,15 @@ struct proc LOCKMASK heldLocks; /* bitmask for lock types already held on * this lock object by this backend */ - int pid; /* This backend's process id */ - Oid databaseId; /* OID of database this backend is using */ - - short sLocks[MAX_SPINS]; /* Spin lock stats */ SHM_QUEUE procHolders; /* list of HOLDER objects for locks held * or awaited by this backend */ }; -/* NOTE: "typedef struct proc PROC" appears in storage/lock.h. */ +/* NOTE: "typedef struct PROC PROC" appears in storage/lock.h. */ extern PROC *MyProc; -extern SPINLOCK ProcStructLock; - - -#define PROC_INCR_SLOCK(lock) \ -do { \ - if (MyProc) (MyProc->sLocks[(lock)])++; \ -} while (0) - -#define PROC_DECR_SLOCK(lock) \ -do { \ - if (MyProc) (MyProc->sLocks[(lock)])--; \ -} while (0) - /* * There is one ProcGlobal struct for the whole installation. 
@@ -120,7 +109,7 @@ typedef struct */ } SEM_MAP_ENTRY; -typedef struct procglobal +typedef struct PROC_HDR { /* Head of list of free PROC structures */ SHMEM_OFFSET freeProcs; @@ -134,11 +123,17 @@ typedef struct procglobal SEM_MAP_ENTRY procSemMap[1]; } PROC_HDR; + +/* configurable option */ +extern int DeadlockTimeout; + + /* * Function Prototypes */ extern void InitProcGlobal(int maxBackends); extern void InitProcess(void); +extern void InitDummyProcess(void); extern void ProcReleaseLocks(bool isCommit); extern void ProcQueueInit(PROC_QUEUE *queue); @@ -146,7 +141,6 @@ extern int ProcSleep(LOCKMETHODTABLE *lockMethodTable, LOCKMODE lockmode, LOCK *lock, HOLDER *holder); extern PROC *ProcWakeup(PROC *proc, int errType); extern void ProcLockWakeup(LOCKMETHODTABLE *lockMethodTable, LOCK *lock); -extern void ProcReleaseSpins(PROC *proc); extern bool LockWaitCancel(void); extern void HandleDeadLock(SIGNAL_ARGS); diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h index 2188dcd9f2..166019a6fa 100644 --- a/src/include/storage/s_lock.h +++ b/src/include/storage/s_lock.h @@ -1,22 +1,13 @@ /*------------------------------------------------------------------------- * * s_lock.h - * This file contains the in-line portion of the implementation - * of spinlocks. + * Hardware-dependent implementation of spinlocks. * - * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/include/storage/s_lock.h,v 1.94 2001/09/24 20:10:44 petere Exp $ + * NOTE: none of the macros in this file are intended to be called directly. + * Call them through the hardware-independent macros in spin.h. 
* - *------------------------------------------------------------------------- - */ - -/*---------- - * DESCRIPTION - * The public macros that must be provided are: + * The following hardware-dependent macros must be provided for each + * supported platform: * * void S_INIT_LOCK(slock_t *lock) * Initialize a spinlock (to the unlocked state). @@ -33,51 +24,43 @@ * Tests if the lock is free. Returns TRUE if free, FALSE if locked. * This does *not* change the state of the lock. * + * Note to implementors: there are default implementations for all these + * macros at the bottom of the file. Check if your platform can use + * these or needs to override them. + * + * Usually, S_LOCK() is implemented in terms of an even lower-level macro + * TAS(): + * * int TAS(slock_t *lock) * Atomic test-and-set instruction. Attempt to acquire the lock, * but do *not* wait. Returns 0 if successful, nonzero if unable * to acquire the lock. * - * TAS() is a lower-level part of the API, but is used directly in a - * few places that want to do other things while waiting for a lock. - * The S_LOCK() macro is equivalent to - * - * void - * S_LOCK(slock_t *lock) - * { - * unsigned spins = 0; - * - * while (TAS(lock)) - * S_LOCK_SLEEP(lock, spins++, timeout); - * } + * TAS() is NOT part of the API, and should never be called directly. * - * where S_LOCK_SLEEP() checks for timeout and sleeps for a short - * interval. (The timeout is expressed in microseconds, or can be 0 for - * "infinity".) Callers that want to perform useful work while waiting - * can write out this entire loop and insert the "useful work" inside - * the loop. - * - * CAUTION to TAS() callers: on some platforms TAS() may sometimes - * report failure to acquire a lock even when the lock is not locked. - * For example, on Alpha TAS() will "fail" if interrupted. Therefore - * TAS() must *always* be invoked in a retry loop as depicted, even when - * you are certain the lock is free. 
+ * CAUTION: on some platforms TAS() may sometimes report failure to acquire + * a lock even when the lock is not locked. For example, on Alpha TAS() + * will "fail" if interrupted. Therefore TAS() should always be invoked + * in a retry loop, even if you are certain the lock is free. * * On most supported platforms, TAS() uses a tas() function written * in assembly language to execute a hardware atomic-test-and-set * instruction. Equivalent OS-supplied mutex routines could be used too. * * If no system-specific TAS() is available (ie, HAS_TEST_AND_SET is not - * defined), then we fall back on an emulation that uses SysV semaphores. - * This emulation will be MUCH MUCH MUCH slower than a proper TAS() + * defined), then we fall back on an emulation that uses SysV semaphores + * (see spin.c). This emulation will be MUCH MUCH slower than a proper TAS() * implementation, because of the cost of a kernel call per lock or unlock. * An old report is that Postgres spends around 40% of its time in semop(2) * when using the SysV semaphore code. * - * Note to implementors: there are default implementations for all these - * macros at the bottom of the file. Check if your platform can use - * these or needs to override them. - *---------- + * + * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $Id: s_lock.h,v 1.95 2001/09/29 04:02:26 tgl Exp $ + * + *------------------------------------------------------------------------- */ #ifndef S_LOCK_H #define S_LOCK_H @@ -476,7 +459,7 @@ extern slock_t wc_tas(volatile slock_t *lock); /* * Fake spinlock implementation using SysV semaphores --- slow and prone * to fall foul of kernel limits on number of semaphores, so don't use this - * unless you must! + * unless you must! The subroutines appear in spin.c. 
*/ typedef struct @@ -500,7 +483,7 @@ extern int tas_sema(volatile slock_t *lock); -/**************************************************************************** +/* * Default Definitions - override these above as needed. */ @@ -512,16 +495,6 @@ extern int tas_sema(volatile slock_t *lock); } while (0) #endif /* S_LOCK */ -#if !defined(S_LOCK_SLEEP) -#define S_LOCK_SLEEP(lock,spins,timeout) \ - s_lock_sleep((spins), (timeout), 0, (lock), __FILE__, __LINE__) -#endif /* S_LOCK_SLEEP */ - -#if !defined(S_LOCK_SLEEP_INTERVAL) -#define S_LOCK_SLEEP_INTERVAL(lock,spins,timeout,microsec) \ - s_lock_sleep((spins), (timeout), (microsec), (lock), __FILE__, __LINE__) -#endif /* S_LOCK_SLEEP_INTERVAL */ - #if !defined(S_LOCK_FREE) #define S_LOCK_FREE(lock) (*(lock) == 0) #endif /* S_LOCK_FREE */ @@ -542,13 +515,9 @@ extern int tas(volatile slock_t *lock); /* in port/.../tas.s, or #endif /* TAS */ -/**************************************************************************** +/* * Platform-independent out-of-line support routines */ -extern void s_lock(volatile slock_t *lock, - const char *file, const int line); -extern void s_lock_sleep(unsigned spins, int timeout, int microsec, - volatile slock_t *lock, - const char *file, const int line); +extern void s_lock(volatile slock_t *lock, const char *file, int line); #endif /* S_LOCK_H */ diff --git a/src/include/storage/shmem.h b/src/include/storage/shmem.h index e01a0f0143..1043beb634 100644 --- a/src/include/storage/shmem.h +++ b/src/include/storage/shmem.h @@ -7,14 +7,13 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: shmem.h,v 1.30 2001/09/07 00:27:30 tgl Exp $ + * $Id: shmem.h,v 1.31 2001/09/29 04:02:27 tgl Exp $ * *------------------------------------------------------------------------- */ #ifndef SHMEM_H #define SHMEM_H -#include "storage/spin.h" #include "utils/hsearch.h" @@ -55,9 +54,6 @@ extern DLLIMPORT 
SHMEM_OFFSET ShmemBase; (((xx_offs) != 0) && ((xx_offs) != INVALID_OFFSET)) -extern SPINLOCK ShmemLock; -extern SPINLOCK ShmemIndexLock; - /* shmemqueue.c */ typedef struct SHM_QUEUE { @@ -66,16 +62,15 @@ typedef struct SHM_QUEUE } SHM_QUEUE; /* shmem.c */ -extern void InitShmemAllocation(PGShmemHeader *seghdr); +extern void InitShmemAllocation(void *seghdr); extern void *ShmemAlloc(Size size); extern bool ShmemIsValid(unsigned long addr); +extern void InitShmemIndex(void); extern HTAB *ShmemInitHash(char *name, long init_size, long max_size, HASHCTL *infoP, int hash_flags); extern void *ShmemInitStruct(char *name, Size size, bool *foundPtr); -typedef int TableID; - /* size constants for the shmem index table */ /* max size of data structure string name */ #define SHMEM_INDEX_KEYSIZE (50) diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h index 2e1ac7bfb1..8ecb9024d2 100644 --- a/src/include/storage/sinval.h +++ b/src/include/storage/sinval.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: sinval.h,v 1.21 2001/08/26 16:56:02 tgl Exp $ + * $Id: sinval.h,v 1.22 2001/09/29 04:02:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,7 +16,6 @@ #include "storage/backendid.h" #include "storage/itemptr.h" -#include "storage/spin.h" /* @@ -64,8 +63,6 @@ typedef union } SharedInvalidationMessage; -extern SPINLOCK SInvalLock; - extern int SInvalShmemSize(int maxBackends); extern void CreateSharedInvalidationState(int maxBackends); extern void InitBackendSharedInvalidationState(void); @@ -78,7 +75,7 @@ extern bool DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself); extern bool TransactionIdIsInProgress(TransactionId xid); extern TransactionId GetOldestXmin(bool allDbs); extern int CountActiveBackends(void); -/* Use "struct proc", not PROC, to avoid including proc.h here */ 
-extern struct proc *BackendIdGetProc(BackendId procId); +/* Use "struct PROC", not PROC, to avoid including proc.h here */ +extern struct PROC *BackendIdGetProc(BackendId procId); #endif /* SINVAL_H */ diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index b4193b5fa8..d20ce73a8f 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: smgr.h,v 1.30 2001/06/27 23:31:39 tgl Exp $ + * $Id: smgr.h,v 1.31 2001/09/29 04:02:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -17,9 +17,9 @@ #include "access/xlog.h" #include "storage/relfilenode.h" #include "storage/block.h" -#include "storage/spin.h" #include "utils/rel.h" + #define SM_FAIL 0 #define SM_SUCCESS 1 @@ -79,8 +79,6 @@ extern int mdabort(void); extern int mdsync(void); /* mm.c */ -extern SPINLOCK MMCacheLock; - extern int mminit(void); extern int mmcreate(Relation reln); extern int mmunlink(RelFileNode rnode); diff --git a/src/include/storage/spin.h b/src/include/storage/spin.h index 1d488e5476..bc6ad3fde0 100644 --- a/src/include/storage/spin.h +++ b/src/include/storage/spin.h @@ -1,42 +1,77 @@ /*------------------------------------------------------------------------- * * spin.h - * synchronization routines + * Hardware-independent implementation of spinlocks. + * + * + * The hardware-independent interface to spinlocks is defined by the + * typedef "slock_t" and these macros: + * + * void SpinLockInit(slock_t *lock) + * Initialize a spinlock (to the unlocked state). + * + * void SpinLockAcquire(slock_t *lock) + * Acquire a spinlock, waiting if necessary. + * Time out and abort() if unable to acquire the lock in a + * "reasonable" amount of time --- typically ~ 1 minute. + * Cancel/die interrupts are held off until the lock is released. 
+ * + * void SpinLockRelease(slock_t *lock) + * Unlock a previously acquired lock. + * Release the cancel/die interrupt holdoff. + * + * void SpinLockAcquire_NoHoldoff(slock_t *lock) + * void SpinLockRelease_NoHoldoff(slock_t *lock) + * Same as above, except no interrupt holdoff processing is done. + * This pair of macros may be used when there is a surrounding + * interrupt holdoff. + * + * bool SpinLockFree(slock_t *lock) + * Tests if the lock is free. Returns TRUE if free, FALSE if locked. + * This does *not* change the state of the lock. + * + * Callers must beware that the macro argument may be evaluated multiple + * times! + * + * The macros are implemented in terms of hardware-dependent macros + * supplied by s_lock.h. * * * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: spin.h,v 1.15 2001/03/22 04:01:09 momjian Exp $ + * $Id: spin.h,v 1.16 2001/09/29 04:02:27 tgl Exp $ * *------------------------------------------------------------------------- */ #ifndef SPIN_H #define SPIN_H -#include "storage/ipc.h" +#include "storage/s_lock.h" +#include "miscadmin.h" -/* - * two implementations of spin locks - * - * Where TAS instruction is available: real spin locks. - * See src/storage/ipc/s_lock.c for details. - * - * Otherwise: fake spin locks using semaphores. 
see spin.c - */ -typedef int SPINLOCK; +#define SpinLockInit(lock) S_INIT_LOCK(lock) + +#define SpinLockAcquire(lock) \ + do { \ + HOLD_INTERRUPTS(); \ + S_LOCK(lock); \ + } while (0) + +#define SpinLockAcquire_NoHoldoff(lock) S_LOCK(lock) -#ifdef LOCK_DEBUG -extern bool Trace_spinlocks; +#define SpinLockRelease(lock) \ + do { \ + S_UNLOCK(lock); \ + RESUME_INTERRUPTS(); \ + } while (0) -#endif +#define SpinLockRelease_NoHoldoff(lock) S_UNLOCK(lock) +#define SpinLockFree(lock) S_LOCK_FREE(lock) -extern int SLockShmemSize(void); -extern void CreateSpinlocks(PGShmemHeader *seghdr); -extern void SpinAcquire(SPINLOCK lockid); -extern void SpinRelease(SPINLOCK lockid); +extern void CreateSpinlocks(void); #endif /* SPIN_H */ -- 2.40.0