granicus.if.org Git - postgresql/commitdiff
Reduce ProcArrayLock contention by removing backends in batches.
author: Robert Haas <rhaas@postgresql.org>
Thu, 6 Aug 2015 15:52:51 +0000 (11:52 -0400)
committer: Robert Haas <rhaas@postgresql.org>
Thu, 6 Aug 2015 16:02:12 +0000 (12:02 -0400)
When a write transaction commits, it must clear its XID advertised via
the ProcArray, which requires that we hold ProcArrayLock in exclusive
mode in order to prevent concurrent processes running GetSnapshotData
from seeing inconsistent results.  When many processes try to commit
at once, ProcArrayLock must change hands repeatedly, with each
concurrent process trying to commit waking up to acquire the lock in
turn.  To make things more efficient, when more than one backend is
trying to commit a write transaction at the same time, have just one
of them acquire ProcArrayLock in exclusive mode and clear the XIDs of
all processes in the group.  Benchmarking reveals that this is much
more efficient at very high client counts.

Amit Kapila, heavily revised by me, with some review also from Pavan
Deolasee.

src/backend/access/transam/README
src/backend/storage/ipc/procarray.c
src/backend/storage/lmgr/proc.c
src/include/storage/proc.h

index bc68b470e09f817e6ddd4d7c44c7ca08db55d0f0..f6db580a0bf3c09bce2d26678712720fbd14d4fc 100644 (file)
@@ -252,6 +252,9 @@ implementation of this is that GetSnapshotData takes the ProcArrayLock in
 shared mode (so that multiple backends can take snapshots in parallel),
 but ProcArrayEndTransaction must take the ProcArrayLock in exclusive mode
 while clearing MyPgXact->xid at transaction end (either commit or abort).
+(To reduce context switching, when multiple transactions commit nearly
+simultaneously, we have one backend take ProcArrayLock and clear the XIDs
+of multiple processes at once.)
 
 ProcArrayEndTransaction also holds the lock while advancing the shared
 latestCompletedXid variable.  This allows GetSnapshotData to use
index 4f3c5c9dec9c128ea9ee8f04b5a52d9f3bb6c3e3..cced823de02080b276d676dcdabd442156b86396 100644 (file)
@@ -167,6 +167,9 @@ static int KnownAssignedXidsGetAndSetXmin(TransactionId *xarray,
 static TransactionId KnownAssignedXidsGetOldestXmin(void);
 static void KnownAssignedXidsDisplay(int trace_level);
 static void KnownAssignedXidsReset(void);
+static inline void ProcArrayEndTransactionInternal(PGPROC *proc,
+                                                               PGXACT *pgxact, TransactionId latestXid);
+static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid);
 
 /*
  * Report shared-memory space needed by CreateSharedProcArray.
@@ -399,26 +402,18 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
                 */
                Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
 
-               LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
-
-               pgxact->xid = InvalidTransactionId;
-               proc->lxid = InvalidLocalTransactionId;
-               pgxact->xmin = InvalidTransactionId;
-               /* must be cleared with xid/xmin: */
-               pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
-               pgxact->delayChkpt = false;             /* be sure this is cleared in abort */
-               proc->recoveryConflictPending = false;
-
-               /* Clear the subtransaction-XID cache too while holding the lock */
-               pgxact->nxids = 0;
-               pgxact->overflowed = false;
-
-               /* Also advance global latestCompletedXid while holding the lock */
-               if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
-                                                                 latestXid))
-                       ShmemVariableCache->latestCompletedXid = latestXid;
-
-               LWLockRelease(ProcArrayLock);
+               /*
+                * If we can immediately acquire ProcArrayLock, we clear our own XID
+                * and release the lock.  If not, use group XID clearing to improve
+                * efficiency.
+                */
+               if (LWLockConditionalAcquire(ProcArrayLock, LW_EXCLUSIVE))
+               {
+                       ProcArrayEndTransactionInternal(proc, pgxact, latestXid);
+                       LWLockRelease(ProcArrayLock);
+               }
+               else
+                       ProcArrayGroupClearXid(proc, latestXid);
        }
        else
        {
@@ -441,6 +436,137 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
        }
 }
 
+/*
+ * Mark a write transaction as no longer running.
+ *
+ * We don't do any locking here; caller must handle that.
+ */
+static inline void
+ProcArrayEndTransactionInternal(PGPROC *proc, PGXACT *pgxact,
+                                                               TransactionId latestXid)
+{
+       pgxact->xid = InvalidTransactionId;
+       proc->lxid = InvalidLocalTransactionId;
+       pgxact->xmin = InvalidTransactionId;
+       /* must be cleared with xid/xmin: */
+       pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
+       pgxact->delayChkpt = false;             /* be sure this is cleared in abort */
+       proc->recoveryConflictPending = false;
+
+       /* Clear the subtransaction-XID cache too while holding the lock */
+       pgxact->nxids = 0;
+       pgxact->overflowed = false;
+
+       /* Also advance global latestCompletedXid while holding the lock */
+       if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
+                                                         latestXid))
+               ShmemVariableCache->latestCompletedXid = latestXid;
+}
+
+/*
+ * ProcArrayGroupClearXid -- group XID clearing
+ *
+ * When we cannot immediately acquire ProcArrayLock in exclusive mode at
+ * commit time, add ourselves to a list of processes that need their XIDs
+ * cleared.  The first process to add itself to the list will acquire
+ * ProcArrayLock in exclusive mode and perform ProcArrayEndTransactionInternal
+ * on behalf of all group members.  This avoids a great deal of context
+ * switching when many processes are trying to commit at once, since the lock
+ * only needs to be handed from the last share-locker to one process waiting
+ * for the exclusive lock, rather than to each one in turn.
+ */
+static void
+ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
+{
+       volatile PROC_HDR *procglobal = ProcGlobal;
+       uint32          nextidx;
+       uint32          wakeidx;
+       int                     extraWaits = -1;
+
+       /* We should definitely have an XID to clear. */
+       Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
+
+       /* Add ourselves to the list of processes needing a group XID clear. */
+       proc->backendLatestXid = latestXid;
+       while (true)
+       {
+               nextidx = pg_atomic_read_u32(&procglobal->nextClearXidElem);
+               pg_atomic_write_u32(&proc->nextClearXidElem, nextidx);
+
+               if (pg_atomic_compare_exchange_u32(&procglobal->nextClearXidElem,
+                                                                                  &nextidx,
+                                                                                  (uint32) proc->pgprocno))
+                       break;
+       }
+
+       /* If the list was not empty, the leader will clear our XID. */
+       if (nextidx != INVALID_PGPROCNO)
+       {
+               /* Sleep until the leader clears our XID. */
+               while (pg_atomic_read_u32(&proc->nextClearXidElem) != INVALID_PGPROCNO)
+               {
+                       extraWaits++;
+                       PGSemaphoreLock(&proc->sem);
+               }
+
+               /* Fix semaphore count for any absorbed wakeups */
+               while (extraWaits-- > 0)
+                       PGSemaphoreUnlock(&proc->sem);
+               return;
+       }
+
+       /* We are the leader.  Acquire the lock on behalf of everyone. */
+       LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+
+       /*
+        * Now that we've got the lock, clear the list of processes waiting for
+        * group XID clearing, saving a pointer to the head of the list.
+        */
+       while (true)
+       {
+               nextidx = pg_atomic_read_u32(&procglobal->nextClearXidElem);
+               if (pg_atomic_compare_exchange_u32(&procglobal->nextClearXidElem,
+                                                                                  &nextidx,
+                                                                                  INVALID_PGPROCNO))
+                       break;
+       }
+
+       /* Remember head of list so we can perform wakeups after dropping lock. */
+       wakeidx = nextidx;
+
+       /* Walk the list and clear all XIDs. */
+       while (nextidx != INVALID_PGPROCNO)
+       {
+               PGPROC  *proc = &allProcs[nextidx];
+               PGXACT  *pgxact = &allPgXact[nextidx];
+
+               ProcArrayEndTransactionInternal(proc, pgxact, proc->backendLatestXid);
+
+               /* Move to next proc in list. */
+               nextidx = pg_atomic_read_u32(&proc->nextClearXidElem);
+       }
+
+       /* We're done with the lock now. */
+       LWLockRelease(ProcArrayLock);
+
+       /*
+        * Now that we've released the lock, go back and wake everybody up.  We
+        * don't do this under the lock so as to keep lock hold times to a
+        * minimum.  The system calls we need to perform to wake other processes
+        * up are probably much slower than the simple memory writes we did while
+        * holding the lock.
+        */
+       while (wakeidx != INVALID_PGPROCNO)
+       {
+               PGPROC  *proc = &allProcs[wakeidx];
+
+               wakeidx = pg_atomic_read_u32(&proc->nextClearXidElem);
+               pg_atomic_write_u32(&proc->nextClearXidElem, INVALID_PGPROCNO);
+
+               if (proc != MyProc)
+                       PGSemaphoreUnlock(&proc->sem);
+       }
+}
 
 /*
  * ProcArrayClearTransaction -- clear the transaction fields
index 884e91b7364a32fd0227708bd0ad672b17150a11..93f2656afb812c967bd6ef372316c941181c9c5f 100644 (file)
@@ -181,6 +181,7 @@ InitProcGlobal(void)
        ProcGlobal->startupBufferPinWaitBufId = -1;
        ProcGlobal->walwriterLatch = NULL;
        ProcGlobal->checkpointerLatch = NULL;
+       pg_atomic_init_u32(&ProcGlobal->nextClearXidElem, INVALID_PGPROCNO);
 
        /*
         * Create and initialize all the PGPROC structures we'll need.  There are
@@ -393,6 +394,10 @@ InitProcess(void)
        MyProc->syncRepState = SYNC_REP_NOT_WAITING;
        SHMQueueElemInit(&(MyProc->syncRepLinks));
 
+       /* Initialize fields for group XID clearing. */
+       MyProc->backendLatestXid = InvalidTransactionId;
+       pg_atomic_init_u32(&MyProc->nextClearXidElem, INVALID_PGPROCNO);
+
        /*
         * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch
         * on it.  That allows us to repoint the process latch, which so far
index 202a672bca5286da305103581340930c38ce65ac..421bb587df5bf365ef482025191fe800f6274cf5 100644 (file)
@@ -58,6 +58,12 @@ struct XidCache
  */
 #define                FP_LOCK_SLOTS_PER_BACKEND 16
 
+/*
+ * An invalid pgprocno.  Must be larger than the maximum number of PGPROC
+ * structures we could possibly have.  See comments for MAX_BACKENDS.
+ */
+#define INVALID_PGPROCNO               PG_INT32_MAX
+
 /*
  * Each backend has a PGPROC struct in shared memory.  There is also a list of
  * currently-unused PGPROC structs that will be reallocated to new backends.
@@ -135,6 +141,10 @@ struct PGPROC
 
        struct XidCache subxids;        /* cache for subtransaction XIDs */
 
+       /* Support for group XID clearing. */
+       volatile pg_atomic_uint32       nextClearXidElem;
+       TransactionId   backendLatestXid;
+
        /* Per-backend LWLock.  Protects fields below. */
        LWLock     *backendLock;        /* protects the fields below */
 
@@ -196,6 +206,8 @@ typedef struct PROC_HDR
        PGPROC     *autovacFreeProcs;
        /* Head of list of bgworker free PGPROC structures */
        PGPROC     *bgworkerFreeProcs;
+       /* First pgproc waiting for group XID clear */
+       volatile pg_atomic_uint32 nextClearXidElem;
        /* WALWriter process's latch */
        Latch      *walwriterLatch;
        /* Checkpointer process's latch */