if (pcxt->nworkers == 0)
return;
+ /* We need to be a lock group leader. */
+ BecomeLockGroupLeader();
+
/* If we do have workers, we'd better have a DSM segment. */
Assert(pcxt->seg != NULL);
* backend-local state to match the original backend.
*/
+ /*
+ * Join locking group. We must do this before anything that could try
+ * to acquire a heavyweight lock, because any heavyweight locks acquired
+ * to this point could block either directly against the parallel group
+ * leader or against some process which in turn waits for a lock that
+ * conflicts with the parallel group leader, causing an undetected
+ * deadlock. (If we can't join the lock group, the leader has gone away,
+ * so just exit quietly.)
+ */
+ if (!BecomeLockGroupMember(fps->parallel_master_pgproc,
+ fps->parallel_master_pid))
+ return;
+
/*
* Load libraries that were loaded by original backend. We want to do
* this before restoring GUCs, because the libraries might define custom
principle that autovacuum has a low locking priority (eg it must not block
DDL on the table).
+Group Locking
+-------------
+
+As if all of that weren't already complicated enough, PostgreSQL now supports
+parallelism (see src/backend/access/transam/README.parallel), which means that
+we might need to resolve deadlocks that occur between gangs of related processes
+rather than individual processes. This doesn't change the basic deadlock
+detection algorithm very much, but it makes the bookkeeping more complicated.
+
+We choose to regard locks held by processes in the same parallel group as
+non-conflicting. This means that two processes in a parallel group can hold
+a self-exclusive lock on the same relation at the same time, or one process
+can acquire an AccessShareLock while the other already holds AccessExclusiveLock.
+This might seem dangerous and could be in some cases (more on that below), but
+if we didn't do this then parallel query would be extremely prone to
+self-deadlock. For example, a parallel query against a relation on which the
+leader had already AccessExclusiveLock would hang, because the workers would
+try to lock the same relation and be blocked by the leader; yet the leader can't
+finish until it receives completion indications from all workers. An undetected
+deadlock results. This is far from the only scenario where such a problem
+happens. The same thing will occur if the leader holds only AccessShareLock,
+the worker seeks AccessShareLock, but between the time the leader attempts to
+acquire the lock and the time the worker attempts to acquire it, some other
+process queues up waiting for an AccessExclusiveLock. In this case, too, an
+indefinite hang results.
+
+It might seem that we could predict which locks the workers will attempt to
+acquire and ensure before going parallel that those locks would be acquired
+successfully. But this is very difficult to make work in a general way. For
+example, a parallel worker's portion of the query plan could involve an
+SQL-callable function which generates a query dynamically, and that query
+might happen to hit a table on which the leader happens to hold
+AccessExcusiveLock. By imposing enough restrictions on what workers can do,
+we could eventually create a situation where their behavior can be adequately
+restricted, but these restrictions would be fairly onerous, and even then, the
+system required to decide whether the workers will succeed at acquiring the
+necessary locks would be complex and possibly buggy.
+
+So, instead, we take the approach of deciding that locks within a lock group
+do not conflict. This eliminates the possibility of an undetected deadlock,
+but also opens up some problem cases: if the leader and worker try to do some
+operation at the same time which would ordinarily be prevented by the heavyweight
+lock mechanism, undefined behavior might result. In practice, the dangers are
+modest. The leader and worker share the same transaction, snapshot, and combo
+CID hash, and neither can perform any DDL or, indeed, write any data at all.
+Thus, for either to read a table locked exclusively by the other is safe enough.
+Problems would occur if the leader initiated parallelism from a point in the
+code at which it had some backend-private state that made table access from
+another process unsafe, for example after calling SetReindexProcessing and
+before calling ResetReindexProcessing, catastrophe could ensue, because the
+worker won't have that state. Similarly, problems could occur with certain
+kinds of non-relation locks, such as relation extension locks. It's no safer
+for two related processes to extend the same relation at the time than for
+unrelated processes to do the same. However, since parallel mode is strictly
+read-only at present, neither this nor most of the similar cases can arise at
+present. To allow parallel writes, we'll either need to (1) further enhance
+the deadlock detector to handle those types of locks in a different way than
+other types; or (2) have parallel workers use some other mutual exclusion
+method for such cases; or (3) revise those cases so that they no longer use
+heavyweight locking in the first place (which is not a crazy idea, given that
+such lock acquisitions are not expected to deadlock and that heavyweight lock
+acquisition is fairly slow anyway).
+
User Locks (Advisory Locks)
---------------------------
{
PGPROC *waiter; /* the waiting process */
PGPROC *blocker; /* the process it is waiting for */
+ LOCK *lock; /* the lock it is waiting for */
int pred; /* workspace for TopoSort */
int link; /* workspace for TopoSort */
} EDGE;
EDGE *softEdges, int *nSoftEdges);
static bool FindLockCycleRecurse(PGPROC *checkProc, int depth,
EDGE *softEdges, int *nSoftEdges);
+static bool FindLockCycleRecurseMember(PGPROC *checkProc,
+ PGPROC *checkProcLeader,
+ int depth, EDGE *softEdges, int *nSoftEdges);
static bool ExpandConstraints(EDGE *constraints, int nConstraints);
static bool TopoSort(LOCK *lock, EDGE *constraints, int nConstraints,
PGPROC **ordering);
EDGE *softEdges, /* output argument */
int *nSoftEdges) /* output argument */
{
- PGPROC *proc;
- PGXACT *pgxact;
- LOCK *lock;
- PROCLOCK *proclock;
- SHM_QUEUE *procLocks;
- LockMethod lockMethodTable;
- PROC_QUEUE *waitQueue;
- int queue_size;
- int conflictMask;
int i;
- int numLockModes,
- lm;
+ dlist_iter iter;
+
+ /*
+ * If this process is a lock group member, check the leader instead. (Note
+ * that we might be the leader, in which case this is a no-op.)
+ */
+ if (checkProc->lockGroupLeader != NULL)
+ checkProc = checkProc->lockGroupLeader;
/*
* Have we already seen this proc?
visitedProcs[nVisitedProcs++] = checkProc;
/*
- * If the proc is not waiting, we have no outgoing waits-for edges.
+ * If the process is waiting, there is an outgoing waits-for edge to each
+ * process that blocks it.
+ */
+ if (checkProc->links.next != NULL && checkProc->waitLock != NULL &&
+ FindLockCycleRecurseMember(checkProc, checkProc, depth, softEdges,
+ nSoftEdges))
+ return true;
+
+ /*
+ * If the process is not waiting, there could still be outgoing waits-for
+ * edges if it is part of a lock group, because other members of the lock
+ * group might be waiting even though this process is not. (Given lock
+ * groups {A1, A2} and {B1, B2}, if A1 waits for B1 and B2 waits for A2,
+ * that is a deadlock even neither of B1 and A2 are waiting for anything.)
*/
- if (checkProc->links.next == NULL)
- return false;
- lock = checkProc->waitLock;
- if (lock == NULL)
- return false;
+ dlist_foreach(iter, &checkProc->lockGroupMembers)
+ {
+ PGPROC *memberProc;
+
+ memberProc = dlist_container(PGPROC, lockGroupLink, iter.cur);
+
+ if (memberProc->links.next != NULL && memberProc->waitLock != NULL &&
+ memberProc != checkProc &&
+ FindLockCycleRecurseMember(memberProc, checkProc, depth, softEdges,
+ nSoftEdges))
+ return true;
+ }
+
+ return false;
+}
+
+static bool
+FindLockCycleRecurseMember(PGPROC *checkProc,
+ PGPROC *checkProcLeader,
+ int depth,
+ EDGE *softEdges, /* output argument */
+ int *nSoftEdges) /* output argument */
+{
+ PGPROC *proc;
+ LOCK *lock = checkProc->waitLock;
+ PGXACT *pgxact;
+ PROCLOCK *proclock;
+ SHM_QUEUE *procLocks;
+ LockMethod lockMethodTable;
+ PROC_QUEUE *waitQueue;
+ int queue_size;
+ int conflictMask;
+ int i;
+ int numLockModes,
+ lm;
+
lockMethodTable = GetLocksMethodTable(lock);
numLockModes = lockMethodTable->numLockModes;
conflictMask = lockMethodTable->conflictTab[checkProc->waitLockMode];
while (proclock)
{
+ PGPROC *leader;
+
proc = proclock->tag.myProc;
pgxact = &ProcGlobal->allPgXact[proc->pgprocno];
+ leader = proc->lockGroupLeader == NULL ? proc : proc->lockGroupLeader;
- /* A proc never blocks itself */
- if (proc != checkProc)
+ /* A proc never blocks itself or any other lock group member */
+ if (leader != checkProcLeader)
{
for (lm = 1; lm <= numLockModes; lm++)
{
for (i = 0; i < queue_size; i++)
{
+ PGPROC *leader;
+
proc = procs[i];
+ leader = proc->lockGroupLeader == NULL ? proc :
+ proc->lockGroupLeader;
- /* Done when we reach the target proc */
- if (proc == checkProc)
+ /*
+ * TopoSort will always return an ordering with group members
+ * adjacent to each other in the wait queue (see comments
+ * therein). So, as soon as we reach a process in the same lock
+ * group as checkProc, we know we've found all the conflicts that
+ * precede any member of the lock group lead by checkProcLeader.
+ */
+ if (leader == checkProcLeader)
break;
/* Is there a conflict with this guy's request? */
* Add this edge to the list of soft edges in the cycle
*/
Assert(*nSoftEdges < MaxBackends);
- softEdges[*nSoftEdges].waiter = checkProc;
- softEdges[*nSoftEdges].blocker = proc;
+ softEdges[*nSoftEdges].waiter = checkProcLeader;
+ softEdges[*nSoftEdges].blocker = leader;
+ softEdges[*nSoftEdges].lock = lock;
(*nSoftEdges)++;
return true;
}
}
else
{
+ PGPROC *lastGroupMember = NULL;
+
/* Use the true lock wait queue order */
waitQueue = &(lock->waitProcs);
- queue_size = waitQueue->size;
- proc = (PGPROC *) waitQueue->links.next;
+ /*
+ * Find the last member of the lock group that is present in the wait
+ * queue. Anything after this is not a soft lock conflict. If group
+ * locking is not in use, then we know immediately which process we're
+ * looking for, but otherwise we've got to search the wait queue to
+ * find the last process actually present.
+ */
+ if (checkProc->lockGroupLeader == NULL)
+ lastGroupMember = checkProc;
+ else
+ {
+ proc = (PGPROC *) waitQueue->links.next;
+ queue_size = waitQueue->size;
+ while (queue_size-- > 0)
+ {
+ if (proc->lockGroupLeader == checkProcLeader)
+ lastGroupMember = proc;
+ proc = (PGPROC *) proc->links.next;
+ }
+ Assert(lastGroupMember != NULL);
+ }
+ /*
+ * OK, now rescan (or scan) the queue to identify the soft conflicts.
+ */
+ queue_size = waitQueue->size;
+ proc = (PGPROC *) waitQueue->links.next;
while (queue_size-- > 0)
{
+ PGPROC *leader;
+
+ leader = proc->lockGroupLeader == NULL ? proc :
+ proc->lockGroupLeader;
+
/* Done when we reach the target proc */
- if (proc == checkProc)
+ if (proc == lastGroupMember)
break;
/* Is there a conflict with this guy's request? */
- if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0)
+ if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0 &&
+ leader != checkProcLeader)
{
/* This proc soft-blocks checkProc */
if (FindLockCycleRecurse(proc, depth + 1,
* Add this edge to the list of soft edges in the cycle
*/
Assert(*nSoftEdges < MaxBackends);
- softEdges[*nSoftEdges].waiter = checkProc;
- softEdges[*nSoftEdges].blocker = proc;
+ softEdges[*nSoftEdges].waiter = checkProcLeader;
+ softEdges[*nSoftEdges].blocker = leader;
+ softEdges[*nSoftEdges].lock = lock;
(*nSoftEdges)++;
return true;
}
*/
for (i = nConstraints; --i >= 0;)
{
- PGPROC *proc = constraints[i].waiter;
- LOCK *lock = proc->waitLock;
+ LOCK *lock = constraints[i].lock;
/* Did we already make a list for this lock? */
for (j = nWaitOrders; --j >= 0;)
PGPROC *proc;
int i,
j,
+ jj,
k,
+ kk,
last;
/* First, fill topoProcs[] array with the procs in their current order */
* stores its list link in constraints[i].link (note any constraint will
* be in just one list). The array index for the before-proc of the i'th
* constraint is remembered in constraints[i].pred.
+ *
+ * Note that it's not necessarily the case that every constraint affects
+ * this particular wait queue. Prior to group locking, a process could be
+ * waiting for at most one lock. But a lock group can be waiting for
+ * zero, one, or multiple locks. Since topoProcs[] is an array of the
+ * processes actually waiting, while constraints[] is an array of group
+ * leaders, we've got to scan through topoProcs[] for each constraint,
+ * checking whether both a waiter and a blocker for that group are
+ * present. If so, the constraint is relevant to this wait queue; if not,
+ * it isn't.
*/
MemSet(beforeConstraints, 0, queue_size * sizeof(int));
MemSet(afterConstraints, 0, queue_size * sizeof(int));
for (i = 0; i < nConstraints; i++)
{
+ /*
+ * Find a representative process that is on the lock queue and part of
+ * the waiting lock group. This may or may not be the leader, which
+ * may or may not be waiting at all. If there are any other processes
+ * in the same lock group on the queue, set their number of
+ * beforeConstraints to -1 to indicate that they should be emitted
+ * with their groupmates rather than considered separately.
+ */
proc = constraints[i].waiter;
- /* Ignore constraint if not for this lock */
- if (proc->waitLock != lock)
- continue;
- /* Find the waiter proc in the array */
+ Assert(proc != NULL);
+ jj = -1;
for (j = queue_size; --j >= 0;)
{
- if (topoProcs[j] == proc)
+ PGPROC *waiter = topoProcs[j];
+
+ if (waiter == proc || waiter->lockGroupLeader == proc)
+ {
+ Assert(waiter->waitLock == lock);
+ if (jj == -1)
+ jj = j;
+ else
+ {
+ Assert(beforeConstraints[j] <= 0);
+ beforeConstraints[j] = -1;
+ }
break;
+ }
}
- Assert(j >= 0); /* should have found a match */
- /* Find the blocker proc in the array */
+
+ /* If no matching waiter, constraint is not relevant to this lock. */
+ if (jj < 0)
+ continue;
+
+ /*
+ * Similarly, find a representative process that is on the lock queue
+ * and waiting for the blocking lock group. Again, this could be the
+ * leader but does not need to be.
+ */
proc = constraints[i].blocker;
+ Assert(proc != NULL);
+ kk = -1;
for (k = queue_size; --k >= 0;)
{
- if (topoProcs[k] == proc)
- break;
+ PGPROC *blocker = topoProcs[k];
+
+ if (blocker == proc || blocker->lockGroupLeader == proc)
+ {
+ Assert(blocker->waitLock == lock);
+ if (kk == -1)
+ kk = k;
+ else
+ {
+ Assert(beforeConstraints[k] <= 0);
+ beforeConstraints[k] = -1;
+ }
+ }
}
- Assert(k >= 0); /* should have found a match */
- beforeConstraints[j]++; /* waiter must come before */
+
+ /* If no matching blocker, constraint is not relevant to this lock. */
+ if (kk < 0)
+ continue;
+
+ beforeConstraints[jj]++; /* waiter must come before */
/* add this constraint to list of after-constraints for blocker */
- constraints[i].pred = j;
- constraints[i].link = afterConstraints[k];
- afterConstraints[k] = i + 1;
+ constraints[i].pred = jj;
+ constraints[i].link = afterConstraints[kk];
+ afterConstraints[kk] = i + 1;
}
+
/*--------------------
* Now scan the topoProcs array backwards. At each step, output the
- * last proc that has no remaining before-constraints, and decrease
- * the beforeConstraints count of each of the procs it was constrained
- * against.
+ * last proc that has no remaining before-constraints plus any other
+ * members of the same lock group; then decrease the beforeConstraints
+ * count of each of the procs it was constrained against.
* i = index of ordering[] entry we want to output this time
* j = search index for topoProcs[]
* k = temp for scanning constraint list for proc j
*--------------------
*/
last = queue_size - 1;
- for (i = queue_size; --i >= 0;)
+ for (i = queue_size - 1; i >= 0;)
{
+ int c;
+ int nmatches = 0;
+
/* Find next candidate to output */
while (topoProcs[last] == NULL)
last--;
if (topoProcs[j] != NULL && beforeConstraints[j] == 0)
break;
}
+
/* If no available candidate, topological sort fails */
if (j < 0)
return false;
- /* Output candidate, and mark it done by zeroing topoProcs[] entry */
- ordering[i] = topoProcs[j];
- topoProcs[j] = NULL;
+
+ /*
+ * Output everything in the lock group. There's no point in outputing
+ * an ordering where members of the same lock group are not
+ * consecutive on the wait queue: if some other waiter is between two
+ * requests that belong to the same group, then either it conflicts
+ * with both of them and is certainly not a solution; or it conflicts
+ * with at most one of them and is thus isomorphic to an ordering
+ * where the group members are consecutive.
+ */
+ proc = topoProcs[j];
+ if (proc->lockGroupLeader != NULL)
+ proc = proc->lockGroupLeader;
+ Assert(proc != NULL);
+ for (c = 0; c <= last; ++c)
+ {
+ if (topoProcs[c] == proc || (topoProcs[c] != NULL &&
+ topoProcs[c]->lockGroupLeader == proc))
+ {
+ ordering[i - nmatches] = topoProcs[c];
+ topoProcs[c] = NULL;
+ ++nmatches;
+ }
+ }
+ Assert(nmatches > 0);
+ i -= nmatches;
+
/* Update beforeConstraints counts of its predecessors */
for (k = afterConstraints[j]; k > 0; k = constraints[k - 1].link)
beforeConstraints[constraints[k - 1].pred]--;
#include "access/transam.h"
#include "access/twophase.h"
#include "access/twophase_rmgr.h"
+#include "access/xact.h"
#include "access/xlog.h"
#include "miscadmin.h"
#include "pg_trace.h"
{
uint32 partition = LockHashPartition(hashcode);
+ /*
+ * It might seem unsafe to access proclock->groupLeader without a lock,
+ * but it's not really. Either we are initializing a proclock on our
+ * own behalf, in which case our group leader isn't changing because
+ * the group leader for a process can only ever be changed by the
+ * process itself; or else we are transferring a fast-path lock to the
+ * main lock table, in which case that process can't change it's lock
+ * group leader without first releasing all of its locks (and in
+ * particular the one we are currently transferring).
+ */
+ proclock->groupLeader = proc->lockGroupLeader != NULL ?
+ proc->lockGroupLeader : proc;
proclock->holdMask = 0;
proclock->releaseMask = 0;
/* Add proclock to appropriate lists */
* NOTES:
* Here's what makes this complicated: one process's locks don't
* conflict with one another, no matter what purpose they are held for
- * (eg, session and transaction locks do not conflict).
- * So, we must subtract off our own locks when determining whether the
- * requested new lock conflicts with those already held.
+ * (eg, session and transaction locks do not conflict). Nor do the locks
+ * of one process in a lock group conflict with those of another process in
+ * the same group. So, we must subtract off these locks when determining
+ * whether the requested new lock conflicts with those already held.
*/
int
LockCheckConflicts(LockMethod lockMethodTable,
{
int numLockModes = lockMethodTable->numLockModes;
LOCKMASK myLocks;
- LOCKMASK otherLocks;
+ int conflictMask = lockMethodTable->conflictTab[lockmode];
+ int conflictsRemaining[MAX_LOCKMODES];
+ int totalConflictsRemaining = 0;
int i;
+ SHM_QUEUE *procLocks;
+ PROCLOCK *otherproclock;
/*
* first check for global conflicts: If no locks conflict with my request,
* type of lock that conflicts with request. Bitwise compare tells if
* there is a conflict.
*/
- if (!(lockMethodTable->conflictTab[lockmode] & lock->grantMask))
+ if (!(conflictMask & lock->grantMask))
{
PROCLOCK_PRINT("LockCheckConflicts: no conflict", proclock);
return STATUS_OK;
}
/*
- * Rats. Something conflicts. But it could still be my own lock. We have
- * to construct a conflict mask that does not reflect our own locks, but
- * only lock types held by other processes.
+ * Rats. Something conflicts. But it could still be my own lock, or
+ * a lock held by another member of my locking group. First, figure out
+ * how many conflicts remain after subtracting out any locks I hold
+ * myself.
*/
myLocks = proclock->holdMask;
- otherLocks = 0;
for (i = 1; i <= numLockModes; i++)
{
- int myHolding = (myLocks & LOCKBIT_ON(i)) ? 1 : 0;
+ if ((conflictMask & LOCKBIT_ON(i)) == 0)
+ {
+ conflictsRemaining[i] = 0;
+ continue;
+ }
+ conflictsRemaining[i] = lock->granted[i];
+ if (myLocks & LOCKBIT_ON(i))
+ --conflictsRemaining[i];
+ totalConflictsRemaining += conflictsRemaining[i];
+ }
- if (lock->granted[i] > myHolding)
- otherLocks |= LOCKBIT_ON(i);
+ /* If no conflicts remain, we get the lock. */
+ if (totalConflictsRemaining == 0)
+ {
+ PROCLOCK_PRINT("LockCheckConflicts: resolved (simple)", proclock);
+ return STATUS_OK;
+ }
+
+ /* If no group locking, it's definitely a conflict. */
+ if (proclock->groupLeader == MyProc && MyProc->lockGroupLeader == NULL)
+ {
+ Assert(proclock->tag.myProc == MyProc);
+ PROCLOCK_PRINT("LockCheckConflicts: conflicting (simple)",
+ proclock);
+ return STATUS_FOUND;
}
/*
- * now check again for conflicts. 'otherLocks' describes the types of
- * locks held by other processes. If one of these conflicts with the kind
- * of lock that I want, there is a conflict and I have to sleep.
+ * Locks held in conflicting modes by members of our own lock group are
+ * not real conflicts; we can subtract those out and see if we still have
+ * a conflict. This is O(N) in the number of processes holding or awaiting
+ * locks on this object. We could improve that by making the shared memory
+ * state more complex (and larger) but it doesn't seem worth it.
*/
- if (!(lockMethodTable->conflictTab[lockmode] & otherLocks))
+ procLocks = &(lock->procLocks);
+ otherproclock = (PROCLOCK *)
+ SHMQueueNext(procLocks, procLocks, offsetof(PROCLOCK, lockLink));
+ while (otherproclock != NULL)
{
- /* no conflict. OK to get the lock */
- PROCLOCK_PRINT("LockCheckConflicts: resolved", proclock);
- return STATUS_OK;
+ if (proclock != otherproclock &&
+ proclock->groupLeader == otherproclock->groupLeader &&
+ (otherproclock->holdMask & conflictMask) != 0)
+ {
+ int intersectMask = otherproclock->holdMask & conflictMask;
+
+ for (i = 1; i <= numLockModes; i++)
+ {
+ if ((intersectMask & LOCKBIT_ON(i)) != 0)
+ {
+ if (conflictsRemaining[i] <= 0)
+ elog(PANIC, "proclocks held do not match lock");
+ conflictsRemaining[i]--;
+ totalConflictsRemaining--;
+ }
+ }
+
+ if (totalConflictsRemaining == 0)
+ {
+ PROCLOCK_PRINT("LockCheckConflicts: resolved (group)",
+ proclock);
+ return STATUS_OK;
+ }
+ }
+ otherproclock = (PROCLOCK *)
+ SHMQueueNext(procLocks, &otherproclock->lockLink,
+ offsetof(PROCLOCK, lockLink));
}
- PROCLOCK_PRINT("LockCheckConflicts: conflicting", proclock);
+ /* Nope, it's a real conflict. */
+ PROCLOCK_PRINT("LockCheckConflicts: conflicting (group)", proclock);
return STATUS_FOUND;
}
PROCLOCKTAG proclocktag;
int partition;
+ /* Can't prepare a lock group follower. */
+ Assert(MyProc->lockGroupLeader == NULL ||
+ MyProc->lockGroupLeader == MyProc);
+
/* This is a critical section: any error means big trouble */
START_CRIT_SECTION();
proclocktag.myLock = lock;
proclocktag.myProc = newproc;
+ /*
+ * Update groupLeader pointer to point to the new proc. (We'd
+ * better not be a member of somebody else's lock group!)
+ */
+ Assert(proclock->groupLeader == proclock->tag.myProc);
+ proclock->groupLeader = newproc;
+
/*
* Update the proclock. We should not find any existing entry for
* the same hash key, since there can be only one entry for any
*/
if (!found)
{
+ Assert(proc->lockGroupLeader == NULL);
+ proclock->groupLeader = proc;
proclock->holdMask = 0;
proclock->releaseMask = 0;
/* Add proclock to appropriate lists */
/* Initialize myProcLocks[] shared memory queues. */
for (j = 0; j < NUM_LOCK_PARTITIONS; j++)
SHMQueueInit(&(procs[i].myProcLocks[j]));
+
+ /* Initialize lockGroupMembers list. */
+ dlist_init(&procs[i].lockGroupMembers);
}
/*
MyProc->backendLatestXid = InvalidTransactionId;
pg_atomic_init_u32(&MyProc->nextClearXidElem, INVALID_PGPROCNO);
+ /* Check that group locking fields are in a proper initial state. */
+ Assert(MyProc->lockGroupLeaderIdentifier == 0);
+ Assert(MyProc->lockGroupLeader == NULL);
+ Assert(dlist_is_empty(&MyProc->lockGroupMembers));
+
/*
* Acquire ownership of the PGPROC's latch, so that we can use WaitLatch
* on it. That allows us to repoint the process latch, which so far
OwnLatch(&MyProc->procLatch);
SwitchToSharedLatch();
+ /* Check that group locking fields are in a proper initial state. */
+ Assert(MyProc->lockGroupLeaderIdentifier == 0);
+ Assert(MyProc->lockGroupLeader == NULL);
+ Assert(dlist_is_empty(&MyProc->lockGroupMembers));
+
/*
* We might be reusing a semaphore that belonged to a failed process. So
* be careful and reinitialize its value here. (This is not strictly
if (MyReplicationSlot != NULL)
ReplicationSlotRelease();
+ /*
+ * Detach from any lock group of which we are a member. If the leader
+ * exist before all other group members, it's PGPROC will remain allocated
+ * until the last group process exits; that process must return the
+ * leader's PGPROC to the appropriate list.
+ */
+ if (MyProc->lockGroupLeader != NULL)
+ {
+ PGPROC *leader = MyProc->lockGroupLeader;
+ LWLock *leader_lwlock = LockHashPartitionLockByProc(leader);
+
+ LWLockAcquire(leader_lwlock, LW_EXCLUSIVE);
+ Assert(!dlist_is_empty(&leader->lockGroupMembers));
+ dlist_delete(&MyProc->lockGroupLink);
+ if (dlist_is_empty(&leader->lockGroupMembers))
+ {
+ leader->lockGroupLeaderIdentifier = 0;
+ leader->lockGroupLeader = NULL;
+ if (leader != MyProc)
+ {
+ procgloballist = leader->procgloballist;
+
+ /* Leader exited first; return its PGPROC. */
+ SpinLockAcquire(ProcStructLock);
+ leader->links.next = (SHM_QUEUE *) *procgloballist;
+ *procgloballist = leader;
+ SpinLockRelease(ProcStructLock);
+ }
+ }
+ else if (leader != MyProc)
+ MyProc->lockGroupLeader = NULL;
+ LWLockRelease(leader_lwlock);
+ }
+
/*
* Reset MyLatch to the process local one. This is so that signal
* handlers et al can continue using the latch after the shared latch
procgloballist = proc->procgloballist;
SpinLockAcquire(ProcStructLock);
- /* Return PGPROC structure (and semaphore) to appropriate freelist */
- proc->links.next = (SHM_QUEUE *) *procgloballist;
- *procgloballist = proc;
+ /*
+ * If we're still a member of a locking group, that means we're a leader
+ * which has somehow exited before its children. The last remaining child
+ * will release our PGPROC. Otherwise, release it now.
+ */
+ if (proc->lockGroupLeader == NULL)
+ {
+ /* Since lockGroupLeader is NULL, lockGroupMembers should be empty. */
+ Assert(dlist_is_empty(&proc->lockGroupMembers));
+
+ /* Return PGPROC structure (and semaphore) to appropriate freelist */
+ proc->links.next = (SHM_QUEUE *) *procgloballist;
+ *procgloballist = proc;
+ }
/* Update shared estimate of spins_per_delay */
ProcGlobal->spins_per_delay = update_spins_per_delay(ProcGlobal->spins_per_delay);
bool allow_autovacuum_cancel = true;
int myWaitStatus;
PGPROC *proc;
+ PGPROC *leader = MyProc->lockGroupLeader;
int i;
+ /*
+ * If group locking is in use, locks held my members of my locking group
+ * need to be included in myHeldLocks.
+ */
+ if (leader != NULL)
+ {
+ SHM_QUEUE *procLocks = &(lock->procLocks);
+ PROCLOCK *otherproclock;
+
+ otherproclock = (PROCLOCK *)
+ SHMQueueNext(procLocks, procLocks, offsetof(PROCLOCK, lockLink));
+ while (otherproclock != NULL)
+ {
+ if (otherproclock->groupLeader == leader)
+ myHeldLocks |= otherproclock->holdMask;
+ otherproclock = (PROCLOCK *)
+ SHMQueueNext(procLocks, &otherproclock->lockLink,
+ offsetof(PROCLOCK, lockLink));
+ }
+ }
+
/*
* Determine where to add myself in the wait queue.
*
proc = (PGPROC *) waitQueue->links.next;
for (i = 0; i < waitQueue->size; i++)
{
+ /*
+ * If we're part of the same locking group as this waiter, its
+ * locks neither conflict with ours nor contribute to aheadRequsts.
+ */
+ if (leader != NULL && leader == proc->lockGroupLeader)
+ {
+ proc = (PGPROC *) proc->links.next;
+ continue;
+ }
/* Must he wait for me? */
if (lockMethodTable->conflictTab[proc->waitLockMode] & myHeldLocks)
{
SetLatch(&proc->procLatch);
}
}
+
+/*
+ * BecomeLockGroupLeader - designate process as lock group leader
+ *
+ * Once this function has returned, other processes can join the lock group
+ * by calling BecomeLockGroupMember.
+ */
+void
+BecomeLockGroupLeader(void)
+{
+ LWLock *leader_lwlock;
+
+ /* If we already did it, we don't need to do it again. */
+ if (MyProc->lockGroupLeader == MyProc)
+ return;
+
+ /* We had better not be a follower. */
+ Assert(MyProc->lockGroupLeader == NULL);
+
+ /* Create single-member group, containing only ourselves. */
+ leader_lwlock = LockHashPartitionLockByProc(MyProc);
+ LWLockAcquire(leader_lwlock, LW_EXCLUSIVE);
+ MyProc->lockGroupLeader = MyProc;
+ MyProc->lockGroupLeaderIdentifier = MyProcPid;
+ dlist_push_head(&MyProc->lockGroupMembers, &MyProc->lockGroupLink);
+ LWLockRelease(leader_lwlock);
+}
+
+/*
+ * BecomeLockGroupMember - designate process as lock group member
+ *
+ * This is pretty straightforward except for the possibility that the leader
+ * whose group we're trying to join might exit before we manage to do so;
+ * and the PGPROC might get recycled for an unrelated process. To avoid
+ * that, we require the caller to pass the PID of the intended PGPROC as
+ * an interlock. Returns true if we successfully join the intended lock
+ * group, and false if not.
+ */
+bool
+BecomeLockGroupMember(PGPROC *leader, int pid)
+{
+ LWLock *leader_lwlock;
+ bool ok = false;
+
+ /* Group leader can't become member of group */
+ Assert(MyProc != leader);
+
+ /* PID must be valid. */
+ Assert(pid != 0);
+
+ /* Try to join the group. */
+ leader_lwlock = LockHashPartitionLockByProc(MyProc);
+ LWLockAcquire(leader_lwlock, LW_EXCLUSIVE);
+ if (leader->lockGroupLeaderIdentifier == pid)
+ {
+ ok = true;
+ MyProc->lockGroupLeader = leader;
+ dlist_push_tail(&leader->lockGroupMembers, &MyProc->lockGroupLink);
+ }
+ LWLockRelease(leader_lwlock);
+
+ return ok;
+}
PROCLOCKTAG tag; /* unique identifier of proclock object */
/* data */
+ PGPROC *groupLeader; /* group leader, or NULL if no lock group */
LOCKMASK holdMask; /* bitmask for lock types currently held */
LOCKMASK releaseMask; /* bitmask for lock types to be released */
SHM_QUEUE lockLink; /* list link in LOCK's list of proclocks */
* worker */
} DeadLockState;
-
/*
* The lockmgr's shared hash tables are partitioned to reduce contention.
* To determine which partition a given locktag belongs to, compute the tag's
#define LockHashPartitionLockByIndex(i) \
(&MainLWLockArray[LOCK_MANAGER_LWLOCK_OFFSET + (i)].lock)
+/*
+ * The deadlock detector needs to be able to access lockGroupLeader and
+ * related fields in the PGPROC, so we arrange for those fields to be protected
+ * by one of the lock hash partition locks. Since the deadlock detector
+ * acquires all such locks anyway, this makes it safe for it to access these
+ * fields without doing anything extra. To avoid contention as much as
+ * possible, we map different PGPROCs to different partition locks.
+ */
+#define LockHashPartitionLockByProc(p) \
+ LockHashPartitionLock((p)->pgprocno)
+
/*
* function prototypes
*/
bool fpVXIDLock; /* are we holding a fast-path VXID lock? */
LocalTransactionId fpLocalTransactionId; /* lxid for fast-path VXID
* lock */
+
+ /*
+ * Support for lock groups. Use LockHashPartitionLockByProc to get the
+ * LWLock protecting these fields.
+ */
+ int lockGroupLeaderIdentifier; /* MyProcPid, if I'm a leader */
+ PGPROC *lockGroupLeader; /* lock group leader, if I'm a follower */
+ dlist_head lockGroupMembers; /* list of members, if I'm a leader */
+ dlist_node lockGroupLink; /* my member link, if I'm a member */
};
/* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */
extern void ProcWaitForSignal(void);
extern void ProcSendSignal(int pid);
+extern void BecomeLockGroupLeader(void);
+extern bool BecomeLockGroupMember(PGPROC *leader, int pid);
+
#endif /* PROC_H */