Fix stale-pointer problem in fast-path locking logic.

author Tom Lane <tgl@sss.pgh.pa.us>

Wed, 27 Nov 2013 23:10:06 +0000 (18:10 -0500)

committer Tom Lane <tgl@sss.pgh.pa.us>

Wed, 27 Nov 2013 23:10:06 +0000 (18:10 -0500)
author Tom Lane <tgl@sss.pgh.pa.us>
Wed, 27 Nov 2013 23:10:06 +0000 (18:10 -0500)
committer Tom Lane <tgl@sss.pgh.pa.us>
Wed, 27 Nov 2013 23:10:06 +0000 (18:10 -0500)
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c

index 0d9a3b660516206720e58bbf6bd215ff2a586337..7e518f4dde5e9318aecba4ac71a32c9c9d56a3b8 100644 (file)
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -773,14 +773,16 @@ LockAcquireExtended(const LOCKTAG *locktag,
         }
  
         /*
-        * Emit a WAL record if acquisition of this lock needs to be replayed in a
-        * standby server. Only AccessExclusiveLocks can conflict with lock types
-        * that read-only transactions can acquire in a standby server.
+        * Prepare to emit a WAL record if acquisition of this lock needs to be
+        * replayed in a standby server.
          *
-        * Make sure this definition matches the one in
-        * GetRunningTransactionLocks().
+        * Here we prepare to log; after lock is acquired we'll issue log record.
+        * This arrangement simplifies error recovery in case the preparation step
+        * fails.
          *
-        * First we prepare to log, then after lock acquired we issue log record.
+        * Only AccessExclusiveLocks can conflict with lock types that read-only
+        * transactions can acquire in a standby server. Make sure this definition
+        * matches the one in GetRunningTransactionLocks().
          */
         if (lockmode >= AccessExclusiveLock &&
                 locktag->locktag_type == LOCKTAG_RELATION &&
@@ -801,8 +803,8 @@ LockAcquireExtended(const LOCKTAG *locktag,
          * lock type on a relation we have already locked using the fast-path, but
          * for now we don't worry about that case either.
          */
-       if (EligibleForRelationFastPath(locktag, lockmode)
-               && FastPathLocalUseCount < FP_LOCK_SLOTS_PER_BACKEND)
+       if (EligibleForRelationFastPath(locktag, lockmode) &&
+               FastPathLocalUseCount < FP_LOCK_SLOTS_PER_BACKEND)
         {
                 uint32          fasthashcode = FastPathStrongLockHashPartition(hashcode);
                 bool            acquired;
@@ -822,6 +824,13 @@ LockAcquireExtended(const LOCKTAG *locktag,
                 LWLockRelease(MyProc->backendLock);
                 if (acquired)
                 {
+                       /*
+                        * The locallock might contain stale pointers to some old shared
+                        * objects; we MUST reset these to null before considering the
+                        * lock to be acquired via fast-path.
+                        */
+                       locallock->lock = NULL;
+                       locallock->proclock = NULL;
                         GrantLockLocal(locallock, owner);
                         return LOCKACQUIRE_OK;
                 }
@@ -862,7 +871,13 @@ LockAcquireExtended(const LOCKTAG *locktag,
         LWLockAcquire(partitionLock, LW_EXCLUSIVE);
  
         /*
-        * Find or create a proclock entry with this tag
+        * Find or create lock and proclock entries with this tag
+        *
+        * Note: if the locallock object already existed, it might have a pointer
+        * to the lock already ... but we should not assume that that pointer is
+        * valid, since a lock object with zero hold and request counts can go
+        * away anytime.  So we have to use SetupLockInTable() to recompute the
+        * lock and proclock pointers, even if they're already set.
          */
         proclock = SetupLockInTable(lockMethodTable, MyProc, locktag,
                                                                 hashcode, lockmode);
@@ -995,7 +1010,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
         LWLockRelease(partitionLock);
  
         /*
-        * Emit a WAL record if acquisition of this lock need to be replayed in a
+        * Emit a WAL record if acquisition of this lock needs to be replayed in a
          * standby server.
          */
         if (log_lock)
@@ -1034,11 +1049,6 @@ SetupLockInTable(LockMethod lockMethodTable, PGPROC *proc,
  
         /*
          * Find or create a lock with this tag.
-        *
-        * Note: if the locallock object already existed, it might have a pointer
-        * to the lock already ... but we probably should not assume that that
-        * pointer is valid, since a lock object with no locks can go away
-        * anytime.
          */
         lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash,
                                                                                                 (const void *) locktag,
@@ -1794,8 +1804,8 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
                 return TRUE;
  
         /* Attempt fast release of any lock eligible for the fast path. */
-       if (EligibleForRelationFastPath(locktag, lockmode)
-               && FastPathLocalUseCount > 0)
+       if (EligibleForRelationFastPath(locktag, lockmode) &&
+               FastPathLocalUseCount > 0)
         {
                 bool            released;
  
@@ -1825,30 +1835,33 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
          * Normally, we don't need to re-find the lock or proclock, since we kept
          * their addresses in the locallock table, and they couldn't have been
          * removed while we were holding a lock on them.  But it's possible that
-        * the locks have been moved to the main hash table by another backend, in
-        * which case we might need to go look them up after all.
+        * the lock was taken fast-path and has since been moved to the main hash
+        * table by another backend, in which case we will need to look up the
+        * objects here.  We assume the lock field is NULL if so.
          */
         lock = locallock->lock;
         if (!lock)
         {
                 PROCLOCKTAG proclocktag;
-               bool            found;
  
                 Assert(EligibleForRelationFastPath(locktag, lockmode));
                 lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash,
                                                                                                         (const void *) locktag,
                                                                                                         locallock->hashcode,
                                                                                                         HASH_FIND,
-                                                                                                       &found);
-               Assert(found && lock != NULL);
+                                                                                                       NULL);
+               if (!lock)
+                       elog(ERROR, "failed to re-find shared lock object");
                 locallock->lock = lock;
  
                 proclocktag.myLock = lock;
                 proclocktag.myProc = MyProc;
                 locallock->proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash,
                                                                                                            (void *) &proclocktag,
-                                                                                                          HASH_FIND, &found);
-               Assert(found);
+                                                                                                          HASH_FIND,
+                                                                                                          NULL);
+               if (!locallock->proclock)
+                       elog(ERROR, "failed to re-find shared proclock object");
         }
         LOCK_PRINT("LockRelease: found", lock, lockmode);
         proclock = locallock->proclock;
@@ -1929,7 +1942,8 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
          * entries, then we scan the process's proclocks and get rid of those. We
          * do this separately because we may have multiple locallock entries
          * pointing to the same proclock, and we daren't end up with any dangling
-        * pointers.
+        * pointers.  Fast-path locks are cleaned up during the locallock table
+        * scan, though.
          */
         hash_seq_init(&status, LockMethodLocalHash);
  
@@ -1983,7 +1997,7 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
  
                 /*
                  * If the lock or proclock pointers are NULL, this lock was taken via
-                * the relation fast-path.
+                * the relation fast-path (and is not known to have been transferred).
                  */
                 if (locallock->proclock == NULL || locallock->lock == NULL)
                 {
@@ -1997,7 +2011,10 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
                         /*
                          * If we don't currently hold the LWLock that protects our
                          * fast-path data structures, we must acquire it before attempting
-                        * to release the lock via the fast-path.
+                        * to release the lock via the fast-path.  We will continue to
+                        * hold the LWLock until we're done scanning the locallock table,
+                        * unless we hit a transferred fast-path lock.  (XXX is this
+                        * really such a good idea?  There could be a lot of entries ...)
                          */
                         if (!have_fast_path_lwlock)
                         {
@@ -2042,6 +2059,7 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
                 RemoveLocalLock(locallock);
         }
  
+       /* Done with the fast-path data structures */
         if (have_fast_path_lwlock)
                 LWLockRelease(MyProc->backendLock);
  
@@ -2352,6 +2370,7 @@ FastPathUnGrantRelationLock(Oid relid, LOCKMODE lockmode)
                         Assert(!result);
                         FAST_PATH_CLEAR_LOCKMODE(MyProc, f, lockmode);
                         result = true;
+                       /* we continue iterating so as to update FastPathLocalUseCount */
                 }
                 if (FAST_PATH_GET_BITS(MyProc, f) != 0)
                         ++FastPathLocalUseCount;
@@ -2437,6 +2456,9 @@ FastPathTransferRelationLocks(LockMethod lockMethodTable, const LOCKTAG *locktag
                                 FAST_PATH_CLEAR_LOCKMODE(proc, f, lockmode);
                         }
                         LWLockRelease(partitionLock);
+
+                       /* No need to examine remaining slots. */
+                       break;
                 }
                 LWLockRelease(proc->backendLock);
         }
@@ -2447,6 +2469,8 @@ FastPathTransferRelationLocks(LockMethod lockMethodTable, const LOCKTAG *locktag
   * FastPathGetLockEntry
   *             Return the PROCLOCK for a lock originally taken via the fast-path,
   *             transferring it to the primary lock table if necessary.
+ *
+ * Note: caller takes care of updating the locallock object.
   */
  static PROCLOCK *
  FastPathGetRelationLockEntry(LOCALLOCK *locallock)
@@ -2490,6 +2514,9 @@ FastPathGetRelationLockEntry(LOCALLOCK *locallock)
                 FAST_PATH_CLEAR_LOCKMODE(MyProc, f, lockmode);
  
                 LWLockRelease(partitionLock);
+
+               /* No need to examine remaining slots. */
+               break;
         }
  
         LWLockRelease(MyProc->backendLock);
@@ -2662,6 +2689,8 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode)
                                  */
                                 if (VirtualTransactionIdIsValid(vxid))
                                         vxids[count++] = vxid;
+
+                               /* No need to examine remaining slots. */
                                 break;
                         }
  
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h

index 7dea1b2a32ccb49216e18b8764e90e49c6acb3ef..c0fb08a989f6fe115ac24506c223bc65db4acdbe 100644 (file)
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -379,6 +379,20 @@ typedef struct PROCLOCK
   * shared memory.  We also track the number of lock acquisitions per
   * ResourceOwner, so that we can release just those locks belonging to a
   * particular ResourceOwner.
+ *
+ * When holding a lock taken "normally", the lock and proclock fields always
+ * point to the associated objects in shared memory.  However, if we acquired
+ * the lock via the fast-path mechanism, the lock and proclock fields are set
+ * to NULL, since there probably aren't any such objects in shared memory.
+ * (If the lock later gets promoted to normal representation, we may eventually
+ * update our locallock's lock/proclock fields after finding the shared
+ * objects.)
+ *
+ * Caution: a locallock object can be left over from a failed lock acquisition
+ * attempt.  In this case its lock/proclock fields are untrustworthy, since
+ * the shared lock object is neither held nor awaited, and hence is available
+ * to be reclaimed.  If nLocks > 0 then these pointers must either be valid or
+ * NULL, but when nLocks == 0 they should be considered garbage.
   */
  typedef struct LOCALLOCKTAG
  {
@@ -404,13 +418,13 @@ typedef struct LOCALLOCK
         LOCALLOCKTAG tag;                       /* unique identifier of locallock entry */
  
         /* data */
-       LOCK       *lock;                       /* associated LOCK object in shared mem */
-       PROCLOCK   *proclock;           /* associated PROCLOCK object in shmem */
+       LOCK       *lock;                       /* associated LOCK object, if any */
+       PROCLOCK   *proclock;           /* associated PROCLOCK object, if any */
         uint32          hashcode;               /* copy of LOCKTAG's hash value */
         int64           nLocks;                 /* total number of times lock is held */
         int                     numLockOwners;  /* # of relevant ResourceOwners */
         int                     maxLockOwners;  /* allocated size of array */
-       bool            holdsStrongLockCount;   /* bumped FastPathStrongRelatonLocks? */
+       bool            holdsStrongLockCount;   /* bumped FastPathStrongRelationLocks */
         LOCALLOCKOWNER *lockOwners; /* dynamically resizable array */
  } LOCALLOCK;
author	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 27 Nov 2013 23:10:06 +0000 (18:10 -0500)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 27 Nov 2013 23:10:06 +0000 (18:10 -0500)
src/backend/storage/lmgr/lock.c		patch \| blob \| history
src/include/storage/lock.h		patch \| blob \| history