Fix performance problems with autovacuum truncation in busy workloads.

author Kevin Grittner <kgrittn@postgresql.org>

Wed, 23 Jan 2013 19:39:28 +0000 (13:39 -0600)

committer Kevin Grittner <kgrittn@postgresql.org>

Wed, 23 Jan 2013 19:39:28 +0000 (13:39 -0600)
author Kevin Grittner <kgrittn@postgresql.org>
Wed, 23 Jan 2013 19:39:28 +0000 (13:39 -0600)
committer Kevin Grittner <kgrittn@postgresql.org>
Wed, 23 Jan 2013 19:39:28 +0000 (13:39 -0600)
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c

index 219a613ac3eb93e9eca3767ec7dad1dd3ad003df..1c4f7eb16c4190275c0ee5799e13511df5b6607e 100644 (file)
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -46,6 +46,7 @@
  #include "commands/vacuum.h"
  #include "miscadmin.h"
  #include "pgstat.h"
+#include "portability/instr_time.h"
  #include "postmaster/autovacuum.h"
  #include "storage/bufmgr.h"
  #include "storage/freespace.h"
@@ -67,6 +68,17 @@
  #define REL_TRUNCATE_MINIMUM   1000
  #define REL_TRUNCATE_FRACTION  16
  
+/*
+ * Timing parameters for truncate locking heuristics.
+ *
+ * These were not exposed as user tunable GUC values because it didn't seem
+ * that the potential for improvement was great enough to merit the cost of
+ * supporting them.
+ */
+#define AUTOVACUUM_TRUNCATE_LOCK_CHECK_INTERVAL                20      /* ms */
+#define AUTOVACUUM_TRUNCATE_LOCK_WAIT_INTERVAL         50      /* ms */
+#define AUTOVACUUM_TRUNCATE_LOCK_TIMEOUT                       5000            /* ms */
+
  /*
   * Guesstimation of number of dead tuples per page.  This is used to
   * provide an upper limit to memory allocated when vacuuming small
@@ -101,6 +113,7 @@ typedef struct LVRelStats
         ItemPointer dead_tuples;        /* array of ItemPointerData */
         int                     num_index_scans;
         TransactionId latestRemovedXid;
+       bool            lock_waiter_detected;
  } LVRelStats;
  
  
@@ -191,6 +204,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
         vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
         vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples;
         vacrelstats->num_index_scans = 0;
+       vacrelstats->pages_removed = 0;
+       vacrelstats->lock_waiter_detected = false;
  
         /* Open all indexes of the relation */
         vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
@@ -257,10 +272,17 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
                                                 vacrelstats->hasindex,
                                                 new_frozen_xid);
  
-       /* report results to the stats collector, too */
-       pgstat_report_vacuum(RelationGetRelid(onerel),
-                                                onerel->rd_rel->relisshared,
-                                                new_rel_tuples);
+       /*
+        * Report results to the stats collector, too. An early terminated
+        * lazy_truncate_heap attempt suppresses the message and also cancels the
+        * execution of ANALYZE, if that was ordered.
+        */
+       if (!vacrelstats->lock_waiter_detected)
+               pgstat_report_vacuum(RelationGetRelid(onerel),
+                                                        onerel->rd_rel->relisshared,
+                                                        new_rel_tuples);
+       else
+               vacstmt->options &= ~VACOPT_ANALYZE;
  
         /* and log the action if appropriate */
         if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
@@ -1255,80 +1277,124 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
         BlockNumber old_rel_pages = vacrelstats->rel_pages;
         BlockNumber new_rel_pages;
         PGRUsage        ru0;
+       int                     lock_retry;
  
         pg_rusage_init(&ru0);
  
         /*
-        * We need full exclusive lock on the relation in order to do truncation.
-        * If we can't get it, give up rather than waiting --- we don't want to
-        * block other backends, and we don't want to deadlock (which is quite
-        * possible considering we already hold a lower-grade lock).
-        */
-       if (!ConditionalLockRelation(onerel, AccessExclusiveLock))
-               return;
-
-       /*
-        * Now that we have exclusive lock, look to see if the rel has grown
-        * whilst we were vacuuming with non-exclusive lock.  If so, give up; the
-        * newly added pages presumably contain non-deletable tuples.
+        * Loop until no more truncating can be done.
          */
-       new_rel_pages = RelationGetNumberOfBlocks(onerel);
-       if (new_rel_pages != old_rel_pages)
+       do
         {
                 /*
-                * Note: we intentionally don't update vacrelstats->rel_pages with the
-                * new rel size here.  If we did, it would amount to assuming that the
-                * new pages are empty, which is unlikely.      Leaving the numbers alone
-                * amounts to assuming that the new pages have the same tuple density
-                * as existing ones, which is less unlikely.
+                * We need full exclusive lock on the relation in order to do
+                * truncation. If we can't get it, give up rather than waiting --- we
+                * don't want to block other backends, and we don't want to deadlock
+                * (which is quite possible considering we already hold a lower-grade
+                * lock).
                  */
-               UnlockRelation(onerel, AccessExclusiveLock);
-               return;
-       }
+               vacrelstats->lock_waiter_detected = false;
+               lock_retry = 0;
+               while (true)
+               {
+                       if (ConditionalLockRelation(onerel, AccessExclusiveLock))
+                               break;
  
-       /*
-        * Scan backwards from the end to verify that the end pages actually
-        * contain no tuples.  This is *necessary*, not optional, because other
-        * backends could have added tuples to these pages whilst we were
-        * vacuuming.
-        */
-       new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
+                       /*
+                        * Check for interrupts while trying to (re-)acquire the exclusive
+                        * lock.
+                        */
+                       CHECK_FOR_INTERRUPTS();
  
-       if (new_rel_pages >= old_rel_pages)
-       {
-               /* can't do anything after all */
-               UnlockRelation(onerel, AccessExclusiveLock);
-               return;
-       }
+                       if (++lock_retry > (AUTOVACUUM_TRUNCATE_LOCK_TIMEOUT /
+                                                               AUTOVACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
+                       {
+                               /*
+                                * We failed to establish the lock in the specified number of
+                                * retries. This means we give up truncating. Suppress the
+                                * ANALYZE step. Doing an ANALYZE at this point will reset the
+                                * dead_tuple_count in the stats collector, so we will not get
+                                * called by the autovacuum launcher again to do the truncate.
+                                */
+                               vacrelstats->lock_waiter_detected = true;
+                               ereport(LOG,
+                                               (errmsg("automatic vacuum of table \"%s.%s.%s\": "
+                                                               "cannot (re)acquire exclusive "
+                                                               "lock for truncate scan",
+                                                               get_database_name(MyDatabaseId),
+                                                       get_namespace_name(RelationGetNamespace(onerel)),
+                                                               RelationGetRelationName(onerel))));
+                               return;
+                       }
  
-       /*
-        * Okay to truncate.
-        */
-       RelationTruncate(onerel, new_rel_pages);
+                       pg_usleep(AUTOVACUUM_TRUNCATE_LOCK_WAIT_INTERVAL);
+               }
  
-       /*
-        * We can release the exclusive lock as soon as we have truncated.      Other
-        * backends can't safely access the relation until they have processed the
-        * smgr invalidation that smgrtruncate sent out ... but that should happen
-        * as part of standard invalidation processing once they acquire lock on
-        * the relation.
-        */
-       UnlockRelation(onerel, AccessExclusiveLock);
+               /*
+                * Now that we have exclusive lock, look to see if the rel has grown
+                * whilst we were vacuuming with non-exclusive lock.  If so, give up;
+                * the newly added pages presumably contain non-deletable tuples.
+                */
+               new_rel_pages = RelationGetNumberOfBlocks(onerel);
+               if (new_rel_pages != old_rel_pages)
+               {
+                       /*
+                        * Note: we intentionally don't update vacrelstats->rel_pages with
+                        * the new rel size here.  If we did, it would amount to assuming
+                        * that the new pages are empty, which is unlikely. Leaving the
+                        * numbers alone amounts to assuming that the new pages have the
+                        * same tuple density as existing ones, which is less unlikely.
+                        */
+                       UnlockRelation(onerel, AccessExclusiveLock);
+                       return;
+               }
  
-       /*
-        * Update statistics.  Here, it *is* correct to adjust rel_pages without
-        * also touching reltuples, since the tuple count wasn't changed by the
-        * truncation.
-        */
-       vacrelstats->rel_pages = new_rel_pages;
-       vacrelstats->pages_removed = old_rel_pages - new_rel_pages;
+               /*
+                * Scan backwards from the end to verify that the end pages actually
+                * contain no tuples.  This is *necessary*, not optional, because
+                * other backends could have added tuples to these pages whilst we
+                * were vacuuming.
+                */
+               new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
  
-       ereport(elevel,
-                       (errmsg("\"%s\": truncated %u to %u pages",
-                                       RelationGetRelationName(onerel),
-                                       old_rel_pages, new_rel_pages),
-                        errdetail("%s.",
-                                          pg_rusage_show(&ru0))));
+               if (new_rel_pages >= old_rel_pages)
+               {
+                       /* can't do anything after all */
+                       UnlockRelation(onerel, AccessExclusiveLock);
+                       return;
+               }
+
+               /*
+                * Okay to truncate.
+                */
+               RelationTruncate(onerel, new_rel_pages);
+
+               /*
+                * We can release the exclusive lock as soon as we have truncated.
+                * Other backends can't safely access the relation until they have
+                * processed the smgr invalidation that smgrtruncate sent out ... but
+                * that should happen as part of standard invalidation processing once
+                * they acquire lock on the relation.
+                */
+               UnlockRelation(onerel, AccessExclusiveLock);
+
+               /*
+                * Update statistics.  Here, it *is* correct to adjust rel_pages
+                * without also touching reltuples, since the tuple count wasn't
+                * changed by the truncation.
+                */
+               vacrelstats->pages_removed += old_rel_pages - new_rel_pages;
+               vacrelstats->rel_pages = new_rel_pages;
+
+               ereport(elevel,
+                               (errmsg("\"%s\": truncated %u to %u pages",
+                                               RelationGetRelationName(onerel),
+                                               old_rel_pages, new_rel_pages),
+                                errdetail("%s.",
+                                                  pg_rusage_show(&ru0))));
+               old_rel_pages = new_rel_pages;
+       } while (new_rel_pages > vacrelstats->nonempty_pages &&
+                        vacrelstats->lock_waiter_detected);
  }
  
  /*
@@ -1340,6 +1406,12 @@ static BlockNumber
  count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
  {
         BlockNumber blkno;
+       instr_time      starttime;
+       instr_time      currenttime;
+       instr_time      elapsed;
+
+       /* Initialize the starttime if we check for conflicting lock requests */
+       INSTR_TIME_SET_CURRENT(starttime);
  
         /* Strange coding of loop control is needed because blkno is unsigned */
         blkno = vacrelstats->rel_pages;
@@ -1351,6 +1423,36 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
                                         maxoff;
                 bool            hastup;
  
+               /*
+                * Check if another process requests a lock on our relation. We are
+                * holding an AccessExclusiveLock here, so they will be waiting. We
+                * only do this in autovacuum_truncate_lock_check millisecond
+                * intervals, and we only check if that interval has elapsed once
+                * every 32 blocks to keep the number of system calls and actual
+                * shared lock table lookups to a minimum.
+                */
+               if ((blkno % 32) == 0)
+               {
+                       INSTR_TIME_SET_CURRENT(currenttime);
+                       elapsed = currenttime;
+                       INSTR_TIME_SUBTRACT(elapsed, starttime);
+                       if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
+                               >= AUTOVACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
+                       {
+                               if (LockHasWaitersRelation(onerel, AccessExclusiveLock))
+                               {
+                                       ereport(elevel,
+                                                       (errmsg("\"%s\": suspending truncate "
+                                                                       "due to conflicting lock request",
+                                                                       RelationGetRelationName(onerel))));
+
+                                       vacrelstats->lock_waiter_detected = true;
+                                       return blkno;
+                               }
+                               starttime = currenttime;
+                       }
+               }
+
                 /*
                  * We don't insert a vacuum delay point here, because we have an
                  * exclusive lock on the table which we want to hold for as short a
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c

index a7786d0231f9117fa273345a4c34877860dc5a63..e1fa74f9601a2c9958300ed08dad04fc47f71bf0 100644 (file)
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -232,6 +232,24 @@ UnlockRelation(Relation relation, LOCKMODE lockmode)
         LockRelease(&tag, lockmode, false);
  }
  
+/*
+ *             LockHasWaitersRelation
+ *
+ * This is a functiion to check if someone else is waiting on a
+ * lock, we are currently holding.
+ */
+bool
+LockHasWaitersRelation(Relation relation, LOCKMODE lockmode)
+{
+       LOCKTAG         tag;
+
+       SET_LOCKTAG_RELATION(tag,
+                                                relation->rd_lockInfo.lockRelId.dbId,
+                                                relation->rd_lockInfo.lockRelId.relId);
+
+       return LockHasWaiters(&tag, lockmode, false);
+}
+
  /*
   *             LockRelationIdForSession
   *
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c

index 0997825bc4ec64c15649acdcbe26cc6de4f4a642..fed21a9fa26afadee3da66fe22bf8efd8fd48e64 100644 (file)
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -536,6 +536,98 @@ ProcLockHashCode(const PROCLOCKTAG *proclocktag, uint32 hashcode)
         return lockhash;
  }
  
+/*
+ * LockHasWaiters -- look up 'locktag' and check if releasing this
+ *             lock would wake up other processes waiting for it.
+ */
+bool
+LockHasWaiters(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
+{
+       LOCKMETHODID lockmethodid = locktag->locktag_lockmethodid;
+       LockMethod      lockMethodTable;
+       LOCALLOCKTAG localtag;
+       LOCALLOCK  *locallock;
+       LOCK       *lock;
+       PROCLOCK   *proclock;
+       LWLockId        partitionLock;
+       bool            hasWaiters = false;
+
+       if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
+               elog(ERROR, "unrecognized lock method: %d", lockmethodid);
+       lockMethodTable = LockMethods[lockmethodid];
+       if (lockmode <= 0 || lockmode > lockMethodTable->numLockModes)
+               elog(ERROR, "unrecognized lock mode: %d", lockmode);
+
+#ifdef LOCK_DEBUG
+       if (LOCK_DEBUG_ENABLED(locktag))
+               elog(LOG, "LockHasWaiters: lock [%u,%u] %s",
+                        locktag->locktag_field1, locktag->locktag_field2,
+                        lockMethodTable->lockModeNames[lockmode]);
+#endif
+
+       /*
+        * Find the LOCALLOCK entry for this lock and lockmode
+        */
+       MemSet(&localtag, 0, sizeof(localtag));         /* must clear padding */
+       localtag.lock = *locktag;
+       localtag.mode = lockmode;
+
+       locallock = (LOCALLOCK *) hash_search(LockMethodLocalHash,
+                                                                                 (void *) &localtag,
+                                                                                 HASH_FIND, NULL);
+
+       /*
+        * let the caller print its own error message, too. Do not ereport(ERROR).
+        */
+       if (!locallock || locallock->nLocks <= 0)
+       {
+               elog(WARNING, "you don't own a lock of type %s",
+                        lockMethodTable->lockModeNames[lockmode]);
+               return false;
+       }
+
+       /*
+        * Check the shared lock table.
+        */
+       partitionLock = LockHashPartitionLock(locallock->hashcode);
+
+       LWLockAcquire(partitionLock, LW_SHARED);
+
+       /*
+        * We don't need to re-find the lock or proclock, since we kept their
+        * addresses in the locallock table, and they couldn't have been removed
+        * while we were holding a lock on them.
+        */
+       lock = locallock->lock;
+       LOCK_PRINT("LockHasWaiters: found", lock, lockmode);
+       proclock = locallock->proclock;
+       PROCLOCK_PRINT("LockHasWaiters: found", proclock);
+
+       /*
+        * Double-check that we are actually holding a lock of the type we want to
+        * release.
+        */
+       if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
+       {
+               PROCLOCK_PRINT("LockHasWaiters: WRONGTYPE", proclock);
+               LWLockRelease(partitionLock);
+               elog(WARNING, "you don't own a lock of type %s",
+                        lockMethodTable->lockModeNames[lockmode]);
+               RemoveLocalLock(locallock);
+               return false;
+       }
+
+       /*
+        * Do the checking.
+        */
+       if ((lockMethodTable->conflictTab[lockmode] & lock->waitMask) != 0)
+               hasWaiters = true;
+
+       LWLockRelease(partitionLock);
+
+       return hasWaiters;
+}
+
  
  /*
   * LockAcquire -- Check for lock conflicts, sleep if conflict found,
diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h

index de340c4973e435610e690f23890503f2652a3dbd..aa79edaaa48b6056bac9ef0254569284a9e5708b 100644 (file)
--- a/src/include/storage/lmgr.h
+++ b/src/include/storage/lmgr.h
@@ -31,6 +31,7 @@ extern void UnlockRelationOid(Oid relid, LOCKMODE lockmode);
  extern void LockRelation(Relation relation, LOCKMODE lockmode);
  extern bool ConditionalLockRelation(Relation relation, LOCKMODE lockmode);
  extern void UnlockRelation(Relation relation, LOCKMODE lockmode);
+extern bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode);
  
  extern void LockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode);
  extern void UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode);
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h

index 2d487fffe3acc16f6873f12d12e06090381b5fd8..7dea1b2a32ccb49216e18b8764e90e49c6acb3ef 100644 (file)
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -494,6 +494,8 @@ extern void LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks);
  extern void LockReleaseSession(LOCKMETHODID lockmethodid);
  extern void LockReleaseCurrentOwner(void);
  extern void LockReassignCurrentOwner(void);
+extern bool LockHasWaiters(const LOCKTAG *locktag,
+                          LOCKMODE lockmode, bool sessionLock);
  extern VirtualTransactionId *GetLockConflicts(const LOCKTAG *locktag,
                                  LOCKMODE lockmode);
  extern void AtPrepare_Locks(void);
author	Kevin Grittner <kgrittn@postgresql.org>
	Wed, 23 Jan 2013 19:39:28 +0000 (13:39 -0600)
committer	Kevin Grittner <kgrittn@postgresql.org>
	Wed, 23 Jan 2013 19:39:28 +0000 (13:39 -0600)
src/backend/commands/vacuumlazy.c		patch \| blob \| history
src/backend/storage/lmgr/lmgr.c		patch \| blob \| history
src/backend/storage/lmgr/lock.c		patch \| blob \| history
src/include/storage/lmgr.h		patch \| blob \| history
src/include/storage/lock.h		patch \| blob \| history