]> granicus.if.org Git - postgresql/commitdiff
Improve multixact emergency autovacuum logic.
author Andres Freund <andres@anarazel.de>
Sun, 21 Jun 2015 16:57:28 +0000 (18:57 +0200)
committer Andres Freund <andres@anarazel.de>
Sun, 21 Jun 2015 16:57:28 +0000 (18:57 +0200)
Previously autovacuum was not necessarily triggered if space in the
members slru got tight. The first problem was that the signalling was
tied to values in the offsets slru, but members can advance much
faster. That's especially a problem if old sessions had been around that
previously prevented the multixact horizon from increasing. Secondly, the
skipping logic doesn't work if the database was restarted after
autovacuum was triggered - that knowledge is not preserved across
restart. This is especially a problem because it's a common
panic-reaction to restart the database if it gets slow due to
anti-wraparound vacuums.

Fix the first problem by separating the logic for members from
offsets. Trigger autovacuum whenever a multixact crosses a segment
boundary, as the current member offset increases in irregular values, so
we can't use a simple modulo logic as for offsets.  Add a stopgap for
the second problem, by signalling autovacuum whenever ERRORing out
because of boundaries.

Discussion: 20150608163707.GD20772@alap3.anarazel.de

Backpatch into 9.3, where it became more likely that multixacts wrap
around.

src/backend/access/transam/multixact.c

index 4daa5ae9b02ecde10d86ef50aa552bccda6b217f..377d0842bdd9d0f77588ba58b4fdeb4bc00f6922 100644 (file)
@@ -977,10 +977,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
         * Note these are pretty much the same protections in GetNewTransactionId.
         *----------
         */
-       if (!MultiXactIdPrecedes(result, MultiXactState->multiVacLimit) ||
-               !MultiXactState->oldestOffsetKnown ||
-               (MultiXactState->nextOffset - MultiXactState->oldestOffset
-                > MULTIXACT_MEMBER_SAFE_THRESHOLD))
+       if (!MultiXactIdPrecedes(result, MultiXactState->multiVacLimit))
        {
                /*
                 * For safety's sake, we release MultiXactGenLock while sending
@@ -996,19 +993,17 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
 
                LWLockRelease(MultiXactGenLock);
 
-               /*
-                * To avoid swamping the postmaster with signals, we issue the autovac
-                * request only once per 64K multis generated.  This still gives
-                * plenty of chances before we get into real trouble.
-                */
-               if (IsUnderPostmaster && (result % 65536) == 0)
-                       SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
-
                if (IsUnderPostmaster &&
                        !MultiXactIdPrecedes(result, multiStopLimit))
                {
                        char       *oldest_datname = get_database_name(oldest_datoid);
 
+                       /*
+                        * Immediately kick autovacuum into action as we're already
+                        * in ERROR territory.
+                        */
+                       SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
+
                        /* complain even if that DB has disappeared */
                        if (oldest_datname)
                                ereport(ERROR,
@@ -1025,7 +1020,16 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
                                 errhint("Execute a database-wide VACUUM in that database.\n"
                                                 "You might also need to commit or roll back old prepared transactions.")));
                }
-               else if (!MultiXactIdPrecedes(result, multiWarnLimit))
+
+               /*
+                * To avoid swamping the postmaster with signals, we issue the autovac
+                * request only once per 64K multis generated.  This still gives
+                * plenty of chances before we get into real trouble.
+                */
+               if (IsUnderPostmaster && (result % 65536) == 0)
+                       SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
+
+               if (!MultiXactIdPrecedes(result, multiWarnLimit))
                {
                        char       *oldest_datname = get_database_name(oldest_datoid);
 
@@ -1096,6 +1100,10 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
        if (MultiXactState->offsetStopLimitKnown &&
                MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset,
                                                                 nmembers))
+       {
+               /* see comment in the corresponding offsets wraparound case */
+               SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
+
                ereport(ERROR,
                                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                                 errmsg("multixact \"members\" limit exceeded"),
@@ -1106,10 +1114,33 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
                                                   MultiXactState->offsetStopLimit - nextOffset - 1),
                                 errhint("Execute a database-wide VACUUM in database with OID %u with reduced vacuum_multixact_freeze_min_age and vacuum_multixact_freeze_table_age settings.",
                                                 MultiXactState->oldestMultiXactDB)));
-       else if (MultiXactState->offsetStopLimitKnown &&
-                        MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit,
-                                                                         nextOffset,
-                                                                         nmembers + MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * OFFSET_WARN_SEGMENTS))
+       }
+
+       /*
+        * Check whether we should kick autovacuum into action, to prevent members
+        * wraparound. NB we use a much larger window to trigger autovacuum than
+        * just the warning limit. The warning is just a measure of last resort -
+        * this is in line with GetNewTransactionId's behaviour.
+        */
+       if (!MultiXactState->oldestOffsetKnown ||
+               (MultiXactState->nextOffset - MultiXactState->oldestOffset
+                > MULTIXACT_MEMBER_SAFE_THRESHOLD))
+       {
+               /*
+                * To avoid swamping the postmaster with signals, we issue the autovac
+                * request only when crossing a segment boundary. With default
+                * compilation settings that's rougly after 50k members.  This still
+                * gives plenty of chances before we get into real trouble.
+                */
+               if ((MXOffsetToMemberPage(nextOffset) / SLRU_PAGES_PER_SEGMENT) !=
+                       (MXOffsetToMemberPage(nextOffset + nmembers) / SLRU_PAGES_PER_SEGMENT))
+                       SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
+       }
+
+       if (MultiXactState->offsetStopLimitKnown &&
+               MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit,
+                                                                nextOffset,
+                                                                nmembers + MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * OFFSET_WARN_SEGMENTS))
                ereport(WARNING,
                                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                                 errmsg("database with OID %u must be vacuumed before %d more multixact members are used",