granicus.if.org Git - postgresql/commitdiff
Improve performance of get_actual_variable_range with recently-dead tuples.
author Tom Lane <tgl@sss.pgh.pa.us>
Thu, 7 Sep 2017 23:41:51 +0000 (19:41 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Thu, 7 Sep 2017 23:41:51 +0000 (19:41 -0400)
In commit fccebe421, we hacked get_actual_variable_range() to scan the
index with SnapshotDirty, so that if there are many uncommitted tuples
at the end of the index range, it wouldn't laboriously scan through all
of them looking for a live value to return.  However, that didn't fix it
for the case of many recently-dead tuples at the end of the index;
SnapshotDirty recognizes those as committed dead and so we're back to
the same problem.
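
(A minimal illustrative sketch, not part of this patch: the helper function
name below is invented, but the visibility routines and their signatures are
the real ones declared in utils/tqual.h.  It shows the case in question: a
tuple that SnapshotDirty rejects even though VACUUM cannot remove it yet.)

    #include "postgres.h"
    #include "utils/snapmgr.h"
    #include "utils/tqual.h"

    /* Sketch only: does this tuple hit the slow case described above? */
    static bool
    is_recently_dead_for_planner(HeapTuple tuple, Buffer buffer)
    {
        SnapshotData SnapshotDirty;

        InitDirtySnapshot(SnapshotDirty);

        /* If SnapshotDirty accepts the tuple (live or uncommitted), it is
         * not the case of interest. */
        if (HeapTupleSatisfiesDirty(tuple, &SnapshotDirty, buffer))
            return false;

        /*
         * Rejected by SnapshotDirty, yet HeapTupleSatisfiesVacuum() with the
         * RecentGlobalXmin horizon still reports HEAPTUPLE_RECENTLY_DEAD, not
         * HEAPTUPLE_DEAD: VACUUM cannot remove it, the index entry cannot be
         * marked killed, and the old code had to fetch and reject each such
         * heap tuple in turn.
         */
        return HeapTupleSatisfiesVacuum(tuple, RecentGlobalXmin, buffer) ==
            HEAPTUPLE_RECENTLY_DEAD;
    }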

To improve the situation, invent a "SnapshotNonVacuumable" snapshot type
and use that instead.  The reason this helps is that, if the snapshot
rejects a given index entry, we know that the indexscan will mark that
index entry as killed.  This means the next get_actual_variable_range()
scan will proceed past that entry without visiting the heap, making the
scan a lot faster.  We may end up accepting a recently-dead tuple as
being the estimated extremal value, but that doesn't seem much worse than
the compromise we made before to accept not-yet-committed extremal values.
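
In code terms, the planner-side change boils down to this (condensed from the
selfuncs.c hunks below; heapRel, indexRel, index_scan, and scankeys are set up
by get_actual_variable_range() exactly as before):

    SnapshotData SnapshotNonVacuumable;

    /* Accept anything that VACUUM could not remove yet. */
    InitNonVacuumableSnapshot(SnapshotNonVacuumable, RecentGlobalXmin);

    index_scan = index_beginscan(heapRel, indexRel,
                                 &SnapshotNonVacuumable,
                                 1, 0);
    index_rescan(index_scan, scankeys, 1, NULL, 0);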

The cost of the scan is still proportional to the number of dead index
entries at the end of the range, so in the interval after a mass delete
but before VACUUM's cleaned up the mess, it's still possible for
get_actual_variable_range() to take a noticeable amount of time, if you've
got enough such dead entries.  But the constant factor is much much better
than before, since all we need to do with each index entry is test its
"killed" bit.

We chose to back-patch commit fccebe421 at the time, but I'm hesitant to
do so here, because this form of the problem seems to affect many fewer
people.  Also, even when it happens, it's less bad than the case fixed
by commit fccebe421 because we don't get the contention effects from
expensive TransactionIdIsInProgress tests.

Dmitriy Sarafannikov, reviewed by Andrey Borodin

Discussion: https://postgr.es/m/05C72CF7-B5F6-4DB9-8A09-5AC897653113@yandex.ru

src/backend/access/heap/heapam.c
src/backend/utils/adt/selfuncs.c
src/backend/utils/time/tqual.c
src/include/utils/snapshot.h
src/include/utils/tqual.h

src/backend/access/heap/heapam.c
index e29c5ad0868da6c74ae747ebd92680a2653d2cc9..d20f0381f3bc23f99c505ef8609d63240ac5d44b 100644 (file)
@@ -2118,6 +2118,9 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
                 * If we can't see it, maybe no one else can either.  At caller
                 * request, check whether all chain members are dead to all
                 * transactions.
+                *
+                * Note: if you change the criterion here for what is "dead", fix the
+                * planner's get_actual_variable_range() function to match.
                 */
                if (all_dead && *all_dead &&
                        !HeapTupleIsSurelyDead(heapTuple, RecentGlobalXmin))
src/backend/utils/adt/selfuncs.c
index 23e5526a8e1453a8e1c47459061b2b602e913f8c..81b0bc37d27615f1d43b40f109d81a7888228a73 100644 (file)
 #include "utils/pg_locale.h"
 #include "utils/rel.h"
 #include "utils/selfuncs.h"
+#include "utils/snapmgr.h"
 #include "utils/spccache.h"
 #include "utils/syscache.h"
 #include "utils/timestamp.h"
@@ -5328,7 +5329,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
                        HeapTuple       tup;
                        Datum           values[INDEX_MAX_KEYS];
                        bool            isnull[INDEX_MAX_KEYS];
-                       SnapshotData SnapshotDirty;
+                       SnapshotData SnapshotNonVacuumable;
 
                        estate = CreateExecutorState();
                        econtext = GetPerTupleExprContext(estate);
@@ -5351,7 +5352,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
                        slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRel));
                        econtext->ecxt_scantuple = slot;
                        get_typlenbyval(vardata->atttype, &typLen, &typByVal);
-                       InitDirtySnapshot(SnapshotDirty);
+                       InitNonVacuumableSnapshot(SnapshotNonVacuumable, RecentGlobalXmin);
 
                        /* set up an IS NOT NULL scan key so that we ignore nulls */
                        ScanKeyEntryInitialize(&scankeys[0],
@@ -5373,17 +5374,29 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
                                 * active snapshot, which is the best approximation we've got
                                 * to what the query will see when executed.  But that won't
                                 * be exact if a new snap is taken before running the query,
-                                * and it can be very expensive if a lot of uncommitted rows
-                                * exist at the end of the index (because we'll laboriously
-                                * fetch each one and reject it).  What seems like a good
-                                * compromise is to use SnapshotDirty.  That will accept
-                                * uncommitted rows, and thus avoid fetching multiple heap
-                                * tuples in this scenario.  On the other hand, it will reject
-                                * known-dead rows, and thus not give a bogus answer when the
-                                * extreme value has been deleted; that case motivates not
-                                * using SnapshotAny here.
+                                * and it can be very expensive if a lot of recently-dead or
+                                * uncommitted rows exist at the beginning or end of the index
+                                * (because we'll laboriously fetch each one and reject it).
+                                * Instead, we use SnapshotNonVacuumable.  That will accept
+                                * recently-dead and uncommitted rows as well as normal
+                                * visible rows.  On the other hand, it will reject known-dead
+                                * rows, and thus not give a bogus answer when the extreme
+                                * value has been deleted (unless the deletion was quite
+                                * recent); that case motivates not using SnapshotAny here.
+                                *
+                                * A crucial point here is that SnapshotNonVacuumable, with
+                                * RecentGlobalXmin as horizon, yields the inverse of the
+                                * condition that the indexscan will use to decide that index
+                                * entries are killable (see heap_hot_search_buffer()).
+                                * Therefore, if the snapshot rejects a tuple and we have to
+                                * continue scanning past it, we know that the indexscan will
+                                * mark that index entry killed.  That means that the next
+                                * get_actual_variable_range() call will not have to visit
+                                * that heap entry.  In this way we avoid repetitive work when
+                                * this function is used a lot during planning.
                                 */
-                               index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty,
+                               index_scan = index_beginscan(heapRel, indexRel,
+                                                                                        &SnapshotNonVacuumable,
                                                                                         1, 0);
                                index_rescan(index_scan, scankeys, 1, NULL, 0);
 
@@ -5415,7 +5428,8 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
                        /* If max is requested, and we didn't find the index is empty */
                        if (max && have_data)
                        {
-                               index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty,
+                               index_scan = index_beginscan(heapRel, indexRel,
+                                                                                        &SnapshotNonVacuumable,
                                                                                         1, 0);
                                index_rescan(index_scan, scankeys, 1, NULL, 0);
 
src/backend/utils/time/tqual.c
index f9da9e17f523307f7103b6b19a28aab8819d83de..bbac4083c98e14930091875faa0d15887560a0bf 100644 (file)
@@ -45,6 +45,8 @@
  *               like HeapTupleSatisfiesSelf(), but includes open transactions
  *      HeapTupleSatisfiesVacuum()
  *               visible to any running transaction, used by VACUUM
+ *      HeapTupleSatisfiesNonVacuumable()
+ *               Snapshot-style API for HeapTupleSatisfiesVacuum
  *      HeapTupleSatisfiesToast()
  *               visible unless part of interrupted vacuum, used for TOAST
  *      HeapTupleSatisfiesAny()
@@ -1392,6 +1394,26 @@ HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin,
        return HEAPTUPLE_DEAD;
 }
 
+
+/*
+ * HeapTupleSatisfiesNonVacuumable
+ *
+ *     True if tuple might be visible to some transaction; false if it's
+ *     surely dead to everyone, ie, vacuumable.
+ *
+ *     This is an interface to HeapTupleSatisfiesVacuum that meets the
+ *     SnapshotSatisfiesFunc API, so it can be used through a Snapshot.
+ *     snapshot->xmin must have been set up with the xmin horizon to use.
+ */
+bool
+HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot,
+                                                               Buffer buffer)
+{
+       return HeapTupleSatisfiesVacuum(htup, snapshot->xmin, buffer)
+               != HEAPTUPLE_DEAD;
+}
+
+
 /*
  * HeapTupleIsSurelyDead
  *
src/include/utils/snapshot.h
index 074cc818649078068d5846bbab9581345baebfd7..bf519778df4b791c55be144c3a92a2ae9d47d348 100644 (file)
@@ -41,6 +41,7 @@ typedef bool (*SnapshotSatisfiesFunc) (HeapTuple htup,
  * * MVCC snapshots taken during recovery (in Hot-Standby mode)
  * * Historic MVCC snapshots used during logical decoding
  * * snapshots passed to HeapTupleSatisfiesDirty()
+ * * snapshots passed to HeapTupleSatisfiesNonVacuumable()
  * * snapshots used for SatisfiesAny, Toast, Self where no members are
  *      accessed.
  *
@@ -56,7 +57,8 @@ typedef struct SnapshotData
        /*
         * The remaining fields are used only for MVCC snapshots, and are normally
         * just zeroes in special snapshots.  (But xmin and xmax are used
-        * specially by HeapTupleSatisfiesDirty.)
+        * specially by HeapTupleSatisfiesDirty, and xmin is used specially by
+        * HeapTupleSatisfiesNonVacuumable.)
         *
         * An MVCC snapshot can never see the effects of XIDs >= xmax. It can see
         * the effects of all older XIDs except those listed in the snapshot. xmin
src/include/utils/tqual.h
index 036d9898d694e0a26826ea3a5b804954e0022cd1..9a3b56e5f0324f45d42a8822e59c687682871b33 100644 (file)
@@ -66,6 +66,8 @@ extern bool HeapTupleSatisfiesToast(HeapTuple htup,
                                                Snapshot snapshot, Buffer buffer);
 extern bool HeapTupleSatisfiesDirty(HeapTuple htup,
                                                Snapshot snapshot, Buffer buffer);
+extern bool HeapTupleSatisfiesNonVacuumable(HeapTuple htup,
+                                                               Snapshot snapshot, Buffer buffer);
 extern bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup,
                                                           Snapshot snapshot, Buffer buffer);
 
@@ -100,6 +102,14 @@ extern bool ResolveCminCmaxDuringDecoding(struct HTAB *tuplecid_data,
 #define InitDirtySnapshot(snapshotdata)  \
        ((snapshotdata).satisfies = HeapTupleSatisfiesDirty)
 
+/*
+ * Similarly, some initialization is required for a NonVacuumable snapshot.
+ * The caller must supply the xmin horizon to use (e.g., RecentGlobalXmin).
+ */
+#define InitNonVacuumableSnapshot(snapshotdata, xmin_horizon)  \
+       ((snapshotdata).satisfies = HeapTupleSatisfiesNonVacuumable, \
+        (snapshotdata).xmin = (xmin_horizon))
+
 /*
  * Similarly, some initialization is required for SnapshotToast.  We need
  * to set lsn and whenTaken correctly to support snapshot_too_old.