Allow bitmap scans to operate as index-only scans when possible.
author     Tom Lane <tgl@sss.pgh.pa.us>    Wed, 1 Nov 2017 21:38:12 +0000 (17:38 -0400)
committer  Tom Lane <tgl@sss.pgh.pa.us>    Wed, 1 Nov 2017 21:38:20 +0000 (17:38 -0400)
If we don't have to return any columns from heap tuples, and there's
no need to recheck qual conditions, and the heap page is all-visible,
then we can skip fetching the heap page altogether.
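
As a rough illustration (not part of this commit or its regression tests), the sort of
query that can now avoid heap fetches looks like the sketch below.  The table, index,
and settings are hypothetical, and whether the planner actually picks the bitmap path
depends on costing; the point is only that count(*) needs no heap columns, the qual is
fully indexable (so no recheck while the bitmap stays exact), and VACUUM has marked the
pages all-visible.

-- hypothetical demonstration, not from the regression suite
CREATE TABLE bms_demo (id int, padding text);
INSERT INTO bms_demo SELECT g, repeat('x', 100) FROM generate_series(1, 100000) g;
CREATE INDEX bms_demo_id_idx ON bms_demo (id);
VACUUM bms_demo;                     -- set all-visible bits in the visibility map
SET enable_indexonlyscan TO off;     -- steer the planner toward the bitmap path
SET enable_seqscan TO off;
EXPLAIN (ANALYZE, BUFFERS)
SELECT count(*) FROM bms_demo WHERE id < 1000;
-- with an exact bitmap, no recheck, and all-visible pages, the Bitmap Heap Scan
-- should read noticeably fewer heap buffers than it did before this change
RESET enable_indexonlyscan;
RESET enable_seqscan;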

Skip prefetching pages too, when possible, on the assumption that the
recheck flag will remain the same from one page to the next.  While that
assumption is hardly bulletproof, it seems like a good bet most of the
time, and better than prefetching pages we don't need.

This commit installs the executor infrastructure, but doesn't change
any planner cost estimates, thus possibly causing bitmap scans to
not be chosen in cases where this change renders them the best choice.
I (tgl) am not entirely convinced that we need to account for this
behavior in the planner, because I think typically the bitmap scan would
get chosen anyway if it's the best bet.  In any case the submitted patch
took way too many shortcuts, resulting in too many clearly-bad choices,
to be committable.

Alexander Kuzmenkov, reviewed by Alexey Chernyshov, and whacked around
rather heavily by me.

Discussion: https://postgr.es/m/239a8955-c0fc-f506-026d-c837e86c827b@postgrespro.ru

src/backend/executor/nodeBitmapHeapscan.c
src/backend/optimizer/plan/createplan.c
src/include/nodes/execnodes.h
src/test/regress/expected/stats.out
src/test/regress/sql/stats.sql

diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index 6035b4dfd4c1077fc538d2c5e27c689e46c8e216..b885f2a3a65f1231b66768ba7715381273c9d784 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -39,6 +39,7 @@
 
 #include "access/relscan.h"
 #include "access/transam.h"
+#include "access/visibilitymap.h"
 #include "executor/execdebug.h"
 #include "executor/nodeBitmapHeapscan.h"
 #include "miscadmin.h"
@@ -225,9 +226,31 @@ BitmapHeapNext(BitmapHeapScanState *node)
                        }
 
                        /*
-                        * Fetch the current heap page and identify candidate tuples.
+                        * We can skip fetching the heap page if we don't need any fields
+                        * from the heap, and the bitmap entries don't need rechecking,
+                        * and all tuples on the page are visible to our transaction.
                         */
-                       bitgetpage(scan, tbmres);
+                       node->skip_fetch = (node->can_skip_fetch &&
+                                                               !tbmres->recheck &&
+                                                               VM_ALL_VISIBLE(node->ss.ss_currentRelation,
+                                                                                          tbmres->blockno,
+                                                                                          &node->vmbuffer));
+
+                       if (node->skip_fetch)
+                       {
+                               /*
+                                * The number of tuples on this page is put into
+                                * scan->rs_ntuples; note we don't fill scan->rs_vistuples.
+                                */
+                               scan->rs_ntuples = tbmres->ntuples;
+                       }
+                       else
+                       {
+                               /*
+                                * Fetch the current heap page and identify candidate tuples.
+                                */
+                               bitgetpage(scan, tbmres);
+                       }
 
                        if (tbmres->ntuples >= 0)
                                node->exact_pages++;
@@ -289,45 +312,55 @@ BitmapHeapNext(BitmapHeapScanState *node)
                 */
                BitmapPrefetch(node, scan);
 
-               /*
-                * Okay to fetch the tuple
-                */
-               targoffset = scan->rs_vistuples[scan->rs_cindex];
-               dp = (Page) BufferGetPage(scan->rs_cbuf);
-               lp = PageGetItemId(dp, targoffset);
-               Assert(ItemIdIsNormal(lp));
+               if (node->skip_fetch)
+               {
+                       /*
+                        * If we don't have to fetch the tuple, just return nulls.
+                        */
+                       ExecStoreAllNullTuple(slot);
+               }
+               else
+               {
+                       /*
+                        * Okay to fetch the tuple.
+                        */
+                       targoffset = scan->rs_vistuples[scan->rs_cindex];
+                       dp = (Page) BufferGetPage(scan->rs_cbuf);
+                       lp = PageGetItemId(dp, targoffset);
+                       Assert(ItemIdIsNormal(lp));
 
-               scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
-               scan->rs_ctup.t_len = ItemIdGetLength(lp);
-               scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
-               ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);
+                       scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
+                       scan->rs_ctup.t_len = ItemIdGetLength(lp);
+                       scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
+                       ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);
 
-               pgstat_count_heap_fetch(scan->rs_rd);
+                       pgstat_count_heap_fetch(scan->rs_rd);
 
-               /*
-                * Set up the result slot to point to this tuple. Note that the slot
-                * acquires a pin on the buffer.
-                */
-               ExecStoreTuple(&scan->rs_ctup,
-                                          slot,
-                                          scan->rs_cbuf,
-                                          false);
-
-               /*
-                * If we are using lossy info, we have to recheck the qual conditions
-                * at every tuple.
-                */
-               if (tbmres->recheck)
-               {
-                       econtext->ecxt_scantuple = slot;
-                       ResetExprContext(econtext);
+                       /*
+                        * Set up the result slot to point to this tuple.  Note that the
+                        * slot acquires a pin on the buffer.
+                        */
+                       ExecStoreTuple(&scan->rs_ctup,
+                                                  slot,
+                                                  scan->rs_cbuf,
+                                                  false);
 
-                       if (!ExecQual(node->bitmapqualorig, econtext))
+                       /*
+                        * If we are using lossy info, we have to recheck the qual
+                        * conditions at every tuple.
+                        */
+                       if (tbmres->recheck)
                        {
-                               /* Fails recheck, so drop it and loop back for another */
-                               InstrCountFiltered2(node, 1);
-                               ExecClearTuple(slot);
-                               continue;
+                               econtext->ecxt_scantuple = slot;
+                               ResetExprContext(econtext);
+
+                               if (!ExecQual(node->bitmapqualorig, econtext))
+                               {
+                                       /* Fails recheck, so drop it and loop back for another */
+                                       InstrCountFiltered2(node, 1);
+                                       ExecClearTuple(slot);
+                                       continue;
+                               }
                        }
                }
 
@@ -582,6 +615,7 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
                        while (node->prefetch_pages < node->prefetch_target)
                        {
                                TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
+                               bool            skip_fetch;
 
                                if (tbmpre == NULL)
                                {
@@ -591,7 +625,26 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
                                        break;
                                }
                                node->prefetch_pages++;
-                               PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
+
+                               /*
+                                * If we expect not to have to actually read this heap page,
+                                * skip this prefetch call, but continue to run the prefetch
+                                * logic normally.  (Would it be better not to increment
+                                * prefetch_pages?)
+                                *
+                                * This depends on the assumption that the index AM will
+                                * report the same recheck flag for this future heap page as
+                                * it did for the current heap page; which is not a certainty
+                                * but is true in many cases.
+                                */
+                               skip_fetch = (node->can_skip_fetch &&
+                                                         (node->tbmres ? !node->tbmres->recheck : false) &&
+                                                         VM_ALL_VISIBLE(node->ss.ss_currentRelation,
+                                                                                        tbmpre->blockno,
+                                                                                        &node->pvmbuffer));
+
+                               if (!skip_fetch)
+                                       PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
                        }
                }
 
@@ -608,6 +661,7 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
                        {
                                TBMIterateResult *tbmpre;
                                bool            do_prefetch = false;
+                               bool            skip_fetch;
 
                                /*
                                 * Recheck under the mutex. If some other process has already
@@ -633,7 +687,15 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
                                        break;
                                }
 
-                               PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
+                               /* As above, skip prefetch if we expect not to need page */
+                               skip_fetch = (node->can_skip_fetch &&
+                                                         (node->tbmres ? !node->tbmres->recheck : false) &&
+                                                         VM_ALL_VISIBLE(node->ss.ss_currentRelation,
+                                                                                        tbmpre->blockno,
+                                                                                        &node->pvmbuffer));
+
+                               if (!skip_fetch)
+                                       PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
                        }
                }
        }
@@ -687,6 +749,7 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
        /* rescan to release any page pin */
        heap_rescan(node->ss.ss_currentScanDesc, NULL);
 
+       /* release bitmaps and buffers if any */
        if (node->tbmiterator)
                tbm_end_iterate(node->tbmiterator);
        if (node->prefetch_iterator)
@@ -697,6 +760,10 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
                tbm_end_shared_iterate(node->shared_prefetch_iterator);
        if (node->tbm)
                tbm_free(node->tbm);
+       if (node->vmbuffer != InvalidBuffer)
+               ReleaseBuffer(node->vmbuffer);
+       if (node->pvmbuffer != InvalidBuffer)
+               ReleaseBuffer(node->pvmbuffer);
        node->tbm = NULL;
        node->tbmiterator = NULL;
        node->tbmres = NULL;
@@ -704,6 +771,8 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
        node->initialized = false;
        node->shared_tbmiterator = NULL;
        node->shared_prefetch_iterator = NULL;
+       node->vmbuffer = InvalidBuffer;
+       node->pvmbuffer = InvalidBuffer;
 
        ExecScanReScan(&node->ss);
 
@@ -748,7 +817,7 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
        ExecEndNode(outerPlanState(node));
 
        /*
-        * release bitmap if any
+        * release bitmaps and buffers if any
         */
        if (node->tbmiterator)
                tbm_end_iterate(node->tbmiterator);
@@ -760,6 +829,10 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
                tbm_end_shared_iterate(node->shared_tbmiterator);
        if (node->shared_prefetch_iterator)
                tbm_end_shared_iterate(node->shared_prefetch_iterator);
+       if (node->vmbuffer != InvalidBuffer)
+               ReleaseBuffer(node->vmbuffer);
+       if (node->pvmbuffer != InvalidBuffer)
+               ReleaseBuffer(node->pvmbuffer);
 
        /*
         * close heap scan
@@ -805,6 +878,9 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
        scanstate->tbm = NULL;
        scanstate->tbmiterator = NULL;
        scanstate->tbmres = NULL;
+       scanstate->skip_fetch = false;
+       scanstate->vmbuffer = InvalidBuffer;
+       scanstate->pvmbuffer = InvalidBuffer;
        scanstate->exact_pages = 0;
        scanstate->lossy_pages = 0;
        scanstate->prefetch_iterator = NULL;
@@ -815,8 +891,19 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
        scanstate->pscan_len = 0;
        scanstate->initialized = false;
        scanstate->shared_tbmiterator = NULL;
+       scanstate->shared_prefetch_iterator = NULL;
        scanstate->pstate = NULL;
 
+       /*
+        * We can potentially skip fetching heap pages if we do not need any
+        * columns of the table, either for checking non-indexable quals or for
+        * returning data.  This test is a bit simplistic, as it checks the
+        * stronger condition that there's no qual or return tlist at all.  But in
+        * most cases it's probably not worth working harder than that.
+        */
+       scanstate->can_skip_fetch = (node->scan.plan.qual == NIL &&
+                                                                node->scan.plan.targetlist == NIL);
+
        /*
         * Miscellaneous initialization
         *
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index c802d61c39e3290a024eedd3d9ca787bc1a35854..4b497486a0ef093ec57a4a64ab507e563bacd746 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -807,6 +807,15 @@ use_physical_tlist(PlannerInfo *root, Path *path, int flags)
        if (IsA(path, CustomPath))
                return false;
 
+       /*
+        * If a bitmap scan's tlist is empty, keep it as-is.  This may allow the
+        * executor to skip heap page fetches, and in any case, the benefit of
+        * using a physical tlist instead would be minimal.
+        */
+       if (IsA(path, BitmapHeapPath) &&
+               path->pathtarget->exprs == NIL)
+               return false;
+
        /*
         * Can't do it if any system columns or whole-row Vars are requested.
         * (This could possibly be fixed but would take some fragile assumptions
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 8698c8a50c0ba5963becff5c9efea16da3121650..d209ec012c5cfe135c7f7eee793740a322628ca9 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -507,7 +507,7 @@ typedef struct EState
        bool       *es_epqTupleSet; /* true if EPQ tuple is provided */
        bool       *es_epqScanDone; /* true if EPQ tuple has been fetched */
 
-       bool            es_use_parallel_mode; /* can we use parallel workers? */
+       bool            es_use_parallel_mode;   /* can we use parallel workers? */
 
        /* The per-query shared memory area to use for parallel execution. */
        struct dsa_area *es_query_dsa;
@@ -1331,6 +1331,10 @@ typedef struct ParallelBitmapHeapState
  *             tbm                                bitmap obtained from child index scan(s)
  *             tbmiterator                iterator for scanning current pages
  *             tbmres                     current-page data
+ *             can_skip_fetch     can we potentially skip tuple fetches in this scan?
+ *             skip_fetch                 are we skipping tuple fetches on this page?
+ *             vmbuffer                   buffer for visibility-map lookups
+ *             pvmbuffer                  ditto, for prefetched pages
  *             exact_pages                total number of exact pages retrieved
  *             lossy_pages                total number of lossy pages retrieved
  *             prefetch_iterator  iterator for prefetching ahead of current page
@@ -1351,6 +1355,10 @@ typedef struct BitmapHeapScanState
        TIDBitmap  *tbm;
        TBMIterator *tbmiterator;
        TBMIterateResult *tbmres;
+       bool            can_skip_fetch;
+       bool            skip_fetch;
+       Buffer          vmbuffer;
+       Buffer          pvmbuffer;
        long            exact_pages;
        long            lossy_pages;
        TBMIterator *prefetch_iterator;
diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out
index fc91f3ce3652f9ce2a00201f0d5c43fa598486dd..991c287b11456408f733041d4f0cf4e98a9fc3ad 100644
--- a/src/test/regress/expected/stats.out
+++ b/src/test/regress/expected/stats.out
@@ -136,12 +136,15 @@ SELECT count(*) FROM tenk2;
 (1 row)
 
 -- do an indexscan
+-- make sure it is not a bitmap scan, which might skip fetching heap tuples
+SET enable_bitmapscan TO off;
 SELECT count(*) FROM tenk2 WHERE unique1 = 1;
  count 
 -------
      1
 (1 row)
 
+RESET enable_bitmapscan;
 -- We can't just call wait_for_stats() at this point, because we only
 -- transmit stats when the session goes idle, and we probably didn't
 -- transmit the last couple of counts yet thanks to the rate-limiting logic
diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql
index 6e882bf3acade32df1599c9be3f031be6dc9c8d2..2be7dde8346390c75b3fe243cc90bb80e76c750c 100644
--- a/src/test/regress/sql/stats.sql
+++ b/src/test/regress/sql/stats.sql
@@ -138,7 +138,10 @@ ROLLBACK;
 -- do a seqscan
 SELECT count(*) FROM tenk2;
 -- do an indexscan
+-- make sure it is not a bitmap scan, which might skip fetching heap tuples
+SET enable_bitmapscan TO off;
 SELECT count(*) FROM tenk2 WHERE unique1 = 1;
+RESET enable_bitmapscan;
 
 -- We can't just call wait_for_stats() at this point, because we only
 -- transmit stats when the session goes idle, and we probably didn't