Adjustments to the btree fastpath optimization.
author    Andrew Dunstan <andrew@dunslane.net>
          Tue, 10 Apr 2018 22:21:03 +0000 (18:21 -0400)
committer Andrew Dunstan <andrew@dunslane.net>
          Tue, 10 Apr 2018 22:21:03 +0000 (18:21 -0400)
This optimization was introduced in commit 2b272734. The changes include
some additional comments and documentation, as well as these more
substantive changes:
. Ensure the optimization is applied only on the leaf node of a tree
whose root is on level 2 or more; it's of little value on small trees.
. Delay calling RelationSetTargetBlock() until after the critical
section of _bt_insertonpg.
. Ensure the optimization is also applied to unlogged tables.

Pavan Deolasee and Peter Geoghegan with some very light editing from me.

Discussion: https://postgr.es/m/CABOikdO8jhRarNC60nZLktZYhxt+TK8z_V97+Ny499YQdyAfug@mail.gmail.com

src/backend/access/nbtree/README
src/backend/access/nbtree/nbtinsert.c

diff --git a/src/backend/access/nbtree/README b/src/backend/access/nbtree/README
index aef455c122a3440089fb5b0465152a4eb9d803d0..3680e69b89a8458d58f6c3361eb612788fa33786 100644
@@ -375,6 +375,25 @@ positives, so long as it never gives a false negative.  This makes it
 possible to implement the test with a small counter value stored on each
 index page.
 
+Fastpath For Index Insertion
+----------------------------
+
+We optimize for the common case of insertion of increasing index key
+values by caching the page into which this backend last inserted a
+value, provided that page was the rightmost leaf page. On the next
+insert we can quickly check whether the cached page is still the
+rightmost leaf page and the correct place for the new value, and if
+so avoid the cost of walking down the tree.
+
+The optimization works on the assumption that there can only be one
+non-ignorable rightmost leaf page in the B-Tree at any time, and so
+even a RecentGlobalXmin style interlock isn't required.  We cannot
+fail to detect that our hint was invalidated, because there can only
+be one such page at any time. It's possible for the page to be
+deleted and recycled without a backend's cached page being detected
+as invalidated, but only when the recycled block once again becomes
+the rightmost leaf page.
+
 On-the-Fly Deletion Of Index Tuples
 -----------------------------------
 
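For illustration, the flow described above might be rendered as a standalone helper along the following lines. This is only a sketch of the technique, not the committed code: _bt_doinsert performs these checks inline, the helper name bt_try_cached_rightmost is invented, and unique-index handling and serializable-conflict checks are omitted.

#include "postgres.h"

#include "access/nbtree.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

/*
 * Sketch only: try to reuse this backend's cached rightmost leaf page.
 * Returns true with *bufp exclusively locked if the hint is still good;
 * otherwise clears the hint and returns false so the caller can do a
 * normal descent from the root.
 */
static bool
bt_try_cached_rightmost(Relation rel, int indnkeyatts, ScanKey itup_scankey,
						Size itemsz, Buffer *bufp)
{
	BlockNumber cached = RelationGetTargetBlock(rel);
	Buffer		buf;
	Page		page;
	BTPageOpaque lpageop;

	if (!BlockNumberIsValid(cached))
		return false;			/* this backend has no cached hint */

	buf = ReadBuffer(rel, cached);
	if (!ConditionalLockBuffer(buf))
	{
		/* Contention on the rightmost page; just take the normal path. */
		ReleaseBuffer(buf);
		return false;
	}

	page = BufferGetPage(buf);
	lpageop = (BTPageOpaque) PageGetSpecialPointer(page);

	/*
	 * The hint is usable only if the page is still the rightmost leaf, is
	 * not half-dead or deleted, has room for the new tuple, and the new
	 * key sorts strictly after the first key on the page.
	 */
	if (P_ISLEAF(lpageop) && P_RIGHTMOST(lpageop) &&
		!P_IGNORE(lpageop) &&
		PageGetFreeSpace(page) > itemsz &&
		PageGetMaxOffsetNumber(page) >= P_FIRSTDATAKEY(lpageop) &&
		_bt_compare(rel, indnkeyatts, itup_scankey, page,
					P_FIRSTDATAKEY(lpageop)) > 0)
	{
		Assert(!P_INCOMPLETE_SPLIT(lpageop));
		*bufp = buf;			/* caller inserts here, skipping the descent */
		return true;
	}

	/* The hint went stale: drop the buffer and clear the cached block. */
	_bt_relbuf(rel, buf);
	RelationSetTargetBlock(rel, InvalidBlockNumber);
	return false;
}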
diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index 0b93acd02443a11aa914139cae81b76918a020d2..995cc61b4cf9f730ed526901f5b8e40af71651c0 100644
@@ -26,6 +26,8 @@
 #include "storage/smgr.h"
 #include "utils/tqual.h"
 
+/* Minimum tree height for application of fastpath optimization */
+#define BTREE_FASTPATH_MIN_LEVEL       2
 
 typedef struct
 {
@@ -125,7 +127,7 @@ _bt_doinsert(Relation rel, IndexTuple itup,
        /*
         * It's very common to have an index on an auto-incremented or
         * monotonically increasing value. In such cases, every insertion happens
-        * towards the end of the index. We try to optimise that case by caching
+        * towards the end of the index. We try to optimize that case by caching
         * the right-most leaf of the index. If our cached block is still the
         * rightmost leaf, has enough free space to accommodate a new entry and
         * the insertion key is strictly greater than the first key in this page,
@@ -176,13 +178,17 @@ top:
                         * the first key on the page.
                         */
                        if (P_ISLEAF(lpageop) && P_RIGHTMOST(lpageop) &&
-                               !P_INCOMPLETE_SPLIT(lpageop) &&
                                !P_IGNORE(lpageop) &&
                                (PageGetFreeSpace(page) > itemsz) &&
                                PageGetMaxOffsetNumber(page) >= P_FIRSTDATAKEY(lpageop) &&
                                _bt_compare(rel, indnkeyatts, itup_scankey, page,
                                                        P_FIRSTDATAKEY(lpageop)) > 0)
                        {
+                               /*
+                                * The right-most block should never have an incomplete split.
+                                * But be paranoid and check for it anyway.
+                                */
+                               Assert(!P_INCOMPLETE_SPLIT(lpageop));
                                fastpath = true;
                        }
                        else
@@ -868,6 +874,24 @@ _bt_insertonpg(Relation rel,
                bool            newitemonleft;
                Buffer          rbuf;
 
+               /*
+                * If we're here then a page split is needed. We should never reach
+                * here if we're using the fastpath, since we should have checked
+                * for all the required conditions, including the fact that this
+                * page has enough free space. Note that this routine can in theory
+                * handle a NULL stack pointer (that's what would happen if the
+                * fastpath were taken), as it does during crash recovery. But that
+                * path is much slower, defeating the very purpose of the
+                * optimization.  The following assertion should protect us from
+                * any future code changes that invalidate those assumptions.
+                *
+                * Note that whenever we fail to take the fastpath, we clear the
+                * cached block. Checking for a valid cached block at this point is
+                * enough to decide whether we're in a fastpath or not.
+                */
+               Assert(!(P_ISLEAF(lpageop) &&
+                               BlockNumberIsValid(RelationGetTargetBlock(rel))));
+
                /* Choose the split point */
                firstright = _bt_findsplitloc(rel, page,
                                                                          newitemoff, itemsz,
@@ -905,6 +929,7 @@ _bt_insertonpg(Relation rel,
                BTMetaPageData *metad = NULL;
                OffsetNumber itup_off;
                BlockNumber itup_blkno;
+               BlockNumber     cachedBlock = InvalidBlockNumber;
 
                itup_off = newitemoff;
                itup_blkno = BufferGetBlockNumber(buf);
@@ -962,6 +987,15 @@ _bt_insertonpg(Relation rel,
                        MarkBufferDirty(cbuf);
                }
 
+               /*
+                * Cache the block information if we just inserted into the
+                * rightmost leaf page of the index and it's not the root page.
+                * For a very small index where the root is also a leaf, there is
+                * no point in trying the optimization.
+                */
+               if (P_RIGHTMOST(lpageop) && P_ISLEAF(lpageop) && !P_ISROOT(lpageop))
+                       cachedBlock = BufferGetBlockNumber(buf);
+
                /* XLOG stuff */
                if (RelationNeedsWAL(rel))
                {
@@ -977,16 +1011,7 @@ _bt_insertonpg(Relation rel,
                        XLogRegisterData((char *) &xlrec, SizeOfBtreeInsert);
 
                        if (P_ISLEAF(lpageop))
-                       {
                                xlinfo = XLOG_BTREE_INSERT_LEAF;
-
-                               /*
-                                * Cache the block information if we just inserted into the
-                                * rightmost leaf page of the index.
-                                */
-                               if (P_RIGHTMOST(lpageop))
-                                       RelationSetTargetBlock(rel, BufferGetBlockNumber(buf));
-                       }
                        else
                        {
                                /*
@@ -1048,6 +1073,22 @@ _bt_insertonpg(Relation rel,
                if (BufferIsValid(cbuf))
                        _bt_relbuf(rel, cbuf);
                _bt_relbuf(rel, buf);
+
+               /*
+                * If we decided to cache the insertion target block, set it now.
+                * But first check the height of the tree and skip the optimization
+                * for small indexes. We defer that check to this point so that we
+                * never call _bt_getrootheight while holding a lock on any other
+                * block.
+                *
+                * We do this after dropping the locks on all buffers, so by the
+                * time we set it the insertion block may no longer be the
+                * rightmost one. That is fine: we will simply detect it during
+                * the next insert; no special care is required while setting it.
+                */
+               if (BlockNumberIsValid(cachedBlock) &&
+                       _bt_getrootheight(rel) >= BTREE_FASTPATH_MIN_LEVEL)
+                       RelationSetTargetBlock(rel, cachedBlock);
        }
 }
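Condensed, the ordering that the final hunk enforces can be read as follows. Again a sketch under the same assumptions as above, not the full _bt_insertonpg: the actual tuple placement, WAL logging, and child-buffer handling are elided.

	BlockNumber cachedBlock = InvalidBlockNumber;

	START_CRIT_SECTION();

	/* ... place the tuple on the page, mark buffers dirty, emit WAL ... */

	/* While buffer locks are held, only remember the block in a local. */
	if (P_RIGHTMOST(lpageop) && P_ISLEAF(lpageop) && !P_ISROOT(lpageop))
		cachedBlock = BufferGetBlockNumber(buf);

	END_CRIT_SECTION();

	_bt_relbuf(rel, buf);		/* drop every buffer lock first */

	/*
	 * Publish the hint only now: _bt_getrootheight may have to read the
	 * metapage, which we must not do while holding another buffer lock,
	 * and trees below BTREE_FASTPATH_MIN_LEVEL aren't worth the fastpath.
	 */
	if (BlockNumberIsValid(cachedBlock) &&
		_bt_getrootheight(rel) >= BTREE_FASTPATH_MIN_LEVEL)
		RelationSetTargetBlock(rel, cachedBlock);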