granicus.if.org Git - postgresql/commitdiff
Further optimize GIN multi-key searches.
author Heikki Linnakangas <heikki.linnakangas@iki.fi>
Wed, 29 Jan 2014 19:22:08 +0000 (21:22 +0200)
committer Heikki Linnakangas <heikki.linnakangas@iki.fi>
Wed, 29 Jan 2014 19:24:38 +0000 (21:24 +0200)
When skipping over some items in a posting tree, re-find the new location
by descending the tree from root, rather than walking the right links.
This can save a lot of I/O.

Heavily modified from Alexander Korotkov's fast scan patch.

src/backend/access/gin/gindatapage.c
src/backend/access/gin/ginget.c
src/include/access/gin_private.h

index 9a0b8ab1f214258ece93695f96ec04a441cc3018..c6230f3bc5a433a408a7290ba72900a9f5aa354b 100644 (file)
@@ -1639,16 +1639,15 @@ ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
  * Starts a new scan on a posting tree.
  */
 GinBtreeStack *
-ginScanBeginPostingTree(Relation index, BlockNumber rootBlkno)
+ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno)
 {
-       GinBtreeData btree;
        GinBtreeStack *stack;
 
-       ginPrepareDataScan(&btree, index, rootBlkno);
+       ginPrepareDataScan(btree, index, rootBlkno);
 
-       btree.fullScan = TRUE;
+       btree->fullScan = TRUE;
 
-       stack = ginFindLeafPage(&btree, TRUE);
+       stack = ginFindLeafPage(btree, TRUE);
 
        return stack;
 }
index 49e47c6859c57e44932aa87e1489f0e8e715f9cf..a45d72212e9436b27e418dec81a9a0380d1bf637 100644 (file)
@@ -99,12 +99,13 @@ static void
 scanPostingTree(Relation index, GinScanEntry scanEntry,
                                BlockNumber rootPostingTree)
 {
+       GinBtreeData btree;
        GinBtreeStack *stack;
        Buffer          buffer;
        Page            page;
 
        /* Descend to the leftmost leaf page */
-       stack = ginScanBeginPostingTree(index, rootPostingTree);
+       stack = ginScanBeginPostingTree(&btree, index, rootPostingTree);
        buffer = stack->buffer;
        IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
 
@@ -412,7 +413,8 @@ restartScanEntry:
                        LockBuffer(stackEntry->buffer, GIN_UNLOCK);
                        needUnlock = FALSE;
 
-                       stack = ginScanBeginPostingTree(ginstate->index, rootPostingTree);
+                       stack = ginScanBeginPostingTree(&entry->btree, ginstate->index,
+                                                                                       rootPostingTree);
                        entry->buffer = stack->buffer;
 
                        /*
@@ -506,8 +508,60 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
 {
        Page            page;
        int                     i;
+       bool            stepright;
+
+       if (!BufferIsValid(entry->buffer))
+       {
+               entry->isFinished = true;
+               return;
+       }
+
+       /*
+        * We have two strategies for finding the correct page: step right from
+        * the current page, or descend the tree again from the root. If
+        * advancePast equals the current item, the next matching item should be
+        * on the next page, so we step right. Otherwise, descend from root.
+        */
+       if (ginCompareItemPointers(&entry->curItem, &advancePast) == 0)
+       {
+               stepright = true;
+               LockBuffer(entry->buffer, GIN_SHARE);
+       }
+       else
+       {
+               GinBtreeStack *stack;
+
+               ReleaseBuffer(entry->buffer);
+
+               /*
+                * Set the search key, and find the correct leaf page.
+                */
+               if (ItemPointerIsLossyPage(&advancePast))
+               {
+                       ItemPointerSet(&entry->btree.itemptr,
+                                                  GinItemPointerGetBlockNumber(&advancePast) + 1,
+                                                  FirstOffsetNumber);
+               }
+               else
+               {
+                       entry->btree.itemptr = advancePast;
+                       entry->btree.itemptr.ip_posid++;
+               }
+               entry->btree.fullScan = false;
+               stack = ginFindLeafPage(&entry->btree, true);
+
+               /* we don't need the stack, just the buffer. */
+               entry->buffer = stack->buffer;
+               IncrBufferRefCount(entry->buffer);
+               freeGinBtreeStack(stack);
+               stepright = false;
+       }
+
+       elog(DEBUG2, "entryLoadMoreItems, %u/%u, skip: %d",
+                GinItemPointerGetBlockNumber(&advancePast),
+                GinItemPointerGetOffsetNumber(&advancePast),
+                !stepright);
 
-       LockBuffer(entry->buffer, GIN_SHARE);
        page = BufferGetPage(entry->buffer);
        for (;;)
        {
@@ -519,30 +573,34 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
                        entry->nlist = 0;
                }
 
-               /*
-                * We've processed all the entries on this page. If it was the last
-                * page in the tree, we're done.
-                */
-               if (GinPageRightMost(page))
+               if (stepright)
                {
-                       UnlockReleaseBuffer(entry->buffer);
-                       entry->buffer = InvalidBuffer;
-                       entry->isFinished = TRUE;
-                       return;
+                       /*
+                        * We've processed all the entries on this page. If it was the last
+                        * page in the tree, we're done.
+                        */
+                       if (GinPageRightMost(page))
+                       {
+                               UnlockReleaseBuffer(entry->buffer);
+                               entry->buffer = InvalidBuffer;
+                               entry->isFinished = TRUE;
+                               return;
+                       }
+
+                       /*
+                        * Step to next page, following the right link. then find the first
+                        * ItemPointer greater than advancePast.
+                        */
+                       entry->buffer = ginStepRight(entry->buffer,
+                                                                                ginstate->index,
+                                                                                GIN_SHARE);
+                       page = BufferGetPage(entry->buffer);
                }
+               stepright = true;
 
                if (GinPageGetOpaque(page)->flags & GIN_DELETED)
                        continue;               /* page was deleted by concurrent vacuum */
 
-               /*
-                * Step to next page, following the right link. then find the first
-                * ItemPointer greater than advancePast.
-                */
-               entry->buffer = ginStepRight(entry->buffer,
-                                                                        ginstate->index,
-                                                                        GIN_SHARE);
-               page = BufferGetPage(entry->buffer);
-
                /*
                 * The first item > advancePast might not be on this page, but
                 * somewhere to the right, if the page was split, or a non-match from
@@ -566,8 +624,16 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
                {
                        if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0)
                        {
-                               LockBuffer(entry->buffer, GIN_UNLOCK);
                                entry->offset = i;
+
+                               if (GinPageRightMost(page))
+                               {
+                                       /* after processing the copied items, we're done. */
+                                       UnlockReleaseBuffer(entry->buffer);
+                                       entry->buffer = InvalidBuffer;
+                               }
+                               else
+                                       LockBuffer(entry->buffer, GIN_UNLOCK);
                                return;
                        }
                }
@@ -677,7 +743,10 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
        }
        else if (!BufferIsValid(entry->buffer))
        {
-               /* A posting list from an entry tuple  */
+               /*
+                * A posting list from an entry tuple, or the last page of a posting
+                * tree.
+                */
                do
                {
                        if (entry->offset >= entry->nlist)
index ea9ae31acc0ea57b42740585f78de2fd893eda42..bb0ab317cbcf6859eccdb36807f4a55a818b5d64 100644 (file)
@@ -702,7 +702,7 @@ extern void GinPageDeletePostingItem(Page page, OffsetNumber offset);
 extern void ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
                                          ItemPointerData *items, uint32 nitem,
                                          GinStatsData *buildStats);
-extern GinBtreeStack *ginScanBeginPostingTree(Relation index, BlockNumber rootBlkno);
+extern GinBtreeStack *ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno);
 extern void ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
 extern void ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno);
 
@@ -802,6 +802,7 @@ typedef struct GinScanEntryData
        bool            isFinished;
        bool            reduceResult;
        uint32          predictNumberResult;
+       GinBtreeData btree;
 }      GinScanEntryData;
 
 typedef struct GinScanOpaqueData