]> granicus.if.org Git - postgresql/commitdiff
Microvacuum for GIST
author Teodor Sigaev <teodor@sigaev.ru>
Wed, 9 Sep 2015 15:43:37 +0000 (18:43 +0300)
committer Teodor Sigaev <teodor@sigaev.ru>
Wed, 9 Sep 2015 15:43:37 +0000 (18:43 +0300)
Mark an index tuple as dead if it is reported via kill_prior_tuple during
an ordinary (search) scan, and remove it during insertion if there is not
enough space for the new tuple. This improves select performance
because the index will not return tuples marked as dead, and improves insert
performance because it reduces the number of page splits.

Anastasia Lubennikova <a.lubennikova@postgrespro.ru> with
 minor editorialization by me

src/backend/access/gist/gist.c
src/backend/access/gist/gistget.c
src/backend/access/gist/gistscan.c
src/include/access/gist.h
src/include/access/gist_private.h

index 0e499598a428c8859168f8665662049797109f10..4edc5a75f2836c14f79d78155162646bfb5d5cd3 100644 (file)
@@ -36,6 +36,7 @@ static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
                                 bool unlockbuf, bool unlockleftchild);
 static void gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
                                GISTSTATE *giststate, List *splitinfo, bool releasebuf);
+static void gistvacuumpage(Relation rel, Page page, Buffer buffer);
 
 
 #define ROTATEDIST(d) do { \
@@ -209,6 +210,17 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
         * because the tuple vector passed to gistSplit won't include this tuple.
         */
        is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
+
+       /*
+        * If leaf page is full, first try to delete dead tuples, and then
+        * check again.
+        */
+       if (is_split && GistPageIsLeaf(page) && GistPageHasGarbage(page))
+       {
+               gistvacuumpage(rel, page, buffer);
+               is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
+       }
+
        if (is_split)
        {
                /* no space for insertion */
@@ -1440,3 +1452,73 @@ freeGISTstate(GISTSTATE *giststate)
        /* It's sufficient to delete the scanCxt */
        MemoryContextDelete(giststate->scanCxt);
 }
+
+/*
+ * gistvacuumpage() -- remove all LP_DEAD items from the given leaf page.
+ * The caller must hold an exclusive lock on the buffer.
+ */
+static void
+gistvacuumpage(Relation rel, Page page, Buffer buffer)
+{
+       OffsetNumber deletable[MaxIndexTuplesPerPage];
+       int                      ndeletable = 0;
+       OffsetNumber offnum, maxoff;
+
+       Assert(GistPageIsLeaf(page));
+
+       /*
+        * Scan over all items on the page and collect the offsets of those
+        * whose line pointers are flagged LP_DEAD.
+        */
+       maxoff = PageGetMaxOffsetNumber(page);
+       for (offnum = FirstOffsetNumber;
+                offnum <= maxoff;
+                offnum = OffsetNumberNext(offnum))
+       {
+               ItemId          itemId = PageGetItemId(page, offnum);
+
+               if (ItemIdIsDead(itemId))
+                       deletable[ndeletable++] = offnum;
+       }
+
+       if (ndeletable > 0)
+       {
+               START_CRIT_SECTION();
+
+               PageIndexMultiDelete(page, deletable, ndeletable);
+
+               /*
+                * Mark the page as not containing any LP_DEAD items.  This is not
+                * necessarily true (there might be some that have recently been
+                * marked, but weren't included in our target-item list), but it will
+                * almost always be true and it doesn't seem worth an additional page
+                * scan to check it. Remember that F_HAS_GARBAGE is only a hint anyway.
+                */
+               GistClearPageHasGarbage(page);
+
+               MarkBufferDirty(buffer);
+
+               /* WAL-log the deletion; otherwise stamp the page with a fake LSN */
+               if (RelationNeedsWAL(rel))
+               {
+                       XLogRecPtr      recptr;
+
+                       recptr = gistXLogUpdate(rel->rd_node, buffer,
+                                                                       deletable, ndeletable,
+                                                                       NULL, 0, InvalidBuffer);
+
+                       PageSetLSN(page, recptr);
+               }
+               else
+                       PageSetLSN(page, gistGetFakeLSN(rel));
+
+               END_CRIT_SECTION();
+       }
+
+       /*
+        * Note: if we didn't find any LP_DEAD items, then the page's
+        * F_HAS_GARBAGE hint bit is falsely set.  We do not bother expending a
+        * separate write to clear it, however.  We will clear it when we split
+        * the page.
+        */
+}
index 20f695cee4ed09be550817a4e480e36cbbf9b920..473ae430dd01a4bba7eca50f2b89d8cde1a070dc 100644 (file)
 #include "utils/memutils.h"
 #include "utils/rel.h"
 
+/*
+ * gistkillitems() -- set LP_DEAD state for items an indexscan caller has
+ * told us were killed.
+ *
+ * The offsets stored in so->killedItems refer to page so->curBlkno as it
+ * looked when it was scanned.  We re-read the page here, so it's important
+ * to check the page LSN first.  If the page has been modified since the
+ * last read (as determined by LSN), we cannot flag any entries because it
+ * is possible that the old entry was vacuumed away and the TID was re-used
+ * by a completely different heap tuple.
+ *
+ * Once the page has been read, so->numKilled is reset to zero on return,
+ * whether or not any hints were actually applied.
+ */
+static void
+gistkillitems(IndexScanDesc scan)
+{
+       GISTScanOpaque  so = (GISTScanOpaque) scan->opaque;
+       Buffer                  buffer;
+       Page                    page;
+       OffsetNumber    offnum;
+       ItemId                  iid;
+       int                             i;
+       bool                    killedsomething = false;
+
+       Assert(so->curBlkno != InvalidBlockNumber);
+       Assert(!XLogRecPtrIsInvalid(so->curPageLSN));
+       Assert(so->killedItems != NULL);
+
+       buffer = ReadBuffer(scan->indexRelation, so->curBlkno);
+       if (!BufferIsValid(buffer))
+               return;
+
+       LockBuffer(buffer, GIST_SHARE);
+       gistcheckpage(scan->indexRelation, buffer);
+       page = BufferGetPage(buffer);
+
+       /*
+        * If the page LSN differs, the page was modified since it was scanned.
+        * The saved offsets may no longer be valid, so applying LP_DEAD hints is
+        * not safe.
+        *
+        * We hold only a share lock here, under which the page LSN can be
+        * updated concurrently by other backends setting hint bits, so a plain
+        * PageGetLSN() read could be torn.  Use BufferGetLSNAtomic() instead.
+        */
+       if (BufferGetLSNAtomic(buffer) != so->curPageLSN)
+       {
+               UnlockReleaseBuffer(buffer);
+               so->numKilled = 0; /* reset counter */
+               return;
+       }
+
+       Assert(GistPageIsLeaf(page));
+
+       /*
+        * Mark all killedItems as dead. We need no additional recheck,
+        * because, if page was modified, pageLSN must have changed.
+        */
+       for (i = 0; i < so->numKilled; i++)
+       {
+               offnum = so->killedItems[i];
+               iid = PageGetItemId(page, offnum);
+               ItemIdMarkDead(iid);
+               killedsomething = true;
+       }
+
+       if (killedsomething)
+       {
+               /* Only hint bits changed, so a hint-style dirty mark is enough */
+               GistMarkPageHasGarbage(page);
+               MarkBufferDirtyHint(buffer, true);
+       }
+
+       UnlockReleaseBuffer(buffer);
+
+       /*
+        * Always reset the scan state, so we don't look for same items on other
+        * pages.
+        */
+       so->numKilled = 0;
+}
 
 /*
  * gistindex_keytest() -- does this index tuple satisfy the scan key(s)?
@@ -305,17 +376,33 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
        if (so->pageDataCxt)
                MemoryContextReset(so->pageDataCxt);
 
+       /*
+        * We save the LSN of the page as we read it, so that we know whether it
+        * is safe to apply LP_DEAD hints to the page later. This allows us to drop
+        * the pin for MVCC scans, which allows vacuum to avoid blocking.
+        */
+       so->curPageLSN = PageGetLSN(page);
+
        /*
         * check all tuples on page
         */
        maxoff = PageGetMaxOffsetNumber(page);
        for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
        {
-               IndexTuple      it = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
+               ItemId      iid = PageGetItemId(page, i);
+               IndexTuple      it;
                bool            match;
                bool            recheck;
                bool            recheck_distances;
 
+               /*
+                * If the scan specifies not to return killed tuples, then we treat a
+                * killed tuple as not passing the qual.
+                */
+               if(scan->ignore_killed_tuples && ItemIdIsDead(iid))
+                       continue;
+
+               it = (IndexTuple) PageGetItem(page, iid);
                /*
                 * Must call gistindex_keytest in tempCxt, and clean up any leftover
                 * junk afterward.
@@ -348,6 +435,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
                         */
                        so->pageData[so->nPageData].heapPtr = it->t_tid;
                        so->pageData[so->nPageData].recheck = recheck;
+                       so->pageData[so->nPageData].offnum = i;
 
                        /*
                         * In an index-only scan, also fetch the data from the tuple.
@@ -572,7 +660,24 @@ gistgettuple(PG_FUNCTION_ARGS)
                {
                        if (so->curPageData < so->nPageData)
                        {
+                               if (scan->kill_prior_tuple && so->curPageData > 0)
+                               {
+
+                                       if (so->killedItems == NULL)
+                                       {
+                                               MemoryContext oldCxt =
+                                                       MemoryContextSwitchTo(so->giststate->scanCxt);
+
+                                               so->killedItems =
+                                                       (OffsetNumber *) palloc(MaxIndexTuplesPerPage
+                                                               * sizeof(OffsetNumber));
 
+                                               MemoryContextSwitchTo(oldCxt);
+                                       }
+                                       if (so->numKilled < MaxIndexTuplesPerPage)
+                                               so->killedItems[so->numKilled++] =
+                                                       so->pageData[so->curPageData - 1].offnum;
+                               }
                                /* continuing to return tuples from a leaf page */
                                scan->xs_ctup.t_self = so->pageData[so->curPageData].heapPtr;
                                scan->xs_recheck = so->pageData[so->curPageData].recheck;
@@ -586,9 +691,36 @@ gistgettuple(PG_FUNCTION_ARGS)
                                PG_RETURN_BOOL(true);
                        }
 
+                       /*
+                        * Check the last returned tuple and add it to killitems if
+                        * necessary
+                        */
+                       if (scan->kill_prior_tuple
+                               && so->curPageData > 0
+                               && so->curPageData == so->nPageData)
+                       {
+
+                               if (so->killedItems == NULL)
+                               {
+                                       MemoryContext oldCxt =
+                                               MemoryContextSwitchTo(so->giststate->scanCxt);
+
+                                       so->killedItems =
+                                               (OffsetNumber *) palloc(MaxIndexTuplesPerPage
+                                                       * sizeof(OffsetNumber));
+
+                                       MemoryContextSwitchTo(oldCxt);
+                               }
+                               if (so->numKilled < MaxIndexTuplesPerPage)
+                                       so->killedItems[so->numKilled++] =
+                                               so->pageData[so->curPageData - 1].offnum;
+                       }
                        /* find and process the next index page */
                        do
                        {
+                               if ((so->curBlkno != InvalidBlockNumber) && (so->numKilled > 0))
+                                       gistkillitems(scan);
+
                                GISTSearchItem *item = getNextGISTSearchItem(so);
 
                                if (!item)
@@ -596,6 +728,9 @@ gistgettuple(PG_FUNCTION_ARGS)
 
                                CHECK_FOR_INTERRUPTS();
 
+                               /* save current item BlockNumber for next gistkillitems() call */
+                               so->curBlkno = item->blkno;
+
                                /*
                                 * While scanning a leaf page, ItemPointers of matching heap
                                 * tuples are stored in so->pageData.  If there are any on
index ad392948756e6df3e158d26588650b1331a820b5..a17c5bc56460eb0805923a3e40cc10f87c719918 100644 (file)
@@ -93,6 +93,11 @@ gistbeginscan(PG_FUNCTION_ARGS)
                memset(scan->xs_orderbynulls, true, sizeof(bool) * scan->numberOfOrderBys);
        }
 
+       so->killedItems = NULL;         /* until needed */
+       so->numKilled = 0;
+       so->curBlkno = InvalidBlockNumber;
+       so->curPageLSN = InvalidXLogRecPtr;
+
        scan->opaque = so;
 
        /*
index 81e559bc2dd260ce76bb546996fc3d41153e94f4..ea3a3b01f47389de9e4f9f812e511ef378725bae 100644 (file)
  */
 #define F_LEAF                         (1 << 0)        /* leaf page */
 #define F_DELETED                      (1 << 1)        /* the page has been deleted */
-#define F_TUPLES_DELETED       (1 << 2)        /* some tuples on the page are dead */
+#define F_TUPLES_DELETED       (1 << 2)        /* some tuples on the page were
+                                                                                * deleted */
 #define F_FOLLOW_RIGHT         (1 << 3)        /* page to the right has no downlink */
+#define F_HAS_GARBAGE          (1 << 4)        /* some tuples on the page are dead,
+                                                                                * but not deleted yet */
 
 typedef XLogRecPtr GistNSN;
 
@@ -137,6 +140,10 @@ typedef struct GISTENTRY
 #define GistMarkTuplesDeleted(page) ( GistPageGetOpaque(page)->flags |= F_TUPLES_DELETED)
 #define GistClearTuplesDeleted(page)   ( GistPageGetOpaque(page)->flags &= ~F_TUPLES_DELETED)
 
+#define GistPageHasGarbage(page) ( GistPageGetOpaque(page)->flags & F_HAS_GARBAGE)
+#define GistMarkPageHasGarbage(page) ( GistPageGetOpaque(page)->flags |= F_HAS_GARBAGE)
+#define GistClearPageHasGarbage(page)  ( GistPageGetOpaque(page)->flags &= ~F_HAS_GARBAGE)
+
 #define GistFollowRight(page) ( GistPageGetOpaque(page)->flags & F_FOLLOW_RIGHT)
 #define GistMarkFollowRight(page) ( GistPageGetOpaque(page)->flags |= F_FOLLOW_RIGHT)
 #define GistClearFollowRight(page)     ( GistPageGetOpaque(page)->flags &= ~F_FOLLOW_RIGHT)
index 4f1a5c33eae20abbbfea94628533734dd487947d..1a77982391eed614eb627f748773a9b3a1916e61 100644 (file)
@@ -22,6 +22,7 @@
 #include "storage/bufmgr.h"
 #include "storage/buffile.h"
 #include "utils/hsearch.h"
+#include "access/genam.h"
 
 /*
  * Maximum number of "halves" a page can be split into in one operation.
@@ -121,9 +122,11 @@ typedef struct GISTSearchHeapItem
 {
        ItemPointerData heapPtr;
        bool            recheck;                /* T if quals must be rechecked */
-       bool            recheckDistances;               /* T if distances must be rechecked */
+       bool            recheckDistances;       /* T if distances must be rechecked */
        IndexTuple      ftup;                   /* data fetched back from the index, used in
                                                                 * index-only scans */
+       OffsetNumber    offnum;         /* track offset in page to mark tuple as
+                                                                * LP_DEAD */
 } GISTSearchHeapItem;
 
 /* Unvisited item, either index page or heap tuple */
@@ -161,6 +164,12 @@ typedef struct GISTScanOpaqueData
        /* pre-allocated workspace arrays */
        double     *distances;          /* output area for gistindex_keytest */
 
+       /* info about killed items if any (killedItems is NULL if never used) */
+       OffsetNumber *killedItems;              /* offset numbers of killed items */
+       int                     numKilled;              /* number of currently stored items */
+       BlockNumber curBlkno;           /* current number of block */
+       GistNSN         curPageLSN;     /* pos in the WAL stream when page was read */
+
        /* In a non-ordered search, returnable heap items are stored here: */
        GISTSearchHeapItem pageData[BLCKSZ / sizeof(IndexTupleData)];
        OffsetNumber nPageData;         /* number of valid items in array */