]> granicus.if.org Git - postgresql/commitdiff
Fix using too many LWLocks bug, reported by Craig Ringer
authorTeodor Sigaev <teodor@sigaev.ru>
Tue, 22 Apr 2008 17:52:43 +0000 (17:52 +0000)
committerTeodor Sigaev <teodor@sigaev.ru>
Tue, 22 Apr 2008 17:52:43 +0000 (17:52 +0000)
<craig@postnewspapers.com.au>.
It was my mistake, I missed limitation of number of held locks, now GIN doesn't
use continiuous locks, but still hold buffers pinned to prevent interference
with vacuum's deletion algorithm.

Backpatch is needed.

src/backend/access/gin/ginget.c
src/include/access/gin.h

index 26abaa76afaaf621cef2be495d97b46a2e0c6ec8..b95fd7eed91c1d956184b840352758c0e64d8e4b 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                     $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.13 2008/04/14 17:05:33 tgl Exp $
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.14 2008/04/22 17:52:43 teodor Exp $
  *-------------------------------------------------------------------------
  */
 
@@ -29,58 +29,16 @@ findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
                /* page was deleted by concurrent  vacuum */
                return false;
 
-       if (*off > maxoff || *off == InvalidOffsetNumber)
-               res = -1;
-       else
-               res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
-
-       if (res == 0)
-       {
-               /* page isn't changed */
-               return true;
-       }
-       else if (res > 0)
-       {
-               /*
-                * some items was added before our position, look further to find it
-                * or first greater
-                */
-
-               (*off)++;
-               for (; *off <= maxoff; (*off)++)
-               {
-                       res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
-
-                       if (res == 0)
-                               return true;
+       /*
+        * scan page to find equal or first greater value
+        */
 
-                       if (res < 0)
-                       {
-                               (*off)--;
-                               return true;
-                       }
-               }
-       }
-       else
+       for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++)
        {
-               /*
-                * some items was deleted before our position, look from begining to
-                * find it or first greater
-                */
-
-               for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++)
-               {
-                       res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
-
-                       if (res == 0)
-                               return true;
+               res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
 
-                       if (res < 0)
-                       {
-                               (*off)--;
-                               return true;
-                       }
-               }
+               if (res <= 0)
+                       return true;
        }
 
        return false;
@@ -91,119 +49,87 @@ findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
  * Stop* functions unlock buffer (but don't release!)
  */
 static void
-startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry, bool firstCall)
+startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
 {
+       GinBtreeData    btreeEntry;
+       GinBtreeStack  *stackEntry;
+       Page                    page;
+       bool                    needUnlock = TRUE;
+
        if (entry->master != NULL)
        {
                entry->isFinished = entry->master->isFinished;
                return;
        }
 
-       if (firstCall)
-       {
-               /*
-                * at first call we should find entry, and begin scan of posting tree
-                * or just store posting list in memory
-                */
-               GinBtreeData btreeEntry;
-               GinBtreeStack *stackEntry;
-               Page            page;
-               bool            needUnlock = TRUE;
-
-               prepareEntryScan(&btreeEntry, index, entry->entry, ginstate);
-               btreeEntry.searchMode = TRUE;
-               stackEntry = ginFindLeafPage(&btreeEntry, NULL);
-               page = BufferGetPage(stackEntry->buffer);
-
-               entry->isFinished = TRUE;
-               entry->buffer = InvalidBuffer;
-               entry->offset = InvalidOffsetNumber;
-               entry->list = NULL;
-               entry->nlist = 0;
-               entry->reduceResult = FALSE;
-               entry->predictNumberResult = 0;
-
-               if (btreeEntry.findItem(&btreeEntry, stackEntry))
-               {
-                       IndexTuple      itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
-
-                       if (GinIsPostingTree(itup))
-                       {
-                               BlockNumber rootPostingTree = GinGetPostingTree(itup);
-                               GinPostingTreeScan *gdi;
-                               Page            page;
+       /*
+        * we should find entry, and begin scan of posting tree
+        * or just store posting list in memory
+        */
 
-                               LockBuffer(stackEntry->buffer, GIN_UNLOCK);
-                               needUnlock = FALSE;
-                               gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
+       prepareEntryScan(&btreeEntry, index, entry->entry, ginstate);
+       btreeEntry.searchMode = TRUE;
+       stackEntry = ginFindLeafPage(&btreeEntry, NULL);
+       page = BufferGetPage(stackEntry->buffer);
 
-                               entry->buffer = scanBeginPostingTree(gdi);
-                               IncrBufferRefCount(entry->buffer);
+       entry->isFinished = TRUE;
+       entry->buffer = InvalidBuffer;
+       entry->offset = InvalidOffsetNumber;
+       entry->list = NULL;
+       entry->nlist = 0;
+       entry->reduceResult = FALSE;
+       entry->predictNumberResult = 0;
 
-                               page = BufferGetPage(entry->buffer);
-                               entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff;
+       if (btreeEntry.findItem(&btreeEntry, stackEntry))
+       {
+               IndexTuple      itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
 
-                               freeGinBtreeStack(gdi->stack);
-                               pfree(gdi);
-                               entry->isFinished = FALSE;
-                       }
-                       else if (GinGetNPosting(itup) > 0)
-                       {
-                               entry->nlist = GinGetNPosting(itup);
-                               entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist);
-                               memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist);
-                               entry->isFinished = FALSE;
-                       }
-               }
+               if (GinIsPostingTree(itup))
+               {
+                       BlockNumber rootPostingTree = GinGetPostingTree(itup);
+                       GinPostingTreeScan *gdi;
+                       Page            page;
 
-               if (needUnlock)
                        LockBuffer(stackEntry->buffer, GIN_UNLOCK);
-               freeGinBtreeStack(stackEntry);
-       }
-       else if (entry->buffer != InvalidBuffer)
-       {
-               /* we should find place where we was stopped */
-               BlockNumber blkno;
-               Page            page;
+                       needUnlock = FALSE;
+                       gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
 
-               LockBuffer(entry->buffer, GIN_SHARE);
-
-               if (!ItemPointerIsValid(&entry->curItem))
-                       /* start position */
-                       return;
-               Assert(entry->offset != InvalidOffsetNumber);
+                       entry->buffer = scanBeginPostingTree(gdi);
+                       /*
+                        * We keep buffer pinned because we need to prevent deletition
+                        * page during scan. See GIN's vacuum implementation. RefCount
+                        * is increased to keep buffer pinned after freeGinBtreeStack() call.
+                        */
+                       IncrBufferRefCount(entry->buffer);
 
-               page = BufferGetPage(entry->buffer);
+                       page = BufferGetPage(entry->buffer);
+                       entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff;
 
-               /* try to find curItem in current buffer */
-               if (findItemInPage(page, &entry->curItem, &entry->offset))
-                       return;
+                       /*
+                        * Keep page content in memory to prevent durable page locking
+                        */
+                       entry->list = (ItemPointerData *) palloc( BLCKSZ );
+                       entry->nlist = GinPageGetOpaque(page)->maxoff;
+                       memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber), 
+                                               GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) );
 
-               /* walk to right */
-               while ((blkno = GinPageGetOpaque(page)->rightlink) != InvalidBlockNumber)
-               {
                        LockBuffer(entry->buffer, GIN_UNLOCK);
-                       entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno);
-                       LockBuffer(entry->buffer, GIN_SHARE);
-                       page = BufferGetPage(entry->buffer);
-
-                       entry->offset = InvalidOffsetNumber;
-                       if (findItemInPage(page, &entry->curItem, &entry->offset))
-                               return;
+                       freeGinBtreeStack(gdi->stack);
+                       pfree(gdi);
+                       entry->isFinished = FALSE;
+               }
+               else if (GinGetNPosting(itup) > 0)
+               {
+                       entry->nlist = GinGetNPosting(itup);
+                       entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist);
+                       memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist);
+                       entry->isFinished = FALSE;
                }
-
-               /*
-                * curItem and any greated items was deleted by concurrent vacuum, so
-                * we finished scan with currrent entry
-                */
        }
-}
 
-static void
-stopScanEntry(GinScanEntry entry)
-{
-       if (entry->buffer != InvalidBuffer)
-               LockBuffer(entry->buffer, GIN_UNLOCK);
+       if (needUnlock)
+                       LockBuffer(stackEntry->buffer, GIN_UNLOCK);
+       freeGinBtreeStack(stackEntry);
 }
 
 static void
@@ -211,47 +137,38 @@ startScanKey(Relation index, GinState *ginstate, GinScanKey key)
 {
        uint32          i;
 
+       if (!key->firstCall)
+               return;
+
        for (i = 0; i < key->nentries; i++)
-               startScanEntry(index, ginstate, key->scanEntry + i, key->firstCall);
+               startScanEntry(index, ginstate, key->scanEntry + i);
 
-       if (key->firstCall)
-       {
-               memset(key->entryRes, TRUE, sizeof(bool) * key->nentries);
-               key->isFinished = FALSE;
-               key->firstCall = FALSE;
+       memset(key->entryRes, TRUE, sizeof(bool) * key->nentries);
+       key->isFinished = FALSE;
+       key->firstCall = FALSE;
 
-               if (GinFuzzySearchLimit > 0)
-               {
-                       /*
-                        * If all of keys more than threshold we will try to reduce
-                        * result, we hope (and only hope, for intersection operation of
-                        * array our supposition isn't true), that total result will not
-                        * more than minimal predictNumberResult.
-                        */
+       if (GinFuzzySearchLimit > 0)
+       {
+               /*
+                * If all of keys more than threshold we will try to reduce
+                * result, we hope (and only hope, for intersection operation of
+                * array our supposition isn't true), that total result will not
+                * more than minimal predictNumberResult.
+                */
 
-                       for (i = 0; i < key->nentries; i++)
-                               if (key->scanEntry[i].predictNumberResult <= key->nentries * GinFuzzySearchLimit)
-                                       return;
+               for (i = 0; i < key->nentries; i++)
+                       if (key->scanEntry[i].predictNumberResult <= key->nentries * GinFuzzySearchLimit)
+                               return;
 
-                       for (i = 0; i < key->nentries; i++)
-                               if (key->scanEntry[i].predictNumberResult > key->nentries * GinFuzzySearchLimit)
-                               {
-                                       key->scanEntry[i].predictNumberResult /= key->nentries;
-                                       key->scanEntry[i].reduceResult = TRUE;
-                               }
-               }
+               for (i = 0; i < key->nentries; i++)
+                       if (key->scanEntry[i].predictNumberResult > key->nentries * GinFuzzySearchLimit)
+                       {
+                               key->scanEntry[i].predictNumberResult /= key->nentries;
+                               key->scanEntry[i].reduceResult = TRUE;
+                       }
        }
 }
 
-static void
-stopScanKey(GinScanKey key)
-{
-       uint32          i;
-
-       for (i = 0; i < key->nentries; i++)
-               stopScanEntry(key->scanEntry + i);
-}
-
 static void
 startScan(IndexScanDesc scan)
 {
@@ -262,44 +179,82 @@ startScan(IndexScanDesc scan)
                startScanKey(scan->indexRelation, &so->ginstate, so->keys + i);
 }
 
-static void
-stopScan(IndexScanDesc scan)
-{
-       uint32          i;
-       GinScanOpaque so = (GinScanOpaque) scan->opaque;
-
-       for (i = 0; i < so->nkeys; i++)
-               stopScanKey(so->keys + i);
-}
-
-
+/*
+ * Gets next ItemPointer from PostingTree. Note, that we copy
+ * page into GinScanEntry->list array and unlock page, but keep it pinned
+ * to prevent interference with vacuum
+ */
 static void
 entryGetNextItem(Relation index, GinScanEntry entry)
 {
-       Page            page = BufferGetPage(entry->buffer);
+       Page            page;
+       BlockNumber blkno;
 
-       entry->offset++;
-       if (entry->offset <= GinPageGetOpaque(page)->maxoff && GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber)
+       for(;;)
        {
-               entry->curItem = *(ItemPointerData *) GinDataPageGetItem(page, entry->offset);
-       }
-       else
-       {
-               BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
+               entry->offset++;
 
-               LockBuffer(entry->buffer, GIN_UNLOCK);
-               if (blkno == InvalidBlockNumber)
+               if (entry->offset <= entry->nlist)
                {
-                       ReleaseBuffer(entry->buffer);
-                       entry->buffer = InvalidBuffer;
-                       entry->isFinished = TRUE;
+                       entry->curItem = entry->list[entry->offset - 1];
+                       return;
                }
-               else
+
+               LockBuffer(entry->buffer, GIN_SHARE);
+               page = BufferGetPage(entry->buffer);
+               for(;;)
                {
+                       /*
+                        * It's needed to go by right link. During that we should refind
+                        * first ItemPointer greater that stored
+                        */
+
+                       blkno = GinPageGetOpaque(page)->rightlink;
+
+                       LockBuffer(entry->buffer, GIN_UNLOCK);
+                       if (blkno == InvalidBlockNumber)
+                       {
+                               ReleaseBuffer(entry->buffer);
+                               ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber);
+                               entry->buffer = InvalidBuffer;
+                               entry->isFinished = TRUE;
+                               return;
+                       }
+
                        entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno);
                        LockBuffer(entry->buffer, GIN_SHARE);
+                       page = BufferGetPage(entry->buffer);
+
                        entry->offset = InvalidOffsetNumber;
-                       entryGetNextItem(index, entry);
+                       if (!ItemPointerIsValid(&entry->curItem) || findItemInPage(page, &entry->curItem, &entry->offset))
+                       {
+                               /*
+                                * Found position equal to or greater than stored
+                                */
+                               entry->nlist = GinPageGetOpaque(page)->maxoff;
+                               memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber), 
+                                                       GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) );
+
+                               LockBuffer(entry->buffer, GIN_UNLOCK);
+
+                               if ( !ItemPointerIsValid(&entry->curItem) || 
+                                        compareItemPointers( &entry->curItem, entry->list + entry->offset - 1 ) == 0 )
+                               {
+                                       /*
+                                        * First pages are deleted or empty, or we found exact position,
+                                        * so break inner loop and continue outer one.
+                                        */
+
+                                        break;
+                               }
+                       
+                               /*
+                                * Find greater than entry->curItem position, store it.
+                                */
+                               entry->curItem = entry->list[entry->offset - 1];
+
+                               return;
+                       }
                }
        }
 }
@@ -319,7 +274,7 @@ entryGetItem(Relation index, GinScanEntry entry)
                entry->isFinished = entry->master->isFinished;
                entry->curItem = entry->master->curItem;
        }
-       else if (entry->list)
+       else if (!BufferIsValid(entry->buffer))
        {
                entry->offset++;
                if (entry->offset <= entry->nlist)
@@ -527,8 +482,6 @@ gingetbitmap(PG_FUNCTION_ARGS)
                ntids++;
        }
 
-       stopScan(scan);
-
        PG_RETURN_INT64(ntids);
 }
 
@@ -550,7 +503,6 @@ gingettuple(PG_FUNCTION_ARGS)
 
        startScan(scan);
        res = scanGetItem(scan, &scan->xs_ctup.t_self, &scan->xs_recheck);
-       stopScan(scan);
 
        PG_RETURN_BOOL(res);
 }
index 7a343c62c491a9d6230d882a0131838a41845990..614e8566b903af8686a01aacb47a96c13e2d0479 100644 (file)
@@ -4,7 +4,7 @@
  *
  *     Copyright (c) 2006-2008, PostgreSQL Global Development Group
  *
- *     $PostgreSQL: pgsql/src/include/access/gin.h,v 1.17 2008/04/10 22:25:25 tgl Exp $
+ *     $PostgreSQL: pgsql/src/include/access/gin.h,v 1.18 2008/04/22 17:52:43 teodor Exp $
  *--------------------------------------------------------------------------
  */
 
@@ -356,14 +356,16 @@ typedef struct GinScanEntryData
        /* entry, got from extractQueryFn */
        Datum           entry;
 
-       /* current ItemPointer to heap, its offset in buffer and buffer */
-       ItemPointerData curItem;
-       OffsetNumber offset;
+       /* Current page in posting tree */
        Buffer          buffer;
 
-       /* in case of Posing list */
+       /* current ItemPointer to heap */
+       ItemPointerData curItem;
+
+       /* used for Posting list and one page in Posting tree */
        ItemPointerData *list;
-       uint32          nlist;
+       uint32                   nlist;
+       OffsetNumber     offset;
 
        bool            isFinished;
        bool            reduceResult;