From cf23b75b4dce75151df7164ed72263e66b758ae9 Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Tue, 22 Apr 2008 17:52:43 +0000 Subject: [PATCH] Fix using too many LWLocks bug, reported by Craig Ringer . It was my mistake, I missed limitation of number of held locks, now GIN doesn't use continiuous locks, but still hold buffers pinned to prevent interference with vacuum's deletion algorithm. Backpatch is needed. --- src/backend/access/gin/ginget.c | 362 ++++++++++++++------------------ src/include/access/gin.h | 14 +- 2 files changed, 165 insertions(+), 211 deletions(-) diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index 26abaa76af..b95fd7eed9 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.13 2008/04/14 17:05:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.14 2008/04/22 17:52:43 teodor Exp $ *------------------------------------------------------------------------- */ @@ -29,58 +29,16 @@ findItemInPage(Page page, ItemPointer item, OffsetNumber *off) /* page was deleted by concurrent vacuum */ return false; - if (*off > maxoff || *off == InvalidOffsetNumber) - res = -1; - else - res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off)); - - if (res == 0) - { - /* page isn't changed */ - return true; - } - else if (res > 0) - { - /* - * some items was added before our position, look further to find it - * or first greater - */ - - (*off)++; - for (; *off <= maxoff; (*off)++) - { - res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off)); - - if (res == 0) - return true; + /* + * scan page to find equal or first greater value + */ - if (res < 0) - { - (*off)--; - return true; - } - } - } - else + for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++) { - /* - * some items was deleted before our position, look from begining to - * find it or first greater - */ - - for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++) - { - res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off)); - - if (res == 0) - return true; + res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off)); - if (res < 0) - { - (*off)--; - return true; - } - } + if (res <= 0) + return true; } return false; @@ -91,119 +49,87 @@ findItemInPage(Page page, ItemPointer item, OffsetNumber *off) * Stop* functions unlock buffer (but don't release!) */ static void -startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry, bool firstCall) +startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) { + GinBtreeData btreeEntry; + GinBtreeStack *stackEntry; + Page page; + bool needUnlock = TRUE; + if (entry->master != NULL) { entry->isFinished = entry->master->isFinished; return; } - if (firstCall) - { - /* - * at first call we should find entry, and begin scan of posting tree - * or just store posting list in memory - */ - GinBtreeData btreeEntry; - GinBtreeStack *stackEntry; - Page page; - bool needUnlock = TRUE; - - prepareEntryScan(&btreeEntry, index, entry->entry, ginstate); - btreeEntry.searchMode = TRUE; - stackEntry = ginFindLeafPage(&btreeEntry, NULL); - page = BufferGetPage(stackEntry->buffer); - - entry->isFinished = TRUE; - entry->buffer = InvalidBuffer; - entry->offset = InvalidOffsetNumber; - entry->list = NULL; - entry->nlist = 0; - entry->reduceResult = FALSE; - entry->predictNumberResult = 0; - - if (btreeEntry.findItem(&btreeEntry, stackEntry)) - { - IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off)); - - if (GinIsPostingTree(itup)) - { - BlockNumber rootPostingTree = GinGetPostingTree(itup); - GinPostingTreeScan *gdi; - Page page; + /* + * we should find entry, and begin scan of posting tree + * or just store posting list in memory + */ - LockBuffer(stackEntry->buffer, GIN_UNLOCK); - needUnlock = FALSE; - gdi = prepareScanPostingTree(index, rootPostingTree, TRUE); + prepareEntryScan(&btreeEntry, index, entry->entry, ginstate); + btreeEntry.searchMode = TRUE; + stackEntry = ginFindLeafPage(&btreeEntry, NULL); + page = BufferGetPage(stackEntry->buffer); - entry->buffer = scanBeginPostingTree(gdi); - IncrBufferRefCount(entry->buffer); + entry->isFinished = TRUE; + entry->buffer = InvalidBuffer; + entry->offset = InvalidOffsetNumber; + entry->list = NULL; + entry->nlist = 0; + entry->reduceResult = FALSE; + entry->predictNumberResult = 0; - page = BufferGetPage(entry->buffer); - entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff; + if (btreeEntry.findItem(&btreeEntry, stackEntry)) + { + IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off)); - freeGinBtreeStack(gdi->stack); - pfree(gdi); - entry->isFinished = FALSE; - } - else if (GinGetNPosting(itup) > 0) - { - entry->nlist = GinGetNPosting(itup); - entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist); - memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist); - entry->isFinished = FALSE; - } - } + if (GinIsPostingTree(itup)) + { + BlockNumber rootPostingTree = GinGetPostingTree(itup); + GinPostingTreeScan *gdi; + Page page; - if (needUnlock) LockBuffer(stackEntry->buffer, GIN_UNLOCK); - freeGinBtreeStack(stackEntry); - } - else if (entry->buffer != InvalidBuffer) - { - /* we should find place where we was stopped */ - BlockNumber blkno; - Page page; + needUnlock = FALSE; + gdi = prepareScanPostingTree(index, rootPostingTree, TRUE); - LockBuffer(entry->buffer, GIN_SHARE); - - if (!ItemPointerIsValid(&entry->curItem)) - /* start position */ - return; - Assert(entry->offset != InvalidOffsetNumber); + entry->buffer = scanBeginPostingTree(gdi); + /* + * We keep buffer pinned because we need to prevent deletition + * page during scan. See GIN's vacuum implementation. RefCount + * is increased to keep buffer pinned after freeGinBtreeStack() call. + */ + IncrBufferRefCount(entry->buffer); - page = BufferGetPage(entry->buffer); + page = BufferGetPage(entry->buffer); + entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff; - /* try to find curItem in current buffer */ - if (findItemInPage(page, &entry->curItem, &entry->offset)) - return; + /* + * Keep page content in memory to prevent durable page locking + */ + entry->list = (ItemPointerData *) palloc( BLCKSZ ); + entry->nlist = GinPageGetOpaque(page)->maxoff; + memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber), + GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) ); - /* walk to right */ - while ((blkno = GinPageGetOpaque(page)->rightlink) != InvalidBlockNumber) - { LockBuffer(entry->buffer, GIN_UNLOCK); - entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno); - LockBuffer(entry->buffer, GIN_SHARE); - page = BufferGetPage(entry->buffer); - - entry->offset = InvalidOffsetNumber; - if (findItemInPage(page, &entry->curItem, &entry->offset)) - return; + freeGinBtreeStack(gdi->stack); + pfree(gdi); + entry->isFinished = FALSE; + } + else if (GinGetNPosting(itup) > 0) + { + entry->nlist = GinGetNPosting(itup); + entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist); + memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist); + entry->isFinished = FALSE; } - - /* - * curItem and any greated items was deleted by concurrent vacuum, so - * we finished scan with currrent entry - */ } -} -static void -stopScanEntry(GinScanEntry entry) -{ - if (entry->buffer != InvalidBuffer) - LockBuffer(entry->buffer, GIN_UNLOCK); + if (needUnlock) + LockBuffer(stackEntry->buffer, GIN_UNLOCK); + freeGinBtreeStack(stackEntry); } static void @@ -211,47 +137,38 @@ startScanKey(Relation index, GinState *ginstate, GinScanKey key) { uint32 i; + if (!key->firstCall) + return; + for (i = 0; i < key->nentries; i++) - startScanEntry(index, ginstate, key->scanEntry + i, key->firstCall); + startScanEntry(index, ginstate, key->scanEntry + i); - if (key->firstCall) - { - memset(key->entryRes, TRUE, sizeof(bool) * key->nentries); - key->isFinished = FALSE; - key->firstCall = FALSE; + memset(key->entryRes, TRUE, sizeof(bool) * key->nentries); + key->isFinished = FALSE; + key->firstCall = FALSE; - if (GinFuzzySearchLimit > 0) - { - /* - * If all of keys more than threshold we will try to reduce - * result, we hope (and only hope, for intersection operation of - * array our supposition isn't true), that total result will not - * more than minimal predictNumberResult. - */ + if (GinFuzzySearchLimit > 0) + { + /* + * If all of keys more than threshold we will try to reduce + * result, we hope (and only hope, for intersection operation of + * array our supposition isn't true), that total result will not + * more than minimal predictNumberResult. + */ - for (i = 0; i < key->nentries; i++) - if (key->scanEntry[i].predictNumberResult <= key->nentries * GinFuzzySearchLimit) - return; + for (i = 0; i < key->nentries; i++) + if (key->scanEntry[i].predictNumberResult <= key->nentries * GinFuzzySearchLimit) + return; - for (i = 0; i < key->nentries; i++) - if (key->scanEntry[i].predictNumberResult > key->nentries * GinFuzzySearchLimit) - { - key->scanEntry[i].predictNumberResult /= key->nentries; - key->scanEntry[i].reduceResult = TRUE; - } - } + for (i = 0; i < key->nentries; i++) + if (key->scanEntry[i].predictNumberResult > key->nentries * GinFuzzySearchLimit) + { + key->scanEntry[i].predictNumberResult /= key->nentries; + key->scanEntry[i].reduceResult = TRUE; + } } } -static void -stopScanKey(GinScanKey key) -{ - uint32 i; - - for (i = 0; i < key->nentries; i++) - stopScanEntry(key->scanEntry + i); -} - static void startScan(IndexScanDesc scan) { @@ -262,44 +179,82 @@ startScan(IndexScanDesc scan) startScanKey(scan->indexRelation, &so->ginstate, so->keys + i); } -static void -stopScan(IndexScanDesc scan) -{ - uint32 i; - GinScanOpaque so = (GinScanOpaque) scan->opaque; - - for (i = 0; i < so->nkeys; i++) - stopScanKey(so->keys + i); -} - - +/* + * Gets next ItemPointer from PostingTree. Note, that we copy + * page into GinScanEntry->list array and unlock page, but keep it pinned + * to prevent interference with vacuum + */ static void entryGetNextItem(Relation index, GinScanEntry entry) { - Page page = BufferGetPage(entry->buffer); + Page page; + BlockNumber blkno; - entry->offset++; - if (entry->offset <= GinPageGetOpaque(page)->maxoff && GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber) + for(;;) { - entry->curItem = *(ItemPointerData *) GinDataPageGetItem(page, entry->offset); - } - else - { - BlockNumber blkno = GinPageGetOpaque(page)->rightlink; + entry->offset++; - LockBuffer(entry->buffer, GIN_UNLOCK); - if (blkno == InvalidBlockNumber) + if (entry->offset <= entry->nlist) { - ReleaseBuffer(entry->buffer); - entry->buffer = InvalidBuffer; - entry->isFinished = TRUE; + entry->curItem = entry->list[entry->offset - 1]; + return; } - else + + LockBuffer(entry->buffer, GIN_SHARE); + page = BufferGetPage(entry->buffer); + for(;;) { + /* + * It's needed to go by right link. During that we should refind + * first ItemPointer greater that stored + */ + + blkno = GinPageGetOpaque(page)->rightlink; + + LockBuffer(entry->buffer, GIN_UNLOCK); + if (blkno == InvalidBlockNumber) + { + ReleaseBuffer(entry->buffer); + ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber); + entry->buffer = InvalidBuffer; + entry->isFinished = TRUE; + return; + } + entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno); LockBuffer(entry->buffer, GIN_SHARE); + page = BufferGetPage(entry->buffer); + entry->offset = InvalidOffsetNumber; - entryGetNextItem(index, entry); + if (!ItemPointerIsValid(&entry->curItem) || findItemInPage(page, &entry->curItem, &entry->offset)) + { + /* + * Found position equal to or greater than stored + */ + entry->nlist = GinPageGetOpaque(page)->maxoff; + memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber), + GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) ); + + LockBuffer(entry->buffer, GIN_UNLOCK); + + if ( !ItemPointerIsValid(&entry->curItem) || + compareItemPointers( &entry->curItem, entry->list + entry->offset - 1 ) == 0 ) + { + /* + * First pages are deleted or empty, or we found exact position, + * so break inner loop and continue outer one. + */ + + break; + } + + /* + * Find greater than entry->curItem position, store it. + */ + entry->curItem = entry->list[entry->offset - 1]; + + return; + } } } } @@ -319,7 +274,7 @@ entryGetItem(Relation index, GinScanEntry entry) entry->isFinished = entry->master->isFinished; entry->curItem = entry->master->curItem; } - else if (entry->list) + else if (!BufferIsValid(entry->buffer)) { entry->offset++; if (entry->offset <= entry->nlist) @@ -527,8 +482,6 @@ gingetbitmap(PG_FUNCTION_ARGS) ntids++; } - stopScan(scan); - PG_RETURN_INT64(ntids); } @@ -550,7 +503,6 @@ gingettuple(PG_FUNCTION_ARGS) startScan(scan); res = scanGetItem(scan, &scan->xs_ctup.t_self, &scan->xs_recheck); - stopScan(scan); PG_RETURN_BOOL(res); } diff --git a/src/include/access/gin.h b/src/include/access/gin.h index 7a343c62c4..614e8566b9 100644 --- a/src/include/access/gin.h +++ b/src/include/access/gin.h @@ -4,7 +4,7 @@ * * Copyright (c) 2006-2008, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.17 2008/04/10 22:25:25 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.18 2008/04/22 17:52:43 teodor Exp $ *-------------------------------------------------------------------------- */ @@ -356,14 +356,16 @@ typedef struct GinScanEntryData /* entry, got from extractQueryFn */ Datum entry; - /* current ItemPointer to heap, its offset in buffer and buffer */ - ItemPointerData curItem; - OffsetNumber offset; + /* Current page in posting tree */ Buffer buffer; - /* in case of Posing list */ + /* current ItemPointer to heap */ + ItemPointerData curItem; + + /* used for Posting list and one page in Posting tree */ ItemPointerData *list; - uint32 nlist; + uint32 nlist; + OffsetNumber offset; bool isFinished; bool reduceResult; -- 2.40.0