OBJS = ginutil.o gininsert.o ginxlog.o ginentrypage.o gindatapage.o \
ginbtree.o ginscan.o ginget.o ginvacuum.o ginarrayproc.o \
- ginbulk.o ginfast.o
+ ginbulk.o ginfast.o ginpostinglist.o
include $(top_srcdir)/src/backend/common.mk
comes mainly from not having to do multiple searches/insertions when the
same key appears in multiple new heap tuples.)
-Key entries are nominally of the same IndexEntry format as used in other
+Key entries are nominally of the same IndexTuple format as used in other
index types, but since a leaf key entry typically refers to multiple heap
tuples, there are significant differences. (See GinFormTuple, which works
by building a "normal" index tuple and then modifying it.) The points to
* Insert value (stored in GinBtree) to tree described by stack
*
* During an index build, buildStats is non-null and the counters
- * it contains should be incremented as needed.
+ * it contains are incremented as needed.
*
* NB: the passed-in stack is freed, as though by freeGinBtreeStack.
*/
{
XLogRecData *rdata;
BlockNumber savedRightLink;
+ bool fit;
page = BufferGetPage(stack->buffer);
savedRightLink = GinPageGetOpaque(page)->rightlink;
- if (btree->isEnoughSpace(btree, stack->buffer, stack->off))
+ START_CRIT_SECTION();
+ fit = btree->placeToPage(btree, stack->buffer, stack->off, &rdata);
+ if (fit)
{
- START_CRIT_SECTION();
- btree->placeToPage(btree, stack->buffer, stack->off, &rdata);
-
MarkBufferDirty(stack->buffer);
if (RelationNeedsWAL(btree->index))
}
else
{
- Buffer rbuffer = GinNewBuffer(btree->index);
+ /* Didn't fit, have to split */
+ Buffer rbuffer;
Page newlpage;
+ END_CRIT_SECTION();
+
+ rbuffer = GinNewBuffer(btree->index);
+
/*
- * newlpage is a pointer to memory page, it doesn't associate with
- * buffer, stack->buffer should be untouched
+ * newlpage is a pointer to memory page, it is not associated with
+ * a buffer. stack->buffer is not touched yet.
*/
newlpage = btree->splitPage(btree, stack->buffer, rbuffer, stack->off, &rdata);
#include "postgres.h"
#include "access/gin_private.h"
+#include "miscadmin.h"
#include "utils/rel.h"
-/*
- * Merge two ordered arrays of itempointers, eliminating any duplicates.
- * Returns the number of items in the result.
- * Caller is responsible that there is enough space at *dst.
- */
-uint32
-ginMergeItemPointers(ItemPointerData *dst,
- ItemPointerData *a, uint32 na,
- ItemPointerData *b, uint32 nb)
-{
- ItemPointerData *dptr = dst;
- ItemPointerData *aptr = a,
- *bptr = b;
-
- while (aptr - a < na && bptr - b < nb)
- {
- int cmp = ginCompareItemPointers(aptr, bptr);
-
- if (cmp > 0)
- *dptr++ = *bptr++;
- else if (cmp == 0)
- {
- /* we want only one copy of the identical items */
- *dptr++ = *bptr++;
- aptr++;
- }
- else
- *dptr++ = *aptr++;
- }
-
- while (aptr - a < na)
- *dptr++ = *aptr++;
-
- while (bptr - b < nb)
- *dptr++ = *bptr++;
-
- return dptr - dst;
-}
-
/*
* Checks, should we move to right link...
* Compares inserting itemp pointer with right bound of current page
/*
* Places keys to page and fills WAL record. In case leaf page and
* build mode puts all ItemPointers to page.
+ *
+ * If none of the keys fit, returns false without modifying the page.
*/
-static void
-dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata)
+static bool
+dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
+ XLogRecData **prdata)
{
Page page = BufferGetPage(buf);
int sizeofitem = GinSizeOfDataPageItem(page);
static XLogRecData rdata[3];
static ginxlogInsert data;
+ /* quick exit if it doesn't fit */
+ if (!dataIsEnoughSpace(btree, buf, off))
+ return false;
+
*prdata = rdata;
Assert(GinPageIsData(page));
}
else
GinDataPageAddPostingItem(page, &(btree->pitem), off);
+
+ return true;
}
/*
}
/*
- * we suppose that during index creation table scaned from begin to end,
- * so ItemPointers are monotonically increased..
+ * we assume that during index creation the table is scanned from
+ * beginning to end, so ItemPointers are in monotonically increasing order.
*/
if (btree->isBuild && GinPageRightMost(lpage))
separator = freeSpace / sizeofitem;
GinPageGetOpaque(rpage)->maxoff = maxoff - separator;
- PostingItemSetBlockNumber(&(btree->pitem), BufferGetBlockNumber(lbuf));
- if (GinPageIsLeaf(lpage))
- btree->pitem.key = *GinDataPageGetItemPointer(lpage,
- GinPageGetOpaque(lpage)->maxoff);
- else
- btree->pitem.key = GinDataPageGetPostingItem(lpage,
- GinPageGetOpaque(lpage)->maxoff)->key;
- btree->rightblkno = BufferGetBlockNumber(rbuf);
-
/* set up right bound for left page */
bound = GinDataPageGetRightBound(lpage);
*bound = btree->pitem.key;
rdata[1].len = MAXALIGN(maxoff * sizeofitem);
rdata[1].next = NULL;
+ /* Prepare a downlink tuple for insertion to the parent */
+ PostingItemSetBlockNumber(&(btree->pitem), BufferGetBlockNumber(lbuf));
+ if (GinPageIsLeaf(lpage))
+ btree->pitem.key = *GinDataPageGetItemPointer(lpage,
+ GinPageGetOpaque(lpage)->maxoff);
+ else
+ btree->pitem.key = GinDataPageGetPostingItem(lpage,
+ GinPageGetOpaque(lpage)->maxoff)->key;
+ btree->rightblkno = BufferGetBlockNumber(rbuf);
+
return lpage;
}
GinDataPageAddPostingItem(page, &ri, InvalidOffsetNumber);
}
+/*
+ * Creates a new posting tree containing the given TIDs.  Returns the block
+ * number of the root page of the new posting tree.
+ *
+ * The root page is filled with as many TIDs as fit on a single leaf page
+ * (at most GinMaxLeafDataItems).  Any TIDs that do not fit on the root are
+ * added afterwards through ginInsertItemPointers.
+ *
+ * items[] must be in sorted order with no duplicates.
+ *
+ * If buildStats is not NULL, its nDataPages counter is incremented for the
+ * newly-created root page, and it is passed on to ginInsertItemPointers for
+ * the remaining TIDs.
+ */
+BlockNumber
+createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
+                  GinStatsData *buildStats)
+{
+    BlockNumber blkno;
+    Buffer      buffer;
+    Page        page;
+    uint32      nrootitems;
+
+    /*
+     * Calculate how many TIDs will fit on the first page.  Only this many
+     * items may be copied to the root; copying all 'nitems' would overrun
+     * the page whenever nitems > GinMaxLeafDataItems.
+     */
+    nrootitems = Min(nitems, GinMaxLeafDataItems);
+
+    /*
+     * Create the root page.
+     */
+    buffer = GinNewBuffer(index);
+    page = BufferGetPage(buffer);
+    blkno = BufferGetBlockNumber(buffer);
+
+    START_CRIT_SECTION();
+
+    GinInitBuffer(buffer, GIN_DATA | GIN_LEAF);
+    memcpy(GinDataPageGetData(page), items,
+           sizeof(ItemPointerData) * nrootitems);
+    GinPageGetOpaque(page)->maxoff = nrootitems;
+
+    MarkBufferDirty(buffer);
+
+    if (RelationNeedsWAL(index))
+    {
+        XLogRecPtr  recptr;
+        XLogRecData rdata[2];
+        ginxlogCreatePostingTree data;
+
+        /* the item count in the record must match the payload length below */
+        data.node = index->rd_node;
+        data.blkno = blkno;
+        data.nitem = nrootitems;
+
+        rdata[0].buffer = InvalidBuffer;
+        rdata[0].data = (char *) &data;
+        rdata[0].len = sizeof(ginxlogCreatePostingTree);
+        rdata[0].next = &rdata[1];
+
+        rdata[1].buffer = InvalidBuffer;
+        rdata[1].data = (char *) items;
+        rdata[1].len = sizeof(ItemPointerData) * nrootitems;
+        rdata[1].next = NULL;
+
+        recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata);
+        PageSetLSN(page, recptr);
+    }
+
+    UnlockReleaseBuffer(buffer);
+
+    END_CRIT_SECTION();
+
+    /* During index build, count the newly-added data page */
+    if (buildStats)
+        buildStats->nDataPages++;
+
+    /*
+     * Add any remaining TIDs to the newly-created posting tree.
+     */
+    if (nrootitems < nitems)
+    {
+        GinPostingTreeScan *gdi;
+
+        gdi = ginPrepareScanPostingTree(index, blkno, FALSE);
+        gdi->btree.isBuild = (buildStats != NULL);
+
+        ginInsertItemPointers(gdi,
+                              items + nrootitems,
+                              nitems - nrootitems,
+                              buildStats);
+
+        pfree(gdi);
+    }
+
+    return blkno;
+}
+
+
void
ginPrepareDataScan(GinBtree btree, Relation index)
{
btree->findItem = dataLocateLeafItem;
btree->findChildPtr = dataFindChildPtr;
btree->getLeftMostPage = dataGetLeftMostPage;
- btree->isEnoughSpace = dataIsEnoughSpace;
btree->placeToPage = dataPlaceToPage;
btree->splitPage = dataSplitPage;
btree->fillRoot = ginDataFillRoot;
/*
* Place tuple on page and fills WAL record
+ *
+ * If the tuple doesn't fit, returns false without modifying the page.
*/
-static void
-entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata)
+static bool
+entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
+ XLogRecData **prdata)
{
Page page = BufferGetPage(buf);
OffsetNumber placed;
static XLogRecData rdata[3];
static ginxlogInsert data;
+ /* quick exit if it doesn't fit */
+ if (!entryIsEnoughSpace(btree, buf, off))
+ return false;
+
*prdata = rdata;
data.updateBlkno = entryPreparePage(btree, page, off);
rdata[cnt].next = NULL;
btree->entry = NULL;
+
+ return true;
}
/*
btree->findItem = entryLocateLeafEntry;
btree->findChildPtr = entryFindChildPtr;
btree->getLeftMostPage = entryGetLeftMostPage;
- btree->isEnoughSpace = entryIsEnoughSpace;
btree->placeToPage = entryPlaceToPage;
btree->splitPage = entrySplitPage;
btree->fillRoot = ginEntryFillRoot;
BuildAccumulator accum;
} GinBuildState;
-/*
- * Creates new posting tree with one page, containing the given TIDs.
- * Returns the page number (which will be the root of this posting tree).
- *
- * items[] must be in sorted order with no duplicates.
- */
-static BlockNumber
-createPostingTree(Relation index, ItemPointerData *items, uint32 nitems)
-{
- BlockNumber blkno;
- Buffer buffer = GinNewBuffer(index);
- Page page;
-
- /* Assert that the items[] array will fit on one page */
- Assert(nitems <= GinMaxLeafDataItems);
-
- START_CRIT_SECTION();
-
- GinInitBuffer(buffer, GIN_DATA | GIN_LEAF);
- page = BufferGetPage(buffer);
- blkno = BufferGetBlockNumber(buffer);
-
- memcpy(GinDataPageGetData(page), items, sizeof(ItemPointerData) * nitems);
- GinPageGetOpaque(page)->maxoff = nitems;
-
- MarkBufferDirty(buffer);
-
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
- XLogRecData rdata[2];
- ginxlogCreatePostingTree data;
-
- data.node = index->rd_node;
- data.blkno = blkno;
- data.nitem = nitems;
-
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &data;
- rdata[0].len = sizeof(ginxlogCreatePostingTree);
- rdata[0].next = &rdata[1];
-
- rdata[1].buffer = InvalidBuffer;
- rdata[1].data = (char *) items;
- rdata[1].len = sizeof(ItemPointerData) * nitems;
- rdata[1].next = NULL;
-
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata);
- PageSetLSN(page, recptr);
- }
-
- UnlockReleaseBuffer(buffer);
-
- END_CRIT_SECTION();
-
- return blkno;
-}
-
/*
* Adds array of item pointers to tuple's posting list, or
*/
postingRoot = createPostingTree(ginstate->index,
GinGetPosting(old),
- GinGetNPosting(old));
-
- /* During index build, count the newly-added data page */
- if (buildStats)
- buildStats->nDataPages++;
+ GinGetNPosting(old),
+ buildStats);
/* Now insert the TIDs-to-be-added into the posting tree */
gdi = ginPrepareScanPostingTree(ginstate->index, postingRoot, FALSE);
{
IndexTuple res;
- /* try to build tuple with room for all the items */
+ /* try to build a posting list tuple with all the items */
res = GinFormTuple(ginstate, attnum, key, category,
items, nitem, false);
res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, true);
/*
- * Initialize posting tree with as many TIDs as will fit on the first
- * page.
+ * Initialize a new posting tree with the TIDs.
*/
- postingRoot = createPostingTree(ginstate->index,
- items,
- Min(nitem, GinMaxLeafDataItems));
-
- /* During index build, count the newly-added data page */
- if (buildStats)
- buildStats->nDataPages++;
-
- /* Add any remaining TIDs to the posting tree */
- if (nitem > GinMaxLeafDataItems)
- {
- GinPostingTreeScan *gdi;
-
- gdi = ginPrepareScanPostingTree(ginstate->index, postingRoot, FALSE);
- gdi->btree.isBuild = (buildStats != NULL);
-
- ginInsertItemPointers(gdi,
- items + GinMaxLeafDataItems,
- nitem - GinMaxLeafDataItems,
- buildStats);
-
- pfree(gdi);
- }
+        /* pass buildStats so the new data page is counted during index build */
+        postingRoot = createPostingTree(ginstate->index, items, nitem,
+                                        buildStats);
/* And save the root link in the result tuple */
GinSetPostingTree(res, postingRoot);
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * ginpostinglist.c
+ * routines for dealing with posting lists.
+ *
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/access/gin/ginpostinglist.c
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/gin_private.h"
+
+/*
+ * Merge the sorted item pointer arrays a[0..na-1] and b[0..nb-1] into dst,
+ * keeping the output sorted and emitting only one copy of any item that
+ * appears in both inputs.
+ *
+ * Returns the number of items written to dst.  The caller must make sure
+ * that dst has enough room for the result.
+ */
+uint32
+ginMergeItemPointers(ItemPointerData *dst,
+                     ItemPointerData *a, uint32 na,
+                     ItemPointerData *b, uint32 nb)
+{
+    uint32      ia = 0;
+    uint32      ib = 0;
+    uint32      nout = 0;
+
+    /* Walk both inputs in step until one of them is exhausted */
+    while (ia < na && ib < nb)
+    {
+        int         cmp = ginCompareItemPointers(&a[ia], &b[ib]);
+
+        if (cmp < 0)
+        {
+            dst[nout++] = a[ia++];
+        }
+        else
+        {
+            /* b's item is smaller or equal: it goes to the output */
+            dst[nout++] = b[ib++];
+
+            /* on a tie, also step past a's copy so it isn't emitted twice */
+            if (cmp == 0)
+                ia++;
+        }
+    }
+
+    /* At most one of the inputs still has items left; append them */
+    for (; ia < na; ia++)
+        dst[nout++] = a[ia];
+
+    for (; ib < nb; ib++)
+        dst[nout++] = b[ib];
+
+    return nout;
+}
/*
- * Cleans array of ItemPointer (removes dead pointers)
- * Results are always stored in *cleaned, which will be allocated
- * if it's needed. In case of *cleaned!=NULL caller is responsible to
- * have allocated enough space. *cleaned and items may point to the same
- * memory address.
+ * Vacuums a list of item pointers. The original size of the list is 'nitem',
+ * returns the number of items remaining afterwards.
+ *
+ * If *cleaned == NULL on entry, the original array is left unmodified; if
+ * any items are removed, a palloc'd copy of the result is stored in *cleaned.
+ * Otherwise *cleaned should point to the original array, in which case it's
+ * modified directly.
*/
-
-static uint32
-ginVacuumPostingList(GinVacuumState *gvs, ItemPointerData *items, uint32 nitem, ItemPointerData **cleaned)
+static int
+ginVacuumPostingList(GinVacuumState *gvs, ItemPointerData *items, int nitem,
+ ItemPointerData **cleaned)
{
- uint32 i,
+ int i,
j = 0;
+ Assert(*cleaned == NULL || *cleaned == items);
+
/*
* just scan over ItemPointer array
*/
-
for (i = 0; i < nitem; i++)
{
if (gvs->callback(items + i, gvs->callback_state))
* scans posting tree and deletes empty pages
*/
static bool
-ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, DataPageDeleteStack *parent, OffsetNumber myoff)
+ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
+ DataPageDeleteStack *parent, OffsetNumber myoff)
{
DataPageDeleteStack *me;
Buffer buffer;
if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber)
{
+ /* the page is empty */
if (!(me->leftBlkno == InvalidBlockNumber && GinPageRightMost(page)))
{
/* we never delete right most branch */
Assert(!isRoot);
- if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber)
- {
- ginDeletePage(gvs, blkno, me->leftBlkno, me->parent->blkno, myoff, me->parent->isRoot);
- meDelete = TRUE;
- }
+ ginDeletePage(gvs, blkno, me->leftBlkno, me->parent->blkno, myoff, me->parent->isRoot);
+ meDelete = TRUE;
}
}
else if (GinGetNPosting(itup) > 0)
{
/*
- * if we already create temporary page, we will make changes in
- * place
+ * if we already created a temporary page, make changes in place
*/
ItemPointerData *cleaned = (tmppage == origpage) ? NULL : GinGetPosting(itup);
- uint32 newN = ginVacuumPostingList(gvs, GinGetPosting(itup), GinGetNPosting(itup), &cleaned);
+ int newN;
+
+ newN = ginVacuumPostingList(gvs, GinGetPosting(itup), GinGetNPosting(itup), &cleaned);
if (GinGetNPosting(itup) != newN)
{
GinNullCategory category;
/*
- * Some ItemPointers was deleted, so we should remake our
- * tuple
+ * Some ItemPointers were deleted, recreate tuple.
*/
-
if (tmppage == origpage)
{
/*
- * On first difference we create temporary page in memory
- * and copies content in to it.
+ * On first difference, create a temporary copy of the
+ * page and copy the tuple's posting list to it.
*/
tmppage = PageGetTempPageCopy(origpage);
/* insert methods */
OffsetNumber (*findChildPtr) (GinBtree, Page, BlockNumber, OffsetNumber);
BlockNumber (*getLeftMostPage) (GinBtree, Page);
- bool (*isEnoughSpace) (GinBtree, Buffer, OffsetNumber);
- void (*placeToPage) (GinBtree, Buffer, OffsetNumber, XLogRecData **);
+ bool (*placeToPage) (GinBtree, Buffer, OffsetNumber, XLogRecData **);
Page (*splitPage) (GinBtree, Buffer, Buffer, OffsetNumber, XLogRecData **);
void (*fillRoot) (GinBtree, Buffer, Buffer, Buffer);