*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.78 2001/01/29 07:28:16 vadim Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.79 2001/01/31 01:08:36 vadim Exp $
*
*-------------------------------------------------------------------------
*/
extern bool FixBTree;
Buffer _bt_fixroot(Relation rel, Buffer oldrootbuf, bool release);
-static void _bt_fixtree(Relation rel, BlockNumber blkno, BTStack stack);
+static void _bt_fixtree(Relation rel, BlockNumber blkno);
+static BlockNumber _bt_fixlevel(Relation rel, Buffer buf, BlockNumber limit);
+static OffsetNumber _bt_getoff(Page page, BlockNumber blkno);
static Buffer _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf);
{
blkno = lpageop->btpo_parent;
_bt_relbuf(rel, buf, BT_WRITE);
- _bt_fixtree(rel, blkno, NULL);
+ _bt_fixtree(rel, blkno);
goto formres;
}
}
pbuf = _bt_getstackbuf(rel, stack);
+ if (pbuf == InvalidBuffer)
+ elog(ERROR, "_bt_getstackbuf: my bits moved right off the end of the world!"
+ "\n\tRecreate index %s.", RelationGetRelationName(rel));
+
/* Now we can write and unlock the children */
_bt_wrtbuf(rel, rbuf);
_bt_wrtbuf(rel, buf);
}
/* by here, the item we're looking for moved right at least one page */
if (P_RIGHTMOST(opaque))
- elog(FATAL, "_bt_getstackbuf: my bits moved right off the end of the world!"
- "\n\tRecreate index %s.", RelationGetRelationName(rel));
+ {
+ _bt_relbuf(rel, buf, BT_WRITE);
+ return(InvalidBuffer);
+ }
blkno = opaque->btpo_next;
_bt_relbuf(rel, buf, BT_WRITE);
/* give up left buffer */
_bt_relbuf(rel, leftbuf, BT_WRITE);
+ pfree(btitem);
leftbuf = rightbuf;
leftpage = rightpage;
leftopaque = rightopaque;
return(rootbuf);
}
+/*
+ * Using blkno of leftmost page on a level inside tree this func
+ * checks/fixes tree from this level up to the root page.
+ */
static void
-_bt_fixtree(Relation rel, BlockNumber blkno, BTStack stack)
+_bt_fixtree(Relation rel, BlockNumber blkno)
+{
+ Buffer buf;
+ Page page;
+ BTPageOpaque opaque;
+ BlockNumber pblkno;
+
+ elog(ERROR, "bt_fixtree: unimplemented , yet (need to recreate index)");
+
+ for ( ; ; )
+ {
+ buf = _bt_getbuf(rel, blkno, BT_READ);
+ page = BufferGetPage(buf);
+ opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+ if (! P_LEFTMOST(opaque) || P_ISLEAF(opaque))
+ elog(ERROR, "bt_fixtree: invalid start page (need to recreate index)");
+ pblkno = opaque->btpo_parent;
+
+ /* check/fix entire level */
+ _bt_fixlevel(rel, buf, InvalidBlockNumber);
+
+ /*
+ * No pins/locks are held here. Re-read start page if its
+ * btpo_parent pointed to meta page else go up one level.
+ */
+ if (pblkno == BTREE_METAPAGE)
+ {
+ buf = _bt_getbuf(rel, blkno, BT_WRITE);
+ page = BufferGetPage(buf);
+ opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+ if (P_ISROOT(opaque))
+ {
+ /* Tree is Ok now */
+ _bt_relbuf(rel, buf, BT_WRITE);
+ return;
+ }
+ pblkno = opaque->btpo_parent;
+ /* Call _bt_fixroot() if there is no upper level */
+ if (pblkno == BTREE_METAPAGE)
+ {
+ buf = _bt_fixroot(rel, buf, true);
+ _bt_relbuf(rel, buf, BT_WRITE);
+ return;
+ }
+ /* Have to go up one level */
+ _bt_relbuf(rel, buf, BT_WRITE);
+ blkno = pblkno;
+ }
+ }
+
+}
+
+/*
+ * Check/fix level starting from page in buffer buf up to block
+ * limit on *child* level (or till rightmost child page if limit
+ * is InvalidBlockNumber). Start buffer must be read locked.
+ * No pins/locks are held on exit. Returns block number of last
+ * visited/pointing-to-limit page on *check/fix* level.
+ */
+static BlockNumber
+_bt_fixlevel(Relation rel, Buffer buf, BlockNumber limit)
+{
+ BlockNumber blkno = BufferGetBlockNumber(buf);
+ BlockNumber pblkno = blkno;
+ Page page;
+ BTPageOpaque opaque;
+ BlockNumber cblkno[3];
+ OffsetNumber coff[3];
+ Buffer cbuf[3];
+ Page cpage[3];
+ BTPageOpaque copaque[3];
+ BTItem btitem;
+ int cidx, i;
+ bool goodbye = false;
+ char tbuf[BLCKSZ];
+
+ page = BufferGetPage(buf);
+ /* copy page to temp storage */
+ memmove(tbuf, page, PageGetPageSize(page));
+ _bt_relbuf(rel, buf, BT_READ);
+
+ page = (Page)tbuf;
+ opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+
+ /* Initialize first child data */
+ coff[0] = P_FIRSTDATAKEY(opaque);
+ if (coff[0] > PageGetMaxOffsetNumber(page))
+ elog(ERROR, "bt_fixlevel: invalid maxoff on start page (need to recreate index)");
+ btitem = (BTItem) PageGetItem(page, PageGetItemId(page, coff[0]));
+ cblkno[0] = ItemPointerGetBlockNumber(&(btitem->bti_itup.t_tid));
+ cbuf[0] = _bt_getbuf(rel, cblkno[0], BT_READ);
+ cpage[0] = BufferGetPage(cbuf[0]);
+ copaque[0] = (BTPageOpaque) PageGetSpecialPointer(cpage[0]);
+ if (P_LEFTMOST(opaque) && ! P_LEFTMOST(copaque[0]))
+ elog(ERROR, "bt_fixtlevel: non-leftmost child page of leftmost parent (need to recreate index)");
+ /* caller should take care and avoid this */
+ if (P_RIGHTMOST(copaque[0]))
+ elog(ERROR, "bt_fixtlevel: invalid start child (need to recreate index)");
+
+ for ( ; ; )
+ {
+ /*
+ * Read up to 2 more child pages and look for pointers
+ * to them in *saved* parent page
+ */
+ coff[1] = coff[2] = InvalidOffsetNumber;
+ for (cidx = 0; cidx < 2; )
+ {
+ cidx++;
+ cblkno[cidx] = (copaque[cidx - 1])->btpo_next;
+ cbuf[cidx] = _bt_getbuf(rel, cblkno[cidx], BT_READ);
+ cpage[cidx] = BufferGetPage(cbuf[cidx]);
+ copaque[cidx] = (BTPageOpaque) PageGetSpecialPointer(cpage[cidx]);
+ coff[cidx] = _bt_getoff(page, cblkno[cidx]);
+
+ /* sanity check */
+ if (coff[cidx] != InvalidOffsetNumber)
+ {
+ for (i = cidx - 1; i >= 0; i--)
+ {
+ if (coff[i] == InvalidOffsetNumber)
+ continue;
+ if (coff[cidx] != coff[i] + 1)
+ elog(ERROR, "bt_fixlevel: invalid item order(1) (need to recreate index)");
+ break;
+ }
+ }
+
+ if (P_RIGHTMOST(copaque[cidx]))
+ break;
+ }
+
+ /*
+ * Read parent page and insert missed pointers.
+ */
+ if (coff[1] == InvalidOffsetNumber ||
+ (cidx == 2 && coff[2] == InvalidOffsetNumber))
+ {
+ Buffer newbuf;
+ Page newpage;
+ BTPageOpaque newopaque;
+ BTItem ritem;
+ Size itemsz;
+ OffsetNumber newitemoff;
+ BlockNumber parblk[3];
+ BTStackData stack;
+
+ stack.bts_parent = NULL;
+ stack.bts_blkno = pblkno;
+ stack.bts_offset = InvalidOffsetNumber;
+ ItemPointerSet(&(stack.bts_btitem.bti_itup.t_tid),
+ cblkno[0], P_HIKEY);
+
+ buf = _bt_getstackbuf(rel, &stack);
+ if (buf == InvalidBuffer)
+ elog(ERROR, "bt_fixlevel: pointer disappeared (need to recreate index)");
+
+ page = BufferGetPage(buf);
+ opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+ coff[0] = stack.bts_offset;
+ pblkno = BufferGetBlockNumber(buf);
+ parblk[0] = pblkno;
+ if (cblkno[0] == limit)
+ blkno = pblkno; /* where we have seen pointer to limit */
+
+ /* Check/insert missed pointers */
+ for (i = 1; i <= cidx; i++)
+ {
+ coff[i] = _bt_getoff(page, cblkno[i]);
+
+ /* sanity check */
+ parblk[i] = BufferGetBlockNumber(buf);
+ if (coff[i] != InvalidOffsetNumber)
+ {
+ if (parblk[i] == parblk[i - 1] &&
+ coff[i] != coff[i - 1] + 1)
+ elog(ERROR, "bt_fixlevel: invalid item order(2) (need to recreate index)");
+ if (cblkno[i] == limit)
+ blkno = parblk[i];
+ continue;
+ }
+ /* Have to check next page ? */
+ if ((! P_RIGHTMOST(opaque)) &&
+ coff[i - 1] == PageGetMaxOffsetNumber(page)) /* yes */
+ {
+ newbuf = _bt_getbuf(rel, opaque->btpo_next, BT_WRITE);
+ newpage = BufferGetPage(newbuf);
+ newopaque = (BTPageOpaque) PageGetSpecialPointer(newpage);
+ coff[i] = _bt_getoff(newpage, cblkno[i]);
+ if (coff[i] != InvalidOffsetNumber) /* found ! */
+ {
+ if (coff[i] != P_FIRSTDATAKEY(newopaque))
+ elog(ERROR, "bt_fixlevel: invalid item order(3) (need to recreate index)");
+ _bt_relbuf(rel, buf, BT_WRITE);
+ buf = newbuf;
+ page = newpage;
+ opaque = newopaque;
+ pblkno = BufferGetBlockNumber(buf);
+ parblk[i] = pblkno;
+ if (cblkno[i] == limit)
+ blkno = pblkno;
+ continue;
+ }
+ /* unfound - need to insert on current page */
+ _bt_relbuf(rel, newbuf, BT_WRITE);
+ }
+ /* insert pointer */
+ ritem = (BTItem) PageGetItem(cpage[i - 1],
+ PageGetItemId(cpage[i - 1], P_HIKEY));
+ btitem = _bt_formitem(&(ritem->bti_itup));
+ ItemPointerSet(&(btitem->bti_itup.t_tid), cblkno[i], P_HIKEY);
+ itemsz = IndexTupleDSize(btitem->bti_itup)
+ + (sizeof(BTItemData) - sizeof(IndexTupleData));
+ itemsz = MAXALIGN(itemsz);
+
+ newitemoff = coff[i - 1] + 1;
+
+ if (PageGetFreeSpace(page) < itemsz)
+ {
+ OffsetNumber firstright;
+ OffsetNumber itup_off;
+ BlockNumber itup_blkno;
+ bool newitemonleft;
+
+ firstright = _bt_findsplitloc(rel, page,
+ newitemoff, itemsz, &newitemonleft);
+ newbuf = _bt_split(rel, buf, firstright,
+ newitemoff, itemsz, btitem, newitemonleft,
+ &itup_off, &itup_blkno);
+ /* what buffer we need in ? */
+ if (newitemonleft)
+ _bt_relbuf(rel, newbuf, BT_WRITE);
+ else
+ {
+ _bt_relbuf(rel, buf, BT_WRITE);
+ buf = newbuf;
+ page = BufferGetPage(buf);
+ opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+ }
+ pblkno = BufferGetBlockNumber(buf);
+ coff[i] = itup_off;
+ }
+ else
+ {
+ _bt_insertuple(rel, buf, itemsz, btitem, newitemoff);
+ coff[i] = newitemoff;
+ }
+
+ pfree(btitem);
+ parblk[i] = pblkno;
+ if (cblkno[i] == limit)
+ blkno = pblkno;
+ }
+
+ /* copy page with pointer to cblkno[cidx] to temp storage */
+ memmove(tbuf, page, PageGetPageSize(page));
+ _bt_relbuf(rel, buf, BT_WRITE);
+ page = (Page)tbuf;
+ opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+ if (limit == InvalidBlockNumber)
+ blkno = pblkno; /* last visited page */
+ }
+
+ /* Continue if current check/fix level page is rightmost */
+ if (P_RIGHTMOST(opaque))
+ goodbye = false;
+
+ /* Pointers to child pages are Ok - right end of child level ? */
+ _bt_relbuf(rel, cbuf[0], BT_READ);
+ _bt_relbuf(rel, cbuf[1], BT_READ);
+ if (cidx == 1 ||
+ (cidx == 2 && (P_RIGHTMOST(copaque[2]) || goodbye)))
+ {
+ if (cidx == 2)
+ _bt_relbuf(rel, cbuf[2], BT_READ);
+ return(blkno);
+ }
+ if (cblkno[0] == limit || cblkno[1] == limit)
+ goodbye = true;
+ cblkno[0] = cblkno[2];
+ cbuf[0] = cbuf[2];
+ cpage[0] = cpage[2];
+ copaque[0] = copaque[2];
+ coff[0] = coff[2];
+ }
+}
+
+static OffsetNumber
+_bt_getoff(Page page, BlockNumber blkno)
{
- elog(ERROR, "bt_fixtree: unimplemented , yet");
+ BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+ OffsetNumber offnum = P_FIRSTDATAKEY(opaque);
+ BlockNumber curblkno;
+ ItemId itemid;
+ BTItem item;
+
+ for ( ; offnum <= maxoff; offnum++)
+ {
+ itemid = PageGetItemId(page, offnum);
+ item = (BTItem) PageGetItem(page, itemid);
+ curblkno = ItemPointerGetBlockNumber(&(item->bti_itup.t_tid));
+ if (curblkno == blkno)
+ return(offnum);
+ }
+
+ return(InvalidOffsetNumber);
}
/*