*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.161 2007/11/15 21:14:32 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.162 2007/11/16 19:53:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
* removing any LP_DEAD tuples.
*
* On entry, *buf and *offsetptr point to the first legal position
- * where the new tuple could be inserted. The caller should hold an
- * exclusive lock on *buf. *offsetptr can also be set to
- * InvalidOffsetNumber, in which case the function will search the right
- * location within the page if needed. On exit, they point to the chosen
- * insert location. If findinsertloc decided to move right, the lock and
- * pin on the original page will be released and the new page returned to
- * the caller is exclusively locked instead.
+ * where the new tuple could be inserted. The caller should hold an
+ * exclusive lock on *buf. *offsetptr can also be set to
+ * InvalidOffsetNumber, in which case the function will search for the
+ * right location within the page if needed. On exit, they point to the
+ * chosen insert location. If _bt_findinsertloc decides to move right,
+ * the lock and pin on the original page will be released and the new
+ * page returned to the caller is exclusively locked instead.
*
* newtup is the new tuple we're inserting, and scankey is an insertion
* type scan key for it.
"Consider a function index of an MD5 hash of the value, "
"or use full text indexing.")));
-
-
/*----------
* If we will need to split the page to put the item on this page,
* check whether we can put the tuple somewhere to the right,
xl_btree_split xlrec;
uint8 xlinfo;
XLogRecPtr recptr;
- XLogRecData rdata[6];
+ XLogRecData rdata[7];
XLogRecData *lastrdata;
xlrec.node = rel->rd_node;
lastrdata = &rdata[0];
- /* Log downlink on non-leaf pages. */
if (ropaque->btpo.level > 0)
{
+ /* Log downlink on non-leaf pages */
lastrdata->next = lastrdata + 1;
lastrdata++;
lastrdata->data = (char *) &newitem->t_tid.ip_blkid;
lastrdata->len = sizeof(BlockIdData);
lastrdata->buffer = InvalidBuffer;
+
+ /*
+ * We must also log the left page's high key, because the right
+ * page's leftmost key is suppressed on non-leaf levels. Show it
+ * as belonging to the left page buffer, so that it is not stored
+ * if XLogInsert decides it needs a full-page image of the left
+ * page.
+ */
+ lastrdata->next = lastrdata + 1;
+ lastrdata++;
+
+ itemid = PageGetItemId(origpage, P_HIKEY);
+ item = (IndexTuple) PageGetItem(origpage, itemid);
+ lastrdata->data = (char *) item;
+ lastrdata->len = MAXALIGN(IndexTupleSize(item));
+ lastrdata->buffer = buf; /* backup block 1 */
+ lastrdata->buffer_std = true;
}
/*
lastrdata->buffer = buf; /* backup block 1 */
lastrdata->buffer_std = true;
}
- else
+ else if (ropaque->btpo.level == 0)
{
/*
* Although we don't need to WAL-log the new item, we still need
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.48 2007/11/15 22:25:15 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.49 2007/11/16 19:53:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
OffsetNumber newitemoff = 0;
Item newitem = NULL;
Size newitemsz = 0;
+ Item left_hikey = NULL;
+ Size left_hikeysz = 0;
reln = XLogOpenRelation(xlrec->node);
datalen -= sizeof(BlockIdData);
forget_matching_split(xlrec->node, downlink, false);
+
+ /* Extract left hikey and its size, if present (omitted when the left page was logged as a full-page image) */
+ if (!(record->xl_info & XLR_BKP_BLOCK_1))
+ {
+ /* We assume 16-bit alignment is enough for IndexTupleSize */
+ left_hikey = (Item) datapos;
+ left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
+
+ datapos += left_hikeysz;
+ datalen -= left_hikeysz;
+ }
}
/* Extract newitem and newitemoff, if present */
if (onleft && !(record->xl_info & XLR_BKP_BLOCK_1))
{
- IndexTupleData itupdata;
-
/*
- * We need to copy the tuple header to apply IndexTupleDSize, because
- * of alignment considerations. However, we assume that PageAddItem
- * doesn't care about the alignment of the newitem pointer it's given.
+ * We assume that 16-bit alignment is enough to apply IndexTupleSize
+ * (since it's fetching from a uint16 field) and also enough for
+ * PageAddItem to insert the tuple.
*/
- newitem = datapos;
- memcpy(&itupdata, datapos, sizeof(IndexTupleData));
- newitemsz = IndexTupleDSize(itupdata);
- newitemsz = MAXALIGN(newitemsz);
+ newitem = (Item) datapos;
+ newitemsz = MAXALIGN(IndexTupleSize(newitem));
datapos += newitemsz;
datalen -= newitemsz;
}
_bt_restore_page(rpage, datapos, datalen);
+ /*
+ * On leaf level, the high key of the left page is equal to the
+ * first key on the right page.
+ */
+ if (xlrec->level == 0)
+ {
+ ItemId hiItemId = PageGetItemId(rpage, P_FIRSTDATAKEY(ropaque));
+
+ left_hikey = PageGetItem(rpage, hiItemId);
+ left_hikeysz = ItemIdGetLength(hiItemId);
+ }
+
PageSetLSN(rpage, lsn);
PageSetTLI(rpage, ThisTimeLineID);
MarkBufferDirty(rbuf);
OffsetNumber maxoff = PageGetMaxOffsetNumber(lpage);
OffsetNumber deletable[MaxOffsetNumber];
int ndeletable = 0;
- ItemId hiItemId;
- Item hiItem;
/*
* Remove the items from the left page that were copied to the
elog(PANIC, "failed to add new item to left page after split");
}
- /* Set high key equal to the first key on the right page */
- hiItemId = PageGetItemId(rpage, P_FIRSTDATAKEY(ropaque));
- hiItem = PageGetItem(rpage, hiItemId);
-
- if (PageAddItem(lpage, hiItem, ItemIdGetLength(hiItemId),
+ /* Set high key */
+ if (PageAddItem(lpage, left_hikey, left_hikeysz,
P_HIKEY, false, false) == InvalidOffsetNumber)
elog(PANIC, "failed to add high key to left page after split");
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.114 2007/11/15 21:14:42 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.115 2007/11/16 19:53:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
* than BlockNumber for alignment reasons: SizeOfBtreeSplit is only 16-bit
* aligned.)
*
+ * If level > 0, an IndexTuple representing the HIKEY of the left page
+ * follows. We don't need this on leaf pages, because it's the same
+ * as the leftmost key in the new right page. Also, it's suppressed if
+ * XLogInsert chooses to store the left page's whole page image.
+ *
* In the _L variants, next are OffsetNumber newitemoff and the new item.
* (In the _R variants, the new item is one of the right page's tuples.)
+ * The new item, but not newitemoff, is suppressed if XLogInsert chooses
+ * to store the left page's whole page image.
*
* Last are the right page's tuples in the form used by _bt_restore_page.
*/