- /*
- * Now we are on the right page, so find the insert position. If we
- * moved right at all, we know we should insert at the start of the
- * page, else must find the position by searching.
- */
- if (movedright)
- newitemoff = P_FIRSTDATAKEY(lpageop);
- else
- newitemoff = _bt_binsrch(rel, buf, keysz, scankey, false);
- }
+/*----------
+ * _bt_insertonpg() -- Insert a tuple on a particular page in the index.
+ *
+ * This recursive procedure does the following things:
+ *
+ * + if necessary, splits the target page (making sure that the
+ * split is equitable as far as post-insert free space goes).
+ * + inserts the tuple.
+ * + if the page was split, pops the parent stack, and finds the
+ * right place to insert the new child pointer (by walking
+ * right using information stored in the parent stack).
+ * + invokes itself with the appropriate tuple for the right
+ * child page on the parent.
+ * + updates the metapage if a true root or fast root is split.
+ *
+ * On entry, we must have the correct buffer in which to do the
+ * insertion, and the buffer must be pinned and write-locked. On return,
+ * we will have dropped both the pin and the lock on the buffer.
+ *
+ * When inserting to a non-leaf page, 'cbuf' is the left-sibling of the
+ * page we're inserting the downlink for. This function will clear the
+ * INCOMPLETE_SPLIT flag on it, and release the buffer.
+ *
+ * The locking interactions in this code are critical. You should
+ * grok Lehman and Yao's paper before making any changes. In addition,
+ * you need to understand how we disambiguate duplicate keys in this
+ * implementation, in order to be able to find our location using
+ * L&Y "move right" operations. Since we may insert duplicate user
+ * keys, and since these dups may propagate up the tree, the caller
+ * supplies the insert position in the 'newitemoff' parameter so that
+ * we are positioned correctly for the insertion on internal pages.
+ *----------
+ */
+static void
+_bt_insertonpg(Relation rel,
+ Buffer buf,
+ Buffer cbuf,
+ BTStack stack,
+ IndexTuple itup,
+ OffsetNumber newitemoff,
+ bool split_only_page)
+{
+ Page page;
+ BTPageOpaque lpageop;
+ OffsetNumber firstright = InvalidOffsetNumber;
+ Size itemsz;
+
+ page = BufferGetPage(buf);
+ lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
+
+ /* child buffer must be given iff inserting on an internal page */
+ Assert(P_ISLEAF(lpageop) == !BufferIsValid(cbuf));
+
+ /* The caller should've finished any incomplete splits already. */
+ if (P_INCOMPLETE_SPLIT(lpageop))
+ elog(ERROR, "cannot insert to incompletely split page %u",
+ BufferGetBlockNumber(buf));
+
+ itemsz = IndexTupleDSize(*itup);
+ itemsz = MAXALIGN(itemsz); /* be safe, PageAddItem will do this but we
+ * need to be consistent */