+
+/*
+ * _bt_findinsertloc() -- Finds an insert location for a tuple
+ *
+ * If the new key is equal to one or more existing keys, we can
+ * legitimately place it anywhere in the series of equal keys --- in fact,
+ * if the new key is equal to the page's "high key" we can place it on
+ * the next page. If it is equal to the high key, and there's not room
+ * to insert the new tuple on the current page without splitting, then
+ * we can move right hoping to find more free space and avoid a split.
+ * (We should not move right indefinitely, however, since that leads to
+ * O(N^2) insertion behavior in the presence of many equal keys.)
+ * Once we have chosen the page to put the key on, we'll insert it before
+ * any existing equal keys because of the way _bt_binsrch() works.
+ *
+ * If there's not enough room in the space, we try to make room by
+ * removing any LP_DEAD tuples.
+ *
+ * On entry, *bufptr and *offsetptr point to the first legal position
+ * where the new tuple could be inserted. The caller should hold an
+ * exclusive lock on *bufptr. *offsetptr can also be set to
+ * InvalidOffsetNumber, in which case the function will search for the
+ * right location within the page if needed. On exit, they point to the
+ * chosen insert location. If _bt_findinsertloc decides to move right,
+ * the lock and pin on the original page will be released and the new
+ * page returned to the caller is exclusively locked instead.
+ *
+ * newtup is the new tuple we're inserting, and scankey is an insertion
+ * type scan key for it.
+ */
+static void
+_bt_findinsertloc(Relation rel,
+ Buffer *bufptr,
+ OffsetNumber *offsetptr,
+ int keysz,
+ ScanKey scankey,
+ IndexTuple newtup,
+ BTStack stack,
+ Relation heapRel)
+{
+ Buffer buf = *bufptr;
+ Page page = BufferGetPage(buf);
+ Size itemsz;
+ BTPageOpaque lpageop;
+ bool movedright,
+ vacuumed;
+ OffsetNumber newitemoff;
+ OffsetNumber firstlegaloff = *offsetptr;
+
+ lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
+
+ itemsz = IndexTupleDSize(*newtup);
+ itemsz = MAXALIGN(itemsz); /* be safe, PageAddItem will do this but we
+ * need to be consistent */
+
+ /*
+ * Check whether the item can fit on a btree page at all. (Eventually, we
+ * ought to try to apply TOAST methods if not.) We actually need to be
+ * able to fit three items on every page, so restrict any one item to 1/3
+ * the per-page available space. Note that at this point, itemsz doesn't
+ * include the ItemId.
+ *
+ * NOTE: if you change this, see also the similar code in _bt_buildadd().
+ */
+ if (itemsz > BTMaxItemSize(page))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
+ itemsz, BTMaxItemSize(page),
+ RelationGetRelationName(rel)),
+ errhint("Values larger than 1/3 of a buffer page cannot be indexed.\n"
+ "Consider a function index of an MD5 hash of the value, "
+ "or use full text indexing."),
+ errtableconstraint(heapRel,
+ RelationGetRelationName(rel))));
+
+ /*----------
+ * If we will need to split the page to put the item on this page,
+ * check whether we can put the tuple somewhere to the right,
+ * instead. Keep scanning right until we
+ * (a) find a page with enough free space,
+ * (b) reach the last page where the tuple can legally go, or
+ * (c) get tired of searching.
+ * (c) is not flippant; it is important because if there are many
+ * pages' worth of equal keys, it's better to split one of the early
+ * pages than to scan all the way to the end of the run of equal keys
+ * on every insert. We implement "get tired" as a random choice,
+ * since stopping after scanning a fixed number of pages wouldn't work
+ * well (we'd never reach the right-hand side of previously split
+ * pages). Currently the probability of moving right is set at 0.99,
+ * which may seem too high to change the behavior much, but it does an
+ * excellent job of preventing O(N^2) behavior with many equal keys.
+ *----------
+ */
+ movedright = false;
+ vacuumed = false;
+ while (PageGetFreeSpace(page) < itemsz)
+ {
+ Buffer rbuf;
+ BlockNumber rblkno;
+
+ /*
+ * before considering moving right, see if we can obtain enough space
+ * by erasing LP_DEAD items
+ */
+ if (P_ISLEAF(lpageop) && P_HAS_GARBAGE(lpageop))
+ {
+ _bt_vacuum_one_page(rel, buf, heapRel);
+
+ /*
+ * remember that we vacuumed this page, because that makes the
+ * hint supplied by the caller invalid
+ */
+ vacuumed = true;
+
+ if (PageGetFreeSpace(page) >= itemsz)
+ break; /* OK, now we have enough space */
+ }
+
+ /*
+ * nope, so check conditions (b) and (c) enumerated above
+ */
+ if (P_RIGHTMOST(lpageop) ||
+ _bt_compare(rel, keysz, scankey, page, P_HIKEY) != 0 ||
+ random() <= (MAX_RANDOM_VALUE / 100))
+ break;
+
+ /*
+ * step right to next non-dead page
+ *
+ * must write-lock that page before releasing write lock on current
+ * page; else someone else's _bt_check_unique scan could fail to see
+ * our insertion. write locks on intermediate dead pages won't do
+ * because we don't know when they will get de-linked from the tree.
+ */
+ rbuf = InvalidBuffer;
+
+ rblkno = lpageop->btpo_next;
+ for (;;)
+ {
+ rbuf = _bt_relandgetbuf(rel, rbuf, rblkno, BT_WRITE);
+ page = BufferGetPage(rbuf);
+ lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
+
+ /*
+ * If this page was incompletely split, finish the split now. We
+ * do this while holding a lock on the left sibling, which is not
+ * good because finishing the split could be a fairly lengthy
+ * operation. But this should happen very seldom.
+ */
+ if (P_INCOMPLETE_SPLIT(lpageop))
+ {
+ _bt_finish_split(rel, rbuf, stack);
+ rbuf = InvalidBuffer;
+ continue;
+ }
+
+ if (!P_IGNORE(lpageop))
+ break;
+ if (P_RIGHTMOST(lpageop))
+ elog(ERROR, "fell off the end of index \"%s\"",
+ RelationGetRelationName(rel));
+
+ rblkno = lpageop->btpo_next;
+ }
+ _bt_relbuf(rel, buf);
+ buf = rbuf;
+ movedright = true;
+ vacuumed = false;
+ }
+
+ /*
+ * Now we are on the right page, so find the insert position. If we moved
+ * right at all, we know we should insert at the start of the page. If we
+ * didn't move right, we can use the firstlegaloff hint if the caller
+ * supplied one, unless we vacuumed the page which might have moved tuples
+ * around making the hint invalid. If we didn't move right or can't use
+ * the hint, find the position by searching.
+ */
+ if (movedright)
+ newitemoff = P_FIRSTDATAKEY(lpageop);
+ else if (firstlegaloff != InvalidOffsetNumber && !vacuumed)
+ newitemoff = firstlegaloff;
+ else
+ newitemoff = _bt_binsrch(rel, buf, keysz, scankey, false);
+
+ *bufptr = buf;
+ *offsetptr = newitemoff;
+}
+