From 2fd8685e7fd9fddf16f99de1284a787d29781cc8 Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvherre@alvh.no-ip.org>
Date: Wed, 29 Mar 2017 12:18:48 -0300
Subject: [PATCH] Simplify check of modified attributes in heap_update

The old coding was getting more complicated as new things were added,
and it would be barely tolerable with upcoming WARM updates and other
future features such as indirect indexes.  The new coding incurs a small
performance cost in synthetic benchmark cases, and is barely measurable
in normal cases.  A much larger benefit is expected from WARM, which
could actually bolt its needs on top of the existing coding, but it is
much uglier and bug-prone than doing it on this new code.  Additional
optimization can be applied on top of this, if need be.

Reviewed-by: Pavan Deolasee, Amit Kapila, Mithun CY
Discussion: https://postgr.es/m/20161228232018.4hc66ndrzpz4g4wn@alvherre.pgsql
	https://postgr.es/m/CABOikdMJfz69dBNRTOZcB6s5A0tf8OMCyQVYQyR-WFFdoEwKMQ@mail.gmail.com
---
 src/backend/access/heap/heapam.c | 197 +++++++++++--------------------
 1 file changed, 71 insertions(+), 126 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index b147f6482c..0c3e2b065a 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -96,11 +96,8 @@ static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
 				Buffer newbuf, HeapTuple oldtup,
 				HeapTuple newtup, HeapTuple old_key_tup,
 				bool all_visible_cleared, bool new_all_visible_cleared);
-static void HeapSatisfiesHOTandKeyUpdate(Relation relation,
-							 Bitmapset *hot_attrs,
-							 Bitmapset *key_attrs, Bitmapset *id_attrs,
-							 bool *satisfies_hot, bool *satisfies_key,
-							 bool *satisfies_id,
+static Bitmapset *HeapDetermineModifiedColumns(Relation relation,
+							 Bitmapset *interesting_cols,
 							 HeapTuple oldtup, HeapTuple newtup);
 static bool heap_acquire_tuplock(Relation relation, ItemPointer tid,
 					 LockTupleMode mode, LockWaitPolicy wait_policy,
@@ -3471,6 +3468,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 	Bitmapset  *hot_attrs;
 	Bitmapset  *key_attrs;
 	Bitmapset  *id_attrs;
+	Bitmapset  *interesting_attrs;
+	Bitmapset  *modified_attrs;
 	ItemId		lp;
 	HeapTupleData oldtup;
 	HeapTuple	heaptup;
@@ -3488,10 +3487,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 				pagefree;
 	bool		have_tuple_lock = false;
 	bool		iscombo;
-	bool		satisfies_hot;
-	bool		satisfies_key;
-	bool		satisfies_id;
 	bool		use_hot_update = false;
+	bool		hot_attrs_checked = false;
 	bool		key_intact;
 	bool		all_visible_cleared = false;
 	bool		all_visible_cleared_new = false;
@@ -3517,26 +3514,50 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 				 errmsg("cannot update tuples during a parallel operation")));
 
 	/*
-	 * Fetch the list of attributes to be checked for HOT update.  This is
-	 * wasted effort if we fail to update or have to put the new tuple on a
-	 * different page.  But we must compute the list before obtaining buffer
-	 * lock --- in the worst case, if we are doing an update on one of the
-	 * relevant system catalogs, we could deadlock if we try to fetch the list
-	 * later.  In any case, the relcache caches the data so this is usually
-	 * pretty cheap.
+	 * Fetch the list of attributes to be checked for various operations.
 	 *
-	 * Note that we get a copy here, so we need not worry about relcache flush
-	 * happening midway through.
+	 * For HOT considerations, this is wasted effort if we fail to update or
+	 * have to put the new tuple on a different page.  But we must compute the
+	 * list before obtaining buffer lock --- in the worst case, if we are doing
+	 * an update on one of the relevant system catalogs, we could deadlock if
+	 * we try to fetch the list later.  In any case, the relcache caches the
+	 * data so this is usually pretty cheap.
+	 *
+	 * We also need columns used by the replica identity and columns that are
+	 * considered the "key" of rows in the table.
+	 *
+	 * Note that we get copies of each bitmap, so we need not worry about
+	 * relcache flush happening midway through.
 	 */
 	hot_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_ALL);
 	key_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_KEY);
 	id_attrs = RelationGetIndexAttrBitmap(relation,
 										  INDEX_ATTR_BITMAP_IDENTITY_KEY);
 
+
 	block = ItemPointerGetBlockNumber(otid);
 	buffer = ReadBuffer(relation, block);
 	page = BufferGetPage(buffer);
 
+	interesting_attrs = NULL;
+	/*
+	 * If the page is already full, there is hardly any chance of doing a HOT
+	 * update on this page. It might be wasteful effort to look for index
+	 * column updates only to later reject HOT updates for lack of space in the
+	 * same page. So we be conservative and only fetch hot_attrs if the page is
+	 * not already full. Since we are already holding a pin on the buffer,
+	 * there is no chance that the buffer can get cleaned up concurrently and
+	 * even if that was possible, in the worst case we lose a chance to do a
+	 * HOT update.
+	 */
+	if (!PageIsFull(page))
+	{
+		interesting_attrs = bms_add_members(interesting_attrs, hot_attrs);
+		hot_attrs_checked = true;
+	}
+	interesting_attrs = bms_add_members(interesting_attrs, key_attrs);
+	interesting_attrs = bms_add_members(interesting_attrs, id_attrs);
+
 	/*
 	 * Before locking the buffer, pin the visibility map page if it appears to
 	 * be necessary.  Since we haven't got the lock yet, someone else might be
@@ -3552,7 +3573,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 	Assert(ItemIdIsNormal(lp));
 
 	/*
-	 * Fill in enough data in oldtup for HeapSatisfiesHOTandKeyUpdate to work
+	 * Fill in enough data in oldtup for HeapDetermineModifiedColumns to work
 	 * properly.
 	 */
 	oldtup.t_tableOid = RelationGetRelid(relation);
@@ -3578,6 +3599,10 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 		Assert(!(newtup->t_data->t_infomask & HEAP_HASOID));
 	}
 
+	/* Determine columns modified by the update. */
+	modified_attrs = HeapDetermineModifiedColumns(relation, interesting_attrs,
+												  &oldtup, newtup);
+
 	/*
 	 * If we're not updating any "key" column, we can grab a weaker lock type.
 	 * This allows for more concurrency when we are running simultaneously
@@ -3589,10 +3614,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 	 * is updates that don't manipulate key columns, not those that
 	 * serendipitiously arrive at the same key values.
 	 */
-	HeapSatisfiesHOTandKeyUpdate(relation, hot_attrs, key_attrs, id_attrs,
-								 &satisfies_hot, &satisfies_key,
-								 &satisfies_id, &oldtup, newtup);
-	if (satisfies_key)
+	if (!bms_overlap(modified_attrs, key_attrs))
 	{
 		*lockmode = LockTupleNoKeyExclusive;
 		mxact_status = MultiXactStatusNoKeyUpdate;
@@ -3831,6 +3853,8 @@ l2:
 		bms_free(hot_attrs);
 		bms_free(key_attrs);
 		bms_free(id_attrs);
+		bms_free(modified_attrs);
+		bms_free(interesting_attrs);
 		return result;
 	}
 
@@ -4133,9 +4157,10 @@ l2:
 		/*
 		 * Since the new tuple is going into the same page, we might be able
 		 * to do a HOT update.  Check if any of the index columns have been
-		 * changed.  If not, then HOT update is possible.
+		 * changed. If the page was already full, we may have skipped checking
+		 * for index columns. If so, HOT update is possible.
 		 */
-		if (satisfies_hot)
+		if (hot_attrs_checked && !bms_overlap(modified_attrs, hot_attrs))
 			use_hot_update = true;
 	}
 	else
@@ -4150,7 +4175,9 @@ l2:
 	 * ExtractReplicaIdentity() will return NULL if nothing needs to be
 	 * logged.
 	 */
-	old_key_tuple = ExtractReplicaIdentity(relation, &oldtup, !satisfies_id, &old_key_copied);
+	old_key_tuple = ExtractReplicaIdentity(relation, &oldtup,
+										   bms_overlap(modified_attrs, id_attrs),
+										   &old_key_copied);
 
 	/* NO EREPORT(ERROR) from here till changes are logged */
 	START_CRIT_SECTION();
@@ -4298,13 +4325,15 @@ l2:
 	bms_free(hot_attrs);
 	bms_free(key_attrs);
 	bms_free(id_attrs);
+	bms_free(modified_attrs);
+	bms_free(interesting_attrs);
 
 	return HeapTupleMayBeUpdated;
 }
 
 /*
  * Check if the specified attribute's value is same in both given tuples.
- * Subroutine for HeapSatisfiesHOTandKeyUpdate.
+ * Subroutine for HeapDetermineModifiedColumns.
  */
 static bool
 heap_tuple_attr_equals(TupleDesc tupdesc, int attrnum,
@@ -4338,7 +4367,7 @@ heap_tuple_attr_equals(TupleDesc tupdesc, int attrnum,
 
 	/*
 	 * Extract the corresponding values.  XXX this is pretty inefficient if
-	 * there are many indexed columns.  Should HeapSatisfiesHOTandKeyUpdate do
+	 * there are many indexed columns.  Should HeapDetermineModifiedColumns do
 	 * a single heap_deform_tuple call on each tuple, instead?	But that
 	 * doesn't work for system columns ...
 	 */
@@ -4383,114 +4412,30 @@ heap_tuple_attr_equals(TupleDesc tupdesc, int attrnum,
 /*
  * Check which columns are being updated.
  *
- * This simultaneously checks conditions for HOT updates, for FOR KEY
- * SHARE updates, and REPLICA IDENTITY concerns.  Since much of the time they
- * will be checking very similar sets of columns, and doing the same tests on
- * them, it makes sense to optimize and do them together.
- *
- * We receive three bitmapsets comprising the three sets of columns we're
- * interested in.  Note these are destructively modified; that is OK since
- * this is invoked at most once in heap_update.
+ * Given an updated tuple, determine (and return into the output bitmapset),
+ * from those listed as interesting, the set of columns that changed.
  *
- * hot_result is set to TRUE if it's okay to do a HOT update (i.e. it does not
- * modified indexed columns); key_result is set to TRUE if the update does not
- * modify columns used in the key; id_result is set to TRUE if the update does
- * not modify columns in any index marked as the REPLICA IDENTITY.
+ * The input bitmapset is destructively modified; that is OK since this is
+ * invoked at most once in heap_update.
  */
-static void
-HeapSatisfiesHOTandKeyUpdate(Relation relation, Bitmapset *hot_attrs,
-							 Bitmapset *key_attrs, Bitmapset *id_attrs,
-							 bool *satisfies_hot, bool *satisfies_key,
-							 bool *satisfies_id,
+static Bitmapset *
+HeapDetermineModifiedColumns(Relation relation, Bitmapset *interesting_cols,
 							 HeapTuple oldtup, HeapTuple newtup)
 {
-	int			next_hot_attnum;
-	int			next_key_attnum;
-	int			next_id_attnum;
-	bool		hot_result = true;
-	bool		key_result = true;
-	bool		id_result = true;
-
-	/* If REPLICA IDENTITY is set to FULL, id_attrs will be empty. */
-	Assert(bms_is_subset(id_attrs, key_attrs));
-	Assert(bms_is_subset(key_attrs, hot_attrs));
-
-	/*
-	 * If one of these sets contains no remaining bits, bms_first_member will
-	 * return -1, and after adding FirstLowInvalidHeapAttributeNumber (which
-	 * is negative!)  we'll get an attribute number that can't possibly be
-	 * real, and thus won't match any actual attribute number.
-	 */
-	next_hot_attnum = bms_first_member(hot_attrs);
-	next_hot_attnum += FirstLowInvalidHeapAttributeNumber;
-	next_key_attnum = bms_first_member(key_attrs);
-	next_key_attnum += FirstLowInvalidHeapAttributeNumber;
-	next_id_attnum = bms_first_member(id_attrs);
-	next_id_attnum += FirstLowInvalidHeapAttributeNumber;
+	int		attnum;
+	Bitmapset *modified = NULL;
 
-	for (;;)
+	while ((attnum = bms_first_member(interesting_cols)) >= 0)
 	{
-		bool		changed;
-		int			check_now;
+		attnum += FirstLowInvalidHeapAttributeNumber;
 
-		/*
-		 * Since the HOT attributes are a superset of the key attributes and
-		 * the key attributes are a superset of the id attributes, this logic
-		 * is guaranteed to identify the next column that needs to be checked.
-		 */
-		if (hot_result && next_hot_attnum > FirstLowInvalidHeapAttributeNumber)
-			check_now = next_hot_attnum;
-		else if (key_result && next_key_attnum > FirstLowInvalidHeapAttributeNumber)
-			check_now = next_key_attnum;
-		else if (id_result && next_id_attnum > FirstLowInvalidHeapAttributeNumber)
-			check_now = next_id_attnum;
-		else
-			break;
-
-		/* See whether it changed. */
-		changed = !heap_tuple_attr_equals(RelationGetDescr(relation),
-										  check_now, oldtup, newtup);
-		if (changed)
-		{
-			if (check_now == next_hot_attnum)
-				hot_result = false;
-			if (check_now == next_key_attnum)
-				key_result = false;
-			if (check_now == next_id_attnum)
-				id_result = false;
-
-			/* if all are false now, we can stop checking */
-			if (!hot_result && !key_result && !id_result)
-				break;
-		}
-
-		/*
-		 * Advance the next attribute numbers for the sets that contain the
-		 * attribute we just checked.  As we work our way through the columns,
-		 * the next_attnum values will rise; but when each set becomes empty,
-		 * bms_first_member() will return -1 and the attribute number will end
-		 * up with a value less than FirstLowInvalidHeapAttributeNumber.
-		 */
-		if (hot_result && check_now == next_hot_attnum)
-		{
-			next_hot_attnum = bms_first_member(hot_attrs);
-			next_hot_attnum += FirstLowInvalidHeapAttributeNumber;
-		}
-		if (key_result && check_now == next_key_attnum)
-		{
-			next_key_attnum = bms_first_member(key_attrs);
-			next_key_attnum += FirstLowInvalidHeapAttributeNumber;
-		}
-		if (id_result && check_now == next_id_attnum)
-		{
-			next_id_attnum = bms_first_member(id_attrs);
-			next_id_attnum += FirstLowInvalidHeapAttributeNumber;
-		}
+		if (!heap_tuple_attr_equals(RelationGetDescr(relation),
+								   attnum, oldtup, newtup))
+			modified = bms_add_member(modified,
+									  attnum - FirstLowInvalidHeapAttributeNumber);
 	}
 
-	*satisfies_hot = hot_result;
-	*satisfies_key = key_result;
-	*satisfies_id = id_result;
+	return modified;
 }
 
 /*
-- 
2.40.0