From: Alvaro Herrera Date: Wed, 29 Mar 2017 15:18:48 +0000 (-0300) Subject: Simplify check of modified attributes in heap_update X-Git-Tag: REL_10_BETA1~451 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2fd8685e7fd9fddf16f99de1284a787d29781cc8;p=postgresql Simplify check of modified attributes in heap_update The old coding was getting more complicated as new things were added, and it would be barely tolerable with upcoming WARM updates and other future features such as indirect indexes. The new coding incurs a small performance cost in synthetic benchmark cases, and is barely measurable in normal cases. A much larger benefit is expected from WARM, which could actually bolt its needs on top of the existing coding, but it is much uglier and bug-prone than doing it on this new code. Additional optimization can be applied on top of this, if need be. Reviewed-by: Pavan Deolasee, Amit Kapila, Mithun CY Discussion: https://postgr.es/m/20161228232018.4hc66ndrzpz4g4wn@alvherre.pgsql https://postgr.es/m/CABOikdMJfz69dBNRTOZcB6s5A0tf8OMCyQVYQyR-WFFdoEwKMQ@mail.gmail.com --- diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index b147f6482c..0c3e2b065a 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -96,11 +96,8 @@ static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, HeapTuple old_key_tup, bool all_visible_cleared, bool new_all_visible_cleared); -static void HeapSatisfiesHOTandKeyUpdate(Relation relation, - Bitmapset *hot_attrs, - Bitmapset *key_attrs, Bitmapset *id_attrs, - bool *satisfies_hot, bool *satisfies_key, - bool *satisfies_id, +static Bitmapset *HeapDetermineModifiedColumns(Relation relation, + Bitmapset *interesting_cols, HeapTuple oldtup, HeapTuple newtup); static bool heap_acquire_tuplock(Relation relation, ItemPointer tid, LockTupleMode mode, LockWaitPolicy wait_policy, @@ -3471,6 +3468,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, Bitmapset *hot_attrs; Bitmapset *key_attrs; Bitmapset *id_attrs; + Bitmapset *interesting_attrs; + Bitmapset *modified_attrs; ItemId lp; HeapTupleData oldtup; HeapTuple heaptup; @@ -3488,10 +3487,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, pagefree; bool have_tuple_lock = false; bool iscombo; - bool satisfies_hot; - bool satisfies_key; - bool satisfies_id; bool use_hot_update = false; + bool hot_attrs_checked = false; bool key_intact; bool all_visible_cleared = false; bool all_visible_cleared_new = false; @@ -3517,26 +3514,50 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, errmsg("cannot update tuples during a parallel operation"))); /* - * Fetch the list of attributes to be checked for HOT update. This is - * wasted effort if we fail to update or have to put the new tuple on a - * different page. But we must compute the list before obtaining buffer - * lock --- in the worst case, if we are doing an update on one of the - * relevant system catalogs, we could deadlock if we try to fetch the list - * later. In any case, the relcache caches the data so this is usually - * pretty cheap. + * Fetch the list of attributes to be checked for various operations. * - * Note that we get a copy here, so we need not worry about relcache flush - * happening midway through. + * For HOT considerations, this is wasted effort if we fail to update or + * have to put the new tuple on a different page. But we must compute the + * list before obtaining buffer lock --- in the worst case, if we are doing + * an update on one of the relevant system catalogs, we could deadlock if + * we try to fetch the list later. In any case, the relcache caches the + * data so this is usually pretty cheap. + * + * We also need columns used by the replica identity and columns that are + * considered the "key" of rows in the table. + * + * Note that we get copies of each bitmap, so we need not worry about + * relcache flush happening midway through. */ hot_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_ALL); key_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_KEY); id_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_IDENTITY_KEY); + block = ItemPointerGetBlockNumber(otid); buffer = ReadBuffer(relation, block); page = BufferGetPage(buffer); + interesting_attrs = NULL; + /* + * If the page is already full, there is hardly any chance of doing a HOT + * update on this page. It might be wasteful effort to look for index + * column updates only to later reject HOT updates for lack of space in the + * same page. So we be conservative and only fetch hot_attrs if the page is + * not already full. Since we are already holding a pin on the buffer, + * there is no chance that the buffer can get cleaned up concurrently and + * even if that was possible, in the worst case we lose a chance to do a + * HOT update. + */ + if (!PageIsFull(page)) + { + interesting_attrs = bms_add_members(interesting_attrs, hot_attrs); + hot_attrs_checked = true; + } + interesting_attrs = bms_add_members(interesting_attrs, key_attrs); + interesting_attrs = bms_add_members(interesting_attrs, id_attrs); + /* * Before locking the buffer, pin the visibility map page if it appears to * be necessary. Since we haven't got the lock yet, someone else might be @@ -3552,7 +3573,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, Assert(ItemIdIsNormal(lp)); /* - * Fill in enough data in oldtup for HeapSatisfiesHOTandKeyUpdate to work + * Fill in enough data in oldtup for HeapDetermineModifiedColumns to work * properly. */ oldtup.t_tableOid = RelationGetRelid(relation); @@ -3578,6 +3599,10 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, Assert(!(newtup->t_data->t_infomask & HEAP_HASOID)); } + /* Determine columns modified by the update. */ + modified_attrs = HeapDetermineModifiedColumns(relation, interesting_attrs, + &oldtup, newtup); + /* * If we're not updating any "key" column, we can grab a weaker lock type. * This allows for more concurrency when we are running simultaneously @@ -3589,10 +3614,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, * is updates that don't manipulate key columns, not those that * serendipitiously arrive at the same key values. */ - HeapSatisfiesHOTandKeyUpdate(relation, hot_attrs, key_attrs, id_attrs, - &satisfies_hot, &satisfies_key, - &satisfies_id, &oldtup, newtup); - if (satisfies_key) + if (!bms_overlap(modified_attrs, key_attrs)) { *lockmode = LockTupleNoKeyExclusive; mxact_status = MultiXactStatusNoKeyUpdate; @@ -3831,6 +3853,8 @@ l2: bms_free(hot_attrs); bms_free(key_attrs); bms_free(id_attrs); + bms_free(modified_attrs); + bms_free(interesting_attrs); return result; } @@ -4133,9 +4157,10 @@ l2: /* * Since the new tuple is going into the same page, we might be able * to do a HOT update. Check if any of the index columns have been - * changed. If not, then HOT update is possible. + * changed. If the page was already full, we may have skipped checking + * for index columns. If so, HOT update is possible. */ - if (satisfies_hot) + if (hot_attrs_checked && !bms_overlap(modified_attrs, hot_attrs)) use_hot_update = true; } else @@ -4150,7 +4175,9 @@ l2: * ExtractReplicaIdentity() will return NULL if nothing needs to be * logged. */ - old_key_tuple = ExtractReplicaIdentity(relation, &oldtup, !satisfies_id, &old_key_copied); + old_key_tuple = ExtractReplicaIdentity(relation, &oldtup, + bms_overlap(modified_attrs, id_attrs), + &old_key_copied); /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -4298,13 +4325,15 @@ l2: bms_free(hot_attrs); bms_free(key_attrs); bms_free(id_attrs); + bms_free(modified_attrs); + bms_free(interesting_attrs); return HeapTupleMayBeUpdated; } /* * Check if the specified attribute's value is same in both given tuples. - * Subroutine for HeapSatisfiesHOTandKeyUpdate. + * Subroutine for HeapDetermineModifiedColumns. */ static bool heap_tuple_attr_equals(TupleDesc tupdesc, int attrnum, @@ -4338,7 +4367,7 @@ heap_tuple_attr_equals(TupleDesc tupdesc, int attrnum, /* * Extract the corresponding values. XXX this is pretty inefficient if - * there are many indexed columns. Should HeapSatisfiesHOTandKeyUpdate do + * there are many indexed columns. Should HeapDetermineModifiedColumns do * a single heap_deform_tuple call on each tuple, instead? But that * doesn't work for system columns ... */ @@ -4383,114 +4412,30 @@ heap_tuple_attr_equals(TupleDesc tupdesc, int attrnum, /* * Check which columns are being updated. * - * This simultaneously checks conditions for HOT updates, for FOR KEY - * SHARE updates, and REPLICA IDENTITY concerns. Since much of the time they - * will be checking very similar sets of columns, and doing the same tests on - * them, it makes sense to optimize and do them together. - * - * We receive three bitmapsets comprising the three sets of columns we're - * interested in. Note these are destructively modified; that is OK since - * this is invoked at most once in heap_update. + * Given an updated tuple, determine (and return into the output bitmapset), + * from those listed as interesting, the set of columns that changed. * - * hot_result is set to TRUE if it's okay to do a HOT update (i.e. it does not - * modified indexed columns); key_result is set to TRUE if the update does not - * modify columns used in the key; id_result is set to TRUE if the update does - * not modify columns in any index marked as the REPLICA IDENTITY. + * The input bitmapset is destructively modified; that is OK since this is + * invoked at most once in heap_update. */ -static void -HeapSatisfiesHOTandKeyUpdate(Relation relation, Bitmapset *hot_attrs, - Bitmapset *key_attrs, Bitmapset *id_attrs, - bool *satisfies_hot, bool *satisfies_key, - bool *satisfies_id, +static Bitmapset * +HeapDetermineModifiedColumns(Relation relation, Bitmapset *interesting_cols, HeapTuple oldtup, HeapTuple newtup) { - int next_hot_attnum; - int next_key_attnum; - int next_id_attnum; - bool hot_result = true; - bool key_result = true; - bool id_result = true; - - /* If REPLICA IDENTITY is set to FULL, id_attrs will be empty. */ - Assert(bms_is_subset(id_attrs, key_attrs)); - Assert(bms_is_subset(key_attrs, hot_attrs)); - - /* - * If one of these sets contains no remaining bits, bms_first_member will - * return -1, and after adding FirstLowInvalidHeapAttributeNumber (which - * is negative!) we'll get an attribute number that can't possibly be - * real, and thus won't match any actual attribute number. - */ - next_hot_attnum = bms_first_member(hot_attrs); - next_hot_attnum += FirstLowInvalidHeapAttributeNumber; - next_key_attnum = bms_first_member(key_attrs); - next_key_attnum += FirstLowInvalidHeapAttributeNumber; - next_id_attnum = bms_first_member(id_attrs); - next_id_attnum += FirstLowInvalidHeapAttributeNumber; + int attnum; + Bitmapset *modified = NULL; - for (;;) + while ((attnum = bms_first_member(interesting_cols)) >= 0) { - bool changed; - int check_now; + attnum += FirstLowInvalidHeapAttributeNumber; - /* - * Since the HOT attributes are a superset of the key attributes and - * the key attributes are a superset of the id attributes, this logic - * is guaranteed to identify the next column that needs to be checked. - */ - if (hot_result && next_hot_attnum > FirstLowInvalidHeapAttributeNumber) - check_now = next_hot_attnum; - else if (key_result && next_key_attnum > FirstLowInvalidHeapAttributeNumber) - check_now = next_key_attnum; - else if (id_result && next_id_attnum > FirstLowInvalidHeapAttributeNumber) - check_now = next_id_attnum; - else - break; - - /* See whether it changed. */ - changed = !heap_tuple_attr_equals(RelationGetDescr(relation), - check_now, oldtup, newtup); - if (changed) - { - if (check_now == next_hot_attnum) - hot_result = false; - if (check_now == next_key_attnum) - key_result = false; - if (check_now == next_id_attnum) - id_result = false; - - /* if all are false now, we can stop checking */ - if (!hot_result && !key_result && !id_result) - break; - } - - /* - * Advance the next attribute numbers for the sets that contain the - * attribute we just checked. As we work our way through the columns, - * the next_attnum values will rise; but when each set becomes empty, - * bms_first_member() will return -1 and the attribute number will end - * up with a value less than FirstLowInvalidHeapAttributeNumber. - */ - if (hot_result && check_now == next_hot_attnum) - { - next_hot_attnum = bms_first_member(hot_attrs); - next_hot_attnum += FirstLowInvalidHeapAttributeNumber; - } - if (key_result && check_now == next_key_attnum) - { - next_key_attnum = bms_first_member(key_attrs); - next_key_attnum += FirstLowInvalidHeapAttributeNumber; - } - if (id_result && check_now == next_id_attnum) - { - next_id_attnum = bms_first_member(id_attrs); - next_id_attnum += FirstLowInvalidHeapAttributeNumber; - } + if (!heap_tuple_attr_equals(RelationGetDescr(relation), + attnum, oldtup, newtup)) + modified = bms_add_member(modified, + attnum - FirstLowInvalidHeapAttributeNumber); } - *satisfies_hot = hot_result; - *satisfies_key = key_result; - *satisfies_id = id_result; + return modified; } /*