*/
if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
HeapTupleHeaderIsOnlyLocked(tp.t_data) ||
+ HeapTupleHeaderIndicatesMovedPartitions(tp.t_data) ||
ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
{
UnlockReleaseBuffer(buffer);
* crosscheck - if not InvalidSnapshot, also check tuple against this
* wait - true if should wait for any conflicting update to commit/abort
* hufd - output parameter, filled in failure cases (see below)
+ * changingPart - true iff the tuple is being moved to another partition
+ * table due to an update of the partition key. Otherwise, false.
*
* Normal, successful return value is HeapTupleMayBeUpdated, which
* actually means we did delete it. Failure return codes are
HTSU_Result
heap_delete(Relation relation, ItemPointer tid,
CommandId cid, Snapshot crosscheck, bool wait,
- HeapUpdateFailureData *hufd)
+ HeapUpdateFailureData *hufd, bool changingPart)
{
HTSU_Result result;
TransactionId xid = GetCurrentTransactionId();
/* Make sure there is no forward chain link in t_ctid */
tp.t_data->t_ctid = tp.t_self;
+ /* Signal that this is actually a move into another partition */
+ if (changingPart)
+ HeapTupleHeaderSetMovedPartitions(tp.t_data);
+
MarkBufferDirty(buffer);
/*
if (RelationIsAccessibleInLogicalDecoding(relation))
log_heap_new_cid(relation, &tp);
- xlrec.flags = all_visible_cleared ? XLH_DELETE_ALL_VISIBLE_CLEARED : 0;
+ xlrec.flags = 0;
+ if (all_visible_cleared)
+ xlrec.flags |= XLH_DELETE_ALL_VISIBLE_CLEARED;
+ if (changingPart)
+ xlrec.flags |= XLH_DELETE_IS_PARTITION_MOVE;
xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
tp.t_data->t_infomask2);
xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
result = heap_delete(relation, tid,
GetCurrentCommandId(true), InvalidSnapshot,
true /* wait for commit */ ,
- &hufd);
+ &hufd, false /* changingPart */);
switch (result)
{
case HeapTupleSelfUpdated:
next:
/* if we find the end of update chain, we're done. */
if (mytup.t_data->t_infomask & HEAP_XMAX_INVALID ||
+ HeapTupleHeaderIndicatesMovedPartitions(mytup.t_data) ||
ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_ctid) ||
HeapTupleHeaderIsOnlyLocked(mytup.t_data))
{
heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid,
TransactionId xid, LockTupleMode mode)
{
- if (!ItemPointerEquals(&tuple->t_self, ctid))
+ /*
+ * If the tuple has not been updated, or has moved into another partition
+ * (effectively a delete) stop here.
+ */
+ if (!HeapTupleHeaderIndicatesMovedPartitions(tuple->t_data) &&
+ !ItemPointerEquals(&tuple->t_self, ctid))
{
/*
* If this is the first possibly-multixact-able operation in the
if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
- /* Make sure there is no forward chain link in t_ctid */
- htup->t_ctid = target_tid;
+ /* Make sure t_ctid is set correctly */
+ if (xlrec->flags & XLH_DELETE_IS_PARTITION_MOVE)
+ HeapTupleHeaderSetMovedPartitions(htup);
+ else
+ htup->t_ctid = target_tid;
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
}
*/
if (HeapTupleHeaderIsSpeculative(page_htup))
ItemPointerSet(&page_htup->t_ctid, blkno, off);
+
+ /*
+ * NB: Not ignoring ctid changes due to the tuple having moved
+ * (i.e. HeapTupleHeaderIndicatesMovedPartitions), because that's
+ * important information that needs to be in-sync between primary
+ * and standby, and thus is WAL logged.
+ */
}
/*
if (!HeapTupleHeaderIsHotUpdated(htup))
break;
+ /* HOT implies it can't have moved to different partition */
+ Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
+
/*
* Advance to next chain member.
*/
if (!HeapTupleHeaderIsHotUpdated(htup))
break;
+ /* HOT implies it can't have moved to different partition */
+ Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
+
nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
priorXmax = HeapTupleHeaderGetUpdateXid(htup);
}
*/
if (!((old_tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
HeapTupleHeaderIsOnlyLocked(old_tuple->t_data)) &&
+ !HeapTupleHeaderIndicatesMovedPartitions(old_tuple->t_data) &&
!(ItemPointerEquals(&(old_tuple->t_self),
&(old_tuple->t_data->t_ctid))))
{
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
+ if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
+
if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self))
{
/* it was updated, so look at the updated version */
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
+ if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
/* Should not encounter speculative tuple on recheck */
Assert(!HeapTupleHeaderIsSpeculative(tuple.t_data));
* As above, it should be safe to examine xmax and t_ctid without the
* buffer content lock, because they can't be changing.
*/
+
+ /* check whether next version would be in a different partition */
+ if (HeapTupleHeaderIndicatesMovedPartitions(tuple.t_data))
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
+
+ /* check whether tuple has been deleted */
if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
{
/* deleted, so forget about it */
slot = ExecDelete(mtstate, tupleid, NULL,
slot, epqstate, estate,
&tuple_deleted, false, &hufd, action,
- mtstate->canSetTag);
+ mtstate->canSetTag,
+ false /* changingPart */);
break;
break;
case HeapTupleUpdated:
/* XXX: Improve handling here */
- ereport(LOG,
- (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
- errmsg("concurrent update, retrying")));
+ if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
+ ereport(LOG,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("tuple to be locked was already moved to another partition due to concurrent update, retrying")));
+ else
+ ereport(LOG,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("concurrent update, retrying")));
goto retry;
case HeapTupleInvisible:
elog(ERROR, "attempted to lock invisible tuple");
break;
case HeapTupleUpdated:
/* XXX: Improve handling here */
- ereport(LOG,
- (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
- errmsg("concurrent update, retrying")));
+ if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
+ ereport(LOG,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("tuple to be locked was already moved to another partition due to concurrent update, retrying")));
+ else
+ ereport(LOG,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("concurrent update, retrying")));
goto retry;
case HeapTupleInvisible:
elog(ERROR, "attempted to lock invisible tuple");
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
+ if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
+
if (ItemPointerEquals(&hufd.ctid, &tuple.t_self))
{
/* Tuple was deleted, so don't return it */
bool processReturning,
HeapUpdateFailureData *hufdp,
MergeActionState *actionState,
- bool canSetTag)
+ bool canSetTag,
+ bool changingPart)
{
ResultRelInfo *resultRelInfo;
Relation resultRelationDesc;
estate->es_output_cid,
estate->es_crosscheck_snapshot,
true /* wait for commit */ ,
- &hufd);
+ &hufd,
+ changingPart);
/*
* Copy the necessary information, if the caller has asked for it. We
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
+ if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("tuple to be deleted was already moved to another partition due to concurrent update")));
if (!ItemPointerEquals(tupleid, &hufd.ctid))
{
*/
ExecDelete(mtstate, tupleid, oldtuple, planSlot, epqstate,
estate, &tuple_deleted, false, hufdp, NULL,
- false);
+ false /* canSetTag */, true /* changingPart */);
/*
* For some reason if DELETE didn't happen (e.g. trigger prevented
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
+ if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("tuple to be updated was already moved to another partition due to concurrent update")));
if (!ItemPointerEquals(tupleid, &hufd.ctid))
{
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
+ /*
+ * As long as we don't support an UPDATE of INSERT ON CONFLICT for
+ * a partitioned table, we shouldn't reach a case where the tuple to
+ * be locked has been moved to another partition due to a concurrent
+ * update of the partition key.
+ */
+ Assert(!ItemPointerIndicatesMovedPartitions(&hufd.ctid));
+
/*
* Tell caller to try again from the very start.
*
case CMD_DELETE:
slot = ExecDelete(node, tupleid, oldtuple, planSlot,
&node->mt_epqstate, estate,
- NULL, true, NULL, NULL, node->canSetTag);
+ NULL, true, NULL, NULL, node->canSetTag,
+ false /* changingPart */);
break;
default:
elog(ERROR, "unknown operation");
CommandId cid, int options, BulkInsertState bistate);
extern HTSU_Result heap_delete(Relation relation, ItemPointer tid,
CommandId cid, Snapshot crosscheck, bool wait,
- HeapUpdateFailureData *hufd);
+ HeapUpdateFailureData *hufd, bool changingPart);
extern void heap_finish_speculative(Relation relation, HeapTuple tuple);
extern void heap_abort_speculative(Relation relation, HeapTuple tuple);
extern HTSU_Result heap_update(Relation relation, ItemPointer otid,
#define XLH_DELETE_CONTAINS_OLD_TUPLE (1<<1)
#define XLH_DELETE_CONTAINS_OLD_KEY (1<<2)
#define XLH_DELETE_IS_SUPER (1<<3)
+#define XLH_DELETE_IS_PARTITION_MOVE (1<<4)
/* convenience macro for checking whether any form of old tuple was logged */
#define XLH_DELETE_CONTAINS_OLD \
*
* A word about t_ctid: whenever a new tuple is stored on disk, its t_ctid
* is initialized with its own TID (location). If the tuple is ever updated,
- * its t_ctid is changed to point to the replacement version of the tuple.
- * Thus, a tuple is the latest version of its row iff XMAX is invalid or
+ * its t_ctid is changed to point to the replacement version of the tuple or
+ * the block number (ip_blkid) is invalidated if the tuple is moved from one
+ * partition to another partition relation due to an update of the partition
+ * key. Thus, a tuple is the latest version of its row iff XMAX is invalid or
* t_ctid points to itself (in which case, if XMAX is valid, the tuple is
* either locked or deleted). One can follow the chain of t_ctid links
* to find the newest version of the row. Beware however that VACUUM might
ItemPointerSet(&(tup)->t_ctid, token, SpecTokenOffsetNumber) \
)
+/*
+ * Flag the tuple's t_ctid as "moved to another partition"
+ * (see ItemPointerSetMovedPartitions).
+ */
+#define HeapTupleHeaderSetMovedPartitions(tup) \
+ ItemPointerSetMovedPartitions(&(tup)->t_ctid)
+
+/*
+ * True iff the tuple's t_ctid is flagged as "moved to another partition".
+ * The argument is parenthesized so that any pointer-valued expression
+ * (not just a plain identifier) can be passed safely.
+ */
+#define HeapTupleHeaderIndicatesMovedPartitions(tup) \
+ ItemPointerIndicatesMovedPartitions(&(tup)->t_ctid)
+
#define HeapTupleHeaderGetDatumLength(tup) \
VARSIZE(tup)
ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *planSlot,
EPQState *epqstate, EState *estate, bool *tupleDeleted,
bool processReturning, HeapUpdateFailureData *hufdp,
- MergeActionState *actionState, bool canSetTag);
+ MergeActionState *actionState, bool canSetTag,
+ bool changingPart);
extern TupleTableSlot *ExecUpdate(ModifyTableState *mtstate,
ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
TupleTableSlot *planSlot, EPQState *epqstate, EState *estate,
(pointer)->ip_posid = InvalidOffsetNumber \
)
+/*
+ * ItemPointerIndicatesMovedPartitions
+ * True iff the block number indicates the tuple has moved to another
+ * partition, i.e. the block number is not valid
+ * (ItemPointerSetMovedPartitions stores InvalidBlockNumber there).
+ *
+ * The whole expansion is parenthesized so the macro composes safely
+ * with surrounding operators.
+ */
+#define ItemPointerIndicatesMovedPartitions(pointer) \
+ (!BlockNumberIsValid(ItemPointerGetBlockNumberNoCheck(pointer)))
+
+/*
+ * ItemPointerSetMovedPartitions
+ * Indicate that the item referenced by the itempointer has moved into a
+ * different partition.
+ */
+#define ItemPointerSetMovedPartitions(pointer) \
+ ItemPointerSetBlockNumber((pointer), InvalidBlockNumber)
+
/* ----------------
* externs
* ----------------
<waiting ...>
step c1: COMMIT;
step pa_merge2a: <... completed>
+error in steps c1 pa_merge2a: ERROR: tuple to be deleted was already moved to another partition due to concurrent update
+step pa_select2: SELECT * FROM pa_target;
+ERROR: current transaction is aborted, commands ignored until end of transaction block
+step c2: COMMIT;
+
+starting permutation: pa_merge2 c1 pa_merge2a pa_select2 c2
+step pa_merge2:
+ MERGE INTO pa_target t
+ USING (SELECT 1 as key, 'pa_merge1' as val) s
+ ON s.key = t.key
+ WHEN NOT MATCHED THEN
+ INSERT VALUES (s.key, s.val)
+ WHEN MATCHED THEN
+ UPDATE set key = t.key + 1, val = t.val || ' updated by ' || s.val;
+
+step c1: COMMIT;
+step pa_merge2a:
+ MERGE INTO pa_target t
+ USING (SELECT 1 as key, 'pa_merge2a' as val) s
+ ON s.key = t.key
+ WHEN NOT MATCHED THEN
+ INSERT VALUES (s.key, s.val)
+ WHEN MATCHED THEN
+ UPDATE set key = t.key + 1, val = t.val || ' updated by ' || s.val;
+
step pa_select2: SELECT * FROM pa_target;
key val
--- /dev/null
+Parsed test spec with 2 sessions
+
+starting permutation: s1b s2b s1u s1c s2d s2c
+step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s1u: UPDATE foo SET a=2 WHERE a=1;
+step s1c: COMMIT;
+step s2d: DELETE FROM foo WHERE a=1;
+step s2c: COMMIT;
+
+starting permutation: s1b s2b s1u s2d s1c s2c
+step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s1u: UPDATE foo SET a=2 WHERE a=1;
+step s2d: DELETE FROM foo WHERE a=1; <waiting ...>
+step s1c: COMMIT;
+step s2d: <... completed>
+error in steps s1c s2d: ERROR: tuple to be deleted was already moved to another partition due to concurrent update
+step s2c: COMMIT;
+
+starting permutation: s1b s2b s2d s1u s2c s1c
+step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2d: DELETE FROM foo WHERE a=1;
+step s1u: UPDATE foo SET a=2 WHERE a=1; <waiting ...>
+step s2c: COMMIT;
+step s1u: <... completed>
+step s1c: COMMIT;
+
+starting permutation: s1b s2b s1u2 s1c s2u2 s2c
+step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s1u2: UPDATE footrg SET b='EFG' WHERE a=1;
+step s1c: COMMIT;
+step s2u2: UPDATE footrg SET b='XYZ' WHERE a=1;
+step s2c: COMMIT;
+
+starting permutation: s1b s2b s1u2 s2u2 s1c s2c
+step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s1u2: UPDATE footrg SET b='EFG' WHERE a=1;
+step s2u2: UPDATE footrg SET b='XYZ' WHERE a=1; <waiting ...>
+step s1c: COMMIT;
+step s2u2: <... completed>
+error in steps s1c s2u2: ERROR: tuple to be locked was already moved to another partition due to concurrent update
+step s2c: COMMIT;
+
+starting permutation: s1b s2b s2u2 s1u2 s2c s1c
+step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2u2: UPDATE footrg SET b='XYZ' WHERE a=1;
+step s1u2: UPDATE footrg SET b='EFG' WHERE a=1; <waiting ...>
+step s2c: COMMIT;
+step s1u2: <... completed>
+error in steps s2c s1u2: ERROR: tuple to be locked was already moved to another partition due to concurrent update
+step s1c: COMMIT;
+
+starting permutation: s1b s2b s1u3pc s2i s1c s2c
+step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s1u3pc: UPDATE foo_range_parted SET a=11 WHERE a=7;
+step s2i: INSERT INTO bar VALUES(7); <waiting ...>
+step s1c: COMMIT;
+step s2i: <... completed>
+error in steps s1c s2i: ERROR: tuple to be locked was already moved to another partition due to concurrent update
+step s2c: COMMIT;
+
+starting permutation: s1b s2b s1u3pc s2i s1r s2c
+step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s1u3pc: UPDATE foo_range_parted SET a=11 WHERE a=7;
+step s2i: INSERT INTO bar VALUES(7); <waiting ...>
+step s1r: ROLLBACK;
+step s2i: <... completed>
+step s2c: COMMIT;
+
+starting permutation: s1b s2b s1u3npc s1u3pc s2i s1c s2c
+step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s1u3npc: UPDATE foo_range_parted SET b='XYZ' WHERE a=7;
+step s1u3pc: UPDATE foo_range_parted SET a=11 WHERE a=7;
+step s2i: INSERT INTO bar VALUES(7); <waiting ...>
+step s1c: COMMIT;
+step s2i: <... completed>
+error in steps s1c s2i: ERROR: tuple to be locked was already moved to another partition due to concurrent update
+step s2c: COMMIT;
+
+starting permutation: s1b s2b s1u3npc s1u3pc s2i s1r s2c
+step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s1u3npc: UPDATE foo_range_parted SET b='XYZ' WHERE a=7;
+step s1u3pc: UPDATE foo_range_parted SET a=11 WHERE a=7;
+step s2i: INSERT INTO bar VALUES(7); <waiting ...>
+step s1r: ROLLBACK;
+step s2i: <... completed>
+step s2c: COMMIT;
+
+starting permutation: s1b s2b s1u3npc s1u3pc s1u3pc s2i s1c s2c
+step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s1u3npc: UPDATE foo_range_parted SET b='XYZ' WHERE a=7;
+step s1u3pc: UPDATE foo_range_parted SET a=11 WHERE a=7;
+step s1u3pc: UPDATE foo_range_parted SET a=11 WHERE a=7;
+step s2i: INSERT INTO bar VALUES(7); <waiting ...>
+step s1c: COMMIT;
+step s2i: <... completed>
+error in steps s1c s2i: ERROR: tuple to be locked was already moved to another partition due to concurrent update
+step s2c: COMMIT;
+
+starting permutation: s1b s2b s1u3npc s1u3pc s1u3pc s2i s1r s2c
+step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s1u3npc: UPDATE foo_range_parted SET b='XYZ' WHERE a=7;
+step s1u3pc: UPDATE foo_range_parted SET a=11 WHERE a=7;
+step s1u3pc: UPDATE foo_range_parted SET a=11 WHERE a=7;
+step s2i: INSERT INTO bar VALUES(7); <waiting ...>
+step s1r: ROLLBACK;
+step s2i: <... completed>
+step s2c: COMMIT;
--- /dev/null
+Parsed test spec with 3 sessions
+
+starting permutation: s1u s2donothing s3donothing s1c s2c s3select s3c
+step s1u: UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1;
+step s2donothing: INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING; <waiting ...>
+step s3donothing: INSERT INTO foo VALUES(2, 'session-3 donothing') ON CONFLICT DO NOTHING; <waiting ...>
+step s1c: COMMIT;
+step s2donothing: <... completed>
+step s3donothing: <... completed>
+step s2c: COMMIT;
+step s3select: SELECT * FROM foo ORDER BY a;
+a b
+
+1 session-2 donothing
+2 initial tuple -> moved by session-1
+step s3c: COMMIT;
+
+starting permutation: s2donothing s1u s3donothing s1c s2c s3select s3c
+step s2donothing: INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING;
+step s1u: UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1;
+step s3donothing: INSERT INTO foo VALUES(2, 'session-3 donothing') ON CONFLICT DO NOTHING; <waiting ...>
+step s1c: COMMIT;
+step s3donothing: <... completed>
+step s2c: COMMIT;
+step s3select: SELECT * FROM foo ORDER BY a;
+a b
+
+2 initial tuple -> moved by session-1
+step s3c: COMMIT;
--- /dev/null
+Parsed test spec with 3 sessions
+
+starting permutation: s2beginrr s3beginrr s1u s2donothing s1c s2c s3donothing s3c s2select
+step s2beginrr: BEGIN ISOLATION LEVEL REPEATABLE READ;
+step s3beginrr: BEGIN ISOLATION LEVEL REPEATABLE READ;
+step s1u: UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1;
+step s2donothing: INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING; <waiting ...>
+step s1c: COMMIT;
+step s2donothing: <... completed>
+step s2c: COMMIT;
+step s3donothing: INSERT INTO foo VALUES(2, 'session-3 donothing'), (2, 'session-3 donothing2') ON CONFLICT DO NOTHING;
+step s3c: COMMIT;
+step s2select: SELECT * FROM foo ORDER BY a;
+a b
+
+1 session-2 donothing
+2 initial tuple -> moved by session-1
+
+starting permutation: s2beginrr s3beginrr s1u s3donothing s1c s3c s2donothing s2c s2select
+step s2beginrr: BEGIN ISOLATION LEVEL REPEATABLE READ;
+step s3beginrr: BEGIN ISOLATION LEVEL REPEATABLE READ;
+step s1u: UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1;
+step s3donothing: INSERT INTO foo VALUES(2, 'session-3 donothing'), (2, 'session-3 donothing2') ON CONFLICT DO NOTHING; <waiting ...>
+step s1c: COMMIT;
+step s3donothing: <... completed>
+error in steps s1c s3donothing: ERROR: could not serialize access due to concurrent update
+step s3c: COMMIT;
+step s2donothing: INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING;
+step s2c: COMMIT;
+step s2select: SELECT * FROM foo ORDER BY a;
+a b
+
+1 session-2 donothing
+2 initial tuple -> moved by session-1
+
+starting permutation: s2beginrr s3beginrr s1u s2donothing s3donothing s1c s2c s3c s2select
+step s2beginrr: BEGIN ISOLATION LEVEL REPEATABLE READ;
+step s3beginrr: BEGIN ISOLATION LEVEL REPEATABLE READ;
+step s1u: UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1;
+step s2donothing: INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING; <waiting ...>
+step s3donothing: INSERT INTO foo VALUES(2, 'session-3 donothing'), (2, 'session-3 donothing2') ON CONFLICT DO NOTHING; <waiting ...>
+step s1c: COMMIT;
+step s2donothing: <... completed>
+step s3donothing: <... completed>
+error in steps s1c s2donothing s3donothing: ERROR: could not serialize access due to concurrent update
+step s2c: COMMIT;
+step s3c: COMMIT;
+step s2select: SELECT * FROM foo ORDER BY a;
+a b
+
+1 session-2 donothing
+2 initial tuple -> moved by session-1
+
+starting permutation: s2beginrr s3beginrr s1u s3donothing s2donothing s1c s3c s2c s2select
+step s2beginrr: BEGIN ISOLATION LEVEL REPEATABLE READ;
+step s3beginrr: BEGIN ISOLATION LEVEL REPEATABLE READ;
+step s1u: UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1;
+step s3donothing: INSERT INTO foo VALUES(2, 'session-3 donothing'), (2, 'session-3 donothing2') ON CONFLICT DO NOTHING; <waiting ...>
+step s2donothing: INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING; <waiting ...>
+step s1c: COMMIT;
+step s3donothing: <... completed>
+step s2donothing: <... completed>
+error in steps s1c s3donothing s2donothing: ERROR: could not serialize access due to concurrent update
+step s3c: COMMIT;
+step s2c: COMMIT;
+step s2select: SELECT * FROM foo ORDER BY a;
+a b
+
+1 session-2 donothing
+2 initial tuple -> moved by session-1
+
+starting permutation: s2begins s3begins s1u s2donothing s1c s2c s3donothing s3c s2select
+step s2begins: BEGIN ISOLATION LEVEL SERIALIZABLE;
+step s3begins: BEGIN ISOLATION LEVEL SERIALIZABLE;
+step s1u: UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1;
+step s2donothing: INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING; <waiting ...>
+step s1c: COMMIT;
+step s2donothing: <... completed>
+step s2c: COMMIT;
+step s3donothing: INSERT INTO foo VALUES(2, 'session-3 donothing'), (2, 'session-3 donothing2') ON CONFLICT DO NOTHING;
+step s3c: COMMIT;
+step s2select: SELECT * FROM foo ORDER BY a;
+a b
+
+1 session-2 donothing
+2 initial tuple -> moved by session-1
+
+starting permutation: s2begins s3begins s1u s3donothing s1c s3c s2donothing s2c s2select
+step s2begins: BEGIN ISOLATION LEVEL SERIALIZABLE;
+step s3begins: BEGIN ISOLATION LEVEL SERIALIZABLE;
+step s1u: UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1;
+step s3donothing: INSERT INTO foo VALUES(2, 'session-3 donothing'), (2, 'session-3 donothing2') ON CONFLICT DO NOTHING; <waiting ...>
+step s1c: COMMIT;
+step s3donothing: <... completed>
+error in steps s1c s3donothing: ERROR: could not serialize access due to concurrent update
+step s3c: COMMIT;
+step s2donothing: INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING;
+step s2c: COMMIT;
+step s2select: SELECT * FROM foo ORDER BY a;
+a b
+
+1 session-2 donothing
+2 initial tuple -> moved by session-1
+
+starting permutation: s2begins s3begins s1u s2donothing s3donothing s1c s2c s3c s2select
+step s2begins: BEGIN ISOLATION LEVEL SERIALIZABLE;
+step s3begins: BEGIN ISOLATION LEVEL SERIALIZABLE;
+step s1u: UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1;
+step s2donothing: INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING; <waiting ...>
+step s3donothing: INSERT INTO foo VALUES(2, 'session-3 donothing'), (2, 'session-3 donothing2') ON CONFLICT DO NOTHING; <waiting ...>
+step s1c: COMMIT;
+step s2donothing: <... completed>
+step s3donothing: <... completed>
+error in steps s1c s2donothing s3donothing: ERROR: could not serialize access due to concurrent update
+step s2c: COMMIT;
+step s3c: COMMIT;
+step s2select: SELECT * FROM foo ORDER BY a;
+a b
+
+1 session-2 donothing
+2 initial tuple -> moved by session-1
+
+starting permutation: s2begins s3begins s1u s3donothing s2donothing s1c s3c s2c s2select
+step s2begins: BEGIN ISOLATION LEVEL SERIALIZABLE;
+step s3begins: BEGIN ISOLATION LEVEL SERIALIZABLE;
+step s1u: UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1;
+step s3donothing: INSERT INTO foo VALUES(2, 'session-3 donothing'), (2, 'session-3 donothing2') ON CONFLICT DO NOTHING; <waiting ...>
+step s2donothing: INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING; <waiting ...>
+step s1c: COMMIT;
+step s3donothing: <... completed>
+step s2donothing: <... completed>
+error in steps s1c s3donothing s2donothing: ERROR: could not serialize access due to concurrent update
+step s3c: COMMIT;
+step s2c: COMMIT;
+step s2select: SELECT * FROM foo ORDER BY a;
+a b
+
+1 session-2 donothing
+2 initial tuple -> moved by session-1
test: predicate-hash
test: predicate-gist
test: predicate-gin
+test: partition-key-update-1
+test: partition-key-update-2
+test: partition-key-update-3
# The checksum_enable suite will enable checksums for the cluster so should
# not run before anything expecting the cluster to have checksums turned off
# test: checksum_cancel
permutation "merge1" "merge2b" "c1" "select2" "c2"
permutation "merge1" "merge2c" "c1" "select2" "c2"
permutation "pa_merge1" "pa_merge2a" "c1" "pa_select2" "c2"
-permutation "pa_merge2" "pa_merge2a" "c1" "pa_select2" "c2"
+permutation "pa_merge2" "pa_merge2a" "c1" "pa_select2" "c2" # fails
+permutation "pa_merge2" "c1" "pa_merge2a" "pa_select2" "c2" # succeeds
--- /dev/null
+# Test that an error is thrown if the target row has been moved to a
+# different partition by a concurrent session.
+
+setup
+{
+ --
+ -- Setup to test an error from ExecUpdate and ExecDelete.
+ --
+ CREATE TABLE foo (a int, b text) PARTITION BY LIST(a);
+ CREATE TABLE foo1 PARTITION OF foo FOR VALUES IN (1);
+ CREATE TABLE foo2 PARTITION OF foo FOR VALUES IN (2);
+ INSERT INTO foo VALUES (1, 'ABC');
+
+ --
+ -- Setup to test an error from GetTupleForTrigger
+ --
+ CREATE TABLE footrg (a int, b text) PARTITION BY LIST(a);
+ CREATE TABLE footrg1 PARTITION OF footrg FOR VALUES IN (1);
+ CREATE TABLE footrg2 PARTITION OF footrg FOR VALUES IN (2);
+ INSERT INTO footrg VALUES (1, 'ABC');
+ CREATE FUNCTION func_footrg_mod_a() RETURNS TRIGGER AS $$
+ BEGIN
+ NEW.a = 2; -- This is changing partition key column.
+ RETURN NEW;
+ END $$ LANGUAGE PLPGSQL;
+ CREATE TRIGGER footrg_mod_a BEFORE UPDATE ON footrg1
+ FOR EACH ROW EXECUTE PROCEDURE func_footrg_mod_a();
+
+ --
+ -- Setup to test an error from ExecLockRows
+ --
+ CREATE TABLE foo_range_parted (a int, b text) PARTITION BY RANGE(a);
+ CREATE TABLE foo_range_parted1 PARTITION OF foo_range_parted FOR VALUES FROM (1) TO (10);
+ CREATE TABLE foo_range_parted2 PARTITION OF foo_range_parted FOR VALUES FROM (10) TO (20);
+ INSERT INTO foo_range_parted VALUES(7, 'ABC');
+ CREATE UNIQUE INDEX foo_range_parted1_a_unique ON foo_range_parted1 (a);
+ CREATE TABLE bar (a int REFERENCES foo_range_parted1(a));
+}
+
+teardown
+{
+ DROP TABLE foo;
+ DROP TRIGGER footrg_mod_a ON footrg1;
+ DROP FUNCTION func_footrg_mod_a();
+ DROP TABLE footrg;
+ DROP TABLE bar, foo_range_parted;
+}
+
+session "s1"
+step "s1b" { BEGIN ISOLATION LEVEL READ COMMITTED; }
+step "s1u" { UPDATE foo SET a=2 WHERE a=1; }
+step "s1u2" { UPDATE footrg SET b='EFG' WHERE a=1; }
+step "s1u3pc" { UPDATE foo_range_parted SET a=11 WHERE a=7; }
+step "s1u3npc" { UPDATE foo_range_parted SET b='XYZ' WHERE a=7; }
+step "s1c" { COMMIT; }
+step "s1r" { ROLLBACK; }
+
+session "s2"
+step "s2b" { BEGIN ISOLATION LEVEL READ COMMITTED; }
+step "s2u" { UPDATE foo SET b='EFG' WHERE a=1; }
+step "s2u2" { UPDATE footrg SET b='XYZ' WHERE a=1; }
+step "s2i" { INSERT INTO bar VALUES(7); }
+step "s2d" { DELETE FROM foo WHERE a=1; }
+step "s2c" { COMMIT; }
+
+# Concurrency error from ExecUpdate and ExecDelete.
+permutation "s1b" "s2b" "s1u" "s1c" "s2d" "s2c"
+permutation "s1b" "s2b" "s1u" "s2d" "s1c" "s2c"
+permutation "s1b" "s2b" "s2d" "s1u" "s2c" "s1c"
+
+# Concurrency error from GetTupleForTrigger
+permutation "s1b" "s2b" "s1u2" "s1c" "s2u2" "s2c"
+permutation "s1b" "s2b" "s1u2" "s2u2" "s1c" "s2c"
+permutation "s1b" "s2b" "s2u2" "s1u2" "s2c" "s1c"
+
+# Concurrency error from ExecLockRows
+# test waiting for moved row itself
+permutation "s1b" "s2b" "s1u3pc" "s2i" "s1c" "s2c"
+permutation "s1b" "s2b" "s1u3pc" "s2i" "s1r" "s2c"
+# test waiting for in-partition update, followed by cross-partition move
+permutation "s1b" "s2b" "s1u3npc" "s1u3pc" "s2i" "s1c" "s2c"
+permutation "s1b" "s2b" "s1u3npc" "s1u3pc" "s2i" "s1r" "s2c"
+# test waiting for in-partition update, followed by cross-partition move
+# where the move statement is issued twice
+permutation "s1b" "s2b" "s1u3npc" "s1u3pc" "s1u3pc" "s2i" "s1c" "s2c"
+permutation "s1b" "s2b" "s1u3npc" "s1u3pc" "s1u3pc" "s2i" "s1r" "s2c"
--- /dev/null
+# Concurrent update of a partition key and INSERT...ON CONFLICT DO NOTHING test
+#
+# This test tries to expose problems with the interaction between concurrent
+# sessions during an update of the partition key and INSERT...ON CONFLICT DO
+# NOTHING on a partitioned table.
+#
+# The convention here is that session 1 moves a row from one partition to
+# another due to an update of the partition key, session 2 always ends up
+# inserting, and session 3 always ends up doing nothing.
+#
+# Note: This test slightly resembles the insert-conflict-do-nothing test.
+
+setup
+{
+ CREATE TABLE foo (a int primary key, b text) PARTITION BY LIST(a);
+ CREATE TABLE foo1 PARTITION OF foo FOR VALUES IN (1);
+ CREATE TABLE foo2 PARTITION OF foo FOR VALUES IN (2);
+ INSERT INTO foo VALUES (1, 'initial tuple');
+}
+
+teardown
+{
+ DROP TABLE foo;
+}
+
+session "s1"
+setup { BEGIN ISOLATION LEVEL READ COMMITTED; }
+step "s1u" { UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1; }
+step "s1c" { COMMIT; }
+
+session "s2"
+setup { BEGIN ISOLATION LEVEL READ COMMITTED; }
+step "s2donothing" { INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING; }
+step "s2c" { COMMIT; }
+
+session "s3"
+setup { BEGIN ISOLATION LEVEL READ COMMITTED; }
+step "s3donothing" { INSERT INTO foo VALUES(2, 'session-3 donothing') ON CONFLICT DO NOTHING; }
+step "s3select" { SELECT * FROM foo ORDER BY a; }
+step "s3c" { COMMIT; }
+
+# Regular case where one session block-waits on another to determine if it
+# should proceed with an insert or do nothing.
+permutation "s1u" "s2donothing" "s3donothing" "s1c" "s2c" "s3select" "s3c"
+permutation "s2donothing" "s1u" "s3donothing" "s1c" "s2c" "s3select" "s3c"
--- /dev/null
+# Concurrent update of a partition key and INSERT...ON CONFLICT DO NOTHING
+# test on partitioned table with multiple rows in higher isolation levels.
+#
+# Note: This test resembles the insert-conflict-do-nothing-2 test.
+
+setup
+{
+ CREATE TABLE foo (a int primary key, b text) PARTITION BY LIST(a);
+ CREATE TABLE foo1 PARTITION OF foo FOR VALUES IN (1);
+ CREATE TABLE foo2 PARTITION OF foo FOR VALUES IN (2);
+ INSERT INTO foo VALUES (1, 'initial tuple');
+}
+
+teardown
+{
+ DROP TABLE foo;
+}
+
+session "s1"
+setup { BEGIN ISOLATION LEVEL READ COMMITTED; }
+step "s1u" { UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1; }
+step "s1c" { COMMIT; }
+
+session "s2"
+step "s2beginrr" { BEGIN ISOLATION LEVEL REPEATABLE READ; }
+step "s2begins" { BEGIN ISOLATION LEVEL SERIALIZABLE; }
+step "s2donothing" { INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING; }
+step "s2c" { COMMIT; }
+step "s2select" { SELECT * FROM foo ORDER BY a; }
+
+session "s3"
+step "s3beginrr" { BEGIN ISOLATION LEVEL REPEATABLE READ; }
+step "s3begins" { BEGIN ISOLATION LEVEL SERIALIZABLE; }
+step "s3donothing" { INSERT INTO foo VALUES(2, 'session-3 donothing'), (2, 'session-3 donothing2') ON CONFLICT DO NOTHING; }
+step "s3c" { COMMIT; }
+
+permutation "s2beginrr" "s3beginrr" "s1u" "s2donothing" "s1c" "s2c" "s3donothing" "s3c" "s2select"
+permutation "s2beginrr" "s3beginrr" "s1u" "s3donothing" "s1c" "s3c" "s2donothing" "s2c" "s2select"
+permutation "s2beginrr" "s3beginrr" "s1u" "s2donothing" "s3donothing" "s1c" "s2c" "s3c" "s2select"
+permutation "s2beginrr" "s3beginrr" "s1u" "s3donothing" "s2donothing" "s1c" "s3c" "s2c" "s2select"
+permutation "s2begins" "s3begins" "s1u" "s2donothing" "s1c" "s2c" "s3donothing" "s3c" "s2select"
+permutation "s2begins" "s3begins" "s1u" "s3donothing" "s1c" "s3c" "s2donothing" "s2c" "s2select"
+permutation "s2begins" "s3begins" "s1u" "s2donothing" "s3donothing" "s1c" "s2c" "s3c" "s2select"
+permutation "s2begins" "s3begins" "s1u" "s3donothing" "s2donothing" "s1c" "s3c" "s2c" "s2select"