false, NULL, NULL, NIL, NULL, transition_capture);
}
+/*
+ * Execute BEFORE ROW DELETE triggers.
+ *
+ * True indicates caller can proceed with the delete. False indicates caller
+ * need to suppress the delete and additionally if requested, we need to pass
+ * back the concurrently updated tuple if any.
+ */
bool
ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
ResultRelInfo *relinfo,
ItemPointer tupleid,
- HeapTuple fdw_trigtuple)
+ HeapTuple fdw_trigtuple,
+ TupleTableSlot **epqslot)
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
bool result = true;
LockTupleExclusive, &newSlot);
if (trigtuple == NULL)
return false;
+
+ /*
+ * If the tuple was concurrently updated and the caller of this
+ * function requested for the updated tuple, skip the trigger
+ * execution.
+ */
+ if (newSlot != NULL && epqslot != NULL)
+ {
+ *epqslot = newSlot;
+ heap_freetuple(trigtuple);
+ return false;
+ }
}
else
trigtuple = fdw_trigtuple;
{
skip_tuple = !ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
&searchslot->tts_tuple->t_self,
- NULL);
+ NULL, NULL);
}
if (!skip_tuple)
* foreign table, tupleid is invalid; the FDW has to figure out
* which row to delete using data from the planSlot. oldtuple is
* passed to foreign table triggers; it is NULL when the foreign
- * table has no relevant triggers.
+ * table has no relevant triggers. We use tupleDeleted to indicate
+ * whether the tuple is actually deleted, callers can use it to
+ * decide whether to continue the operation. When this DELETE is a
+ * part of an UPDATE of partition-key, then the slot returned by
+ * EvalPlanQual() is passed back using output parameter epqslot.
*
* Returns RETURNING result if any, otherwise NULL.
* ----------------------------------------------------------------
TupleTableSlot *planSlot,
EPQState *epqstate,
EState *estate,
- bool *tupleDeleted,
bool processReturning,
bool canSetTag,
- bool changingPart)
+ bool changingPart,
+ bool *tupleDeleted,
+ TupleTableSlot **epqslot)
{
ResultRelInfo *resultRelInfo;
Relation resultRelationDesc;
bool dodelete;
dodelete = ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
- tupleid, oldtuple);
+ tupleid, oldtuple, epqslot);
if (!dodelete) /* "do nothing" */
return NULL;
if (!ItemPointerEquals(tupleid, &hufd.ctid))
{
- TupleTableSlot *epqslot;
-
- epqslot = EvalPlanQual(estate,
- epqstate,
- resultRelationDesc,
- resultRelInfo->ri_RangeTableIndex,
- LockTupleExclusive,
- &hufd.ctid,
- hufd.xmax);
- if (!TupIsNull(epqslot))
+ TupleTableSlot *my_epqslot;
+
+ my_epqslot = EvalPlanQual(estate,
+ epqstate,
+ resultRelationDesc,
+ resultRelInfo->ri_RangeTableIndex,
+ LockTupleExclusive,
+ &hufd.ctid,
+ hufd.xmax);
+ if (!TupIsNull(my_epqslot))
{
*tupleid = hufd.ctid;
- goto ldelete;
+
+ /*
+ * If requested, skip delete and pass back the updated
+ * row.
+ */
+ if (epqslot)
+ {
+ *epqslot = my_epqslot;
+ return NULL;
+ }
+ else
+ goto ldelete;
}
}
/* tuple already deleted; nothing to do */
{
bool tuple_deleted;
TupleTableSlot *ret_slot;
+ TupleTableSlot *epqslot = NULL;
PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
int map_index;
TupleConversionMap *tupconv_map;
* processing. We want to return rows from INSERT.
*/
ExecDelete(mtstate, tupleid, oldtuple, planSlot, epqstate,
- estate, &tuple_deleted, false,
- false /* canSetTag */ , true /* changingPart */ );
+ estate, false, false /* canSetTag */ ,
+ true /* changingPart */ , &tuple_deleted, &epqslot);
/*
* For some reason if DELETE didn't happen (e.g. trigger prevented
* resurrect it.
*/
if (!tuple_deleted)
- return NULL;
+ {
+ /*
+ * epqslot will be typically NULL. But when ExecDelete()
+ * finds that another transaction has concurrently updated the
+ * same row, it re-fetches the row, skips the delete, and
+ * epqslot is set to the re-fetched tuple slot. In that case,
+ * we need to do all the checks again.
+ */
+ if (TupIsNull(epqslot))
+ return NULL;
+ else
+ {
+ slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
+ tuple = ExecMaterializeSlot(slot);
+ goto lreplace;
+ }
+ }
/*
* Updates set the transition capture map only when a new subplan
case CMD_DELETE:
slot = ExecDelete(node, tupleid, oldtuple, planSlot,
&node->mt_epqstate, estate,
- NULL, true, node->canSetTag,
- false /* changingPart */ );
+ true, node->canSetTag,
+ false /* changingPart */ , NULL, NULL);
break;
default:
elog(ERROR, "unknown operation");
EPQState *epqstate,
ResultRelInfo *relinfo,
ItemPointer tupleid,
- HeapTuple fdw_trigtuple);
+ HeapTuple fdw_trigtuple,
+ TupleTableSlot **epqslot);
extern void ExecARDeleteTriggers(EState *estate,
ResultRelInfo *relinfo,
ItemPointer tupleid,
--- /dev/null
+Parsed test spec with 2 sessions
+
+starting permutation: s1b s2b s2u1 s1u s2c s1c s1s
+step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2u1: UPDATE foo SET b = b || ' update2' WHERE a = 1;
+step s1u: UPDATE foo SET a = a + 1, b = b || ' update1' WHERE b like '%ABC%'; <waiting ...>
+step s2c: COMMIT;
+step s1u: <... completed>
+step s1c: COMMIT;
+step s1s: SELECT tableoid::regclass, * FROM foo ORDER BY a;
+tableoid a b
+
+foo2 2 ABC update2 update1
+
+starting permutation: s1b s2b s2ut1 s1ut s2c s1c s1st s1stl
+step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2ut1: UPDATE footrg SET b = b || ' update2' WHERE a = 1;
+step s1ut: UPDATE footrg SET a = a + 1, b = b || ' update1' WHERE b like '%ABC%'; <waiting ...>
+step s2c: COMMIT;
+step s1ut: <... completed>
+step s1c: COMMIT;
+step s1st: SELECT tableoid::regclass, * FROM footrg ORDER BY a;
+tableoid a b
+
+footrg2 2 ABC update2 update1
+step s1stl: SELECT * FROM triglog ORDER BY a;
+a b
+
+1 ABC update2 trigger
+
+starting permutation: s1b s2b s2u2 s1u s2c s1c s1s
+step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2u2: UPDATE foo SET b = 'EFG' WHERE a = 1;
+step s1u: UPDATE foo SET a = a + 1, b = b || ' update1' WHERE b like '%ABC%'; <waiting ...>
+step s2c: COMMIT;
+step s1u: <... completed>
+step s1c: COMMIT;
+step s1s: SELECT tableoid::regclass, * FROM foo ORDER BY a;
+tableoid a b
+
+foo1 1 EFG
+
+starting permutation: s1b s2b s2ut2 s1ut s2c s1c s1st s1stl
+step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
+step s2ut2: UPDATE footrg SET b = 'EFG' WHERE a = 1;
+step s1ut: UPDATE footrg SET a = a + 1, b = b || ' update1' WHERE b like '%ABC%'; <waiting ...>
+step s2c: COMMIT;
+step s1ut: <... completed>
+step s1c: COMMIT;
+step s1st: SELECT tableoid::regclass, * FROM footrg ORDER BY a;
+tableoid a b
+
+footrg1 1 EFG
+step s1stl: SELECT * FROM triglog ORDER BY a;
+a b
+
test: partition-key-update-1
test: partition-key-update-2
test: partition-key-update-3
+test: partition-key-update-4
test: plpgsql-toast
--- /dev/null
+# Test that a row that ends up in a new partition contains changes made by
+# a concurrent transaction.
+
+setup
+{
+ --
+ -- Setup to test concurrent handling of ExecDelete().
+ --
+ CREATE TABLE foo (a int, b text) PARTITION BY LIST(a);
+ CREATE TABLE foo1 PARTITION OF foo FOR VALUES IN (1);
+ CREATE TABLE foo2 PARTITION OF foo FOR VALUES IN (2);
+ INSERT INTO foo VALUES (1, 'ABC');
+
+ --
+ -- Setup to test concurrent handling of GetTupleForTrigger().
+ --
+ CREATE TABLE footrg (a int, b text) PARTITION BY LIST(a);
+ CREATE TABLE triglog as select * from footrg;
+ CREATE TABLE footrg1 PARTITION OF footrg FOR VALUES IN (1);
+ CREATE TABLE footrg2 PARTITION OF footrg FOR VALUES IN (2);
+ INSERT INTO footrg VALUES (1, 'ABC');
+ CREATE FUNCTION func_footrg() RETURNS TRIGGER AS $$
+ BEGIN
+ OLD.b = OLD.b || ' trigger';
+
+ -- This will verify that the trigger is not run *before* the row is
+ -- refetched by EvalPlanQual. The OLD row should contain the changes made
+ -- by the concurrent session.
+ INSERT INTO triglog select OLD.*;
+
+ RETURN OLD;
+ END $$ LANGUAGE PLPGSQL;
+ CREATE TRIGGER footrg_ondel BEFORE DELETE ON footrg1
+ FOR EACH ROW EXECUTE PROCEDURE func_footrg();
+
+}
+
+teardown
+{
+ DROP TABLE foo;
+ DROP TRIGGER footrg_ondel ON footrg1;
+ DROP FUNCTION func_footrg();
+ DROP TABLE footrg;
+ DROP TABLE triglog;
+}
+
+session "s1"
+step "s1b" { BEGIN ISOLATION LEVEL READ COMMITTED; }
+step "s1u" { UPDATE foo SET a = a + 1, b = b || ' update1' WHERE b like '%ABC%'; }
+step "s1ut" { UPDATE footrg SET a = a + 1, b = b || ' update1' WHERE b like '%ABC%'; }
+step "s1s" { SELECT tableoid::regclass, * FROM foo ORDER BY a; }
+step "s1st" { SELECT tableoid::regclass, * FROM footrg ORDER BY a; }
+step "s1stl" { SELECT * FROM triglog ORDER BY a; }
+step "s1c" { COMMIT; }
+
+session "s2"
+step "s2b" { BEGIN ISOLATION LEVEL READ COMMITTED; }
+step "s2u1" { UPDATE foo SET b = b || ' update2' WHERE a = 1; }
+step "s2u2" { UPDATE foo SET b = 'EFG' WHERE a = 1; }
+step "s2ut1" { UPDATE footrg SET b = b || ' update2' WHERE a = 1; }
+step "s2ut2" { UPDATE footrg SET b = 'EFG' WHERE a = 1; }
+step "s2c" { COMMIT; }
+
+
+# Session s1 is moving a row into another partition, but is waiting for
+# another session s2 that is updating the original row. The row that ends up
+# in the new partition should contain the changes made by session s2.
+permutation "s1b" "s2b" "s2u1" "s1u" "s2c" "s1c" "s1s"
+
+# Same as above, except, session s1 is waiting in GetTupleTrigger().
+permutation "s1b" "s2b" "s2ut1" "s1ut" "s2c" "s1c" "s1st" "s1stl"
+
+# Below two cases are similar to the above two; except that the session s1
+# fails EvalPlanQual() test, so partition key update does not happen.
+permutation "s1b" "s2b" "s2u2" "s1u" "s2c" "s1c" "s1s"
+permutation "s1b" "s2b" "s2ut2" "s1ut" "s2c" "s1c" "s1st" "s1stl"