Commit
09529a70b changed nodeIndexscan.c and nodeIndexonlyscan.c to
postpone initialization of the indexscan proper until the first tuple
fetch. It overlooked the question of mark/restore behavior, which means
that if some caller attempts to mark the scan before the first tuple fetch,
you get a null pointer dereference.
The only existing user of mark/restore is nodeMergejoin.c, which (somewhat
accidentally) will never attempt to set a mark before the first inner tuple
unless the inner child node is a Material node. Hence the case can't arise
normally, so it seems sufficient to document the assumption at both ends.
However, during an EvalPlanQual recheck, ExecScanFetch doesn't call
IndexNext but just returns the jammed-in test tuple. Therefore, if we're
doing a recheck in a plan tree with a mergejoin with inner indexscan,
it's possible to reach ExecIndexMarkPos with iss_ScanDesc still null,
as reported by Guo Xiang Tan in bug #15032.
Really, when there's a test tuple supplied during an EPQ recheck, touching
the index at all is the wrong thing: rather, the behavior of mark/restore
ought to amount to saving and restoring the es_epqScanDone flag. We can
avoid finding a place to actually save the flag, for the moment, because
given the assumption that no caller will set a mark before fetching a
tuple, es_epqScanDone must always be set by the time we try to mark.
So the actual behavior change required is just to not reach the index
access if a test tuple is supplied.
The set of plan node types that need to consider this issue are those
that support EPQ test tuples (i.e., call ExecScan()) and also support
mark/restore; which is to say, IndexScan, IndexOnlyScan, and perhaps
CustomScan. It's tempting to try to fix the problem in one place by
teaching ExecMarkPos() itself about EPQ; but ExecMarkPos supports some
plan types that aren't Scans, and also it seems risky to make assumptions
about what a CustomScan wants to do here. Also, the most likely future
change here is to decide that we do need to support marks placed before
the first tuple, which would require additional work in IndexScan and
IndexOnlyScan in any case. Hence, fix the EPQ issue in nodeIndexscan.c
and nodeIndexonlyscan.c, accepting the small amount of code duplicated
thereby, and leave it to CustomScan providers to fix this bug if they
have it.
Back-patch to v10 where commit
09529a70b came in. In earlier branches,
the index_markpos() call is a waste of cycles when EPQ is active, but
no more than that, so it doesn't seem appropriate to back-patch further.
Discussion: https://postgr.es/m/
20180126074932.3098.97815@wrigleys.postgresql.org
/* ----------------------------------------------------------------
* ExecIndexOnlyMarkPos
+ *
+ * Note: we assume that no caller attempts to set a mark before having read
+ * at least one tuple. Otherwise, ioss_ScanDesc might still be NULL.
* ----------------------------------------------------------------
*/
void
ExecIndexOnlyMarkPos(IndexOnlyScanState *node)
{
+ EState *estate = node->ss.ps.state;
+
+ if (estate->es_epqTuple != NULL)
+ {
+ /*
+ * We are inside an EvalPlanQual recheck. If a test tuple exists for
+ * this relation, then we shouldn't access the index at all. We would
+ * instead need to save, and later restore, the state of the
+ * es_epqScanDone flag, so that re-fetching the test tuple is
+ * possible. However, given the assumption that no caller sets a mark
+ * at the start of the scan, we can only get here with es_epqScanDone
+ * already set, and so no state need be saved.
+ */
+ Index scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
+
+ Assert(scanrelid > 0);
+ if (estate->es_epqTupleSet[scanrelid - 1])
+ {
+ /* Verify the claim above */
+ if (!estate->es_epqScanDone[scanrelid - 1])
+ elog(ERROR, "unexpected ExecIndexOnlyMarkPos call in EPQ recheck");
+ return;
+ }
+ }
+
index_markpos(node->ioss_ScanDesc);
}
void
ExecIndexOnlyRestrPos(IndexOnlyScanState *node)
{
+ EState *estate = node->ss.ps.state;
+
+ if (estate->es_epqTuple != NULL)
+ {
+ /* See comments in ExecIndexOnlyMarkPos */
+ Index scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
+
+ Assert(scanrelid > 0);
+ if (estate->es_epqTupleSet[scanrelid - 1])
+ {
+ /* Verify the claim above */
+ if (!estate->es_epqScanDone[scanrelid - 1])
+ elog(ERROR, "unexpected ExecIndexOnlyRestrPos call in EPQ recheck");
+ return;
+ }
+ }
+
index_restrpos(node->ioss_ScanDesc);
}
/* ----------------------------------------------------------------
* ExecIndexMarkPos
+ *
+ * Note: we assume that no caller attempts to set a mark before having read
+ * at least one tuple. Otherwise, iss_ScanDesc might still be NULL.
* ----------------------------------------------------------------
*/
void
ExecIndexMarkPos(IndexScanState *node)
{
+ EState *estate = node->ss.ps.state;
+
+ if (estate->es_epqTuple != NULL)
+ {
+ /*
+ * We are inside an EvalPlanQual recheck. If a test tuple exists for
+ * this relation, then we shouldn't access the index at all. We would
+ * instead need to save, and later restore, the state of the
+ * es_epqScanDone flag, so that re-fetching the test tuple is
+ * possible. However, given the assumption that no caller sets a mark
+ * at the start of the scan, we can only get here with es_epqScanDone
+ * already set, and so no state need be saved.
+ */
+ Index scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
+
+ Assert(scanrelid > 0);
+ if (estate->es_epqTupleSet[scanrelid - 1])
+ {
+ /* Verify the claim above */
+ if (!estate->es_epqScanDone[scanrelid - 1])
+ elog(ERROR, "unexpected ExecIndexMarkPos call in EPQ recheck");
+ return;
+ }
+ }
+
index_markpos(node->iss_ScanDesc);
}
void
ExecIndexRestrPos(IndexScanState *node)
{
+ EState *estate = node->ss.ps.state;
+
+ if (estate->es_epqTuple != NULL)
+ {
+ /* See comments in ExecIndexMarkPos */
+ Index scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
+
+ Assert(scanrelid > 0);
+ if (estate->es_epqTupleSet[scanrelid - 1])
+ {
+ /* Verify the claim above */
+ if (!estate->es_epqScanDone[scanrelid - 1])
+ elog(ERROR, "unexpected ExecIndexRestrPos call in EPQ recheck");
+ return;
+ }
+ }
+
index_restrpos(node->iss_ScanDesc);
}
*
* Currently, only Material wants the extra MARKs, and it will be helpful
* only if eflags doesn't specify REWIND.
+ *
+ * Note that for IndexScan and IndexOnlyScan, it is *necessary* that we
+ * not set mj_ExtraMarks; otherwise we might attempt to set a mark before
+ * the first inner tuple, which they do not support.
*/
if (IsA(innerPlan(node), Material) &&
(eflags & EXEC_FLAG_REWIND) == 0 &&
id value
1 tableAValue2
+
+starting permutation: wrjt selectjoinforupdate c2 c1
+step wrjt: UPDATE jointest SET data = 42 WHERE id = 7;
+step selectjoinforupdate:
+ set enable_nestloop to 0;
+ set enable_hashjoin to 0;
+ set enable_seqscan to 0;
+ explain (costs off)
+ select * from jointest a join jointest b on a.id=b.id for update;
+ select * from jointest a join jointest b on a.id=b.id for update;
+ <waiting ...>
+step c2: COMMIT;
+step selectjoinforupdate: <... completed>
+QUERY PLAN
+
+LockRows
+ -> Merge Join
+ Merge Cond: (a.id = b.id)
+ -> Index Scan using jointest_id_idx on jointest a
+ -> Index Scan using jointest_id_idx on jointest b
+id data id data
+
+1 0 1 0
+2 0 2 0
+3 0 3 0
+4 0 4 0
+5 0 5 0
+6 0 6 0
+7 42 7 42
+8 0 8 0
+9 0 9 0
+10 0 10 0
+step c1: COMMIT;
CREATE TABLE table_b (id integer, value text);
INSERT INTO table_a VALUES (1, 'tableAValue');
INSERT INTO table_b VALUES (1, 'tableBValue');
+
+ CREATE TABLE jointest AS SELECT generate_series(1,10) AS id, 0 AS data;
+ CREATE INDEX ON jointest(id);
}
teardown
{
DROP TABLE accounts;
DROP TABLE p CASCADE;
- DROP TABLE table_a, table_b;
+ DROP TABLE table_a, table_b, jointest;
}
session "s1"
UPDATE table_b SET value = 'newTableBValue' WHERE id = 1;
}
+# these tests exercise mark/restore during EPQ recheck, cf bug #15032
+
+step "selectjoinforupdate" {
+ set enable_nestloop to 0;
+ set enable_hashjoin to 0;
+ set enable_seqscan to 0;
+ explain (costs off)
+ select * from jointest a join jointest b on a.id=b.id for update;
+ select * from jointest a join jointest b on a.id=b.id for update;
+}
+
session "s2"
setup { BEGIN ISOLATION LEVEL READ COMMITTED; }
WHERE ta.id = 1 FOR UPDATE OF ta;
}
step "wrtwcte" { UPDATE table_a SET value = 'tableAValue2' WHERE id = 1; }
+step "wrjt" { UPDATE jointest SET data = 42 WHERE id = 7; }
step "c2" { COMMIT; }
session "s3"
permutation "wx2" "lockwithvalues" "c2" "c1" "read"
permutation "updateforss" "readforss" "c1" "c2"
permutation "wrtwcte" "readwcte" "c1" "c2"
+permutation "wrjt" "selectjoinforupdate" "c2" "c1"