From 9ee1cf04ab6bcefe03a11837b53f29ca9dc24c7a Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 7 Aug 2016 17:46:08 -0400 Subject: [PATCH] Fix TOAST access failure in RETURNING queries. Discussion of commit 3e2f3c2e4 exposed a problem that is of longer standing: since we don't detoast data while sticking it into a portal's holdStore for PORTAL_ONE_RETURNING and PORTAL_UTIL_SELECT queries, and we release the query's snapshot as soon as we're done loading the holdStore, later readout of the holdStore can do TOAST fetches against data that can no longer be seen by any of the session's live snapshots. This means that a concurrent VACUUM could remove the TOAST data before we can fetch it. Commit 3e2f3c2e4 exposed the problem by showing that sometimes we had *no* live snapshots while fetching TOAST data, but we'd be at risk anyway. I believe this code was all right when written, because our management of a session's exposed xmin was such that the TOAST references were safe until end of transaction. But that's no longer true now that we can advance or clear our PGXACT.xmin intra-transaction. To fix, copy the query's snapshot during FillPortalStore() and save it in the Portal; release it only when the portal is dropped. This essentially implements a policy that we must hold a relevant snapshot whenever we access potentially-toasted data. We had already come to that conclusion in other places, cf commits 08e261cbc94ce9a7 and ec543db77b6b72f2. I'd have liked to add a regression test case for this, but I didn't see a way to make one that's not unreasonably bloated; it seems to require returning a toasted value to the client, and those will be big. In passing, improve PortalRunUtility() so that it positively verifies that its ending PopActiveSnapshot() call will pop the expected snapshot, removing a rather shaky assumption about which utility commands might do their own PopActiveSnapshot(). There's no known bug here, but now that we're actively referencing the snapshot it's almost free to make this code a bit more bulletproof. We might want to consider back-patching something like this into older branches, but it would be prudent to let it prove itself more in HEAD beforehand. Discussion: <87vazemeda.fsf@credativ.de> --- src/backend/commands/portalcmds.c | 6 ++- src/backend/tcop/pquery.c | 70 +++++++++++++++++++++--------- src/backend/utils/mmgr/portalmem.c | 16 +++++++ src/include/utils/portal.h | 10 +++++ 4 files changed, 80 insertions(+), 22 deletions(-) diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c index 50a54e7463..e52830f7ec 100644 --- a/src/backend/commands/portalcmds.c +++ b/src/backend/commands/portalcmds.c @@ -317,10 +317,11 @@ PersistHoldablePortal(Portal portal) Assert(queryDesc != NULL); /* - * Caller must have created the tuplestore already. + * Caller must have created the tuplestore already ... but not a snapshot. */ Assert(portal->holdContext != NULL); Assert(portal->holdStore != NULL); + Assert(portal->holdSnapshot == NULL); /* * Before closing down the executor, we must copy the tupdesc into @@ -362,7 +363,8 @@ PersistHoldablePortal(Portal portal) /* * Change the destination to output to the tuplestore. Note we tell - * the tuplestore receiver to detoast all data passed through it. + * the tuplestore receiver to detoast all data passed through it; this + * makes it safe to not keep a snapshot associated with the data. */ queryDesc->dest = CreateDestReceiver(DestTuplestore); SetTuplestoreDestReceiverParams(queryDesc->dest, diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index 3f6cb12b4e..92d07fcb5d 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -43,9 +43,11 @@ static uint64 RunFromStore(Portal portal, ScanDirection direction, uint64 count, DestReceiver *dest); static uint64 PortalRunSelect(Portal portal, bool forward, long count, DestReceiver *dest); -static void PortalRunUtility(Portal portal, Node *utilityStmt, bool isTopLevel, +static void PortalRunUtility(Portal portal, Node *utilityStmt, + bool isTopLevel, bool setHoldSnapshot, DestReceiver *dest, char *completionTag); -static void PortalRunMulti(Portal portal, bool isTopLevel, +static void PortalRunMulti(Portal portal, + bool isTopLevel, bool setHoldSnapshot, DestReceiver *dest, DestReceiver *altdest, char *completionTag); static uint64 DoPortalRunFetch(Portal portal, @@ -810,7 +812,7 @@ PortalRun(Portal portal, long count, bool isTopLevel, break; case PORTAL_MULTI_QUERY: - PortalRunMulti(portal, isTopLevel, + PortalRunMulti(portal, isTopLevel, false, dest, altdest, completionTag); /* Prevent portal's commands from being re-executed */ @@ -1039,15 +1041,16 @@ FillPortalStore(Portal portal, bool isTopLevel) /* * Run the portal to completion just as for the default * MULTI_QUERY case, but send the primary query's output to the - * tuplestore. Auxiliary query outputs are discarded. + * tuplestore. Auxiliary query outputs are discarded. Set the + * portal's holdSnapshot to the snapshot used (or a copy of it). */ - PortalRunMulti(portal, isTopLevel, + PortalRunMulti(portal, isTopLevel, true, treceiver, None_Receiver, completionTag); break; case PORTAL_UTIL_SELECT: PortalRunUtility(portal, (Node *) linitial(portal->stmts), - isTopLevel, treceiver, completionTag); + isTopLevel, true, treceiver, completionTag); break; default: @@ -1142,10 +1145,11 @@ RunFromStore(Portal portal, ScanDirection direction, uint64 count, * Execute a utility statement inside a portal. */ static void -PortalRunUtility(Portal portal, Node *utilityStmt, bool isTopLevel, +PortalRunUtility(Portal portal, Node *utilityStmt, + bool isTopLevel, bool setHoldSnapshot, DestReceiver *dest, char *completionTag) { - bool active_snapshot_set; + Snapshot snapshot; elog(DEBUG3, "ProcessUtility"); @@ -1172,11 +1176,19 @@ PortalRunUtility(Portal portal, Node *utilityStmt, bool isTopLevel, IsA(utilityStmt, UnlistenStmt) || IsA(utilityStmt, CheckPointStmt))) { - PushActiveSnapshot(GetTransactionSnapshot()); - active_snapshot_set = true; + snapshot = GetTransactionSnapshot(); + /* If told to, register the snapshot we're using and save in portal */ + if (setHoldSnapshot) + { + snapshot = RegisterSnapshot(snapshot); + portal->holdSnapshot = snapshot; + } + PushActiveSnapshot(snapshot); + /* PushActiveSnapshot might have copied the snapshot */ + snapshot = GetActiveSnapshot(); } else - active_snapshot_set = false; + snapshot = NULL; ProcessUtility(utilityStmt, portal->sourceText, @@ -1190,12 +1202,11 @@ PortalRunUtility(Portal portal, Node *utilityStmt, bool isTopLevel, /* * Some utility commands may pop the ActiveSnapshot stack from under us, - * so we only pop the stack if we actually see a snapshot set. Note that - * the set of utility commands that do this must be the same set - * disallowed to run inside a transaction; otherwise, we could be popping - * a snapshot that belongs to some other operation. + * so be careful to only pop the stack if our snapshot is still at the + * top. */ - if (active_snapshot_set && ActiveSnapshotSet()) + if (snapshot != NULL && ActiveSnapshotSet() && + snapshot == GetActiveSnapshot()) PopActiveSnapshot(); } @@ -1205,7 +1216,8 @@ PortalRunUtility(Portal portal, Node *utilityStmt, bool isTopLevel, * or non-SELECT-like queries) */ static void -PortalRunMulti(Portal portal, bool isTopLevel, +PortalRunMulti(Portal portal, + bool isTopLevel, bool setHoldSnapshot, DestReceiver *dest, DestReceiver *altdest, char *completionTag) { @@ -1261,7 +1273,25 @@ PortalRunMulti(Portal portal, bool isTopLevel, */ if (!active_snapshot_set) { - PushActiveSnapshot(GetTransactionSnapshot()); + Snapshot snapshot = GetTransactionSnapshot(); + + /* If told to, register the snapshot and save in portal */ + if (setHoldSnapshot) + { + snapshot = RegisterSnapshot(snapshot); + portal->holdSnapshot = snapshot; + } + + /* + * We can't have the holdSnapshot also be the active one, + * because UpdateActiveSnapshotCommandId would complain. So + * force an extra snapshot copy. Plain PushActiveSnapshot + * would have copied the transaction snapshot anyway, so this + * only adds a copy step when setHoldSnapshot is true. (It's + * okay for the command ID of the active snapshot to diverge + * from what holdSnapshot has.) + */ + PushCopiedSnapshot(snapshot); active_snapshot_set = true; } else @@ -1309,14 +1339,14 @@ PortalRunMulti(Portal portal, bool isTopLevel, { Assert(!active_snapshot_set); /* statement can set tag string */ - PortalRunUtility(portal, stmt, isTopLevel, + PortalRunUtility(portal, stmt, isTopLevel, false, dest, completionTag); } else { Assert(IsA(stmt, NotifyStmt)); /* stmt added by rewrite cannot set tag */ - PortalRunUtility(portal, stmt, isTopLevel, + PortalRunUtility(portal, stmt, isTopLevel, false, altdest, NULL); } } diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c index 053b613cb2..425cae12ea 100644 --- a/src/backend/utils/mmgr/portalmem.c +++ b/src/backend/utils/mmgr/portalmem.c @@ -24,6 +24,7 @@ #include "miscadmin.h" #include "utils/builtins.h" #include "utils/memutils.h" +#include "utils/snapmgr.h" #include "utils/timestamp.h" /* @@ -351,6 +352,7 @@ PortalCreateHoldStore(Portal portal) Assert(portal->holdContext == NULL); Assert(portal->holdStore == NULL); + Assert(portal->holdSnapshot == NULL); /* * Create the memory context that is used for storage of the tuple set. @@ -526,6 +528,20 @@ PortalDrop(Portal portal, bool isTopCommit) /* drop cached plan reference, if any */ PortalReleaseCachedPlan(portal); + /* + * If portal has a snapshot protecting its data, release that. This needs + * a little care since the registration will be attached to the portal's + * resowner; if the portal failed, we will already have released the + * resowner (and the snapshot) during transaction abort. + */ + if (portal->holdSnapshot) + { + if (portal->resowner) + UnregisterSnapshotFromOwner(portal->holdSnapshot, + portal->resowner); + portal->holdSnapshot = NULL; + } + /* * Release any resources still attached to the portal. There are several * cases being covered here: diff --git a/src/include/utils/portal.h b/src/include/utils/portal.h index 7250c9c1bb..c1d93a96bb 100644 --- a/src/include/utils/portal.h +++ b/src/include/utils/portal.h @@ -162,6 +162,16 @@ typedef struct PortalData Tuplestorestate *holdStore; /* store for holdable cursors */ MemoryContext holdContext; /* memory containing holdStore */ + /* + * Snapshot under which tuples in the holdStore were read. We must keep a + * reference to this snapshot if there is any possibility that the tuples + * contain TOAST references, because releasing the snapshot could allow + * recently-dead rows to be vacuumed away, along with any toast data + * belonging to them. In the case of a held cursor, we avoid needing to + * keep such a snapshot by forcibly detoasting the data. + */ + Snapshot holdSnapshot; /* registered snapshot, or NULL if none */ + /* * atStart, atEnd and portalPos indicate the current cursor position. * portalPos is zero before the first row, N after fetching N'th row of -- 2.40.0