Commit 9ee1cf04 authored by Tom Lane

Fix TOAST access failure in RETURNING queries.

Discussion of commit 3e2f3c2e exposed a problem that is of longer
standing: since we don't detoast data while sticking it into a portal's
holdStore for PORTAL_ONE_RETURNING and PORTAL_UTIL_SELECT queries, and we
release the query's snapshot as soon as we're done loading the holdStore,
later readout of the holdStore can do TOAST fetches against data that can
no longer be seen by any of the session's live snapshots.  This means that
a concurrent VACUUM could remove the TOAST data before we can fetch it.
Commit 3e2f3c2e exposed the problem by showing that sometimes we had *no*
live snapshots while fetching TOAST data, but we'd be at risk anyway.
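
To illustrate the hazard concretely, here is a condensed sketch (not verbatim
code from this commit) of the pre-fix PORTAL_ONE_RETURNING sequence;
FillPortalStore and RunFromStore are the real pquery.c functions, but the
flow and comments are abbreviated:

	FillPortalStore(portal, isTopLevel);	/* tuples go into portal->holdStore,
											 * TOAST pointers left undetoasted */
	/* ... the query's snapshot is released here; the session's advertised
	 * xmin may then advance, so a concurrent VACUUM can reclaim the
	 * recently-dead TOAST rows ... */
	(void) RunFromStore(portal, ForwardScanDirection, count, dest);
	/* ... readout detoasts the stored pointers; the fetch fails if the
	 * TOAST data is already gone */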

I believe this code was all right when written, because our management of a
session's exposed xmin was such that the TOAST references were safe until
end of transaction.  But that's no longer true now that we can advance or
clear our PGXACT.xmin intra-transaction.

To fix, copy the query's snapshot during FillPortalStore() and save it in
the Portal; release it only when the portal is dropped.  This essentially
implements a policy that we must hold a relevant snapshot whenever we
access potentially-toasted data.  We had already come to that conclusion
in other places, cf commits 08e261cb and ec543db7.
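
Condensed from the hunks below, the resulting holdSnapshot lifecycle looks
like this (a sketch shown out of context; see the pquery.c and portalmem.c
diffs for the real surroundings):

	/* while filling the holdStore (pquery.c), when setHoldSnapshot is true: */
	snapshot = RegisterSnapshot(snapshot);	/* registered under the portal's
											 * resource owner */
	portal->holdSnapshot = snapshot;

	/* at portal destruction (portalmem.c): */
	if (portal->holdSnapshot)
	{
		if (portal->resowner)	/* already released if the portal failed */
			UnregisterSnapshotFromOwner(portal->holdSnapshot,
										portal->resowner);
		portal->holdSnapshot = NULL;
	}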

I'd have liked to add a regression test case for this, but I didn't see
a way to make one that's not unreasonably bloated; it seems to require
returning a toasted value to the client, and those will be big.

In passing, improve PortalRunUtility() so that it positively verifies
that its ending PopActiveSnapshot() call will pop the expected snapshot,
removing a rather shaky assumption about which utility commands might
do their own PopActiveSnapshot().  There's no known bug here, but now
that we're actively referencing the snapshot it's almost free to make
this code a bit more bulletproof.
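
The verification, condensed from the PortalRunUtility hunk below: instead of
remembering a boolean, the function now keeps the Snapshot pointer it pushed
and pops only when that exact snapshot is still on top:

	if (snapshot != NULL && ActiveSnapshotSet() &&
		snapshot == GetActiveSnapshot())
		PopActiveSnapshot();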

We might want to consider back-patching something like this into older
branches, but it would be prudent to let it prove itself more in HEAD
beforehand.

Discussion: <87vazemeda.fsf@credativ.de>
parent 07a601ee
--- a/src/backend/commands/portalcmds.c
+++ b/src/backend/commands/portalcmds.c
@@ -317,10 +317,11 @@ PersistHoldablePortal(Portal portal)
 	Assert(queryDesc != NULL);
 
 	/*
-	 * Caller must have created the tuplestore already.
+	 * Caller must have created the tuplestore already ... but not a snapshot.
 	 */
 	Assert(portal->holdContext != NULL);
 	Assert(portal->holdStore != NULL);
+	Assert(portal->holdSnapshot == NULL);
 
 	/*
 	 * Before closing down the executor, we must copy the tupdesc into
@@ -362,7 +363,8 @@ PersistHoldablePortal(Portal portal)
 
 	/*
 	 * Change the destination to output to the tuplestore.  Note we tell
-	 * the tuplestore receiver to detoast all data passed through it.
+	 * the tuplestore receiver to detoast all data passed through it; this
+	 * makes it safe to not keep a snapshot associated with the data.
 	 */
 	queryDesc->dest = CreateDestReceiver(DestTuplestore);
 	SetTuplestoreDestReceiverParams(queryDesc->dest,
--- a/src/backend/tcop/pquery.c
+++ b/src/backend/tcop/pquery.c
@@ -43,9 +43,11 @@ static uint64 RunFromStore(Portal portal, ScanDirection direction, uint64 count,
 			 DestReceiver *dest);
 static uint64 PortalRunSelect(Portal portal, bool forward, long count,
 				  DestReceiver *dest);
-static void PortalRunUtility(Portal portal, Node *utilityStmt, bool isTopLevel,
+static void PortalRunUtility(Portal portal, Node *utilityStmt,
+				 bool isTopLevel, bool setHoldSnapshot,
 				 DestReceiver *dest, char *completionTag);
-static void PortalRunMulti(Portal portal, bool isTopLevel,
+static void PortalRunMulti(Portal portal,
+			   bool isTopLevel, bool setHoldSnapshot,
 			   DestReceiver *dest, DestReceiver *altdest,
 			   char *completionTag);
 static uint64 DoPortalRunFetch(Portal portal,
@@ -810,7 +812,7 @@ PortalRun(Portal portal, long count, bool isTopLevel,
 			break;
 
 		case PORTAL_MULTI_QUERY:
-			PortalRunMulti(portal, isTopLevel,
+			PortalRunMulti(portal, isTopLevel, false,
 						   dest, altdest, completionTag);
 
 			/* Prevent portal's commands from being re-executed */
@@ -1039,15 +1041,16 @@ FillPortalStore(Portal portal, bool isTopLevel)
 			/*
 			 * Run the portal to completion just as for the default
 			 * MULTI_QUERY case, but send the primary query's output to the
-			 * tuplestore. Auxiliary query outputs are discarded.
+			 * tuplestore. Auxiliary query outputs are discarded. Set the
+			 * portal's holdSnapshot to the snapshot used (or a copy of it).
 			 */
-			PortalRunMulti(portal, isTopLevel,
+			PortalRunMulti(portal, isTopLevel, true,
 						   treceiver, None_Receiver, completionTag);
 			break;
 
 		case PORTAL_UTIL_SELECT:
 			PortalRunUtility(portal, (Node *) linitial(portal->stmts),
-							 isTopLevel, treceiver, completionTag);
+							 isTopLevel, true, treceiver, completionTag);
 			break;
 
 		default:
@@ -1142,10 +1145,11 @@ RunFromStore(Portal portal, ScanDirection direction, uint64 count,
 /*
  * Execute a utility statement inside a portal.
  */
 static void
-PortalRunUtility(Portal portal, Node *utilityStmt, bool isTopLevel,
+PortalRunUtility(Portal portal, Node *utilityStmt,
+				 bool isTopLevel, bool setHoldSnapshot,
 				 DestReceiver *dest, char *completionTag)
 {
-	bool		active_snapshot_set;
+	Snapshot	snapshot;
 
 	elog(DEBUG3, "ProcessUtility");
@@ -1172,11 +1176,19 @@ PortalRunUtility(Portal portal, Node *utilityStmt, bool isTopLevel,
 		  IsA(utilityStmt, UnlistenStmt) ||
 		  IsA(utilityStmt, CheckPointStmt)))
 	{
-		PushActiveSnapshot(GetTransactionSnapshot());
-		active_snapshot_set = true;
+		snapshot = GetTransactionSnapshot();
+		/* If told to, register the snapshot we're using and save in portal */
+		if (setHoldSnapshot)
+		{
+			snapshot = RegisterSnapshot(snapshot);
+			portal->holdSnapshot = snapshot;
+		}
+		PushActiveSnapshot(snapshot);
+		/* PushActiveSnapshot might have copied the snapshot */
+		snapshot = GetActiveSnapshot();
 	}
 	else
-		active_snapshot_set = false;
+		snapshot = NULL;
 
 	ProcessUtility(utilityStmt,
 				   portal->sourceText,
@@ -1190,12 +1202,11 @@ PortalRunUtility(Portal portal, Node *utilityStmt, bool isTopLevel,
 
 	/*
 	 * Some utility commands may pop the ActiveSnapshot stack from under us,
-	 * so we only pop the stack if we actually see a snapshot set. Note that
-	 * the set of utility commands that do this must be the same set
-	 * disallowed to run inside a transaction; otherwise, we could be popping
-	 * a snapshot that belongs to some other operation.
+	 * so be careful to only pop the stack if our snapshot is still at the
+	 * top.
 	 */
-	if (active_snapshot_set && ActiveSnapshotSet())
+	if (snapshot != NULL && ActiveSnapshotSet() &&
+		snapshot == GetActiveSnapshot())
 		PopActiveSnapshot();
 }
 
@@ -1205,7 +1216,8 @@ PortalRunUtility(Portal portal, Node *utilityStmt, bool isTopLevel,
  * or non-SELECT-like queries)
  */
 static void
-PortalRunMulti(Portal portal, bool isTopLevel,
+PortalRunMulti(Portal portal,
+			   bool isTopLevel, bool setHoldSnapshot,
 			   DestReceiver *dest, DestReceiver *altdest,
 			   char *completionTag)
 {
@@ -1261,7 +1273,25 @@ PortalRunMulti(Portal portal, bool isTopLevel,
 	 */
 	if (!active_snapshot_set)
 	{
-		PushActiveSnapshot(GetTransactionSnapshot());
+		Snapshot	snapshot = GetTransactionSnapshot();
+
+		/* If told to, register the snapshot and save in portal */
+		if (setHoldSnapshot)
+		{
+			snapshot = RegisterSnapshot(snapshot);
+			portal->holdSnapshot = snapshot;
+		}
+
+		/*
+		 * We can't have the holdSnapshot also be the active one,
+		 * because UpdateActiveSnapshotCommandId would complain. So
+		 * force an extra snapshot copy. Plain PushActiveSnapshot
+		 * would have copied the transaction snapshot anyway, so this
+		 * only adds a copy step when setHoldSnapshot is true. (It's
+		 * okay for the command ID of the active snapshot to diverge
+		 * from what holdSnapshot has.)
+		 */
+		PushCopiedSnapshot(snapshot);
 		active_snapshot_set = true;
 	}
 	else
@@ -1309,14 +1339,14 @@ PortalRunMulti(Portal portal, bool isTopLevel,
 			{
 				Assert(!active_snapshot_set);
 				/* statement can set tag string */
-				PortalRunUtility(portal, stmt, isTopLevel,
+				PortalRunUtility(portal, stmt, isTopLevel, false,
 								 dest, completionTag);
 			}
 			else
 			{
 				Assert(IsA(stmt, NotifyStmt));
 				/* stmt added by rewrite cannot set tag */
-				PortalRunUtility(portal, stmt, isTopLevel,
+				PortalRunUtility(portal, stmt, isTopLevel, false,
 								 altdest, NULL);
 			}
 		}
--- a/src/backend/utils/mmgr/portalmem.c
+++ b/src/backend/utils/mmgr/portalmem.c
@@ -24,6 +24,7 @@
 #include "miscadmin.h"
 #include "utils/builtins.h"
 #include "utils/memutils.h"
+#include "utils/snapmgr.h"
 #include "utils/timestamp.h"
 
 /*
@@ -351,6 +352,7 @@ PortalCreateHoldStore(Portal portal)
 
 	Assert(portal->holdContext == NULL);
 	Assert(portal->holdStore == NULL);
+	Assert(portal->holdSnapshot == NULL);
 
 	/*
 	 * Create the memory context that is used for storage of the tuple set.
@@ -526,6 +528,20 @@ PortalDrop(Portal portal, bool isTopCommit)
 	/* drop cached plan reference, if any */
 	PortalReleaseCachedPlan(portal);
 
+	/*
+	 * If portal has a snapshot protecting its data, release that. This needs
+	 * a little care since the registration will be attached to the portal's
+	 * resowner; if the portal failed, we will already have released the
+	 * resowner (and the snapshot) during transaction abort.
+	 */
+	if (portal->holdSnapshot)
+	{
+		if (portal->resowner)
+			UnregisterSnapshotFromOwner(portal->holdSnapshot,
+										portal->resowner);
+		portal->holdSnapshot = NULL;
+	}
+
 	/*
 	 * Release any resources still attached to the portal. There are several
 	 * cases being covered here:
--- a/src/include/utils/portal.h
+++ b/src/include/utils/portal.h
@@ -162,6 +162,16 @@ typedef struct PortalData
 	Tuplestorestate *holdStore; /* store for holdable cursors */
 	MemoryContext holdContext;	/* memory containing holdStore */
 
+	/*
+	 * Snapshot under which tuples in the holdStore were read. We must keep a
+	 * reference to this snapshot if there is any possibility that the tuples
+	 * contain TOAST references, because releasing the snapshot could allow
+	 * recently-dead rows to be vacuumed away, along with any toast data
+	 * belonging to them. In the case of a held cursor, we avoid needing to
+	 * keep such a snapshot by forcibly detoasting the data.
+	 */
+	Snapshot	holdSnapshot;	/* registered snapshot, or NULL if none */
+
 	/*
 	 * atStart, atEnd and portalPos indicate the current cursor position.
 	 * portalPos is zero before the first row, N after fetching N'th row of