Commit afb9249d authored by Tom Lane

Add support for doing late row locking in FDWs.

Previously, FDWs could only do "early row locking", that is, lock a row as
soon as it's fetched, even though local restriction/join conditions might
discard the row later.  This patch adds callbacks that allow FDWs to do
late locking in the same way that it's done for regular tables.

To make use of this feature, an FDW must support the "ctid" column as a
unique row identifier.  Currently, since ctid has to be of type TID,
the feature is of limited use, though in principle it could be used by
postgres_fdw.  We may eventually allow FDWs to specify another data type
for ctid, which would make it possible for more FDWs to use this feature.

This commit does not modify postgres_fdw to use late locking.  We've
tested some prototype code for that, but it's not in committable shape,
and besides it's quite unclear whether it actually makes sense to do late
locking against a remote server.  The extra round trips required are likely
to outweigh any benefit from improved concurrency.

Etsuro Fujita, reviewed by Ashutosh Bapat, and hacked up a lot by me
parent aa4a0b95
This diff is collapsed.
......@@ -898,8 +898,11 @@ InitPlan(QueryDesc *queryDesc, int eflags)
erm->prti = rc->prti;
erm->rowmarkId = rc->rowmarkId;
erm->markType = rc->markType;
erm->strength = rc->strength;
erm->waitPolicy = rc->waitPolicy;
erm->ermActive = false;
ItemPointerSetInvalid(&(erm->curCtid));
erm->ermExtra = NULL;
estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
}
......@@ -1143,6 +1146,8 @@ CheckValidResultRel(Relation resultRel, CmdType operation)
static void
CheckValidRowMarkRel(Relation rel, RowMarkType markType)
{
FdwRoutine *fdwroutine;
switch (rel->rd_rel->relkind)
{
case RELKIND_RELATION:
......@@ -1178,11 +1183,13 @@ CheckValidRowMarkRel(Relation rel, RowMarkType markType)
RelationGetRelationName(rel))));
break;
case RELKIND_FOREIGN_TABLE:
/* Should not get here; planner should have used ROW_MARK_COPY */
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot lock rows in foreign table \"%s\"",
RelationGetRelationName(rel))));
/* Okay only if the FDW supports it */
fdwroutine = GetFdwRoutineForRelation(rel, false);
if (fdwroutine->RefetchForeignRow == NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot lock rows in foreign table \"%s\"",
RelationGetRelationName(rel))));
break;
default:
ereport(ERROR,
......@@ -2005,9 +2012,11 @@ ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo)
/*
* ExecFindRowMark -- find the ExecRowMark struct for given rangetable index
*
* If no such struct, either return NULL or throw error depending on missing_ok
*/
ExecRowMark *
ExecFindRowMark(EState *estate, Index rti)
ExecFindRowMark(EState *estate, Index rti, bool missing_ok)
{
ListCell *lc;
......@@ -2018,8 +2027,9 @@ ExecFindRowMark(EState *estate, Index rti)
if (erm->rti == rti)
return erm;
}
elog(ERROR, "failed to find ExecRowMark for rangetable index %u", rti);
return NULL; /* keep compiler quiet */
if (!missing_ok)
elog(ERROR, "failed to find ExecRowMark for rangetable index %u", rti);
return NULL;
}
/*
......@@ -2530,7 +2540,7 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate)
if (erm->markType == ROW_MARK_REFERENCE)
{
Buffer buffer;
HeapTuple copyTuple;
Assert(erm->relation != NULL);
......@@ -2541,17 +2551,50 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate)
/* non-locked rels could be on the inside of outer joins */
if (isNull)
continue;
tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
/* okay, fetch the tuple */
if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
false, NULL))
elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
/* fetch requests on foreign tables must be passed to their FDW */
if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
{
FdwRoutine *fdwroutine;
bool updated = false;
/* successful, copy and store tuple */
EvalPlanQualSetTuple(epqstate, erm->rti,
heap_copytuple(&tuple));
ReleaseBuffer(buffer);
fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
/* this should have been checked already, but let's be safe */
if (fdwroutine->RefetchForeignRow == NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot lock rows in foreign table \"%s\"",
RelationGetRelationName(erm->relation))));
copyTuple = fdwroutine->RefetchForeignRow(epqstate->estate,
erm,
datum,
&updated);
if (copyTuple == NULL)
elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
/*
* Ideally we'd insist on updated == false here, but that
* assumes that FDWs can track that exactly, which they might
* not be able to. So just ignore the flag.
*/
}
else
{
/* ordinary table, fetch the tuple */
Buffer buffer;
tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
false, NULL))
elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
/* successful, copy tuple */
copyTuple = heap_copytuple(&tuple);
ReleaseBuffer(buffer);
}
/* store tuple */
EvalPlanQualSetTuple(epqstate, erm->rti, copyTuple);
}
else
{
......
......@@ -805,20 +805,11 @@ ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags)
lockmode = NoLock;
else
{
ListCell *l;
/* Keep this check in sync with InitPlan! */
ExecRowMark *erm = ExecFindRowMark(estate, scanrelid, true);
foreach(l, estate->es_rowMarks)
{
ExecRowMark *erm = lfirst(l);
/* Keep this check in sync with InitPlan! */
if (erm->rti == scanrelid &&
erm->relation != NULL)
{
lockmode = NoLock;
break;
}
}
if (erm != NULL && erm->relation != NULL)
lockmode = NoLock;
}
/* Open the relation and acquire lock as needed */
......
......@@ -25,6 +25,7 @@
#include "access/xact.h"
#include "executor/executor.h"
#include "executor/nodeLockRows.h"
#include "foreign/fdwapi.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"
#include "utils/tqual.h"
......@@ -40,7 +41,7 @@ ExecLockRows(LockRowsState *node)
TupleTableSlot *slot;
EState *estate;
PlanState *outerPlan;
bool epq_started;
bool epq_needed;
ListCell *lc;
/*
......@@ -58,15 +59,18 @@ lnext:
if (TupIsNull(slot))
return NULL;
/* We don't need EvalPlanQual unless we get updated tuple version(s) */
epq_needed = false;
/*
* Attempt to lock the source tuple(s). (Note we only have locking
* rowmarks in lr_arowMarks.)
*/
epq_started = false;
foreach(lc, node->lr_arowMarks)
{
ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(lc);
ExecRowMark *erm = aerm->rowmark;
HeapTuple *testTuple;
Datum datum;
bool isNull;
HeapTupleData tuple;
......@@ -77,8 +81,10 @@ lnext:
HeapTuple copyTuple;
/* clear any leftover test tuple for this rel */
if (node->lr_epqstate.estate != NULL)
EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti, NULL);
testTuple = &(node->lr_curtuples[erm->rti - 1]);
if (*testTuple != NULL)
heap_freetuple(*testTuple);
*testTuple = NULL;
/* if child rel, must check whether it produced this row */
if (erm->rti != erm->prti)
......@@ -97,10 +103,12 @@ lnext:
if (tableoid != erm->relid)
{
/* this child is inactive right now */
erm->ermActive = false;
ItemPointerSetInvalid(&(erm->curCtid));
continue;
}
}
erm->ermActive = true;
/* fetch the tuple's ctid */
datum = ExecGetJunkAttribute(slot,
......@@ -109,9 +117,45 @@ lnext:
/* shouldn't ever get a null result... */
if (isNull)
elog(ERROR, "ctid is NULL");
tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
/* requests for foreign tables must be passed to their FDW */
if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
{
FdwRoutine *fdwroutine;
bool updated = false;
fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
/* this should have been checked already, but let's be safe */
if (fdwroutine->RefetchForeignRow == NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot lock rows in foreign table \"%s\"",
RelationGetRelationName(erm->relation))));
copyTuple = fdwroutine->RefetchForeignRow(estate,
erm,
datum,
&updated);
if (copyTuple == NULL)
{
/* couldn't get the lock, so skip this row */
goto lnext;
}
/* save locked tuple for possible EvalPlanQual testing below */
*testTuple = copyTuple;
/*
* if FDW says tuple was updated before getting locked, we need to
* perform EPQ testing to see if quals are still satisfied
*/
if (updated)
epq_needed = true;
continue;
}
/* okay, try to lock the tuple */
tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
switch (erm->markType)
{
case ROW_MARK_EXCLUSIVE:
......@@ -191,40 +235,11 @@ lnext:
/* remember the actually locked tuple's TID */
tuple.t_self = copyTuple->t_self;
/*
* Need to run a recheck subquery. Initialize EPQ state if we
* didn't do so already.
*/
if (!epq_started)
{
ListCell *lc2;
/* Save locked tuple for EvalPlanQual testing below */
*testTuple = copyTuple;
EvalPlanQualBegin(&node->lr_epqstate, estate);
/*
* Ensure that rels with already-visited rowmarks are told
* not to return tuples during the first EPQ test. We can
* exit this loop once it reaches the current rowmark;
* rels appearing later in the list will be set up
* correctly by the EvalPlanQualSetTuple call at the top
* of the loop.
*/
foreach(lc2, node->lr_arowMarks)
{
ExecAuxRowMark *aerm2 = (ExecAuxRowMark *) lfirst(lc2);
if (lc2 == lc)
break;
EvalPlanQualSetTuple(&node->lr_epqstate,
aerm2->rowmark->rti,
NULL);
}
epq_started = true;
}
/* Store target tuple for relation's scan node */
EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti, copyTuple);
/* Remember we need to do EPQ testing */
epq_needed = true;
/* Continue loop until we have all target tuples */
break;
......@@ -237,17 +252,35 @@ lnext:
test);
}
/* Remember locked tuple's TID for WHERE CURRENT OF */
/* Remember locked tuple's TID for EPQ testing and WHERE CURRENT OF */
erm->curCtid = tuple.t_self;
}
/*
* If we need to do EvalPlanQual testing, do so.
*/
if (epq_started)
if (epq_needed)
{
int i;
/* Initialize EPQ machinery */
EvalPlanQualBegin(&node->lr_epqstate, estate);
/*
* Transfer already-fetched tuples into the EPQ state, and make sure
* its test tuples for other tables are reset to NULL.
*/
for (i = 0; i < node->lr_ntables; i++)
{
EvalPlanQualSetTuple(&node->lr_epqstate,
i + 1,
node->lr_curtuples[i]);
/* freeing this tuple is now the responsibility of EPQ */
node->lr_curtuples[i] = NULL;
}
/*
* First, fetch a copy of any rows that were successfully locked
* Next, fetch a copy of any rows that were successfully locked
* without any update having occurred. (We do this in a separate pass
* so as to avoid overhead in the common case where there are no
* concurrent updates.)
......@@ -260,7 +293,7 @@ lnext:
Buffer buffer;
/* ignore non-active child tables */
if (!ItemPointerIsValid(&(erm->curCtid)))
if (!erm->ermActive)
{
Assert(erm->rti != erm->prti); /* check it's child table */
continue;
......@@ -269,6 +302,10 @@ lnext:
if (EvalPlanQualGetTuple(&node->lr_epqstate, erm->rti) != NULL)
continue; /* it was updated and fetched above */
/* foreign tables should have been fetched above */
Assert(erm->relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE);
Assert(ItemPointerIsValid(&(erm->curCtid)));
/* okay, fetch the tuple */
tuple.t_self = erm->curCtid;
if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
......@@ -351,6 +388,13 @@ ExecInitLockRows(LockRows *node, EState *estate, int eflags)
ExecAssignResultTypeFromTL(&lrstate->ps);
lrstate->ps.ps_ProjInfo = NULL;
/*
* Create workspace in which we can remember per-RTE locked tuples
*/
lrstate->lr_ntables = list_length(estate->es_range_table);
lrstate->lr_curtuples = (HeapTuple *)
palloc0(lrstate->lr_ntables * sizeof(HeapTuple));
/*
* Locate the ExecRowMark(s) that this node is responsible for, and
* construct ExecAuxRowMarks for them. (InitPlan should already have
......@@ -370,8 +414,11 @@ ExecInitLockRows(LockRows *node, EState *estate, int eflags)
if (rc->isParent)
continue;
/* safety check on size of lr_curtuples array */
Assert(rc->rti > 0 && rc->rti <= lrstate->lr_ntables);
/* find ExecRowMark and build ExecAuxRowMark */
erm = ExecFindRowMark(estate, rc->rti);
erm = ExecFindRowMark(estate, rc->rti, false);
aerm = ExecBuildAuxRowMark(erm, outerPlan->targetlist);
/*
......
......@@ -1720,7 +1720,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
continue;
/* find ExecRowMark (same for all subplans) */
erm = ExecFindRowMark(estate, rc->rti);
erm = ExecFindRowMark(estate, rc->rti, false);
/* build ExecAuxRowMark for each subplan */
for (i = 0; i < nplans; i++)
......
......@@ -20,6 +20,7 @@
#include "access/htup_details.h"
#include "executor/executor.h"
#include "executor/nodeAgg.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#ifdef OPTIMIZER_DEBUG
......@@ -2324,7 +2325,12 @@ select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
}
else if (rte->relkind == RELKIND_FOREIGN_TABLE)
{
/* For now, we force all foreign tables to use ROW_MARK_COPY */
/* Let the FDW select the rowmark type, if it wants to */
FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);
if (fdwroutine->GetForeignRowMarkType != NULL)
return fdwroutine->GetForeignRowMarkType(rte, strength);
/* Otherwise, use ROW_MARK_COPY by default */
return ROW_MARK_COPY;
}
else
......
......@@ -196,7 +196,7 @@ extern void ExecConstraints(ResultRelInfo *resultRelInfo,
extern void ExecWithCheckOptions(WCOKind kind, ResultRelInfo *resultRelInfo,
TupleTableSlot *slot, EState *estate);
extern LockTupleMode ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo);
extern ExecRowMark *ExecFindRowMark(EState *estate, Index rti);
extern ExecRowMark *ExecFindRowMark(EState *estate, Index rti, bool missing_ok);
extern ExecAuxRowMark *ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist);
extern TupleTableSlot *EvalPlanQual(EState *estate, EPQState *epqstate,
Relation relation, Index rti, int lockmode,
......
......@@ -89,6 +89,14 @@ typedef void (*EndForeignModify_function) (EState *estate,
typedef int (*IsForeignRelUpdatable_function) (Relation rel);
typedef RowMarkType (*GetForeignRowMarkType_function) (RangeTblEntry *rte,
LockClauseStrength strength);
typedef HeapTuple (*RefetchForeignRow_function) (EState *estate,
ExecRowMark *erm,
Datum rowid,
bool *updated);
typedef void (*ExplainForeignScan_function) (ForeignScanState *node,
struct ExplainState *es);
......@@ -151,6 +159,10 @@ typedef struct FdwRoutine
EndForeignModify_function EndForeignModify;
IsForeignRelUpdatable_function IsForeignRelUpdatable;
/* Functions for SELECT FOR UPDATE/SHARE row locking */
GetForeignRowMarkType_function GetForeignRowMarkType;
RefetchForeignRow_function RefetchForeignRow;
/* Support functions for EXPLAIN */
ExplainForeignScan_function ExplainForeignScan;
ExplainForeignModify_function ExplainForeignModify;
......
......@@ -429,8 +429,11 @@ typedef struct EState
* parent RTEs, which can be ignored at runtime). Virtual relations such as
* subqueries-in-FROM will have an ExecRowMark with relation == NULL. See
* PlanRowMark for details about most of the fields. In addition to fields
* directly derived from PlanRowMark, we store curCtid, which is used by the
* WHERE CURRENT OF code.
* directly derived from PlanRowMark, we store an activity flag (to denote
* inactive children of inheritance trees), curCtid, which is used by the
* WHERE CURRENT OF code, and ermExtra, which is available for use by the plan
* node that sources the relation (e.g., for a foreign table the FDW can use
* ermExtra to hold information).
*
* EState->es_rowMarks is a list of these structs.
*/
......@@ -442,8 +445,11 @@ typedef struct ExecRowMark
Index prti; /* parent range table index, if child */
Index rowmarkId; /* unique identifier for resjunk columns */
RowMarkType markType; /* see enum in nodes/plannodes.h */
LockClauseStrength strength; /* LockingClause's strength, or LCS_NONE */
LockWaitPolicy waitPolicy; /* NOWAIT and SKIP LOCKED */
bool ermActive; /* is this mark relevant for current tuple? */
ItemPointerData curCtid; /* ctid of currently locked tuple, if any */
void *ermExtra; /* available for use by relation source node */
} ExecRowMark;
/*
......@@ -1921,6 +1927,8 @@ typedef struct LockRowsState
PlanState ps; /* its first field is NodeTag */
List *lr_arowMarks; /* List of ExecAuxRowMarks */
EPQState lr_epqstate; /* for evaluating EvalPlanQual rechecks */
HeapTuple *lr_curtuples; /* locked tuples (one entry per RT entry) */
int lr_ntables; /* length of lr_curtuples[] array */
} LockRowsState;
/* ----------------
......
......@@ -822,16 +822,16 @@ typedef struct Limit
*
* The first four of these values represent different lock strengths that
* we can take on tuples according to SELECT FOR [KEY] UPDATE/SHARE requests.
* We only support these on regular tables. For foreign tables, any locking
* that might be done for these requests must happen during the initial row
* fetch; there is no mechanism for going back to lock a row later (and thus
* no need for EvalPlanQual machinery during updates of foreign tables).
* We support these on regular tables, as well as on foreign tables whose FDWs
* report support for late locking. For other foreign tables, any locking
* that might be done for such requests must happen during the initial row
* fetch; their FDWs provide no mechanism for going back to lock a row later.
* This means that the semantics will be a bit different than for a local
* table; in particular we are likely to lock more rows than would be locked
* locally, since remote rows will be locked even if they then fail
* locally-checked restriction or join quals. However, the alternative of
* doing a separate remote query to lock each selected row is extremely
* unappealing, so let's do it like this for now.
* locally-checked restriction or join quals. However, the prospect of
* doing a separate remote query to lock each selected row is usually pretty
* unappealing, so early locking remains a credible design choice for FDWs.
*
* When doing UPDATE, DELETE, or SELECT FOR UPDATE/SHARE, we have to uniquely
* identify all the source rows, not only those from the target relations, so
......@@ -840,12 +840,11 @@ typedef struct Limit
* represented by ROW_MARK_REFERENCE. Otherwise (for example for VALUES or
* FUNCTION scans) we have to copy the whole row value. ROW_MARK_COPY is
* pretty inefficient, since most of the time we'll never need the data; but
* fortunately the case is not performance-critical in practice. Note that
* we use ROW_MARK_COPY for non-target foreign tables, even if the FDW has a
* concept of rowid and so could theoretically support some form of
* ROW_MARK_REFERENCE. Although copying the whole row value is inefficient,
* it's probably still faster than doing a second remote fetch, so it doesn't
* seem worth the extra complexity to permit ROW_MARK_REFERENCE.
* fortunately the overhead is usually not performance-critical in practice.
* By default we use ROW_MARK_COPY for foreign tables, but if the FDW has
* a concept of rowid it can request to use ROW_MARK_REFERENCE instead.
* (Again, this probably doesn't make sense if a physical remote fetch is
* needed, but for FDWs that map to local storage it might be credible.)
*/
typedef enum RowMarkType
{
......@@ -866,7 +865,7 @@ typedef enum RowMarkType
* When doing UPDATE, DELETE, or SELECT FOR UPDATE/SHARE, we create a separate
* PlanRowMark node for each non-target relation in the query. Relations that
* are not specified as FOR UPDATE/SHARE are marked ROW_MARK_REFERENCE (if
* regular tables) or ROW_MARK_COPY (if not).
* regular tables or supported foreign tables) or ROW_MARK_COPY (if not).
*
* Initially all PlanRowMarks have rti == prti and isParent == false.
* When the planner discovers that a relation is the root of an inheritance
......@@ -879,8 +878,8 @@ typedef enum RowMarkType
* to use different markTypes).
*
* The planner also adds resjunk output columns to the plan that carry
* information sufficient to identify the locked or fetched rows. For
* regular tables (markType != ROW_MARK_COPY), these columns are named
* information sufficient to identify the locked or fetched rows. When
* markType != ROW_MARK_COPY, these columns are named
* tableoid%u OID of table
* ctid%u TID of row
* The tableoid column is only present for an inheritance hierarchy.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment