Commit 9c703c16 authored by Tom Lane's avatar Tom Lane

Make queries' locking of indexes more consistent.

The assertions added by commit b04aeb0a exposed that there are some
code paths wherein the executor will try to open an index without
holding any lock on it.  We do have some lock on the index's table,
so it seems likely that there's no fatal problem with this (for
instance, the index couldn't get dropped from under us).  Still,
it's bad practice and we should fix it.

To do so, remove the optimizations in ExecInitIndexScan and friends
that tried to avoid taking a lock on an index belonging to a target
relation, and just take the lock always.  In non-bug cases, this
will result in no additional shared-memory access, since we'll find
in the local lock table that we already have a lock of the desired
type; hence, no significant performance degradation should occur.

Also, adjust the planner and executor so that the type of lock taken
on an index is always identical to the type of lock taken for its table,
by relying on the recently added RangeTblEntry.rellockmode field.
This avoids some corner cases where that might not have been true
before (possibly resulting in extra locking overhead), and prevents
future maintenance issues from having multiple bits of logic that
all needed to be in sync.  In addition, this change removes all core
calls to ExecRelationIsTargetRelation, which avoids a possible O(N^2)
startup penalty for queries with large numbers of target relations.
(We'd probably remove that function altogether, were it not that we
advertise it as something that FDWs might want to use.)

Also adjust some places in selfuncs.c to not take any lock on indexes
they are transiently opening, since we can assume that plancat.c
did that already.

In passing, change gin_clean_pending_list() to take RowExclusiveLock
not AccessShareLock on its target index.  Although it's not clear that
that's actually a bug, it seemed very strange for a function that's
explicitly going to modify the index to use only AccessShareLock.

David Rowley, reviewed by Julien Rouhaud and Amit Langote,
a bit of further tweaking by me

Discussion: https://postgr.es/m/19465.1541636036@sss.pgh.pa.us
parent a96c41fe
...@@ -1031,7 +1031,7 @@ Datum ...@@ -1031,7 +1031,7 @@ Datum
gin_clean_pending_list(PG_FUNCTION_ARGS) gin_clean_pending_list(PG_FUNCTION_ARGS)
{ {
Oid indexoid = PG_GETARG_OID(0); Oid indexoid = PG_GETARG_OID(0);
Relation indexRel = index_open(indexoid, AccessShareLock); Relation indexRel = index_open(indexoid, RowExclusiveLock);
IndexBulkDeleteResult stats; IndexBulkDeleteResult stats;
GinState ginstate; GinState ginstate;
...@@ -1068,7 +1068,7 @@ gin_clean_pending_list(PG_FUNCTION_ARGS) ...@@ -1068,7 +1068,7 @@ gin_clean_pending_list(PG_FUNCTION_ARGS)
initGinState(&ginstate, indexRel); initGinState(&ginstate, indexRel);
ginInsertCleanup(&ginstate, true, true, true, &stats); ginInsertCleanup(&ginstate, true, true, true, &stats);
index_close(indexRel, AccessShareLock); index_close(indexRel, RowExclusiveLock);
PG_RETURN_INT64((int64) stats.pages_deleted); PG_RETURN_INT64((int64) stats.pages_deleted);
} }
...@@ -664,6 +664,10 @@ ExecCreateScanSlotFromOuterPlan(EState *estate, ...@@ -664,6 +664,10 @@ ExecCreateScanSlotFromOuterPlan(EState *estate,
* *
* Detect whether a relation (identified by rangetable index) * Detect whether a relation (identified by rangetable index)
* is one of the target relations of the query. * is one of the target relations of the query.
*
* Note: This is currently no longer used in core. We keep it around
* because FDWs may wish to use it to determine if their foreign table
* is a target relation.
* ---------------------------------------------------------------- * ----------------------------------------------------------------
*/ */
bool bool
......
...@@ -211,7 +211,7 @@ BitmapIndexScanState * ...@@ -211,7 +211,7 @@ BitmapIndexScanState *
ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags) ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags)
{ {
BitmapIndexScanState *indexstate; BitmapIndexScanState *indexstate;
bool relistarget; LOCKMODE lockmode;
/* check for unsupported flags */ /* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
...@@ -260,16 +260,9 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags) ...@@ -260,16 +260,9 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags)
if (eflags & EXEC_FLAG_EXPLAIN_ONLY) if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
return indexstate; return indexstate;
/* /* Open the index relation. */
* Open the index relation. lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode;
* indexstate->biss_RelationDesc = index_open(node->indexid, lockmode);
* If the parent table is one of the target relations of the query, then
* InitPlan already opened and write-locked the index, so we can avoid
* taking another lock here. Otherwise we need a normal reader's lock.
*/
relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid);
indexstate->biss_RelationDesc = index_open(node->indexid,
relistarget ? NoLock : AccessShareLock);
/* /*
* Initialize index-specific scan state * Initialize index-specific scan state
......
...@@ -493,7 +493,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) ...@@ -493,7 +493,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
{ {
IndexOnlyScanState *indexstate; IndexOnlyScanState *indexstate;
Relation currentRelation; Relation currentRelation;
bool relistarget; LOCKMODE lockmode;
TupleDesc tupDesc; TupleDesc tupDesc;
/* /*
...@@ -556,16 +556,9 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) ...@@ -556,16 +556,9 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
if (eflags & EXEC_FLAG_EXPLAIN_ONLY) if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
return indexstate; return indexstate;
/* /* Open the index relation. */
* Open the index relation. lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode;
* indexstate->ioss_RelationDesc = index_open(node->indexid, lockmode);
* If the parent table is one of the target relations of the query, then
* InitPlan already opened and write-locked the index, so we can avoid
* taking another lock here. Otherwise we need a normal reader's lock.
*/
relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid);
indexstate->ioss_RelationDesc = index_open(node->indexid,
relistarget ? NoLock : AccessShareLock);
/* /*
* Initialize index-specific scan state * Initialize index-specific scan state
......
...@@ -901,7 +901,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) ...@@ -901,7 +901,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
{ {
IndexScanState *indexstate; IndexScanState *indexstate;
Relation currentRelation; Relation currentRelation;
bool relistarget; LOCKMODE lockmode;
/* /*
* create state structure * create state structure
...@@ -964,16 +964,9 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) ...@@ -964,16 +964,9 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
if (eflags & EXEC_FLAG_EXPLAIN_ONLY) if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
return indexstate; return indexstate;
/* /* Open the index relation. */
* Open the index relation. lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode;
* indexstate->iss_RelationDesc = index_open(node->indexid, lockmode);
* If the parent table is one of the target relations of the query, then
* InitPlan already opened and write-locked the index, so we can avoid
* taking another lock here. Otherwise we need a normal reader's lock.
*/
relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid);
indexstate->iss_RelationDesc = index_open(node->indexid,
relistarget ? NoLock : AccessShareLock);
/* /*
* Initialize index-specific scan state * Initialize index-specific scan state
......
...@@ -6285,6 +6285,7 @@ plan_create_index_workers(Oid tableOid, Oid indexOid) ...@@ -6285,6 +6285,7 @@ plan_create_index_workers(Oid tableOid, Oid indexOid)
/* Build RelOptInfo */ /* Build RelOptInfo */
rel = build_simple_rel(root, 1, NULL); rel = build_simple_rel(root, 1, NULL);
/* Rels are assumed already locked by the caller */
heap = table_open(tableOid, NoLock); heap = table_open(tableOid, NoLock);
index = index_open(indexOid, NoLock); index = index_open(indexOid, NoLock);
......
...@@ -162,8 +162,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, ...@@ -162,8 +162,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
if (hasindex) if (hasindex)
{ {
List *indexoidlist; List *indexoidlist;
ListCell *l;
LOCKMODE lmode; LOCKMODE lmode;
ListCell *l;
indexoidlist = RelationGetIndexList(relation); indexoidlist = RelationGetIndexList(relation);
...@@ -172,13 +172,10 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, ...@@ -172,13 +172,10 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
* need, and do not release it. This saves a couple of trips to the * need, and do not release it. This saves a couple of trips to the
* shared lock manager while not creating any real loss of * shared lock manager while not creating any real loss of
* concurrency, because no schema changes could be happening on the * concurrency, because no schema changes could be happening on the
* index while we hold lock on the parent rel, and neither lock type * index while we hold lock on the parent rel, and no lock type used
* blocks any other kind of index operation. * for queries blocks any other kind of index operation.
*/ */
if (rel->relid == root->parse->resultRelation) lmode = root->simple_rte_array[varno]->rellockmode;
lmode = RowExclusiveLock;
else
lmode = AccessShareLock;
foreach(l, indexoidlist) foreach(l, indexoidlist)
{ {
...@@ -592,8 +589,8 @@ infer_arbiter_indexes(PlannerInfo *root) ...@@ -592,8 +589,8 @@ infer_arbiter_indexes(PlannerInfo *root)
OnConflictExpr *onconflict = root->parse->onConflict; OnConflictExpr *onconflict = root->parse->onConflict;
/* Iteration state */ /* Iteration state */
RangeTblEntry *rte;
Relation relation; Relation relation;
Oid relationObjectId;
Oid indexOidFromConstraint = InvalidOid; Oid indexOidFromConstraint = InvalidOid;
List *indexList; List *indexList;
ListCell *l; ListCell *l;
...@@ -620,10 +617,9 @@ infer_arbiter_indexes(PlannerInfo *root) ...@@ -620,10 +617,9 @@ infer_arbiter_indexes(PlannerInfo *root)
* the rewriter or when expand_inherited_rtentry() added it to the query's * the rewriter or when expand_inherited_rtentry() added it to the query's
* rangetable. * rangetable.
*/ */
relationObjectId = rt_fetch(root->parse->resultRelation, rte = rt_fetch(root->parse->resultRelation, root->parse->rtable);
root->parse->rtable)->relid;
relation = table_open(relationObjectId, NoLock); relation = table_open(rte->relid, NoLock);
/* /*
* Build normalized/BMS representation of plain indexed attributes, as * Build normalized/BMS representation of plain indexed attributes, as
...@@ -687,15 +683,14 @@ infer_arbiter_indexes(PlannerInfo *root) ...@@ -687,15 +683,14 @@ infer_arbiter_indexes(PlannerInfo *root)
ListCell *el; ListCell *el;
/* /*
* Extract info from the relation descriptor for the index. We know * Extract info from the relation descriptor for the index. Obtain
* that this is a target, so get lock type it is known will ultimately * the same lock type that the executor will ultimately use.
* be required by the executor.
* *
* Let executor complain about !indimmediate case directly, because * Let executor complain about !indimmediate case directly, because
* enforcement needs to occur there anyway when an inference clause is * enforcement needs to occur there anyway when an inference clause is
* omitted. * omitted.
*/ */
idxRel = index_open(indexoid, RowExclusiveLock); idxRel = index_open(indexoid, rte->rellockmode);
idxForm = idxRel->rd_index; idxForm = idxRel->rd_index;
if (!idxForm->indisvalid) if (!idxForm->indisvalid)
......
...@@ -5187,11 +5187,10 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, ...@@ -5187,11 +5187,10 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
/* /*
* Open the table and index so we can read from them. We should * Open the table and index so we can read from them. We should
* already have at least AccessShareLock on the table, but not * already have some type of lock on each.
* necessarily on the index.
*/ */
heapRel = table_open(rte->relid, NoLock); heapRel = table_open(rte->relid, NoLock);
indexRel = index_open(index->indexoid, AccessShareLock); indexRel = index_open(index->indexoid, NoLock);
/* extract index key information from the index's pg_index info */ /* extract index key information from the index's pg_index info */
indexInfo = BuildIndexInfo(indexRel); indexInfo = BuildIndexInfo(indexRel);
...@@ -5305,7 +5304,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, ...@@ -5305,7 +5304,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
/* Clean everything up */ /* Clean everything up */
ExecDropSingleTupleTableSlot(slot); ExecDropSingleTupleTableSlot(slot);
index_close(indexRel, AccessShareLock); index_close(indexRel, NoLock);
table_close(heapRel, NoLock); table_close(heapRel, NoLock);
MemoryContextSwitchTo(oldcontext); MemoryContextSwitchTo(oldcontext);
...@@ -6472,9 +6471,10 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, ...@@ -6472,9 +6471,10 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
*/ */
if (!index->hypothetical) if (!index->hypothetical)
{ {
indexRel = index_open(index->indexoid, AccessShareLock); /* Lock should have already been obtained in plancat.c */
indexRel = index_open(index->indexoid, NoLock);
ginGetStats(indexRel, &ginStats); ginGetStats(indexRel, &ginStats);
index_close(indexRel, AccessShareLock); index_close(indexRel, NoLock);
} }
else else
{ {
...@@ -6781,11 +6781,12 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, ...@@ -6781,11 +6781,12 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
&spc_seq_page_cost); &spc_seq_page_cost);
/* /*
* Obtain some data from the index itself. * Obtain some data from the index itself. A lock should have already
* been obtained on the index in plancat.c.
*/ */
indexRel = index_open(index->indexoid, AccessShareLock); indexRel = index_open(index->indexoid, NoLock);
brinGetStats(indexRel, &statsData); brinGetStats(indexRel, &statsData);
index_close(indexRel, AccessShareLock); index_close(indexRel, NoLock);
/* /*
* Compute index correlation * Compute index correlation
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment