Commit 46c508fb authored by Tom Lane

Fix PARAM_EXEC assignment mechanism to be safe in the presence of WITH.

The planner previously assumed that parameter Vars having the same absolute
query level, varno, and varattno could safely be assigned the same runtime
PARAM_EXEC slot, even though they might be different Vars appearing in
different subqueries.  This was (probably) safe before the introduction of
CTEs, but the lazy-evaluation mechanism used for CTEs means that a CTE can
be executed during execution of some other subquery, causing the lifespan
of Params at the same syntactic nesting level as the CTE to overlap with
use of the same slots inside the CTE.  In 9.1 we created additional hazards
by using the same parameter-assignment technology for nestloop inner scan
parameters, but it was broken before that, as illustrated by the added
regression test.

To fix, restructure the planner's management of PlannerParamItems so that
items having different semantic lifespans are kept rigorously separated.
This will probably result in complex queries using more runtime PARAM_EXEC
slots than before, but the slots are cheap enough that this hardly matters.
Also, stop generating PlannerParamItems containing Params for subquery
outputs: all we really need to do is reserve the PARAM_EXEC slot number,
and that now only takes incrementing a counter.  The planning code is
simpler and probably faster than before, as well as being more correct.

Per report from Vik Reykja.

These changes will mostly also need to be made in the back branches, but
I'm going to hold off on that until after 9.2.0 wraps.
parent e20a90e1
......@@ -1666,7 +1666,6 @@ _outPlannerGlobal(StringInfo str, const PlannerGlobal *node)
WRITE_NODE_TYPE("PLANNERGLOBAL");
/* NB: this isn't a complete set of fields */
WRITE_NODE_FIELD(paramlist);
WRITE_NODE_FIELD(subplans);
WRITE_BITMAPSET_FIELD(rewindPlanIDs);
WRITE_NODE_FIELD(finalrtable);
......@@ -1674,6 +1673,7 @@ _outPlannerGlobal(StringInfo str, const PlannerGlobal *node)
WRITE_NODE_FIELD(resultRelations);
WRITE_NODE_FIELD(relationOids);
WRITE_NODE_FIELD(invalItems);
WRITE_INT_FIELD(nParamExec);
WRITE_UINT_FIELD(lastPHId);
WRITE_UINT_FIELD(lastRowMarkId);
WRITE_BOOL_FIELD(transientPlan);
......@@ -1688,6 +1688,7 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node)
WRITE_NODE_FIELD(parse);
WRITE_NODE_FIELD(glob);
WRITE_UINT_FIELD(query_level);
WRITE_NODE_FIELD(plan_params);
WRITE_BITMAPSET_FIELD(all_baserels);
WRITE_NODE_FIELD(join_rel_list);
WRITE_INT_FIELD(join_cur_level);
......@@ -1754,6 +1755,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
WRITE_FLOAT_FIELD(allvisfrac, "%.6f");
WRITE_NODE_FIELD(subplan);
WRITE_NODE_FIELD(subroot);
WRITE_NODE_FIELD(subplan_params);
/* we don't try to print fdwroutine or fdw_private */
WRITE_NODE_FIELD(baserestrictinfo);
WRITE_NODE_FIELD(joininfo);
......@@ -1950,7 +1952,7 @@ _outPlannerParamItem(StringInfo str, const PlannerParamItem *node)
WRITE_NODE_TYPE("PLANNERPARAMITEM");
WRITE_NODE_FIELD(item);
WRITE_UINT_FIELD(abslevel);
WRITE_INT_FIELD(paramId);
}
/*****************************************************************************
......
......@@ -1145,6 +1145,9 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
else
tuple_fraction = root->tuple_fraction;
/* plan_params should not be in use in current query level */
Assert(root->plan_params == NIL);
/* Generate the plan for the subquery */
rel->subplan = subquery_planner(root->glob, subquery,
root,
......@@ -1152,6 +1155,10 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
&subroot);
rel->subroot = subroot;
/* Isolate the params needed by this specific subplan */
rel->subplan_params = root->plan_params;
root->plan_params = NIL;
/*
* It's possible that constraint exclusion proved the subquery empty. If
* so, it's convenient to turn it back into a dummy path so that we will
......
......@@ -84,7 +84,8 @@ static HashJoin *create_hashjoin_plan(PlannerInfo *root, HashPath *best_path,
Plan *outer_plan, Plan *inner_plan);
static Node *replace_nestloop_params(PlannerInfo *root, Node *expr);
static Node *replace_nestloop_params_mutator(Node *node, PlannerInfo *root);
static void identify_nestloop_extparams(PlannerInfo *root, Plan *subplan);
static void process_subquery_nestloop_params(PlannerInfo *root,
List *subplan_params);
static List *fix_indexqual_references(PlannerInfo *root, IndexPath *index_path);
static List *fix_indexorderby_references(PlannerInfo *root, IndexPath *index_path);
static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol);
......@@ -188,6 +189,9 @@ create_plan(PlannerInfo *root, Path *best_path)
{
Plan *plan;
/* plan_params should not be in use in current query level */
Assert(root->plan_params == NIL);
/* Initialize this module's private workspace in PlannerInfo */
root->curOuterRels = NULL;
root->curOuterParams = NIL;
......@@ -199,6 +203,12 @@ create_plan(PlannerInfo *root, Path *best_path)
if (root->curOuterParams != NIL)
elog(ERROR, "failed to assign all NestLoopParams to plan nodes");
/*
* Reset plan_params to ensure param IDs used for nestloop params are not
* re-used later
*/
root->plan_params = NIL;
return plan;
}
......@@ -1662,7 +1672,8 @@ create_subqueryscan_plan(PlannerInfo *root, Path *best_path,
{
scan_clauses = (List *)
replace_nestloop_params(root, (Node *) scan_clauses);
identify_nestloop_extparams(root, best_path->parent->subplan);
process_subquery_nestloop_params(root,
best_path->parent->subplan_params);
}
scan_plan = make_subqueryscan(tlist,
......@@ -2620,30 +2631,26 @@ replace_nestloop_params_mutator(Node *node, PlannerInfo *root)
}
/*
* identify_nestloop_extparams
* Identify extParams of a parameterized subquery that need to be fed
* process_subquery_nestloop_params
* Handle params of a parameterized subquery that need to be fed
* from an outer nestloop.
*
* Currently, that would be *all* params that a subquery in FROM has demanded
* from the current query level, since they must be LATERAL references.
*
* The subplan's references to the outer variables are already represented
* as PARAM_EXEC Params, so we need not modify the subplan here. What we
* do need to do is add entries to root->curOuterParams to signal the parent
* nestloop plan node that it must provide these values.
*/
static void
identify_nestloop_extparams(PlannerInfo *root, Plan *subplan)
process_subquery_nestloop_params(PlannerInfo *root, List *subplan_params)
{
Bitmapset *tmpset;
int paramid;
ListCell *ppl;
/* Examine each extParam of the subquery's plan */
tmpset = bms_copy(subplan->extParam);
while ((paramid = bms_first_member(tmpset)) >= 0)
foreach(ppl, subplan_params)
{
PlannerParamItem *pitem = list_nth(root->glob->paramlist, paramid);
/* Ignore anything coming from an upper query level */
if (pitem->abslevel != root->query_level)
continue;
PlannerParamItem *pitem = (PlannerParamItem *) lfirst(ppl);
if (IsA(pitem->item, Var))
{
......@@ -2651,14 +2658,14 @@ identify_nestloop_extparams(PlannerInfo *root, Plan *subplan)
NestLoopParam *nlp;
ListCell *lc;
/* If not from a nestloop outer rel, nothing to do */
/* If not from a nestloop outer rel, complain */
if (!bms_is_member(var->varno, root->curOuterRels))
continue;
elog(ERROR, "non-LATERAL parameter required by subquery");
/* Is this param already listed in root->curOuterParams? */
foreach(lc, root->curOuterParams)
{
nlp = (NestLoopParam *) lfirst(lc);
if (nlp->paramno == paramid)
if (nlp->paramno == pitem->paramId)
{
Assert(equal(var, nlp->paramval));
/* Present, so nothing to do */
......@@ -2669,7 +2676,7 @@ identify_nestloop_extparams(PlannerInfo *root, Plan *subplan)
{
/* No, so add it */
nlp = makeNode(NestLoopParam);
nlp->paramno = paramid;
nlp->paramno = pitem->paramId;
nlp->paramval = copyObject(var);
root->curOuterParams = lappend(root->curOuterParams, nlp);
}
......@@ -2680,22 +2687,15 @@ identify_nestloop_extparams(PlannerInfo *root, Plan *subplan)
NestLoopParam *nlp;
ListCell *lc;
/*
* If not from a nestloop outer rel, nothing to do. We use
* bms_overlap as a cheap/quick test to see if the PHV might be
* evaluated in the outer rels, and then grab its PlaceHolderInfo
* to tell for sure.
*/
if (!bms_overlap(phv->phrels, root->curOuterRels))
continue;
/* If not from a nestloop outer rel, complain */
if (!bms_is_subset(find_placeholder_info(root, phv, false)->ph_eval_at,
root->curOuterRels))
continue;
elog(ERROR, "non-LATERAL parameter required by subquery");
/* Is this param already listed in root->curOuterParams? */
foreach(lc, root->curOuterParams)
{
nlp = (NestLoopParam *) lfirst(lc);
if (nlp->paramno == paramid)
if (nlp->paramno == pitem->paramId)
{
Assert(equal(phv, nlp->paramval));
/* Present, so nothing to do */
......@@ -2706,13 +2706,14 @@ identify_nestloop_extparams(PlannerInfo *root, Plan *subplan)
{
/* No, so add it */
nlp = makeNode(NestLoopParam);
nlp->paramno = paramid;
nlp->paramno = pitem->paramId;
nlp->paramval = copyObject(phv);
root->curOuterParams = lappend(root->curOuterParams, nlp);
}
}
else
elog(ERROR, "unexpected type of subquery parameter");
}
bms_free(tmpset);
}
/*
......
......@@ -155,7 +155,6 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
glob = makeNode(PlannerGlobal);
glob->boundParams = boundParams;
glob->paramlist = NIL;
glob->subplans = NIL;
glob->subroots = NIL;
glob->rewindPlanIDs = NULL;
......@@ -164,6 +163,7 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
glob->resultRelations = NIL;
glob->relationOids = NIL;
glob->invalItems = NIL;
glob->nParamExec = 0;
glob->lastPHId = 0;
glob->lastRowMarkId = 0;
glob->transientPlan = false;
......@@ -243,7 +243,7 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
result->rowMarks = glob->finalrowmarks;
result->relationOids = glob->relationOids;
result->invalItems = glob->invalItems;
result->nParamExec = list_length(glob->paramlist);
result->nParamExec = glob->nParamExec;
return result;
}
......@@ -295,6 +295,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
root->glob = glob;
root->query_level = parent_root ? parent_root->query_level + 1 : 1;
root->parent_root = parent_root;
root->plan_params = NIL;
root->planner_cxt = CurrentMemoryContext;
root->init_plans = NIL;
root->cte_plan_ids = NIL;
......@@ -586,7 +587,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
* and attach the initPlans to the top plan node.
*/
if (list_length(glob->subplans) != num_old_subplans ||
root->glob->paramlist != NIL)
root->glob->nParamExec > 0)
SS_finalize_plan(root, plan, true);
/* Return internal info if caller wants it */
......
This diff is collapsed.
......@@ -799,6 +799,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
subroot->glob = root->glob;
subroot->query_level = root->query_level;
subroot->parent_root = root->parent_root;
subroot->plan_params = NIL;
subroot->planner_cxt = CurrentMemoryContext;
subroot->init_plans = NIL;
subroot->cte_plan_ids = NIL;
......
......@@ -238,6 +238,9 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
*/
rel = build_simple_rel(root, rtr->rtindex, RELOPT_BASEREL);
/* plan_params should not be in use in current query level */
Assert(root->plan_params == NIL);
/*
* Generate plan for primitive subquery
*/
......@@ -250,6 +253,13 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
rel->subplan = subplan;
rel->subroot = subroot;
/*
* It should not be possible for the primitive query to contain any
* cross-references to other primitive queries in the setop tree.
*/
if (root->plan_params)
elog(ERROR, "unexpected outer reference in set operation subquery");
/*
* Estimate number of groups if caller wants it. If the subquery used
* grouping or aggregation, its output is probably mostly unique
......
......@@ -119,6 +119,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
rel->allvisfrac = 0;
rel->subplan = NULL;
rel->subroot = NULL;
rel->subplan_params = NIL;
rel->fdwroutine = NULL;
rel->fdw_private = NULL;
rel->baserestrictinfo = NIL;
......@@ -379,6 +380,7 @@ build_join_rel(PlannerInfo *root,
joinrel->allvisfrac = 0;
joinrel->subplan = NULL;
joinrel->subroot = NULL;
joinrel->subplan_params = NIL;
joinrel->fdwroutine = NULL;
joinrel->fdw_private = NULL;
joinrel->baserestrictinfo = NIL;
......
......@@ -75,8 +75,6 @@ typedef struct PlannerGlobal
ParamListInfo boundParams; /* Param values provided to planner() */
List *paramlist; /* to keep track of cross-level Params */
List *subplans; /* Plans for SubPlan nodes */
List *subroots; /* PlannerInfos for SubPlan nodes */
......@@ -93,6 +91,8 @@ typedef struct PlannerGlobal
List *invalItems; /* other dependencies, as PlanInvalItems */
int nParamExec; /* number of PARAM_EXEC Params used */
Index lastPHId; /* highest PlaceHolderVar ID assigned */
Index lastRowMarkId; /* highest PlanRowMark ID assigned */
......@@ -127,6 +127,8 @@ typedef struct PlannerInfo
struct PlannerInfo *parent_root; /* NULL at outermost Query */
List *plan_params; /* list of PlannerParamItems, see below */
/*
* simple_rel_array holds pointers to "base rels" and "other rels" (see
* comments for RelOptInfo for more info). It is indexed by rangetable
......@@ -344,6 +346,7 @@ typedef struct PlannerInfo
* allvisfrac - fraction of disk pages that are marked all-visible
* subplan - plan for subquery (NULL if it's not a subquery)
* subroot - PlannerInfo for subquery (NULL if it's not a subquery)
* subplan_params - list of PlannerParamItems to be passed to subquery
* fdwroutine - function hooks for FDW, if foreign table (else NULL)
* fdw_private - private state for FDW, if foreign table (else NULL)
*
......@@ -436,6 +439,7 @@ typedef struct RelOptInfo
/* use "struct Plan" to avoid including plannodes.h here */
struct Plan *subplan; /* if subquery */
PlannerInfo *subroot; /* if subquery */
List *subplan_params; /* if subquery */
/* use "struct FdwRoutine" to avoid including fdwapi.h here */
struct FdwRoutine *fdwroutine; /* if foreign table */
void *fdw_private; /* if foreign table */
......@@ -1507,23 +1511,26 @@ typedef struct MinMaxAggInfo
} MinMaxAggInfo;
/*
* glob->paramlist keeps track of the PARAM_EXEC slots that we have decided
* we need for the query. At runtime these slots are used to pass values
* around from one plan node to another. They can be used to pass values
* down into subqueries (for outer references in subqueries), or up out of
* subqueries (for the results of a subplan), or from a NestLoop plan node
* into its inner relation (when the inner scan is parameterized with values
* from the outer relation). The n'th entry in the list (n counts from 0)
* corresponds to Param->paramid = n.
*
* Each paramlist item shows the absolute query level it is associated with,
* where the outermost query is level 1 and nested subqueries have higher
* numbers. The item the parameter slot represents can be one of four kinds:
*
* A Var: the slot represents a variable of that level that must be passed
* At runtime, PARAM_EXEC slots are used to pass values around from one plan
* node to another. They can be used to pass values down into subqueries (for
* outer references in subqueries), or up out of subqueries (for the results
* of a subplan), or from a NestLoop plan node into its inner relation (when
* the inner scan is parameterized with values from the outer relation).
* The planner is responsible for assigning nonconflicting PARAM_EXEC IDs to
* the PARAM_EXEC Params it generates.
*
* Outer references are managed via root->plan_params, which is a list of
* PlannerParamItems. While planning a subquery, each parent query level's
* plan_params contains the values required from it by the current subquery.
* During create_plan(), we use plan_params to track values that must be
* passed from outer to inner sides of NestLoop plan nodes.
*
* The item a PlannerParamItem represents can be one of three kinds:
*
* A Var: the slot represents a variable of this level that must be passed
* down because subqueries have outer references to it, or must be passed
* from a NestLoop node of that level to its inner scan. The varlevelsup
* value in the Var will always be zero.
* from a NestLoop node to its inner scan. The varlevelsup value in the Var
* will always be zero.
*
* A PlaceHolderVar: this works much like the Var case, except that the
* entry is a PlaceHolderVar node with a contained expression. The PHV
......@@ -1535,25 +1542,27 @@ typedef struct MinMaxAggInfo
* subquery. The Aggref itself has agglevelsup = 0, and its argument tree
* is adjusted to match in level.
*
* A Param: the slot holds the result of a subplan (it is a setParam item
* for that subplan). The absolute level shown for such items corresponds
* to the parent query of the subplan.
*
* Note: we detect duplicate Var and PlaceHolderVar parameters and coalesce
* them into one slot, but we do not bother to do this for Aggrefs, and it
* would be incorrect to do so for Param slots. Duplicate detection is
* actually *necessary* for NestLoop parameters since it serves to match up
* the usage of a Param (in the inner scan) with the assignment of the value
* (in the NestLoop node). This might result in the same PARAM_EXEC slot being
* used by multiple NestLoop nodes or SubPlan nodes, but no harm is done since
* the same value would be assigned anyway.
* them into one slot, but we do not bother to do that for Aggrefs.
* The scope of duplicate-elimination only extends across the set of
* parameters passed from one query level into a single subquery, or for
* nestloop parameters across the set of nestloop parameters used in a single
* query level. So there is no possibility of a PARAM_EXEC slot being used
* for conflicting purposes.
*
* In addition, PARAM_EXEC slots are assigned for Params representing outputs
* from subplans (values that are setParam items for those subplans). These
* IDs need not be tracked via PlannerParamItems, since we do not need any
* duplicate-elimination nor later processing of the represented expressions.
* Instead, we just record the assignment of the slot number by incrementing
* root->glob->nParamExec.
*/
typedef struct PlannerParamItem
{
NodeTag type;
Node *item; /* the Var, PlaceHolderVar, Aggref, or Param */
Index abslevel; /* its absolute query level */
Node *item; /* the Var, PlaceHolderVar, or Aggref */
int paramId; /* its assigned PARAM_EXEC slot number */
} PlannerParamItem;
/*
......
......@@ -1209,6 +1209,27 @@ SELECT * FROM outermost;
ERROR: recursive reference to query "outermost" must not appear within a subquery
LINE 2: WITH innermost as (SELECT 2 FROM outermost)
^
--
-- This test will fail with the old implementation of PARAM_EXEC parameter
-- assignment, because the "q1" Var passed down to A's targetlist subselect
-- looks exactly like the "A.id" Var passed down to C's subselect, causing
-- the old code to give them the same runtime PARAM_EXEC slot. But the
-- lifespans of the two parameters overlap, thanks to B also reading A.
--
with
A as ( select q2 as id, (select q1) as x from int8_tbl ),
B as ( select id, row_number() over (partition by id) as r from A ),
C as ( select A.id, array(select B.id from B where B.id = A.id) from A )
select * from C;
id | array
-------------------+-------------------------------------
456 | {456}
4567890123456789 | {4567890123456789,4567890123456789}
123 | {123}
4567890123456789 | {4567890123456789,4567890123456789}
-4567890123456789 | {-4567890123456789}
(5 rows)
--
-- Test CTEs read in non-initialization orders
--
......
......@@ -574,6 +574,20 @@ WITH RECURSIVE outermost(x) AS (
)
SELECT * FROM outermost;
--
-- This test will fail with the old implementation of PARAM_EXEC parameter
-- assignment, because the "q1" Var passed down to A's targetlist subselect
-- looks exactly like the "A.id" Var passed down to C's subselect, causing
-- the old code to give them the same runtime PARAM_EXEC slot. But the
-- lifespans of the two parameters overlap, thanks to B also reading A.
--
with
A as ( select q2 as id, (select q1) as x from int8_tbl ),
B as ( select id, row_number() over (partition by id) as r from A ),
C as ( select A.id, array(select B.id from B where B.id = A.id) from A )
select * from C;
--
-- Test CTEs read in non-initialization orders
--
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment