Commit 1e7629d2 authored by Tom Lane's avatar Tom Lane

Be more careful about the shape of hashable subplan clauses.

nodeSubplan.c expects that the testexpr for a hashable ANY SubPlan
has the form of one or more OpExprs whose LHS is an expression of the
outer query's, while the RHS is an expression over Params representing
output columns of the subquery.  However, the planner only went as far
as verifying that the clauses were all binary OpExprs.  This works
99.99% of the time, because the clauses have the right shape when
emitted by the parser --- but it's possible for function inlining to
break that, as reported by PegoraroF10.  To fix, teach the planner
to check that the LHS and RHS contain the right things, or more
accurately don't contain the wrong things.  Given that this has been
broken for years without anyone noticing, it seems sufficient to just
give up hashing when it happens, rather than go to the trouble of
commuting the clauses back again (which wouldn't necessarily work
anyway).

While poking at that, I also noticed that nodeSubplan.c had a baked-in
assumption that the number of hash clauses is identical to the number
of subquery output columns.  Again, that's fine as far as parser output
goes, but it's not hard to break it via function inlining.  There seems
little reason for that assumption though --- AFAICS, the only thing
it's buying us is not having to store the number of hash clauses
explicitly.  Adding code to the planner to reject such cases would take
more code than getting nodeSubplan.c to cope, so I fixed it that way.

This has been broken for as long as we've had hashable SubPlans,
so back-patch to all supported branches.

Discussion: https://postgr.es/m/1549209182255-0.post@n3.nabble.com
parent 73487a60
...@@ -471,7 +471,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) ...@@ -471,7 +471,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
{ {
SubPlan *subplan = node->subplan; SubPlan *subplan = node->subplan;
PlanState *planstate = node->planstate; PlanState *planstate = node->planstate;
int ncols = list_length(subplan->paramIds); int ncols = node->numCols;
ExprContext *innerecontext = node->innerecontext; ExprContext *innerecontext = node->innerecontext;
MemoryContext oldcontext; MemoryContext oldcontext;
long nbuckets; long nbuckets;
...@@ -878,11 +878,6 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) ...@@ -878,11 +878,6 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
ALLOCSET_SMALL_SIZES); ALLOCSET_SMALL_SIZES);
/* and a short-lived exprcontext for function evaluation */ /* and a short-lived exprcontext for function evaluation */
sstate->innerecontext = CreateExprContext(estate); sstate->innerecontext = CreateExprContext(estate);
/* Silly little array of column numbers 1..n */
ncols = list_length(subplan->paramIds);
sstate->keyColIdx = (AttrNumber *) palloc(ncols * sizeof(AttrNumber));
for (i = 0; i < ncols; i++)
sstate->keyColIdx[i] = i + 1;
/* /*
* We use ExecProject to evaluate the lefthand and righthand * We use ExecProject to evaluate the lefthand and righthand
...@@ -914,13 +909,15 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) ...@@ -914,13 +909,15 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
(int) nodeTag(subplan->testexpr)); (int) nodeTag(subplan->testexpr));
oplist = NIL; /* keep compiler quiet */ oplist = NIL; /* keep compiler quiet */
} }
Assert(list_length(oplist) == ncols); ncols = list_length(oplist);
lefttlist = righttlist = NIL; lefttlist = righttlist = NIL;
sstate->numCols = ncols;
sstate->keyColIdx = (AttrNumber *) palloc(ncols * sizeof(AttrNumber));
sstate->tab_eq_funcoids = (Oid *) palloc(ncols * sizeof(Oid)); sstate->tab_eq_funcoids = (Oid *) palloc(ncols * sizeof(Oid));
sstate->tab_collations = (Oid *) palloc(ncols * sizeof(Oid));
sstate->tab_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); sstate->tab_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
sstate->tab_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); sstate->tab_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
sstate->tab_collations = (Oid *) palloc(ncols * sizeof(Oid));
sstate->lhs_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); sstate->lhs_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
sstate->cur_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); sstate->cur_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
/* we'll need the cross-type equality fns below, but not in sstate */ /* we'll need the cross-type equality fns below, but not in sstate */
...@@ -979,6 +976,9 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) ...@@ -979,6 +976,9 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
/* Set collation */ /* Set collation */
sstate->tab_collations[i - 1] = opexpr->inputcollid; sstate->tab_collations[i - 1] = opexpr->inputcollid;
/* keyColIdx is just column numbers 1..n */
sstate->keyColIdx[i - 1] = i;
i++; i++;
} }
......
...@@ -69,7 +69,7 @@ typedef struct inline_cte_walker_context ...@@ -69,7 +69,7 @@ typedef struct inline_cte_walker_context
static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
List *plan_params, List *plan_params,
SubLinkType subLinkType, int subLinkId, SubLinkType subLinkType, int subLinkId,
Node *testexpr, bool adjust_testexpr, Node *testexpr, List *testexpr_paramids,
bool unknownEqFalse); bool unknownEqFalse);
static List *generate_subquery_params(PlannerInfo *root, List *tlist, static List *generate_subquery_params(PlannerInfo *root, List *tlist,
List **paramIds); List **paramIds);
...@@ -81,7 +81,8 @@ static Node *convert_testexpr(PlannerInfo *root, ...@@ -81,7 +81,8 @@ static Node *convert_testexpr(PlannerInfo *root,
static Node *convert_testexpr_mutator(Node *node, static Node *convert_testexpr_mutator(Node *node,
convert_testexpr_context *context); convert_testexpr_context *context);
static bool subplan_is_hashable(Plan *plan); static bool subplan_is_hashable(Plan *plan);
static bool testexpr_is_hashable(Node *testexpr); static bool testexpr_is_hashable(Node *testexpr, List *param_ids);
static bool test_opexpr_is_hashable(OpExpr *testexpr, List *param_ids);
static bool hash_ok_operator(OpExpr *expr); static bool hash_ok_operator(OpExpr *expr);
static bool contain_dml(Node *node); static bool contain_dml(Node *node);
static bool contain_dml_walker(Node *node, void *context); static bool contain_dml_walker(Node *node, void *context);
...@@ -237,7 +238,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, ...@@ -237,7 +238,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
/* And convert to SubPlan or InitPlan format. */ /* And convert to SubPlan or InitPlan format. */
result = build_subplan(root, plan, subroot, plan_params, result = build_subplan(root, plan, subroot, plan_params,
subLinkType, subLinkId, subLinkType, subLinkId,
testexpr, true, isTopQual); testexpr, NIL, isTopQual);
/* /*
* If it's a correlated EXISTS with an unimportant targetlist, we might be * If it's a correlated EXISTS with an unimportant targetlist, we might be
...@@ -291,12 +292,11 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, ...@@ -291,12 +292,11 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
plan_params, plan_params,
ANY_SUBLINK, 0, ANY_SUBLINK, 0,
newtestexpr, newtestexpr,
false, true)); paramIds,
true));
/* Check we got what we expected */ /* Check we got what we expected */
Assert(hashplan->parParam == NIL); Assert(hashplan->parParam == NIL);
Assert(hashplan->useHashTable); Assert(hashplan->useHashTable);
/* build_subplan won't have filled in paramIds */
hashplan->paramIds = paramIds;
/* Leave it to the executor to decide which plan to use */ /* Leave it to the executor to decide which plan to use */
asplan = makeNode(AlternativeSubPlan); asplan = makeNode(AlternativeSubPlan);
...@@ -319,7 +319,7 @@ static Node * ...@@ -319,7 +319,7 @@ static Node *
build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
List *plan_params, List *plan_params,
SubLinkType subLinkType, int subLinkId, SubLinkType subLinkType, int subLinkId,
Node *testexpr, bool adjust_testexpr, Node *testexpr, List *testexpr_paramids,
bool unknownEqFalse) bool unknownEqFalse)
{ {
Node *result; Node *result;
...@@ -484,10 +484,10 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, ...@@ -484,10 +484,10 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
else else
{ {
/* /*
* Adjust the Params in the testexpr, unless caller said it's not * Adjust the Params in the testexpr, unless caller already took care
* needed. * of it (as indicated by passing a list of Param IDs).
*/ */
if (testexpr && adjust_testexpr) if (testexpr && testexpr_paramids == NIL)
{ {
List *params; List *params;
...@@ -499,7 +499,10 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, ...@@ -499,7 +499,10 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
params); params);
} }
else else
{
splan->testexpr = testexpr; splan->testexpr = testexpr;
splan->paramIds = testexpr_paramids;
}
/* /*
* We can't convert subplans of ALL_SUBLINK or ANY_SUBLINK types to * We can't convert subplans of ALL_SUBLINK or ANY_SUBLINK types to
...@@ -511,7 +514,7 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, ...@@ -511,7 +514,7 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
if (subLinkType == ANY_SUBLINK && if (subLinkType == ANY_SUBLINK &&
splan->parParam == NIL && splan->parParam == NIL &&
subplan_is_hashable(plan) && subplan_is_hashable(plan) &&
testexpr_is_hashable(splan->testexpr)) testexpr_is_hashable(splan->testexpr, splan->paramIds))
splan->useHashTable = true; splan->useHashTable = true;
/* /*
...@@ -734,24 +737,20 @@ subplan_is_hashable(Plan *plan) ...@@ -734,24 +737,20 @@ subplan_is_hashable(Plan *plan)
/* /*
* testexpr_is_hashable: is an ANY SubLink's test expression hashable? * testexpr_is_hashable: is an ANY SubLink's test expression hashable?
*
* To identify LHS vs RHS of the hash expression, we must be given the
* list of output Param IDs of the SubLink's subquery.
*/ */
static bool static bool
testexpr_is_hashable(Node *testexpr) testexpr_is_hashable(Node *testexpr, List *param_ids)
{ {
/* /*
* The testexpr must be a single OpExpr, or an AND-clause containing only * The testexpr must be a single OpExpr, or an AND-clause containing only
* OpExprs. * OpExprs, each of which satisfy test_opexpr_is_hashable().
*
* The combining operators must be hashable and strict. The need for
* hashability is obvious, since we want to use hashing. Without
* strictness, behavior in the presence of nulls is too unpredictable. We
* actually must assume even more than plain strictness: they can't yield
* NULL for non-null inputs, either (see nodeSubplan.c). However, hash
* indexes and hash joins assume that too.
*/ */
if (testexpr && IsA(testexpr, OpExpr)) if (testexpr && IsA(testexpr, OpExpr))
{ {
if (hash_ok_operator((OpExpr *) testexpr)) if (test_opexpr_is_hashable((OpExpr *) testexpr, param_ids))
return true; return true;
} }
else if (is_andclause(testexpr)) else if (is_andclause(testexpr))
...@@ -764,7 +763,7 @@ testexpr_is_hashable(Node *testexpr) ...@@ -764,7 +763,7 @@ testexpr_is_hashable(Node *testexpr)
if (!IsA(andarg, OpExpr)) if (!IsA(andarg, OpExpr))
return false; return false;
if (!hash_ok_operator((OpExpr *) andarg)) if (!test_opexpr_is_hashable((OpExpr *) andarg, param_ids))
return false; return false;
} }
return true; return true;
...@@ -773,6 +772,40 @@ testexpr_is_hashable(Node *testexpr) ...@@ -773,6 +772,40 @@ testexpr_is_hashable(Node *testexpr)
return false; return false;
} }
static bool
test_opexpr_is_hashable(OpExpr *testexpr, List *param_ids)
{
/*
* The combining operator must be hashable and strict. The need for
* hashability is obvious, since we want to use hashing. Without
* strictness, behavior in the presence of nulls is too unpredictable. We
* actually must assume even more than plain strictness: it can't yield
* NULL for non-null inputs, either (see nodeSubplan.c). However, hash
* indexes and hash joins assume that too.
*/
if (!hash_ok_operator(testexpr))
return false;
/*
* The left and right inputs must belong to the outer and inner queries
* respectively; hence Params that will be supplied by the subquery must
* not appear in the LHS, and Vars of the outer query must not appear in
* the RHS. (Ordinarily, this must be true because of the way that the
* parser builds an ANY SubLink's testexpr ... but inlining of functions
* could have changed the expression's structure, so we have to check.
* Such cases do not occur often enough to be worth trying to optimize, so
* we don't worry about trying to commute the clause or anything like
* that; we just need to be sure not to build an invalid plan.)
*/
if (list_length(testexpr->args) != 2)
return false;
if (contain_exec_param((Node *) linitial(testexpr->args), param_ids))
return false;
if (contain_var_clause((Node *) lsecond(testexpr->args)))
return false;
return true;
}
/* /*
* Check expression is hashable + strict * Check expression is hashable + strict
* *
......
...@@ -108,6 +108,7 @@ static bool contain_volatile_functions_not_nextval_walker(Node *node, void *cont ...@@ -108,6 +108,7 @@ static bool contain_volatile_functions_not_nextval_walker(Node *node, void *cont
static bool max_parallel_hazard_walker(Node *node, static bool max_parallel_hazard_walker(Node *node,
max_parallel_hazard_context *context); max_parallel_hazard_context *context);
static bool contain_nonstrict_functions_walker(Node *node, void *context); static bool contain_nonstrict_functions_walker(Node *node, void *context);
static bool contain_exec_param_walker(Node *node, List *param_ids);
static bool contain_context_dependent_node(Node *clause); static bool contain_context_dependent_node(Node *clause);
static bool contain_context_dependent_node_walker(Node *node, int *flags); static bool contain_context_dependent_node_walker(Node *node, int *flags);
static bool contain_leaked_vars_walker(Node *node, void *context); static bool contain_leaked_vars_walker(Node *node, void *context);
...@@ -1221,6 +1222,40 @@ contain_nonstrict_functions_walker(Node *node, void *context) ...@@ -1221,6 +1222,40 @@ contain_nonstrict_functions_walker(Node *node, void *context)
context); context);
} }
/*****************************************************************************
* Check clauses for Params
*****************************************************************************/
/*
* contain_exec_param
* Recursively search for PARAM_EXEC Params within a clause.
*
* Returns true if the clause contains any PARAM_EXEC Param with a paramid
* appearing in the given list of Param IDs. Does not descend into
* subqueries!
*/
bool
contain_exec_param(Node *clause, List *param_ids)
{
return contain_exec_param_walker(clause, param_ids);
}
static bool
contain_exec_param_walker(Node *node, List *param_ids)
{
if (node == NULL)
return false;
if (IsA(node, Param))
{
Param *p = (Param *) node;
if (p->paramkind == PARAM_EXEC &&
list_member_int(param_ids, p->paramid))
return true;
}
return expression_tree_walker(node, contain_exec_param_walker, param_ids);
}
/***************************************************************************** /*****************************************************************************
* Check clauses for context-dependent nodes * Check clauses for context-dependent nodes
*****************************************************************************/ *****************************************************************************/
......
...@@ -867,6 +867,8 @@ typedef struct SubPlanState ...@@ -867,6 +867,8 @@ typedef struct SubPlanState
MemoryContext hashtablecxt; /* memory context containing hash tables */ MemoryContext hashtablecxt; /* memory context containing hash tables */
MemoryContext hashtempcxt; /* temp memory context for hash tables */ MemoryContext hashtempcxt; /* temp memory context for hash tables */
ExprContext *innerecontext; /* econtext for computing inner tuples */ ExprContext *innerecontext; /* econtext for computing inner tuples */
int numCols; /* number of columns being hashed */
/* each of the remaining fields is an array of length numCols: */
AttrNumber *keyColIdx; /* control data for hash tables */ AttrNumber *keyColIdx; /* control data for hash tables */
Oid *tab_eq_funcoids; /* equality func oids for table Oid *tab_eq_funcoids; /* equality func oids for table
* datatype(s) */ * datatype(s) */
......
...@@ -38,6 +38,7 @@ extern bool contain_subplans(Node *clause); ...@@ -38,6 +38,7 @@ extern bool contain_subplans(Node *clause);
extern char max_parallel_hazard(Query *parse); extern char max_parallel_hazard(Query *parse);
extern bool is_parallel_safe(PlannerInfo *root, Node *node); extern bool is_parallel_safe(PlannerInfo *root, Node *node);
extern bool contain_nonstrict_functions(Node *clause); extern bool contain_nonstrict_functions(Node *clause);
extern bool contain_exec_param(Node *clause, List *param_ids);
extern bool contain_leaked_vars(Node *clause); extern bool contain_leaked_vars(Node *clause);
extern Relids find_nonnullable_rels(Node *clause); extern Relids find_nonnullable_rels(Node *clause);
......
...@@ -757,6 +757,7 @@ insert into outer_text values ('a', null); ...@@ -757,6 +757,7 @@ insert into outer_text values ('a', null);
insert into outer_text values ('b', null); insert into outer_text values ('b', null);
create temp table inner_text (c1 text, c2 text); create temp table inner_text (c1 text, c2 text);
insert into inner_text values ('a', null); insert into inner_text values ('a', null);
insert into inner_text values ('123', '456');
select * from outer_text where (f1, f2) not in (select * from inner_text); select * from outer_text where (f1, f2) not in (select * from inner_text);
f1 | f2 f1 | f2
----+---- ----+----
...@@ -797,6 +798,82 @@ select '1'::text in (select '1'::name union all select '1'::name); ...@@ -797,6 +798,82 @@ select '1'::text in (select '1'::name union all select '1'::name);
t t
(1 row) (1 row)
--
-- Test that we don't try to use a hashed subplan if the simplified
-- testexpr isn't of the right shape
--
-- this fails by default, of course
select * from int8_tbl where q1 in (select c1 from inner_text);
ERROR: operator does not exist: bigint = text
LINE 1: select * from int8_tbl where q1 in (select c1 from inner_tex...
^
HINT: No operator matches the given name and argument types. You might need to add explicit type casts.
begin;
-- make an operator to allow it to succeed
create function bogus_int8_text_eq(int8, text) returns boolean
language sql as 'select $1::text = $2';
create operator = (procedure=bogus_int8_text_eq, leftarg=int8, rightarg=text);
explain (costs off)
select * from int8_tbl where q1 in (select c1 from inner_text);
QUERY PLAN
--------------------------------
Seq Scan on int8_tbl
Filter: (hashed SubPlan 1)
SubPlan 1
-> Seq Scan on inner_text
(4 rows)
select * from int8_tbl where q1 in (select c1 from inner_text);
q1 | q2
-----+------------------
123 | 456
123 | 4567890123456789
(2 rows)
-- inlining of this function results in unusual number of hash clauses,
-- which we can still cope with
create or replace function bogus_int8_text_eq(int8, text) returns boolean
language sql as 'select $1::text = $2 and $1::text = $2';
explain (costs off)
select * from int8_tbl where q1 in (select c1 from inner_text);
QUERY PLAN
--------------------------------
Seq Scan on int8_tbl
Filter: (hashed SubPlan 1)
SubPlan 1
-> Seq Scan on inner_text
(4 rows)
select * from int8_tbl where q1 in (select c1 from inner_text);
q1 | q2
-----+------------------
123 | 456
123 | 4567890123456789
(2 rows)
-- inlining of this function causes LHS and RHS to be switched,
-- which we can't cope with, so hashing should be abandoned
create or replace function bogus_int8_text_eq(int8, text) returns boolean
language sql as 'select $2 = $1::text';
explain (costs off)
select * from int8_tbl where q1 in (select c1 from inner_text);
QUERY PLAN
--------------------------------------
Seq Scan on int8_tbl
Filter: (SubPlan 1)
SubPlan 1
-> Materialize
-> Seq Scan on inner_text
(5 rows)
select * from int8_tbl where q1 in (select c1 from inner_text);
q1 | q2
-----+------------------
123 | 456
123 | 4567890123456789
(2 rows)
rollback; -- to get rid of the bogus operator
-- --
-- Test case for planner bug with nested EXISTS handling -- Test case for planner bug with nested EXISTS handling
-- --
......
...@@ -449,6 +449,7 @@ insert into outer_text values ('b', null); ...@@ -449,6 +449,7 @@ insert into outer_text values ('b', null);
create temp table inner_text (c1 text, c2 text); create temp table inner_text (c1 text, c2 text);
insert into inner_text values ('a', null); insert into inner_text values ('a', null);
insert into inner_text values ('123', '456');
select * from outer_text where (f1, f2) not in (select * from inner_text); select * from outer_text where (f1, f2) not in (select * from inner_text);
...@@ -468,6 +469,46 @@ select 'foo'::text in (select 'bar'::name union all select 'bar'::name); ...@@ -468,6 +469,46 @@ select 'foo'::text in (select 'bar'::name union all select 'bar'::name);
select '1'::text in (select '1'::name union all select '1'::name); select '1'::text in (select '1'::name union all select '1'::name);
--
-- Test that we don't try to use a hashed subplan if the simplified
-- testexpr isn't of the right shape
--
-- this fails by default, of course
select * from int8_tbl where q1 in (select c1 from inner_text);
begin;
-- make an operator to allow it to succeed
create function bogus_int8_text_eq(int8, text) returns boolean
language sql as 'select $1::text = $2';
create operator = (procedure=bogus_int8_text_eq, leftarg=int8, rightarg=text);
explain (costs off)
select * from int8_tbl where q1 in (select c1 from inner_text);
select * from int8_tbl where q1 in (select c1 from inner_text);
-- inlining of this function results in unusual number of hash clauses,
-- which we can still cope with
create or replace function bogus_int8_text_eq(int8, text) returns boolean
language sql as 'select $1::text = $2 and $1::text = $2';
explain (costs off)
select * from int8_tbl where q1 in (select c1 from inner_text);
select * from int8_tbl where q1 in (select c1 from inner_text);
-- inlining of this function causes LHS and RHS to be switched,
-- which we can't cope with, so hashing should be abandoned
create or replace function bogus_int8_text_eq(int8, text) returns boolean
language sql as 'select $2 = $1::text';
explain (costs off)
select * from int8_tbl where q1 in (select c1 from inner_text);
select * from int8_tbl where q1 in (select c1 from inner_text);
rollback; -- to get rid of the bogus operator
-- --
-- Test case for planner bug with nested EXISTS handling -- Test case for planner bug with nested EXISTS handling
-- --
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment