Commit 96ca8ffe authored by Tom Lane's avatar Tom Lane

Fix problems with subselects used in GROUP BY expressions, per gripe

from Philip Warner.  Side effect of change is that GROUP BY expressions
will not be re-evaluated at multiple plan levels anymore, whereas this
sometimes happened with old code.
parent 512a3aef
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.111 2001/10/28 06:25:45 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.112 2001/10/30 19:58:58 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -173,6 +173,17 @@ subquery_planner(Query *parse, double tuple_fraction) ...@@ -173,6 +173,17 @@ subquery_planner(Query *parse, double tuple_fraction)
parse->havingQual = preprocess_expression(parse, parse->havingQual, parse->havingQual = preprocess_expression(parse, parse->havingQual,
EXPRKIND_HAVING); EXPRKIND_HAVING);
/*
* Check for ungrouped variables passed to subplans in targetlist and
* HAVING clause (but not in WHERE or JOIN/ON clauses, since those are
* evaluated before grouping). We can't do this any earlier because
* we must use the preprocessed targetlist for comparisons of grouped
* expressions.
*/
if (parse->hasSubLinks &&
(parse->groupClause != NIL || parse->hasAggs))
check_subplans_for_ungrouped_vars(parse);
/* /*
* A HAVING clause without aggregates is equivalent to a WHERE clause * A HAVING clause without aggregates is equivalent to a WHERE clause
* (except it can only refer to grouped fields). Transfer any * (except it can only refer to grouped fields). Transfer any
...@@ -623,23 +634,10 @@ preprocess_expression(Query *parse, Node *expr, int kind) ...@@ -623,23 +634,10 @@ preprocess_expression(Query *parse, Node *expr, int kind)
#endif #endif
} }
if (parse->hasSubLinks)
{
/* Expand SubLinks to SubPlans */ /* Expand SubLinks to SubPlans */
if (parse->hasSubLinks)
expr = SS_process_sublinks(expr); expr = SS_process_sublinks(expr);
if (kind != EXPRKIND_WHERE &&
(parse->groupClause != NIL || parse->hasAggs))
{
/*
* Check for ungrouped variables passed to subplans. Note we
* do NOT do this for subplans in WHERE (or JOIN/ON); it's
* legal there because WHERE is evaluated pre-GROUP.
*/
check_subplans_for_ungrouped_vars(expr, parse);
}
}
/* Replace uplevel vars with Param nodes */ /* Replace uplevel vars with Param nodes */
if (PlannerQueryLevel > 1) if (PlannerQueryLevel > 1)
expr = SS_replace_correlation_vars(expr); expr = SS_replace_correlation_vars(expr);
...@@ -1122,12 +1120,11 @@ grouping_planner(Query *parse, double tuple_fraction) ...@@ -1122,12 +1120,11 @@ grouping_planner(Query *parse, double tuple_fraction)
/* /*
* If there are aggregates then the Group node should just return * If there are aggregates then the Group node should just return
* the same set of vars as the subplan did (but we can exclude any * the same set of vars as the subplan did. If there are no aggs
* GROUP BY expressions). If there are no aggregates then the * then the Group node had better compute the final tlist.
* Group node had better compute the final tlist.
*/ */
if (parse->hasAggs) if (parse->hasAggs)
group_tlist = flatten_tlist(result_plan->targetlist); group_tlist = new_unsorted_tlist(result_plan->targetlist);
else else
group_tlist = tlist; group_tlist = tlist;
...@@ -1234,7 +1231,10 @@ grouping_planner(Query *parse, double tuple_fraction) ...@@ -1234,7 +1231,10 @@ grouping_planner(Query *parse, double tuple_fraction)
* where the a+b target will be used by the Sort/Group steps, and the * where the a+b target will be used by the Sort/Group steps, and the
* other targets will be used for computing the final results. (In the * other targets will be used for computing the final results. (In the
* above example we could theoretically suppress the a and b targets and * above example we could theoretically suppress the a and b targets and
* use only a+b, but it's not really worth the trouble.) * pass down only c,d,a+b, but it's not really worth the trouble to
* eliminate simple var references from the subplan. We will avoid doing
* the extra computation to recompute a+b at the outer level; see
* replace_vars_with_subplan_refs() in setrefs.c.)
* *
* 'parse' is the query being processed. * 'parse' is the query being processed.
* 'tlist' is the query's target list. * 'tlist' is the query's target list.
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/setrefs.c,v 1.71 2001/03/22 03:59:37 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/setrefs.c,v 1.72 2001/10/30 19:58:58 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -33,7 +33,8 @@ typedef struct ...@@ -33,7 +33,8 @@ typedef struct
typedef struct typedef struct
{ {
Index subvarno; Index subvarno;
List *subplanTargetList; List *subplan_targetlist;
bool tlist_has_non_vars;
} replace_vars_with_subplan_refs_context; } replace_vars_with_subplan_refs_context;
static void fix_expr_references(Plan *plan, Node *node); static void fix_expr_references(Plan *plan, Node *node);
...@@ -43,7 +44,8 @@ static Node *join_references_mutator(Node *node, ...@@ -43,7 +44,8 @@ static Node *join_references_mutator(Node *node,
join_references_context *context); join_references_context *context);
static Node *replace_vars_with_subplan_refs(Node *node, static Node *replace_vars_with_subplan_refs(Node *node,
Index subvarno, Index subvarno,
List *subplanTargetList); List *subplan_targetlist,
bool tlist_has_non_vars);
static Node *replace_vars_with_subplan_refs_mutator(Node *node, static Node *replace_vars_with_subplan_refs_mutator(Node *node,
replace_vars_with_subplan_refs_context *context); replace_vars_with_subplan_refs_context *context);
static bool fix_opids_walker(Node *node, void *context); static bool fix_opids_walker(Node *node, void *context);
...@@ -278,75 +280,62 @@ set_join_references(Join *join) ...@@ -278,75 +280,62 @@ set_join_references(Join *join)
* qual expressions with elements of the subplan's tlist (which was * qual expressions with elements of the subplan's tlist (which was
* generated by flatten_tlist() from these selfsame expressions, so it * generated by flatten_tlist() from these selfsame expressions, so it
* should have all the required variables). There is an important exception, * should have all the required variables). There is an important exception,
* however: a GROUP BY expression that is also an output expression will * however: GROUP BY and ORDER BY expressions will have been pushed into the
* have been pushed into the subplan tlist unflattened. We want to detect * subplan tlist unflattened. If these values are also needed in the output
* this case and reference the subplan output directly. Therefore, check * then we want to reference the subplan tlist element rather than recomputing
* for equality of the whole tlist expression to any subplan element before * the expression.
* we resort to picking the expression apart for individual Vars.
*/ */
static void static void
set_uppernode_references(Plan *plan, Index subvarno) set_uppernode_references(Plan *plan, Index subvarno)
{ {
Plan *subplan = plan->lefttree; Plan *subplan = plan->lefttree;
List *subplanTargetList, List *subplan_targetlist,
*outputTargetList, *output_targetlist,
*l; *l;
bool tlist_has_non_vars;
if (subplan != NULL) if (subplan != NULL)
subplanTargetList = subplan->targetlist; subplan_targetlist = subplan->targetlist;
else else
subplanTargetList = NIL; subplan_targetlist = NIL;
outputTargetList = NIL; /*
foreach(l, plan->targetlist) * Detect whether subplan tlist has any non-Vars (typically it won't
* because it's been flattened). This allows us to save comparisons
* in common cases.
*/
tlist_has_non_vars = false;
foreach(l, subplan_targetlist)
{ {
TargetEntry *tle = (TargetEntry *) lfirst(l); TargetEntry *tle = (TargetEntry *) lfirst(l);
TargetEntry *subplantle;
Node *newexpr;
subplantle = tlistentry_member(tle->expr, subplanTargetList);
if (subplantle)
{
/* Found a matching subplan output expression */
Resdom *resdom = subplantle->resdom;
Var *newvar;
newvar = makeVar(subvarno, if (tle->expr && ! IsA(tle->expr, Var))
resdom->resno,
resdom->restype,
resdom->restypmod,
0);
/* If we're just copying a simple Var, copy up original info */
if (subplantle->expr && IsA(subplantle->expr, Var))
{ {
Var *subvar = (Var *) subplantle->expr; tlist_has_non_vars = true;
break;
newvar->varnoold = subvar->varnoold;
newvar->varoattno = subvar->varoattno;
}
else
{
newvar->varnoold = 0;
newvar->varoattno = 0;
} }
newexpr = (Node *) newvar;
} }
else
output_targetlist = NIL;
foreach(l, plan->targetlist)
{ {
/* No matching expression, so replace individual Vars */ TargetEntry *tle = (TargetEntry *) lfirst(l);
Node *newexpr;
newexpr = replace_vars_with_subplan_refs(tle->expr, newexpr = replace_vars_with_subplan_refs(tle->expr,
subvarno, subvarno,
subplanTargetList); subplan_targetlist,
} tlist_has_non_vars);
outputTargetList = lappend(outputTargetList, output_targetlist = lappend(output_targetlist,
makeTargetEntry(tle->resdom, newexpr)); makeTargetEntry(tle->resdom, newexpr));
} }
plan->targetlist = outputTargetList; plan->targetlist = output_targetlist;
plan->qual = (List *) plan->qual = (List *)
replace_vars_with_subplan_refs((Node *) plan->qual, replace_vars_with_subplan_refs((Node *) plan->qual,
subvarno, subvarno,
subplanTargetList); subplan_targetlist,
tlist_has_non_vars);
} }
/* /*
...@@ -439,9 +428,16 @@ join_references_mutator(Node *node, ...@@ -439,9 +428,16 @@ join_references_mutator(Node *node,
* --- so this routine should only be applied to nodes whose subplans' * --- so this routine should only be applied to nodes whose subplans'
* targetlists were generated via flatten_tlist() or some such method. * targetlists were generated via flatten_tlist() or some such method.
* *
* 'node': the tree to be fixed (a targetlist or qual list) * If tlist_has_non_vars is true, then we try to match whole subexpressions
* against elements of the subplan tlist, so that we can avoid recomputing
* expressions that were already computed by the subplan. (This is relatively
* expensive, so we don't want to try it in the common case where the
* subplan tlist is just a flattened list of Vars.)
*
* 'node': the tree to be fixed (a target item or qual)
* 'subvarno': varno to be assigned to all Vars * 'subvarno': varno to be assigned to all Vars
* 'subplanTargetList': target list for subplan * 'subplan_targetlist': target list for subplan
* 'tlist_has_non_vars': true if subplan_targetlist contains non-Var exprs
* *
* The resulting tree is a copy of the original in which all Var nodes have * The resulting tree is a copy of the original in which all Var nodes have
* varno = subvarno, varattno = resno of corresponding subplan target. * varno = subvarno, varattno = resno of corresponding subplan target.
...@@ -450,12 +446,14 @@ join_references_mutator(Node *node, ...@@ -450,12 +446,14 @@ join_references_mutator(Node *node,
static Node * static Node *
replace_vars_with_subplan_refs(Node *node, replace_vars_with_subplan_refs(Node *node,
Index subvarno, Index subvarno,
List *subplanTargetList) List *subplan_targetlist,
bool tlist_has_non_vars)
{ {
replace_vars_with_subplan_refs_context context; replace_vars_with_subplan_refs_context context;
context.subvarno = subvarno; context.subvarno = subvarno;
context.subplanTargetList = subplanTargetList; context.subplan_targetlist = subplan_targetlist;
context.tlist_has_non_vars = tlist_has_non_vars;
return replace_vars_with_subplan_refs_mutator(node, &context); return replace_vars_with_subplan_refs_mutator(node, &context);
} }
...@@ -468,17 +466,38 @@ replace_vars_with_subplan_refs_mutator(Node *node, ...@@ -468,17 +466,38 @@ replace_vars_with_subplan_refs_mutator(Node *node,
if (IsA(node, Var)) if (IsA(node, Var))
{ {
Var *var = (Var *) node; Var *var = (Var *) node;
Var *newvar = (Var *) copyObject(var);
Resdom *resdom; Resdom *resdom;
Var *newvar;
resdom = tlist_member((Node *) var, context->subplanTargetList); resdom = tlist_member((Node *) var, context->subplan_targetlist);
if (!resdom) if (!resdom)
elog(ERROR, "replace_vars_with_subplan_refs: variable not in subplan target list"); elog(ERROR, "replace_vars_with_subplan_refs: variable not in subplan target list");
newvar = (Var *) copyObject(var);
newvar->varno = context->subvarno; newvar->varno = context->subvarno;
newvar->varattno = resdom->resno; newvar->varattno = resdom->resno;
return (Node *) newvar; return (Node *) newvar;
} }
/* Try matching more complex expressions too, if tlist has any */
if (context->tlist_has_non_vars)
{
Resdom *resdom;
resdom = tlist_member(node, context->subplan_targetlist);
if (resdom)
{
/* Found a matching subplan output expression */
Var *newvar;
newvar = makeVar(context->subvarno,
resdom->resno,
resdom->restype,
resdom->restypmod,
0);
newvar->varnoold = 0; /* wasn't ever a plain Var */
newvar->varoattno = 0;
return (Node *) newvar;
}
}
return expression_tree_mutator(node, return expression_tree_mutator(node,
replace_vars_with_subplan_refs_mutator, replace_vars_with_subplan_refs_mutator,
(void *) context); (void *) context);
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.89 2001/10/25 05:49:34 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.90 2001/10/30 19:58:58 tgl Exp $
* *
* HISTORY * HISTORY
* AUTHOR DATE MAJOR EVENT * AUTHOR DATE MAJOR EVENT
...@@ -39,12 +39,18 @@ ...@@ -39,12 +39,18 @@
((Node *) makeConst(BOOLOID, 1, (Datum) (val), \ ((Node *) makeConst(BOOLOID, 1, (Datum) (val), \
(isnull), true, false, false)) (isnull), true, false, false))
typedef struct
{
Query *query;
List *groupClauses;
} check_subplans_for_ungrouped_vars_context;
static bool contain_agg_clause_walker(Node *node, void *context); static bool contain_agg_clause_walker(Node *node, void *context);
static bool pull_agg_clause_walker(Node *node, List **listptr); static bool pull_agg_clause_walker(Node *node, List **listptr);
static bool contain_subplans_walker(Node *node, void *context); static bool contain_subplans_walker(Node *node, void *context);
static bool pull_subplans_walker(Node *node, List **listptr); static bool pull_subplans_walker(Node *node, List **listptr);
static bool check_subplans_for_ungrouped_vars_walker(Node *node, static bool check_subplans_for_ungrouped_vars_walker(Node *node,
Query *context); check_subplans_for_ungrouped_vars_context *context);
static bool contain_noncachable_functions_walker(Node *node, void *context); static bool contain_noncachable_functions_walker(Node *node, void *context);
static Node *eval_const_expressions_mutator(Node *node, void *context); static Node *eval_const_expressions_mutator(Node *node, void *context);
static Expr *simplify_op_or_func(Expr *expr, List *args); static Expr *simplify_op_or_func(Expr *expr, List *args);
...@@ -517,36 +523,81 @@ pull_subplans_walker(Node *node, List **listptr) ...@@ -517,36 +523,81 @@ pull_subplans_walker(Node *node, List **listptr)
* planner runs. So we do it here, after we have processed sublinks into * planner runs. So we do it here, after we have processed sublinks into
* subplans. This ought to be cleaned up someday. * subplans. This ought to be cleaned up someday.
* *
* 'clause' is the expression tree to be searched for subplans. * A deficiency in this scheme is that any outer reference var must be
* 'query' provides the GROUP BY list, the target list that the group clauses * grouped by itself; we don't recognize groupable expressions within
* refer to, and the range table. * subselects. For example, consider
* SELECT
* (SELECT x FROM bar where y = (foo.a + foo.b))
* FROM foo
* GROUP BY a + b;
* This query will be rejected although it could be allowed.
*/ */
void void
check_subplans_for_ungrouped_vars(Node *clause, check_subplans_for_ungrouped_vars(Query *query)
Query *query)
{ {
check_subplans_for_ungrouped_vars_context context;
List *gl;
context.query = query;
/*
* Build a list of the acceptable GROUP BY expressions for use in
* the walker (to avoid repeated scans of the targetlist within the
* recursive routine).
*/
context.groupClauses = NIL;
foreach(gl, query->groupClause)
{
GroupClause *grpcl = lfirst(gl);
Node *expr;
expr = get_sortgroupclause_expr(grpcl, query->targetList);
context.groupClauses = lcons(expr, context.groupClauses);
}
/* /*
* No special setup needed; context for walker is just the Query * Recursively scan the targetlist and the HAVING clause.
* pointer * WHERE and JOIN/ON conditions are not examined, since they are
* evaluated before grouping.
*/ */
check_subplans_for_ungrouped_vars_walker(clause, query); check_subplans_for_ungrouped_vars_walker((Node *) query->targetList,
&context);
check_subplans_for_ungrouped_vars_walker(query->havingQual,
&context);
freeList(context.groupClauses);
} }
static bool static bool
check_subplans_for_ungrouped_vars_walker(Node *node, check_subplans_for_ungrouped_vars_walker(Node *node,
Query *context) check_subplans_for_ungrouped_vars_context *context)
{ {
List *gl;
if (node == NULL) if (node == NULL)
return false; return false;
if (IsA(node, Const) || IsA(node, Param))
return false; /* constants are always acceptable */
/* /*
* If we find an aggregate function, do not recurse into its * If we find an aggregate function, do not recurse into its
* arguments. Subplans invoked within aggregate calls are allowed to * arguments. Subplans invoked within aggregate calls are allowed to
* receive ungrouped variables. * receive ungrouped variables. (This test and the next one should
* match the logic in parse_agg.c's check_ungrouped_columns().)
*/ */
if (IsA(node, Aggref)) if (IsA(node, Aggref))
return false; return false;
/*
* Check to see if subexpression as a whole matches any GROUP BY item.
* We need to do this at every recursion level so that we recognize
* GROUPed-BY expressions before reaching variables within them.
*/
foreach(gl, context->groupClauses)
{
if (equal(node, lfirst(gl)))
return false; /* acceptable, do not descend more */
}
/* /*
* We can ignore Vars other than in subplan args lists, since the * We can ignore Vars other than in subplan args lists, since the
* parser already checked 'em. * parser already checked 'em.
...@@ -564,7 +615,6 @@ check_subplans_for_ungrouped_vars_walker(Node *node, ...@@ -564,7 +615,6 @@ check_subplans_for_ungrouped_vars_walker(Node *node,
Node *thisarg = lfirst(t); Node *thisarg = lfirst(t);
Var *var; Var *var;
bool contained_in_group_clause; bool contained_in_group_clause;
List *gl;
/* /*
* We do not care about args that are not local variables; * We do not care about args that are not local variables;
...@@ -582,14 +632,9 @@ check_subplans_for_ungrouped_vars_walker(Node *node, ...@@ -582,14 +632,9 @@ check_subplans_for_ungrouped_vars_walker(Node *node,
* Else, see if it is a grouping column. * Else, see if it is a grouping column.
*/ */
contained_in_group_clause = false; contained_in_group_clause = false;
foreach(gl, context->groupClause) foreach(gl, context->groupClauses)
{ {
GroupClause *gcl = lfirst(gl); if (equal(thisarg, lfirst(gl)))
Node *groupexpr;
groupexpr = get_sortgroupclause_expr(gcl,
context->targetList);
if (equal(thisarg, groupexpr))
{ {
contained_in_group_clause = true; contained_in_group_clause = true;
break; break;
...@@ -603,8 +648,8 @@ check_subplans_for_ungrouped_vars_walker(Node *node, ...@@ -603,8 +648,8 @@ check_subplans_for_ungrouped_vars_walker(Node *node,
char *attname; char *attname;
Assert(var->varno > 0 && Assert(var->varno > 0 &&
(int) var->varno <= length(context->rtable)); (int) var->varno <= length(context->query->rtable));
rte = rt_fetch(var->varno, context->rtable); rte = rt_fetch(var->varno, context->query->rtable);
attname = get_rte_attribute_name(rte, var->varattno); attname = get_rte_attribute_name(rte, var->varattno);
elog(ERROR, "Sub-SELECT uses un-GROUPed attribute %s.%s from outer query", elog(ERROR, "Sub-SELECT uses un-GROUPed attribute %s.%s from outer query",
rte->eref->relname, attname); rte->eref->relname, attname);
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: clauses.h,v 1.47 2001/10/28 06:26:07 momjian Exp $ * $Id: clauses.h,v 1.48 2001/10/30 19:58:58 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -44,7 +44,7 @@ extern List *pull_agg_clause(Node *clause); ...@@ -44,7 +44,7 @@ extern List *pull_agg_clause(Node *clause);
extern bool contain_subplans(Node *clause); extern bool contain_subplans(Node *clause);
extern List *pull_subplans(Node *clause); extern List *pull_subplans(Node *clause);
extern void check_subplans_for_ungrouped_vars(Node *clause, Query *query); extern void check_subplans_for_ungrouped_vars(Query *query);
extern bool contain_noncachable_functions(Node *clause); extern bool contain_noncachable_functions(Node *clause);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment