Commit 1bc16a94 authored by Tom Lane

Improve make_subplanTargetList to avoid including Vars unnecessarily.

If a Var was used only in a GROUP BY expression, the previous
implementation would include the Var by itself (as well as the expression)
in the generated targetlist.  This wouldn't affect the efficiency of the
scan/join part of the plan at all, but it could result in passing
unnecessarily-wide rows through sorting and grouping steps.  It turns out
to take only a little more code, and not noticeably more time, to generate
a tlist without such redundancy, so let's do that.  Per a recent gripe from
HarmeekSingh Bedi.
parent 1af37ec9
...@@ -85,6 +85,7 @@ static bool choose_hashed_distinct(PlannerInfo *root, ...@@ -85,6 +85,7 @@ static bool choose_hashed_distinct(PlannerInfo *root,
double dNumDistinctRows); double dNumDistinctRows);
static List *make_subplanTargetList(PlannerInfo *root, List *tlist, static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
AttrNumber **groupColIdx, bool *need_tlist_eval); AttrNumber **groupColIdx, bool *need_tlist_eval);
static int get_grouping_column_index(Query *parse, TargetEntry *tle);
static void locate_grouping_columns(PlannerInfo *root, static void locate_grouping_columns(PlannerInfo *root,
List *tlist, List *tlist,
List *sub_tlist, List *sub_tlist,
...@@ -2536,14 +2537,9 @@ choose_hashed_distinct(PlannerInfo *root, ...@@ -2536,14 +2537,9 @@ choose_hashed_distinct(PlannerInfo *root,
* For example, given a query like * For example, given a query like
* SELECT a+b,SUM(c+d) FROM table GROUP BY a+b; * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
* we want to pass this targetlist to the subplan: * we want to pass this targetlist to the subplan:
* a,b,c,d,a+b * a+b,c,d
* where the a+b target will be used by the Sort/Group steps, and the * where the a+b target will be used by the Sort/Group steps, and the
* other targets will be used for computing the final results. (In the * other targets will be used for computing the final results.
* above example we could theoretically suppress the a and b targets and
* pass down only c,d,a+b, but it's not really worth the trouble to
* eliminate simple var references from the subplan. We will avoid doing
* the extra computation to recompute a+b at the outer level; see
* fix_upper_expr() in setrefs.c.)
* *
* If we are grouping or aggregating, *and* there are no non-Var grouping * If we are grouping or aggregating, *and* there are no non-Var grouping
* expressions, then the returned tlist is effectively dummy; we do not * expressions, then the returned tlist is effectively dummy; we do not
...@@ -2569,7 +2565,8 @@ make_subplanTargetList(PlannerInfo *root, ...@@ -2569,7 +2565,8 @@ make_subplanTargetList(PlannerInfo *root,
{ {
Query *parse = root->parse; Query *parse = root->parse;
List *sub_tlist; List *sub_tlist;
List *extravars; List *non_group_cols;
List *non_group_vars;
int numCols; int numCols;
*groupColIdx = NULL; *groupColIdx = NULL;
...@@ -2586,71 +2583,132 @@ make_subplanTargetList(PlannerInfo *root, ...@@ -2586,71 +2583,132 @@ make_subplanTargetList(PlannerInfo *root,
} }
/* /*
* Otherwise, start with a "flattened" tlist (having just the Vars * Otherwise, we must build a tlist containing all grouping columns,
* mentioned in the targetlist and HAVING qual). Note this includes Vars * plus any other Vars mentioned in the targetlist and HAVING qual.
* used in resjunk items, so we are covering the needs of ORDER BY and
* window specifications. Vars used within Aggrefs will be pulled out
* here, too.
*/ */
sub_tlist = flatten_tlist(tlist, sub_tlist = NIL;
PVC_RECURSE_AGGREGATES, non_group_cols = NIL;
PVC_INCLUDE_PLACEHOLDERS);
extravars = pull_var_clause(parse->havingQual,
PVC_RECURSE_AGGREGATES,
PVC_INCLUDE_PLACEHOLDERS);
sub_tlist = add_to_flat_tlist(sub_tlist, extravars);
list_free(extravars);
*need_tlist_eval = false; /* only eval if not flat tlist */ *need_tlist_eval = false; /* only eval if not flat tlist */
/*
* If grouping, create sub_tlist entries for all GROUP BY expressions
* (GROUP BY items that are simple Vars should be in the list already),
* and make an array showing where the group columns are in the sub_tlist.
*/
numCols = list_length(parse->groupClause); numCols = list_length(parse->groupClause);
if (numCols > 0) if (numCols > 0)
{ {
int keyno = 0; /*
* If grouping, create sub_tlist entries for all GROUP BY columns, and
* make an array showing where the group columns are in the sub_tlist.
*
* Note: with this implementation, the array entries will always be
* 1..N, but we don't want callers to assume that.
*/
AttrNumber *grpColIdx; AttrNumber *grpColIdx;
ListCell *gl; ListCell *tl;
grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); grpColIdx = (AttrNumber *) palloc0(sizeof(AttrNumber) * numCols);
*groupColIdx = grpColIdx; *groupColIdx = grpColIdx;
foreach(gl, parse->groupClause) foreach(tl, tlist)
{ {
SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl); TargetEntry *tle = (TargetEntry *) lfirst(tl);
Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist); int colno;
TargetEntry *te;
/* colno = get_grouping_column_index(parse, tle);
* Find or make a matching sub_tlist entry. If the groupexpr if (colno >= 0)
* isn't a Var, no point in searching. (Note that the parser {
* won't make multiple groupClause entries for the same TLE.) /*
*/ * It's a grouping column, so add it to the result tlist and
if (groupexpr && IsA(groupexpr, Var)) * remember its resno in grpColIdx[].
te = tlist_member(groupexpr, sub_tlist); */
else TargetEntry *newtle;
te = NULL;
if (!te) newtle = makeTargetEntry(tle->expr,
list_length(sub_tlist) + 1,
NULL,
false);
sub_tlist = lappend(sub_tlist, newtle);
Assert(grpColIdx[colno] == 0); /* no dups expected */
grpColIdx[colno] = newtle->resno;
if (!(newtle->expr && IsA(newtle->expr, Var)))
*need_tlist_eval = true; /* tlist contains non Vars */
}
else
{ {
te = makeTargetEntry((Expr *) groupexpr, /*
list_length(sub_tlist) + 1, * Non-grouping column, so just remember the expression
NULL, * for later call to pull_var_clause. There's no need for
false); * pull_var_clause to examine the TargetEntry node itself.
sub_tlist = lappend(sub_tlist, te); */
*need_tlist_eval = true; /* it's not flat anymore */ non_group_cols = lappend(non_group_cols, tle->expr);
} }
/* and save its resno */
grpColIdx[keyno++] = te->resno;
} }
} }
else
{
/*
* With no grouping columns, just pass whole tlist to pull_var_clause.
* Need (shallow) copy to avoid damaging input tlist below.
*/
non_group_cols = list_copy(tlist);
}
/*
* If there's a HAVING clause, we'll need the Vars it uses, too.
*/
if (parse->havingQual)
non_group_cols = lappend(non_group_cols, parse->havingQual);
/*
* Pull out all the Vars mentioned in non-group cols (plus HAVING), and
* add them to the result tlist if not already present. (A Var used
* directly as a GROUP BY item will be present already.) Note this
* includes Vars used in resjunk items, so we are covering the needs of
* ORDER BY and window specifications. Vars used within Aggrefs will be
* pulled out here, too.
*/
non_group_vars = pull_var_clause((Node *) non_group_cols,
PVC_RECURSE_AGGREGATES,
PVC_INCLUDE_PLACEHOLDERS);
sub_tlist = add_to_flat_tlist(sub_tlist, non_group_vars);
/* clean up cruft */
list_free(non_group_vars);
list_free(non_group_cols);
return sub_tlist; return sub_tlist;
} }
/*
 * get_grouping_column_index
 *		Report where, if anywhere, a targetlist entry sits in GROUP BY.
 *
 * The result is the zero-based position of the first groupClause item whose
 * tleSortGroupRef equals the TLE's ressortgroupref, or -1 when the TLE has
 * no sortgroupref or no groupClause item refers to it.  Since the parser
 * won't make multiple groupClause entries for the same TLE, at most one
 * position can match.
 */
static int
get_grouping_column_index(Query *parse, TargetEntry *tle)
{
	Index		wanted_ref = tle->ressortgroupref;
	int			position = -1;

	/* A zero sortgroupref can't match anything, so only search otherwise */
	if (wanted_ref != 0)
	{
		int			idx = 0;
		ListCell   *cell;

		foreach(cell, parse->groupClause)
		{
			SortGroupClause *clause = (SortGroupClause *) lfirst(cell);

			if (clause->tleSortGroupRef == wanted_ref)
			{
				/* found it; stop at the first match */
				position = idx;
				break;
			}
			idx++;
		}
	}

	return position;
}
/* /*
* locate_grouping_columns * locate_grouping_columns
* Locate grouping columns in the tlist chosen by create_plan. * Locate grouping columns in the tlist chosen by create_plan.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment