Commit 4f15e5d0 authored by Robert Haas

Defer creation of partially-grouped relation until it's needed.

This avoids unnecessarily creating a RelOptInfo for which we have no
actual need.  This idea is from Ashutosh Bapat, who wrote a very
different patch to accomplish a similar goal.  It will be more
important if and when we get partition-wise aggregate, since then
there could be many partially grouped relations all of which could
potentially be unnecessary.  In passing, this sets the grouping
relation's reltarget, which wasn't done previously but makes things
simpler for this refactoring.

Along the way, adjust things so that add_paths_to_partial_grouping_rel,
now renamed create_partial_grouping_paths, does not perform the Gather
or Gather Merge steps to generate non-partial paths from partial
paths; have the caller do it instead.  This is again for the
convenience of partition-wise aggregate, which wants to inject
additional partial paths after the initial partial paths are created
and before we decide which ones to Gather/Gather Merge.  This might
seem like a separate change, but
it's actually pretty closely entangled; I couldn't really see much
value in separating it and having to change some things twice.

Patch by me, reviewed by Ashutosh Bapat.

Discussion: http://postgr.es/m/CA+TgmoZ+ZJTVad-=vEq393N99KTooxv9k7M+z73qnTAqkb49BQ@mail.gmail.com
parent 4dba331c
...@@ -148,7 +148,6 @@ static void create_degenerate_grouping_paths(PlannerInfo *root, ...@@ -148,7 +148,6 @@ static void create_degenerate_grouping_paths(PlannerInfo *root,
static void create_ordinary_grouping_paths(PlannerInfo *root, static void create_ordinary_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel, RelOptInfo *input_rel,
PathTarget *target, RelOptInfo *grouped_rel, PathTarget *target, RelOptInfo *grouped_rel,
RelOptInfo *partially_grouped_rel,
const AggClauseCosts *agg_costs, const AggClauseCosts *agg_costs,
grouping_sets_data *gd); grouping_sets_data *gd);
static void consider_groupingsets_paths(PlannerInfo *root, static void consider_groupingsets_paths(PlannerInfo *root,
...@@ -208,13 +207,14 @@ static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, ...@@ -208,13 +207,14 @@ static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
const AggClauseCosts *agg_final_costs, const AggClauseCosts *agg_final_costs,
grouping_sets_data *gd, bool can_sort, bool can_hash, grouping_sets_data *gd, bool can_sort, bool can_hash,
double dNumGroups, List *havingQual); double dNumGroups, List *havingQual);
static void add_paths_to_partial_grouping_rel(PlannerInfo *root, static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel, RelOptInfo *grouped_rel,
RelOptInfo *partially_grouped_rel, RelOptInfo *input_rel,
AggClauseCosts *agg_partial_costs, grouping_sets_data *gd,
grouping_sets_data *gd, bool can_sort,
bool can_sort, bool can_hash,
bool can_hash); AggClauseCosts *agg_final_costs);
static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel);
static bool can_parallel_agg(PlannerInfo *root, RelOptInfo *input_rel, static bool can_parallel_agg(PlannerInfo *root, RelOptInfo *input_rel,
RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs); RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs);
...@@ -3688,42 +3688,30 @@ create_grouping_paths(PlannerInfo *root, ...@@ -3688,42 +3688,30 @@ create_grouping_paths(PlannerInfo *root,
{ {
Query *parse = root->parse; Query *parse = root->parse;
RelOptInfo *grouped_rel; RelOptInfo *grouped_rel;
RelOptInfo *partially_grouped_rel;
/* /*
* For now, all aggregated paths are added to the (GROUP_AGG, NULL) * For now, all aggregated paths are added to the (GROUP_AGG, NULL)
* upperrel. Paths that are only partially aggregated go into the * upperrel.
* (UPPERREL_PARTIAL_GROUP_AGG, NULL) upperrel.
*/ */
grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL); grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
partially_grouped_rel = fetch_upper_rel(root, UPPERREL_PARTIAL_GROUP_AGG, grouped_rel->reltarget = target;
NULL);
/* /*
* If the input relation is not parallel-safe, then the grouped relation * If the input relation is not parallel-safe, then the grouped relation
* can't be parallel-safe, either. Otherwise, it's parallel-safe if the * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
* target list and HAVING quals are parallel-safe. The partially grouped * target list and HAVING quals are parallel-safe.
* relation obeys the same rules.
*/ */
if (input_rel->consider_parallel && target_parallel_safe && if (input_rel->consider_parallel && target_parallel_safe &&
is_parallel_safe(root, (Node *) parse->havingQual)) is_parallel_safe(root, (Node *) parse->havingQual))
{
grouped_rel->consider_parallel = true; grouped_rel->consider_parallel = true;
partially_grouped_rel->consider_parallel = true;
}
/* /*
* If the input rel belongs to a single FDW, so does the grouped rel. Same * If the input rel belongs to a single FDW, so does the grouped rel.
* for the partially_grouped_rel.
*/ */
grouped_rel->serverid = input_rel->serverid; grouped_rel->serverid = input_rel->serverid;
grouped_rel->userid = input_rel->userid; grouped_rel->userid = input_rel->userid;
grouped_rel->useridiscurrent = input_rel->useridiscurrent; grouped_rel->useridiscurrent = input_rel->useridiscurrent;
grouped_rel->fdwroutine = input_rel->fdwroutine; grouped_rel->fdwroutine = input_rel->fdwroutine;
partially_grouped_rel->serverid = input_rel->serverid;
partially_grouped_rel->userid = input_rel->userid;
partially_grouped_rel->useridiscurrent = input_rel->useridiscurrent;
partially_grouped_rel->fdwroutine = input_rel->fdwroutine;
/* /*
* Create either paths for a degenerate grouping or paths for ordinary * Create either paths for a degenerate grouping or paths for ordinary
...@@ -3733,7 +3721,7 @@ create_grouping_paths(PlannerInfo *root, ...@@ -3733,7 +3721,7 @@ create_grouping_paths(PlannerInfo *root,
create_degenerate_grouping_paths(root, input_rel, target, grouped_rel); create_degenerate_grouping_paths(root, input_rel, target, grouped_rel);
else else
create_ordinary_grouping_paths(root, input_rel, target, grouped_rel, create_ordinary_grouping_paths(root, input_rel, target, grouped_rel,
partially_grouped_rel, agg_costs, gd); agg_costs, gd);
set_cheapest(grouped_rel); set_cheapest(grouped_rel);
return grouped_rel; return grouped_rel;
...@@ -3831,18 +3819,16 @@ create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, ...@@ -3831,18 +3819,16 @@ create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
static void static void
create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
PathTarget *target, RelOptInfo *grouped_rel, PathTarget *target, RelOptInfo *grouped_rel,
RelOptInfo *partially_grouped_rel,
const AggClauseCosts *agg_costs, const AggClauseCosts *agg_costs,
grouping_sets_data *gd) grouping_sets_data *gd)
{ {
Query *parse = root->parse; Query *parse = root->parse;
Path *cheapest_path = input_rel->cheapest_total_path; Path *cheapest_path = input_rel->cheapest_total_path;
AggClauseCosts agg_partial_costs; /* parallel only */ RelOptInfo *partially_grouped_rel = NULL;
AggClauseCosts agg_final_costs; /* parallel only */ AggClauseCosts agg_final_costs; /* parallel only */
double dNumGroups; double dNumGroups;
bool can_hash; bool can_hash;
bool can_sort; bool can_sort;
bool try_parallel_aggregation;
/* /*
* Estimate number of groups. * Estimate number of groups.
...@@ -3889,59 +3875,24 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, ...@@ -3889,59 +3875,24 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
agg_costs->numOrderedAggs == 0 && agg_costs->numOrderedAggs == 0 &&
(gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause))); (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)));
/*
* Figure out whether a PartialAggregate/Finalize Aggregate execution
* strategy is viable.
*/
try_parallel_aggregation = can_parallel_agg(root, input_rel, grouped_rel,
agg_costs);
/* /*
* Before generating paths for grouped_rel, we first generate any possible * Before generating paths for grouped_rel, we first generate any possible
* partial paths for partially_grouped_rel; that way, later code can * partially grouped paths; that way, later code can easily consider both
* easily consider both parallel and non-parallel approaches to grouping. * parallel and non-parallel approaches to grouping.
*/ */
if (try_parallel_aggregation) MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts));
if (can_parallel_agg(root, input_rel, grouped_rel, agg_costs))
{ {
PathTarget *partial_grouping_target; partially_grouped_rel =
create_partial_grouping_paths(root,
/* grouped_rel,
* Build target list for partial aggregate paths. These paths cannot input_rel,
* just emit the same tlist as regular aggregate paths, because (1) we gd,
* must include Vars and Aggrefs needed in HAVING, which might not can_sort,
* appear in the result tlist, and (2) the Aggrefs must be set in can_hash,
* partial mode. &agg_final_costs);
*/ gather_grouping_paths(root, partially_grouped_rel);
partial_grouping_target = make_partial_grouping_target(root, target, set_cheapest(partially_grouped_rel);
(Node *) parse->havingQual);
partially_grouped_rel->reltarget = partial_grouping_target;
/*
* Collect statistics about aggregates for estimating costs of
* performing aggregation in parallel.
*/
MemSet(&agg_partial_costs, 0, sizeof(AggClauseCosts));
MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts));
if (parse->hasAggs)
{
/* partial phase */
get_agg_clause_costs(root, (Node *) partial_grouping_target->exprs,
AGGSPLIT_INITIAL_SERIAL,
&agg_partial_costs);
/* final phase */
get_agg_clause_costs(root, (Node *) target->exprs,
AGGSPLIT_FINAL_DESERIAL,
&agg_final_costs);
get_agg_clause_costs(root, parse->havingQual,
AGGSPLIT_FINAL_DESERIAL,
&agg_final_costs);
}
add_paths_to_partial_grouping_rel(root, input_rel,
partially_grouped_rel,
&agg_partial_costs,
gd, can_sort, can_hash);
} }
/* Build final grouping paths */ /* Build final grouping paths */
...@@ -6189,46 +6140,49 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, ...@@ -6189,46 +6140,49 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
* Instead of operating directly on the input relation, we can * Instead of operating directly on the input relation, we can
* consider finalizing a partially aggregated path. * consider finalizing a partially aggregated path.
*/ */
foreach(lc, partially_grouped_rel->pathlist) if (partially_grouped_rel != NULL)
{ {
Path *path = (Path *) lfirst(lc); foreach(lc, partially_grouped_rel->pathlist)
/*
* Insert a Sort node, if required. But there's no point in
* sorting anything but the cheapest path.
*/
if (!pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
{ {
if (path != partially_grouped_rel->cheapest_total_path) Path *path = (Path *) lfirst(lc);
continue;
path = (Path *) create_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
-1.0);
}
if (parse->hasAggs) /*
add_path(grouped_rel, (Path *) * Insert a Sort node, if required. But there's no point in
create_agg_path(root, * sorting anything but the cheapest path.
grouped_rel, */
path, if (!pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
target, {
parse->groupClause ? AGG_SORTED : AGG_PLAIN, if (path != partially_grouped_rel->cheapest_total_path)
AGGSPLIT_FINAL_DESERIAL, continue;
parse->groupClause, path = (Path *) create_sort_path(root,
havingQual, grouped_rel,
agg_final_costs, path,
dNumGroups)); root->group_pathkeys,
else -1.0);
add_path(grouped_rel, (Path *) }
create_group_path(root,
grouped_rel, if (parse->hasAggs)
path, add_path(grouped_rel, (Path *)
target, create_agg_path(root,
parse->groupClause, grouped_rel,
havingQual, path,
dNumGroups)); target,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_FINAL_DESERIAL,
parse->groupClause,
havingQual,
agg_final_costs,
dNumGroups));
else
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
target,
parse->groupClause,
havingQual,
dNumGroups));
}
} }
} }
...@@ -6279,10 +6233,10 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, ...@@ -6279,10 +6233,10 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
/* /*
* Generate a Finalize HashAgg Path atop of the cheapest partially * Generate a Finalize HashAgg Path atop of the cheapest partially
* grouped path. Once again, we'll only do this if it looks as though * grouped path, assuming there is one. Once again, we'll only do this
* the hash table won't exceed work_mem. * if it looks as though the hash table won't exceed work_mem.
*/ */
if (partially_grouped_rel->pathlist) if (partially_grouped_rel && partially_grouped_rel->pathlist)
{ {
Path *path = partially_grouped_rel->cheapest_total_path; Path *path = partially_grouped_rel->cheapest_total_path;
...@@ -6307,29 +6261,83 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, ...@@ -6307,29 +6261,83 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
} }
/* /*
* add_paths_to_partial_grouping_rel * create_partial_grouping_paths
* *
* First, generate partially aggregated partial paths from the partial paths * Create a new upper relation representing the result of partial aggregation
* for the input relation, and then generate partially aggregated non-partial * and populate it with appropriate paths. Note that we don't finalize the
* paths using Gather or Gather Merge. All paths for this relation -- both * lists of paths here, so the caller can add additional partial or non-partial
* partial and non-partial -- have been partially aggregated but require a * paths and must afterward call gather_grouping_paths and set_cheapest on
* subsequent FinalizeAggregate step. * the returned upper relation.
*
* All paths for this new upper relation -- both partial and non-partial --
* have been partially aggregated but require a subsequent FinalizeAggregate
* step.
*/ */
static void static RelOptInfo *
add_paths_to_partial_grouping_rel(PlannerInfo *root, create_partial_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel, RelOptInfo *grouped_rel,
RelOptInfo *partially_grouped_rel, RelOptInfo *input_rel,
AggClauseCosts *agg_partial_costs, grouping_sets_data *gd,
grouping_sets_data *gd, bool can_sort,
bool can_sort, bool can_hash,
bool can_hash) AggClauseCosts *agg_final_costs)
{ {
Query *parse = root->parse; Query *parse = root->parse;
RelOptInfo *partially_grouped_rel;
AggClauseCosts agg_partial_costs;
Path *cheapest_partial_path = linitial(input_rel->partial_pathlist); Path *cheapest_partial_path = linitial(input_rel->partial_pathlist);
Size hashaggtablesize; Size hashaggtablesize;
double dNumPartialGroups = 0; double dNumPartialGroups = 0;
ListCell *lc; ListCell *lc;
/*
* Build a new upper relation to represent the result of partially
* aggregating the rows from the input relation.
*/
partially_grouped_rel = fetch_upper_rel(root,
UPPERREL_PARTIAL_GROUP_AGG,
grouped_rel->relids);
partially_grouped_rel->consider_parallel =
grouped_rel->consider_parallel;
partially_grouped_rel->serverid = grouped_rel->serverid;
partially_grouped_rel->userid = grouped_rel->userid;
partially_grouped_rel->useridiscurrent = grouped_rel->useridiscurrent;
partially_grouped_rel->fdwroutine = grouped_rel->fdwroutine;
/*
* Build target list for partial aggregate paths. These paths cannot just
* emit the same tlist as regular aggregate paths, because (1) we must
* include Vars and Aggrefs needed in HAVING, which might not appear in
* the result tlist, and (2) the Aggrefs must be set in partial mode.
*/
partially_grouped_rel->reltarget =
make_partial_grouping_target(root, grouped_rel->reltarget,
(Node *) parse->havingQual);
/*
* Collect statistics about aggregates for estimating costs of performing
* aggregation in parallel.
*/
MemSet(&agg_partial_costs, 0, sizeof(AggClauseCosts));
if (parse->hasAggs)
{
List *partial_target_exprs;
/* partial phase */
partial_target_exprs = partially_grouped_rel->reltarget->exprs;
get_agg_clause_costs(root, (Node *) partial_target_exprs,
AGGSPLIT_INITIAL_SERIAL,
&agg_partial_costs);
/* final phase */
get_agg_clause_costs(root, (Node *) grouped_rel->reltarget->exprs,
AGGSPLIT_FINAL_DESERIAL,
agg_final_costs);
get_agg_clause_costs(root, parse->havingQual,
AGGSPLIT_FINAL_DESERIAL,
agg_final_costs);
}
/* Estimate number of partial groups. */ /* Estimate number of partial groups. */
dNumPartialGroups = get_number_of_groups(root, dNumPartialGroups = get_number_of_groups(root,
cheapest_partial_path->rows, cheapest_partial_path->rows,
...@@ -6372,7 +6380,7 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root, ...@@ -6372,7 +6380,7 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root,
AGGSPLIT_INITIAL_SERIAL, AGGSPLIT_INITIAL_SERIAL,
parse->groupClause, parse->groupClause,
NIL, NIL,
agg_partial_costs, &agg_partial_costs,
dNumPartialGroups)); dNumPartialGroups));
else else
add_partial_path(partially_grouped_rel, (Path *) add_partial_path(partially_grouped_rel, (Path *)
...@@ -6394,7 +6402,7 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root, ...@@ -6394,7 +6402,7 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root,
hashaggtablesize = hashaggtablesize =
estimate_hashagg_tablesize(cheapest_partial_path, estimate_hashagg_tablesize(cheapest_partial_path,
agg_partial_costs, &agg_partial_costs,
dNumPartialGroups); dNumPartialGroups);
/* /*
...@@ -6412,7 +6420,7 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root, ...@@ -6412,7 +6420,7 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root,
AGGSPLIT_INITIAL_SERIAL, AGGSPLIT_INITIAL_SERIAL,
parse->groupClause, parse->groupClause,
NIL, NIL,
agg_partial_costs, &agg_partial_costs,
dNumPartialGroups)); dNumPartialGroups));
} }
} }
...@@ -6431,20 +6439,32 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root, ...@@ -6431,20 +6439,32 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root,
input_rel, partially_grouped_rel); input_rel, partially_grouped_rel);
} }
/* return partially_grouped_rel;
* Try adding Gather or Gather Merge to partial paths to produce }
* non-partial paths.
*/
generate_gather_paths(root, partially_grouped_rel, true);
/* Get cheapest partial path from partially_grouped_rel */ /*
cheapest_partial_path = linitial(partially_grouped_rel->partial_pathlist); * Generate Gather and Gather Merge paths for a grouping relation or partial
* grouping relation.
*
* generate_gather_paths does most of the work, but we also consider a special
* case: we could try sorting the data by the group_pathkeys and then applying
* Gather Merge.
*
* NB: This function shouldn't be used for anything other than a grouped or
* partially grouped relation, not only because it explicitly references
* group_pathkeys but also because we pass "true" as the third argument to
* generate_gather_paths().
*/
static void
gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
{
Path *cheapest_partial_path;
/* /* Try Gather for unordered paths and Gather Merge for ordered ones. */
* generate_gather_paths won't consider sorting the cheapest path to match generate_gather_paths(root, rel, true);
* the group keys and then applying a Gather Merge node to the result;
* that might be a winning strategy. /* Try cheapest partial path + explicit Sort + Gather Merge. */
*/ cheapest_partial_path = linitial(rel->partial_pathlist);
if (!pathkeys_contained_in(root->group_pathkeys, if (!pathkeys_contained_in(root->group_pathkeys,
cheapest_partial_path->pathkeys)) cheapest_partial_path->pathkeys))
{ {
...@@ -6453,24 +6473,20 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root, ...@@ -6453,24 +6473,20 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root,
total_groups = total_groups =
cheapest_partial_path->rows * cheapest_partial_path->parallel_workers; cheapest_partial_path->rows * cheapest_partial_path->parallel_workers;
path = (Path *) create_sort_path(root, partially_grouped_rel, path = (Path *) create_sort_path(root, rel, cheapest_partial_path,
cheapest_partial_path,
root->group_pathkeys, root->group_pathkeys,
-1.0); -1.0);
path = (Path *) path = (Path *)
create_gather_merge_path(root, create_gather_merge_path(root,
partially_grouped_rel, rel,
path, path,
partially_grouped_rel->reltarget, rel->reltarget,
root->group_pathkeys, root->group_pathkeys,
NULL, NULL,
&total_groups); &total_groups);
add_path(partially_grouped_rel, path); add_path(rel, path);
} }
/* Now choose the best path(s) */
set_cheapest(partially_grouped_rel);
} }
/* /*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment